llama-cpp-haystack 0.4.0__tar.gz → 0.4.2__tar.gz

This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
{llama_cpp_haystack-0.4.0 → llama_cpp_haystack-0.4.2}/.gitignore

@@ -135,3 +135,12 @@ dmypy.json
 # Docs generation artifacts
 _readme_*.md
 .idea
+
+# macOS
+.DS_Store
+
+# http cache (requests-cache)
+**/http_cache.sqlite
+
+# ruff
+.ruff_cache
llama_cpp_haystack-0.4.2/CHANGELOG.md (new file)

@@ -0,0 +1,50 @@
+# Changelog
+
+## [integrations/llama_cpp-v0.4.1] - 2024-08-08
+
+### 🐛 Bug Fixes
+
+- Replace DynamicChatPromptBuilder with ChatPromptBuilder (#940)
+
+### ⚙️ Miscellaneous Tasks
+
+- Retry tests to reduce flakyness (#836)
+- Update ruff invocation to include check parameter (#853)
+- Pin `llama-cpp-python>=0.2.87` (#955)
+
+## [integrations/llama_cpp-v0.4.0] - 2024-05-13
+
+### 🐛 Bug Fixes
+
+- Fix commit (#436)
+
+
+- Fix order of API docs (#447)
+
+This PR will also push the docs to Readme
+
+### 📚 Documentation
+
+- Update category slug (#442)
+- Small consistency improvements (#536)
+- Disable-class-def (#556)
+
+### ⚙️ Miscellaneous Tasks
+
+- [**breaking**] Rename model_path to model in the Llama.cpp integration (#243)
+
+### Llama.cpp
+
+- Generate api docs (#353)
+
+## [integrations/llama_cpp-v0.2.1] - 2024-01-18
+
+## [integrations/llama_cpp-v0.2.0] - 2024-01-17
+
+## [integrations/llama_cpp-v0.1.0] - 2024-01-09
+
+### 🚀 Features
+
+- Add Llama.cpp Generator (#179)
+
+<!-- generated by git-cliff -->
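One note on the 0.4.0 entry above: the `model_path` → `model` rename (#243) is the only change flagged as breaking in this range. A minimal sketch of post-rename usage, assuming a locally downloaded GGUF file (the filename below is a placeholder, not part of the package):

```python
from haystack_integrations.components.generators.llama_cpp import LlamaCppGenerator

# The model file is now passed as `model` (formerly `model_path`).
# "openchat-3.5.gguf" is a placeholder for any local GGUF model file.
generator = LlamaCppGenerator(model="openchat-3.5.gguf", n_ctx=512, n_batch=128)
generator.warm_up()  # loads the model before the first run() call
result = generator.run("Briefly explain what llama.cpp is.")
print(result["replies"][0])
```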
{llama_cpp_haystack-0.4.0 → llama_cpp_haystack-0.4.2}/PKG-INFO

@@ -1,14 +1,13 @@
 Metadata-Version: 2.3
 Name: llama-cpp-haystack
-Version: 0.4.0
+Version: 0.4.2
 Summary: An integration between the llama.cpp LLM framework and Haystack
 Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/llama_cpp#readme
 Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
 Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/llama_cpp
 Author: Ashwin Mathur
 Author-email: deepset GmbH <info@deepset.ai>
-License-Expression: Apache-2.0
-License-File: LICENSE.txt
+License: Apache-2.0
 Classifier: Development Status :: 4 - Beta
 Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Programming Language :: Python
@@ -21,7 +20,7 @@ Classifier: Programming Language :: Python :: Implementation :: CPython
 Classifier: Programming Language :: Python :: Implementation :: PyPy
 Requires-Python: >=3.8
 Requires-Dist: haystack-ai
-Requires-Dist: llama-cpp-python
+Requires-Dist: llama-cpp-python>=0.2.87
 Description-Content-Type: text/markdown
 
 # llama-cpp-haystack
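The substantive metadata change here is the new version floor on `llama-cpp-python` (#955). A quick stdlib-only check that an installed environment picked up the pin (a sketch; the outputs in the comments are indicative):

```python
from importlib.metadata import requires, version

# Read the installed package's metadata (the PKG-INFO shown above).
print(version("llama-cpp-haystack"))  # e.g. "0.4.2"
for requirement in requires("llama-cpp-haystack") or []:
    print(requirement)  # expect "llama-cpp-python>=0.2.87" among the lines
```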
@@ -6,6 +6,7 @@ from haystack.components.embedders import SentenceTransformersDocumentEmbedder,
 from haystack.components.retrievers import InMemoryEmbeddingRetriever
 from haystack.components.writers import DocumentWriter
 from haystack.document_stores import InMemoryDocumentStore
+
 from haystack_integrations.components.generators.llama_cpp import LlamaCppGenerator
 
 # Load first 100 rows of the Simple Wikipedia Dataset from HuggingFace
{llama_cpp_haystack-0.4.0 → llama_cpp_haystack-0.4.2}/pyproject.toml

@@ -15,7 +15,7 @@ authors = [
   { name = "Ashwin Mathur", email = "" },
 ]
 classifiers = [
-  "License :: OSI Approved :: Apache Software License",
+  "License :: OSI Approved :: Apache Software License",
   "Development Status :: 4 - Beta",
   "Programming Language :: Python",
   "Programming Language :: Python :: 3.8",
@@ -26,10 +26,7 @@ classifiers = [
   "Programming Language :: Python :: Implementation :: CPython",
   "Programming Language :: Python :: Implementation :: PyPy",
 ]
-dependencies = [
-  "haystack-ai",
-  "llama-cpp-python"
-]
+dependencies = ["haystack-ai", "llama-cpp-python>=0.2.87"]
 
 [project.urls]
 Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/llama_cpp#readme"
@@ -48,59 +45,42 @@ root = "../.."
 git_describe_command = 'git describe --tags --match="integrations/llama_cpp-v[0-9]*"'
 
 [tool.hatch.envs.default]
+installer = "uv"
 dependencies = [
   "coverage[toml]>=6.5",
   "pytest",
+  "pytest-rerunfailures",
   "haystack-pydoc-tools",
-  "transformers[sentencepiece]"
+  "transformers[sentencepiece]",
 ]
 [tool.hatch.envs.default.scripts]
 test = "pytest {args:tests}"
 test-cov = "coverage run -m pytest {args:tests}"
-cov-report = [
-  "- coverage combine",
-  "coverage report",
-]
-cov = [
-  "test-cov",
-  "cov-report",
-]
-docs = [
-  "pydoc-markdown pydoc/config.yml"
-]
+test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
+cov-report = ["- coverage combine", "coverage report"]
+cov = ["test-cov", "cov-report"]
+cov-retry = ["test-cov-retry", "cov-report"]
+docs = ["pydoc-markdown pydoc/config.yml"]
 [[tool.hatch.envs.all.matrix]]
 python = ["3.8", "3.9", "3.10", "3.11", "3.12"]
 
 
 [tool.hatch.envs.lint]
+installer = "uv"
 detached = true
-dependencies = [
-  "black>=23.1.0",
-  "mypy>=1.0.0",
-  "ruff>=0.0.243",
-]
+dependencies = ["pip", "black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"]
 
 [tool.hatch.envs.lint.scripts]
 typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}"
-style = [
-  "ruff {args:.}",
-  "black --check --diff {args:.}",
-]
-fmt = [
-  "black {args:.}",
-  "ruff --fix {args:.}",
-  "style",
-]
-all = [
-  "style",
-  "typing",
-]
+style = ["ruff check {args:.}", "black --check --diff {args:.}"]
+fmt = ["black {args:.}", "ruff check --fix {args:.}", "style"]
+all = ["style", "typing"]
 
 [tool.hatch.metadata]
 allow-direct-references = true
 
-[tool.ruff.isort]
-known-first-party = ["src"]
+[tool.ruff.lint.isort]
+known-first-party = ["haystack_integrations"]
 
 [tool.black]
 target-version = ["py38"]
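The new `test-cov-retry` script relies on the `pytest-rerunfailures` plugin added to the default environment: `--reruns 3 --reruns-delay 30` reruns each failing test up to three times with a 30-second pause, and `-x` stops at the first test that still fails after its retries. The same plugin also supports opt-in retries on individual tests; a minimal sketch (the test body is illustrative):

```python
import random

import pytest


# Per-test equivalent of the env-wide `--reruns 3 --reruns-delay 30` flags;
# requires the pytest-rerunfailures plugin the default hatch env now installs.
@pytest.mark.flaky(reruns=3, reruns_delay=30)
def test_sometimes_flaky():
    # Illustrative flaky condition: fails roughly one attempt in ten.
    assert random.random() < 0.9
```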
@@ -110,6 +90,8 @@ skip-string-normalization = true
 [tool.ruff]
 target-version = "py38"
 line-length = 120
+
+[tool.ruff.lint]
 select = [
   "A",
   "ARG",
@@ -140,19 +122,25 @@ ignore = [
   # Allow non-abstract empty methods in abstract base classes
   "B027",
   # Ignore checks for possible passwords
-  "S105", "S106", "S107",
+  "S105",
+  "S106",
+  "S107",
   # Ignore complexity
-  "C901", "PLR0911", "PLR0912", "PLR0913", "PLR0915",
+  "C901",
+  "PLR0911",
+  "PLR0912",
+  "PLR0913",
+  "PLR0915",
 ]
 unfixable = [
   # Don't touch unused imports
   "F401",
 ]
 
-[tool.ruff.flake8-tidy-imports]
+[tool.ruff.lint.flake8-tidy-imports]
 ban-relative-imports = "parents"
 
-[tool.ruff.per-file-ignores]
+[tool.ruff.lint.per-file-ignores]
 # Tests can use magic values, assertions, and relative imports
 "tests/**/*" = ["PLR2004", "S101", "TID252"]
 # Examples can print their output
@@ -167,27 +155,16 @@ parallel = false
 
 [tool.coverage.report]
 omit = ["*/tests/*", "*/__init__.py"]
-show_missing=true
-exclude_lines = [
-  "no cov",
-  "if __name__ == .__main__.:",
-  "if TYPE_CHECKING:",
-]
+show_missing = true
+exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"]
 
 
 [tool.pytest.ini_options]
 markers = [
   "integration: marks tests as slow (deselect with '-m \"not integration\"')",
 ]
-addopts = [
-  "--import-mode=importlib",
-]
+addopts = ["--import-mode=importlib"]
 
 [[tool.mypy.overrides]]
-module = [
-  "haystack.*",
-  "haystack_integrations.*",
-  "pytest.*",
-  "llama_cpp.*"
-]
+module = ["haystack.*", "haystack_integrations.*", "pytest.*", "llama_cpp.*"]
 ignore_missing_imports = true
{llama_cpp_haystack-0.4.0 → llama_cpp_haystack-0.4.2}/src/haystack_integrations/components/generators/llama_cpp/__init__.py

@@ -5,4 +5,4 @@
 from .chat.chat_generator import LlamaCppChatGenerator
 from .generator import LlamaCppGenerator
 
-__all__ = ["LlamaCppGenerator", "LlamaCppChatGenerator"]
+__all__ = ["LlamaCppChatGenerator", "LlamaCppGenerator"]
{llama_cpp_haystack-0.4.0 → llama_cpp_haystack-0.4.2}/src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py

@@ -9,6 +9,21 @@ from llama_cpp.llama_tokenizer import LlamaHFTokenizer
 logger = logging.getLogger(__name__)
 
 
+def _convert_message_to_llamacpp_format(message: ChatMessage) -> Dict[str, str]:
+    """
+    Convert a message to the format expected by Llama.cpp.
+    :returns: A dictionary with the following keys:
+        - `role`
+        - `content`
+        - `name` (optional)
+    """
+    formatted_msg = {"role": message.role.value, "content": message.text}
+    if message.name:
+        formatted_msg["name"] = message.name
+
+    return formatted_msg
+
+
 @component
 class LlamaCppChatGenerator:
     """
@@ -96,7 +111,7 @@ class LlamaCppChatGenerator:
             return {"replies": []}
 
         updated_generation_kwargs = {**self.generation_kwargs, **(generation_kwargs or {})}
-        formatted_messages = [msg.to_openai_format() for msg in messages]
+        formatted_messages = [_convert_message_to_llamacpp_format(msg) for msg in messages]
 
         response = self.model.create_chat_completion(messages=formatted_messages, **updated_generation_kwargs)
         replies = [
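With this change, `run()` formats messages through the module's own helper instead of `ChatMessage.to_openai_format()`, decoupling the integration from that Haystack method. A sketch of the updated component end to end, assuming a locally downloaded GGUF chat model (the filename is a placeholder):

```python
from haystack.dataclasses import ChatMessage

from haystack_integrations.components.generators.llama_cpp import LlamaCppChatGenerator

# "openchat-3.5.gguf" is a placeholder for any local GGUF chat model.
generator = LlamaCppChatGenerator(model="openchat-3.5.gguf", n_ctx=512, n_batch=128)
generator.warm_up()

# Each ChatMessage is converted to {"role": ..., "content": ...} by
# _convert_message_to_llamacpp_format before reaching create_chat_completion.
result = generator.run([ChatMessage.from_user("What is the capital of France?")])
print(result["replies"][0].text)
```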
{llama_cpp_haystack-0.4.0 → llama_cpp_haystack-0.4.2}/tests/test_chat_generator.py

@@ -6,11 +6,15 @@ from unittest.mock import MagicMock
 
 import pytest
 from haystack import Document, Pipeline
-from haystack.components.builders.dynamic_chat_prompt_builder import DynamicChatPromptBuilder
+from haystack.components.builders import ChatPromptBuilder
 from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
 from haystack.dataclasses import ChatMessage, ChatRole
 from haystack.document_stores.in_memory import InMemoryDocumentStore
-from haystack_integrations.components.generators.llama_cpp import LlamaCppChatGenerator
+
+from haystack_integrations.components.generators.llama_cpp.chat.chat_generator import (
+    LlamaCppChatGenerator,
+    _convert_message_to_llamacpp_format,
+)
 
 
 @pytest.fixture
@@ -29,6 +33,21 @@ def download_file(file_link, filename, capsys):
        print("\nModel file already exists.")
 
 
+def test_convert_message_to_llamacpp_format():
+    message = ChatMessage.from_system("You are good assistant")
+    assert _convert_message_to_llamacpp_format(message) == {"role": "system", "content": "You are good assistant"}
+
+    message = ChatMessage.from_user("I have a question")
+    assert _convert_message_to_llamacpp_format(message) == {"role": "user", "content": "I have a question"}
+
+    message = ChatMessage.from_function("Function call", "function_name")
+    assert _convert_message_to_llamacpp_format(message) == {
+        "role": "function",
+        "content": "Function call",
+        "name": "function_name",
+    }
+
+
 class TestLlamaCppChatGenerator:
     @pytest.fixture
     def generator(self, model_path, capsys):
@@ -144,7 +163,7 @@ class TestLlamaCppChatGenerator:
         assert isinstance(result["replies"], list)
         assert len(result["replies"]) == 1
         assert isinstance(result["replies"][0], ChatMessage)
-        assert result["replies"][0].content == "Generated text"
+        assert result["replies"][0].text == "Generated text"
         assert result["replies"][0].role == ChatRole.ASSISTANT
 
     def test_run_with_generation_kwargs(self, generator_mock):
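This and the following test hunks are a mechanical migration from the older `ChatMessage.content` attribute to the `ChatMessage.text` accessor used by newer Haystack releases. A one-line sketch of the new accessor:

```python
from haystack.dataclasses import ChatMessage

reply = ChatMessage.from_assistant("Generated text")
# The message body is read via `.text`; these tests previously used `.content`.
assert reply.text == "Generated text"
```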
@@ -164,7 +183,7 @@ class TestLlamaCppChatGenerator:
         mock_model.create_chat_completion.return_value = mock_output
         generation_kwargs = {"max_tokens": 128}
         result = generator.run([ChatMessage.from_system("Write a 200 word paragraph.")], generation_kwargs)
-        assert result["replies"][0].content == "Generated text"
+        assert result["replies"][0].text == "Generated text"
         assert result["replies"][0].meta["finish_reason"] == "length"
 
     @pytest.mark.integration
@@ -187,7 +206,7 @@ class TestLlamaCppChatGenerator:
         assert "replies" in result
         assert isinstance(result["replies"], list)
         assert len(result["replies"]) > 0
-        assert any(answer.lower() in reply.content.lower() for reply in result["replies"])
+        assert any(answer.lower() in reply.text.lower() for reply in result["replies"])
 
     @pytest.mark.integration
     def test_run_rag_pipeline(self, generator):
@@ -213,9 +232,7 @@ class TestLlamaCppChatGenerator:
             instance=InMemoryBM25Retriever(document_store=document_store, top_k=1),
             name="retriever",
         )
-        pipeline.add_component(
-            instance=DynamicChatPromptBuilder(runtime_variables=["query", "documents"]), name="prompt_builder"
-        )
+        pipeline.add_component(instance=ChatPromptBuilder(variables=["query", "documents"]), name="prompt_builder")
         pipeline.add_component(instance=generator, name="llm")
         pipeline.connect("retriever.documents", "prompt_builder.documents")
         pipeline.connect("prompt_builder.prompt", "llm.messages")
@@ -245,7 +262,7 @@ class TestLlamaCppChatGenerator:
                 "retriever": {"query": question},
                 "prompt_builder": {
                     "template_variables": {"location": location},
-                    "prompt_source": messages,
+                    "template": messages,
                     "query": question,
                 },
             }
@@ -253,7 +270,7 @@ class TestLlamaCppChatGenerator:
 
         replies = result["llm"]["replies"]
         assert len(replies) > 0
-        assert any("bioluminescent waves" in reply.content for reply in replies)
+        assert any("bioluminescent waves" in reply.text.lower() for reply in replies)
         assert all(reply.role == ChatRole.ASSISTANT for reply in replies)
 
     @pytest.mark.integration
@@ -291,15 +308,15 @@ class TestLlamaCppChatGenerator:
         assert len(result["replies"]) > 0
         assert all(reply.role == ChatRole.ASSISTANT for reply in result["replies"])
         for reply in result["replies"]:
-            assert json.loads(reply.content)
-            assert isinstance(json.loads(reply.content), dict)
-            assert "people" in json.loads(reply.content)
-            assert isinstance(json.loads(reply.content)["people"], list)
-            assert all(isinstance(person, dict) for person in json.loads(reply.content)["people"])
-            assert all("name" in person for person in json.loads(reply.content)["people"])
-            assert all("age" in person for person in json.loads(reply.content)["people"])
-            assert all(isinstance(person["name"], str) for person in json.loads(reply.content)["people"])
-            assert all(isinstance(person["age"], int) for person in json.loads(reply.content)["people"])
+            assert json.loads(reply.text)
+            assert isinstance(json.loads(reply.text), dict)
+            assert "people" in json.loads(reply.text)
+            assert isinstance(json.loads(reply.text)["people"], list)
+            assert all(isinstance(person, dict) for person in json.loads(reply.text)["people"])
+            assert all("name" in person for person in json.loads(reply.text)["people"])
+            assert all("age" in person for person in json.loads(reply.text)["people"])
+            assert all(isinstance(person["name"], str) for person in json.loads(reply.text)["people"])
+            assert all(isinstance(person["age"], int) for person in json.loads(reply.text)["people"])
 
 
 class TestLlamaCppChatGeneratorFunctionary:
@@ -325,7 +342,7 @@ class TestLlamaCppChatGeneratorFunctionary:
         hf_tokenizer_path = "meetkai/functionary-small-v2.4-GGUF"
         generator = LlamaCppChatGenerator(
             model=model_path,
-            n_ctx=8192,
+            n_ctx=512,
             n_batch=512,
             model_kwargs={
                 "chat_format": "functionary-v2",
@@ -382,7 +399,6 @@ class TestLlamaCppChatGeneratorFunctionary:
                                 "type": "string",
                                 "description": "The city and state, e.g. San Francisco, CA",
                             },
-                            "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                         },
                         "required": ["location"],
                     },
@@ -390,7 +406,8 @@ class TestLlamaCppChatGeneratorFunctionary:
             }
         ]
 
-        response = generator.run(messages=messages, generation_kwargs={"tools": tools})
+        tool_choice = {"type": "function", "function": {"name": "get_current_temperature"}}
+        response = generator.run(messages=messages, generation_kwargs={"tools": tools, "tool_choice": tool_choice})
 
         available_functions = {
             "get_current_temperature": self.get_current_temperature,
@@ -412,11 +429,10 @@ class TestLlamaCppChatGeneratorFunctionary:
         messages.append(function_message)
 
         second_response = generator.run(messages=messages)
-        print(second_response)
         assert "replies" in second_response
         assert len(second_response["replies"]) > 0
-        assert any("San Francisco" in reply.content for reply in second_response["replies"])
-        assert any("72" in reply.content for reply in second_response["replies"])
+        assert any("San Francisco" in reply.text for reply in second_response["replies"])
+        assert any("72" in reply.text for reply in second_response["replies"])
 
 
 class TestLlamaCppChatGeneratorChatML:
{llama_cpp_haystack-0.4.0 → llama_cpp_haystack-0.4.2}/tests/test_generator.py

@@ -9,6 +9,7 @@ from haystack.components.builders.answer_builder import AnswerBuilder
 from haystack.components.builders.prompt_builder import PromptBuilder
 from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
 from haystack.document_stores.in_memory import InMemoryDocumentStore
+
 from haystack_integrations.components.generators.llama_cpp import LlamaCppGenerator
 