llama-cpp-haystack 2.0.0.tar.gz → 2.1.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/CHANGELOG.md +16 -0
- {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/LICENSE.txt +1 -1
- {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/PKG-INFO +1 -1
- {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py +14 -8
- {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/src/haystack_integrations/components/generators/llama_cpp/generator.py +8 -5
- {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/tests/test_chat_generator.py +4 -13
- {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/tests/test_generator.py +4 -9
- {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/.gitignore +0 -0
- {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/README.md +0 -0
- {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/examples/llama_cpp_generator_example.py +0 -0
- {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/examples/rag_pipeline_example.py +0 -0
- {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/pydoc/config_docusaurus.yml +0 -0
- {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/pyproject.toml +0 -0
- {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/src/haystack_integrations/components/generators/llama_cpp/__init__.py +0 -0
- {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/src/haystack_integrations/components/generators/py.typed +0 -0
- {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/tests/__init__.py +0 -0
- {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/tests/models/.gitignore +0 -0
- {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/tests/test_files/apple.jpg +0 -0
{llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/CHANGELOG.md

@@ -1,5 +1,21 @@
 # Changelog
 
+## [integrations/llama_cpp-v2.0.0] - 2026-01-12
+
+### ⚙️ CI
+
+- Change pytest command (#2475)
+
+### 🧹 Chores
+
+- Remove Readme API CI workflow and configs (#2573)
+- Make fmt command more forgiving (#2671)
+- [**breaking**] Llama_cpp - drop Python 3.9 and use X|Y typing (#2710)
+
+### 🌀 Miscellaneous
+
+- Enhancement: Adopt PEP 585 type hinting (part 4) (#2527)
+
 ## [integrations/llama_cpp-v1.4.0] - 2025-10-23
 
 ### 📚 Documentation
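The breaking chore (#2710) drops Python 3.9 support and moves annotations to PEP 604 `X | Y` unions, while #2527 adopts PEP 585 built-in generics. A minimal sketch of what that style change looks like; the function below is illustrative only and not copied from the package:

```python
from typing import Any

# Old style (works on Python 3.9): typing.Optional / typing.Dict
# def configure(model_kwargs: Optional[Dict[str, Any]] = None) -> None: ...

# Style used after #2710 / #2527: built-in generics (PEP 585) plus
# `X | Y` unions (PEP 604), which require Python 3.10+ in annotations.
def configure(model_kwargs: dict[str, Any] | None = None) -> None:
    """Illustrative signature showing the new typing conventions."""
    model_kwargs = model_kwargs or {}
```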
{llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/LICENSE.txt

@@ -58,7 +58,7 @@ APPENDIX: How to apply the Apache License to your work.
 
 To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives.
 
-Copyright
+Copyright 2024 deepset GmbH
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
{llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llama-cpp-haystack
-Version: 2.0.0
+Version: 2.1.0
 Summary: An integration between the llama.cpp LLM framework and Haystack
 Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/llama_cpp#readme
 Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
{llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py

@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
+#
+# SPDX-License-Identifier: Apache-2.0
+
 import json
 from collections.abc import Iterator
 from datetime import datetime, timezone

@@ -201,7 +205,7 @@ class LlamaCppChatGenerator:
         streaming_callback: StreamingCallbackT | None = None,
         chat_handler_name: str | None = None,
         model_clip_path: str | None = None,
-    ):
+    ) -> None:
         """
         :param model: The path of a quantized model for text generation, for example, "zephyr-7b-beta.Q4_0.gguf".
             If the model path is also specified in the `model_kwargs`, this parameter will be ignored.

@@ -263,7 +267,7 @@ class LlamaCppChatGenerator:
         self.model_clip_path = model_clip_path
         self._handler = handler
 
-    def warm_up(self):
+    def warm_up(self) -> None:
         if self._model is not None:
             return
 

@@ -347,8 +351,7 @@ class LlamaCppChatGenerator:
             - `replies`: The responses from the model
         """
         if self._model is None:
-            error_msg = ...
-            raise RuntimeError(error_msg)
+            self.warm_up()
 
         if not messages:
             return {"replies": []}

@@ -381,7 +384,7 @@ class LlamaCppChatGenerator:
             )
 
         if streaming_callback:
-            response_stream = self._model.create_chat_completion(
+            response_stream = self._model.create_chat_completion(  # type: ignore[union-attr]
                 messages=formatted_messages, tools=llamacpp_tools, **updated_generation_kwargs, stream=True
             )
             return self._handle_streaming_response(

@@ -391,7 +394,7 @@ class LlamaCppChatGenerator:
             )  # we know that response_stream is Iterator[CreateChatCompletionStreamResponse]
             # because create_chat_completion was called with stream=True, but mypy doesn't know that
 
-        response = self._model.create_chat_completion(
+        response = self._model.create_chat_completion(  # type: ignore[union-attr]
            messages=formatted_messages, tools=llamacpp_tools, **updated_generation_kwargs
        )
        replies = []

@@ -399,8 +402,11 @@ class LlamaCppChatGenerator:
            msg = f"Expected a dictionary response, got a different object: {response}"
            raise ValueError(msg)
 
-        for choice in response["choices"]:
-            chat_message = self._convert_chat_completion_choice_to_chat_message(choice, response)
+        for choice in response["choices"]:  # type: ignore[index]
+            chat_message = self._convert_chat_completion_choice_to_chat_message(
+                choice,
+                response,  # type: ignore[arg-type]
+            )
            replies.append(chat_message)
        return {"replies": replies}
 
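The change in `run()` above replaces the RuntimeError raised when the model was not loaded with an implicit `self.warm_up()` call, so warming up explicitly before the first call is no longer required. A minimal usage sketch under that assumption; the GGUF path and generation kwargs are placeholders, not values from the package:

```python
from haystack.dataclasses import ChatMessage
from haystack_integrations.components.generators.llama_cpp import LlamaCppChatGenerator

# Placeholder GGUF path; any locally downloaded quantized chat model would do.
generator = LlamaCppChatGenerator(model="openchat-3.5.gguf", n_ctx=2048, n_batch=512)

# As of 2.1.0 the first run() lazily loads the model via warm_up();
# calling generator.warm_up() beforehand is still allowed but optional.
result = generator.run(
    messages=[ChatMessage.from_user("Who was Ada Lovelace?")],
    generation_kwargs={"max_tokens": 128, "temperature": 0.1},
)
print(result["replies"][0].text)
```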
{llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/src/haystack_integrations/components/generators/llama_cpp/generator.py

@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
+#
+# SPDX-License-Identifier: Apache-2.0
+
 from typing import Any
 
 from haystack import component, logging

@@ -32,7 +36,7 @@ class LlamaCppGenerator:
         n_batch: int | None = 512,
         model_kwargs: dict[str, Any] | None = None,
         generation_kwargs: dict[str, Any] | None = None,
-    ):
+    ) -> None:
         """
         :param model: The path of a quantized model for text generation, for example, "zephyr-7b-beta.Q4_0.gguf".
             If the model path is also specified in the `model_kwargs`, this parameter will be ignored.

@@ -64,7 +68,7 @@ class LlamaCppGenerator:
         self.generation_kwargs = generation_kwargs
         self.model: Llama | None = None
 
-    def warm_up(self):
+    def warm_up(self) -> None:
         if self.model is None:
             self.model = Llama(**self.model_kwargs)
 

@@ -84,8 +88,7 @@ class LlamaCppGenerator:
             - `meta`: metadata about the request.
         """
         if self.model is None:
-            error_msg = ...
-            raise RuntimeError(error_msg)
+            self.warm_up()
 
         if not prompt:
             return {"replies": []}

@@ -93,7 +96,7 @@ class LlamaCppGenerator:
         # merge generation kwargs from init method with those from run method
         updated_generation_kwargs = {**self.generation_kwargs, **(generation_kwargs or {})}
 
-        output = self.model.create_completion(prompt=prompt, **updated_generation_kwargs)
+        output = self.model.create_completion(prompt=prompt, **updated_generation_kwargs)  # type: ignore[union-attr]
         if not isinstance(output, dict):
             msg = f"Expected a dictionary response, got a different object: {output}"
             raise ValueError(msg)
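LlamaCppGenerator gets the same treatment: `run()` now falls back to `warm_up()` when `self.model` is None instead of raising. A short sketch of the resulting call pattern; the model file name comes from the docstring example and should be replaced with a real local path:

```python
from haystack_integrations.components.generators.llama_cpp import LlamaCppGenerator

generator = LlamaCppGenerator(
    model="zephyr-7b-beta.Q4_0.gguf",  # path to a locally downloaded GGUF file
    n_ctx=512,
    n_batch=128,
    generation_kwargs={"max_tokens": 64, "temperature": 0.1},
)

# No explicit generator.warm_up() needed since 2.1.0; the first run() loads the model.
result = generator.run("Briefly explain what llama.cpp is.")
print(result["replies"][0])
print(result["meta"])  # request metadata, per the run() docstring
```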
{llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/tests/test_chat_generator.py

@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
+#
+# SPDX-License-Identifier: Apache-2.0
+
 import json
 import os
 import urllib.request

@@ -678,7 +682,6 @@ class TestLlamaCppChatGenerator:
 
         model_path = str(model_path / filename)
         generator = LlamaCppChatGenerator(model=model_path, n_ctx=8192, n_batch=512)
-        generator.warm_up()
         return generator
 
     @pytest.fixture

@@ -898,14 +901,6 @@ class TestLlamaCppChatGenerator:
         )
         assert generator.model_kwargs["n_batch"] == 1024
 
-    def test_raises_error_without_warm_up(self):
-        """
-        Test that the generator raises an error if warm_up() is not called before running.
-        """
-        generator = LlamaCppChatGenerator(model="test_model.gguf", n_ctx=512, n_batch=512)
-        with pytest.raises(RuntimeError):
-            generator.run("What is the capital of China?")
-
     def test_run_with_empty_message(self, generator_mock):
         """
         Test that an empty message returns an empty list of replies.

@@ -1179,7 +1174,6 @@ class TestLlamaCppChatGeneratorFunctionary:
                 "hf_tokenizer_path": hf_tokenizer_path,
             },
         )
-        generator.warm_up()
         return generator
 
     @pytest.mark.integration

@@ -1260,7 +1254,6 @@ class TestLlamaCppChatGeneratorChatML:
                 "chat_format": "chatml-function-calling",
             },
         )
-        generator.warm_up()
         return generator
 
     @pytest.mark.integration

@@ -1326,8 +1319,6 @@ class TestLlamaCppChatGeneratorChatML:
             generation_kwargs={"max_tokens": 50, "temperature": 0.1},
         )
 
-        generator.warm_up()
-
         result = generator.run(messages)
 
         assert "replies" in result
{llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/tests/test_generator.py

@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
+#
+# SPDX-License-Identifier: Apache-2.0
+
 import os
 import urllib.request
 from pathlib import Path

@@ -42,7 +46,6 @@ class TestLlamaCppGenerator:
 
         model_path = str(model_path / filename)
         generator = LlamaCppGenerator(model=model_path, n_ctx=128, n_batch=128)
-        generator.warm_up()
         return generator
 
     @pytest.fixture

@@ -106,14 +109,6 @@ class TestLlamaCppGenerator:
         generator = LlamaCppGenerator(model="test_model.gguf", n_ctx=512, n_batch=512, model_kwargs={"n_batch": 1024})
         assert generator.model_kwargs["n_batch"] == 1024
 
-    def test_raises_error_without_warm_up(self):
-        """
-        Test that the generator raises an error if warm_up() is not called before running.
-        """
-        generator = LlamaCppGenerator(model="test_model.gguf", n_ctx=512, n_batch=512)
-        with pytest.raises(RuntimeError):
-            generator.run("What is the capital of China?")
-
     def test_run_with_empty_prompt(self, generator_mock):
         """
         Test that an empty prompt returns an empty list of replies.
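With the explicit test_raises_error_without_warm_up cases removed, the lazy-loading path is now exercised implicitly by the fixtures. A hedged sketch of how that behaviour could be asserted without loading a real model, using a mocked Llama class; the test name, patch target, and mocked payload shape are assumptions, not copied from the test suite:

```python
from unittest.mock import MagicMock, patch

from haystack_integrations.components.generators.llama_cpp import LlamaCppGenerator


@patch("haystack_integrations.components.generators.llama_cpp.generator.Llama")
def test_run_triggers_lazy_warm_up(mock_llama_cls):
    # Assumed minimal llama.cpp-style completion payload returned by the mocked model.
    mock_model = MagicMock()
    mock_model.create_completion.return_value = {
        "id": "cmpl-0",
        "object": "text_completion",
        "created": 0,
        "model": "test_model.gguf",
        "choices": [{"text": "Paris", "index": 0, "logprobs": None, "finish_reason": "stop"}],
        "usage": {"prompt_tokens": 7, "completion_tokens": 1, "total_tokens": 8},
    }
    mock_llama_cls.return_value = mock_model

    generator = LlamaCppGenerator(model="test_model.gguf", n_ctx=512, n_batch=512)
    assert generator.model is None  # nothing is loaded at construction time

    result = generator.run("What is the capital of France?")

    mock_llama_cls.assert_called_once()  # warm_up() ran implicitly inside run()
    assert "replies" in result
```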