llama-cpp-haystack 1.3.0__tar.gz → 2.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/CHANGELOG.md +29 -0
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/PKG-INFO +3 -4
- llama_cpp_haystack-2.0.0/pydoc/config_docusaurus.yml +28 -0
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/pyproject.toml +13 -11
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py +41 -36
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/src/haystack_integrations/components/generators/llama_cpp/generator.py +9 -9
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/tests/test_chat_generator.py +53 -0
- llama_cpp_haystack-1.3.0/pydoc/config.yml +0 -29
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/.gitignore +0 -0
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/LICENSE.txt +0 -0
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/README.md +0 -0
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/examples/llama_cpp_generator_example.py +0 -0
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/examples/rag_pipeline_example.py +0 -0
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/src/haystack_integrations/components/generators/llama_cpp/__init__.py +0 -0
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/src/haystack_integrations/components/generators/py.typed +0 -0
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/tests/__init__.py +0 -0
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/tests/models/.gitignore +0 -0
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/tests/test_files/apple.jpg +0 -0
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/tests/test_generator.py +0 -0
{llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/CHANGELOG.md
@@ -1,5 +1,34 @@
 # Changelog
 
+## [integrations/llama_cpp-v1.4.0] - 2025-10-23
+
+### 📚 Documentation
+
+- Add pydoc configurations for Docusaurus (#2411)
+
+### ⚙️ CI
+
+- Download pre-built wheels for llama-cpp-python on macOS (#2235)
+
+### 🧹 Chores
+
+- Fix llama.cpp types (#2271)
+
+### 🌀 Miscellaneous
+
+- Feat: `LlamaCppChatGenerator` update tools param to ToolsType (#2438)
+
+## [integrations/llama_cpp-v1.3.0] - 2025-08-22
+
+### 🚀 Features
+
+- Add image support to LlamaCppChatGenerator (#2197)
+
+### 🧹 Chores
+
+- Standardize readmes - part 2 (#2205)
+
+
 ## [integrations/llama_cpp-v1.2.0] - 2025-07-28
 
 ### 🚀 Features
{llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llama-cpp-haystack
-Version: 1.3.0
+Version: 2.0.0
 Summary: An integration between the llama.cpp LLM framework and Haystack
 Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/llama_cpp#readme
 Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
@@ -12,15 +12,14 @@ License-File: LICENSE.txt
 Classifier: Development Status :: 4 - Beta
 Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Programming Language :: Python
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Classifier: Programming Language :: Python :: Implementation :: CPython
 Classifier: Programming Language :: Python :: Implementation :: PyPy
-Requires-Python: >=3.
-Requires-Dist: haystack-ai>=2.
+Requires-Python: >=3.10
+Requires-Dist: haystack-ai>=2.22.0
 Requires-Dist: llama-cpp-python>=0.2.87
 Description-Content-Type: text/markdown
 
llama_cpp_haystack-2.0.0/pydoc/config_docusaurus.yml
@@ -0,0 +1,28 @@
+loaders:
+- ignore_when_discovered:
+  - __init__
+  modules:
+  - haystack_integrations.components.generators.llama_cpp.generator
+  search_path:
+  - ../src
+  type: haystack_pydoc_tools.loaders.CustomPythonLoader
+processors:
+- do_not_filter_modules: false
+  documented_only: true
+  expression: null
+  skip_empty_modules: true
+  type: filter
+- type: smart
+- type: crossref
+renderer:
+  description: Llama.cpp integration for Haystack
+  id: integrations-llama-cpp
+  markdown:
+    add_member_class_prefix: false
+    add_method_class_prefix: true
+    classdef_code_block: false
+    descriptive_class_title: false
+    descriptive_module_title: true
+    filename: llama_cpp.md
+  title: Llama.cpp
+  type: haystack_pydoc_tools.renderers.DocusaurusRenderer
{llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/pyproject.toml
@@ -7,7 +7,7 @@ name = "llama-cpp-haystack"
 dynamic = ["version"]
 description = 'An integration between the llama.cpp LLM framework and Haystack'
 readme = "README.md"
-requires-python = ">=3.
+requires-python = ">=3.10"
 license = "Apache-2.0"
 keywords = []
 authors = [
@@ -18,7 +18,6 @@ classifiers = [
   "License :: OSI Approved :: Apache Software License",
   "Development Status :: 4 - Beta",
   "Programming Language :: Python",
-  "Programming Language :: Python :: 3.9",
   "Programming Language :: Python :: 3.10",
   "Programming Language :: Python :: 3.11",
   "Programming Language :: Python :: 3.12",
@@ -26,7 +25,15 @@ classifiers = [
   "Programming Language :: Python :: Implementation :: CPython",
   "Programming Language :: Python :: Implementation :: PyPy",
 ]
-dependencies = ["haystack-ai>=2.
+dependencies = ["haystack-ai>=2.22.0", "llama-cpp-python>=0.2.87"]
+
+# On macOS GitHub runners, we use a custom index to download pre-built wheels.
+# Installing from source might fail due to missing dependencies (CMake fails with "OpenMP not found")
+[tool.uv]
+index-strategy = "unsafe-best-match" # this ensures that packages are not only searched in the below index but also in PyPI
+[[tool.uv.index]]
+name = "llama-cpp-python-macos"
+url = "https://abetlen.github.io/llama-cpp-python/whl/metal/"
 
 [project.urls]
 Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/llama_cpp#readme"
@@ -48,8 +55,8 @@ git_describe_command = 'git describe --tags --match="integrations/llama_cpp-v[0-
 installer = "uv"
 dependencies = ["haystack-pydoc-tools", "ruff"]
 [tool.hatch.envs.default.scripts]
-docs = ["pydoc-markdown pydoc/
-fmt = "ruff check --fix {args}
+docs = ["pydoc-markdown pydoc/config_docusaurus.yml"]
+fmt = "ruff check --fix {args}; ruff format {args}"
 fmt-check = "ruff check {args} && ruff format --check {args}"
 
 [tool.hatch.envs.test]
@@ -67,7 +74,7 @@ dependencies = [
 unit = 'pytest -m "not integration" {args:tests}'
 integration = 'pytest -m "integration" {args:tests}'
 all = 'pytest {args:tests}'
-cov-retry = '
+cov-retry = 'pytest --cov=haystack_integrations --reruns 3 --reruns-delay 30 -x {args:tests}'
 types = "mypy -p haystack_integrations.components.generators.llama_cpp {args}"
 
 [tool.mypy]
@@ -84,7 +91,6 @@ known-first-party = ["haystack_integrations"]
 
 
 [tool.ruff]
-target-version = "py38"
 line-length = 120
 
 [tool.ruff.lint]
@@ -128,10 +134,6 @@ ignore = [
   "PLR0913",
   "PLR0915",
 ]
-unfixable = [
-  # Don't touch unused imports
-  "F401",
-]
 
 [tool.ruff.lint.flake8-tidy-imports]
 ban-relative-imports = "parents"
{llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py
@@ -1,6 +1,7 @@
 import json
+from collections.abc import Iterator
 from datetime import datetime, timezone
-from typing import Any
+from typing import Any
 
 from haystack import component, default_from_dict, default_to_dict, logging
 from haystack.components.generators.utils import _convert_streaming_chunks_to_chat_message
@@ -16,10 +17,10 @@ from haystack.dataclasses (
 )
 from haystack.dataclasses.streaming_chunk import FinishReason, StreamingChunk, SyncStreamingCallbackT
 from haystack.tools import (
-
-    Toolset,
+    ToolsType,
     _check_duplicate_tool_names,
     deserialize_tools_or_toolset_inplace,
+    flatten_tools_or_toolsets,
     serialize_tools_or_toolset,
 )
 from haystack.utils import deserialize_callable, serialize_callable
@@ -29,6 +30,8 @@ from llama_cpp (
     ChatCompletionRequestMessage,
     ChatCompletionRequestMessageContentPart,
     ChatCompletionResponseChoice,
+    ChatCompletionStreamResponseDelta,
+    ChatCompletionStreamResponseDeltaEmpty,
     ChatCompletionTool,
     CreateChatCompletionResponse,
     CreateChatCompletionStreamResponse,
@@ -40,7 +43,7 @@ from llama_cpp.llama_tokenizer import LlamaHFTokenizer
 
 logger = logging.getLogger(__name__)
 
-FINISH_REASON_MAPPING:
+FINISH_REASON_MAPPING: dict[str, FinishReason] = {
     "stop": "stop",
     "length": "length",
     "tool_calls": "tool_calls",
@@ -122,7 +125,7 @@ def _convert_message_to_llamacpp_format(message: ChatMessage) -> ChatCompletionR
         result["content"] = text_contents[0]
 
     if tool_calls:
-        llamacpp_tool_calls:
+        llamacpp_tool_calls: list[ChatCompletionMessageToolCall] = []
        for tc in tool_calls:
             if tc.id is None:
                 msg = "`ToolCall` must have a non-null `id` attribute to be used with llama.cpp."
@@ -189,15 +192,15 @@ class LlamaCppChatGenerator:
     def __init__(
         self,
         model: str,
-        n_ctx:
-        n_batch:
-        model_kwargs:
-        generation_kwargs:
+        n_ctx: int | None = 0,
+        n_batch: int | None = 512,
+        model_kwargs: dict[str, Any] | None = None,
+        generation_kwargs: dict[str, Any] | None = None,
         *,
-        tools:
-        streaming_callback:
-        chat_handler_name:
-        model_clip_path:
+        tools: ToolsType | None = None,
+        streaming_callback: StreamingCallbackT | None = None,
+        chat_handler_name: str | None = None,
+        model_clip_path: str | None = None,
     ):
         """
         :param model: The path of a quantized model for text generation, for example, "zephyr-7b-beta.Q4_0.gguf".
@@ -213,8 +216,8 @@ class LlamaCppChatGenerator:
             For more information on the available kwargs, see
             [llama.cpp documentation](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.create_chat_completion).
         :param tools:
-            A list of
-
+            A list of Tool and/or Toolset objects, or a single Toolset for which the model can prepare calls.
+            Each tool should have a unique name.
         :param streaming_callback: A callback function that is called when a new token is received from the stream.
         :param chat_handler_name: Name of the chat handler for multimodal models.
             Common options include: "Llava16ChatHandler", "MoondreamChatHandler", "Qwen25VLChatHandler".
@@ -233,9 +236,9 @@ class LlamaCppChatGenerator:
         model_kwargs.setdefault("n_ctx", n_ctx)
         model_kwargs.setdefault("n_batch", n_batch)
 
-        _check_duplicate_tool_names(
+        _check_duplicate_tool_names(flatten_tools_or_toolsets(tools))
 
-        handler:
+        handler: Llava15ChatHandler | None = None
         # Validate multimodal requirements
         if chat_handler_name is not None:
             if model_clip_path is None:
@@ -253,7 +256,7 @@ class LlamaCppChatGenerator:
         self.n_batch = n_batch
         self.model_kwargs = model_kwargs
         self.generation_kwargs = generation_kwargs
-        self._model:
+        self._model: Llama | None = None
         self.tools = tools
         self.streaming_callback = streaming_callback
         self.chat_handler_name = chat_handler_name
@@ -276,7 +279,7 @@ class LlamaCppChatGenerator:
 
         self._model = Llama(**kwargs)
 
-    def to_dict(self) ->
+    def to_dict(self) -> dict[str, Any]:
         """
         Serializes the component to a dictionary.
 
@@ -298,7 +301,7 @@ class LlamaCppChatGenerator:
         )
 
     @classmethod
-    def from_dict(cls, data:
+    def from_dict(cls, data: dict[str, Any]) -> "LlamaCppChatGenerator":
         """
         Deserializes the component from a dictionary.
 
@@ -317,15 +320,15 @@ class LlamaCppChatGenerator:
         )
         return default_from_dict(cls, data)
 
-    @component.output_types(replies=
+    @component.output_types(replies=list[ChatMessage])
     def run(
         self,
-        messages:
-        generation_kwargs:
+        messages: list[ChatMessage],
+        generation_kwargs: dict[str, Any] | None = None,
         *,
-        tools:
-        streaming_callback:
-    ) ->
+        tools: ToolsType | None = None,
+        streaming_callback: StreamingCallbackT | None = None,
+    ) -> dict[str, list[ChatMessage]]:
         """
         Run the text generation model on the given list of ChatMessages.
 
@@ -335,8 +338,9 @@ class LlamaCppChatGenerator:
             For more information on the available kwargs, see
             [llama.cpp documentation](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.create_chat_completion).
         :param tools:
-            A list of
-            parameter set during
+            A list of Tool and/or Toolset objects, or a single Toolset for which the model can prepare calls.
+            Each tool should have a unique name. If set, it will override the `tools` parameter set during
+            component initialization.
         :param streaming_callback: A callback function that is called when a new token is received from the stream.
             If set, it will override the `streaming_callback` parameter set during component initialization.
         :returns: A dictionary with the following keys:
@@ -353,13 +357,12 @@ class LlamaCppChatGenerator:
         formatted_messages = [_convert_message_to_llamacpp_format(msg) for msg in messages]
 
         tools = tools or self.tools
-
-
-        _check_duplicate_tool_names(tools)
+        flattened_tools = flatten_tools_or_toolsets(tools)
+        _check_duplicate_tool_names(flattened_tools)
 
-        llamacpp_tools:
-        if
-        for t in
+        llamacpp_tools: list[ChatCompletionTool] = []
+        if flattened_tools:
+            for t in flattened_tools:
                 llamacpp_tools.append(
                     {
                         "type": "function",
@@ -406,7 +409,7 @@
         response_stream: Iterator[CreateChatCompletionStreamResponse],
         streaming_callback: SyncStreamingCallbackT,
         component_info: ComponentInfo,
-    ) ->
+    ) -> dict[str, list[ChatMessage]]:
         """
         Take streaming responses from llama.cpp, convert to Haystack StreamingChunk objects, stream them,
         and finally convert them to a ChatMessage.
@@ -432,7 +435,9 @@
 
             if chunk.get("choices") and len(chunk["choices"]) > 0:
                 choice = chunk["choices"][0]
-                delta = choice.get(
+                delta: ChatCompletionStreamResponseDelta | ChatCompletionStreamResponseDeltaEmpty | dict = choice.get(
+                    "delta", {}
+                )
 
                 finish_reason = choice.get("finish_reason")
                 mapped_finish_reason = FINISH_REASON_MAPPING.get(finish_reason or "")
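
The main behavioral change above is that `tools` now accepts Haystack's `ToolsType`: a single `Toolset` or a list that freely mixes `Tool` and `Toolset` objects, flattened internally with `flatten_tools_or_toolsets`. Below is a minimal usage sketch, not taken from the package; the GGUF path (borrowed from the docstring example) and the two helper functions are placeholders for illustration.

```python
# Minimal sketch of the ToolsType-style `tools` parameter introduced in this diff.
# Assumptions: a local GGUF model at "zephyr-7b-beta.Q4_0.gguf" and the two helper
# functions below, which exist only for illustration.
from haystack.dataclasses import ChatMessage
from haystack.tools import Toolset, create_tool_from_function

from haystack_integrations.components.generators.llama_cpp import LlamaCppChatGenerator


def get_current_temperature(city: str) -> str:
    """Get the current temperature for a given city."""
    return f"The temperature in {city} is 22 degrees Celsius"


def population(city: str) -> str:
    """Get the population for a given city."""
    return f"The population of {city} is 2.2 million"


temperature_tool = create_tool_from_function(get_current_temperature)
population_toolset = Toolset([create_tool_from_function(population)])

generator = LlamaCppChatGenerator(
    model="zephyr-7b-beta.Q4_0.gguf",  # placeholder: any local quantized model
    generation_kwargs={"max_tokens": 128},
    tools=[temperature_tool, population_toolset],  # Tool and Toolset mixed in one list
)
generator.warm_up()

result = generator.run(messages=[ChatMessage.from_user("What's the weather in Paris?")])
print(result["replies"][0])
```

This mirrors the mixed Tool/Toolset path exercised by the new `test_run_with_mixed_tools` test further down.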
{llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/src/haystack_integrations/components/generators/llama_cpp/generator.py
@@ -1,4 +1,4 @@
-from typing import Any
+from typing import Any
 
 from haystack import component, logging
 
@@ -28,10 +28,10 @@ class LlamaCppGenerator:
     def __init__(
         self,
         model: str,
-        n_ctx:
-        n_batch:
-        model_kwargs:
-        generation_kwargs:
+        n_ctx: int | None = 0,
+        n_batch: int | None = 512,
+        model_kwargs: dict[str, Any] | None = None,
+        generation_kwargs: dict[str, Any] | None = None,
     ):
         """
         :param model: The path of a quantized model for text generation, for example, "zephyr-7b-beta.Q4_0.gguf".
@@ -62,16 +62,16 @@ class LlamaCppGenerator:
         self.n_batch = n_batch
         self.model_kwargs = model_kwargs
         self.generation_kwargs = generation_kwargs
-        self.model:
+        self.model: Llama | None = None
 
     def warm_up(self):
         if self.model is None:
             self.model = Llama(**self.model_kwargs)
 
-    @component.output_types(replies=
+    @component.output_types(replies=list[str], meta=list[dict[str, Any]])
     def run(
-        self, prompt: str, generation_kwargs:
-    ) ->
+        self, prompt: str, generation_kwargs: dict[str, Any] | None = None
+    ) -> dict[str, list[str] | list[dict[str, Any]]]:
         """
         Run the text generation model on the given prompt.
 
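
`generator.py` itself only gains modernized type hints: `run()` is now annotated to return `dict[str, list[str] | list[dict[str, Any]]]` with `replies` and `meta` keys. A short sketch of the call pattern, assuming a placeholder model path and illustrative generation kwargs:

```python
# Sketch of the LlamaCppGenerator API, with the new annotations in mind:
# run() -> dict with "replies" (list[str]) and "meta" (list[dict[str, Any]]).
# "openchat-3.5.gguf" is a placeholder for any local quantized model file.
from haystack_integrations.components.generators.llama_cpp import LlamaCppGenerator

generator = LlamaCppGenerator(
    model="openchat-3.5.gguf",  # placeholder GGUF path
    n_ctx=512,
    n_batch=128,
    generation_kwargs={"max_tokens": 128, "temperature": 0.1},
)
generator.warm_up()  # loads the Llama model, as shown in warm_up() above

result = generator.run(prompt="Explain what llama.cpp is in one sentence.")
print(result["replies"][0])  # generated text
print(result["meta"][0])     # llama.cpp metadata for the first completion
```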
{llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/tests/test_chat_generator.py
@@ -723,6 +723,59 @@ class TestLlamaCppChatGenerator:
         generator = LlamaCppChatGenerator(model="test_model.gguf", tools=toolset)
         assert generator.tools == toolset
 
+    def test_init_with_mixed_tools(self, temperature_tool):
+        """Test initialization with mixed Tool and Toolset objects."""
+
+        def population(city: str):
+            """Get population for a given city."""
+            return f"The population of {city} is 2.2 million"
+
+        population_tool = create_tool_from_function(population)
+        toolset = Toolset([population_tool])
+
+        generator = LlamaCppChatGenerator(model="test_model.gguf", tools=[temperature_tool, toolset])
+        assert generator.tools == [temperature_tool, toolset]
+
+    def test_run_with_mixed_tools(self, temperature_tool):
+        """Test run method with mixed Tool and Toolset objects."""
+
+        def population(city: str):
+            """Get population for a given city."""
+            return f"The population of {city} is 2.2 million"
+
+        population_tool = create_tool_from_function(population)
+        toolset = Toolset([population_tool])
+
+        generator = LlamaCppChatGenerator(model="test_model.gguf")
+
+        # Mock the model
+        mock_model = MagicMock()
+        mock_response = {
+            "choices": [{"message": {"content": "Generated text"}, "index": 0, "finish_reason": "stop"}],
+            "id": "test_id",
+            "model": "test_model",
+            "created": 1234567890,
+            "usage": {"prompt_tokens": 10, "completion_tokens": 5},
+        }
+        mock_model.create_chat_completion.return_value = mock_response
+        generator._model = mock_model
+
+        generator.run(
+            messages=[ChatMessage.from_user("What's the weather in Paris and population of Berlin?")],
+            tools=[temperature_tool, toolset],
+        )
+
+        # Verify the model was called with the correct tools
+        mock_model.create_chat_completion.assert_called_once()
+        call_args = mock_model.create_chat_completion.call_args[1]
+        assert "tools" in call_args
+        assert len(call_args["tools"]) == 2  # Both tools should be flattened
+
+        # Verify tool names
+        tool_names = {tool["function"]["name"] for tool in call_args["tools"]}
+        assert "get_current_temperature" in tool_names
+        assert "population" in tool_names
+
     def test_init_with_multimodal_params(self):
         """Test initialization with multimodal parameters."""
         generator = LlamaCppChatGenerator(
llama_cpp_haystack-1.3.0/pydoc/config.yml
@@ -1,29 +0,0 @@
-loaders:
-  - type: haystack_pydoc_tools.loaders.CustomPythonLoader
-    search_path: [../src]
-    modules: [
-      "haystack_integrations.components.generators.llama_cpp.generator",
-    ]
-    ignore_when_discovered: ["__init__"]
-processors:
-  - type: filter
-    expression:
-    documented_only: true
-    do_not_filter_modules: false
-    skip_empty_modules: true
-  - type: smart
-  - type: crossref
-renderer:
-  type: haystack_pydoc_tools.renderers.ReadmeIntegrationRenderer
-  excerpt: Llama.cpp integration for Haystack
-  category_slug: integrations-api
-  title: Llama.cpp
-  slug: integrations-llama-cpp
-  order: 140
-  markdown:
-    descriptive_class_title: false
-    classdef_code_block: false
-    descriptive_module_title: true
-    add_method_class_prefix: true
-    add_member_class_prefix: false
-    filename: _readme_llama_cpp.md
{llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/examples/llama_cpp_generator_example.py
RENAMED
File without changes

The remaining files marked +0 -0 in the list above were renamed along with the package directory and have no content changes.