llama-cpp-haystack 1.3.0__tar.gz → 2.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/CHANGELOG.md +29 -0
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/PKG-INFO +3 -4
- llama_cpp_haystack-2.0.0/pydoc/config_docusaurus.yml +28 -0
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/pyproject.toml +13 -11
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py +41 -36
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/src/haystack_integrations/components/generators/llama_cpp/generator.py +9 -9
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/tests/test_chat_generator.py +53 -0
- llama_cpp_haystack-1.3.0/pydoc/config.yml +0 -29
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/.gitignore +0 -0
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/LICENSE.txt +0 -0
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/README.md +0 -0
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/examples/llama_cpp_generator_example.py +0 -0
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/examples/rag_pipeline_example.py +0 -0
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/src/haystack_integrations/components/generators/llama_cpp/__init__.py +0 -0
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/src/haystack_integrations/components/generators/py.typed +0 -0
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/tests/__init__.py +0 -0
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/tests/models/.gitignore +0 -0
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/tests/test_files/apple.jpg +0 -0
- {llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/tests/test_generator.py +0 -0
{llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/CHANGELOG.md
@@ -1,5 +1,34 @@
 # Changelog
 
+## [integrations/llama_cpp-v1.4.0] - 2025-10-23
+
+### 📚 Documentation
+
+- Add pydoc configurations for Docusaurus (#2411)
+
+### ⚙️ CI
+
+- Download pre-built wheels for llama-cpp-python on macOS (#2235)
+
+### 🧹 Chores
+
+- Fix llama.cpp types (#2271)
+
+### 🌀 Miscellaneous
+
+- Feat: `LlamaCppChatGenerator` update tools param to ToolsType (#2438)
+
+## [integrations/llama_cpp-v1.3.0] - 2025-08-22
+
+### 🚀 Features
+
+- Add image support to LlamaCppChatGenerator (#2197)
+
+### 🧹 Chores
+
+- Standardize readmes - part 2 (#2205)
+
+
 ## [integrations/llama_cpp-v1.2.0] - 2025-07-28
 
 ### 🚀 Features
{llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llama-cpp-haystack
-Version: 1.3.0
+Version: 2.0.0
 Summary: An integration between the llama.cpp LLM framework and Haystack
 Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/llama_cpp#readme
 Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
@@ -12,15 +12,14 @@ License-File: LICENSE.txt
 Classifier: Development Status :: 4 - Beta
 Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Programming Language :: Python
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Classifier: Programming Language :: Python :: Implementation :: CPython
 Classifier: Programming Language :: Python :: Implementation :: PyPy
-Requires-Python: >=3.
-Requires-Dist: haystack-ai>=2.
+Requires-Python: >=3.10
+Requires-Dist: haystack-ai>=2.22.0
 Requires-Dist: llama-cpp-python>=0.2.87
 Description-Content-Type: text/markdown
 
llama_cpp_haystack-2.0.0/pydoc/config_docusaurus.yml
@@ -0,0 +1,28 @@
+loaders:
+- ignore_when_discovered:
+  - __init__
+  modules:
+  - haystack_integrations.components.generators.llama_cpp.generator
+  search_path:
+  - ../src
+  type: haystack_pydoc_tools.loaders.CustomPythonLoader
+processors:
+- do_not_filter_modules: false
+  documented_only: true
+  expression: null
+  skip_empty_modules: true
+  type: filter
+- type: smart
+- type: crossref
+renderer:
+  description: Llama.cpp integration for Haystack
+  id: integrations-llama-cpp
+  markdown:
+    add_member_class_prefix: false
+    add_method_class_prefix: true
+    classdef_code_block: false
+    descriptive_class_title: false
+    descriptive_module_title: true
+    filename: llama_cpp.md
+  title: Llama.cpp
+  type: haystack_pydoc_tools.renderers.DocusaurusRenderer
{llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/pyproject.toml
@@ -7,7 +7,7 @@ name = "llama-cpp-haystack"
 dynamic = ["version"]
 description = 'An integration between the llama.cpp LLM framework and Haystack'
 readme = "README.md"
-requires-python = ">=3.
+requires-python = ">=3.10"
 license = "Apache-2.0"
 keywords = []
 authors = [
@@ -18,7 +18,6 @@ classifiers = [
   "License :: OSI Approved :: Apache Software License",
   "Development Status :: 4 - Beta",
   "Programming Language :: Python",
-  "Programming Language :: Python :: 3.9",
   "Programming Language :: Python :: 3.10",
   "Programming Language :: Python :: 3.11",
   "Programming Language :: Python :: 3.12",
@@ -26,7 +25,15 @@ classifiers = [
   "Programming Language :: Python :: Implementation :: CPython",
   "Programming Language :: Python :: Implementation :: PyPy",
 ]
-dependencies = ["haystack-ai>=2.
+dependencies = ["haystack-ai>=2.22.0", "llama-cpp-python>=0.2.87"]
+
+# On macOS GitHub runners, we use a custom index to download pre-built wheels.
+# Installing from source might fail due to missing dependencies (CMake fails with "OpenMP not found")
+[tool.uv]
+index-strategy = "unsafe-best-match" # this ensures that packages are not only searched in the below index but also in PyPI
+[[tool.uv.index]]
+name = "llama-cpp-python-macos"
+url = "https://abetlen.github.io/llama-cpp-python/whl/metal/"
 
 [project.urls]
 Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/llama_cpp#readme"
@@ -48,8 +55,8 @@ git_describe_command = 'git describe --tags --match="integrations/llama_cpp-v[0-
 installer = "uv"
 dependencies = ["haystack-pydoc-tools", "ruff"]
 [tool.hatch.envs.default.scripts]
-docs = ["pydoc-markdown pydoc/
-fmt = "ruff check --fix {args}
+docs = ["pydoc-markdown pydoc/config_docusaurus.yml"]
+fmt = "ruff check --fix {args}; ruff format {args}"
 fmt-check = "ruff check {args} && ruff format --check {args}"
 
 [tool.hatch.envs.test]
@@ -67,7 +74,7 @@ dependencies = [
 unit = 'pytest -m "not integration" {args:tests}'
 integration = 'pytest -m "integration" {args:tests}'
 all = 'pytest {args:tests}'
-cov-retry = '
+cov-retry = 'pytest --cov=haystack_integrations --reruns 3 --reruns-delay 30 -x {args:tests}'
 types = "mypy -p haystack_integrations.components.generators.llama_cpp {args}"
 
 [tool.mypy]
@@ -84,7 +91,6 @@ known-first-party = ["haystack_integrations"]
 
 
 [tool.ruff]
-target-version = "py38"
 line-length = 120
 
 [tool.ruff.lint]
@@ -128,10 +134,6 @@ ignore = [
   "PLR0913",
   "PLR0915",
 ]
-unfixable = [
-  # Don't touch unused imports
-  "F401",
-]
 
 [tool.ruff.lint.flake8-tidy-imports]
 ban-relative-imports = "parents"
{llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py
@@ -1,6 +1,7 @@
 import json
+from collections.abc import Iterator
 from datetime import datetime, timezone
-from typing import Any
+from typing import Any
 
 from haystack import component, default_from_dict, default_to_dict, logging
 from haystack.components.generators.utils import _convert_streaming_chunks_to_chat_message
@@ -16,10 +17,10 @@ from haystack.dataclasses (
 )
 from haystack.dataclasses.streaming_chunk import FinishReason, StreamingChunk, SyncStreamingCallbackT
 from haystack.tools import (
-
-    Toolset,
+    ToolsType,
     _check_duplicate_tool_names,
     deserialize_tools_or_toolset_inplace,
+    flatten_tools_or_toolsets,
     serialize_tools_or_toolset,
 )
 from haystack.utils import deserialize_callable, serialize_callable
@@ -29,6 +30,8 @@ from llama_cpp (
     ChatCompletionRequestMessage,
     ChatCompletionRequestMessageContentPart,
     ChatCompletionResponseChoice,
+    ChatCompletionStreamResponseDelta,
+    ChatCompletionStreamResponseDeltaEmpty,
     ChatCompletionTool,
     CreateChatCompletionResponse,
     CreateChatCompletionStreamResponse,
@@ -40,7 +43,7 @@ from llama_cpp.llama_tokenizer import LlamaHFTokenizer
 
 logger = logging.getLogger(__name__)
 
-FINISH_REASON_MAPPING:
+FINISH_REASON_MAPPING: dict[str, FinishReason] = {
     "stop": "stop",
     "length": "length",
     "tool_calls": "tool_calls",
@@ -122,7 +125,7 @@ def _convert_message_to_llamacpp_format(message: ChatMessage) -> ChatCompletionR
         result["content"] = text_contents[0]
 
     if tool_calls:
-        llamacpp_tool_calls:
+        llamacpp_tool_calls: list[ChatCompletionMessageToolCall] = []
        for tc in tool_calls:
             if tc.id is None:
                 msg = "`ToolCall` must have a non-null `id` attribute to be used with llama.cpp."
@@ -189,15 +192,15 @@ class LlamaCppChatGenerator:
     def __init__(
         self,
         model: str,
-        n_ctx:
-        n_batch:
-        model_kwargs:
-        generation_kwargs:
+        n_ctx: int | None = 0,
+        n_batch: int | None = 512,
+        model_kwargs: dict[str, Any] | None = None,
+        generation_kwargs: dict[str, Any] | None = None,
         *,
-        tools:
-        streaming_callback:
-        chat_handler_name:
-        model_clip_path:
+        tools: ToolsType | None = None,
+        streaming_callback: StreamingCallbackT | None = None,
+        chat_handler_name: str | None = None,
+        model_clip_path: str | None = None,
     ):
         """
         :param model: The path of a quantized model for text generation, for example, "zephyr-7b-beta.Q4_0.gguf".
@@ -213,8 +216,8 @@ class LlamaCppChatGenerator:
             For more information on the available kwargs, see
             [llama.cpp documentation](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.create_chat_completion).
         :param tools:
-            A list of
-
+            A list of Tool and/or Toolset objects, or a single Toolset for which the model can prepare calls.
+            Each tool should have a unique name.
         :param streaming_callback: A callback function that is called when a new token is received from the stream.
         :param chat_handler_name: Name of the chat handler for multimodal models.
             Common options include: "Llava16ChatHandler", "MoondreamChatHandler", "Qwen25VLChatHandler".
@@ -233,9 +236,9 @@ class LlamaCppChatGenerator:
         model_kwargs.setdefault("n_ctx", n_ctx)
         model_kwargs.setdefault("n_batch", n_batch)
 
-        _check_duplicate_tool_names(
+        _check_duplicate_tool_names(flatten_tools_or_toolsets(tools))
 
-        handler:
+        handler: Llava15ChatHandler | None = None
         # Validate multimodal requirements
         if chat_handler_name is not None:
             if model_clip_path is None:
@@ -253,7 +256,7 @@ class LlamaCppChatGenerator:
         self.n_batch = n_batch
         self.model_kwargs = model_kwargs
         self.generation_kwargs = generation_kwargs
-        self._model:
+        self._model: Llama | None = None
         self.tools = tools
         self.streaming_callback = streaming_callback
         self.chat_handler_name = chat_handler_name
@@ -276,7 +279,7 @@ class LlamaCppChatGenerator:
 
         self._model = Llama(**kwargs)
 
-    def to_dict(self) ->
+    def to_dict(self) -> dict[str, Any]:
         """
         Serializes the component to a dictionary.
 
@@ -298,7 +301,7 @@ class LlamaCppChatGenerator:
         )
 
     @classmethod
-    def from_dict(cls, data:
+    def from_dict(cls, data: dict[str, Any]) -> "LlamaCppChatGenerator":
         """
         Deserializes the component from a dictionary.
 
@@ -317,15 +320,15 @@ class LlamaCppChatGenerator:
         )
         return default_from_dict(cls, data)
 
-    @component.output_types(replies=
+    @component.output_types(replies=list[ChatMessage])
     def run(
         self,
-        messages:
-        generation_kwargs:
+        messages: list[ChatMessage],
+        generation_kwargs: dict[str, Any] | None = None,
         *,
-        tools:
-        streaming_callback:
-    ) ->
+        tools: ToolsType | None = None,
+        streaming_callback: StreamingCallbackT | None = None,
+    ) -> dict[str, list[ChatMessage]]:
         """
         Run the text generation model on the given list of ChatMessages.
 
@@ -335,8 +338,9 @@ class LlamaCppChatGenerator:
             For more information on the available kwargs, see
             [llama.cpp documentation](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.create_chat_completion).
         :param tools:
-            A list of
-            parameter set during
+            A list of Tool and/or Toolset objects, or a single Toolset for which the model can prepare calls.
+            Each tool should have a unique name. If set, it will override the `tools` parameter set during
+            component initialization.
         :param streaming_callback: A callback function that is called when a new token is received from the stream.
             If set, it will override the `streaming_callback` parameter set during component initialization.
         :returns: A dictionary with the following keys:
@@ -353,13 +357,12 @@ class LlamaCppChatGenerator:
         formatted_messages = [_convert_message_to_llamacpp_format(msg) for msg in messages]
 
         tools = tools or self.tools
-
-
-        _check_duplicate_tool_names(tools)
+        flattened_tools = flatten_tools_or_toolsets(tools)
+        _check_duplicate_tool_names(flattened_tools)
 
-        llamacpp_tools:
-        if
-        for t in
+        llamacpp_tools: list[ChatCompletionTool] = []
+        if flattened_tools:
+            for t in flattened_tools:
                 llamacpp_tools.append(
                     {
                         "type": "function",
@@ -406,7 +409,7 @@
         response_stream: Iterator[CreateChatCompletionStreamResponse],
         streaming_callback: SyncStreamingCallbackT,
         component_info: ComponentInfo,
-    ) ->
+    ) -> dict[str, list[ChatMessage]]:
         """
         Take streaming responses from llama.cpp, convert to Haystack StreamingChunk objects, stream them,
         and finally convert them to a ChatMessage.
@@ -432,7 +435,9 @@
 
             if chunk.get("choices") and len(chunk["choices"]) > 0:
                 choice = chunk["choices"][0]
-                delta = choice.get(
+                delta: ChatCompletionStreamResponseDelta | ChatCompletionStreamResponseDeltaEmpty | dict = choice.get(
+                    "delta", {}
+                )
 
                 finish_reason = choice.get("finish_reason")
                 mapped_finish_reason = FINISH_REASON_MAPPING.get(finish_reason or "")
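
The main behavioral change above is that `tools` now accepts Haystack's `ToolsType`: a single `Toolset` or a list that freely mixes `Tool` and `Toolset` objects, flattened internally with `flatten_tools_or_toolsets`. Below is a minimal usage sketch, not taken from the package; the GGUF path (borrowed from the docstring example) and the two helper functions are placeholders for illustration.

```python
# Minimal sketch of the ToolsType-style `tools` parameter introduced in this diff.
# Assumptions: a local GGUF model at "zephyr-7b-beta.Q4_0.gguf" and the two helper
# functions below, which exist only for illustration.
from haystack.dataclasses import ChatMessage
from haystack.tools import Toolset, create_tool_from_function

from haystack_integrations.components.generators.llama_cpp import LlamaCppChatGenerator


def get_current_temperature(city: str) -> str:
    """Get the current temperature for a given city."""
    return f"The temperature in {city} is 22 degrees Celsius"


def population(city: str) -> str:
    """Get the population for a given city."""
    return f"The population of {city} is 2.2 million"


temperature_tool = create_tool_from_function(get_current_temperature)
population_toolset = Toolset([create_tool_from_function(population)])

generator = LlamaCppChatGenerator(
    model="zephyr-7b-beta.Q4_0.gguf",  # placeholder: any local quantized model
    generation_kwargs={"max_tokens": 128},
    tools=[temperature_tool, population_toolset],  # Tool and Toolset mixed in one list
)
generator.warm_up()

result = generator.run(messages=[ChatMessage.from_user("What's the weather in Paris?")])
print(result["replies"][0])
```

This mirrors the mixed Tool/Toolset path exercised by the new `test_run_with_mixed_tools` test further down.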
{llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/src/haystack_integrations/components/generators/llama_cpp/generator.py
@@ -1,4 +1,4 @@
-from typing import Any
+from typing import Any
 
 from haystack import component, logging
 
@@ -28,10 +28,10 @@ class LlamaCppGenerator:
     def __init__(
         self,
         model: str,
-        n_ctx:
-        n_batch:
-        model_kwargs:
-        generation_kwargs:
+        n_ctx: int | None = 0,
+        n_batch: int | None = 512,
+        model_kwargs: dict[str, Any] | None = None,
+        generation_kwargs: dict[str, Any] | None = None,
     ):
         """
         :param model: The path of a quantized model for text generation, for example, "zephyr-7b-beta.Q4_0.gguf".
@@ -62,16 +62,16 @@ class LlamaCppGenerator:
         self.n_batch = n_batch
         self.model_kwargs = model_kwargs
         self.generation_kwargs = generation_kwargs
-        self.model:
+        self.model: Llama | None = None
 
     def warm_up(self):
         if self.model is None:
             self.model = Llama(**self.model_kwargs)
 
-    @component.output_types(replies=
+    @component.output_types(replies=list[str], meta=list[dict[str, Any]])
     def run(
-        self, prompt: str, generation_kwargs:
-    ) ->
+        self, prompt: str, generation_kwargs: dict[str, Any] | None = None
+    ) -> dict[str, list[str] | list[dict[str, Any]]]:
         """
         Run the text generation model on the given prompt.
 
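
`generator.py` itself only gains modernized type hints: `run()` is now annotated to return `dict[str, list[str] | list[dict[str, Any]]]` with `replies` and `meta` keys. A short sketch of the call pattern, assuming a placeholder model path and illustrative generation kwargs:

```python
# Sketch of the LlamaCppGenerator API, with the new annotations in mind:
# run() -> dict with "replies" (list[str]) and "meta" (list[dict[str, Any]]).
# "openchat-3.5.gguf" is a placeholder for any local quantized model file.
from haystack_integrations.components.generators.llama_cpp import LlamaCppGenerator

generator = LlamaCppGenerator(
    model="openchat-3.5.gguf",  # placeholder GGUF path
    n_ctx=512,
    n_batch=128,
    generation_kwargs={"max_tokens": 128, "temperature": 0.1},
)
generator.warm_up()  # loads the Llama model, as shown in warm_up() above

result = generator.run(prompt="Explain what llama.cpp is in one sentence.")
print(result["replies"][0])  # generated text
print(result["meta"][0])     # llama.cpp metadata for the first completion
```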
{llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/tests/test_chat_generator.py
@@ -723,6 +723,59 @@ class TestLlamaCppChatGenerator:
         generator = LlamaCppChatGenerator(model="test_model.gguf", tools=toolset)
         assert generator.tools == toolset
 
+    def test_init_with_mixed_tools(self, temperature_tool):
+        """Test initialization with mixed Tool and Toolset objects."""
+
+        def population(city: str):
+            """Get population for a given city."""
+            return f"The population of {city} is 2.2 million"
+
+        population_tool = create_tool_from_function(population)
+        toolset = Toolset([population_tool])
+
+        generator = LlamaCppChatGenerator(model="test_model.gguf", tools=[temperature_tool, toolset])
+        assert generator.tools == [temperature_tool, toolset]
+
+    def test_run_with_mixed_tools(self, temperature_tool):
+        """Test run method with mixed Tool and Toolset objects."""
+
+        def population(city: str):
+            """Get population for a given city."""
+            return f"The population of {city} is 2.2 million"
+
+        population_tool = create_tool_from_function(population)
+        toolset = Toolset([population_tool])
+
+        generator = LlamaCppChatGenerator(model="test_model.gguf")
+
+        # Mock the model
+        mock_model = MagicMock()
+        mock_response = {
+            "choices": [{"message": {"content": "Generated text"}, "index": 0, "finish_reason": "stop"}],
+            "id": "test_id",
+            "model": "test_model",
+            "created": 1234567890,
+            "usage": {"prompt_tokens": 10, "completion_tokens": 5},
+        }
+        mock_model.create_chat_completion.return_value = mock_response
+        generator._model = mock_model
+
+        generator.run(
+            messages=[ChatMessage.from_user("What's the weather in Paris and population of Berlin?")],
+            tools=[temperature_tool, toolset],
+        )
+
+        # Verify the model was called with the correct tools
+        mock_model.create_chat_completion.assert_called_once()
+        call_args = mock_model.create_chat_completion.call_args[1]
+        assert "tools" in call_args
+        assert len(call_args["tools"]) == 2  # Both tools should be flattened
+
+        # Verify tool names
+        tool_names = {tool["function"]["name"] for tool in call_args["tools"]}
+        assert "get_current_temperature" in tool_names
+        assert "population" in tool_names
+
     def test_init_with_multimodal_params(self):
         """Test initialization with multimodal parameters."""
         generator = LlamaCppChatGenerator(
llama_cpp_haystack-1.3.0/pydoc/config.yml
@@ -1,29 +0,0 @@
-loaders:
-  - type: haystack_pydoc_tools.loaders.CustomPythonLoader
-    search_path: [../src]
-    modules: [
-      "haystack_integrations.components.generators.llama_cpp.generator",
-    ]
-    ignore_when_discovered: ["__init__"]
-processors:
-  - type: filter
-    expression:
-    documented_only: true
-    do_not_filter_modules: false
-    skip_empty_modules: true
-  - type: smart
-  - type: crossref
-renderer:
-  type: haystack_pydoc_tools.renderers.ReadmeIntegrationRenderer
-  excerpt: Llama.cpp integration for Haystack
-  category_slug: integrations-api
-  title: Llama.cpp
-  slug: integrations-llama-cpp
-  order: 140
-  markdown:
-    descriptive_class_title: false
-    classdef_code_block: false
-    descriptive_module_title: true
-    add_method_class_prefix: true
-    add_member_class_prefix: false
-    filename: _readme_llama_cpp.md
{llama_cpp_haystack-1.3.0 → llama_cpp_haystack-2.0.0}/examples/llama_cpp_generator_example.py
RENAMED
File without changes

The remaining files marked +0 -0 in the list above were renamed along with the package directory and have no content changes.