llama-cpp-haystack 2.0.0__tar.gz → 2.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (18)
  1. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/CHANGELOG.md +16 -0
  2. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/LICENSE.txt +1 -1
  3. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/PKG-INFO +1 -1
  4. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py +14 -8
  5. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/src/haystack_integrations/components/generators/llama_cpp/generator.py +8 -5
  6. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/tests/test_chat_generator.py +4 -13
  7. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/tests/test_generator.py +4 -9
  8. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/.gitignore +0 -0
  9. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/README.md +0 -0
  10. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/examples/llama_cpp_generator_example.py +0 -0
  11. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/examples/rag_pipeline_example.py +0 -0
  12. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/pydoc/config_docusaurus.yml +0 -0
  13. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/pyproject.toml +0 -0
  14. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/src/haystack_integrations/components/generators/llama_cpp/__init__.py +0 -0
  15. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/src/haystack_integrations/components/generators/py.typed +0 -0
  16. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/tests/__init__.py +0 -0
  17. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/tests/models/.gitignore +0 -0
  18. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/tests/test_files/apple.jpg +0 -0
CHANGELOG.md
@@ -1,5 +1,21 @@
  # Changelog
 
+ ## [integrations/llama_cpp-v2.0.0] - 2026-01-12
+
+ ### ⚙️ CI
+
+ - Change pytest command (#2475)
+
+ ### 🧹 Chores
+
+ - Remove Readme API CI workflow and configs (#2573)
+ - Make fmt command more forgiving (#2671)
+ - [**breaking**] Llama_cpp - drop Python 3.9 and use X|Y typing (#2710)
+
+ ### 🌀 Miscellaneous
+
+ - Enhancement: Adopt PEP 585 type hinting (part 4) (#2527)
+
  ## [integrations/llama_cpp-v1.4.0] - 2025-10-23
 
  ### 📚 Documentation
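The breaking entry above (drop Python 3.9, use X|Y typing) and the PEP 585 item describe the same typing modernization that appears throughout the source diffs below. A minimal before/after sketch, using a hypothetical class purely for illustration:

    from typing import Any

    class ExampleComponent:
        # Hypothetical class, used only to illustrate the typing style change.
        # The older, Python 3.9-compatible style would typically have been:
        #     from typing import Any, Dict, Optional
        #     def __init__(self, model_kwargs: Optional[Dict[str, Any]] = None): ...
        # The new style uses PEP 585 built-in generics plus PEP 604 "X | Y" unions,
        # as seen in generator.py and chat_generator.py below.
        def __init__(self, model_kwargs: dict[str, Any] | None = None) -> None:
            self.model_kwargs = model_kwargs or {}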
LICENSE.txt
@@ -58,7 +58,7 @@ APPENDIX: How to apply the Apache License to your work.
 
  To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives.
 
- Copyright [yyyy] [name of copyright owner]
+ Copyright 2024 deepset GmbH
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: llama-cpp-haystack
- Version: 2.0.0
+ Version: 2.1.0
  Summary: An integration between the llama.cpp LLM framework and Haystack
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/llama_cpp#readme
  Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py
@@ -1,3 +1,7 @@
+ # SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
+ #
+ # SPDX-License-Identifier: Apache-2.0
+
  import json
  from collections.abc import Iterator
  from datetime import datetime, timezone
@@ -201,7 +205,7 @@ class LlamaCppChatGenerator:
          streaming_callback: StreamingCallbackT | None = None,
          chat_handler_name: str | None = None,
          model_clip_path: str | None = None,
-     ):
+     ) -> None:
          """
          :param model: The path of a quantized model for text generation, for example, "zephyr-7b-beta.Q4_0.gguf".
              If the model path is also specified in the `model_kwargs`, this parameter will be ignored.
@@ -263,7 +267,7 @@ class LlamaCppChatGenerator:
          self.model_clip_path = model_clip_path
          self._handler = handler
 
-     def warm_up(self):
+     def warm_up(self) -> None:
          if self._model is not None:
              return
 
@@ -347,8 +351,7 @@ class LlamaCppChatGenerator:
              - `replies`: The responses from the model
          """
          if self._model is None:
-             error_msg = "The model has not been loaded. Please call warm_up() before running."
-             raise RuntimeError(error_msg)
+             self.warm_up()
 
          if not messages:
              return {"replies": []}
@@ -381,7 +384,7 @@ class LlamaCppChatGenerator:
          )
 
          if streaming_callback:
-             response_stream = self._model.create_chat_completion(
+             response_stream = self._model.create_chat_completion(  # type: ignore[union-attr]
                  messages=formatted_messages, tools=llamacpp_tools, **updated_generation_kwargs, stream=True
              )
              return self._handle_streaming_response(
@@ -391,7 +394,7 @@ class LlamaCppChatGenerator:
              )  # we know that response_stream is Iterator[CreateChatCompletionStreamResponse]
              # because create_chat_completion was called with stream=True, but mypy doesn't know that
 
-         response = self._model.create_chat_completion(
+         response = self._model.create_chat_completion(  # type: ignore[union-attr]
              messages=formatted_messages, tools=llamacpp_tools, **updated_generation_kwargs
          )
          replies = []
@@ -399,8 +402,11 @@ class LlamaCppChatGenerator:
              msg = f"Expected a dictionary response, got a different object: {response}"
              raise ValueError(msg)
 
-         for choice in response["choices"]:
-             chat_message = self._convert_chat_completion_choice_to_chat_message(choice, response)
+         for choice in response["choices"]:  # type: ignore[index]
+             chat_message = self._convert_chat_completion_choice_to_chat_message(
+                 choice,
+                 response,  # type: ignore[arg-type]
+             )
              replies.append(chat_message)
          return {"replies": replies}
 
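The run() hunks above change the warm-up contract: instead of raising a RuntimeError when the model has not been loaded, LlamaCppChatGenerator.run() now calls warm_up() itself on first use. A minimal usage sketch under that assumption; the GGUF file name is the docstring's example and is purely illustrative:

    from haystack.dataclasses import ChatMessage
    from haystack_integrations.components.generators.llama_cpp import LlamaCppChatGenerator

    # Any local GGUF model works here; "zephyr-7b-beta.Q4_0.gguf" is just the docstring's example.
    generator = LlamaCppChatGenerator(model="zephyr-7b-beta.Q4_0.gguf", n_ctx=2048, n_batch=512)

    # Explicit warm_up() is still supported but, as of this change, no longer required:
    # run() loads the model lazily if it has not been loaded yet.
    result = generator.run(messages=[ChatMessage.from_user("What is the capital of China?")])
    print(result["replies"][0].text)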
src/haystack_integrations/components/generators/llama_cpp/generator.py
@@ -1,3 +1,7 @@
+ # SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
+ #
+ # SPDX-License-Identifier: Apache-2.0
+
  from typing import Any
 
  from haystack import component, logging
@@ -32,7 +36,7 @@ class LlamaCppGenerator:
          n_batch: int | None = 512,
          model_kwargs: dict[str, Any] | None = None,
          generation_kwargs: dict[str, Any] | None = None,
-     ):
+     ) -> None:
          """
          :param model: The path of a quantized model for text generation, for example, "zephyr-7b-beta.Q4_0.gguf".
              If the model path is also specified in the `model_kwargs`, this parameter will be ignored.
@@ -64,7 +68,7 @@ class LlamaCppGenerator:
          self.generation_kwargs = generation_kwargs
          self.model: Llama | None = None
 
-     def warm_up(self):
+     def warm_up(self) -> None:
          if self.model is None:
              self.model = Llama(**self.model_kwargs)
 
@@ -84,8 +88,7 @@ class LlamaCppGenerator:
              - `meta`: metadata about the request.
          """
          if self.model is None:
-             error_msg = "The model has not been loaded. Please call warm_up() before running."
-             raise RuntimeError(error_msg)
+             self.warm_up()
 
          if not prompt:
              return {"replies": []}
@@ -93,7 +96,7 @@ class LlamaCppGenerator:
          # merge generation kwargs from init method with those from run method
          updated_generation_kwargs = {**self.generation_kwargs, **(generation_kwargs or {})}
 
-         output = self.model.create_completion(prompt=prompt, **updated_generation_kwargs)
+         output = self.model.create_completion(prompt=prompt, **updated_generation_kwargs)  # type: ignore[union-attr]
          if not isinstance(output, dict):
              msg = f"Expected a dictionary response, got a different object: {output}"
              raise ValueError(msg)
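The same relaxation applies to LlamaCppGenerator: run() now warms the model up itself instead of raising when warm_up() was skipped. A short sketch, again with an illustrative model path:

    from haystack_integrations.components.generators.llama_cpp import LlamaCppGenerator

    # Illustrative model path; use any local GGUF file.
    generator = LlamaCppGenerator(model="zephyr-7b-beta.Q4_0.gguf", n_ctx=2048, n_batch=512)

    # No explicit warm_up() call is needed before run() as of this release.
    result = generator.run("What is the capital of China?", generation_kwargs={"max_tokens": 64})
    print(result["replies"][0])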
tests/test_chat_generator.py
@@ -1,3 +1,7 @@
+ # SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
+ #
+ # SPDX-License-Identifier: Apache-2.0
+
  import json
  import os
  import urllib.request
@@ -678,7 +682,6 @@ class TestLlamaCppChatGenerator:
 
          model_path = str(model_path / filename)
          generator = LlamaCppChatGenerator(model=model_path, n_ctx=8192, n_batch=512)
-         generator.warm_up()
          return generator
 
      @pytest.fixture
@@ -898,14 +901,6 @@ class TestLlamaCppChatGenerator:
          )
          assert generator.model_kwargs["n_batch"] == 1024
 
-     def test_raises_error_without_warm_up(self):
-         """
-         Test that the generator raises an error if warm_up() is not called before running.
-         """
-         generator = LlamaCppChatGenerator(model="test_model.gguf", n_ctx=512, n_batch=512)
-         with pytest.raises(RuntimeError):
-             generator.run("What is the capital of China?")
-
      def test_run_with_empty_message(self, generator_mock):
          """
          Test that an empty message returns an empty list of replies.
@@ -1179,7 +1174,6 @@ class TestLlamaCppChatGeneratorFunctionary:
                  "hf_tokenizer_path": hf_tokenizer_path,
              },
          )
-         generator.warm_up()
          return generator
 
      @pytest.mark.integration
@@ -1260,7 +1254,6 @@ class TestLlamaCppChatGeneratorChatML:
                  "chat_format": "chatml-function-calling",
              },
          )
-         generator.warm_up()
          return generator
 
      @pytest.mark.integration
@@ -1326,8 +1319,6 @@ class TestLlamaCppChatGeneratorChatML:
              generation_kwargs={"max_tokens": 50, "temperature": 0.1},
          )
 
-         generator.warm_up()
-
          result = generator.run(messages)
 
          assert "replies" in result
tests/test_generator.py
@@ -1,3 +1,7 @@
+ # SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
+ #
+ # SPDX-License-Identifier: Apache-2.0
+
  import os
  import urllib.request
  from pathlib import Path
@@ -42,7 +46,6 @@ class TestLlamaCppGenerator:
 
          model_path = str(model_path / filename)
          generator = LlamaCppGenerator(model=model_path, n_ctx=128, n_batch=128)
-         generator.warm_up()
          return generator
 
      @pytest.fixture
@@ -106,14 +109,6 @@ class TestLlamaCppGenerator:
          generator = LlamaCppGenerator(model="test_model.gguf", n_ctx=512, n_batch=512, model_kwargs={"n_batch": 1024})
          assert generator.model_kwargs["n_batch"] == 1024
 
-     def test_raises_error_without_warm_up(self):
-         """
-         Test that the generator raises an error if warm_up() is not called before running.
-         """
-         generator = LlamaCppGenerator(model="test_model.gguf", n_ctx=512, n_batch=512)
-         with pytest.raises(RuntimeError):
-             generator.run("What is the capital of China?")
-
      def test_run_with_empty_prompt(self, generator_mock):
          """
          Test that an empty prompt returns an empty list of replies.