llama-cpp-haystack 0.4.0__tar.gz → 0.4.2__tar.gz

This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
{llama_cpp_haystack-0.4.0 → llama_cpp_haystack-0.4.2}/.gitignore

@@ -135,3 +135,12 @@ dmypy.json
 # Docs generation artifacts
 _readme_*.md
 .idea
+
+# macOS
+.DS_Store
+
+# http cache (requests-cache)
+**/http_cache.sqlite
+
+# ruff
+.ruff_cache
llama_cpp_haystack-0.4.2/CHANGELOG.md (new file)

@@ -0,0 +1,50 @@
+# Changelog
+
+## [integrations/llama_cpp-v0.4.1] - 2024-08-08
+
+### 🐛 Bug Fixes
+
+- Replace DynamicChatPromptBuilder with ChatPromptBuilder (#940)
+
+### ⚙️ Miscellaneous Tasks
+
+- Retry tests to reduce flakyness (#836)
+- Update ruff invocation to include check parameter (#853)
+- Pin `llama-cpp-python>=0.2.87` (#955)
+
+## [integrations/llama_cpp-v0.4.0] - 2024-05-13
+
+### 🐛 Bug Fixes
+
+- Fix commit (#436)
+
+
+- Fix order of API docs (#447)
+
+This PR will also push the docs to Readme
+
+### 📚 Documentation
+
+- Update category slug (#442)
+- Small consistency improvements (#536)
+- Disable-class-def (#556)
+
+### ⚙️ Miscellaneous Tasks
+
+- [**breaking**] Rename model_path to model in the Llama.cpp integration (#243)
+
+### Llama.cpp
+
+- Generate api docs (#353)
+
+## [integrations/llama_cpp-v0.2.1] - 2024-01-18
+
+## [integrations/llama_cpp-v0.2.0] - 2024-01-17
+
+## [integrations/llama_cpp-v0.1.0] - 2024-01-09
+
+### 🚀 Features
+
+- Add Llama.cpp Generator (#179)
+
+<!-- generated by git-cliff -->
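One note on the 0.4.0 entry above: the `model_path` → `model` rename (#243) is the only change flagged as breaking in this range. A minimal sketch of post-rename usage, assuming a locally downloaded GGUF file (the filename below is a placeholder, not part of the package):

```python
from haystack_integrations.components.generators.llama_cpp import LlamaCppGenerator

# The model file is now passed as `model` (formerly `model_path`).
# "openchat-3.5.gguf" is a placeholder for any local GGUF model file.
generator = LlamaCppGenerator(model="openchat-3.5.gguf", n_ctx=512, n_batch=128)
generator.warm_up()  # loads the model before the first run() call
result = generator.run("Briefly explain what llama.cpp is.")
print(result["replies"][0])
```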
{llama_cpp_haystack-0.4.0 → llama_cpp_haystack-0.4.2}/PKG-INFO

@@ -1,14 +1,13 @@
 Metadata-Version: 2.3
 Name: llama-cpp-haystack
-Version: 0.4.0
+Version: 0.4.2
 Summary: An integration between the llama.cpp LLM framework and Haystack
 Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/llama_cpp#readme
 Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
 Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/llama_cpp
 Author: Ashwin Mathur
 Author-email: deepset GmbH <info@deepset.ai>
-License-Expression: Apache-2.0
-License-File: LICENSE.txt
+License: Apache-2.0
 Classifier: Development Status :: 4 - Beta
 Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Programming Language :: Python
@@ -21,7 +20,7 @@ Classifier: Programming Language :: Python :: Implementation :: CPython
 Classifier: Programming Language :: Python :: Implementation :: PyPy
 Requires-Python: >=3.8
 Requires-Dist: haystack-ai
-Requires-Dist: llama-cpp-python
+Requires-Dist: llama-cpp-python>=0.2.87
 Description-Content-Type: text/markdown
 
 # llama-cpp-haystack
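The substantive metadata change here is the new version floor on `llama-cpp-python` (#955). A quick stdlib-only check that an installed environment picked up the pin (a sketch; the outputs in the comments are indicative):

```python
from importlib.metadata import requires, version

# Read the installed package's metadata (the PKG-INFO shown above).
print(version("llama-cpp-haystack"))  # e.g. "0.4.2"
for requirement in requires("llama-cpp-haystack") or []:
    print(requirement)  # expect "llama-cpp-python>=0.2.87" among the lines
```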
@@ -6,6 +6,7 @@ from haystack.components.embedders import SentenceTransformersDocumentEmbedder,
 from haystack.components.retrievers import InMemoryEmbeddingRetriever
 from haystack.components.writers import DocumentWriter
 from haystack.document_stores import InMemoryDocumentStore
+
 from haystack_integrations.components.generators.llama_cpp import LlamaCppGenerator
 
 # Load first 100 rows of the Simple Wikipedia Dataset from HuggingFace
{llama_cpp_haystack-0.4.0 → llama_cpp_haystack-0.4.2}/pyproject.toml

@@ -15,7 +15,7 @@ authors = [
   { name = "Ashwin Mathur", email = "" },
 ]
 classifiers = [
-  "License :: OSI Approved :: Apache Software License",
+  "License :: OSI Approved :: Apache Software License",
   "Development Status :: 4 - Beta",
   "Programming Language :: Python",
   "Programming Language :: Python :: 3.8",
@@ -26,10 +26,7 @@ classifiers = [
   "Programming Language :: Python :: Implementation :: CPython",
   "Programming Language :: Python :: Implementation :: PyPy",
 ]
-dependencies = [
-  "haystack-ai",
-  "llama-cpp-python"
-]
+dependencies = ["haystack-ai", "llama-cpp-python>=0.2.87"]
 
 [project.urls]
 Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/llama_cpp#readme"
@@ -48,59 +45,42 @@ root = "../.."
 git_describe_command = 'git describe --tags --match="integrations/llama_cpp-v[0-9]*"'
 
 [tool.hatch.envs.default]
+installer = "uv"
 dependencies = [
   "coverage[toml]>=6.5",
   "pytest",
+  "pytest-rerunfailures",
   "haystack-pydoc-tools",
-  "transformers[sentencepiece]"
+  "transformers[sentencepiece]",
 ]
 [tool.hatch.envs.default.scripts]
 test = "pytest {args:tests}"
 test-cov = "coverage run -m pytest {args:tests}"
-cov-report = [
-  "- coverage combine",
-  "coverage report",
-]
-cov = [
-  "test-cov",
-  "cov-report",
-]
-docs = [
-  "pydoc-markdown pydoc/config.yml"
-]
+test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
+cov-report = ["- coverage combine", "coverage report"]
+cov = ["test-cov", "cov-report"]
+cov-retry = ["test-cov-retry", "cov-report"]
+docs = ["pydoc-markdown pydoc/config.yml"]
 [[tool.hatch.envs.all.matrix]]
 python = ["3.8", "3.9", "3.10", "3.11", "3.12"]
 
 
 [tool.hatch.envs.lint]
+installer = "uv"
 detached = true
-dependencies = [
-  "black>=23.1.0",
-  "mypy>=1.0.0",
-  "ruff>=0.0.243",
-]
+dependencies = ["pip", "black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"]
 
 [tool.hatch.envs.lint.scripts]
 typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}"
-style = [
-  "ruff {args:.}",
-  "black --check --diff {args:.}",
-]
-fmt = [
-  "black {args:.}",
-  "ruff --fix {args:.}",
-  "style",
-]
-all = [
-  "style",
-  "typing",
-]
+style = ["ruff check {args:.}", "black --check --diff {args:.}"]
+fmt = ["black {args:.}", "ruff check --fix {args:.}", "style"]
+all = ["style", "typing"]
 
 [tool.hatch.metadata]
 allow-direct-references = true
 
-[tool.ruff.isort]
-known-first-party = ["src"]
+[tool.ruff.lint.isort]
+known-first-party = ["haystack_integrations"]
 
 [tool.black]
 target-version = ["py38"]
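The new `test-cov-retry` script relies on the `pytest-rerunfailures` plugin added to the default environment: `--reruns 3 --reruns-delay 30` reruns each failing test up to three times with a 30-second pause, and `-x` stops at the first test that still fails after its retries. The same plugin also supports opt-in retries on individual tests; a minimal sketch (the test body is illustrative):

```python
import random

import pytest


# Per-test equivalent of the env-wide `--reruns 3 --reruns-delay 30` flags;
# requires the pytest-rerunfailures plugin the default hatch env now installs.
@pytest.mark.flaky(reruns=3, reruns_delay=30)
def test_sometimes_flaky():
    # Illustrative flaky condition: fails roughly one attempt in ten.
    assert random.random() < 0.9
```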
@@ -110,6 +90,8 @@ skip-string-normalization = true
 [tool.ruff]
 target-version = "py38"
 line-length = 120
+
+[tool.ruff.lint]
 select = [
   "A",
   "ARG",
@@ -140,19 +122,25 @@ ignore = [
   # Allow non-abstract empty methods in abstract base classes
   "B027",
   # Ignore checks for possible passwords
-  "S105", "S106", "S107",
+  "S105",
+  "S106",
+  "S107",
   # Ignore complexity
-  "C901", "PLR0911", "PLR0912", "PLR0913", "PLR0915",
+  "C901",
+  "PLR0911",
+  "PLR0912",
+  "PLR0913",
+  "PLR0915",
 ]
 unfixable = [
   # Don't touch unused imports
   "F401",
 ]
 
-[tool.ruff.flake8-tidy-imports]
+[tool.ruff.lint.flake8-tidy-imports]
 ban-relative-imports = "parents"
 
-[tool.ruff.per-file-ignores]
+[tool.ruff.lint.per-file-ignores]
 # Tests can use magic values, assertions, and relative imports
 "tests/**/*" = ["PLR2004", "S101", "TID252"]
 # Examples can print their output
@@ -167,27 +155,16 @@ parallel = false
 
 [tool.coverage.report]
 omit = ["*/tests/*", "*/__init__.py"]
-show_missing=true
-exclude_lines = [
-  "no cov",
-  "if __name__ == .__main__.:",
-  "if TYPE_CHECKING:",
-]
+show_missing = true
+exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"]
 
 
 [tool.pytest.ini_options]
 markers = [
   "integration: marks tests as slow (deselect with '-m \"not integration\"')",
 ]
-addopts = [
-  "--import-mode=importlib",
-]
+addopts = ["--import-mode=importlib"]
 
 [[tool.mypy.overrides]]
-module = [
-  "haystack.*",
-  "haystack_integrations.*",
-  "pytest.*",
-  "llama_cpp.*"
-]
+module = ["haystack.*", "haystack_integrations.*", "pytest.*", "llama_cpp.*"]
 ignore_missing_imports = true
{llama_cpp_haystack-0.4.0 → llama_cpp_haystack-0.4.2}/src/haystack_integrations/components/generators/llama_cpp/__init__.py

@@ -5,4 +5,4 @@
 from .chat.chat_generator import LlamaCppChatGenerator
 from .generator import LlamaCppGenerator
 
-__all__ = ["LlamaCppGenerator", "LlamaCppChatGenerator"]
+__all__ = ["LlamaCppChatGenerator", "LlamaCppGenerator"]
{llama_cpp_haystack-0.4.0 → llama_cpp_haystack-0.4.2}/src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py

@@ -9,6 +9,21 @@ from llama_cpp.llama_tokenizer import LlamaHFTokenizer
 logger = logging.getLogger(__name__)
 
 
+def _convert_message_to_llamacpp_format(message: ChatMessage) -> Dict[str, str]:
+    """
+    Convert a message to the format expected by Llama.cpp.
+    :returns: A dictionary with the following keys:
+        - `role`
+        - `content`
+        - `name` (optional)
+    """
+    formatted_msg = {"role": message.role.value, "content": message.text}
+    if message.name:
+        formatted_msg["name"] = message.name
+
+    return formatted_msg
+
+
 @component
 class LlamaCppChatGenerator:
     """
@@ -96,7 +111,7 @@ class LlamaCppChatGenerator:
             return {"replies": []}
 
         updated_generation_kwargs = {**self.generation_kwargs, **(generation_kwargs or {})}
-        formatted_messages = [msg.to_openai_format() for msg in messages]
+        formatted_messages = [_convert_message_to_llamacpp_format(msg) for msg in messages]
 
         response = self.model.create_chat_completion(messages=formatted_messages, **updated_generation_kwargs)
         replies = [
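With this change, `run()` formats messages through the module's own helper instead of `ChatMessage.to_openai_format()`, decoupling the integration from that Haystack method. A sketch of the updated component end to end, assuming a locally downloaded GGUF chat model (the filename is a placeholder):

```python
from haystack.dataclasses import ChatMessage

from haystack_integrations.components.generators.llama_cpp import LlamaCppChatGenerator

# "openchat-3.5.gguf" is a placeholder for any local GGUF chat model.
generator = LlamaCppChatGenerator(model="openchat-3.5.gguf", n_ctx=512, n_batch=128)
generator.warm_up()

# Each ChatMessage is converted to {"role": ..., "content": ...} by
# _convert_message_to_llamacpp_format before reaching create_chat_completion.
result = generator.run([ChatMessage.from_user("What is the capital of France?")])
print(result["replies"][0].text)
```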
{llama_cpp_haystack-0.4.0 → llama_cpp_haystack-0.4.2}/tests/test_chat_generator.py

@@ -6,11 +6,15 @@ from unittest.mock import MagicMock
 
 import pytest
 from haystack import Document, Pipeline
-from haystack.components.builders.dynamic_chat_prompt_builder import DynamicChatPromptBuilder
+from haystack.components.builders import ChatPromptBuilder
 from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
 from haystack.dataclasses import ChatMessage, ChatRole
 from haystack.document_stores.in_memory import InMemoryDocumentStore
-from haystack_integrations.components.generators.llama_cpp import LlamaCppChatGenerator
+
+from haystack_integrations.components.generators.llama_cpp.chat.chat_generator import (
+    LlamaCppChatGenerator,
+    _convert_message_to_llamacpp_format,
+)
 
 
 @pytest.fixture
@@ -29,6 +33,21 @@ def download_file(file_link, filename, capsys):
        print("\nModel file already exists.")
 
 
+def test_convert_message_to_llamacpp_format():
+    message = ChatMessage.from_system("You are good assistant")
+    assert _convert_message_to_llamacpp_format(message) == {"role": "system", "content": "You are good assistant"}
+
+    message = ChatMessage.from_user("I have a question")
+    assert _convert_message_to_llamacpp_format(message) == {"role": "user", "content": "I have a question"}
+
+    message = ChatMessage.from_function("Function call", "function_name")
+    assert _convert_message_to_llamacpp_format(message) == {
+        "role": "function",
+        "content": "Function call",
+        "name": "function_name",
+    }
+
+
 class TestLlamaCppChatGenerator:
     @pytest.fixture
     def generator(self, model_path, capsys):
@@ -144,7 +163,7 @@ class TestLlamaCppChatGenerator:
         assert isinstance(result["replies"], list)
         assert len(result["replies"]) == 1
         assert isinstance(result["replies"][0], ChatMessage)
-        assert result["replies"][0].content == "Generated text"
+        assert result["replies"][0].text == "Generated text"
         assert result["replies"][0].role == ChatRole.ASSISTANT
 
     def test_run_with_generation_kwargs(self, generator_mock):
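This and the following test hunks are a mechanical migration from the older `ChatMessage.content` attribute to the `ChatMessage.text` accessor used by newer Haystack releases. A one-line sketch of the new accessor:

```python
from haystack.dataclasses import ChatMessage

reply = ChatMessage.from_assistant("Generated text")
# The message body is read via `.text`; these tests previously used `.content`.
assert reply.text == "Generated text"
```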
@@ -164,7 +183,7 @@ class TestLlamaCppChatGenerator:
         mock_model.create_chat_completion.return_value = mock_output
         generation_kwargs = {"max_tokens": 128}
         result = generator.run([ChatMessage.from_system("Write a 200 word paragraph.")], generation_kwargs)
-        assert result["replies"][0].content == "Generated text"
+        assert result["replies"][0].text == "Generated text"
         assert result["replies"][0].meta["finish_reason"] == "length"
 
     @pytest.mark.integration
@@ -187,7 +206,7 @@ class TestLlamaCppChatGenerator:
         assert "replies" in result
         assert isinstance(result["replies"], list)
         assert len(result["replies"]) > 0
-        assert any(answer.lower() in reply.content.lower() for reply in result["replies"])
+        assert any(answer.lower() in reply.text.lower() for reply in result["replies"])
 
     @pytest.mark.integration
     def test_run_rag_pipeline(self, generator):
@@ -213,9 +232,7 @@ class TestLlamaCppChatGenerator:
             instance=InMemoryBM25Retriever(document_store=document_store, top_k=1),
             name="retriever",
         )
-        pipeline.add_component(
-            instance=DynamicChatPromptBuilder(runtime_variables=["query", "documents"]), name="prompt_builder"
-        )
+        pipeline.add_component(instance=ChatPromptBuilder(variables=["query", "documents"]), name="prompt_builder")
         pipeline.add_component(instance=generator, name="llm")
         pipeline.connect("retriever.documents", "prompt_builder.documents")
         pipeline.connect("prompt_builder.prompt", "llm.messages")
@@ -245,7 +262,7 @@ class TestLlamaCppChatGenerator:
                 "retriever": {"query": question},
                 "prompt_builder": {
                     "template_variables": {"location": location},
-                    "prompt_source": messages,
+                    "template": messages,
                     "query": question,
                 },
             }
@@ -253,7 +270,7 @@ class TestLlamaCppChatGenerator:
 
         replies = result["llm"]["replies"]
         assert len(replies) > 0
-        assert any("bioluminescent waves" in reply.content for reply in replies)
+        assert any("bioluminescent waves" in reply.text.lower() for reply in replies)
         assert all(reply.role == ChatRole.ASSISTANT for reply in replies)
 
     @pytest.mark.integration
@@ -291,15 +308,15 @@ class TestLlamaCppChatGenerator:
         assert len(result["replies"]) > 0
         assert all(reply.role == ChatRole.ASSISTANT for reply in result["replies"])
         for reply in result["replies"]:
-            assert json.loads(reply.content)
-            assert isinstance(json.loads(reply.content), dict)
-            assert "people" in json.loads(reply.content)
-            assert isinstance(json.loads(reply.content)["people"], list)
-            assert all(isinstance(person, dict) for person in json.loads(reply.content)["people"])
-            assert all("name" in person for person in json.loads(reply.content)["people"])
-            assert all("age" in person for person in json.loads(reply.content)["people"])
-            assert all(isinstance(person["name"], str) for person in json.loads(reply.content)["people"])
-            assert all(isinstance(person["age"], int) for person in json.loads(reply.content)["people"])
+            assert json.loads(reply.text)
+            assert isinstance(json.loads(reply.text), dict)
+            assert "people" in json.loads(reply.text)
+            assert isinstance(json.loads(reply.text)["people"], list)
+            assert all(isinstance(person, dict) for person in json.loads(reply.text)["people"])
+            assert all("name" in person for person in json.loads(reply.text)["people"])
+            assert all("age" in person for person in json.loads(reply.text)["people"])
+            assert all(isinstance(person["name"], str) for person in json.loads(reply.text)["people"])
+            assert all(isinstance(person["age"], int) for person in json.loads(reply.text)["people"])
 
 
 class TestLlamaCppChatGeneratorFunctionary:
@@ -325,7 +342,7 @@ class TestLlamaCppChatGeneratorFunctionary:
         hf_tokenizer_path = "meetkai/functionary-small-v2.4-GGUF"
         generator = LlamaCppChatGenerator(
             model=model_path,
-            n_ctx=8192,
+            n_ctx=512,
             n_batch=512,
             model_kwargs={
                 "chat_format": "functionary-v2",
@@ -382,7 +399,6 @@ class TestLlamaCppChatGeneratorFunctionary:
                                 "type": "string",
                                 "description": "The city and state, e.g. San Francisco, CA",
                             },
-                            "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                         },
                         "required": ["location"],
                     },
@@ -390,7 +406,8 @@ class TestLlamaCppChatGeneratorFunctionary:
             }
         ]
 
-        response = generator.run(messages=messages, generation_kwargs={"tools": tools})
+        tool_choice = {"type": "function", "function": {"name": "get_current_temperature"}}
+        response = generator.run(messages=messages, generation_kwargs={"tools": tools, "tool_choice": tool_choice})
 
         available_functions = {
             "get_current_temperature": self.get_current_temperature,
@@ -412,11 +429,10 @@ class TestLlamaCppChatGeneratorFunctionary:
         messages.append(function_message)
 
         second_response = generator.run(messages=messages)
-        print(second_response)
         assert "replies" in second_response
         assert len(second_response["replies"]) > 0
-        assert any("San Francisco" in reply.content for reply in second_response["replies"])
-        assert any("72" in reply.content for reply in second_response["replies"])
+        assert any("San Francisco" in reply.text for reply in second_response["replies"])
+        assert any("72" in reply.text for reply in second_response["replies"])
 
 
 class TestLlamaCppChatGeneratorChatML:
{llama_cpp_haystack-0.4.0 → llama_cpp_haystack-0.4.2}/tests/test_generator.py

@@ -9,6 +9,7 @@ from haystack.components.builders.answer_builder import AnswerBuilder
 from haystack.components.builders.prompt_builder import PromptBuilder
 from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
 from haystack.document_stores.in_memory import InMemoryDocumentStore
+
 from haystack_integrations.components.generators.llama_cpp import LlamaCppGenerator
 