llama-cpp-haystack 0.4.1__tar.gz → 0.4.3__tar.gz

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their respective public registries.
@@ -0,0 +1,93 @@
+ # Changelog
+
+ ## [integrations/llama_cpp-v0.4.2] - 2024-12-10
+
+ ### 🧪 Testing
+
+ - Do not retry tests in `hatch run test` command (#954)
+
+ ### ⚙️ CI
+
+ - Adopt uv as installer (#1142)
+
+ ### 🧹 Chores
+
+ - Update ruff linting scripts and settings (#1105)
+ - Unpin `llama-cpp-python` (#1115)
+ - Fix linting/isort (#1215)
+ - Use text instead of content for ChatMessage in Llama.cpp, Langfuse and Mistral (#1238)
+
+ ### 🌀 Miscellaneous
+
+ - Chore: llama_cpp - ruff update, don't ruff tests (#998)
+ - Fix: pin `llama-cpp-python<0.3.0` (#1111)
+
+ ## [integrations/llama_cpp-v0.4.1] - 2024-08-08
+
+ ### 🐛 Bug Fixes
+
+ - Replace DynamicChatPromptBuilder with ChatPromptBuilder (#940)
+
+ ### ⚙️ CI
+
+ - Retry tests to reduce flakiness (#836)
+
+ ### 🧹 Chores
+
+ - Update ruff invocation to include check parameter (#853)
+ - Pin `llama-cpp-python>=0.2.87` (#955)
+
+ ### 🌀 Miscellaneous
+
+ - Ci: install `pytest-rerunfailures` where needed; add retry config to `test-cov` script (#845)
+ - Fix: pin llama-cpp-python to an older version (#943)
+ - Refactor: introduce `_convert_message_to_llamacpp_format` utility function (#939)
+
+ ## [integrations/llama_cpp-v0.4.0] - 2024-05-13
+
+ ### 🐛 Bug Fixes
+
+ - Llama.cpp: change wrong links and imports (#436)
+ - Fix order of API docs (#447)
+
+ ### 📚 Documentation
+
+ - Update category slug (#442)
+ - Small consistency improvements (#536)
+ - Disable-class-def (#556)
+
+ ### 🧹 Chores
+
+ - [**breaking**] Rename model_path to model in the Llama.cpp integration (#243)
+
+ ### 🌀 Miscellaneous
+
+ - Generate api docs (#353)
+ - Model_name_or_path > model (#418)
+ - Llama.cpp - review docstrings (#510)
+ - Llama.cpp - update examples (#511)
+ - Make tests show coverage (#566)
+ - Remove references to Python 3.7 (#601)
+ - Chore: add license classifiers (#680)
+ - Chore: change the pydoc renderer class (#718)
+ - Basic implementation of llama.cpp chat generation (#723)
+
+ ## [integrations/llama_cpp-v0.2.1] - 2024-01-18
+
+ ### 🌀 Miscellaneous
+
+ - Update import paths for beta5 (#233)
+
+ ## [integrations/llama_cpp-v0.2.0] - 2024-01-17
+
+ ### 🌀 Miscellaneous
+
+ - Mount llama_cpp in haystack_integrations (#217)
+
+ ## [integrations/llama_cpp-v0.1.0] - 2024-01-09
+
+ ### 🚀 Features
+
+ - Add Llama.cpp Generator (#179)
+
+ <!-- generated by git-cliff -->
@@ -1,6 +1,6 @@
- Metadata-Version: 2.3
+ Metadata-Version: 2.4
  Name: llama-cpp-haystack
- Version: 0.4.1
+ Version: 0.4.3
  Summary: An integration between the llama.cpp LLM framework and Haystack
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/llama_cpp#readme
  Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
@@ -6,6 +6,7 @@ from haystack.components.embedders import SentenceTransformersDocumentEmbedder,
  from haystack.components.retrievers import InMemoryEmbeddingRetriever
  from haystack.components.writers import DocumentWriter
  from haystack.document_stores import InMemoryDocumentStore
+
  from haystack_integrations.components.generators.llama_cpp import LlamaCppGenerator

  # Load first 100 rows of the Simple Wikipedia Dataset from HuggingFace
@@ -45,6 +45,7 @@ root = "../.."
  git_describe_command = 'git describe --tags --match="integrations/llama_cpp-v[0-9]*"'

  [tool.hatch.envs.default]
+ installer = "uv"
  dependencies = [
    "coverage[toml]>=6.5",
    "pytest",
@@ -53,30 +54,33 @@ dependencies = [
    "transformers[sentencepiece]",
  ]
  [tool.hatch.envs.default.scripts]
- test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
- test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
+ test = "pytest {args:tests}"
+ test-cov = "coverage run -m pytest {args:tests}"
+ test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
  cov-report = ["- coverage combine", "coverage report"]
  cov = ["test-cov", "cov-report"]
+ cov-retry = ["test-cov-retry", "cov-report"]
  docs = ["pydoc-markdown pydoc/config.yml"]
  [[tool.hatch.envs.all.matrix]]
  python = ["3.8", "3.9", "3.10", "3.11", "3.12"]


  [tool.hatch.envs.lint]
+ installer = "uv"
  detached = true
- dependencies = ["black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"]
+ dependencies = ["pip", "black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"]

  [tool.hatch.envs.lint.scripts]
  typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}"
  style = ["ruff check {args:.}", "black --check --diff {args:.}"]
- fmt = ["black {args:.}", "ruff --fix {args:.}", "style"]
+ fmt = ["black {args:.}", "ruff check --fix {args:.}", "style"]
  all = ["style", "typing"]

  [tool.hatch.metadata]
  allow-direct-references = true

- [tool.ruff.isort]
- known-first-party = ["src"]
+ [tool.ruff.lint.isort]
+ known-first-party = ["haystack_integrations"]

  [tool.black]
  target-version = ["py38"]
@@ -86,6 +90,8 @@ skip-string-normalization = true
  [tool.ruff]
  target-version = "py38"
  line-length = 120
+
+ [tool.ruff.lint]
  select = [
    "A",
    "ARG",
@@ -131,10 +137,10 @@ unfixable = [
    "F401",
  ]

- [tool.ruff.flake8-tidy-imports]
+ [tool.ruff.lint.flake8-tidy-imports]
  ban-relative-imports = "parents"

- [tool.ruff.per-file-ignores]
+ [tool.ruff.lint.per-file-ignores]
  # Tests can use magic values, assertions, and relative imports
  "tests/**/*" = ["PLR2004", "S101", "TID252"]
  # Examples can print their output
@@ -5,4 +5,4 @@
  from .chat.chat_generator import LlamaCppChatGenerator
  from .generator import LlamaCppGenerator

- __all__ = ["LlamaCppGenerator", "LlamaCppChatGenerator"]
+ __all__ = ["LlamaCppChatGenerator", "LlamaCppGenerator"]
@@ -2,7 +2,7 @@ import logging
  from typing import Any, Dict, List, Optional

  from haystack import component
- from haystack.dataclasses import ChatMessage, ChatRole
+ from haystack.dataclasses import ChatMessage
  from llama_cpp import Llama
  from llama_cpp.llama_tokenizer import LlamaHFTokenizer

@@ -17,10 +17,14 @@ def _convert_message_to_llamacpp_format(message: ChatMessage) -> Dict[str, str]:
      - `content`
      - `name` (optional)
      """
-     formatted_msg = {"role": message.role.value, "content": message.content}
+     formatted_msg = {"role": message.role.value, "content": message.text}
      if message.name:
          formatted_msg["name"] = message.name

+     if formatted_msg["role"] == "tool":
+         formatted_msg["name"] = message.tool_call_result.origin.tool_name
+         formatted_msg["content"] = message.tool_call_result.result
+
      return formatted_msg

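For orientation, here is a minimal sketch of what the updated helper produces for a plain user message. It assumes a Haystack 2.x `ChatMessage` that exposes the newer `text` accessor used in the hunk above; verify against your installed `haystack-ai` version.

```python
# Hedged sketch: assumes haystack-ai 2.x with ChatMessage.from_user and
# the newer `.text` accessor (older releases exposed `.content`).
from haystack.dataclasses import ChatMessage

msg = ChatMessage.from_user("I have a question")

# Mirrors _convert_message_to_llamacpp_format for a plain user message.
formatted = {"role": msg.role.value, "content": msg.text}
print(formatted)  # {'role': 'user', 'content': 'I have a question'}
```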
@@ -114,26 +118,31 @@ class LlamaCppChatGenerator:
          formatted_messages = [_convert_message_to_llamacpp_format(msg) for msg in messages]

          response = self.model.create_chat_completion(messages=formatted_messages, **updated_generation_kwargs)
-         replies = [
-             ChatMessage(
-                 content=choice["message"]["content"],
-                 role=ChatRole[choice["message"]["role"].upper()],
-                 name=None,
-                 meta={
-                     "response_id": response["id"],
-                     "model": response["model"],
-                     "created": response["created"],
-                     "index": choice["index"],
-                     "finish_reason": choice["finish_reason"],
-                     "usage": response["usage"],
-                 },
-             )
-             for choice in response["choices"]
-         ]
-
-         for reply, choice in zip(replies, response["choices"]):
+
+         replies = []
+
+         for choice in response["choices"]:
+             meta = {
+                 "response_id": response["id"],
+                 "model": response["model"],
+                 "created": response["created"],
+                 "index": choice["index"],
+                 "finish_reason": choice["finish_reason"],
+                 "usage": response["usage"],
+             }
+
+             name = None
              tool_calls = choice.get("message", {}).get("tool_calls", [])
              if tool_calls:
-                 reply.meta["tool_calls"] = tool_calls
-                 reply.name = tool_calls[0]["function"]["name"] if tool_calls else None
+                 meta["tool_calls"] = tool_calls
+                 name = tool_calls[0]["function"]["name"]
+
+             reply = ChatMessage.from_assistant(choice["message"]["content"], meta=meta)
+             if name:
+                 if hasattr(reply, "_name"):
+                     reply._name = name  # new ChatMessage
+                 elif hasattr(reply, "name"):
+                     reply.name = name  # legacy ChatMessage
+             replies.append(reply)
+
          return {"replies": replies}
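The `hasattr` branch in the hunk above guards against two generations of `ChatMessage`: newer Haystack releases store the name privately as `_name`, while older ones expose a public `name` attribute, as the diff's own comments note. Below is a self-contained illustration of the same dual-path pattern; `NewMsg` and `LegacyMsg` are hypothetical stand-ins, not Haystack classes.

```python
# NewMsg/LegacyMsg are hypothetical stand-ins to show the shim's shape.
from dataclasses import dataclass
from typing import Optional

@dataclass
class NewMsg:
    _name: Optional[str] = None  # newer API keeps the name private

@dataclass
class LegacyMsg:
    name: Optional[str] = None  # older API exposes it publicly

def set_tool_name(reply, name):
    # Same pattern as the diff: prefer the private attribute when present.
    if hasattr(reply, "_name"):
        reply._name = name
    elif hasattr(reply, "name"):
        reply.name = name

for reply in (NewMsg(), LegacyMsg()):
    set_tool_name(reply, "get_current_temperature")  # works on both shapes
```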
@@ -10,6 +10,7 @@ from haystack.components.builders import ChatPromptBuilder
  from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
  from haystack.dataclasses import ChatMessage, ChatRole
  from haystack.document_stores.in_memory import InMemoryDocumentStore
+
  from haystack_integrations.components.generators.llama_cpp.chat.chat_generator import (
      LlamaCppChatGenerator,
      _convert_message_to_llamacpp_format,
@@ -40,11 +41,11 @@ def test_convert_message_to_llamacpp_format():
      assert _convert_message_to_llamacpp_format(message) == {"role": "user", "content": "I have a question"}

      message = ChatMessage.from_function("Function call", "function_name")
-     assert _convert_message_to_llamacpp_format(message) == {
-         "role": "function",
-         "content": "Function call",
-         "name": "function_name",
-     }
+     converted_message = _convert_message_to_llamacpp_format(message)
+
+     assert converted_message["role"] in ("function", "tool")
+     assert converted_message["name"] == "function_name"
+     assert converted_message["content"] == "Function call"


  class TestLlamaCppChatGenerator:
@@ -162,7 +163,7 @@ class TestLlamaCppChatGenerator:
          assert isinstance(result["replies"], list)
          assert len(result["replies"]) == 1
          assert isinstance(result["replies"][0], ChatMessage)
-         assert result["replies"][0].content == "Generated text"
+         assert result["replies"][0].text == "Generated text"
          assert result["replies"][0].role == ChatRole.ASSISTANT

      def test_run_with_generation_kwargs(self, generator_mock):
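The recurring `.content` to `.text` swaps in these test hunks track the ChatMessage API change recorded in the changelog above (#1238). A hedged one-liner of the new accessor, assuming a recent haystack-ai release:

```python
from haystack.dataclasses import ChatMessage

reply = ChatMessage.from_assistant("Generated text")
# Recent releases expose the message string as `.text`; older ones used `.content`.
assert reply.text == "Generated text"
```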
@@ -182,7 +183,7 @@ class TestLlamaCppChatGenerator:
          mock_model.create_chat_completion.return_value = mock_output
          generation_kwargs = {"max_tokens": 128}
          result = generator.run([ChatMessage.from_system("Write a 200 word paragraph.")], generation_kwargs)
-         assert result["replies"][0].content == "Generated text"
+         assert result["replies"][0].text == "Generated text"
          assert result["replies"][0].meta["finish_reason"] == "length"

      @pytest.mark.integration
@@ -205,7 +206,7 @@ class TestLlamaCppChatGenerator:
          assert "replies" in result
          assert isinstance(result["replies"], list)
          assert len(result["replies"]) > 0
-         assert any(answer.lower() in reply.content.lower() for reply in result["replies"])
+         assert any(answer.lower() in reply.text.lower() for reply in result["replies"])

      @pytest.mark.integration
      def test_run_rag_pipeline(self, generator):
@@ -269,7 +270,7 @@ class TestLlamaCppChatGenerator:

          replies = result["llm"]["replies"]
          assert len(replies) > 0
-         assert any("bioluminescent waves" in reply.content for reply in replies)
+         assert any("bioluminescent waves" in reply.text.lower() for reply in replies)
          assert all(reply.role == ChatRole.ASSISTANT for reply in replies)

      @pytest.mark.integration
@@ -307,15 +308,15 @@ class TestLlamaCppChatGenerator:
          assert len(result["replies"]) > 0
          assert all(reply.role == ChatRole.ASSISTANT for reply in result["replies"])
          for reply in result["replies"]:
-             assert json.loads(reply.content)
-             assert isinstance(json.loads(reply.content), dict)
-             assert "people" in json.loads(reply.content)
-             assert isinstance(json.loads(reply.content)["people"], list)
-             assert all(isinstance(person, dict) for person in json.loads(reply.content)["people"])
-             assert all("name" in person for person in json.loads(reply.content)["people"])
-             assert all("age" in person for person in json.loads(reply.content)["people"])
-             assert all(isinstance(person["name"], str) for person in json.loads(reply.content)["people"])
-             assert all(isinstance(person["age"], int) for person in json.loads(reply.content)["people"])
+             assert json.loads(reply.text)
+             assert isinstance(json.loads(reply.text), dict)
+             assert "people" in json.loads(reply.text)
+             assert isinstance(json.loads(reply.text)["people"], list)
+             assert all(isinstance(person, dict) for person in json.loads(reply.text)["people"])
+             assert all("name" in person for person in json.loads(reply.text)["people"])
+             assert all("age" in person for person in json.loads(reply.text)["people"])
+             assert all(isinstance(person["name"], str) for person in json.loads(reply.text)["people"])
+             assert all(isinstance(person["age"], int) for person in json.loads(reply.text)["people"])


  class TestLlamaCppChatGeneratorFunctionary:
@@ -341,7 +342,7 @@ class TestLlamaCppChatGeneratorFunctionary:
          hf_tokenizer_path = "meetkai/functionary-small-v2.4-GGUF"
          generator = LlamaCppChatGenerator(
              model=model_path,
-             n_ctx=8192,
+             n_ctx=512,
              n_batch=512,
              model_kwargs={
                  "chat_format": "functionary-v2",
398
399
  "type": "string",
399
400
  "description": "The city and state, e.g. San Francisco, CA",
400
401
  },
401
- "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
402
402
  },
403
403
  "required": ["location"],
404
404
  },
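The two hunks above trim the Functionary tool-calling test: the context window drops from 8192 to 512 tokens (a smaller KV cache for CI) and the schema loses the unused `unit` parameter. For orientation, a hedged sketch of constructing the generator with the reduced context; the model path is a placeholder and `warm_up()` follows the usual Haystack component lifecycle as I understand it.

```python
from haystack_integrations.components.generators.llama_cpp import LlamaCppChatGenerator

# Placeholder model path; download a Functionary GGUF checkpoint first.
generator = LlamaCppChatGenerator(
    model="models/functionary-small-v2.4.Q4_0.gguf",
    n_ctx=512,    # context window in tokens; smaller means less memory
    n_batch=512,  # prompt-processing batch size
    model_kwargs={"chat_format": "functionary-v2"},
)
generator.warm_up()  # assumed lifecycle step: loads the model weights
```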
@@ -406,7 +406,8 @@ class TestLlamaCppChatGeneratorFunctionary:
              }
          ]

-         response = generator.run(messages=messages, generation_kwargs={"tools": tools})
+         tool_choice = {"type": "function", "function": {"name": "get_current_temperature"}}
+         response = generator.run(messages=messages, generation_kwargs={"tools": tools, "tool_choice": tool_choice})

          available_functions = {
              "get_current_temperature": self.get_current_temperature,
@@ -430,8 +431,8 @@ class TestLlamaCppChatGeneratorFunctionary:
          second_response = generator.run(messages=messages)
          assert "replies" in second_response
          assert len(second_response["replies"]) > 0
-         assert any("San Francisco" in reply.content for reply in second_response["replies"])
-         assert any("72" in reply.content for reply in second_response["replies"])
+         assert any("San Francisco" in reply.text for reply in second_response["replies"])
+         assert any("72" in reply.text for reply in second_response["replies"])


  class TestLlamaCppChatGeneratorChatML:
@@ -9,6 +9,7 @@ from haystack.components.builders.answer_builder import AnswerBuilder
  from haystack.components.builders.prompt_builder import PromptBuilder
  from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
  from haystack.document_stores.in_memory import InMemoryDocumentStore
+
  from haystack_integrations.components.generators.llama_cpp import LlamaCppGenerator
