llama-cpp-haystack 2.0.0__tar.gz → 2.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (18)
  1. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/CHANGELOG.md +16 -0
  2. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/LICENSE.txt +1 -1
  3. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/PKG-INFO +1 -1
  4. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py +14 -8
  5. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/src/haystack_integrations/components/generators/llama_cpp/generator.py +8 -5
  6. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/tests/test_chat_generator.py +4 -13
  7. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/tests/test_generator.py +4 -9
  8. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/.gitignore +0 -0
  9. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/README.md +0 -0
  10. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/examples/llama_cpp_generator_example.py +0 -0
  11. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/examples/rag_pipeline_example.py +0 -0
  12. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/pydoc/config_docusaurus.yml +0 -0
  13. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/pyproject.toml +0 -0
  14. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/src/haystack_integrations/components/generators/llama_cpp/__init__.py +0 -0
  15. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/src/haystack_integrations/components/generators/py.typed +0 -0
  16. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/tests/__init__.py +0 -0
  17. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/tests/models/.gitignore +0 -0
  18. {llama_cpp_haystack-2.0.0 → llama_cpp_haystack-2.1.0}/tests/test_files/apple.jpg +0 -0
CHANGELOG.md
@@ -1,5 +1,21 @@
  # Changelog
 
+ ## [integrations/llama_cpp-v2.0.0] - 2026-01-12
+
+ ### ⚙️ CI
+
+ - Change pytest command (#2475)
+
+ ### 🧹 Chores
+
+ - Remove Readme API CI workflow and configs (#2573)
+ - Make fmt command more forgiving (#2671)
+ - [**breaking**] Llama_cpp - drop Python 3.9 and use X|Y typing (#2710)
+
+ ### 🌀 Miscellaneous
+
+ - Enhancement: Adopt PEP 585 type hinting (part 4) (#2527)
+
  ## [integrations/llama_cpp-v1.4.0] - 2025-10-23
 
  ### 📚 Documentation
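The breaking entry above (drop Python 3.9, use X|Y typing) and the PEP 585 item describe the same typing modernization that appears throughout the source diffs below. A minimal before/after sketch, using a hypothetical class purely for illustration:

    from typing import Any

    class ExampleComponent:
        # Hypothetical class, used only to illustrate the typing style change.
        # The older, Python 3.9-compatible style would typically have been:
        #     from typing import Any, Dict, Optional
        #     def __init__(self, model_kwargs: Optional[Dict[str, Any]] = None): ...
        # The new style uses PEP 585 built-in generics plus PEP 604 "X | Y" unions,
        # as seen in generator.py and chat_generator.py below.
        def __init__(self, model_kwargs: dict[str, Any] | None = None) -> None:
            self.model_kwargs = model_kwargs or {}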
LICENSE.txt
@@ -58,7 +58,7 @@ APPENDIX: How to apply the Apache License to your work.
 
  To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives.
 
- Copyright [yyyy] [name of copyright owner]
+ Copyright 2024 deepset GmbH
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: llama-cpp-haystack
- Version: 2.0.0
+ Version: 2.1.0
  Summary: An integration between the llama.cpp LLM framework and Haystack
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/llama_cpp#readme
  Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py
@@ -1,3 +1,7 @@
+ # SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
+ #
+ # SPDX-License-Identifier: Apache-2.0
+
  import json
  from collections.abc import Iterator
  from datetime import datetime, timezone
@@ -201,7 +205,7 @@ class LlamaCppChatGenerator:
          streaming_callback: StreamingCallbackT | None = None,
          chat_handler_name: str | None = None,
          model_clip_path: str | None = None,
-     ):
+     ) -> None:
          """
          :param model: The path of a quantized model for text generation, for example, "zephyr-7b-beta.Q4_0.gguf".
              If the model path is also specified in the `model_kwargs`, this parameter will be ignored.
@@ -263,7 +267,7 @@ class LlamaCppChatGenerator:
          self.model_clip_path = model_clip_path
          self._handler = handler
 
-     def warm_up(self):
+     def warm_up(self) -> None:
          if self._model is not None:
              return
 
@@ -347,8 +351,7 @@ class LlamaCppChatGenerator:
              - `replies`: The responses from the model
          """
          if self._model is None:
-             error_msg = "The model has not been loaded. Please call warm_up() before running."
-             raise RuntimeError(error_msg)
+             self.warm_up()
 
          if not messages:
              return {"replies": []}
@@ -381,7 +384,7 @@ class LlamaCppChatGenerator:
          )
 
          if streaming_callback:
-             response_stream = self._model.create_chat_completion(
+             response_stream = self._model.create_chat_completion(  # type: ignore[union-attr]
                  messages=formatted_messages, tools=llamacpp_tools, **updated_generation_kwargs, stream=True
              )
              return self._handle_streaming_response(
@@ -391,7 +394,7 @@ class LlamaCppChatGenerator:
              )  # we know that response_stream is Iterator[CreateChatCompletionStreamResponse]
              # because create_chat_completion was called with stream=True, but mypy doesn't know that
 
-         response = self._model.create_chat_completion(
+         response = self._model.create_chat_completion(  # type: ignore[union-attr]
              messages=formatted_messages, tools=llamacpp_tools, **updated_generation_kwargs
          )
          replies = []
@@ -399,8 +402,11 @@ class LlamaCppChatGenerator:
              msg = f"Expected a dictionary response, got a different object: {response}"
              raise ValueError(msg)
 
-         for choice in response["choices"]:
-             chat_message = self._convert_chat_completion_choice_to_chat_message(choice, response)
+         for choice in response["choices"]:  # type: ignore[index]
+             chat_message = self._convert_chat_completion_choice_to_chat_message(
+                 choice,
+                 response,  # type: ignore[arg-type]
+             )
              replies.append(chat_message)
          return {"replies": replies}
 
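The run() hunks above change the warm-up contract: instead of raising a RuntimeError when the model has not been loaded, LlamaCppChatGenerator.run() now calls warm_up() itself on first use. A minimal usage sketch under that assumption; the GGUF file name is the docstring's example and is purely illustrative:

    from haystack.dataclasses import ChatMessage
    from haystack_integrations.components.generators.llama_cpp import LlamaCppChatGenerator

    # Any local GGUF model works here; "zephyr-7b-beta.Q4_0.gguf" is just the docstring's example.
    generator = LlamaCppChatGenerator(model="zephyr-7b-beta.Q4_0.gguf", n_ctx=2048, n_batch=512)

    # Explicit warm_up() is still supported but, as of this change, no longer required:
    # run() loads the model lazily if it has not been loaded yet.
    result = generator.run(messages=[ChatMessage.from_user("What is the capital of China?")])
    print(result["replies"][0].text)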
src/haystack_integrations/components/generators/llama_cpp/generator.py
@@ -1,3 +1,7 @@
+ # SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
+ #
+ # SPDX-License-Identifier: Apache-2.0
+
  from typing import Any
 
  from haystack import component, logging
@@ -32,7 +36,7 @@ class LlamaCppGenerator:
          n_batch: int | None = 512,
          model_kwargs: dict[str, Any] | None = None,
          generation_kwargs: dict[str, Any] | None = None,
-     ):
+     ) -> None:
          """
          :param model: The path of a quantized model for text generation, for example, "zephyr-7b-beta.Q4_0.gguf".
              If the model path is also specified in the `model_kwargs`, this parameter will be ignored.
@@ -64,7 +68,7 @@ class LlamaCppGenerator:
          self.generation_kwargs = generation_kwargs
          self.model: Llama | None = None
 
-     def warm_up(self):
+     def warm_up(self) -> None:
          if self.model is None:
              self.model = Llama(**self.model_kwargs)
 
@@ -84,8 +88,7 @@ class LlamaCppGenerator:
              - `meta`: metadata about the request.
          """
          if self.model is None:
-             error_msg = "The model has not been loaded. Please call warm_up() before running."
-             raise RuntimeError(error_msg)
+             self.warm_up()
 
          if not prompt:
              return {"replies": []}
@@ -93,7 +96,7 @@ class LlamaCppGenerator:
          # merge generation kwargs from init method with those from run method
          updated_generation_kwargs = {**self.generation_kwargs, **(generation_kwargs or {})}
 
-         output = self.model.create_completion(prompt=prompt, **updated_generation_kwargs)
+         output = self.model.create_completion(prompt=prompt, **updated_generation_kwargs)  # type: ignore[union-attr]
          if not isinstance(output, dict):
              msg = f"Expected a dictionary response, got a different object: {output}"
              raise ValueError(msg)
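The same relaxation applies to LlamaCppGenerator: run() now warms the model up itself instead of raising when warm_up() was skipped. A short sketch, again with an illustrative model path:

    from haystack_integrations.components.generators.llama_cpp import LlamaCppGenerator

    # Illustrative model path; use any local GGUF file.
    generator = LlamaCppGenerator(model="zephyr-7b-beta.Q4_0.gguf", n_ctx=2048, n_batch=512)

    # No explicit warm_up() call is needed before run() as of this release.
    result = generator.run("What is the capital of China?", generation_kwargs={"max_tokens": 64})
    print(result["replies"][0])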
tests/test_chat_generator.py
@@ -1,3 +1,7 @@
+ # SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
+ #
+ # SPDX-License-Identifier: Apache-2.0
+
  import json
  import os
  import urllib.request
@@ -678,7 +682,6 @@ class TestLlamaCppChatGenerator:
 
          model_path = str(model_path / filename)
          generator = LlamaCppChatGenerator(model=model_path, n_ctx=8192, n_batch=512)
-         generator.warm_up()
          return generator
 
      @pytest.fixture
@@ -898,14 +901,6 @@ class TestLlamaCppChatGenerator:
          )
          assert generator.model_kwargs["n_batch"] == 1024
 
-     def test_raises_error_without_warm_up(self):
-         """
-         Test that the generator raises an error if warm_up() is not called before running.
-         """
-         generator = LlamaCppChatGenerator(model="test_model.gguf", n_ctx=512, n_batch=512)
-         with pytest.raises(RuntimeError):
-             generator.run("What is the capital of China?")
-
      def test_run_with_empty_message(self, generator_mock):
          """
          Test that an empty message returns an empty list of replies.
@@ -1179,7 +1174,6 @@ class TestLlamaCppChatGeneratorFunctionary:
                  "hf_tokenizer_path": hf_tokenizer_path,
              },
          )
-         generator.warm_up()
          return generator
 
      @pytest.mark.integration
@@ -1260,7 +1254,6 @@ class TestLlamaCppChatGeneratorChatML:
                  "chat_format": "chatml-function-calling",
              },
          )
-         generator.warm_up()
          return generator
 
      @pytest.mark.integration
@@ -1326,8 +1319,6 @@ class TestLlamaCppChatGeneratorChatML:
              generation_kwargs={"max_tokens": 50, "temperature": 0.1},
          )
 
-         generator.warm_up()
-
          result = generator.run(messages)
 
          assert "replies" in result
tests/test_generator.py
@@ -1,3 +1,7 @@
+ # SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
+ #
+ # SPDX-License-Identifier: Apache-2.0
+
  import os
  import urllib.request
  from pathlib import Path
@@ -42,7 +46,6 @@ class TestLlamaCppGenerator:
 
          model_path = str(model_path / filename)
          generator = LlamaCppGenerator(model=model_path, n_ctx=128, n_batch=128)
-         generator.warm_up()
          return generator
 
      @pytest.fixture
@@ -106,14 +109,6 @@ class TestLlamaCppGenerator:
          generator = LlamaCppGenerator(model="test_model.gguf", n_ctx=512, n_batch=512, model_kwargs={"n_batch": 1024})
          assert generator.model_kwargs["n_batch"] == 1024
 
-     def test_raises_error_without_warm_up(self):
-         """
-         Test that the generator raises an error if warm_up() is not called before running.
-         """
-         generator = LlamaCppGenerator(model="test_model.gguf", n_ctx=512, n_batch=512)
-         with pytest.raises(RuntimeError):
-             generator.run("What is the capital of China?")
-
      def test_run_with_empty_prompt(self, generator_mock):
          """
          Test that an empty prompt returns an empty list of replies.