nvidia-haystack 1.1.0__tar.gz → 1.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39):
  1. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/CHANGELOG.md +27 -0
  2. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/PKG-INFO +1 -1
  3. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/pyproject.toml +3 -1
  4. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/components/embedders/nvidia/document_embedder.py +1 -1
  5. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/components/embedders/nvidia/text_embedder.py +2 -2
  6. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/components/generators/nvidia/generator.py +1 -1
  7. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/components/rankers/nvidia/ranker.py +4 -6
  8. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/tests/test_document_embedder.py +7 -0
  9. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/tests/test_generator.py +70 -0
  10. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/tests/test_nim_backend.py +54 -0
  11. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/tests/test_ranker.py +14 -0
  12. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/tests/test_text_embedder.py +14 -0
  13. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/tests/test_utils.py +42 -1
  14. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/.gitignore +0 -0
  15. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/LICENSE.txt +0 -0
  16. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/README.md +0 -0
  17. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/examples/chat_generator_with_structured_outputs.py +0 -0
  18. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/pydoc/config_docusaurus.yml +0 -0
  19. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/components/embedders/nvidia/__init__.py +0 -0
  20. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/components/embedders/nvidia/truncate.py +0 -0
  21. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/components/embedders/py.typed +0 -0
  22. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/components/generators/nvidia/__init__.py +0 -0
  23. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/components/generators/nvidia/chat/__init__.py +0 -0
  24. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/components/generators/nvidia/chat/chat_generator.py +0 -0
  25. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/components/generators/py.typed +0 -0
  26. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/components/rankers/nvidia/__init__.py +0 -0
  27. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/components/rankers/nvidia/py.typed +0 -0
  28. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/components/rankers/nvidia/truncate.py +0 -0
  29. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/utils/nvidia/__init__.py +0 -0
  30. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/utils/nvidia/client.py +0 -0
  31. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/utils/nvidia/models.py +0 -0
  32. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/utils/nvidia/nim_backend.py +0 -0
  33. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/utils/nvidia/utils.py +0 -0
  34. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/utils/py.typed +0 -0
  35. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/tests/__init__.py +0 -0
  36. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/tests/conftest.py +0 -0
  37. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/tests/test_base_url.py +0 -0
  38. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/tests/test_embedding_truncate_mode.py +0 -0
  39. {nvidia_haystack-1.1.0 → nvidia_haystack-1.1.1}/tests/test_nvidia_chat_generator.py +0 -0
@@ -1,5 +1,32 @@
1
1
  # Changelog
2
2
 
3
+ ## [integrations/nvidia-v1.1.0] - 2026-03-30
4
+
5
+ ### 🐛 Bug Fixes
6
+
7
+ - Nvidia - fix structured output syntax (#3058)
8
+
9
+ ### 📚 Documentation
10
+
11
+ - *(nvidia)* Remove explicit warm_up from examples (#2843)
12
+ - Simplify pydoc configs (#2855)
13
+
14
+ ### 🧪 Testing
15
+
16
+ - Test compatible integrations with python 3.14; update pyproject (#3001)
17
+
18
+ ### 🧹 Chores
19
+
20
+ - Add ANN ruff ruleset to llama_cpp, llama_stack, mcp, meta_llama, mistral, mongodb_atlas, nvidia, ollama, openrouter, opensearch (#2991)
21
+ - Enforce ruff docstring rules (D102/D103/D205/D209/D213/D417/D419) in integrations 21-30 (#3010)
22
+
23
+ ## [integrations/nvidia-v1.0.0] - 2026-01-13
24
+
25
+ ### 🧹 Chores
26
+
27
+ - [**breaking**] Nvidia - drop Python 3.9 and use X|Y typing; fix default reranking model; improve tests (#2736)
28
+
29
+
3
30
  ## [integrations/nvidia-v0.5.0] - 2026-01-13
4
31
 
5
32
  ### 🚀 Features
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nvidia-haystack
3
- Version: 1.1.0
3
+ Version: 1.1.1
4
4
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/nvidia#readme
5
5
  Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
6
6
  Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/nvidia
@@ -66,7 +66,8 @@ dependencies = [
66
66
  unit = 'pytest -m "not integration" {args:tests}'
67
67
  integration = 'pytest -m "integration" {args:tests}'
68
68
  all = 'pytest {args:tests}'
69
- cov-retry = 'pytest --cov=haystack_integrations --reruns 3 --reruns-delay 30 -x {args:tests}'
69
+ unit-cov-retry = 'pytest --cov=haystack_integrations --reruns 3 --reruns-delay 30 -x -m "not integration" {args:tests}'
70
+ integration-cov-append-retry = 'pytest --cov=haystack_integrations --cov-append --reruns 3 --reruns-delay 30 -x -m "integration" {args:tests}'
70
71
 
71
72
  types = """mypy -p haystack_integrations.components.embedders.nvidia \
72
73
  -p haystack_integrations.components.generators.nvidia \
@@ -152,6 +153,7 @@ ban-relative-imports = "parents"
152
153
  [tool.coverage.run]
153
154
  source = ["haystack_integrations"]
154
155
  branch = true
156
+ relative_files = true
155
157
  parallel = false
156
158
 
157
159
 
@@ -253,7 +253,7 @@ class NvidiaDocumentEmbedder:
253
253
  if not self._initialized:
254
254
  self.warm_up()
255
255
 
256
- elif not isinstance(documents, list) or (documents and not isinstance(documents[0], Document)):
256
+ if not isinstance(documents, list) or (documents and not isinstance(documents[0], Document)):
257
257
  msg = (
258
258
  "NvidiaDocumentEmbedder expects a list of Documents as input."
259
259
  "In case you want to embed a string, please use the NvidiaTextEmbedder."
@@ -205,13 +205,13 @@ class NvidiaTextEmbedder:
205
205
  if not self._initialized:
206
206
  self.warm_up()
207
207
 
208
- elif not isinstance(text, str):
208
+ if not isinstance(text, str):
209
209
  msg = (
210
210
  "NvidiaTextEmbedder expects a string as an input."
211
211
  "In case you want to embed a list of Documents, please use the NvidiaDocumentEmbedder."
212
212
  )
213
213
  raise TypeError(msg)
214
- elif not text:
214
+ if not text:
215
215
  msg = "Cannot embed an empty string."
216
216
  raise ValueError(msg)
217
217
 
@@ -132,7 +132,7 @@ class NvidiaGenerator:
132
132
 
133
133
  if not self.is_hosted and not self._model:
134
134
  if self.backend.model:
135
- self.model = self.backend.model
135
+ self._model = self.backend.model
136
136
  else:
137
137
  self.default_model()
138
138
 
@@ -4,6 +4,7 @@
4
4
 
5
5
  import os
6
6
  import warnings
7
+ from dataclasses import replace
7
8
  from typing import Any
8
9
 
9
10
  from haystack import Document, component, default_from_dict, default_to_dict, logging
@@ -236,11 +237,8 @@ class NvidiaRanker:
236
237
 
237
238
  # rank result is list[{index: int, logit: float}] sorted by logit
238
239
  sorted_indexes_and_scores = self.backend.rank(query_text=query_text, document_texts=document_texts)
239
- sorted_documents = []
240
- for item in sorted_indexes_and_scores[:top_k]:
241
- # mutate (don't copy) the document because we're only updating the score
242
- doc = documents[item["index"]]
243
- doc.score = item["logit"]
244
- sorted_documents.append(doc)
240
+ sorted_documents = [
241
+ replace(documents[item["index"]], score=item["logit"]) for item in sorted_indexes_and_scores[:top_k]
242
+ ]
245
243
 
246
244
  return {"documents": sorted_documents}
@@ -354,6 +354,13 @@ class TestNvidiaDocumentEmbedder:
354
354
  with pytest.raises(TypeError, match="NvidiaDocumentEmbedder expects a list of Documents as input"):
355
355
  embedder.run(documents=list_integers_input)
356
356
 
357
+ def test_run_validates_input_without_prior_warm_up(self):
358
+ api_key = Secret.from_token("fake-api-key")
359
+ embedder = NvidiaDocumentEmbedder("nvidia/nv-embedqa-e5-v5", api_key=api_key)
360
+
361
+ with pytest.raises(TypeError, match="NvidiaDocumentEmbedder expects a list of Documents as input"):
362
+ embedder.run(documents="text")
363
+
357
364
  def test_run_empty_document(self, caplog):
358
365
  model = "nvidia/nv-embedqa-e5-v5"
359
366
  api_key = Secret.from_token("fake-api-key")
@@ -230,3 +230,73 @@ class TestNvidiaGenerator:
230
230
  )
231
231
  with pytest.raises(ValueError):
232
232
  generator1.warm_up()
233
+
234
+ @pytest.mark.usefixtures("mock_local_models")
235
+ def test_warm_up_falls_back_to_default_model(self, monkeypatch):
236
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
237
+ generator = NvidiaGenerator(api_url="http://localhost:8080/v1")
238
+
239
+ with pytest.warns(UserWarning, match="Default model is set as:"):
240
+ generator.warm_up()
241
+
242
+ assert generator._model == "model1"
243
+ assert generator.backend.model == "model1"
244
+ assert generator.to_dict()["init_parameters"]["model"] == "model1"
245
+
246
+ def test_default_model_raises_when_no_valid_models(self, monkeypatch, requests_mock):
247
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
248
+ requests_mock.get(
249
+ "http://localhost:8080/v1/models",
250
+ json={"data": [{"id": "derived-model", "object": "model", "root": "base-model"}]},
251
+ )
252
+ generator = NvidiaGenerator(api_url="http://localhost:8080/v1")
253
+
254
+ with pytest.raises(ValueError, match="No locally hosted model was found"):
255
+ generator.warm_up()
256
+
257
+ def test_warm_up_is_idempotent(self, monkeypatch):
258
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
259
+ generator = NvidiaGenerator("meta/llama3-8b-instruct")
260
+ generator.warm_up()
261
+ backend = generator.backend
262
+ generator.warm_up()
263
+ assert generator.backend is backend
264
+
265
+ def test_available_models_without_backend(self, monkeypatch):
266
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
267
+ generator = NvidiaGenerator("meta/llama3-8b-instruct")
268
+ assert generator.available_models == []
269
+
270
+ @pytest.mark.usefixtures("mock_local_models")
271
+ def test_available_models_with_backend(self, monkeypatch):
272
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
273
+ generator = NvidiaGenerator(model="model1", api_url="http://localhost:8080/v1")
274
+ generator.warm_up()
275
+ models = generator.available_models
276
+ assert len(models) == 1
277
+ assert models[0].id == "model1"
278
+
279
+ def test_from_dict(self, monkeypatch):
280
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
281
+ data = {
282
+ "type": "haystack_integrations.components.generators.nvidia.generator.NvidiaGenerator",
283
+ "init_parameters": {
284
+ "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"},
285
+ "api_url": "https://my.url.com/v1",
286
+ "model": "meta/llama3-8b-instruct",
287
+ "model_arguments": {"temperature": 0.5},
288
+ },
289
+ }
290
+ generator = NvidiaGenerator.from_dict(data)
291
+ assert generator._model == "meta/llama3-8b-instruct"
292
+ assert generator.api_url == "https://my.url.com/v1"
293
+ assert generator._model_arguments == {"temperature": 0.5}
294
+
295
+ def test_run(self, monkeypatch, mock_local_chat_completion): # noqa: ARG002
296
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
297
+ generator = NvidiaGenerator(model="model1", api_url="http://localhost:8080/v1")
298
+
299
+ result = generator.run(prompt="What is the answer?")
300
+
301
+ assert result["replies"] == ["Hello!", "How are you?"]
302
+ assert len(result["meta"]) == 2
@@ -233,6 +233,36 @@ class TestNimBackend:
233
233
  timeout=60.0,
234
234
  )
235
235
 
236
+ def test_embed_raises_on_http_error(self, monkeypatch):
237
+ error_response = requests.Response()
238
+ error_response.status_code = 500
239
+ error_response._content = b"server exploded"
240
+ with patch("requests.sessions.Session.post", return_value=error_response):
241
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
242
+ backend = NimBackend(model="nvidia/nv-embedqa-e5-v5", api_url=DEFAULT_API_URL, client="NvidiaTextEmbedder")
243
+ with pytest.raises(ValueError, match="Failed to query embedding endpoint"):
244
+ backend.embed(texts=["a"])
245
+
246
+ def test_generate_raises_on_http_error(self, monkeypatch):
247
+ error_response = requests.Response()
248
+ error_response.status_code = 500
249
+ error_response._content = b"server exploded"
250
+ with patch("requests.sessions.Session.post", return_value=error_response):
251
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
252
+ backend = NimBackend(model="meta/llama3-8b-instruct", api_url=DEFAULT_API_URL, client="NvidiaGenerator")
253
+ with pytest.raises(ValueError, match="Failed to query chat completion endpoint"):
254
+ backend.generate(prompt="hi")
255
+
256
+ def test_models_raises_when_empty(self, monkeypatch):
257
+ empty_response = requests.Response()
258
+ empty_response.status_code = 200
259
+ empty_response._content = json.dumps({"data": []}).encode()
260
+ with patch("requests.sessions.Session.get", return_value=empty_response):
261
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
262
+ backend = NimBackend(model="custom-model", api_url="http://localhost:8000")
263
+ with pytest.raises(ValueError, match="No hosted model were found"):
264
+ backend.models()
265
+
236
266
  def test_rank(self, monkeypatch):
237
267
  with patch("requests.sessions.Session.post", side_effect=mock_rank_post_response) as mock_post:
238
268
  monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
@@ -258,3 +288,27 @@ class TestNimBackend:
258
288
  },
259
289
  timeout=60.0,
260
290
  )
291
+
292
+ def test_rank_raises_on_http_error(self, monkeypatch):
293
+ error_response = requests.Response()
294
+ error_response.status_code = 500
295
+ error_response._content = b"server exploded"
296
+ with patch("requests.sessions.Session.post", return_value=error_response):
297
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
298
+ backend = NimBackend(
299
+ model="nvidia/llama-3.2-nv-rerankqa-1b-v2", api_url=DEFAULT_API_URL, client="NvidiaRanker"
300
+ )
301
+ with pytest.raises(ValueError, match="Failed to rank endpoint"):
302
+ backend.rank(query_text="q", document_texts=["a"])
303
+
304
+ def test_rank_raises_when_rankings_missing(self, monkeypatch):
305
+ response = requests.Response()
306
+ response.status_code = 200
307
+ response._content = json.dumps({"unexpected": "payload"}).encode()
308
+ with patch("requests.sessions.Session.post", return_value=response):
309
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
310
+ backend = NimBackend(
311
+ model="nvidia/llama-3.2-nv-rerankqa-1b-v2", api_url=DEFAULT_API_URL, client="NvidiaRanker"
312
+ )
313
+ with pytest.raises(ValueError, match="Expected 'rankings' in response"):
314
+ backend.rank(query_text="q", document_texts=["a"])
@@ -330,6 +330,20 @@ class TestNvidiaRanker:
330
330
  client.warm_up()
331
331
  assert client.backend.timeout == 45.0
332
332
 
333
+ def test_run_on_empty_list(self, monkeypatch):
334
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
335
+ client = NvidiaRanker()
336
+ client.warm_up()
337
+ assert client.run(query="q", documents=[]) == {"documents": []}
338
+
339
+ def test_run_without_prior_warm_up(self, requests_mock, monkeypatch):
340
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
341
+ requests_mock.post(re.compile(r".*ranking"), json={"rankings": [{"index": 0, "logit": 1.0}]})
342
+ client = NvidiaRanker()
343
+ result = client.run(query="q", documents=[Document(content="doc")])
344
+ assert client._initialized is True
345
+ assert len(result["documents"]) == 1
346
+
333
347
  def test_prepare_texts_to_embed_w_metadata(self):
334
348
  documents = [
335
349
  Document(content=f"document number {i}:\ncontent", meta={"meta_field": f"meta_value {i}"}) for i in range(5)
@@ -183,6 +183,20 @@ class TestNvidiaTextEmbedder:
183
183
  with pytest.raises(ValueError, match="empty string"):
184
184
  embedder.run(text="")
185
185
 
186
+ def test_run_validates_input_without_prior_warm_up(self):
187
+ api_key = Secret.from_token("fake-api-key")
188
+ embedder = NvidiaTextEmbedder("nvidia/nv-embedqa-e5-v5", api_key=api_key)
189
+
190
+ with pytest.raises(TypeError, match="NvidiaTextEmbedder expects a string as an input"):
191
+ embedder.run(text=[1, 2, 3])
192
+
193
+ def test_run_rejects_empty_string_without_prior_warm_up(self):
194
+ api_key = Secret.from_token("fake-api-key")
195
+ embedder = NvidiaTextEmbedder("nvidia/nv-embedqa-e5-v5", api_key=api_key)
196
+
197
+ with pytest.raises(ValueError, match="empty string"):
198
+ embedder.run(text="")
199
+
186
200
  def test_setting_timeout(self, monkeypatch):
187
201
  monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
188
202
  embedder = NvidiaTextEmbedder(timeout=10.0)
@@ -5,7 +5,12 @@
5
5
  import pytest
6
6
 
7
7
  from haystack_integrations.utils.nvidia import Client, is_hosted
8
- from haystack_integrations.utils.nvidia.models import CHAT_MODEL_TABLE, EMBEDDING_MODEL_TABLE, RANKING_MODEL_TABLE
8
+ from haystack_integrations.utils.nvidia.models import (
9
+ CHAT_MODEL_TABLE,
10
+ EMBEDDING_MODEL_TABLE,
11
+ RANKING_MODEL_TABLE,
12
+ Model,
13
+ )
9
14
  from haystack_integrations.utils.nvidia.utils import (
10
15
  determine_model,
11
16
  lookup_model,
@@ -110,3 +115,39 @@ def test_validate_hosted_model_with_client() -> None:
110
115
  model = validate_hosted_model("meta/codellama-70b", Client.NVIDIA_GENERATOR)
111
116
  assert model is not None
112
117
  assert model.client == Client.NVIDIA_GENERATOR
118
+
119
+
120
+ # Model
121
+ def test_model_hash_uses_id() -> None:
122
+ assert hash(Model(id="foo")) == hash("foo")
123
+
124
+
125
+ def test_model_validate_with_enum_client() -> None:
126
+ model = Model(id="foo", model_type="chat", client=Client.NVIDIA_GENERATOR)
127
+ assert model.validate() == hash("foo")
128
+
129
+
130
+ def test_model_validate_with_string_client() -> None:
131
+ model = Model(id="foo", model_type="chat", client="NvidiaGenerator")
132
+ assert model.validate() == hash("foo")
133
+
134
+
135
+ def test_model_validate_raises_on_incompatible_type() -> None:
136
+ model = Model(id="foo", model_type="embedding", client=Client.NVIDIA_GENERATOR)
137
+ with pytest.raises(ValueError, match="not supported by client"):
138
+ model.validate()
139
+
140
+
141
+ def test_model_validate_without_client() -> None:
142
+ model = Model(id="foo", model_type="chat")
143
+ assert model.validate() == hash("foo")
144
+
145
+
146
+ # Client
147
+ def test_client_from_str_invalid() -> None:
148
+ with pytest.raises(ValueError, match="Unknown client"):
149
+ Client.from_str("NotARealClient")
150
+
151
+
152
+ def test_client_str_returns_value() -> None:
153
+ assert str(Client.NVIDIA_GENERATOR) == "NvidiaGenerator"