nvidia-haystack 1.0.0__tar.gz → 1.1.1__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (40)
  1. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/CHANGELOG.md +27 -0
  2. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/PKG-INFO +2 -1
  3. nvidia_haystack-1.1.1/pydoc/config_docusaurus.yml +19 -0
  4. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/pyproject.toml +16 -3
  5. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/components/embedders/nvidia/document_embedder.py +4 -4
  6. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/components/embedders/nvidia/text_embedder.py +8 -8
  7. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/components/embedders/nvidia/truncate.py +2 -1
  8. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/components/generators/nvidia/chat/chat_generator.py +11 -9
  9. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/components/generators/nvidia/generator.py +6 -4
  10. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/components/rankers/nvidia/ranker.py +7 -9
  11. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/components/rankers/nvidia/truncate.py +2 -1
  12. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/utils/nvidia/models.py +2 -1
  13. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/utils/nvidia/nim_backend.py +5 -1
  14. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/utils/nvidia/utils.py +2 -2
  15. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/tests/test_document_embedder.py +7 -0
  16. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/tests/test_generator.py +70 -0
  17. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/tests/test_nim_backend.py +54 -0
  18. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/tests/test_nvidia_chat_generator.py +11 -3
  19. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/tests/test_ranker.py +14 -0
  20. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/tests/test_text_embedder.py +14 -0
  21. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/tests/test_utils.py +42 -1
  22. nvidia_haystack-1.0.0/pydoc/config_docusaurus.yml +0 -34
  23. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/.gitignore +0 -0
  24. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/LICENSE.txt +0 -0
  25. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/README.md +0 -0
  26. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/examples/chat_generator_with_structured_outputs.py +0 -0
  27. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/components/embedders/nvidia/__init__.py +0 -0
  28. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/components/embedders/py.typed +0 -0
  29. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/components/generators/nvidia/__init__.py +0 -0
  30. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/components/generators/nvidia/chat/__init__.py +0 -0
  31. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/components/generators/py.typed +0 -0
  32. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/components/rankers/nvidia/__init__.py +0 -0
  33. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/components/rankers/nvidia/py.typed +0 -0
  34. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/utils/nvidia/__init__.py +0 -0
  35. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/utils/nvidia/client.py +0 -0
  36. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/src/haystack_integrations/utils/py.typed +0 -0
  37. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/tests/__init__.py +0 -0
  38. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/tests/conftest.py +0 -0
  39. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/tests/test_base_url.py +0 -0
  40. {nvidia_haystack-1.0.0 → nvidia_haystack-1.1.1}/tests/test_embedding_truncate_mode.py +0 -0
@@ -1,5 +1,32 @@
1
1
  # Changelog
2
2
 
3
+ ## [integrations/nvidia-v1.1.0] - 2026-03-30
4
+
5
+ ### 🐛 Bug Fixes
6
+
7
+ - Nvidia - fix structured output syntax (#3058)
8
+
9
+ ### 📚 Documentation
10
+
11
+ - *(nvidia)* Remove explicit warm_up from examples (#2843)
12
+ - Simplify pydoc configs (#2855)
13
+
14
+ ### 🧪 Testing
15
+
16
+ - Test compatible integrations with python 3.14; update pyproject (#3001)
17
+
18
+ ### 🧹 Chores
19
+
20
+ - Add ANN ruff ruleset to llama_cpp, llama_stack, mcp, meta_llama, mistral, mongodb_atlas, nvidia, ollama, openrouter, opensearch (#2991)
21
+ - Enforce ruff docstring rules (D102/D103/D205/D209/D213/D417/D419) in integrations 21-30 (#3010)
22
+
23
+ ## [integrations/nvidia-v1.0.0] - 2026-01-13
24
+
25
+ ### 🧹 Chores
26
+
27
+ - [**breaking**] Nvidia - drop Python 3.9 and use X|Y typing; fix default reranking model; improve tests (#2736)
28
+
29
+
3
30
  ## [integrations/nvidia-v0.5.0] - 2026-01-13
4
31
 
5
32
  ### 🚀 Features
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nvidia-haystack
3
- Version: 1.0.0
3
+ Version: 1.1.1
4
4
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/nvidia#readme
5
5
  Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
6
6
  Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/nvidia
@@ -14,6 +14,7 @@ Classifier: Programming Language :: Python :: 3.10
14
14
  Classifier: Programming Language :: Python :: 3.11
15
15
  Classifier: Programming Language :: Python :: 3.12
16
16
  Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Programming Language :: Python :: 3.14
17
18
  Classifier: Programming Language :: Python :: Implementation :: CPython
18
19
  Classifier: Programming Language :: Python :: Implementation :: PyPy
19
20
  Requires-Python: >=3.10
@@ -0,0 +1,19 @@
1
+ loaders:
2
+ - modules:
3
+ - haystack_integrations.components.embedders.nvidia.document_embedder
4
+ - haystack_integrations.components.embedders.nvidia.text_embedder
5
+ - haystack_integrations.components.embedders.nvidia.truncate
6
+ - haystack_integrations.components.generators.nvidia.generator
7
+ - haystack_integrations.components.generators.nvidia.chat.chat_generator
8
+ - haystack_integrations.components.rankers.nvidia.ranker
9
+ - haystack_integrations.components.rankers.nvidia.truncate
10
+ search_path: [../src]
11
+ processors:
12
+ - type: filter
13
+ documented_only: true
14
+ skip_empty_modules: true
15
+ renderer:
16
+ description: Nvidia integration for Haystack
17
+ id: integrations-nvidia
18
+ filename: nvidia.md
19
+ title: Nvidia
@@ -19,6 +19,7 @@ classifiers = [
19
19
  "Programming Language :: Python :: 3.11",
20
20
  "Programming Language :: Python :: 3.12",
21
21
  "Programming Language :: Python :: 3.13",
22
+ "Programming Language :: Python :: 3.14",
22
23
  "Programming Language :: Python :: Implementation :: CPython",
23
24
  "Programming Language :: Python :: Implementation :: PyPy",
24
25
  ]
@@ -45,7 +46,7 @@ installer = "uv"
45
46
  dependencies = ["haystack-pydoc-tools", "ruff"]
46
47
 
47
48
  [tool.hatch.envs.default.scripts]
48
- docs = ["pydoc-markdown pydoc/config_docusaurus.yml"]
49
+ docs = ["haystack-pydoc pydoc/config_docusaurus.yml"]
49
50
  fmt = "ruff check --fix {args}; ruff format {args}"
50
51
  fmt-check = "ruff check {args} && ruff format --check {args}"
51
52
 
@@ -65,7 +66,8 @@ dependencies = [
65
66
  unit = 'pytest -m "not integration" {args:tests}'
66
67
  integration = 'pytest -m "integration" {args:tests}'
67
68
  all = 'pytest {args:tests}'
68
- cov-retry = 'pytest --cov=haystack_integrations --reruns 3 --reruns-delay 30 -x {args:tests}'
69
+ unit-cov-retry = 'pytest --cov=haystack_integrations --reruns 3 --reruns-delay 30 -x -m "not integration" {args:tests}'
70
+ integration-cov-append-retry = 'pytest --cov=haystack_integrations --cov-append --reruns 3 --reruns-delay 30 -x -m "integration" {args:tests}'
69
71
 
70
72
  types = """mypy -p haystack_integrations.components.embedders.nvidia \
71
73
  -p haystack_integrations.components.generators.nvidia \
@@ -86,9 +88,17 @@ line-length = 120
86
88
  [tool.ruff.lint]
87
89
  select = [
88
90
  "A",
91
+ "ANN",
89
92
  "ARG",
90
93
  "B",
91
94
  "C",
95
+ "D102", # Missing docstring in public method
96
+ "D103", # Missing docstring in public function
97
+ "D205", # 1 blank line required between summary line and description
98
+ "D209", # Closing triple quotes go to new line
99
+ "D213", # summary lines must be positioned on the second physical line of the docstring
100
+ "D417", # Missing argument descriptions in the docstring
101
+ "D419", # Docstring is empty
92
102
  "DTZ",
93
103
  "E",
94
104
  "EM",
@@ -126,6 +136,8 @@ ignore = [
126
136
  # Misc
127
137
  "B008",
128
138
  "S101",
139
+ # Allow `Any` - used legitimately for dynamic types and SDK boundaries
140
+ "ANN401",
129
141
  ]
130
142
 
131
143
  [tool.ruff.lint.isort]
@@ -136,11 +148,12 @@ ban-relative-imports = "parents"
136
148
 
137
149
  [tool.ruff.lint.per-file-ignores]
138
150
  # Tests can use magic values, assertions, and relative imports
139
- "tests/**/*" = ["PLR2004", "S101", "TID252"]
151
+ "tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"]
140
152
 
141
153
  [tool.coverage.run]
142
154
  source = ["haystack_integrations"]
143
155
  branch = true
156
+ relative_files = true
144
157
  parallel = false
145
158
 
146
159
 
@@ -20,8 +20,7 @@ logger = logging.getLogger(__name__)
20
20
  @component
21
21
  class NvidiaDocumentEmbedder:
22
22
  """
23
- A component for embedding documents using embedding models provided by
24
- [NVIDIA NIMs](https://ai.nvidia.com).
23
+ A component for embedding documents using embedding models provided by [NVIDIA NIMs](https://ai.nvidia.com).
25
24
 
26
25
  Usage example:
27
26
  ```python
@@ -30,7 +29,7 @@ class NvidiaDocumentEmbedder:
30
29
  doc = Document(content="I love pizza!")
31
30
 
32
31
  document_embedder = NvidiaDocumentEmbedder(model="nvidia/nv-embedqa-e5-v5", api_url="https://integrate.api.nvidia.com/v1")
33
- text_embedder.warm_up()
32
+ # Components warm up automatically on first run.
34
33
 
35
34
  result = document_embedder.run([doc])
36
35
  print(result["documents"][0].embedding)
@@ -107,6 +106,7 @@ class NvidiaDocumentEmbedder:
107
106
 
108
107
  @classmethod
109
108
  def class_name(cls) -> str:
109
+ """Return the class name identifier for serialization."""
110
110
  return "NvidiaDocumentEmbedder"
111
111
 
112
112
  def default_model(self) -> None:
@@ -253,7 +253,7 @@ class NvidiaDocumentEmbedder:
253
253
  if not self._initialized:
254
254
  self.warm_up()
255
255
 
256
- elif not isinstance(documents, list) or (documents and not isinstance(documents[0], Document)):
256
+ if not isinstance(documents, list) or (documents and not isinstance(documents[0], Document)):
257
257
  msg = (
258
258
  "NvidiaDocumentEmbedder expects a list of Documents as input."
259
259
  "In case you want to embed a string, please use the NvidiaTextEmbedder."
@@ -18,8 +18,7 @@ logger = logging.getLogger(__name__)
18
18
  @component
19
19
  class NvidiaTextEmbedder:
20
20
  """
21
- A component for embedding strings using embedding models provided by
22
- [NVIDIA NIMs](https://ai.nvidia.com).
21
+ A component for embedding strings using embedding models provided by [NVIDIA NIMs](https://ai.nvidia.com).
23
22
 
24
23
  For models that differentiate between query and document inputs,
25
24
  this component embeds the input string as a query.
@@ -31,7 +30,7 @@ class NvidiaTextEmbedder:
31
30
  text_to_embed = "I love pizza!"
32
31
 
33
32
  text_embedder = NvidiaTextEmbedder(model="nvidia/nv-embedqa-e5-v5", api_url="https://integrate.api.nvidia.com/v1")
34
- text_embedder.warm_up()
33
+ # Components warm up automatically on first run.
35
34
 
36
35
  print(text_embedder.run(text_to_embed))
37
36
  ```
@@ -46,7 +45,7 @@ class NvidiaTextEmbedder:
46
45
  suffix: str = "",
47
46
  truncate: EmbeddingTruncateMode | str | None = None,
48
47
  timeout: float | None = None,
49
- ):
48
+ ) -> None:
50
49
  """
51
50
  Create a NvidiaTextEmbedder component.
52
51
 
@@ -90,9 +89,10 @@ class NvidiaTextEmbedder:
90
89
 
91
90
  @classmethod
92
91
  def class_name(cls) -> str:
92
+ """Return the class name identifier for serialization."""
93
93
  return "NvidiaTextEmbedder"
94
94
 
95
- def default_model(self):
95
+ def default_model(self) -> None:
96
96
  """Set default model in local NIM mode."""
97
97
  valid_models = [
98
98
  model.id for model in self.available_models if not model.base_model or model.base_model == model.id
@@ -119,7 +119,7 @@ class NvidiaTextEmbedder:
119
119
  error_message = "No locally hosted model was found."
120
120
  raise ValueError(error_message)
121
121
 
122
- def warm_up(self):
122
+ def warm_up(self) -> None:
123
123
  """
124
124
  Initializes the component.
125
125
  """
@@ -205,13 +205,13 @@ class NvidiaTextEmbedder:
205
205
  if not self._initialized:
206
206
  self.warm_up()
207
207
 
208
- elif not isinstance(text, str):
208
+ if not isinstance(text, str):
209
209
  msg = (
210
210
  "NvidiaTextEmbedder expects a string as an input."
211
211
  "In case you want to embed a list of Documents, please use the NvidiaDocumentEmbedder."
212
212
  )
213
213
  raise TypeError(msg)
214
- elif not text:
214
+ if not text:
215
215
  msg = "Cannot embed an empty string."
216
216
  raise ValueError(msg)
217
217
 
@@ -8,6 +8,7 @@ from enum import Enum
8
8
  class EmbeddingTruncateMode(Enum):
9
9
  """
10
10
  Specifies how inputs to the NVIDIA embedding components are truncated.
11
+
11
12
  If START, the input will be truncated from the start.
12
13
  If END, the input will be truncated from the end.
13
14
  If NONE, an error will be returned (if the input is too long).
@@ -17,7 +18,7 @@ class EmbeddingTruncateMode(Enum):
17
18
  END = "END"
18
19
  NONE = "NONE"
19
20
 
20
- def __str__(self):
21
+ def __str__(self) -> str:
21
22
  return self.value
22
23
 
23
24
  @classmethod
@@ -21,6 +21,7 @@ logger = logging.getLogger(__name__)
21
21
  class NvidiaChatGenerator(OpenAIChatGenerator):
22
22
  """
23
23
  Enables text generation using NVIDIA generative models.
24
+
24
25
  For supported models, see [NVIDIA Docs](https://build.nvidia.com/models).
25
26
 
26
27
  Users can pass any text generation parameters valid for the NVIDIA Chat Completion API
@@ -87,21 +88,22 @@ class NvidiaChatGenerator(OpenAIChatGenerator):
87
88
  - `stream`: Whether to stream back partial progress. If set, tokens will be sent as data-only server-sent
88
89
  events as they become available, with the stream terminated by a data: [DONE] message.
89
90
  - `response_format`: For NVIDIA NIM servers, this parameter has limited support.
90
- - The basic JSON mode with `{"type": "json_object"}` is supported by compatible models, to produce
91
+ The basic JSON mode with `{"type": "json_object"}` is supported by compatible models, to produce
91
92
  valid JSON output.
92
- To pass the JSON schema to the model, use the `guided_json` parameter in `extra_body`.
93
- For example:
93
+ To generate structured JSON output, use the `response_format` parameter.
94
+ Example:
94
95
  ```python
95
96
  generation_kwargs={
96
- "extra_body": {
97
- "nvext": {
98
- "guided_json": {
99
- json_schema
100
- }
97
+ "response_format": {
98
+ "type": "json_schema",
99
+ "json_schema": {
100
+ "name": "my_schema",
101
+ "schema": json_schema,
102
+ },
101
103
  }
102
104
  }
103
105
  ```
104
- For more details, see the [NVIDIA NIM documentation](https://docs.nvidia.com/nim/large-language-models/latest/structured-generation.html).
106
+ For more details, see the [NVIDIA NIM documentation](https://docs.nvidia.com/nim/vision-language-models/latest/structured-generation.html).
105
107
  :param tools:
106
108
  A list of tools or a Toolset for which the model can prepare calls. This parameter can accept either a
107
109
  list of `Tool` objects or a `Toolset` instance.
@@ -15,8 +15,9 @@ from haystack_integrations.utils.nvidia import DEFAULT_API_URL, Client, Model, N
15
15
  @component
16
16
  class NvidiaGenerator:
17
17
  """
18
- Generates text using generative models hosted with
19
- [NVIDIA NIM](https://ai.nvidia.com) on the [NVIDIA API Catalog](https://build.nvidia.com/explore/discover).
18
+ Generates text using generative models hosted with [NVIDIA NIM](https://ai.nvidia.com).
19
+
20
+ Available via the [NVIDIA API Catalog](https://build.nvidia.com/explore/discover).
20
21
 
21
22
  ### Usage example
22
23
 
@@ -31,7 +32,7 @@ class NvidiaGenerator:
31
32
  "max_tokens": 1024,
32
33
  },
33
34
  )
34
- generator.warm_up()
35
+ # Components warm up automatically on first run.
35
36
 
36
37
  result = generator.run(prompt="What is the answer?")
37
38
  print(result["replies"])
@@ -88,6 +89,7 @@ class NvidiaGenerator:
88
89
 
89
90
  @classmethod
90
91
  def class_name(cls) -> str:
92
+ """Return the class name identifier for serialization."""
91
93
  return "NvidiaGenerator"
92
94
 
93
95
  def default_model(self) -> None:
@@ -130,7 +132,7 @@ class NvidiaGenerator:
130
132
 
131
133
  if not self.is_hosted and not self._model:
132
134
  if self.backend.model:
133
- self.model = self.backend.model
135
+ self._model = self.backend.model
134
136
  else:
135
137
  self.default_model()
136
138
 
@@ -4,6 +4,7 @@
4
4
 
5
5
  import os
6
6
  import warnings
7
+ from dataclasses import replace
7
8
  from typing import Any
8
9
 
9
10
  from haystack import Document, component, default_from_dict, default_to_dict, logging
@@ -18,8 +19,7 @@ logger = logging.getLogger(__name__)
18
19
  @component
19
20
  class NvidiaRanker:
20
21
  """
21
- A component for ranking documents using ranking models provided by
22
- [NVIDIA NIMs](https://ai.nvidia.com).
22
+ A component for ranking documents using ranking models provided by [NVIDIA NIMs](https://ai.nvidia.com).
23
23
 
24
24
  Usage example:
25
25
  ```python
@@ -31,7 +31,7 @@ class NvidiaRanker:
31
31
  model="nvidia/nv-rerankqa-mistral-4b-v3",
32
32
  api_key=Secret.from_env_var("NVIDIA_API_KEY"),
33
33
  )
34
- ranker.warm_up()
34
+ # Components warm up automatically on first run.
35
35
 
36
36
  query = "What is the capital of Germany?"
37
37
  documents = [
@@ -120,6 +120,7 @@ class NvidiaRanker:
120
120
 
121
121
  @classmethod
122
122
  def class_name(cls) -> str:
123
+ """Return the class name identifier for serialization."""
123
124
  return "NvidiaRanker"
124
125
 
125
126
  def to_dict(self) -> dict[str, Any]:
@@ -236,11 +237,8 @@ class NvidiaRanker:
236
237
 
237
238
  # rank result is list[{index: int, logit: float}] sorted by logit
238
239
  sorted_indexes_and_scores = self.backend.rank(query_text=query_text, document_texts=document_texts)
239
- sorted_documents = []
240
- for item in sorted_indexes_and_scores[:top_k]:
241
- # mutate (don't copy) the document because we're only updating the score
242
- doc = documents[item["index"]]
243
- doc.score = item["logit"]
244
- sorted_documents.append(doc)
240
+ sorted_documents = [
241
+ replace(documents[item["index"]], score=item["logit"]) for item in sorted_indexes_and_scores[:top_k]
242
+ ]
245
243
 
246
244
  return {"documents": sorted_documents}
@@ -8,6 +8,7 @@ from enum import Enum
8
8
  class RankerTruncateMode(str, Enum):
9
9
  """
10
10
  Specifies how inputs to the NVIDIA ranker components are truncated.
11
+
11
12
  If NONE, the input will not be truncated and an error returned instead.
12
13
  If END, the input will be truncated from the end.
13
14
  """
@@ -15,7 +16,7 @@ class RankerTruncateMode(str, Enum):
15
16
  NONE = "NONE"
16
17
  END = "END"
17
18
 
18
- def __str__(self):
19
+ def __str__(self) -> str:
19
20
  return self.value
20
21
 
21
22
  @classmethod
@@ -35,7 +35,8 @@ class Model:
35
35
  def __hash__(self) -> int:
36
36
  return hash(self.id)
37
37
 
38
- def validate(self):
38
+ def validate(self) -> int:
39
+ """Validate the model against the backend and return a sort key."""
39
40
  if self.client:
40
41
  client = self.client if isinstance(self.client, Client) else Client.from_str(self.client)
41
42
  supported = {
@@ -29,7 +29,7 @@ class NimBackend:
29
29
  model_kwargs: dict[str, Any] | None = None,
30
30
  client: str | Client | None = None,
31
31
  timeout: float | None = None,
32
- ):
32
+ ) -> None:
33
33
  headers = {
34
34
  "Content-Type": "application/json",
35
35
  "accept": "application/json",
@@ -74,6 +74,7 @@ class NimBackend:
74
74
  self.timeout = timeout
75
75
 
76
76
  def embed(self, texts: list[str]) -> tuple[list[list[float]], dict[str, Any]]:
77
+ """Compute embeddings for a list of texts via the NIM API."""
77
78
  url = f"{self.api_url}/embeddings"
78
79
 
79
80
  try:
@@ -99,6 +100,7 @@ class NimBackend:
99
100
  return embeddings, {"usage": data["usage"]}
100
101
 
101
102
  def generate(self, prompt: str) -> tuple[list[str], list[dict[str, Any]]]:
103
+ """Generate text completions for a prompt via the NIM API."""
102
104
  # We're using the chat completion endpoint as the NIM API doesn't support
103
105
  # the /completions endpoint. So both the non-chat and chat generator will use this.
104
106
  # This is the same for local containers and the cloud API.
@@ -152,6 +154,7 @@ class NimBackend:
152
154
  return replies, meta
153
155
 
154
156
  def models(self) -> list[Model]:
157
+ """Retrieve available models from the NIM API."""
155
158
  url = f"{self.api_url}/models"
156
159
 
157
160
  res = self.session.get(
@@ -175,6 +178,7 @@ class NimBackend:
175
178
  return models
176
179
 
177
180
  def rank(self, query_text: str, document_texts: list[str]) -> list[dict[str, Any]]:
181
+ """Rank documents by relevance to a query via the NIM API."""
178
182
  url = self.api_url
179
183
 
180
184
  try:
@@ -49,6 +49,7 @@ def is_hosted(api_url: str) -> bool:
49
49
  def lookup_model(name: str) -> Model | None:
50
50
  """
51
51
  Lookup a model by name, using only the table of known models.
52
+
52
53
  The name is either:
53
54
  - directly in the table
54
55
  - an alias in the table
@@ -66,8 +67,7 @@ def lookup_model(name: str) -> Model | None:
66
67
 
67
68
  def determine_model(name: str) -> Model | None:
68
69
  """
69
- Determine the model to use based on a name, using
70
- only the table of known models.
70
+ Determine the model to use based on a name, using only the table of known models.
71
71
 
72
72
  Raise a warning if the model is found to be
73
73
  an alias of a known model.
@@ -354,6 +354,13 @@ class TestNvidiaDocumentEmbedder:
354
354
  with pytest.raises(TypeError, match="NvidiaDocumentEmbedder expects a list of Documents as input"):
355
355
  embedder.run(documents=list_integers_input)
356
356
 
357
+ def test_run_validates_input_without_prior_warm_up(self):
358
+ api_key = Secret.from_token("fake-api-key")
359
+ embedder = NvidiaDocumentEmbedder("nvidia/nv-embedqa-e5-v5", api_key=api_key)
360
+
361
+ with pytest.raises(TypeError, match="NvidiaDocumentEmbedder expects a list of Documents as input"):
362
+ embedder.run(documents="text")
363
+
357
364
  def test_run_empty_document(self, caplog):
358
365
  model = "nvidia/nv-embedqa-e5-v5"
359
366
  api_key = Secret.from_token("fake-api-key")
@@ -230,3 +230,73 @@ class TestNvidiaGenerator:
230
230
  )
231
231
  with pytest.raises(ValueError):
232
232
  generator1.warm_up()
233
+
234
+ @pytest.mark.usefixtures("mock_local_models")
235
+ def test_warm_up_falls_back_to_default_model(self, monkeypatch):
236
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
237
+ generator = NvidiaGenerator(api_url="http://localhost:8080/v1")
238
+
239
+ with pytest.warns(UserWarning, match="Default model is set as:"):
240
+ generator.warm_up()
241
+
242
+ assert generator._model == "model1"
243
+ assert generator.backend.model == "model1"
244
+ assert generator.to_dict()["init_parameters"]["model"] == "model1"
245
+
246
+ def test_default_model_raises_when_no_valid_models(self, monkeypatch, requests_mock):
247
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
248
+ requests_mock.get(
249
+ "http://localhost:8080/v1/models",
250
+ json={"data": [{"id": "derived-model", "object": "model", "root": "base-model"}]},
251
+ )
252
+ generator = NvidiaGenerator(api_url="http://localhost:8080/v1")
253
+
254
+ with pytest.raises(ValueError, match="No locally hosted model was found"):
255
+ generator.warm_up()
256
+
257
+ def test_warm_up_is_idempotent(self, monkeypatch):
258
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
259
+ generator = NvidiaGenerator("meta/llama3-8b-instruct")
260
+ generator.warm_up()
261
+ backend = generator.backend
262
+ generator.warm_up()
263
+ assert generator.backend is backend
264
+
265
+ def test_available_models_without_backend(self, monkeypatch):
266
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
267
+ generator = NvidiaGenerator("meta/llama3-8b-instruct")
268
+ assert generator.available_models == []
269
+
270
+ @pytest.mark.usefixtures("mock_local_models")
271
+ def test_available_models_with_backend(self, monkeypatch):
272
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
273
+ generator = NvidiaGenerator(model="model1", api_url="http://localhost:8080/v1")
274
+ generator.warm_up()
275
+ models = generator.available_models
276
+ assert len(models) == 1
277
+ assert models[0].id == "model1"
278
+
279
+ def test_from_dict(self, monkeypatch):
280
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
281
+ data = {
282
+ "type": "haystack_integrations.components.generators.nvidia.generator.NvidiaGenerator",
283
+ "init_parameters": {
284
+ "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"},
285
+ "api_url": "https://my.url.com/v1",
286
+ "model": "meta/llama3-8b-instruct",
287
+ "model_arguments": {"temperature": 0.5},
288
+ },
289
+ }
290
+ generator = NvidiaGenerator.from_dict(data)
291
+ assert generator._model == "meta/llama3-8b-instruct"
292
+ assert generator.api_url == "https://my.url.com/v1"
293
+ assert generator._model_arguments == {"temperature": 0.5}
294
+
295
+ def test_run(self, monkeypatch, mock_local_chat_completion): # noqa: ARG002
296
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
297
+ generator = NvidiaGenerator(model="model1", api_url="http://localhost:8080/v1")
298
+
299
+ result = generator.run(prompt="What is the answer?")
300
+
301
+ assert result["replies"] == ["Hello!", "How are you?"]
302
+ assert len(result["meta"]) == 2
@@ -233,6 +233,36 @@ class TestNimBackend:
233
233
  timeout=60.0,
234
234
  )
235
235
 
236
+ def test_embed_raises_on_http_error(self, monkeypatch):
237
+ error_response = requests.Response()
238
+ error_response.status_code = 500
239
+ error_response._content = b"server exploded"
240
+ with patch("requests.sessions.Session.post", return_value=error_response):
241
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
242
+ backend = NimBackend(model="nvidia/nv-embedqa-e5-v5", api_url=DEFAULT_API_URL, client="NvidiaTextEmbedder")
243
+ with pytest.raises(ValueError, match="Failed to query embedding endpoint"):
244
+ backend.embed(texts=["a"])
245
+
246
+ def test_generate_raises_on_http_error(self, monkeypatch):
247
+ error_response = requests.Response()
248
+ error_response.status_code = 500
249
+ error_response._content = b"server exploded"
250
+ with patch("requests.sessions.Session.post", return_value=error_response):
251
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
252
+ backend = NimBackend(model="meta/llama3-8b-instruct", api_url=DEFAULT_API_URL, client="NvidiaGenerator")
253
+ with pytest.raises(ValueError, match="Failed to query chat completion endpoint"):
254
+ backend.generate(prompt="hi")
255
+
256
+ def test_models_raises_when_empty(self, monkeypatch):
257
+ empty_response = requests.Response()
258
+ empty_response.status_code = 200
259
+ empty_response._content = json.dumps({"data": []}).encode()
260
+ with patch("requests.sessions.Session.get", return_value=empty_response):
261
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
262
+ backend = NimBackend(model="custom-model", api_url="http://localhost:8000")
263
+ with pytest.raises(ValueError, match="No hosted model were found"):
264
+ backend.models()
265
+
236
266
  def test_rank(self, monkeypatch):
237
267
  with patch("requests.sessions.Session.post", side_effect=mock_rank_post_response) as mock_post:
238
268
  monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
@@ -258,3 +288,27 @@ class TestNimBackend:
258
288
  },
259
289
  timeout=60.0,
260
290
  )
291
+
292
+ def test_rank_raises_on_http_error(self, monkeypatch):
293
+ error_response = requests.Response()
294
+ error_response.status_code = 500
295
+ error_response._content = b"server exploded"
296
+ with patch("requests.sessions.Session.post", return_value=error_response):
297
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
298
+ backend = NimBackend(
299
+ model="nvidia/llama-3.2-nv-rerankqa-1b-v2", api_url=DEFAULT_API_URL, client="NvidiaRanker"
300
+ )
301
+ with pytest.raises(ValueError, match="Failed to rank endpoint"):
302
+ backend.rank(query_text="q", document_texts=["a"])
303
+
304
+ def test_rank_raises_when_rankings_missing(self, monkeypatch):
305
+ response = requests.Response()
306
+ response.status_code = 200
307
+ response._content = json.dumps({"unexpected": "payload"}).encode()
308
+ with patch("requests.sessions.Session.post", return_value=response):
309
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
310
+ backend = NimBackend(
311
+ model="nvidia/llama-3.2-nv-rerankqa-1b-v2", api_url=DEFAULT_API_URL, client="NvidiaRanker"
312
+ )
313
+ with pytest.raises(ValueError, match="Expected 'rankings' in response"):
314
+ backend.rank(query_text="q", document_texts=["a"])
@@ -278,7 +278,7 @@ class TestNvidiaChatGenerator:
278
278
  reason="Export an env var called NVIDIA_API_KEY containing the NVIDIA API key to run this test.",
279
279
  )
280
280
  @pytest.mark.integration
281
- def test_live_run_with_guided_json_schema(self):
281
+ def test_live_run_with_json_schema(self):
282
282
  json_schema = {
283
283
  "type": "object",
284
284
  "properties": {"title": {"type": "string"}, "rating": {"type": "number"}},
@@ -295,14 +295,22 @@ class TestNvidiaChatGenerator:
295
295
 
296
296
  component = NvidiaChatGenerator(
297
297
  model="meta/llama-3.1-70b-instruct",
298
- generation_kwargs={"extra_body": {"nvext": {"guided_json": json_schema}}},
298
+ generation_kwargs={
299
+ "response_format": {
300
+ "type": "json_schema",
301
+ "json_schema": {
302
+ "name": "movie_review",
303
+ "schema": json_schema,
304
+ },
305
+ },
306
+ },
299
307
  )
300
308
 
301
309
  results = component.run(chat_messages)
302
310
  assert len(results["replies"]) == 1
303
311
  message = results["replies"][0].text
304
312
  output = json.loads(message)
305
- assert output["title"] == "Inception"
313
+ assert "Inception" in output["title"]
306
314
  assert "rating" in output
307
315
 
308
316
  @pytest.mark.skipif(
@@ -330,6 +330,20 @@ class TestNvidiaRanker:
330
330
  client.warm_up()
331
331
  assert client.backend.timeout == 45.0
332
332
 
333
+ def test_run_on_empty_list(self, monkeypatch):
334
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
335
+ client = NvidiaRanker()
336
+ client.warm_up()
337
+ assert client.run(query="q", documents=[]) == {"documents": []}
338
+
339
+ def test_run_without_prior_warm_up(self, requests_mock, monkeypatch):
340
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
341
+ requests_mock.post(re.compile(r".*ranking"), json={"rankings": [{"index": 0, "logit": 1.0}]})
342
+ client = NvidiaRanker()
343
+ result = client.run(query="q", documents=[Document(content="doc")])
344
+ assert client._initialized is True
345
+ assert len(result["documents"]) == 1
346
+
333
347
  def test_prepare_texts_to_embed_w_metadata(self):
334
348
  documents = [
335
349
  Document(content=f"document number {i}:\ncontent", meta={"meta_field": f"meta_value {i}"}) for i in range(5)
@@ -183,6 +183,20 @@ class TestNvidiaTextEmbedder:
183
183
  with pytest.raises(ValueError, match="empty string"):
184
184
  embedder.run(text="")
185
185
 
186
+ def test_run_validates_input_without_prior_warm_up(self):
187
+ api_key = Secret.from_token("fake-api-key")
188
+ embedder = NvidiaTextEmbedder("nvidia/nv-embedqa-e5-v5", api_key=api_key)
189
+
190
+ with pytest.raises(TypeError, match="NvidiaTextEmbedder expects a string as an input"):
191
+ embedder.run(text=[1, 2, 3])
192
+
193
+ def test_run_rejects_empty_string_without_prior_warm_up(self):
194
+ api_key = Secret.from_token("fake-api-key")
195
+ embedder = NvidiaTextEmbedder("nvidia/nv-embedqa-e5-v5", api_key=api_key)
196
+
197
+ with pytest.raises(ValueError, match="empty string"):
198
+ embedder.run(text="")
199
+
186
200
  def test_setting_timeout(self, monkeypatch):
187
201
  monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
188
202
  embedder = NvidiaTextEmbedder(timeout=10.0)
@@ -5,7 +5,12 @@
5
5
  import pytest
6
6
 
7
7
  from haystack_integrations.utils.nvidia import Client, is_hosted
8
- from haystack_integrations.utils.nvidia.models import CHAT_MODEL_TABLE, EMBEDDING_MODEL_TABLE, RANKING_MODEL_TABLE
8
+ from haystack_integrations.utils.nvidia.models import (
9
+ CHAT_MODEL_TABLE,
10
+ EMBEDDING_MODEL_TABLE,
11
+ RANKING_MODEL_TABLE,
12
+ Model,
13
+ )
9
14
  from haystack_integrations.utils.nvidia.utils import (
10
15
  determine_model,
11
16
  lookup_model,
@@ -110,3 +115,39 @@ def test_validate_hosted_model_with_client() -> None:
110
115
  model = validate_hosted_model("meta/codellama-70b", Client.NVIDIA_GENERATOR)
111
116
  assert model is not None
112
117
  assert model.client == Client.NVIDIA_GENERATOR
118
+
119
+
120
+ # Model
121
+ def test_model_hash_uses_id() -> None:
122
+ assert hash(Model(id="foo")) == hash("foo")
123
+
124
+
125
+ def test_model_validate_with_enum_client() -> None:
126
+ model = Model(id="foo", model_type="chat", client=Client.NVIDIA_GENERATOR)
127
+ assert model.validate() == hash("foo")
128
+
129
+
130
+ def test_model_validate_with_string_client() -> None:
131
+ model = Model(id="foo", model_type="chat", client="NvidiaGenerator")
132
+ assert model.validate() == hash("foo")
133
+
134
+
135
+ def test_model_validate_raises_on_incompatible_type() -> None:
136
+ model = Model(id="foo", model_type="embedding", client=Client.NVIDIA_GENERATOR)
137
+ with pytest.raises(ValueError, match="not supported by client"):
138
+ model.validate()
139
+
140
+
141
+ def test_model_validate_without_client() -> None:
142
+ model = Model(id="foo", model_type="chat")
143
+ assert model.validate() == hash("foo")
144
+
145
+
146
+ # Client
147
+ def test_client_from_str_invalid() -> None:
148
+ with pytest.raises(ValueError, match="Unknown client"):
149
+ Client.from_str("NotARealClient")
150
+
151
+
152
+ def test_client_str_returns_value() -> None:
153
+ assert str(Client.NVIDIA_GENERATOR) == "NvidiaGenerator"
@@ -1,34 +0,0 @@
1
- loaders:
2
- - ignore_when_discovered:
3
- - __init__
4
- modules:
5
- - haystack_integrations.components.embedders.nvidia.document_embedder
6
- - haystack_integrations.components.embedders.nvidia.text_embedder
7
- - haystack_integrations.components.embedders.nvidia.truncate
8
- - haystack_integrations.components.generators.nvidia.generator
9
- - haystack_integrations.components.generators.nvidia.chat.chat_generator
10
- - haystack_integrations.components.rankers.nvidia.ranker
11
- - haystack_integrations.components.rankers.nvidia.truncate
12
- search_path:
13
- - ../src
14
- type: haystack_pydoc_tools.loaders.CustomPythonLoader
15
- processors:
16
- - do_not_filter_modules: false
17
- documented_only: true
18
- expression: null
19
- skip_empty_modules: true
20
- type: filter
21
- - type: smart
22
- - type: crossref
23
- renderer:
24
- description: Nvidia integration for Haystack
25
- id: integrations-nvidia
26
- markdown:
27
- add_member_class_prefix: false
28
- add_method_class_prefix: true
29
- classdef_code_block: false
30
- descriptive_class_title: false
31
- descriptive_module_title: true
32
- filename: nvidia.md
33
- title: Nvidia
34
- type: haystack_pydoc_tools.renderers.DocusaurusRenderer