nvidia-haystack 0.0.3__tar.gz → 0.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/.gitignore +9 -0
  2. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/PKG-INFO +1 -1
  3. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/pyproject.toml +4 -4
  4. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/src/haystack_integrations/components/embedders/nvidia/document_embedder.py +15 -24
  5. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/src/haystack_integrations/components/embedders/nvidia/text_embedder.py +15 -24
  6. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/src/haystack_integrations/components/generators/nvidia/generator.py +18 -21
  7. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/tests/test_document_embedder.py +31 -87
  8. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/tests/test_generator.py +28 -91
  9. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/tests/test_text_embedder.py +30 -50
  10. nvidia_haystack-0.0.3/src/haystack_integrations/components/embedders/nvidia/_nvcf_backend.py +0 -111
  11. nvidia_haystack-0.0.3/src/haystack_integrations/components/generators/nvidia/_nvcf_backend.py +0 -119
  12. nvidia_haystack-0.0.3/src/haystack_integrations/components/generators/nvidia/_schema.py +0 -69
  13. nvidia_haystack-0.0.3/src/haystack_integrations/utils/nvidia/__init__.py +0 -3
  14. nvidia_haystack-0.0.3/src/haystack_integrations/utils/nvidia/client.py +0 -82
  15. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/LICENSE.txt +0 -0
  16. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/README.md +0 -0
  17. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/pydoc/config.yml +0 -0
  18. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/src/haystack_integrations/components/embedders/nvidia/__init__.py +0 -0
  19. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py +0 -0
  20. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/src/haystack_integrations/components/embedders/nvidia/backend.py +0 -0
  21. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/src/haystack_integrations/components/embedders/nvidia/truncate.py +0 -0
  22. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/src/haystack_integrations/components/generators/nvidia/__init__.py +0 -0
  23. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/src/haystack_integrations/components/generators/nvidia/_nim_backend.py +0 -0
  24. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/src/haystack_integrations/components/generators/nvidia/backend.py +0 -0
  25. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/src/haystack_integrations/components/generators/nvidia/chat/__init__.py +0 -0
  26. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/tests/__init__.py +0 -0
@@ -135,3 +135,12 @@ dmypy.json
135
135
  # Docs generation artifacts
136
136
  _readme_*.md
137
137
  .idea
138
+
139
+ # macOS
140
+ .DS_Store
141
+
142
+ # http cache (requests-cache)
143
+ **/http_cache.sqlite
144
+
145
+ # ruff
146
+ .ruff_cache
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: nvidia-haystack
3
- Version: 0.0.3
3
+ Version: 0.0.4
4
4
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/nvidia#readme
5
5
  Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
6
6
  Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/nvidia
@@ -42,10 +42,10 @@ root = "../.."
42
42
  git_describe_command = 'git describe --tags --match="integrations/nvidia-v[0-9]*"'
43
43
 
44
44
  [tool.hatch.envs.default]
45
- dependencies = ["coverage[toml]>=6.5", "pytest", "haystack-pydoc-tools"]
45
+ dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools"]
46
46
  [tool.hatch.envs.default.scripts]
47
- test = "pytest {args:tests}"
48
- test-cov = "coverage run -m pytest {args:tests}"
47
+ test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
48
+ test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
49
49
  cov-report = ["- coverage combine", "coverage report"]
50
50
  cov = ["test-cov", "cov-report"]
51
51
  docs = ["pydoc-markdown pydoc/config.yml"]
@@ -58,7 +58,7 @@ detached = true
58
58
  dependencies = ["black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"]
59
59
  [tool.hatch.envs.lint.scripts]
60
60
  typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}"
61
- style = ["ruff {args:.}", "black --check --diff {args:.}"]
61
+ style = ["ruff check {args:.}", "black --check --diff {args:.}"]
62
62
  fmt = ["black {args:.}", "ruff --fix {args:.}", "style"]
63
63
  all = ["style", "typing"]
64
64
 
@@ -5,7 +5,6 @@ from haystack.utils import Secret, deserialize_secrets_inplace
5
5
  from tqdm import tqdm
6
6
 
7
7
  from ._nim_backend import NimBackend
8
- from ._nvcf_backend import NvcfBackend
9
8
  from .backend import EmbedderBackend
10
9
  from .truncate import EmbeddingTruncateMode
11
10
 
@@ -14,8 +13,7 @@ from .truncate import EmbeddingTruncateMode
14
13
  class NvidiaDocumentEmbedder:
15
14
  """
16
15
  A component for embedding documents using embedding models provided by
17
- [NVIDIA AI Foundation Endpoints](https://www.nvidia.com/en-us/ai-data-science/foundation-models/)
18
- and NVIDIA Inference Microservices.
16
+ [NVIDIA NIMs](https://ai.nvidia.com).
19
17
 
20
18
  Usage example:
21
19
  ```python
@@ -23,7 +21,7 @@ class NvidiaDocumentEmbedder:
23
21
 
24
22
  doc = Document(content="I love pizza!")
25
23
 
26
- text_embedder = NvidiaDocumentEmbedder(model="nvolveqa_40k")
24
+ text_embedder = NvidiaDocumentEmbedder(model="NV-Embed-QA", api_url="https://ai.api.nvidia.com/v1/retrieval/nvidia")
27
25
  text_embedder.warm_up()
28
26
 
29
27
  result = document_embedder.run([doc])
@@ -33,9 +31,9 @@ class NvidiaDocumentEmbedder:
33
31
 
34
32
  def __init__(
35
33
  self,
36
- model: str,
34
+ model: str = "NV-Embed-QA",
37
35
  api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"),
38
- api_url: Optional[str] = None,
36
+ api_url: str = "https://ai.api.nvidia.com/v1/retrieval/nvidia",
39
37
  prefix: str = "",
40
38
  suffix: str = "",
41
39
  batch_size: int = 32,
@@ -50,9 +48,9 @@ class NvidiaDocumentEmbedder:
50
48
  :param model:
51
49
  Embedding model to use.
52
50
  :param api_key:
53
- API key for the NVIDIA AI Foundation Endpoints.
51
+ API key for the NVIDIA NIM.
54
52
  :param api_url:
55
- Custom API URL for the NVIDIA Inference Microservices.
53
+ Custom API URL for the NVIDIA NIM.
56
54
  :param prefix:
57
55
  A string to add to the beginning of each text.
58
56
  :param suffix:
@@ -95,22 +93,15 @@ class NvidiaDocumentEmbedder:
95
93
  if self._initialized:
96
94
  return
97
95
 
98
- if self.api_url is None:
99
- if self.api_key is None:
100
- msg = "API key is required for NVIDIA AI Foundation Endpoints."
101
- raise ValueError(msg)
102
-
103
- self.backend = NvcfBackend(self.model, api_key=self.api_key, model_kwargs={"model": "passage"})
104
- else:
105
- model_kwargs = {"input_type": "passage"}
106
- if self.truncate is not None:
107
- model_kwargs["truncate"] = str(self.truncate)
108
- self.backend = NimBackend(
109
- self.model,
110
- api_url=self.api_url,
111
- api_key=self.api_key,
112
- model_kwargs=model_kwargs,
113
- )
96
+ model_kwargs = {"input_type": "passage"}
97
+ if self.truncate is not None:
98
+ model_kwargs["truncate"] = str(self.truncate)
99
+ self.backend = NimBackend(
100
+ self.model,
101
+ api_url=self.api_url,
102
+ api_key=self.api_key,
103
+ model_kwargs=model_kwargs,
104
+ )
114
105
 
115
106
  self._initialized = True
116
107
 
@@ -4,7 +4,6 @@ from haystack import component, default_from_dict, default_to_dict
4
4
  from haystack.utils import Secret, deserialize_secrets_inplace
5
5
 
6
6
  from ._nim_backend import NimBackend
7
- from ._nvcf_backend import NvcfBackend
8
7
  from .backend import EmbedderBackend
9
8
  from .truncate import EmbeddingTruncateMode
10
9
 
@@ -13,8 +12,7 @@ from .truncate import EmbeddingTruncateMode
13
12
  class NvidiaTextEmbedder:
14
13
  """
15
14
  A component for embedding strings using embedding models provided by
16
- [NVIDIA AI Foundation Endpoints](https://www.nvidia.com/en-us/ai-data-science/foundation-models/)
17
- and NVIDIA Inference Microservices.
15
+ [NVIDIA NIMs](https://ai.nvidia.com).
18
16
 
19
17
  For models that differentiate between query and document inputs,
20
18
  this component embeds the input string as a query.
@@ -25,7 +23,7 @@ class NvidiaTextEmbedder:
25
23
 
26
24
  text_to_embed = "I love pizza!"
27
25
 
28
- text_embedder = NvidiaTextEmbedder(model="nvolveqa_40k")
26
+ text_embedder = NvidiaTextEmbedder(model="NV-Embed-QA", api_url="https://ai.api.nvidia.com/v1/retrieval/nvidia")
29
27
  text_embedder.warm_up()
30
28
 
31
29
  print(text_embedder.run(text_to_embed))
@@ -34,9 +32,9 @@ class NvidiaTextEmbedder:
34
32
 
35
33
  def __init__(
36
34
  self,
37
- model: str,
35
+ model: str = "NV-Embed-QA",
38
36
  api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"),
39
- api_url: Optional[str] = None,
37
+ api_url: str = "https://ai.api.nvidia.com/v1/retrieval/nvidia",
40
38
  prefix: str = "",
41
39
  suffix: str = "",
42
40
  truncate: Optional[Union[EmbeddingTruncateMode, str]] = None,
@@ -47,9 +45,9 @@ class NvidiaTextEmbedder:
47
45
  :param model:
48
46
  Embedding model to use.
49
47
  :param api_key:
50
- API key for the NVIDIA AI Foundation Endpoints.
48
+ API key for the NVIDIA NIM.
51
49
  :param api_url:
52
- Custom API URL for the NVIDIA Inference Microservices.
50
+ Custom API URL for the NVIDIA NIM.
53
51
  :param prefix:
54
52
  A string to add to the beginning of each text.
55
53
  :param suffix:
@@ -79,22 +77,15 @@ class NvidiaTextEmbedder:
79
77
  if self._initialized:
80
78
  return
81
79
 
82
- if self.api_url is None:
83
- if self.api_key is None:
84
- msg = "API key is required for NVIDIA AI Foundation Endpoints."
85
- raise ValueError(msg)
86
-
87
- self.backend = NvcfBackend(self.model, api_key=self.api_key, model_kwargs={"model": "query"})
88
- else:
89
- model_kwargs = {"input_type": "query"}
90
- if self.truncate is not None:
91
- model_kwargs["truncate"] = str(self.truncate)
92
- self.backend = NimBackend(
93
- self.model,
94
- api_url=self.api_url,
95
- api_key=self.api_key,
96
- model_kwargs=model_kwargs,
97
- )
80
+ model_kwargs = {"input_type": "query"}
81
+ if self.truncate is not None:
82
+ model_kwargs["truncate"] = str(self.truncate)
83
+ self.backend = NimBackend(
84
+ self.model,
85
+ api_url=self.api_url,
86
+ api_key=self.api_key,
87
+ model_kwargs=model_kwargs,
88
+ )
98
89
 
99
90
  self._initialized = True
100
91
 
@@ -7,23 +7,23 @@ from haystack import component, default_from_dict, default_to_dict
7
7
  from haystack.utils.auth import Secret, deserialize_secrets_inplace
8
8
 
9
9
  from ._nim_backend import NimBackend
10
- from ._nvcf_backend import NvcfBackend
11
10
  from .backend import GeneratorBackend
12
11
 
12
+ _DEFAULT_API_URL = "https://integrate.api.nvidia.com/v1"
13
+
13
14
 
14
15
  @component
15
16
  class NvidiaGenerator:
16
17
  """
17
18
  A component for generating text using generative models provided by
18
- [NVIDIA AI Foundation Endpoints](https://www.nvidia.com/en-us/ai-data-science/foundation-models/)
19
- and NVIDIA Inference Microservices.
19
+ [NVIDIA NIMs](https://ai.nvidia.com).
20
20
 
21
21
  Usage example:
22
22
  ```python
23
23
  from haystack_integrations.components.generators.nvidia import NvidiaGenerator
24
24
 
25
25
  generator = NvidiaGenerator(
26
- model="nv_llama2_rlhf_70b",
26
+ model="meta/llama3-70b-instruct",
27
27
  model_arguments={
28
28
  "temperature": 0.2,
29
29
  "top_p": 0.7,
@@ -42,7 +42,7 @@ class NvidiaGenerator:
42
42
  def __init__(
43
43
  self,
44
44
  model: str,
45
- api_url: Optional[str] = None,
45
+ api_url: str = _DEFAULT_API_URL,
46
46
  api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"),
47
47
  model_arguments: Optional[Dict[str, Any]] = None,
48
48
  ):
@@ -51,15 +51,15 @@ class NvidiaGenerator:
51
51
 
52
52
  :param model:
53
53
  Name of the model to use for text generation.
54
- See the [Nvidia catalog](https://catalog.ngc.nvidia.com/ai-foundation-models)
54
+ See the [NVIDIA NIMs](https://ai.nvidia.com)
55
55
  for more information on the supported models.
56
56
  :param api_key:
57
- API key for the NVIDIA AI Foundation Endpoints.
57
+ API key for the NVIDIA NIM.
58
58
  :param api_url:
59
- Custom API URL for the NVIDIA Inference Microservices.
59
+ Custom API URL for the NVIDIA NIM.
60
60
  :param model_arguments:
61
61
  Additional arguments to pass to the model provider. Different models accept different arguments.
62
- Search your model in the [Nvidia catalog](https://catalog.ngc.nvidia.com/ai-foundation-models)
62
+ Search your model in the [NVIDIA NIMs](https://ai.nvidia.com)
63
63
  to know the supported arguments.
64
64
  """
65
65
  self._model = model
@@ -76,18 +76,15 @@ class NvidiaGenerator:
76
76
  if self._backend is not None:
77
77
  return
78
78
 
79
- if self._api_url is None:
80
- if self._api_key is None:
81
- msg = "API key is required for NVIDIA AI Foundation Endpoints."
82
- raise ValueError(msg)
83
- self._backend = NvcfBackend(self._model, api_key=self._api_key, model_kwargs=self._model_arguments)
84
- else:
85
- self._backend = NimBackend(
86
- self._model,
87
- api_url=self._api_url,
88
- api_key=self._api_key,
89
- model_kwargs=self._model_arguments,
90
- )
79
+ if self._api_url == _DEFAULT_API_URL and self._api_key is None:
80
+ msg = "API key is required for hosted NVIDIA NIMs."
81
+ raise ValueError(msg)
82
+ self._backend = NimBackend(
83
+ self._model,
84
+ api_url=self._api_url,
85
+ api_key=self._api_key,
86
+ model_kwargs=self._model_arguments,
87
+ )
91
88
 
92
89
  def to_dict(self) -> Dict[str, Any]:
93
90
  """
@@ -1,19 +1,30 @@
1
1
  import os
2
- from unittest.mock import Mock, patch
3
2
 
4
3
  import pytest
5
4
  from haystack import Document
6
5
  from haystack.utils import Secret
7
6
  from haystack_integrations.components.embedders.nvidia import EmbeddingTruncateMode, NvidiaDocumentEmbedder
7
+ from haystack_integrations.components.embedders.nvidia.backend import EmbedderBackend
8
+
9
+
10
+ class MockBackend(EmbedderBackend):
11
+ def __init__(self, model, model_kwargs):
12
+ super().__init__(model, model_kwargs)
13
+
14
+ def embed(self, texts):
15
+ inputs = texts
16
+ data = [[0.1, 0.2, 0.3] for i in range(len(inputs))]
17
+ return data, {"usage": {"total_tokens": 4, "prompt_tokens": 4}}
8
18
 
9
19
 
10
20
  class TestNvidiaDocumentEmbedder:
11
21
  def test_init_default(self, monkeypatch):
12
22
  monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
13
- embedder = NvidiaDocumentEmbedder("nvolveqa_40k")
23
+ embedder = NvidiaDocumentEmbedder()
14
24
 
15
25
  assert embedder.api_key == Secret.from_env_var("NVIDIA_API_KEY")
16
- assert embedder.model == "nvolveqa_40k"
26
+ assert embedder.model == "NV-Embed-QA"
27
+ assert embedder.api_url == "https://ai.api.nvidia.com/v1/retrieval/nvidia"
17
28
  assert embedder.prefix == ""
18
29
  assert embedder.suffix == ""
19
30
  assert embedder.batch_size == 32
@@ -25,6 +36,7 @@ class TestNvidiaDocumentEmbedder:
25
36
  embedder = NvidiaDocumentEmbedder(
26
37
  api_key=Secret.from_token("fake-api-key"),
27
38
  model="nvolveqa_40k",
39
+ api_url="https://ai.api.nvidia.com/v1/retrieval/nvidia/test",
28
40
  prefix="prefix",
29
41
  suffix="suffix",
30
42
  batch_size=30,
@@ -35,6 +47,7 @@ class TestNvidiaDocumentEmbedder:
35
47
 
36
48
  assert embedder.api_key == Secret.from_token("fake-api-key")
37
49
  assert embedder.model == "nvolveqa_40k"
50
+ assert embedder.api_url == "https://ai.api.nvidia.com/v1/retrieval/nvidia/test"
38
51
  assert embedder.prefix == "prefix"
39
52
  assert embedder.suffix == "suffix"
40
53
  assert embedder.batch_size == 30
@@ -56,7 +69,7 @@ class TestNvidiaDocumentEmbedder:
56
69
  "type": "haystack_integrations.components.embedders.nvidia.document_embedder.NvidiaDocumentEmbedder",
57
70
  "init_parameters": {
58
71
  "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"},
59
- "api_url": None,
72
+ "api_url": "https://ai.api.nvidia.com/v1/retrieval/nvidia",
60
73
  "model": "playground_nvolveqa_40k",
61
74
  "prefix": "",
62
75
  "suffix": "",
@@ -117,7 +130,7 @@ class TestNvidiaDocumentEmbedder:
117
130
  }
118
131
  component = NvidiaDocumentEmbedder.from_dict(data)
119
132
  assert component.model == "nvolveqa_40k"
120
- assert component.api_url is None
133
+ assert component.api_url == "https://example.com"
121
134
  assert component.prefix == "prefix"
122
135
  assert component.suffix == "suffix"
123
136
  assert component.batch_size == 32
@@ -169,8 +182,7 @@ class TestNvidiaDocumentEmbedder:
169
182
  "my_prefix document number 4 my_suffix",
170
183
  ]
171
184
 
172
- @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient")
173
- def test_embed_batch(self, mock_client_class):
185
+ def test_embed_batch(self):
174
186
  texts = ["text 1", "text 2", "text 3", "text 4", "text 5"]
175
187
 
176
188
  embedder = NvidiaDocumentEmbedder(
@@ -178,17 +190,8 @@ class TestNvidiaDocumentEmbedder:
178
190
  api_key=Secret.from_token("fake-api-key"),
179
191
  )
180
192
 
181
- def mock_query_function(_, payload):
182
- inputs = payload["input"]
183
- data = [{"index": i, "embedding": [0.1, 0.2, 0.3]} for i in range(len(inputs))]
184
- return {"data": data, "usage": {"total_tokens": 4, "prompt_tokens": 4}}
185
-
186
- mock_client = Mock(
187
- get_model_nvcf_id=Mock(return_value="some_id"),
188
- query_function=mock_query_function,
189
- )
190
- mock_client_class.return_value = mock_client
191
193
  embedder.warm_up()
194
+ embedder.backend = MockBackend("aa", None)
192
195
 
193
196
  embeddings, metadata = embedder._embed_batch(texts_to_embed=texts, batch_size=2)
194
197
 
@@ -201,8 +204,7 @@ class TestNvidiaDocumentEmbedder:
201
204
 
202
205
  assert metadata == {"usage": {"prompt_tokens": 3 * 4, "total_tokens": 3 * 4}}
203
206
 
204
- @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient")
205
- def test_run(self, mock_client_class):
207
+ def test_run(self):
206
208
  docs = [
207
209
  Document(content="I love cheese", meta={"topic": "Cuisine"}),
208
210
  Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}),
@@ -218,17 +220,8 @@ class TestNvidiaDocumentEmbedder:
218
220
  embedding_separator=" | ",
219
221
  )
220
222
 
221
- def mock_query_function(_, payload):
222
- inputs = payload["input"]
223
- data = [{"index": i, "embedding": [0.1, 0.2, 0.3]} for i in range(len(inputs))]
224
- return {"data": data, "usage": {"total_tokens": 4, "prompt_tokens": 4}}
225
-
226
- mock_client = Mock(
227
- get_model_nvcf_id=Mock(return_value="some_id"),
228
- query_function=mock_query_function,
229
- )
230
- mock_client_class.return_value = mock_client
231
223
  embedder.warm_up()
224
+ embedder.backend = MockBackend("aa", None)
232
225
 
233
226
  result = embedder.run(documents=docs)
234
227
 
@@ -244,8 +237,7 @@ class TestNvidiaDocumentEmbedder:
244
237
  assert all(isinstance(x, float) for x in doc.embedding)
245
238
  assert metadata == {"usage": {"prompt_tokens": 4, "total_tokens": 4}}
246
239
 
247
- @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient")
248
- def test_run_custom_batch_size(self, mock_client_class):
240
+ def test_run_custom_batch_size(self):
249
241
  docs = [
250
242
  Document(content="I love cheese", meta={"topic": "Cuisine"}),
251
243
  Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}),
@@ -261,17 +253,8 @@ class TestNvidiaDocumentEmbedder:
261
253
  batch_size=1,
262
254
  )
263
255
 
264
- def mock_query_function(_, payload):
265
- inputs = payload["input"]
266
- data = [{"index": i, "embedding": [0.1, 0.2, 0.3]} for i in range(len(inputs))]
267
- return {"data": data, "usage": {"total_tokens": 4, "prompt_tokens": 4}}
268
-
269
- mock_client = Mock(
270
- get_model_nvcf_id=Mock(return_value="some_id"),
271
- query_function=mock_query_function,
272
- )
273
- mock_client_class.return_value = mock_client
274
256
  embedder.warm_up()
257
+ embedder.backend = MockBackend("aa", None)
275
258
 
276
259
  result = embedder.run(documents=docs)
277
260
 
@@ -288,21 +271,11 @@ class TestNvidiaDocumentEmbedder:
288
271
 
289
272
  assert metadata == {"usage": {"prompt_tokens": 2 * 4, "total_tokens": 2 * 4}}
290
273
 
291
- @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient")
292
- def test_run_wrong_input_format(self, mock_client_class):
274
+ def test_run_wrong_input_format(self):
293
275
  embedder = NvidiaDocumentEmbedder("playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key"))
294
276
 
295
- def mock_query_function(_, payload):
296
- inputs = payload["input"]
297
- data = [{"index": i, "embedding": [0.1, 0.2, 0.3]} for i in range(len(inputs))]
298
- return {"data": data, "usage": {"total_tokens": 4, "prompt_tokens": 4}}
299
-
300
- mock_client = Mock(
301
- get_model_nvcf_id=Mock(return_value="some_id"),
302
- query_function=mock_query_function,
303
- )
304
- mock_client_class.return_value = mock_client
305
277
  embedder.warm_up()
278
+ embedder.backend = MockBackend("aa", None)
306
279
 
307
280
  string_input = "text"
308
281
  list_integers_input = [1, 2, 3]
@@ -313,21 +286,11 @@ class TestNvidiaDocumentEmbedder:
313
286
  with pytest.raises(TypeError, match="NvidiaDocumentEmbedder expects a list of Documents as input"):
314
287
  embedder.run(documents=list_integers_input)
315
288
 
316
- @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient")
317
- def test_run_on_empty_list(self, mock_client_class):
289
+ def test_run_on_empty_list(self):
318
290
  embedder = NvidiaDocumentEmbedder("playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key"))
319
291
 
320
- def mock_query_function(_, payload):
321
- inputs = payload["input"]
322
- data = [{"index": i, "embedding": [0.1, 0.2, 0.3]} for i in range(len(inputs))]
323
- return {"data": data, "usage": {"total_tokens": 4, "prompt_tokens": 4}}
324
-
325
- mock_client = Mock(
326
- get_model_nvcf_id=Mock(return_value="some_id"),
327
- query_function=mock_query_function,
328
- )
329
- mock_client_class.return_value = mock_client
330
292
  embedder.warm_up()
293
+ embedder.backend = MockBackend("aa", None)
331
294
 
332
295
  empty_list_input = []
333
296
  result = embedder.run(documents=empty_list_input)
@@ -339,25 +302,6 @@ class TestNvidiaDocumentEmbedder:
339
302
  not os.environ.get("NVIDIA_API_KEY", None),
340
303
  reason="Export an env var called NVIDIA_API_KEY containing the Nvidia API key to run this test.",
341
304
  )
342
- @pytest.mark.integration
343
- def test_run_integration(self):
344
- embedder = NvidiaDocumentEmbedder("playground_nvolveqa_40k")
345
- embedder.warm_up()
346
-
347
- docs = [
348
- Document(content="I love cheese", meta={"topic": "Cuisine"}),
349
- Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}),
350
- ]
351
-
352
- result = embedder.run(docs)
353
- docs_with_embeddings = result["documents"]
354
-
355
- assert isinstance(docs_with_embeddings, list)
356
- assert len(docs_with_embeddings) == len(docs)
357
- for doc in docs_with_embeddings:
358
- assert isinstance(doc.embedding, list)
359
- assert isinstance(doc.embedding[0], float)
360
-
361
305
  @pytest.mark.skipif(
362
306
  not os.environ.get("NVIDIA_NIM_EMBEDDER_MODEL", None) or not os.environ.get("NVIDIA_NIM_ENDPOINT_URL", None),
363
307
  reason="Export an env var called NVIDIA_NIM_EMBEDDER_MODEL containing the hosted model name and "
@@ -388,15 +332,15 @@ class TestNvidiaDocumentEmbedder:
388
332
  assert isinstance(doc.embedding[0], float)
389
333
 
390
334
  @pytest.mark.skipif(
391
- not os.environ.get("NVIDIA_CATALOG_API_KEY", None),
392
- reason="Export an env var called NVIDIA_CATALOG_API_KEY containing the Nvidia API key to run this test.",
335
+ not os.environ.get("NVIDIA_API_KEY", None),
336
+ reason="Export an env var called NVIDIA_API_KEY containing the NVIDIA API key to run this test.",
393
337
  )
394
338
  @pytest.mark.integration
395
339
  def test_run_integration_with_api_catalog(self):
396
340
  embedder = NvidiaDocumentEmbedder(
397
341
  model="NV-Embed-QA",
398
342
  api_url="https://ai.api.nvidia.com/v1/retrieval/nvidia",
399
- api_key=Secret.from_env_var("NVIDIA_CATALOG_API_KEY"),
343
+ api_key=Secret.from_env_var("NVIDIA_API_KEY"),
400
344
  )
401
345
  embedder.warm_up()
402
346
 
@@ -2,7 +2,6 @@
2
2
  #
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  import os
5
- from unittest.mock import Mock, patch
6
5
 
7
6
  import pytest
8
7
  from haystack.utils import Secret
@@ -55,7 +54,7 @@ class TestNvidiaGenerator:
55
54
  assert data == {
56
55
  "type": "haystack_integrations.components.generators.nvidia.generator.NvidiaGenerator",
57
56
  "init_parameters": {
58
- "api_url": None,
57
+ "api_url": "https://integrate.api.nvidia.com/v1",
59
58
  "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"},
60
59
  "model": "playground_nemotron_steerlm_8b",
61
60
  "model_arguments": {},
@@ -94,92 +93,6 @@ class TestNvidiaGenerator:
94
93
  },
95
94
  }
96
95
 
97
- @patch("haystack_integrations.components.generators.nvidia._nvcf_backend.NvidiaCloudFunctionsClient")
98
- def test_run(self, mock_client_class):
99
- generator = NvidiaGenerator(
100
- model="playground_nemotron_steerlm_8b",
101
- api_key=Secret.from_token("fake-api-key"),
102
- model_arguments={
103
- "temperature": 0.2,
104
- "top_p": 0.7,
105
- "max_tokens": 1024,
106
- "seed": None,
107
- "bad": None,
108
- "stop": None,
109
- },
110
- )
111
- mock_client = Mock(
112
- get_model_nvcf_id=Mock(return_value="some_id"),
113
- query_function=Mock(
114
- return_value={
115
- "id": "some_id",
116
- "choices": [
117
- {
118
- "index": 0,
119
- "message": {"content": "42", "role": "assistant"},
120
- "finish_reason": "stop",
121
- }
122
- ],
123
- "usage": {"total_tokens": 21, "prompt_tokens": 19, "completion_tokens": 2},
124
- }
125
- ),
126
- )
127
- mock_client_class.return_value = mock_client
128
- generator.warm_up()
129
-
130
- result = generator.run(prompt="What is the answer?")
131
- mock_client.query_function.assert_called_once_with(
132
- "some_id",
133
- {
134
- "messages": [
135
- {"content": "What is the answer?", "role": "user"},
136
- ],
137
- "temperature": 0.2,
138
- "top_p": 0.7,
139
- "max_tokens": 1024,
140
- "seed": None,
141
- "bad": None,
142
- "stop": None,
143
- },
144
- )
145
- assert result == {
146
- "replies": ["42"],
147
- "meta": [
148
- {
149
- "finish_reason": "stop",
150
- "role": "assistant",
151
- "usage": {
152
- "total_tokens": 21,
153
- "prompt_tokens": 19,
154
- "completion_tokens": 2,
155
- },
156
- },
157
- ],
158
- }
159
-
160
- @pytest.mark.skipif(
161
- not os.environ.get("NVIDIA_API_KEY", None),
162
- reason="Export an env var called NVIDIA_API_KEY containing the Nvidia API key to run this test.",
163
- )
164
- @pytest.mark.integration
165
- def test_run_integration_with_nvcf_backend(self):
166
- generator = NvidiaGenerator(
167
- model="playground_nv_llama2_rlhf_70b",
168
- model_arguments={
169
- "temperature": 0.2,
170
- "top_p": 0.7,
171
- "max_tokens": 1024,
172
- "seed": None,
173
- "bad": None,
174
- "stop": None,
175
- },
176
- )
177
- generator.warm_up()
178
- result = generator.run(prompt="What is the answer?")
179
-
180
- assert result["replies"]
181
- assert result["meta"]
182
-
183
96
  @pytest.mark.skipif(
184
97
  not os.environ.get("NVIDIA_NIM_GENERATOR_MODEL", None) or not os.environ.get("NVIDIA_NIM_ENDPOINT_URL", None),
185
98
  reason="Export an env var called NVIDIA_NIM_GENERATOR_MODEL containing the hosted model name and "
@@ -204,15 +117,15 @@ class TestNvidiaGenerator:
204
117
  assert result["meta"]
205
118
 
206
119
  @pytest.mark.skipif(
207
- not os.environ.get("NVIDIA_CATALOG_API_KEY", None),
208
- reason="Export an env var called NVIDIA_CATALOG_API_KEY containing the Nvidia API key to run this test.",
120
+ not os.environ.get("NVIDIA_API_KEY", None),
121
+ reason="Export an env var called NVIDIA_API_KEY containing the NVIDIA API key to run this test.",
209
122
  )
210
123
  @pytest.mark.integration
211
124
  def test_run_integration_with_api_catalog(self):
212
125
  generator = NvidiaGenerator(
213
126
  model="meta/llama3-70b-instruct",
214
127
  api_url="https://integrate.api.nvidia.com/v1",
215
- api_key=Secret.from_env_var("NVIDIA_CATALOG_API_KEY"),
128
+ api_key=Secret.from_env_var("NVIDIA_API_KEY"),
216
129
  model_arguments={
217
130
  "temperature": 0.2,
218
131
  },
@@ -222,3 +135,27 @@ class TestNvidiaGenerator:
222
135
 
223
136
  assert result["replies"]
224
137
  assert result["meta"]
138
+
139
+ def test_local_nim_without_key(self) -> None:
140
+ generator = NvidiaGenerator(
141
+ model="BOGUS",
142
+ api_url="http://localhost:8000",
143
+ api_key=None,
144
+ )
145
+ generator.warm_up()
146
+
147
+ def test_hosted_nim_without_key(self):
148
+ generator0 = NvidiaGenerator(
149
+ model="BOGUS",
150
+ api_url="https://integrate.api.nvidia.com/v1",
151
+ api_key=None,
152
+ )
153
+ with pytest.raises(ValueError):
154
+ generator0.warm_up()
155
+
156
+ generator1 = NvidiaGenerator(
157
+ model="BOGUS",
158
+ api_key=None,
159
+ )
160
+ with pytest.raises(ValueError):
161
+ generator1.warm_up()
@@ -1,18 +1,29 @@
1
1
  import os
2
- from unittest.mock import Mock, patch
3
2
 
4
3
  import pytest
5
4
  from haystack.utils import Secret
6
5
  from haystack_integrations.components.embedders.nvidia import EmbeddingTruncateMode, NvidiaTextEmbedder
6
+ from haystack_integrations.components.embedders.nvidia.backend import EmbedderBackend
7
+
8
+
9
+ class MockBackend(EmbedderBackend):
10
+ def __init__(self, model, model_kwargs):
11
+ super().__init__(model, model_kwargs)
12
+
13
+ def embed(self, texts):
14
+ inputs = texts
15
+ data = [[0.1, 0.2, 0.3] for i in range(len(inputs))]
16
+ return data, {"usage": {"total_tokens": 4, "prompt_tokens": 4}}
7
17
 
8
18
 
9
19
  class TestNvidiaTextEmbedder:
10
20
  def test_init_default(self, monkeypatch):
11
21
  monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
12
- embedder = NvidiaTextEmbedder("nvolveqa_40k")
22
+ embedder = NvidiaTextEmbedder()
13
23
 
14
24
  assert embedder.api_key == Secret.from_env_var("NVIDIA_API_KEY")
15
- assert embedder.model == "nvolveqa_40k"
25
+ assert embedder.model == "NV-Embed-QA"
26
+ assert embedder.api_url == "https://ai.api.nvidia.com/v1/retrieval/nvidia"
16
27
  assert embedder.prefix == ""
17
28
  assert embedder.suffix == ""
18
29
 
@@ -20,11 +31,13 @@ class TestNvidiaTextEmbedder:
20
31
  embedder = NvidiaTextEmbedder(
21
32
  api_key=Secret.from_token("fake-api-key"),
22
33
  model="nvolveqa_40k",
34
+ api_url="https://ai.api.nvidia.com/v1/retrieval/nvidia/test",
23
35
  prefix="prefix",
24
36
  suffix="suffix",
25
37
  )
26
38
  assert embedder.api_key == Secret.from_token("fake-api-key")
27
39
  assert embedder.model == "nvolveqa_40k"
40
+ assert embedder.api_url == "https://ai.api.nvidia.com/v1/retrieval/nvidia/test"
28
41
  assert embedder.prefix == "prefix"
29
42
  assert embedder.suffix == "suffix"
30
43
 
@@ -42,7 +55,7 @@ class TestNvidiaTextEmbedder:
42
55
  "type": "haystack_integrations.components.embedders.nvidia.text_embedder.NvidiaTextEmbedder",
43
56
  "init_parameters": {
44
57
  "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"},
45
- "api_url": None,
58
+ "api_url": "https://ai.api.nvidia.com/v1/retrieval/nvidia",
46
59
  "model": "nvolveqa_40k",
47
60
  "prefix": "",
48
61
  "suffix": "",
@@ -54,6 +67,7 @@ class TestNvidiaTextEmbedder:
54
67
  monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
55
68
  component = NvidiaTextEmbedder(
56
69
  model="nvolveqa_40k",
70
+ api_url="https://example.com",
57
71
  prefix="prefix",
58
72
  suffix="suffix",
59
73
  truncate=EmbeddingTruncateMode.START,
@@ -63,7 +77,7 @@ class TestNvidiaTextEmbedder:
63
77
  "type": "haystack_integrations.components.embedders.nvidia.text_embedder.NvidiaTextEmbedder",
64
78
  "init_parameters": {
65
79
  "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"},
66
- "api_url": None,
80
+ "api_url": "https://example.com",
67
81
  "model": "nvolveqa_40k",
68
82
  "prefix": "prefix",
69
83
  "suffix": "suffix",
@@ -77,7 +91,7 @@ class TestNvidiaTextEmbedder:
77
91
  "type": "haystack_integrations.components.embedders.nvidia.text_embedder.NvidiaTextEmbedder",
78
92
  "init_parameters": {
79
93
  "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"},
80
- "api_url": None,
94
+ "api_url": "https://example.com",
81
95
  "model": "nvolveqa_40k",
82
96
  "prefix": "prefix",
83
97
  "suffix": "suffix",
@@ -86,27 +100,19 @@ class TestNvidiaTextEmbedder:
86
100
  }
87
101
  component = NvidiaTextEmbedder.from_dict(data)
88
102
  assert component.model == "nvolveqa_40k"
89
- assert component.api_url is None
103
+ assert component.api_url == "https://example.com"
90
104
  assert component.prefix == "prefix"
91
105
  assert component.suffix == "suffix"
92
106
  assert component.truncate == "START"
93
107
 
94
- @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient")
95
- def test_run(self, mock_client_class):
108
+ def test_run(self):
96
109
  embedder = NvidiaTextEmbedder(
97
110
  "playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key"), prefix="prefix ", suffix=" suffix"
98
111
  )
99
- mock_client = Mock(
100
- get_model_nvcf_id=Mock(return_value="some_id"),
101
- query_function=Mock(
102
- return_value={
103
- "data": [{"index": 0, "embedding": [0.1, 0.2, 0.3]}],
104
- "usage": {"total_tokens": 4, "prompt_tokens": 4},
105
- }
106
- ),
107
- )
108
- mock_client_class.return_value = mock_client
112
+
109
113
  embedder.warm_up()
114
+ embedder.backend = MockBackend("aa", None)
115
+
110
116
  result = embedder.run(text="The food was delicious")
111
117
 
112
118
  assert len(result["embedding"]) == 3
@@ -115,42 +121,16 @@ class TestNvidiaTextEmbedder:
115
121
  "usage": {"prompt_tokens": 4, "total_tokens": 4},
116
122
  }
117
123
 
118
- @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient")
119
- def test_run_wrong_input_format(self, mock_client_class):
124
+ def test_run_wrong_input_format(self):
120
125
  embedder = NvidiaTextEmbedder("playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key"))
121
- mock_client = Mock(
122
- get_model_nvcf_id=Mock(return_value="some_id"),
123
- query_function=Mock(
124
- return_value={
125
- "data": [{"index": 0, "embedding": [0.1, 0.2, 0.3]}],
126
- "usage": {"total_tokens": 4, "prompt_tokens": 4},
127
- }
128
- ),
129
- )
130
- mock_client_class.return_value = mock_client
131
126
  embedder.warm_up()
127
+ embedder.backend = MockBackend("aa", None)
132
128
 
133
129
  list_integers_input = [1, 2, 3]
134
130
 
135
131
  with pytest.raises(TypeError, match="NvidiaTextEmbedder expects a string as an input"):
136
132
  embedder.run(text=list_integers_input)
137
133
 
138
- @pytest.mark.skipif(
139
- not os.environ.get("NVIDIA_API_KEY", None),
140
- reason="Export an env var called NVIDIA_API_KEY containing the Nvidia API key to run this test.",
141
- )
142
- @pytest.mark.integration
143
- def test_run_integration_with_nvcf_backend(self):
144
- embedder = NvidiaTextEmbedder("playground_nvolveqa_40k")
145
- embedder.warm_up()
146
-
147
- result = embedder.run("A transformer is a deep learning architecture")
148
- embedding = result["embedding"]
149
- meta = result["meta"]
150
-
151
- assert all(isinstance(x, float) for x in embedding)
152
- assert "usage" in meta
153
-
154
134
  @pytest.mark.skipif(
155
135
  not os.environ.get("NVIDIA_NIM_EMBEDDER_MODEL", None) or not os.environ.get("NVIDIA_NIM_ENDPOINT_URL", None),
156
136
  reason="Export an env var called NVIDIA_NIM_EMBEDDER_MODEL containing the hosted model name and "
@@ -175,15 +155,15 @@ class TestNvidiaTextEmbedder:
175
155
  assert "usage" in meta
176
156
 
177
157
  @pytest.mark.skipif(
178
- not os.environ.get("NVIDIA_CATALOG_API_KEY", None),
179
- reason="Export an env var called NVIDIA_CATALOG_API_KEY containing the Nvidia API key to run this test.",
158
+ not os.environ.get("NVIDIA_API_KEY", None),
159
+ reason="Export an env var called NVIDIA_API_KEY containing the NVIDIA API key to run this test.",
180
160
  )
181
161
  @pytest.mark.integration
182
162
  def test_run_integration_with_api_catalog(self):
183
163
  embedder = NvidiaTextEmbedder(
184
164
  model="NV-Embed-QA",
185
165
  api_url="https://ai.api.nvidia.com/v1/retrieval/nvidia",
186
- api_key=Secret.from_env_var("NVIDIA_CATALOG_API_KEY"),
166
+ api_key=Secret.from_env_var("NVIDIA_API_KEY"),
187
167
  )
188
168
  embedder.warm_up()
189
169
 
@@ -1,111 +0,0 @@
1
- import warnings
2
- from dataclasses import asdict, dataclass
3
- from typing import Any, Dict, List, Literal, Optional, Tuple, Union
4
-
5
- from haystack.utils.auth import Secret
6
- from haystack_integrations.utils.nvidia import NvidiaCloudFunctionsClient
7
-
8
- from .backend import EmbedderBackend
9
-
10
- MAX_INPUT_STRING_LENGTH = 2048
11
- MAX_INPUTS = 50
12
-
13
-
14
- class NvcfBackend(EmbedderBackend):
15
- def __init__(
16
- self,
17
- model: str,
18
- api_key: Secret,
19
- model_kwargs: Optional[Dict[str, Any]] = None,
20
- ):
21
- warnings.warn("Nvidia NGC is deprecated, use Nvidia NIM instead.", DeprecationWarning, stacklevel=2)
22
- if not model.startswith("playground_"):
23
- model = f"playground_{model}"
24
-
25
- super().__init__(model=model, model_kwargs=model_kwargs)
26
-
27
- self.api_key = api_key
28
- self.client = NvidiaCloudFunctionsClient(
29
- api_key=api_key,
30
- headers={
31
- "Content-Type": "application/json",
32
- "Accept": "application/json",
33
- },
34
- )
35
- self.nvcf_id = self.client.get_model_nvcf_id(self.model_name)
36
-
37
- def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]:
38
- request = EmbeddingsRequest(input=texts, **self.model_kwargs).to_dict()
39
- json_response = self.client.query_function(self.nvcf_id, request)
40
- response = EmbeddingsResponse.from_dict(json_response)
41
-
42
- # Sort resulting embeddings by index
43
- assert all(isinstance(r.embedding, list) for r in response.data)
44
- sorted_embeddings: List[List[float]] = [r.embedding for r in sorted(response.data, key=lambda e: e.index)] # type: ignore
45
- metadata = {"usage": response.usage.to_dict()}
46
- return sorted_embeddings, metadata
47
-
48
-
49
- @dataclass
50
- class EmbeddingsRequest:
51
- input: Union[str, List[str]]
52
- model: Literal["query", "passage"]
53
- encoding_format: Literal["float", "base64"] = "float"
54
-
55
- def __post_init__(self):
56
- if isinstance(self.input, list):
57
- if len(self.input) > MAX_INPUTS:
58
- msg = f"The number of inputs should not exceed {MAX_INPUTS}"
59
- raise ValueError(msg)
60
- else:
61
- self.input = [self.input]
62
-
63
- if len(self.input) == 0:
64
- msg = "The number of inputs should not be 0"
65
- raise ValueError(msg)
66
-
67
- if any(len(x) > MAX_INPUT_STRING_LENGTH for x in self.input):
68
- msg = f"The length of each input should not exceed {MAX_INPUT_STRING_LENGTH} characters"
69
- raise ValueError(msg)
70
-
71
- if self.encoding_format not in ["float", "base64"]:
72
- msg = "encoding_format should be either 'float' or 'base64'"
73
- raise ValueError(msg)
74
-
75
- if self.model not in ["query", "passage"]:
76
- msg = "model should be either 'query' or 'passage'"
77
- raise ValueError(msg)
78
-
79
- def to_dict(self) -> Dict[str, Any]:
80
- return asdict(self)
81
-
82
-
83
- @dataclass
84
- class Usage:
85
- prompt_tokens: int
86
- total_tokens: int
87
-
88
- def to_dict(self) -> Dict[str, Any]:
89
- return asdict(self)
90
-
91
-
92
- @dataclass
93
- class Embeddings:
94
- index: int
95
- embedding: Union[List[float], str]
96
-
97
-
98
- @dataclass
99
- class EmbeddingsResponse:
100
- data: List[Embeddings]
101
- usage: Usage
102
-
103
- @classmethod
104
- def from_dict(cls, data: Dict[str, Any]) -> "EmbeddingsResponse":
105
- try:
106
- embeddings = [Embeddings(**x) for x in data["data"]]
107
- usage = Usage(**data["usage"])
108
- return cls(data=embeddings, usage=usage)
109
- except (KeyError, TypeError) as e:
110
- msg = f"Failed to parse EmbeddingsResponse from data: {data}"
111
- raise ValueError(msg) from e
@@ -1,119 +0,0 @@
1
- import warnings
2
- from dataclasses import asdict, dataclass
3
- from typing import Any, Dict, List, Optional, Tuple
4
-
5
- from haystack.utils.auth import Secret
6
- from haystack_integrations.utils.nvidia import NvidiaCloudFunctionsClient
7
-
8
- from .backend import GeneratorBackend
9
-
10
-
11
- class NvcfBackend(GeneratorBackend):
12
- def __init__(
13
- self,
14
- model: str,
15
- api_key: Secret,
16
- model_kwargs: Optional[Dict[str, Any]] = None,
17
- ):
18
- warnings.warn("Nvidia NGC is deprecated, use Nvidia NIM instead.", DeprecationWarning, stacklevel=2)
19
- if not model.startswith("playground_"):
20
- model = f"playground_{model}"
21
-
22
- super().__init__(model=model, model_kwargs=model_kwargs)
23
-
24
- self.api_key = api_key
25
- self.client = NvidiaCloudFunctionsClient(
26
- api_key=api_key,
27
- headers={
28
- "Content-Type": "application/json",
29
- "Accept": "application/json",
30
- },
31
- )
32
- self.nvcf_id = self.client.get_model_nvcf_id(self.model_name)
33
-
34
- def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]:
35
- messages = [Message(role="user", content=prompt)]
36
- request = GenerationRequest(messages=messages, **self.model_kwargs).to_dict()
37
- json_response = self.client.query_function(self.nvcf_id, request)
38
- response = GenerationResponse.from_dict(json_response)
39
-
40
- replies = []
41
- meta = []
42
- for choice in response.choices:
43
- replies.append(choice.message.content)
44
- meta.append(
45
- {
46
- "role": choice.message.role,
47
- "finish_reason": choice.finish_reason,
48
- "usage": {
49
- "completion_tokens": response.usage.completion_tokens,
50
- "prompt_tokens": response.usage.prompt_tokens,
51
- "total_tokens": response.usage.total_tokens,
52
- },
53
- }
54
- )
55
- return replies, meta
56
-
57
-
58
- @dataclass
59
- class Message:
60
- content: str
61
- role: str
62
-
63
-
64
- @dataclass
65
- class GenerationRequest:
66
- messages: List[Message]
67
- temperature: float = 0.2
68
- top_p: float = 0.7
69
- max_tokens: int = 1024
70
- seed: Optional[int] = None
71
- bad: Optional[List[str]] = None
72
- stop: Optional[List[str]] = None
73
-
74
- def to_dict(self) -> Dict[str, Any]:
75
- return asdict(self)
76
-
77
-
78
- @dataclass
79
- class Choice:
80
- index: int
81
- message: Message
82
- finish_reason: str
83
-
84
-
85
- @dataclass
86
- class Usage:
87
- completion_tokens: int
88
- prompt_tokens: int
89
- total_tokens: int
90
-
91
-
92
- @dataclass
93
- class GenerationResponse:
94
- id: str
95
- choices: List[Choice]
96
- usage: Usage
97
-
98
- @classmethod
99
- def from_dict(cls, data: dict) -> "GenerationResponse":
100
- try:
101
- return cls(
102
- id=data["id"],
103
- choices=[
104
- Choice(
105
- index=choice["index"],
106
- message=Message(content=choice["message"]["content"], role=choice["message"]["role"]),
107
- finish_reason=choice["finish_reason"],
108
- )
109
- for choice in data["choices"]
110
- ],
111
- usage=Usage(
112
- completion_tokens=data["usage"]["completion_tokens"],
113
- prompt_tokens=data["usage"]["prompt_tokens"],
114
- total_tokens=data["usage"]["total_tokens"],
115
- ),
116
- )
117
- except (KeyError, TypeError) as e:
118
- msg = f"Failed to parse {cls.__name__} from data: {data}"
119
- raise ValueError(msg) from e
@@ -1,69 +0,0 @@
1
- # SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
2
- #
3
- # SPDX-License-Identifier: Apache-2.0
4
- from dataclasses import asdict, dataclass
5
- from typing import Any, Dict, List, Optional
6
-
7
-
8
- @dataclass
9
- class Message:
10
- content: str
11
- role: str
12
-
13
-
14
- @dataclass
15
- class GenerationRequest:
16
- messages: List[Message]
17
- temperature: float = 0.2
18
- top_p: float = 0.7
19
- max_tokens: int = 1024
20
- seed: Optional[int] = None
21
- bad: Optional[List[str]] = None
22
- stop: Optional[List[str]] = None
23
-
24
- def to_dict(self) -> Dict[str, Any]:
25
- return asdict(self)
26
-
27
-
28
- @dataclass
29
- class Choice:
30
- index: int
31
- message: Message
32
- finish_reason: str
33
-
34
-
35
- @dataclass
36
- class Usage:
37
- completion_tokens: int
38
- prompt_tokens: int
39
- total_tokens: int
40
-
41
-
42
- @dataclass
43
- class GenerationResponse:
44
- id: str
45
- choices: List[Choice]
46
- usage: Usage
47
-
48
- @classmethod
49
- def from_dict(cls, data: dict) -> "GenerationResponse":
50
- try:
51
- return cls(
52
- id=data["id"],
53
- choices=[
54
- Choice(
55
- index=choice["index"],
56
- message=Message(content=choice["message"]["content"], role=choice["message"]["role"]),
57
- finish_reason=choice["finish_reason"],
58
- )
59
- for choice in data["choices"]
60
- ],
61
- usage=Usage(
62
- completion_tokens=data["usage"]["completion_tokens"],
63
- prompt_tokens=data["usage"]["prompt_tokens"],
64
- total_tokens=data["usage"]["total_tokens"],
65
- ),
66
- )
67
- except (KeyError, TypeError) as e:
68
- msg = f"Failed to parse {cls.__name__} from data: {data}"
69
- raise ValueError(msg) from e
@@ -1,3 +0,0 @@
1
- from .client import NvidiaCloudFunctionsClient
2
-
3
- __all__ = ["NvidiaCloudFunctionsClient"]
@@ -1,82 +0,0 @@
1
- import copy
2
- from dataclasses import dataclass
3
- from typing import Dict, Optional
4
-
5
- import requests
6
- from haystack.utils import Secret
7
-
8
- FUNCTIONS_ENDPOINT = "https://api.nvcf.nvidia.com/v2/nvcf/functions"
9
- INVOKE_ENDPOINT = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions"
10
- STATUS_ENDPOINT = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/status"
11
-
12
- ACCEPTED_STATUS_CODE = 202
13
-
14
-
15
- @dataclass
16
- class AvailableNvidiaCloudFunctions:
17
- name: str
18
- id: str
19
- status: Optional[str] = None
20
-
21
-
22
- class NvidiaCloudFunctionsClient:
23
- def __init__(self, *, api_key: Secret, headers: Dict[str, str], timeout: int = 60):
24
- self.api_key = api_key.resolve_value()
25
- if self.api_key is None:
26
- msg = "Nvidia Cloud Functions API key is not set."
27
- raise ValueError(msg)
28
-
29
- self.fetch_url_format = STATUS_ENDPOINT
30
- self.headers = copy.deepcopy(headers)
31
- self.headers.update(
32
- {
33
- "Authorization": f"Bearer {self.api_key}",
34
- }
35
- )
36
- self.timeout = timeout
37
- self.session = requests.Session()
38
-
39
- def query_function(self, func_id: str, payload: Dict[str, str]) -> Dict[str, str]:
40
- invoke_url = f"{INVOKE_ENDPOINT}/{func_id}"
41
-
42
- response = self.session.post(invoke_url, headers=self.headers, json=payload, timeout=self.timeout)
43
- request_id = response.headers.get("NVCF-REQID")
44
- if request_id is None:
45
- msg = "NVCF-REQID header not found in response"
46
- raise ValueError(msg)
47
-
48
- while response.status_code == ACCEPTED_STATUS_CODE:
49
- fetch_url = f"{self.fetch_url_format}/{request_id}"
50
- response = self.session.get(fetch_url, headers=self.headers, timeout=self.timeout)
51
-
52
- response.raise_for_status()
53
- return response.json()
54
-
55
- def available_functions(self) -> Dict[str, AvailableNvidiaCloudFunctions]:
56
- response = self.session.get(FUNCTIONS_ENDPOINT, headers=self.headers, timeout=self.timeout)
57
- response.raise_for_status()
58
-
59
- return {
60
- f["name"]: AvailableNvidiaCloudFunctions(
61
- name=f["name"],
62
- id=f["id"],
63
- status=f.get("status"),
64
- )
65
- for f in response.json()["functions"]
66
- }
67
-
68
- def get_model_nvcf_id(self, model: str) -> str:
69
- """
70
- Returns the Nvidia Cloud Functions UUID for the given model.
71
- """
72
-
73
- available_functions = self.available_functions()
74
- func = available_functions.get(model)
75
- if func is None:
76
- msg = f"Model '{model}' was not found on the Nvidia Cloud Functions backend"
77
- raise ValueError(msg)
78
- elif func.status != "ACTIVE":
79
- msg = f"Model '{model}' is not currently active/usable on the Nvidia Cloud Functions backend"
80
- raise ValueError(msg)
81
-
82
- return func.id