nvidia-haystack 0.0.3__tar.gz → 0.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/.gitignore +9 -0
  2. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/PKG-INFO +1 -1
  3. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/pyproject.toml +4 -4
  4. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/src/haystack_integrations/components/embedders/nvidia/document_embedder.py +15 -24
  5. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/src/haystack_integrations/components/embedders/nvidia/text_embedder.py +15 -24
  6. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/src/haystack_integrations/components/generators/nvidia/generator.py +18 -21
  7. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/tests/test_document_embedder.py +31 -87
  8. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/tests/test_generator.py +28 -91
  9. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/tests/test_text_embedder.py +30 -50
  10. nvidia_haystack-0.0.3/src/haystack_integrations/components/embedders/nvidia/_nvcf_backend.py +0 -111
  11. nvidia_haystack-0.0.3/src/haystack_integrations/components/generators/nvidia/_nvcf_backend.py +0 -119
  12. nvidia_haystack-0.0.3/src/haystack_integrations/components/generators/nvidia/_schema.py +0 -69
  13. nvidia_haystack-0.0.3/src/haystack_integrations/utils/nvidia/__init__.py +0 -3
  14. nvidia_haystack-0.0.3/src/haystack_integrations/utils/nvidia/client.py +0 -82
  15. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/LICENSE.txt +0 -0
  16. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/README.md +0 -0
  17. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/pydoc/config.yml +0 -0
  18. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/src/haystack_integrations/components/embedders/nvidia/__init__.py +0 -0
  19. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py +0 -0
  20. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/src/haystack_integrations/components/embedders/nvidia/backend.py +0 -0
  21. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/src/haystack_integrations/components/embedders/nvidia/truncate.py +0 -0
  22. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/src/haystack_integrations/components/generators/nvidia/__init__.py +0 -0
  23. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/src/haystack_integrations/components/generators/nvidia/_nim_backend.py +0 -0
  24. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/src/haystack_integrations/components/generators/nvidia/backend.py +0 -0
  25. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/src/haystack_integrations/components/generators/nvidia/chat/__init__.py +0 -0
  26. {nvidia_haystack-0.0.3 → nvidia_haystack-0.0.4}/tests/__init__.py +0 -0
@@ -135,3 +135,12 @@ dmypy.json
135
135
  # Docs generation artifacts
136
136
  _readme_*.md
137
137
  .idea
138
+
139
+ # macOS
140
+ .DS_Store
141
+
142
+ # http cache (requests-cache)
143
+ **/http_cache.sqlite
144
+
145
+ # ruff
146
+ .ruff_cache
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: nvidia-haystack
3
- Version: 0.0.3
3
+ Version: 0.0.4
4
4
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/nvidia#readme
5
5
  Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
6
6
  Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/nvidia
@@ -42,10 +42,10 @@ root = "../.."
42
42
  git_describe_command = 'git describe --tags --match="integrations/nvidia-v[0-9]*"'
43
43
 
44
44
  [tool.hatch.envs.default]
45
- dependencies = ["coverage[toml]>=6.5", "pytest", "haystack-pydoc-tools"]
45
+ dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools"]
46
46
  [tool.hatch.envs.default.scripts]
47
- test = "pytest {args:tests}"
48
- test-cov = "coverage run -m pytest {args:tests}"
47
+ test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
48
+ test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
49
49
  cov-report = ["- coverage combine", "coverage report"]
50
50
  cov = ["test-cov", "cov-report"]
51
51
  docs = ["pydoc-markdown pydoc/config.yml"]
@@ -58,7 +58,7 @@ detached = true
58
58
  dependencies = ["black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"]
59
59
  [tool.hatch.envs.lint.scripts]
60
60
  typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}"
61
- style = ["ruff {args:.}", "black --check --diff {args:.}"]
61
+ style = ["ruff check {args:.}", "black --check --diff {args:.}"]
62
62
  fmt = ["black {args:.}", "ruff --fix {args:.}", "style"]
63
63
  all = ["style", "typing"]
64
64
 
@@ -5,7 +5,6 @@ from haystack.utils import Secret, deserialize_secrets_inplace
5
5
  from tqdm import tqdm
6
6
 
7
7
  from ._nim_backend import NimBackend
8
- from ._nvcf_backend import NvcfBackend
9
8
  from .backend import EmbedderBackend
10
9
  from .truncate import EmbeddingTruncateMode
11
10
 
@@ -14,8 +13,7 @@ from .truncate import EmbeddingTruncateMode
14
13
  class NvidiaDocumentEmbedder:
15
14
  """
16
15
  A component for embedding documents using embedding models provided by
17
- [NVIDIA AI Foundation Endpoints](https://www.nvidia.com/en-us/ai-data-science/foundation-models/)
18
- and NVIDIA Inference Microservices.
16
+ [NVIDIA NIMs](https://ai.nvidia.com).
19
17
 
20
18
  Usage example:
21
19
  ```python
@@ -23,7 +21,7 @@ class NvidiaDocumentEmbedder:
23
21
 
24
22
  doc = Document(content="I love pizza!")
25
23
 
26
- text_embedder = NvidiaDocumentEmbedder(model="nvolveqa_40k")
24
+ text_embedder = NvidiaDocumentEmbedder(model="NV-Embed-QA", api_url="https://ai.api.nvidia.com/v1/retrieval/nvidia")
27
25
  text_embedder.warm_up()
28
26
 
29
27
  result = document_embedder.run([doc])
@@ -33,9 +31,9 @@ class NvidiaDocumentEmbedder:
33
31
 
34
32
  def __init__(
35
33
  self,
36
- model: str,
34
+ model: str = "NV-Embed-QA",
37
35
  api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"),
38
- api_url: Optional[str] = None,
36
+ api_url: str = "https://ai.api.nvidia.com/v1/retrieval/nvidia",
39
37
  prefix: str = "",
40
38
  suffix: str = "",
41
39
  batch_size: int = 32,
@@ -50,9 +48,9 @@ class NvidiaDocumentEmbedder:
50
48
  :param model:
51
49
  Embedding model to use.
52
50
  :param api_key:
53
- API key for the NVIDIA AI Foundation Endpoints.
51
+ API key for the NVIDIA NIM.
54
52
  :param api_url:
55
- Custom API URL for the NVIDIA Inference Microservices.
53
+ Custom API URL for the NVIDIA NIM.
56
54
  :param prefix:
57
55
  A string to add to the beginning of each text.
58
56
  :param suffix:
@@ -95,22 +93,15 @@ class NvidiaDocumentEmbedder:
95
93
  if self._initialized:
96
94
  return
97
95
 
98
- if self.api_url is None:
99
- if self.api_key is None:
100
- msg = "API key is required for NVIDIA AI Foundation Endpoints."
101
- raise ValueError(msg)
102
-
103
- self.backend = NvcfBackend(self.model, api_key=self.api_key, model_kwargs={"model": "passage"})
104
- else:
105
- model_kwargs = {"input_type": "passage"}
106
- if self.truncate is not None:
107
- model_kwargs["truncate"] = str(self.truncate)
108
- self.backend = NimBackend(
109
- self.model,
110
- api_url=self.api_url,
111
- api_key=self.api_key,
112
- model_kwargs=model_kwargs,
113
- )
96
+ model_kwargs = {"input_type": "passage"}
97
+ if self.truncate is not None:
98
+ model_kwargs["truncate"] = str(self.truncate)
99
+ self.backend = NimBackend(
100
+ self.model,
101
+ api_url=self.api_url,
102
+ api_key=self.api_key,
103
+ model_kwargs=model_kwargs,
104
+ )
114
105
 
115
106
  self._initialized = True
116
107
 
@@ -4,7 +4,6 @@ from haystack import component, default_from_dict, default_to_dict
4
4
  from haystack.utils import Secret, deserialize_secrets_inplace
5
5
 
6
6
  from ._nim_backend import NimBackend
7
- from ._nvcf_backend import NvcfBackend
8
7
  from .backend import EmbedderBackend
9
8
  from .truncate import EmbeddingTruncateMode
10
9
 
@@ -13,8 +12,7 @@ from .truncate import EmbeddingTruncateMode
13
12
  class NvidiaTextEmbedder:
14
13
  """
15
14
  A component for embedding strings using embedding models provided by
16
- [NVIDIA AI Foundation Endpoints](https://www.nvidia.com/en-us/ai-data-science/foundation-models/)
17
- and NVIDIA Inference Microservices.
15
+ [NVIDIA NIMs](https://ai.nvidia.com).
18
16
 
19
17
  For models that differentiate between query and document inputs,
20
18
  this component embeds the input string as a query.
@@ -25,7 +23,7 @@ class NvidiaTextEmbedder:
25
23
 
26
24
  text_to_embed = "I love pizza!"
27
25
 
28
- text_embedder = NvidiaTextEmbedder(model="nvolveqa_40k")
26
+ text_embedder = NvidiaTextEmbedder(model="NV-Embed-QA", api_url="https://ai.api.nvidia.com/v1/retrieval/nvidia")
29
27
  text_embedder.warm_up()
30
28
 
31
29
  print(text_embedder.run(text_to_embed))
@@ -34,9 +32,9 @@ class NvidiaTextEmbedder:
34
32
 
35
33
  def __init__(
36
34
  self,
37
- model: str,
35
+ model: str = "NV-Embed-QA",
38
36
  api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"),
39
- api_url: Optional[str] = None,
37
+ api_url: str = "https://ai.api.nvidia.com/v1/retrieval/nvidia",
40
38
  prefix: str = "",
41
39
  suffix: str = "",
42
40
  truncate: Optional[Union[EmbeddingTruncateMode, str]] = None,
@@ -47,9 +45,9 @@ class NvidiaTextEmbedder:
47
45
  :param model:
48
46
  Embedding model to use.
49
47
  :param api_key:
50
- API key for the NVIDIA AI Foundation Endpoints.
48
+ API key for the NVIDIA NIM.
51
49
  :param api_url:
52
- Custom API URL for the NVIDIA Inference Microservices.
50
+ Custom API URL for the NVIDIA NIM.
53
51
  :param prefix:
54
52
  A string to add to the beginning of each text.
55
53
  :param suffix:
@@ -79,22 +77,15 @@ class NvidiaTextEmbedder:
79
77
  if self._initialized:
80
78
  return
81
79
 
82
- if self.api_url is None:
83
- if self.api_key is None:
84
- msg = "API key is required for NVIDIA AI Foundation Endpoints."
85
- raise ValueError(msg)
86
-
87
- self.backend = NvcfBackend(self.model, api_key=self.api_key, model_kwargs={"model": "query"})
88
- else:
89
- model_kwargs = {"input_type": "query"}
90
- if self.truncate is not None:
91
- model_kwargs["truncate"] = str(self.truncate)
92
- self.backend = NimBackend(
93
- self.model,
94
- api_url=self.api_url,
95
- api_key=self.api_key,
96
- model_kwargs=model_kwargs,
97
- )
80
+ model_kwargs = {"input_type": "query"}
81
+ if self.truncate is not None:
82
+ model_kwargs["truncate"] = str(self.truncate)
83
+ self.backend = NimBackend(
84
+ self.model,
85
+ api_url=self.api_url,
86
+ api_key=self.api_key,
87
+ model_kwargs=model_kwargs,
88
+ )
98
89
 
99
90
  self._initialized = True
100
91
 
@@ -7,23 +7,23 @@ from haystack import component, default_from_dict, default_to_dict
7
7
  from haystack.utils.auth import Secret, deserialize_secrets_inplace
8
8
 
9
9
  from ._nim_backend import NimBackend
10
- from ._nvcf_backend import NvcfBackend
11
10
  from .backend import GeneratorBackend
12
11
 
12
+ _DEFAULT_API_URL = "https://integrate.api.nvidia.com/v1"
13
+
13
14
 
14
15
  @component
15
16
  class NvidiaGenerator:
16
17
  """
17
18
  A component for generating text using generative models provided by
18
- [NVIDIA AI Foundation Endpoints](https://www.nvidia.com/en-us/ai-data-science/foundation-models/)
19
- and NVIDIA Inference Microservices.
19
+ [NVIDIA NIMs](https://ai.nvidia.com).
20
20
 
21
21
  Usage example:
22
22
  ```python
23
23
  from haystack_integrations.components.generators.nvidia import NvidiaGenerator
24
24
 
25
25
  generator = NvidiaGenerator(
26
- model="nv_llama2_rlhf_70b",
26
+ model="meta/llama3-70b-instruct",
27
27
  model_arguments={
28
28
  "temperature": 0.2,
29
29
  "top_p": 0.7,
@@ -42,7 +42,7 @@ class NvidiaGenerator:
42
42
  def __init__(
43
43
  self,
44
44
  model: str,
45
- api_url: Optional[str] = None,
45
+ api_url: str = _DEFAULT_API_URL,
46
46
  api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"),
47
47
  model_arguments: Optional[Dict[str, Any]] = None,
48
48
  ):
@@ -51,15 +51,15 @@ class NvidiaGenerator:
51
51
 
52
52
  :param model:
53
53
  Name of the model to use for text generation.
54
- See the [Nvidia catalog](https://catalog.ngc.nvidia.com/ai-foundation-models)
54
+ See the [NVIDIA NIMs](https://ai.nvidia.com)
55
55
  for more information on the supported models.
56
56
  :param api_key:
57
- API key for the NVIDIA AI Foundation Endpoints.
57
+ API key for the NVIDIA NIM.
58
58
  :param api_url:
59
- Custom API URL for the NVIDIA Inference Microservices.
59
+ Custom API URL for the NVIDIA NIM.
60
60
  :param model_arguments:
61
61
  Additional arguments to pass to the model provider. Different models accept different arguments.
62
- Search your model in the [Nvidia catalog](https://catalog.ngc.nvidia.com/ai-foundation-models)
62
+ Search your model in the [NVIDIA NIMs](https://ai.nvidia.com)
63
63
  to know the supported arguments.
64
64
  """
65
65
  self._model = model
@@ -76,18 +76,15 @@ class NvidiaGenerator:
76
76
  if self._backend is not None:
77
77
  return
78
78
 
79
- if self._api_url is None:
80
- if self._api_key is None:
81
- msg = "API key is required for NVIDIA AI Foundation Endpoints."
82
- raise ValueError(msg)
83
- self._backend = NvcfBackend(self._model, api_key=self._api_key, model_kwargs=self._model_arguments)
84
- else:
85
- self._backend = NimBackend(
86
- self._model,
87
- api_url=self._api_url,
88
- api_key=self._api_key,
89
- model_kwargs=self._model_arguments,
90
- )
79
+ if self._api_url == _DEFAULT_API_URL and self._api_key is None:
80
+ msg = "API key is required for hosted NVIDIA NIMs."
81
+ raise ValueError(msg)
82
+ self._backend = NimBackend(
83
+ self._model,
84
+ api_url=self._api_url,
85
+ api_key=self._api_key,
86
+ model_kwargs=self._model_arguments,
87
+ )
91
88
 
92
89
  def to_dict(self) -> Dict[str, Any]:
93
90
  """
@@ -1,19 +1,30 @@
1
1
  import os
2
- from unittest.mock import Mock, patch
3
2
 
4
3
  import pytest
5
4
  from haystack import Document
6
5
  from haystack.utils import Secret
7
6
  from haystack_integrations.components.embedders.nvidia import EmbeddingTruncateMode, NvidiaDocumentEmbedder
7
+ from haystack_integrations.components.embedders.nvidia.backend import EmbedderBackend
8
+
9
+
10
+ class MockBackend(EmbedderBackend):
11
+ def __init__(self, model, model_kwargs):
12
+ super().__init__(model, model_kwargs)
13
+
14
+ def embed(self, texts):
15
+ inputs = texts
16
+ data = [[0.1, 0.2, 0.3] for i in range(len(inputs))]
17
+ return data, {"usage": {"total_tokens": 4, "prompt_tokens": 4}}
8
18
 
9
19
 
10
20
  class TestNvidiaDocumentEmbedder:
11
21
  def test_init_default(self, monkeypatch):
12
22
  monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
13
- embedder = NvidiaDocumentEmbedder("nvolveqa_40k")
23
+ embedder = NvidiaDocumentEmbedder()
14
24
 
15
25
  assert embedder.api_key == Secret.from_env_var("NVIDIA_API_KEY")
16
- assert embedder.model == "nvolveqa_40k"
26
+ assert embedder.model == "NV-Embed-QA"
27
+ assert embedder.api_url == "https://ai.api.nvidia.com/v1/retrieval/nvidia"
17
28
  assert embedder.prefix == ""
18
29
  assert embedder.suffix == ""
19
30
  assert embedder.batch_size == 32
@@ -25,6 +36,7 @@ class TestNvidiaDocumentEmbedder:
25
36
  embedder = NvidiaDocumentEmbedder(
26
37
  api_key=Secret.from_token("fake-api-key"),
27
38
  model="nvolveqa_40k",
39
+ api_url="https://ai.api.nvidia.com/v1/retrieval/nvidia/test",
28
40
  prefix="prefix",
29
41
  suffix="suffix",
30
42
  batch_size=30,
@@ -35,6 +47,7 @@ class TestNvidiaDocumentEmbedder:
35
47
 
36
48
  assert embedder.api_key == Secret.from_token("fake-api-key")
37
49
  assert embedder.model == "nvolveqa_40k"
50
+ assert embedder.api_url == "https://ai.api.nvidia.com/v1/retrieval/nvidia/test"
38
51
  assert embedder.prefix == "prefix"
39
52
  assert embedder.suffix == "suffix"
40
53
  assert embedder.batch_size == 30
@@ -56,7 +69,7 @@ class TestNvidiaDocumentEmbedder:
56
69
  "type": "haystack_integrations.components.embedders.nvidia.document_embedder.NvidiaDocumentEmbedder",
57
70
  "init_parameters": {
58
71
  "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"},
59
- "api_url": None,
72
+ "api_url": "https://ai.api.nvidia.com/v1/retrieval/nvidia",
60
73
  "model": "playground_nvolveqa_40k",
61
74
  "prefix": "",
62
75
  "suffix": "",
@@ -117,7 +130,7 @@ class TestNvidiaDocumentEmbedder:
117
130
  }
118
131
  component = NvidiaDocumentEmbedder.from_dict(data)
119
132
  assert component.model == "nvolveqa_40k"
120
- assert component.api_url is None
133
+ assert component.api_url == "https://example.com"
121
134
  assert component.prefix == "prefix"
122
135
  assert component.suffix == "suffix"
123
136
  assert component.batch_size == 32
@@ -169,8 +182,7 @@ class TestNvidiaDocumentEmbedder:
169
182
  "my_prefix document number 4 my_suffix",
170
183
  ]
171
184
 
172
- @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient")
173
- def test_embed_batch(self, mock_client_class):
185
+ def test_embed_batch(self):
174
186
  texts = ["text 1", "text 2", "text 3", "text 4", "text 5"]
175
187
 
176
188
  embedder = NvidiaDocumentEmbedder(
@@ -178,17 +190,8 @@ class TestNvidiaDocumentEmbedder:
178
190
  api_key=Secret.from_token("fake-api-key"),
179
191
  )
180
192
 
181
- def mock_query_function(_, payload):
182
- inputs = payload["input"]
183
- data = [{"index": i, "embedding": [0.1, 0.2, 0.3]} for i in range(len(inputs))]
184
- return {"data": data, "usage": {"total_tokens": 4, "prompt_tokens": 4}}
185
-
186
- mock_client = Mock(
187
- get_model_nvcf_id=Mock(return_value="some_id"),
188
- query_function=mock_query_function,
189
- )
190
- mock_client_class.return_value = mock_client
191
193
  embedder.warm_up()
194
+ embedder.backend = MockBackend("aa", None)
192
195
 
193
196
  embeddings, metadata = embedder._embed_batch(texts_to_embed=texts, batch_size=2)
194
197
 
@@ -201,8 +204,7 @@ class TestNvidiaDocumentEmbedder:
201
204
 
202
205
  assert metadata == {"usage": {"prompt_tokens": 3 * 4, "total_tokens": 3 * 4}}
203
206
 
204
- @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient")
205
- def test_run(self, mock_client_class):
207
+ def test_run(self):
206
208
  docs = [
207
209
  Document(content="I love cheese", meta={"topic": "Cuisine"}),
208
210
  Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}),
@@ -218,17 +220,8 @@ class TestNvidiaDocumentEmbedder:
218
220
  embedding_separator=" | ",
219
221
  )
220
222
 
221
- def mock_query_function(_, payload):
222
- inputs = payload["input"]
223
- data = [{"index": i, "embedding": [0.1, 0.2, 0.3]} for i in range(len(inputs))]
224
- return {"data": data, "usage": {"total_tokens": 4, "prompt_tokens": 4}}
225
-
226
- mock_client = Mock(
227
- get_model_nvcf_id=Mock(return_value="some_id"),
228
- query_function=mock_query_function,
229
- )
230
- mock_client_class.return_value = mock_client
231
223
  embedder.warm_up()
224
+ embedder.backend = MockBackend("aa", None)
232
225
 
233
226
  result = embedder.run(documents=docs)
234
227
 
@@ -244,8 +237,7 @@ class TestNvidiaDocumentEmbedder:
244
237
  assert all(isinstance(x, float) for x in doc.embedding)
245
238
  assert metadata == {"usage": {"prompt_tokens": 4, "total_tokens": 4}}
246
239
 
247
- @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient")
248
- def test_run_custom_batch_size(self, mock_client_class):
240
+ def test_run_custom_batch_size(self):
249
241
  docs = [
250
242
  Document(content="I love cheese", meta={"topic": "Cuisine"}),
251
243
  Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}),
@@ -261,17 +253,8 @@ class TestNvidiaDocumentEmbedder:
261
253
  batch_size=1,
262
254
  )
263
255
 
264
- def mock_query_function(_, payload):
265
- inputs = payload["input"]
266
- data = [{"index": i, "embedding": [0.1, 0.2, 0.3]} for i in range(len(inputs))]
267
- return {"data": data, "usage": {"total_tokens": 4, "prompt_tokens": 4}}
268
-
269
- mock_client = Mock(
270
- get_model_nvcf_id=Mock(return_value="some_id"),
271
- query_function=mock_query_function,
272
- )
273
- mock_client_class.return_value = mock_client
274
256
  embedder.warm_up()
257
+ embedder.backend = MockBackend("aa", None)
275
258
 
276
259
  result = embedder.run(documents=docs)
277
260
 
@@ -288,21 +271,11 @@ class TestNvidiaDocumentEmbedder:
288
271
 
289
272
  assert metadata == {"usage": {"prompt_tokens": 2 * 4, "total_tokens": 2 * 4}}
290
273
 
291
- @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient")
292
- def test_run_wrong_input_format(self, mock_client_class):
274
+ def test_run_wrong_input_format(self):
293
275
  embedder = NvidiaDocumentEmbedder("playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key"))
294
276
 
295
- def mock_query_function(_, payload):
296
- inputs = payload["input"]
297
- data = [{"index": i, "embedding": [0.1, 0.2, 0.3]} for i in range(len(inputs))]
298
- return {"data": data, "usage": {"total_tokens": 4, "prompt_tokens": 4}}
299
-
300
- mock_client = Mock(
301
- get_model_nvcf_id=Mock(return_value="some_id"),
302
- query_function=mock_query_function,
303
- )
304
- mock_client_class.return_value = mock_client
305
277
  embedder.warm_up()
278
+ embedder.backend = MockBackend("aa", None)
306
279
 
307
280
  string_input = "text"
308
281
  list_integers_input = [1, 2, 3]
@@ -313,21 +286,11 @@ class TestNvidiaDocumentEmbedder:
313
286
  with pytest.raises(TypeError, match="NvidiaDocumentEmbedder expects a list of Documents as input"):
314
287
  embedder.run(documents=list_integers_input)
315
288
 
316
- @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient")
317
- def test_run_on_empty_list(self, mock_client_class):
289
+ def test_run_on_empty_list(self):
318
290
  embedder = NvidiaDocumentEmbedder("playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key"))
319
291
 
320
- def mock_query_function(_, payload):
321
- inputs = payload["input"]
322
- data = [{"index": i, "embedding": [0.1, 0.2, 0.3]} for i in range(len(inputs))]
323
- return {"data": data, "usage": {"total_tokens": 4, "prompt_tokens": 4}}
324
-
325
- mock_client = Mock(
326
- get_model_nvcf_id=Mock(return_value="some_id"),
327
- query_function=mock_query_function,
328
- )
329
- mock_client_class.return_value = mock_client
330
292
  embedder.warm_up()
293
+ embedder.backend = MockBackend("aa", None)
331
294
 
332
295
  empty_list_input = []
333
296
  result = embedder.run(documents=empty_list_input)
@@ -339,25 +302,6 @@ class TestNvidiaDocumentEmbedder:
339
302
  not os.environ.get("NVIDIA_API_KEY", None),
340
303
  reason="Export an env var called NVIDIA_API_KEY containing the Nvidia API key to run this test.",
341
304
  )
342
- @pytest.mark.integration
343
- def test_run_integration(self):
344
- embedder = NvidiaDocumentEmbedder("playground_nvolveqa_40k")
345
- embedder.warm_up()
346
-
347
- docs = [
348
- Document(content="I love cheese", meta={"topic": "Cuisine"}),
349
- Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}),
350
- ]
351
-
352
- result = embedder.run(docs)
353
- docs_with_embeddings = result["documents"]
354
-
355
- assert isinstance(docs_with_embeddings, list)
356
- assert len(docs_with_embeddings) == len(docs)
357
- for doc in docs_with_embeddings:
358
- assert isinstance(doc.embedding, list)
359
- assert isinstance(doc.embedding[0], float)
360
-
361
305
  @pytest.mark.skipif(
362
306
  not os.environ.get("NVIDIA_NIM_EMBEDDER_MODEL", None) or not os.environ.get("NVIDIA_NIM_ENDPOINT_URL", None),
363
307
  reason="Export an env var called NVIDIA_NIM_EMBEDDER_MODEL containing the hosted model name and "
@@ -388,15 +332,15 @@ class TestNvidiaDocumentEmbedder:
388
332
  assert isinstance(doc.embedding[0], float)
389
333
 
390
334
  @pytest.mark.skipif(
391
- not os.environ.get("NVIDIA_CATALOG_API_KEY", None),
392
- reason="Export an env var called NVIDIA_CATALOG_API_KEY containing the Nvidia API key to run this test.",
335
+ not os.environ.get("NVIDIA_API_KEY", None),
336
+ reason="Export an env var called NVIDIA_API_KEY containing the NVIDIA API key to run this test.",
393
337
  )
394
338
  @pytest.mark.integration
395
339
  def test_run_integration_with_api_catalog(self):
396
340
  embedder = NvidiaDocumentEmbedder(
397
341
  model="NV-Embed-QA",
398
342
  api_url="https://ai.api.nvidia.com/v1/retrieval/nvidia",
399
- api_key=Secret.from_env_var("NVIDIA_CATALOG_API_KEY"),
343
+ api_key=Secret.from_env_var("NVIDIA_API_KEY"),
400
344
  )
401
345
  embedder.warm_up()
402
346
 
@@ -2,7 +2,6 @@
2
2
  #
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  import os
5
- from unittest.mock import Mock, patch
6
5
 
7
6
  import pytest
8
7
  from haystack.utils import Secret
@@ -55,7 +54,7 @@ class TestNvidiaGenerator:
55
54
  assert data == {
56
55
  "type": "haystack_integrations.components.generators.nvidia.generator.NvidiaGenerator",
57
56
  "init_parameters": {
58
- "api_url": None,
57
+ "api_url": "https://integrate.api.nvidia.com/v1",
59
58
  "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"},
60
59
  "model": "playground_nemotron_steerlm_8b",
61
60
  "model_arguments": {},
@@ -94,92 +93,6 @@ class TestNvidiaGenerator:
94
93
  },
95
94
  }
96
95
 
97
- @patch("haystack_integrations.components.generators.nvidia._nvcf_backend.NvidiaCloudFunctionsClient")
98
- def test_run(self, mock_client_class):
99
- generator = NvidiaGenerator(
100
- model="playground_nemotron_steerlm_8b",
101
- api_key=Secret.from_token("fake-api-key"),
102
- model_arguments={
103
- "temperature": 0.2,
104
- "top_p": 0.7,
105
- "max_tokens": 1024,
106
- "seed": None,
107
- "bad": None,
108
- "stop": None,
109
- },
110
- )
111
- mock_client = Mock(
112
- get_model_nvcf_id=Mock(return_value="some_id"),
113
- query_function=Mock(
114
- return_value={
115
- "id": "some_id",
116
- "choices": [
117
- {
118
- "index": 0,
119
- "message": {"content": "42", "role": "assistant"},
120
- "finish_reason": "stop",
121
- }
122
- ],
123
- "usage": {"total_tokens": 21, "prompt_tokens": 19, "completion_tokens": 2},
124
- }
125
- ),
126
- )
127
- mock_client_class.return_value = mock_client
128
- generator.warm_up()
129
-
130
- result = generator.run(prompt="What is the answer?")
131
- mock_client.query_function.assert_called_once_with(
132
- "some_id",
133
- {
134
- "messages": [
135
- {"content": "What is the answer?", "role": "user"},
136
- ],
137
- "temperature": 0.2,
138
- "top_p": 0.7,
139
- "max_tokens": 1024,
140
- "seed": None,
141
- "bad": None,
142
- "stop": None,
143
- },
144
- )
145
- assert result == {
146
- "replies": ["42"],
147
- "meta": [
148
- {
149
- "finish_reason": "stop",
150
- "role": "assistant",
151
- "usage": {
152
- "total_tokens": 21,
153
- "prompt_tokens": 19,
154
- "completion_tokens": 2,
155
- },
156
- },
157
- ],
158
- }
159
-
160
- @pytest.mark.skipif(
161
- not os.environ.get("NVIDIA_API_KEY", None),
162
- reason="Export an env var called NVIDIA_API_KEY containing the Nvidia API key to run this test.",
163
- )
164
- @pytest.mark.integration
165
- def test_run_integration_with_nvcf_backend(self):
166
- generator = NvidiaGenerator(
167
- model="playground_nv_llama2_rlhf_70b",
168
- model_arguments={
169
- "temperature": 0.2,
170
- "top_p": 0.7,
171
- "max_tokens": 1024,
172
- "seed": None,
173
- "bad": None,
174
- "stop": None,
175
- },
176
- )
177
- generator.warm_up()
178
- result = generator.run(prompt="What is the answer?")
179
-
180
- assert result["replies"]
181
- assert result["meta"]
182
-
183
96
  @pytest.mark.skipif(
184
97
  not os.environ.get("NVIDIA_NIM_GENERATOR_MODEL", None) or not os.environ.get("NVIDIA_NIM_ENDPOINT_URL", None),
185
98
  reason="Export an env var called NVIDIA_NIM_GENERATOR_MODEL containing the hosted model name and "
@@ -204,15 +117,15 @@ class TestNvidiaGenerator:
204
117
  assert result["meta"]
205
118
 
206
119
  @pytest.mark.skipif(
207
- not os.environ.get("NVIDIA_CATALOG_API_KEY", None),
208
- reason="Export an env var called NVIDIA_CATALOG_API_KEY containing the Nvidia API key to run this test.",
120
+ not os.environ.get("NVIDIA_API_KEY", None),
121
+ reason="Export an env var called NVIDIA_API_KEY containing the NVIDIA API key to run this test.",
209
122
  )
210
123
  @pytest.mark.integration
211
124
  def test_run_integration_with_api_catalog(self):
212
125
  generator = NvidiaGenerator(
213
126
  model="meta/llama3-70b-instruct",
214
127
  api_url="https://integrate.api.nvidia.com/v1",
215
- api_key=Secret.from_env_var("NVIDIA_CATALOG_API_KEY"),
128
+ api_key=Secret.from_env_var("NVIDIA_API_KEY"),
216
129
  model_arguments={
217
130
  "temperature": 0.2,
218
131
  },
@@ -222,3 +135,27 @@ class TestNvidiaGenerator:
222
135
 
223
136
  assert result["replies"]
224
137
  assert result["meta"]
138
+
139
+ def test_local_nim_without_key(self) -> None:
140
+ generator = NvidiaGenerator(
141
+ model="BOGUS",
142
+ api_url="http://localhost:8000",
143
+ api_key=None,
144
+ )
145
+ generator.warm_up()
146
+
147
+ def test_hosted_nim_without_key(self):
148
+ generator0 = NvidiaGenerator(
149
+ model="BOGUS",
150
+ api_url="https://integrate.api.nvidia.com/v1",
151
+ api_key=None,
152
+ )
153
+ with pytest.raises(ValueError):
154
+ generator0.warm_up()
155
+
156
+ generator1 = NvidiaGenerator(
157
+ model="BOGUS",
158
+ api_key=None,
159
+ )
160
+ with pytest.raises(ValueError):
161
+ generator1.warm_up()
@@ -1,18 +1,29 @@
1
1
  import os
2
- from unittest.mock import Mock, patch
3
2
 
4
3
  import pytest
5
4
  from haystack.utils import Secret
6
5
  from haystack_integrations.components.embedders.nvidia import EmbeddingTruncateMode, NvidiaTextEmbedder
6
+ from haystack_integrations.components.embedders.nvidia.backend import EmbedderBackend
7
+
8
+
9
+ class MockBackend(EmbedderBackend):
10
+ def __init__(self, model, model_kwargs):
11
+ super().__init__(model, model_kwargs)
12
+
13
+ def embed(self, texts):
14
+ inputs = texts
15
+ data = [[0.1, 0.2, 0.3] for i in range(len(inputs))]
16
+ return data, {"usage": {"total_tokens": 4, "prompt_tokens": 4}}
7
17
 
8
18
 
9
19
  class TestNvidiaTextEmbedder:
10
20
  def test_init_default(self, monkeypatch):
11
21
  monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
12
- embedder = NvidiaTextEmbedder("nvolveqa_40k")
22
+ embedder = NvidiaTextEmbedder()
13
23
 
14
24
  assert embedder.api_key == Secret.from_env_var("NVIDIA_API_KEY")
15
- assert embedder.model == "nvolveqa_40k"
25
+ assert embedder.model == "NV-Embed-QA"
26
+ assert embedder.api_url == "https://ai.api.nvidia.com/v1/retrieval/nvidia"
16
27
  assert embedder.prefix == ""
17
28
  assert embedder.suffix == ""
18
29
 
@@ -20,11 +31,13 @@ class TestNvidiaTextEmbedder:
20
31
  embedder = NvidiaTextEmbedder(
21
32
  api_key=Secret.from_token("fake-api-key"),
22
33
  model="nvolveqa_40k",
34
+ api_url="https://ai.api.nvidia.com/v1/retrieval/nvidia/test",
23
35
  prefix="prefix",
24
36
  suffix="suffix",
25
37
  )
26
38
  assert embedder.api_key == Secret.from_token("fake-api-key")
27
39
  assert embedder.model == "nvolveqa_40k"
40
+ assert embedder.api_url == "https://ai.api.nvidia.com/v1/retrieval/nvidia/test"
28
41
  assert embedder.prefix == "prefix"
29
42
  assert embedder.suffix == "suffix"
30
43
 
@@ -42,7 +55,7 @@ class TestNvidiaTextEmbedder:
42
55
  "type": "haystack_integrations.components.embedders.nvidia.text_embedder.NvidiaTextEmbedder",
43
56
  "init_parameters": {
44
57
  "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"},
45
- "api_url": None,
58
+ "api_url": "https://ai.api.nvidia.com/v1/retrieval/nvidia",
46
59
  "model": "nvolveqa_40k",
47
60
  "prefix": "",
48
61
  "suffix": "",
@@ -54,6 +67,7 @@ class TestNvidiaTextEmbedder:
54
67
  monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
55
68
  component = NvidiaTextEmbedder(
56
69
  model="nvolveqa_40k",
70
+ api_url="https://example.com",
57
71
  prefix="prefix",
58
72
  suffix="suffix",
59
73
  truncate=EmbeddingTruncateMode.START,
@@ -63,7 +77,7 @@ class TestNvidiaTextEmbedder:
63
77
  "type": "haystack_integrations.components.embedders.nvidia.text_embedder.NvidiaTextEmbedder",
64
78
  "init_parameters": {
65
79
  "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"},
66
- "api_url": None,
80
+ "api_url": "https://example.com",
67
81
  "model": "nvolveqa_40k",
68
82
  "prefix": "prefix",
69
83
  "suffix": "suffix",
@@ -77,7 +91,7 @@ class TestNvidiaTextEmbedder:
77
91
  "type": "haystack_integrations.components.embedders.nvidia.text_embedder.NvidiaTextEmbedder",
78
92
  "init_parameters": {
79
93
  "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"},
80
- "api_url": None,
94
+ "api_url": "https://example.com",
81
95
  "model": "nvolveqa_40k",
82
96
  "prefix": "prefix",
83
97
  "suffix": "suffix",
@@ -86,27 +100,19 @@ class TestNvidiaTextEmbedder:
86
100
  }
87
101
  component = NvidiaTextEmbedder.from_dict(data)
88
102
  assert component.model == "nvolveqa_40k"
89
- assert component.api_url is None
103
+ assert component.api_url == "https://example.com"
90
104
  assert component.prefix == "prefix"
91
105
  assert component.suffix == "suffix"
92
106
  assert component.truncate == "START"
93
107
 
94
- @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient")
95
- def test_run(self, mock_client_class):
108
+ def test_run(self):
96
109
  embedder = NvidiaTextEmbedder(
97
110
  "playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key"), prefix="prefix ", suffix=" suffix"
98
111
  )
99
- mock_client = Mock(
100
- get_model_nvcf_id=Mock(return_value="some_id"),
101
- query_function=Mock(
102
- return_value={
103
- "data": [{"index": 0, "embedding": [0.1, 0.2, 0.3]}],
104
- "usage": {"total_tokens": 4, "prompt_tokens": 4},
105
- }
106
- ),
107
- )
108
- mock_client_class.return_value = mock_client
112
+
109
113
  embedder.warm_up()
114
+ embedder.backend = MockBackend("aa", None)
115
+
110
116
  result = embedder.run(text="The food was delicious")
111
117
 
112
118
  assert len(result["embedding"]) == 3
@@ -115,42 +121,16 @@ class TestNvidiaTextEmbedder:
115
121
  "usage": {"prompt_tokens": 4, "total_tokens": 4},
116
122
  }
117
123
 
118
- @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient")
119
- def test_run_wrong_input_format(self, mock_client_class):
124
+ def test_run_wrong_input_format(self):
120
125
  embedder = NvidiaTextEmbedder("playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key"))
121
- mock_client = Mock(
122
- get_model_nvcf_id=Mock(return_value="some_id"),
123
- query_function=Mock(
124
- return_value={
125
- "data": [{"index": 0, "embedding": [0.1, 0.2, 0.3]}],
126
- "usage": {"total_tokens": 4, "prompt_tokens": 4},
127
- }
128
- ),
129
- )
130
- mock_client_class.return_value = mock_client
131
126
  embedder.warm_up()
127
+ embedder.backend = MockBackend("aa", None)
132
128
 
133
129
  list_integers_input = [1, 2, 3]
134
130
 
135
131
  with pytest.raises(TypeError, match="NvidiaTextEmbedder expects a string as an input"):
136
132
  embedder.run(text=list_integers_input)
137
133
 
138
- @pytest.mark.skipif(
139
- not os.environ.get("NVIDIA_API_KEY", None),
140
- reason="Export an env var called NVIDIA_API_KEY containing the Nvidia API key to run this test.",
141
- )
142
- @pytest.mark.integration
143
- def test_run_integration_with_nvcf_backend(self):
144
- embedder = NvidiaTextEmbedder("playground_nvolveqa_40k")
145
- embedder.warm_up()
146
-
147
- result = embedder.run("A transformer is a deep learning architecture")
148
- embedding = result["embedding"]
149
- meta = result["meta"]
150
-
151
- assert all(isinstance(x, float) for x in embedding)
152
- assert "usage" in meta
153
-
154
134
  @pytest.mark.skipif(
155
135
  not os.environ.get("NVIDIA_NIM_EMBEDDER_MODEL", None) or not os.environ.get("NVIDIA_NIM_ENDPOINT_URL", None),
156
136
  reason="Export an env var called NVIDIA_NIM_EMBEDDER_MODEL containing the hosted model name and "
@@ -175,15 +155,15 @@ class TestNvidiaTextEmbedder:
175
155
  assert "usage" in meta
176
156
 
177
157
  @pytest.mark.skipif(
178
- not os.environ.get("NVIDIA_CATALOG_API_KEY", None),
179
- reason="Export an env var called NVIDIA_CATALOG_API_KEY containing the Nvidia API key to run this test.",
158
+ not os.environ.get("NVIDIA_API_KEY", None),
159
+ reason="Export an env var called NVIDIA_API_KEY containing the NVIDIA API key to run this test.",
180
160
  )
181
161
  @pytest.mark.integration
182
162
  def test_run_integration_with_api_catalog(self):
183
163
  embedder = NvidiaTextEmbedder(
184
164
  model="NV-Embed-QA",
185
165
  api_url="https://ai.api.nvidia.com/v1/retrieval/nvidia",
186
- api_key=Secret.from_env_var("NVIDIA_CATALOG_API_KEY"),
166
+ api_key=Secret.from_env_var("NVIDIA_API_KEY"),
187
167
  )
188
168
  embedder.warm_up()
189
169
 
@@ -1,111 +0,0 @@
1
- import warnings
2
- from dataclasses import asdict, dataclass
3
- from typing import Any, Dict, List, Literal, Optional, Tuple, Union
4
-
5
- from haystack.utils.auth import Secret
6
- from haystack_integrations.utils.nvidia import NvidiaCloudFunctionsClient
7
-
8
- from .backend import EmbedderBackend
9
-
10
- MAX_INPUT_STRING_LENGTH = 2048
11
- MAX_INPUTS = 50
12
-
13
-
14
- class NvcfBackend(EmbedderBackend):
15
- def __init__(
16
- self,
17
- model: str,
18
- api_key: Secret,
19
- model_kwargs: Optional[Dict[str, Any]] = None,
20
- ):
21
- warnings.warn("Nvidia NGC is deprecated, use Nvidia NIM instead.", DeprecationWarning, stacklevel=2)
22
- if not model.startswith("playground_"):
23
- model = f"playground_{model}"
24
-
25
- super().__init__(model=model, model_kwargs=model_kwargs)
26
-
27
- self.api_key = api_key
28
- self.client = NvidiaCloudFunctionsClient(
29
- api_key=api_key,
30
- headers={
31
- "Content-Type": "application/json",
32
- "Accept": "application/json",
33
- },
34
- )
35
- self.nvcf_id = self.client.get_model_nvcf_id(self.model_name)
36
-
37
- def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]:
38
- request = EmbeddingsRequest(input=texts, **self.model_kwargs).to_dict()
39
- json_response = self.client.query_function(self.nvcf_id, request)
40
- response = EmbeddingsResponse.from_dict(json_response)
41
-
42
- # Sort resulting embeddings by index
43
- assert all(isinstance(r.embedding, list) for r in response.data)
44
- sorted_embeddings: List[List[float]] = [r.embedding for r in sorted(response.data, key=lambda e: e.index)] # type: ignore
45
- metadata = {"usage": response.usage.to_dict()}
46
- return sorted_embeddings, metadata
47
-
48
-
49
- @dataclass
50
- class EmbeddingsRequest:
51
- input: Union[str, List[str]]
52
- model: Literal["query", "passage"]
53
- encoding_format: Literal["float", "base64"] = "float"
54
-
55
- def __post_init__(self):
56
- if isinstance(self.input, list):
57
- if len(self.input) > MAX_INPUTS:
58
- msg = f"The number of inputs should not exceed {MAX_INPUTS}"
59
- raise ValueError(msg)
60
- else:
61
- self.input = [self.input]
62
-
63
- if len(self.input) == 0:
64
- msg = "The number of inputs should not be 0"
65
- raise ValueError(msg)
66
-
67
- if any(len(x) > MAX_INPUT_STRING_LENGTH for x in self.input):
68
- msg = f"The length of each input should not exceed {MAX_INPUT_STRING_LENGTH} characters"
69
- raise ValueError(msg)
70
-
71
- if self.encoding_format not in ["float", "base64"]:
72
- msg = "encoding_format should be either 'float' or 'base64'"
73
- raise ValueError(msg)
74
-
75
- if self.model not in ["query", "passage"]:
76
- msg = "model should be either 'query' or 'passage'"
77
- raise ValueError(msg)
78
-
79
- def to_dict(self) -> Dict[str, Any]:
80
- return asdict(self)
81
-
82
-
83
- @dataclass
84
- class Usage:
85
- prompt_tokens: int
86
- total_tokens: int
87
-
88
- def to_dict(self) -> Dict[str, Any]:
89
- return asdict(self)
90
-
91
-
92
- @dataclass
93
- class Embeddings:
94
- index: int
95
- embedding: Union[List[float], str]
96
-
97
-
98
- @dataclass
99
- class EmbeddingsResponse:
100
- data: List[Embeddings]
101
- usage: Usage
102
-
103
- @classmethod
104
- def from_dict(cls, data: Dict[str, Any]) -> "EmbeddingsResponse":
105
- try:
106
- embeddings = [Embeddings(**x) for x in data["data"]]
107
- usage = Usage(**data["usage"])
108
- return cls(data=embeddings, usage=usage)
109
- except (KeyError, TypeError) as e:
110
- msg = f"Failed to parse EmbeddingsResponse from data: {data}"
111
- raise ValueError(msg) from e
@@ -1,119 +0,0 @@
1
- import warnings
2
- from dataclasses import asdict, dataclass
3
- from typing import Any, Dict, List, Optional, Tuple
4
-
5
- from haystack.utils.auth import Secret
6
- from haystack_integrations.utils.nvidia import NvidiaCloudFunctionsClient
7
-
8
- from .backend import GeneratorBackend
9
-
10
-
11
- class NvcfBackend(GeneratorBackend):
12
- def __init__(
13
- self,
14
- model: str,
15
- api_key: Secret,
16
- model_kwargs: Optional[Dict[str, Any]] = None,
17
- ):
18
- warnings.warn("Nvidia NGC is deprecated, use Nvidia NIM instead.", DeprecationWarning, stacklevel=2)
19
- if not model.startswith("playground_"):
20
- model = f"playground_{model}"
21
-
22
- super().__init__(model=model, model_kwargs=model_kwargs)
23
-
24
- self.api_key = api_key
25
- self.client = NvidiaCloudFunctionsClient(
26
- api_key=api_key,
27
- headers={
28
- "Content-Type": "application/json",
29
- "Accept": "application/json",
30
- },
31
- )
32
- self.nvcf_id = self.client.get_model_nvcf_id(self.model_name)
33
-
34
- def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]:
35
- messages = [Message(role="user", content=prompt)]
36
- request = GenerationRequest(messages=messages, **self.model_kwargs).to_dict()
37
- json_response = self.client.query_function(self.nvcf_id, request)
38
- response = GenerationResponse.from_dict(json_response)
39
-
40
- replies = []
41
- meta = []
42
- for choice in response.choices:
43
- replies.append(choice.message.content)
44
- meta.append(
45
- {
46
- "role": choice.message.role,
47
- "finish_reason": choice.finish_reason,
48
- "usage": {
49
- "completion_tokens": response.usage.completion_tokens,
50
- "prompt_tokens": response.usage.prompt_tokens,
51
- "total_tokens": response.usage.total_tokens,
52
- },
53
- }
54
- )
55
- return replies, meta
56
-
57
-
58
- @dataclass
59
- class Message:
60
- content: str
61
- role: str
62
-
63
-
64
- @dataclass
65
- class GenerationRequest:
66
- messages: List[Message]
67
- temperature: float = 0.2
68
- top_p: float = 0.7
69
- max_tokens: int = 1024
70
- seed: Optional[int] = None
71
- bad: Optional[List[str]] = None
72
- stop: Optional[List[str]] = None
73
-
74
- def to_dict(self) -> Dict[str, Any]:
75
- return asdict(self)
76
-
77
-
78
- @dataclass
79
- class Choice:
80
- index: int
81
- message: Message
82
- finish_reason: str
83
-
84
-
85
- @dataclass
86
- class Usage:
87
- completion_tokens: int
88
- prompt_tokens: int
89
- total_tokens: int
90
-
91
-
92
- @dataclass
93
- class GenerationResponse:
94
- id: str
95
- choices: List[Choice]
96
- usage: Usage
97
-
98
- @classmethod
99
- def from_dict(cls, data: dict) -> "GenerationResponse":
100
- try:
101
- return cls(
102
- id=data["id"],
103
- choices=[
104
- Choice(
105
- index=choice["index"],
106
- message=Message(content=choice["message"]["content"], role=choice["message"]["role"]),
107
- finish_reason=choice["finish_reason"],
108
- )
109
- for choice in data["choices"]
110
- ],
111
- usage=Usage(
112
- completion_tokens=data["usage"]["completion_tokens"],
113
- prompt_tokens=data["usage"]["prompt_tokens"],
114
- total_tokens=data["usage"]["total_tokens"],
115
- ),
116
- )
117
- except (KeyError, TypeError) as e:
118
- msg = f"Failed to parse {cls.__name__} from data: {data}"
119
- raise ValueError(msg) from e
@@ -1,69 +0,0 @@
1
- # SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
2
- #
3
- # SPDX-License-Identifier: Apache-2.0
4
- from dataclasses import asdict, dataclass
5
- from typing import Any, Dict, List, Optional
6
-
7
-
8
- @dataclass
9
- class Message:
10
- content: str
11
- role: str
12
-
13
-
14
- @dataclass
15
- class GenerationRequest:
16
- messages: List[Message]
17
- temperature: float = 0.2
18
- top_p: float = 0.7
19
- max_tokens: int = 1024
20
- seed: Optional[int] = None
21
- bad: Optional[List[str]] = None
22
- stop: Optional[List[str]] = None
23
-
24
- def to_dict(self) -> Dict[str, Any]:
25
- return asdict(self)
26
-
27
-
28
- @dataclass
29
- class Choice:
30
- index: int
31
- message: Message
32
- finish_reason: str
33
-
34
-
35
- @dataclass
36
- class Usage:
37
- completion_tokens: int
38
- prompt_tokens: int
39
- total_tokens: int
40
-
41
-
42
- @dataclass
43
- class GenerationResponse:
44
- id: str
45
- choices: List[Choice]
46
- usage: Usage
47
-
48
- @classmethod
49
- def from_dict(cls, data: dict) -> "GenerationResponse":
50
- try:
51
- return cls(
52
- id=data["id"],
53
- choices=[
54
- Choice(
55
- index=choice["index"],
56
- message=Message(content=choice["message"]["content"], role=choice["message"]["role"]),
57
- finish_reason=choice["finish_reason"],
58
- )
59
- for choice in data["choices"]
60
- ],
61
- usage=Usage(
62
- completion_tokens=data["usage"]["completion_tokens"],
63
- prompt_tokens=data["usage"]["prompt_tokens"],
64
- total_tokens=data["usage"]["total_tokens"],
65
- ),
66
- )
67
- except (KeyError, TypeError) as e:
68
- msg = f"Failed to parse {cls.__name__} from data: {data}"
69
- raise ValueError(msg) from e
@@ -1,3 +0,0 @@
1
- from .client import NvidiaCloudFunctionsClient
2
-
3
- __all__ = ["NvidiaCloudFunctionsClient"]
@@ -1,82 +0,0 @@
1
- import copy
2
- from dataclasses import dataclass
3
- from typing import Dict, Optional
4
-
5
- import requests
6
- from haystack.utils import Secret
7
-
8
- FUNCTIONS_ENDPOINT = "https://api.nvcf.nvidia.com/v2/nvcf/functions"
9
- INVOKE_ENDPOINT = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions"
10
- STATUS_ENDPOINT = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/status"
11
-
12
- ACCEPTED_STATUS_CODE = 202
13
-
14
-
15
- @dataclass
16
- class AvailableNvidiaCloudFunctions:
17
- name: str
18
- id: str
19
- status: Optional[str] = None
20
-
21
-
22
- class NvidiaCloudFunctionsClient:
23
- def __init__(self, *, api_key: Secret, headers: Dict[str, str], timeout: int = 60):
24
- self.api_key = api_key.resolve_value()
25
- if self.api_key is None:
26
- msg = "Nvidia Cloud Functions API key is not set."
27
- raise ValueError(msg)
28
-
29
- self.fetch_url_format = STATUS_ENDPOINT
30
- self.headers = copy.deepcopy(headers)
31
- self.headers.update(
32
- {
33
- "Authorization": f"Bearer {self.api_key}",
34
- }
35
- )
36
- self.timeout = timeout
37
- self.session = requests.Session()
38
-
39
- def query_function(self, func_id: str, payload: Dict[str, str]) -> Dict[str, str]:
40
- invoke_url = f"{INVOKE_ENDPOINT}/{func_id}"
41
-
42
- response = self.session.post(invoke_url, headers=self.headers, json=payload, timeout=self.timeout)
43
- request_id = response.headers.get("NVCF-REQID")
44
- if request_id is None:
45
- msg = "NVCF-REQID header not found in response"
46
- raise ValueError(msg)
47
-
48
- while response.status_code == ACCEPTED_STATUS_CODE:
49
- fetch_url = f"{self.fetch_url_format}/{request_id}"
50
- response = self.session.get(fetch_url, headers=self.headers, timeout=self.timeout)
51
-
52
- response.raise_for_status()
53
- return response.json()
54
-
55
- def available_functions(self) -> Dict[str, AvailableNvidiaCloudFunctions]:
56
- response = self.session.get(FUNCTIONS_ENDPOINT, headers=self.headers, timeout=self.timeout)
57
- response.raise_for_status()
58
-
59
- return {
60
- f["name"]: AvailableNvidiaCloudFunctions(
61
- name=f["name"],
62
- id=f["id"],
63
- status=f.get("status"),
64
- )
65
- for f in response.json()["functions"]
66
- }
67
-
68
- def get_model_nvcf_id(self, model: str) -> str:
69
- """
70
- Returns the Nvidia Cloud Functions UUID for the given model.
71
- """
72
-
73
- available_functions = self.available_functions()
74
- func = available_functions.get(model)
75
- if func is None:
76
- msg = f"Model '{model}' was not found on the Nvidia Cloud Functions backend"
77
- raise ValueError(msg)
78
- elif func.status != "ACTIVE":
79
- msg = f"Model '{model}' is not currently active/usable on the Nvidia Cloud Functions backend"
80
- raise ValueError(msg)
81
-
82
- return func.id