nvidia-haystack 0.1.7__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. {nvidia_haystack-0.1.7 → nvidia_haystack-0.2.0}/CHANGELOG.md +28 -0
  2. {nvidia_haystack-0.1.7 → nvidia_haystack-0.2.0}/PKG-INFO +4 -4
  3. {nvidia_haystack-0.1.7 → nvidia_haystack-0.2.0}/pyproject.toml +4 -1
  4. {nvidia_haystack-0.1.7 → nvidia_haystack-0.2.0}/src/haystack_integrations/components/generators/nvidia/__init__.py +2 -1
  5. nvidia_haystack-0.2.0/src/haystack_integrations/components/generators/nvidia/chat/chat_generator.py +133 -0
  6. nvidia_haystack-0.2.0/tests/test_nvidia_chat_generator.py +379 -0
  7. {nvidia_haystack-0.1.7 → nvidia_haystack-0.2.0}/.gitignore +0 -0
  8. {nvidia_haystack-0.1.7 → nvidia_haystack-0.2.0}/LICENSE.txt +0 -0
  9. {nvidia_haystack-0.1.7 → nvidia_haystack-0.2.0}/README.md +0 -0
  10. {nvidia_haystack-0.1.7 → nvidia_haystack-0.2.0}/pydoc/config.yml +0 -0
  11. {nvidia_haystack-0.1.7 → nvidia_haystack-0.2.0}/src/haystack_integrations/components/embedders/nvidia/__init__.py +0 -0
  12. {nvidia_haystack-0.1.7 → nvidia_haystack-0.2.0}/src/haystack_integrations/components/embedders/nvidia/document_embedder.py +0 -0
  13. {nvidia_haystack-0.1.7 → nvidia_haystack-0.2.0}/src/haystack_integrations/components/embedders/nvidia/text_embedder.py +0 -0
  14. {nvidia_haystack-0.1.7 → nvidia_haystack-0.2.0}/src/haystack_integrations/components/embedders/nvidia/truncate.py +0 -0
  15. {nvidia_haystack-0.1.7 → nvidia_haystack-0.2.0}/src/haystack_integrations/components/generators/nvidia/chat/__init__.py +0 -0
  16. {nvidia_haystack-0.1.7 → nvidia_haystack-0.2.0}/src/haystack_integrations/components/generators/nvidia/generator.py +0 -0
  17. {nvidia_haystack-0.1.7 → nvidia_haystack-0.2.0}/src/haystack_integrations/components/rankers/nvidia/__init__.py +0 -0
  18. {nvidia_haystack-0.1.7 → nvidia_haystack-0.2.0}/src/haystack_integrations/components/rankers/nvidia/ranker.py +0 -0
  19. {nvidia_haystack-0.1.7 → nvidia_haystack-0.2.0}/src/haystack_integrations/components/rankers/nvidia/truncate.py +0 -0
  20. {nvidia_haystack-0.1.7 → nvidia_haystack-0.2.0}/src/haystack_integrations/utils/nvidia/__init__.py +0 -0
  21. {nvidia_haystack-0.1.7 → nvidia_haystack-0.2.0}/src/haystack_integrations/utils/nvidia/models.py +0 -0
  22. {nvidia_haystack-0.1.7 → nvidia_haystack-0.2.0}/src/haystack_integrations/utils/nvidia/nim_backend.py +0 -0
  23. {nvidia_haystack-0.1.7 → nvidia_haystack-0.2.0}/src/haystack_integrations/utils/nvidia/utils.py +0 -0
  24. {nvidia_haystack-0.1.7 → nvidia_haystack-0.2.0}/tests/__init__.py +0 -0
  25. {nvidia_haystack-0.1.7 → nvidia_haystack-0.2.0}/tests/conftest.py +0 -0
  26. {nvidia_haystack-0.1.7 → nvidia_haystack-0.2.0}/tests/test_base_url.py +0 -0
  27. {nvidia_haystack-0.1.7 → nvidia_haystack-0.2.0}/tests/test_document_embedder.py +0 -0
  28. {nvidia_haystack-0.1.7 → nvidia_haystack-0.2.0}/tests/test_embedding_truncate_mode.py +0 -0
  29. {nvidia_haystack-0.1.7 → nvidia_haystack-0.2.0}/tests/test_generator.py +0 -0
  30. {nvidia_haystack-0.1.7 → nvidia_haystack-0.2.0}/tests/test_nim_backend.py +0 -0
  31. {nvidia_haystack-0.1.7 → nvidia_haystack-0.2.0}/tests/test_ranker.py +0 -0
  32. {nvidia_haystack-0.1.7 → nvidia_haystack-0.2.0}/tests/test_text_embedder.py +0 -0
  33. {nvidia_haystack-0.1.7 → nvidia_haystack-0.2.0}/tests/test_utils.py +0 -0
@@ -1,11 +1,39 @@
1
1
  # Changelog
2
2
 
3
+ ## [integrations/nvidia-v0.1.8] - 2025-05-28
4
+
5
+ ### 🌀 Miscellaneous
6
+
7
+ - Add pins for Nvidia (#1846)
8
+
9
+ ## [integrations/nvidia-v0.1.7] - 2025-04-03
10
+
11
+
12
+ ### 🧪 Testing
13
+
14
+ - Reduce Nvidia API calls in integration tests (#1432)
15
+ - Add test cases for all utils methods for Nvidia integration (#1458)
16
+ - Add unit tests for Nvidia NimBackend (#1546)
17
+
18
+ ### ⚙️ CI
19
+
20
+ - Review testing workflows (#1541)
21
+
22
+ ### 🧹 Chores
23
+
24
+ - Remove Python 3.8 support (#1421)
25
+
26
+ ### 🌀 Miscellaneous
27
+
28
+ - Fix: nvidia-haystack remove init files to make them namespace packages (#1594)
29
+
3
30
  ## [integrations/nvidia-v0.1.6] - 2025-02-11
4
31
 
5
32
  ### 🚀 Features
6
33
 
7
34
  - Add nvidia latest embedding models (#1364)
8
35
 
36
+
9
37
  ## [integrations/nvidia-v0.1.5] - 2025-02-04
10
38
 
11
39
  ### 🌀 Miscellaneous
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nvidia-haystack
3
- Version: 0.1.7
3
+ Version: 0.2.0
4
4
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/nvidia#readme
5
5
  Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
6
6
  Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/nvidia
@@ -18,9 +18,9 @@ Classifier: Programming Language :: Python :: 3.13
18
18
  Classifier: Programming Language :: Python :: Implementation :: CPython
19
19
  Classifier: Programming Language :: Python :: Implementation :: PyPy
20
20
  Requires-Python: >=3.9
21
- Requires-Dist: haystack-ai
22
- Requires-Dist: requests
23
- Requires-Dist: tqdm
21
+ Requires-Dist: haystack-ai>=2.13.0
22
+ Requires-Dist: requests>=2.25.0
23
+ Requires-Dist: tqdm>=4.21.0
24
24
  Description-Content-Type: text/markdown
25
25
 
26
26
  # nvidia-haystack
@@ -23,7 +23,7 @@ classifiers = [
23
23
  "Programming Language :: Python :: Implementation :: CPython",
24
24
  "Programming Language :: Python :: Implementation :: PyPy",
25
25
  ]
26
- dependencies = ["haystack-ai", "requests", "tqdm"]
26
+ dependencies = ["haystack-ai>=2.13.0", "requests>=2.25.0", "tqdm>=4.21.0"]
27
27
 
28
28
  [project.urls]
29
29
  Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/nvidia#readme"
@@ -46,6 +46,8 @@ installer = "uv"
46
46
  dependencies = [
47
47
  "coverage[toml]>=6.5",
48
48
  "pytest",
49
+ "pytest-asyncio",
50
+ "pytz",
49
51
  "pytest-rerunfailures",
50
52
  "haystack-pydoc-tools",
51
53
  "requests_mock",
@@ -160,6 +162,7 @@ module = [
160
162
  "pytest.*",
161
163
  "numpy.*",
162
164
  "requests_mock.*",
165
+ "openai.*",
163
166
  "pydantic.*",
164
167
  ]
165
168
  ignore_missing_imports = true
@@ -2,6 +2,7 @@
2
2
  #
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
+ from .chat.chat_generator import NvidiaChatGenerator
5
6
  from .generator import NvidiaGenerator
6
7
 
7
- __all__ = ["NvidiaGenerator"]
8
+ __all__ = ["NvidiaChatGenerator", "NvidiaGenerator"]
@@ -0,0 +1,133 @@
1
+ # SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ import os
6
+ from typing import Any, Dict, List, Optional, Union
7
+
8
+ from haystack import component, default_to_dict, logging
9
+ from haystack.components.generators.chat import OpenAIChatGenerator
10
+ from haystack.dataclasses import StreamingCallbackT
11
+ from haystack.tools import Tool, Toolset, serialize_tools_or_toolset
12
+ from haystack.utils import serialize_callable
13
+ from haystack.utils.auth import Secret
14
+
15
+ from haystack_integrations.utils.nvidia import DEFAULT_API_URL
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
+ @component
21
+ class NvidiaChatGenerator(OpenAIChatGenerator):
22
+ """
23
+ Enables text generation using NVIDIA generative models.
24
+ For supported models, see [NVIDIA Docs](https://build.nvidia.com/models).
25
+
26
+ Users can pass any text generation parameters valid for the NVIDIA Chat Completion API
27
+ directly to this component via the `generation_kwargs` parameter in `__init__` or the `generation_kwargs`
28
+ parameter in `run` method.
29
+
30
+ This component uses the ChatMessage format for structuring both input and output,
31
+ ensuring coherent and contextually relevant responses in chat-based text generation scenarios.
32
+ Details on the ChatMessage format can be found in the
33
+ [Haystack docs](https://docs.haystack.deepset.ai/docs/data-classes#chatmessage)
34
+
35
+ For more details on the parameters supported by the NVIDIA API, refer to the
36
+ [NVIDIA Docs](https://build.nvidia.com/models).
37
+
38
+ Usage example:
39
+ ```python
40
+ from haystack_integrations.components.generators.nvidia import NvidiaChatGenerator
41
+ from haystack.dataclasses import ChatMessage
42
+
43
+ messages = [ChatMessage.from_user("What's Natural Language Processing?")]
44
+
45
+ client = NvidiaChatGenerator()
46
+ response = client.run(messages)
47
+ print(response)
48
+ ```
49
+ """
50
+
51
+ def __init__(
52
+ self,
53
+ *,
54
+ api_key: Secret = Secret.from_env_var("NVIDIA_API_KEY"),
55
+ model: str = "meta/llama-3.1-8b-instruct",
56
+ streaming_callback: Optional[StreamingCallbackT] = None,
57
+ api_base_url: Optional[str] = os.getenv("NVIDIA_API_URL", DEFAULT_API_URL),
58
+ generation_kwargs: Optional[Dict[str, Any]] = None,
59
+ tools: Optional[Union[List[Tool], Toolset]] = None,
60
+ timeout: Optional[float] = None,
61
+ max_retries: Optional[int] = None,
62
+ http_client_kwargs: Optional[Dict[str, Any]] = None,
63
+ ):
64
+ """
65
+ Creates an instance of NvidiaChatGenerator.
66
+
67
+ :param api_key:
68
+ The NVIDIA API key.
69
+ :param model:
70
+ The name of the NVIDIA chat completion model to use.
71
+ :param streaming_callback:
72
+ A callback function that is called when a new token is received from the stream.
73
+ The callback function accepts StreamingChunk as an argument.
74
+ :param api_base_url:
75
+ The NVIDIA API Base url.
76
+ :param generation_kwargs:
77
+ Other parameters to use for the model. These parameters are all sent directly to
78
+ the NVIDIA API endpoint. See [NVIDIA API docs](https://docs.nvcf.nvidia.com/ai/generative-models/)
79
+ for more details.
80
+ Some of the supported parameters:
81
+ - `max_tokens`: The maximum number of tokens the output text can have.
82
+ - `temperature`: What sampling temperature to use. Higher values mean the model will take more risks.
83
+ Try 0.9 for more creative applications and 0 (argmax sampling) for ones with a well-defined answer.
84
+ - `top_p`: An alternative to sampling with temperature, called nucleus sampling, where the model
85
+ considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens
86
+ comprising the top 10% probability mass are considered.
87
+ - `stream`: Whether to stream back partial progress. If set, tokens will be sent as data-only server-sent
88
+ events as they become available, with the stream terminated by a data: [DONE] message.
89
+ :param tools:
90
+ A list of tools or a Toolset for which the model can prepare calls. This parameter can accept either a
91
+ list of `Tool` objects or a `Toolset` instance.
92
+ :param timeout:
93
+ The timeout for the NVIDIA API call.
94
+ :param max_retries:
95
+ Maximum number of retries to contact NVIDIA after an internal error.
96
+ If not set, it defaults to either the `NVIDIA_MAX_RETRIES` environment variable, or set to 5.
97
+ :param http_client_kwargs:
98
+ A dictionary of keyword arguments to configure a custom `httpx.Client` or `httpx.AsyncClient`.
99
+ For more information, see the [HTTPX documentation](https://www.python-httpx.org/api/#client).
100
+ """
101
+ super(NvidiaChatGenerator, self).__init__( # noqa: UP008
102
+ api_key=api_key,
103
+ model=model,
104
+ streaming_callback=streaming_callback,
105
+ api_base_url=api_base_url,
106
+ generation_kwargs=generation_kwargs,
107
+ tools=tools,
108
+ timeout=timeout,
109
+ max_retries=max_retries,
110
+ http_client_kwargs=http_client_kwargs,
111
+ )
112
+
113
+ def to_dict(self) -> Dict[str, Any]:
114
+ """
115
+ Serialize this component to a dictionary.
116
+
117
+ :returns:
118
+ The serialized component as a dictionary.
119
+ """
120
+ callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None
121
+
122
+ return default_to_dict(
123
+ self,
124
+ model=self.model,
125
+ streaming_callback=callback_name,
126
+ api_base_url=self.api_base_url,
127
+ generation_kwargs=self.generation_kwargs,
128
+ api_key=self.api_key.to_dict(),
129
+ tools=serialize_tools_or_toolset(self.tools),
130
+ timeout=self.timeout,
131
+ max_retries=self.max_retries,
132
+ http_client_kwargs=self.http_client_kwargs,
133
+ )
@@ -0,0 +1,379 @@
1
+ # SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ import os
6
+ from datetime import datetime
7
+ from unittest.mock import AsyncMock, patch
8
+
9
+ import pytest
10
+ import pytz
11
+ from haystack.components.generators.utils import print_streaming_chunk
12
+ from haystack.dataclasses import ChatMessage, StreamingChunk
13
+ from haystack.tools import Tool
14
+ from haystack.utils.auth import Secret
15
+ from openai import AsyncOpenAI, OpenAIError
16
+ from openai.types.chat import ChatCompletion, ChatCompletionMessage
17
+ from openai.types.chat.chat_completion import Choice
18
+
19
+ from haystack_integrations.components.generators.nvidia.chat.chat_generator import NvidiaChatGenerator
20
+ from haystack_integrations.utils.nvidia.models import DEFAULT_API_URL
21
+
22
+
23
+ @pytest.fixture
24
+ def chat_messages():
25
+ return [
26
+ ChatMessage.from_system("You are a helpful assistant"),
27
+ ChatMessage.from_user("What's the capital of France"),
28
+ ]
29
+
30
+
31
+ def weather(city: str):
32
+ """Get weather for a given city."""
33
+ return f"The weather in {city} is sunny and 32°C"
34
+
35
+
36
+ @pytest.fixture
37
+ def tools():
38
+ tool_parameters = {
39
+ "type": "object",
40
+ "properties": {"city": {"type": "string"}},
41
+ "required": ["city"],
42
+ }
43
+ tool = Tool(
44
+ name="weather",
45
+ description="useful to determine the weather in a given location",
46
+ parameters=tool_parameters,
47
+ function=weather,
48
+ )
49
+
50
+ return [tool]
51
+
52
+
53
+ @pytest.fixture
54
+ def mock_chat_completion():
55
+ """
56
+ Mock the OpenAI API completion response and reuse it for tests
57
+ """
58
+ with patch("openai.resources.chat.completions.Completions.create") as mock_chat_completion_create:
59
+ completion = ChatCompletion(
60
+ id="foo",
61
+ model="meta/llama-3.1-8b-instruct",
62
+ object="chat.completion",
63
+ choices=[
64
+ Choice(
65
+ finish_reason="stop",
66
+ logprobs=None,
67
+ index=0,
68
+ message=ChatCompletionMessage(content="Hello world!", role="assistant"),
69
+ )
70
+ ],
71
+ created=int(datetime.now(tz=pytz.timezone("UTC")).timestamp()),
72
+ usage={"prompt_tokens": 57, "completion_tokens": 40, "total_tokens": 97},
73
+ )
74
+
75
+ mock_chat_completion_create.return_value = completion
76
+ yield mock_chat_completion_create
77
+
78
+
79
+ @pytest.fixture
80
+ def mock_async_chat_completion():
81
+ """
82
+ Mock the Async OpenAI API completion response and reuse it for async tests
83
+ """
84
+ with patch(
85
+ "openai.resources.chat.completions.AsyncCompletions.create",
86
+ new_callable=AsyncMock,
87
+ ) as mock_chat_completion_create:
88
+ completion = ChatCompletion(
89
+ id="foo",
90
+ model="meta/llama-3.1-8b-instruct",
91
+ object="chat.completion",
92
+ choices=[
93
+ Choice(
94
+ finish_reason="stop",
95
+ logprobs=None,
96
+ index=0,
97
+ message=ChatCompletionMessage(content="Hello world!", role="assistant"),
98
+ )
99
+ ],
100
+ created=int(datetime.now(tz=pytz.timezone("UTC")).timestamp()),
101
+ usage={
102
+ "prompt_tokens": 57,
103
+ "completion_tokens": 40,
104
+ "total_tokens": 97,
105
+ },
106
+ )
107
+ # For async mocks, the return value should be awaitable
108
+ mock_chat_completion_create.return_value = completion
109
+ yield mock_chat_completion_create
110
+
111
+
112
+ class TestNvidiaChatGenerator:
113
+ def test_init_default(self, monkeypatch):
114
+ monkeypatch.setenv("NVIDIA_API_KEY", "test-api-key")
115
+ component = NvidiaChatGenerator()
116
+ assert component.client.api_key == "test-api-key"
117
+ assert component.model == "meta/llama-3.1-8b-instruct"
118
+ assert component.streaming_callback is None
119
+ assert not component.generation_kwargs
120
+
121
+ def test_init_fail_wo_api_key(self, monkeypatch):
122
+ monkeypatch.delenv("NVIDIA_API_KEY", raising=False)
123
+ with pytest.raises(ValueError, match="None of the .* environment variables are set"):
124
+ NvidiaChatGenerator()
125
+
126
+ def test_init_with_parameters(self):
127
+ component = NvidiaChatGenerator(
128
+ api_key=Secret.from_token("test-api-key"),
129
+ model="meta/llama-3.1-8b-instruct",
130
+ streaming_callback=print_streaming_chunk,
131
+ api_base_url="test-base-url",
132
+ generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"},
133
+ )
134
+ assert component.client.api_key == "test-api-key"
135
+ assert component.model == "meta/llama-3.1-8b-instruct"
136
+ assert component.streaming_callback is print_streaming_chunk
137
+ assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"}
138
+
139
+ def test_to_dict_default(self, monkeypatch):
140
+ monkeypatch.setenv("NVIDIA_API_KEY", "test-api-key")
141
+ component = NvidiaChatGenerator()
142
+ data = component.to_dict()
143
+
144
+ assert (
145
+ data["type"] == "haystack_integrations.components.generators.nvidia.chat.chat_generator.NvidiaChatGenerator"
146
+ )
147
+
148
+ expected_params = {
149
+ "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"},
150
+ "model": "meta/llama-3.1-8b-instruct",
151
+ "streaming_callback": None,
152
+ "api_base_url": DEFAULT_API_URL,
153
+ "generation_kwargs": {},
154
+ "tools": None,
155
+ "timeout": None,
156
+ "max_retries": None,
157
+ "http_client_kwargs": None,
158
+ }
159
+
160
+ for key, value in expected_params.items():
161
+ assert data["init_parameters"][key] == value
162
+
163
+ def test_run(self, chat_messages, mock_chat_completion, monkeypatch): # noqa: ARG002
164
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
165
+ component = NvidiaChatGenerator()
166
+ response = component.run(chat_messages)
167
+
168
+ # check that the component returns the correct ChatMessage response
169
+ assert isinstance(response, dict)
170
+ assert "replies" in response
171
+ assert isinstance(response["replies"], list)
172
+ assert len(response["replies"]) == 1
173
+ assert [isinstance(reply, ChatMessage) for reply in response["replies"]]
174
+
175
+ def test_run_with_params(self, chat_messages, mock_chat_completion, monkeypatch):
176
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
177
+ component = NvidiaChatGenerator(generation_kwargs={"max_tokens": 10, "temperature": 0.5})
178
+ response = component.run(chat_messages)
179
+
180
+ # check that the component calls the OpenAI API with the correct parameters
181
+ _, kwargs = mock_chat_completion.call_args
182
+ assert kwargs["max_tokens"] == 10
183
+ assert kwargs["temperature"] == 0.5
184
+
185
+ # check that the component returns the correct response
186
+ assert isinstance(response, dict)
187
+ assert "replies" in response
188
+ assert isinstance(response["replies"], list)
189
+ assert len(response["replies"]) == 1
190
+ assert [isinstance(reply, ChatMessage) for reply in response["replies"]]
191
+
192
+ def test_run_with_extra_body(self, chat_messages, mock_chat_completion, monkeypatch):
193
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
194
+ extra_body = {
195
+ "guardrails": {"config_id": "demo-self-check-input-output"},
196
+ }
197
+ component = NvidiaChatGenerator(generation_kwargs={"extra_body": extra_body})
198
+ response = component.run(chat_messages)
199
+
200
+ # check that the component calls the OpenAI API with the correct parameters
201
+ _, kwargs = mock_chat_completion.call_args
202
+ assert kwargs["extra_body"] == extra_body
203
+ assert kwargs["model"] == "meta/llama-3.1-8b-instruct"
204
+ assert kwargs["messages"] == [
205
+ {"role": "system", "content": "You are a helpful assistant"},
206
+ {"role": "user", "content": "What's the capital of France"},
207
+ ]
208
+
209
+ # check that the component returns the correct response
210
+ assert isinstance(response, dict)
211
+ assert "replies" in response
212
+ assert isinstance(response["replies"], list)
213
+ assert len(response["replies"]) == 1
214
+ assert [isinstance(reply, ChatMessage) for reply in response["replies"]]
215
+
216
+ @pytest.mark.skipif(
217
+ not os.environ.get("NVIDIA_API_KEY", None),
218
+ reason="Export an env var called NVIDIA_API_KEY containing the NVIDIA API key to run this test.",
219
+ )
220
+ @pytest.mark.integration
221
+ def test_live_run(self):
222
+ chat_messages = [ChatMessage.from_user("What's the capital of France")]
223
+ component = NvidiaChatGenerator()
224
+ results = component.run(chat_messages)
225
+ assert len(results["replies"]) == 1
226
+ message: ChatMessage = results["replies"][0]
227
+ assert "Paris" in message.text
228
+ assert "meta/llama-3.1-8b-instruct" in message.meta["model"]
229
+ assert message.meta["finish_reason"] == "stop"
230
+
231
+ @pytest.mark.skipif(
232
+ not os.environ.get("NVIDIA_API_KEY", None),
233
+ reason="Export an env var called NVIDIA_API_KEY containing the NVIDIA API key to run this test.",
234
+ )
235
+ @pytest.mark.integration
236
+ def test_live_run_wrong_model(self, chat_messages):
237
+ component = NvidiaChatGenerator(model="something-obviously-wrong")
238
+ with pytest.raises(OpenAIError):
239
+ component.run(chat_messages)
240
+
241
+ @pytest.mark.skipif(
242
+ not os.environ.get("NVIDIA_API_KEY", None),
243
+ reason="Export an env var called NVIDIA_API_KEY containing the NVIDIA API key to run this test.",
244
+ )
245
+ @pytest.mark.integration
246
+ def test_live_run_streaming(self):
247
+ class Callback:
248
+ def __init__(self):
249
+ self.responses = ""
250
+ self.counter = 0
251
+
252
+ def __call__(self, chunk: StreamingChunk) -> None:
253
+ self.counter += 1
254
+ self.responses += chunk.content if chunk.content else ""
255
+
256
+ callback = Callback()
257
+ component = NvidiaChatGenerator(streaming_callback=callback)
258
+ results = component.run([ChatMessage.from_user("What's the capital of France?")])
259
+
260
+ assert len(results["replies"]) == 1
261
+ message: ChatMessage = results["replies"][0]
262
+ assert "Paris" in message.text
263
+
264
+ assert "meta/llama-3.1-8b-instruct" in message.meta["model"]
265
+ assert message.meta["finish_reason"] == "stop"
266
+
267
+ assert callback.counter > 1
268
+ assert "Paris" in callback.responses
269
+
270
+
271
+ class TestNvidiaChatGeneratorAsync:
272
+ def test_init_default_async(self, monkeypatch):
273
+ monkeypatch.setenv("NVIDIA_API_KEY", "test-api-key")
274
+ component = NvidiaChatGenerator()
275
+
276
+ assert isinstance(component.async_client, AsyncOpenAI)
277
+ assert component.async_client.api_key == "test-api-key"
278
+ assert not component.generation_kwargs
279
+
280
+ @pytest.mark.asyncio
281
+ async def test_run_async(self, chat_messages, mock_async_chat_completion, monkeypatch): # noqa: ARG002
282
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
283
+ component = NvidiaChatGenerator()
284
+ response = await component.run_async(chat_messages)
285
+
286
+ # check that the component returns the correct ChatMessage response
287
+ assert isinstance(response, dict)
288
+ assert "replies" in response
289
+ assert isinstance(response["replies"], list)
290
+ assert len(response["replies"]) == 1
291
+ assert [isinstance(reply, ChatMessage) for reply in response["replies"]]
292
+
293
+ @pytest.mark.asyncio
294
+ async def test_run_async_with_params(self, chat_messages, mock_async_chat_completion, monkeypatch):
295
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
296
+ component = NvidiaChatGenerator(generation_kwargs={"max_tokens": 10, "temperature": 0.5})
297
+ response = await component.run_async(chat_messages)
298
+
299
+ # check that the component calls the OpenAI API with the correct parameters
300
+ _, kwargs = mock_async_chat_completion.call_args
301
+ assert kwargs["max_tokens"] == 10
302
+ assert kwargs["temperature"] == 0.5
303
+
304
+ # check that the component returns the correct response
305
+ assert isinstance(response, dict)
306
+ assert "replies" in response
307
+ assert isinstance(response["replies"], list)
308
+ assert len(response["replies"]) == 1
309
+ assert [isinstance(reply, ChatMessage) for reply in response["replies"]]
310
+
311
+ @pytest.mark.asyncio
312
+ async def test_run_async_with_extra_body(self, chat_messages, mock_async_chat_completion, monkeypatch):
313
+ monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
314
+ extra_body = {
315
+ "guardrails": {"config_id": "demo-self-check-input-output"},
316
+ }
317
+ component = NvidiaChatGenerator(generation_kwargs={"extra_body": extra_body})
318
+ response = await component.run_async(chat_messages)
319
+
320
+ # check that the component calls the OpenAI API with the correct parameters
321
+ _, kwargs = mock_async_chat_completion.call_args
322
+ assert kwargs["extra_body"] == extra_body
323
+ assert kwargs["model"] == "meta/llama-3.1-8b-instruct"
324
+ assert kwargs["messages"] == [
325
+ {"role": "system", "content": "You are a helpful assistant"},
326
+ {"role": "user", "content": "What's the capital of France"},
327
+ ]
328
+
329
+ # check that the component returns the correct response
330
+ assert isinstance(response, dict)
331
+ assert "replies" in response
332
+ assert isinstance(response["replies"], list)
333
+ assert len(response["replies"]) == 1
334
+ assert [isinstance(reply, ChatMessage) for reply in response["replies"]]
335
+
336
+ @pytest.mark.skipif(
337
+ not os.environ.get("NVIDIA_API_KEY", None),
338
+ reason="Export an env var called NVIDIA_API_KEY containing the NVIDIA API key to run this test.",
339
+ )
340
+ @pytest.mark.integration
341
+ @pytest.mark.asyncio
342
+ async def test_live_run_async(self):
343
+ chat_messages = [ChatMessage.from_user("What's the capital of France")]
344
+ component = NvidiaChatGenerator()
345
+ results = await component.run_async(chat_messages)
346
+ assert len(results["replies"]) == 1
347
+ message: ChatMessage = results["replies"][0]
348
+ assert "Paris" in message.text
349
+ assert "meta/llama-3.1-8b-instruct" in message.meta["model"]
350
+ assert message.meta["finish_reason"] == "stop"
351
+
352
+ @pytest.mark.skipif(
353
+ not os.environ.get("NVIDIA_API_KEY", None),
354
+ reason="Export an env var called NVIDIA_API_KEY containing the NVIDIA API key to run this test.",
355
+ )
356
+ @pytest.mark.integration
357
+ @pytest.mark.asyncio
358
+ async def test_live_run_streaming_async(self):
359
+ counter = 0
360
+ responses = ""
361
+
362
+ async def callback(chunk: StreamingChunk):
363
+ nonlocal counter
364
+ nonlocal responses
365
+ counter += 1
366
+ responses += chunk.content if chunk.content else ""
367
+
368
+ component = NvidiaChatGenerator(streaming_callback=callback)
369
+ results = await component.run_async([ChatMessage.from_user("What's the capital of France?")])
370
+
371
+ assert len(results["replies"]) == 1
372
+ message: ChatMessage = results["replies"][0]
373
+ assert "Paris" in message.text
374
+
375
+ assert "meta/llama-3.1-8b-instruct" in message.meta["model"]
376
+ assert message.meta["finish_reason"] == "stop"
377
+
378
+ assert counter > 1
379
+ assert "Paris" in responses