nvidia-nat 1.4.0a20251015__py3-none-any.whl → 1.4.0a20251022__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. nat/agent/base.py +3 -3
  2. nat/agent/reasoning_agent/reasoning_agent.py +6 -6
  3. nat/agent/register.py +1 -0
  4. nat/agent/responses_api_agent/__init__.py +14 -0
  5. nat/agent/responses_api_agent/register.py +126 -0
  6. nat/agent/tool_calling_agent/agent.py +6 -10
  7. nat/builder/context.py +2 -1
  8. nat/builder/intermediate_step_manager.py +6 -2
  9. nat/data_models/api_server.py +83 -33
  10. nat/data_models/intermediate_step.py +9 -1
  11. nat/data_models/llm.py +15 -1
  12. nat/data_models/openai_mcp.py +46 -0
  13. nat/data_models/optimizable.py +2 -1
  14. nat/data_models/thinking_mixin.py +2 -2
  15. nat/eval/evaluate.py +2 -0
  16. nat/eval/usage_stats.py +2 -0
  17. nat/front_ends/fastapi/fastapi_front_end_plugin_worker.py +3 -0
  18. nat/front_ends/fastapi/message_handler.py +65 -40
  19. nat/front_ends/fastapi/message_validator.py +1 -2
  20. nat/front_ends/mcp/mcp_front_end_config.py +32 -0
  21. nat/front_ends/mcp/mcp_front_end_plugin.py +9 -6
  22. nat/llm/aws_bedrock_llm.py +3 -3
  23. nat/llm/litellm_llm.py +6 -3
  24. nat/llm/nim_llm.py +3 -3
  25. nat/llm/openai_llm.py +4 -3
  26. nat/profiler/callbacks/langchain_callback_handler.py +32 -7
  27. nat/profiler/callbacks/llama_index_callback_handler.py +36 -2
  28. nat/profiler/callbacks/token_usage_base_model.py +2 -0
  29. nat/runtime/runner.py +11 -3
  30. nat/utils/exception_handlers/automatic_retries.py +205 -54
  31. nat/utils/responses_api.py +26 -0
  32. nat/utils/string_utils.py +16 -0
  33. {nvidia_nat-1.4.0a20251015.dist-info → nvidia_nat-1.4.0a20251022.dist-info}/METADATA +4 -4
  34. {nvidia_nat-1.4.0a20251015.dist-info → nvidia_nat-1.4.0a20251022.dist-info}/RECORD +39 -35
  35. {nvidia_nat-1.4.0a20251015.dist-info → nvidia_nat-1.4.0a20251022.dist-info}/WHEEL +0 -0
  36. {nvidia_nat-1.4.0a20251015.dist-info → nvidia_nat-1.4.0a20251022.dist-info}/entry_points.txt +0 -0
  37. {nvidia_nat-1.4.0a20251015.dist-info → nvidia_nat-1.4.0a20251022.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
  38. {nvidia_nat-1.4.0a20251015.dist-info → nvidia_nat-1.4.0a20251022.dist-info}/licenses/LICENSE.md +0 -0
  39. {nvidia_nat-1.4.0a20251015.dist-info → nvidia_nat-1.4.0a20251022.dist-info}/top_level.txt +0 -0
nat/agent/base.py CHANGED
@@ -102,11 +102,11 @@ class BaseAgent(ABC):
          AIMessage
              The LLM response
          """
-         output_message = ""
+         output_message = []
          async for event in runnable.astream(inputs, config=config):
-             output_message += event.content
+             output_message.append(event.content)

-         return AIMessage(content=output_message)
+         return AIMessage(content="".join(output_message))

      async def _call_llm(self, llm: Runnable, inputs: dict[str, Any], config: RunnableConfig | None = None) -> AIMessage:
          """
nat/agent/reasoning_agent/reasoning_agent.py CHANGED
@@ -157,12 +157,12 @@ async def build_reasoning_function(config: ReasoningFunctionConfig, builder: Bui
      prompt = prompt.to_string()

      # Get the reasoning output from the LLM
-     reasoning_output = ""
+     reasoning_output = []

      async for chunk in llm.astream(prompt):
-         reasoning_output += chunk.content
+         reasoning_output.append(chunk.content)

-     reasoning_output = remove_r1_think_tags(reasoning_output)
+     reasoning_output = remove_r1_think_tags("".join(reasoning_output))

      output = await downstream_template.ainvoke(input={
          "input_text": input_text, "reasoning_output": reasoning_output
@@ -200,12 +200,12 @@ async def build_reasoning_function(config: ReasoningFunctionConfig, builder: Bui
      prompt = prompt.to_string()

      # Get the reasoning output from the LLM
-     reasoning_output = ""
+     reasoning_output = []

      async for chunk in llm.astream(prompt):
-         reasoning_output += chunk.content
+         reasoning_output.append(chunk.content)

-     reasoning_output = remove_r1_think_tags(reasoning_output)
+     reasoning_output = remove_r1_think_tags("".join(reasoning_output))

      output = await downstream_template.ainvoke(input={
          "input_text": input_text, "reasoning_output": reasoning_output
nat/agent/register.py CHANGED
@@ -19,5 +19,6 @@
  from .prompt_optimizer import register as prompt_optimizer
  from .react_agent import register as react_agent
  from .reasoning_agent import reasoning_agent
+ from .responses_api_agent import register as responses_api_agent
  from .rewoo_agent import register as rewoo_agent
  from .tool_calling_agent import register as tool_calling_agent
nat/agent/responses_api_agent/__init__.py ADDED
@@ -0,0 +1,14 @@
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ # SPDX-License-Identifier: Apache-2.0
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
nat/agent/responses_api_agent/register.py ADDED
@@ -0,0 +1,126 @@
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ # SPDX-License-Identifier: Apache-2.0
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import logging
+ import typing
+
+ from pydantic import Field
+
+ from nat.agent.base import AGENT_LOG_PREFIX
+ from nat.builder.builder import Builder
+ from nat.builder.framework_enum import LLMFrameworkEnum
+ from nat.builder.function_info import FunctionInfo
+ from nat.cli.register_workflow import register_function
+ from nat.data_models.component_ref import FunctionRef
+ from nat.data_models.component_ref import LLMRef
+ from nat.data_models.function import FunctionBaseConfig
+ from nat.data_models.openai_mcp import OpenAIMCPSchemaTool
+
+ logger = logging.getLogger(__name__)
+
+
+ class ResponsesAPIAgentWorkflowConfig(FunctionBaseConfig, name="responses_api_agent"):
+     """
+     Defines an NeMo Agent Toolkit function that uses a Responses API
+     Agent performs reasoning inbetween tool calls, and utilizes the
+     tool names and descriptions to select the optimal tool.
+     """
+
+     llm_name: LLMRef = Field(description="The LLM model to use with the agent.")
+     verbose: bool = Field(default=False, description="Set the verbosity of the agent's logging.")
+     nat_tools: list[FunctionRef] = Field(default_factory=list, description="The list of tools to provide to the agent.")
+     mcp_tools: list[OpenAIMCPSchemaTool] = Field(
+         default_factory=list,
+         description="List of MCP tools to use with the agent. If empty, no MCP tools will be used.")
+     builtin_tools: list[dict[str, typing.Any]] = Field(
+         default_factory=list,
+         description="List of built-in tools to use with the agent. If empty, no built-in tools will be used.")
+
+     max_iterations: int = Field(default=15, description="Number of tool calls before stoping the agent.")
+     description: str = Field(default="Agent Workflow", description="The description of this functions use.")
+     parallel_tool_calls: bool = Field(default=False,
+                                       description="Specify whether to allow parallel tool calls in the agent.")
+     handle_tool_errors: bool = Field(
+         default=True,
+         description="Specify ability to handle tool calling errors. If False, tool errors will raise an exception.")
+
+
+ @register_function(config_type=ResponsesAPIAgentWorkflowConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN])
+ async def responses_api_agent_workflow(config: ResponsesAPIAgentWorkflowConfig, builder: Builder):
+     from langchain_core.messages.human import HumanMessage
+     from langchain_core.runnables import Runnable
+     from langchain_openai import ChatOpenAI
+
+     from nat.agent.tool_calling_agent.agent import ToolCallAgentGraph
+     from nat.agent.tool_calling_agent.agent import ToolCallAgentGraphState
+
+     llm: ChatOpenAI = await builder.get_llm(config.llm_name, wrapper_type=LLMFrameworkEnum.LANGCHAIN)
+     assert llm.use_responses_api, "Responses API Agent requires an LLM that supports the Responses API."
+
+     # Get tools
+     tools = []
+     nat_tools = await builder.get_tools(tool_names=config.nat_tools, wrapper_type=LLMFrameworkEnum.LANGCHAIN)
+     tools.extend(nat_tools)
+     # MCP tools are optional, if provided they will be used by the agent
+     tools.extend([m.model_dump() for m in config.mcp_tools])
+     # Built-in tools are optional, if provided they will be used by the agent
+     tools.extend(config.builtin_tools)
+
+     # Bind tools to LLM
+     if tools:
+         llm: Runnable = llm.bind_tools(tools=tools, parallel_tool_calls=config.parallel_tool_calls, strict=True)
+
+     if config.verbose:
+         logger.info("%s Using LLM: %s with tools: %s", AGENT_LOG_PREFIX, llm.model_name, tools)
+
+     agent = ToolCallAgentGraph(
+         llm=llm,
+         tools=nat_tools,  # MCP and built-in tools are already bound to the LLM and need not be handled by graph
+         detailed_logs=config.verbose,
+         handle_tool_errors=config.handle_tool_errors)
+
+     graph = await agent.build_graph()
+
+     async def _response_fn(input_message: str) -> str:
+         try:
+             # initialize the starting state with the user query
+             input_message = HumanMessage(content=input_message)
+             state = ToolCallAgentGraphState(messages=[input_message])
+
+             # run the Tool Calling Agent Graph
+             state = await graph.ainvoke(state, config={'recursion_limit': (config.max_iterations + 1) * 2})
+             # setting recursion_limit: 4 allows 1 tool call
+             # - allows the Tool Calling Agent to perform 1 cycle / call 1 single tool,
+             # - but stops the agent when it tries to call a tool a second time
+
+             # get and return the output from the state
+             state = ToolCallAgentGraphState(**state)
+             output_message = state.messages[-1]  # pylint: disable=E1136
+             content = output_message.content[-1]['text'] if output_message.content and isinstance(
+                 output_message.content[-1], dict) and 'text' in output_message.content[-1] else str(
+                     output_message.content)
+             return content
+         except Exception as ex:
+             logger.exception("%s Tool Calling Agent failed with exception: %s", AGENT_LOG_PREFIX, ex, exc_info=ex)
+             if config.verbose:
+                 return str(ex)
+             return "I seem to be having a problem."
+
+     try:
+         yield FunctionInfo.from_fn(_response_fn, description=config.description)
+     except GeneratorExit:
+         logger.exception("%s Workflow exited early!", AGENT_LOG_PREFIX, exc_info=True)
+     finally:
+         logger.debug("%s Cleaning up react_agent workflow.", AGENT_LOG_PREFIX)
nat/agent/tool_calling_agent/agent.py CHANGED
@@ -233,14 +233,10 @@ def create_tool_calling_agent_prompt(config: "ToolCallAgentWorkflowConfig") -> s
      """
      # the Tool Calling Agent prompt can be customized via config option system_prompt and additional_instructions.

-     if config.system_prompt:
-         prompt_str = config.system_prompt
-     else:
-         prompt_str = ""
-
-     if config.additional_instructions:
-         prompt_str += f" {config.additional_instructions}"
-
-     if len(prompt_str) > 0:
-         return prompt_str
+     prompt_strs = []
+     for msg in [config.system_prompt, config.additional_instructions]:
+         if msg is not None:
+             prompt_strs.append(msg)
+     if prompt_strs:
+         return " ".join(prompt_strs)
      return None
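In effect, the rewritten helper joins whichever of the two optional prompt fields are set with a single space and returns None when neither is set. A sketch of the resulting behavior with illustrative values:

    # system_prompt="You are helpful.", additional_instructions="Answer briefly."
    #   -> "You are helpful. Answer briefly."
    # system_prompt=None, additional_instructions=None
    #   -> None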
nat/builder/context.py CHANGED
@@ -19,6 +19,7 @@ from collections.abc import Awaitable
  from collections.abc import Callable
  from contextlib import contextmanager
  from contextvars import ContextVar
+ from functools import cached_property

  from nat.builder.intermediate_step_manager import IntermediateStepManager
  from nat.builder.user_interaction_manager import UserInteractionManager
@@ -167,7 +168,7 @@ class Context:
          """
          return UserInteractionManager(self._context_state)

-     @property
+     @cached_property
      def intermediate_step_manager(self) -> IntermediateStepManager:
          """
          Retrieves the intermediate step manager instance from the current context state.
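`intermediate_step_manager` moves from `property` to `functools.cached_property`, so the manager is built once per `Context` instance and reused on later accesses. A minimal sketch of the difference, using a hypothetical class:

    from functools import cached_property

    class Example:
        @cached_property
        def manager(self):
            # the body runs only on first access; the result is stored on the instance
            return object()

    e = Example()
    assert e.manager is e.manager  # same object on every access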
nat/builder/intermediate_step_manager.py CHANGED
@@ -101,7 +101,10 @@ class IntermediateStepManager:
      open_step = self._outstanding_start_steps.pop(payload.UUID, None)

      if (open_step is None):
-         logger.warning("Step id %s not found in outstanding start steps", payload.UUID)
+         logger.warning(
+             "Step id %s not found in outstanding start steps. "
+             "This may occur if the step was started in a different context or already completed.",
+             payload.UUID)
          return

      parent_step_id = open_step.step_parent_id
@@ -157,7 +160,8 @@ class IntermediateStepManager:
      if (open_step is None):
          logger.warning(
              "Created a chunk for step %s, but no matching start step was found. "
-             "Chunks must be created with the same ID as the start step.",
+             "Chunks must be created with the same ID as the start step. "
+             "This may occur if the step was started in a different context.",
              payload.UUID)
          return

nat/data_models/api_server.py CHANGED
@@ -121,7 +121,15 @@ class Message(BaseModel):
      role: UserMessageContentRoleType


- class ChatRequestOptionals(BaseModel):
+ class ChatRequest(BaseModel):
+     """
+     ChatRequest is a data model that represents a request to the NAT chat API.
+     Fully compatible with OpenAI Chat Completions API specification.
+     """
+
+     # Required fields
+     messages: typing.Annotated[list[Message], conlist(Message, min_length=1)]
+
      # Optional fields (OpenAI Chat Completions API compatible)
      model: str | None = Field(default=None, description="name of the model to use")
      frequency_penalty: float | None = Field(default=0.0,
@@ -145,17 +153,6 @@ class ChatRequestOptionals(BaseModel):
      tool_choice: str | dict[str, typing.Any] | None = Field(default=None, description="Controls which tool is called")
      parallel_tool_calls: bool | None = Field(default=True, description="Whether to enable parallel function calling")
      user: str | None = Field(default=None, description="Unique identifier representing end-user")
-
-
- class ChatRequest(ChatRequestOptionals):
-     """
-     ChatRequest is a data model that represents a request to the NAT chat API.
-     Fully compatible with OpenAI Chat Completions API specification.
-     """
-
-     # Required fields
-     messages: typing.Annotated[list[Message], conlist(Message, min_length=1)]
-
      model_config = ConfigDict(extra="allow",
                                json_schema_extra={
                                    "example": {
@@ -197,39 +194,82 @@ class ChatRequest(ChatRequestOptionals):
          top_p=top_p)


- class ChatRequestOrMessage(ChatRequestOptionals):
+ class ChatRequestOrMessage(BaseModel):
      """
-     ChatRequestOrMessage is a data model that represents either a conversation or a string input.
+     `ChatRequestOrMessage` is a data model that represents either a conversation or a string input.
      This is useful for functions that can handle either type of input.

-     `messages` is compatible with the OpenAI Chat Completions API specification.
-
-     `input_string` is a string input that can be used for functions that do not require a conversation.
-     """
+     - `messages` is compatible with the OpenAI Chat Completions API specification.
+     - `input_message` is a string input that can be used for functions that do not require a conversation.
+
+     Note: When `messages` is provided, extra fields are allowed to enable lossless round-trip
+     conversion with ChatRequest. When `input_message` is provided, no extra fields are permitted.
+     """
+     model_config = ConfigDict(
+         extra="allow",
+         json_schema_extra={
+             "examples": [
+                 {
+                     "input_message": "What can you do?"
+                 },
+                 {
+                     "messages": [{
+                         "role": "user", "content": "What can you do?"
+                     }],
+                     "model": "nvidia/nemotron",
+                     "temperature": 0.7
+                 },
+             ],
+             "oneOf": [
+                 {
+                     "required": ["input_message"],
+                     "properties": {
+                         "input_message": {
+                             "type": "string"
+                         },
+                     },
+                     "additionalProperties": {
+                         "not": True, "errorMessage": 'remove additional property ${0#}'
+                     },
+                 },
+                 {
+                     "required": ["messages"],
+                     "properties": {
+                         "messages": {
+                             "type": "array"
+                         },
+                     },
+                     "additionalProperties": True
+                 },
+             ]
+         },
+     )

      messages: typing.Annotated[list[Message] | None, conlist(Message, min_length=1)] = Field(
-         default=None, description="The conversation messages to process.")
+         default=None, description="A non-empty conversation of messages to process.")

-     input_string: str | None = Field(default=None, alias="input_message", description="The input message to process.")
+     input_message: str | None = Field(
+         default=None,
+         description="A single input message to process. Useful for functions that do not require a conversation")

      @property
      def is_string(self) -> bool:
-         return self.input_string is not None
+         return self.input_message is not None

      @property
      def is_conversation(self) -> bool:
          return self.messages is not None

      @model_validator(mode="after")
-     def validate_messages_or_input_string(self):
-         if self.messages is not None and self.input_string is not None:
-             raise ValueError("Either messages or input_message/input_string must be provided, not both")
-         if self.messages is None and self.input_string is None:
-             raise ValueError("Either messages or input_message/input_string must be provided")
-         if self.input_string is not None:
-             extra_fields = self.model_dump(exclude={"input_string"}, exclude_none=True, exclude_unset=True)
+     def validate_model(self):
+         if self.messages is not None and self.input_message is not None:
+             raise ValueError("Either messages or input_message must be provided, not both")
+         if self.messages is None and self.input_message is None:
+             raise ValueError("Either messages or input_message must be provided")
+         if self.input_message is not None:
+             extra_fields = self.model_dump(exclude={"input_message"}, exclude_none=True, exclude_unset=True)
              if len(extra_fields) > 0:
-                 raise ValueError("no extra fields are permitted when input_message/input_string is provided")
+                 raise ValueError("no extra fields are permitted when input_message is provided")
          return self


@@ -701,9 +741,9 @@ GlobalTypeConverter.register_converter(_string_to_nat_chat_request)


  def _chat_request_or_message_to_chat_request(data: ChatRequestOrMessage) -> ChatRequest:
-     if data.input_string is not None:
-         return _string_to_nat_chat_request(data.input_string)
-     return ChatRequest(**data.model_dump(exclude={"input_string"}))
+     if data.input_message is not None:
+         return _string_to_nat_chat_request(data.input_message)
+     return ChatRequest(**data.model_dump(exclude={"input_message"}))


  GlobalTypeConverter.register_converter(_chat_request_or_message_to_chat_request)
@@ -717,7 +757,17 @@ GlobalTypeConverter.register_converter(_chat_request_to_chat_request_or_message)


  def _chat_request_or_message_to_string(data: ChatRequestOrMessage) -> str:
-     return data.input_string or ""
+     if data.input_message is not None:
+         return data.input_message
+     # Extract content from last message in conversation
+     if data.messages is None:
+         return ""
+     content = data.messages[-1].content
+     if content is None:
+         return ""
+     if isinstance(content, str):
+         return content
+     return str(content)


  GlobalTypeConverter.register_converter(_chat_request_or_message_to_string)
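`ChatRequestOrMessage` now exposes `input_message` directly instead of `input_string` with an alias. A sketch of the two accepted shapes, assuming the surrounding `Message` model coerces role/content dictionaries as pydantic normally does:

    # plain string input; the validator rejects any extra fields in this form
    ChatRequestOrMessage(input_message="What can you do?")

    # OpenAI-style conversation; extra fields such as model or temperature are allowed
    ChatRequestOrMessage(messages=[{"role": "user", "content": "What can you do?"}],
                         model="nvidia/nemotron",
                         temperature=0.7)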
nat/data_models/intermediate_step.py CHANGED
@@ -103,11 +103,19 @@ class ToolSchema(BaseModel):
      function: ToolDetails = Field(..., description="The function details.")


+ class ServerToolUseSchema(BaseModel):
+     name: str
+     arguments: str | dict[str, typing.Any] | typing.Any
+     output: typing.Any
+
+     model_config = ConfigDict(extra="ignore")
+
+
  class TraceMetadata(BaseModel):
      chat_responses: typing.Any | None = None
      chat_inputs: typing.Any | None = None
      tool_inputs: typing.Any | None = None
-     tool_outputs: typing.Any | None = None
+     tool_outputs: list[ServerToolUseSchema] | typing.Any | None = None
      tool_info: typing.Any | None = None
      span_inputs: typing.Any | None = None
      span_outputs: typing.Any | None = None
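`tool_outputs` can now carry a typed list of server-side tool invocations. An illustrative record that would validate against `ServerToolUseSchema` (field values invented for the example):

    ServerToolUseSchema(name="web_search",
                        arguments={"query": "nvidia nat"},
                        output="top results",
                        extra_field="dropped")  # unknown keys are discarded because extra="ignore"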
nat/data_models/llm.py CHANGED
@@ -14,14 +14,28 @@
  # limitations under the License.

  import typing
+ from enum import Enum
+
+ from pydantic import Field

  from .common import BaseModelRegistryTag
  from .common import TypedBaseModel


+ class APITypeEnum(str, Enum):
+     CHAT_COMPLETION = "chat_completion"
+     RESPONSES = "responses"
+
+
  class LLMBaseConfig(TypedBaseModel, BaseModelRegistryTag):
      """Base configuration for LLM providers."""
-     pass
+
+     api_type: APITypeEnum = Field(default=APITypeEnum.CHAT_COMPLETION,
+                                   description="The type of API to use for the LLM provider.",
+                                   json_schema_extra={
+                                       "enum": [e.value for e in APITypeEnum],
+                                       "examples": [e.value for e in APITypeEnum],
+                                   })


  LLMBaseConfigT = typing.TypeVar("LLMBaseConfigT", bound=LLMBaseConfig)
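The new `api_type` field lets any LLM provider config opt into the Responses API. A minimal sketch of the enum's behavior: it is a string enum, so YAML or JSON configs can supply the plain values "chat_completion" or "responses":

    assert APITypeEnum("responses") is APITypeEnum.RESPONSES
    assert APITypeEnum.CHAT_COMPLETION.value == "chat_completion"  # the default on LLMBaseConfig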
nat/data_models/openai_mcp.py ADDED
@@ -0,0 +1,46 @@
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ # SPDX-License-Identifier: Apache-2.0
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from enum import Enum
+
+ from pydantic import BaseModel
+ from pydantic import ConfigDict
+ from pydantic import Field
+
+
+ class MCPApprovalRequiredEnum(str, Enum):
+     """
+     Enum to specify if approval is required for tool usage in the OpenAI MCP schema.
+     """
+     NEVER = "never"
+     ALWAYS = "always"
+     AUTO = "auto"
+
+
+ class OpenAIMCPSchemaTool(BaseModel):
+     """
+     Represents a tool in the OpenAI MCP schema.
+     """
+     type: str = "mcp"
+     server_label: str = Field(description="Label for the server where the tool is hosted.")
+     server_url: str = Field(description="URL of the server hosting the tool.")
+     allowed_tools: list[str] | None = Field(default=None,
+                                             description="List of allowed tool names that can be used by the agent.")
+     require_approval: MCPApprovalRequiredEnum = Field(default=MCPApprovalRequiredEnum.NEVER,
+                                                       description="Specifies if approval is required for tool usage.")
+     headers: dict[str, str] | None = Field(default=None,
+                                            description="Optional headers to include in requests to the tool server.")
+
+     model_config = ConfigDict(use_enum_values=True)
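A hedged example of declaring an MCP tool entry with the new model (the server label and URL are invented):

    tool = OpenAIMCPSchemaTool(
        server_label="docs",
        server_url="https://example.com/mcp",
        allowed_tools=["search"],
        require_approval=MCPApprovalRequiredEnum.NEVER,
    )
    tool.model_dump()  # enum fields serialize as plain strings because use_enum_values=True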
nat/data_models/optimizable.py CHANGED
@@ -23,6 +23,7 @@ from pydantic import BaseModel
  from pydantic import ConfigDict
  from pydantic import Field
  from pydantic import model_validator
+ from pydantic_core import PydanticUndefined

  T = TypeVar("T", int, float, bool, str)

@@ -66,7 +67,7 @@ class SearchSpace(BaseModel, Generic[T]):


  def OptimizableField(
-     default: Any,
+     default: Any = PydanticUndefined,
      *,
      space: SearchSpace | None = None,
      merge_conflict: str = "overwrite",
nat/data_models/thinking_mixin.py CHANGED
@@ -51,7 +51,7 @@ class ThinkingMixin(
      Returns the system prompt to use for thinking.
      For NVIDIA Nemotron, returns "/think" if enabled, else "/no_think".
      For Llama Nemotron v1.5, returns "/think" if enabled, else "/no_think".
-     For Llama Nemotron v1.0, returns "detailed thinking on" if enabled, else "detailed thinking off".
+     For Llama Nemotron v1.0 or v1.1, returns "detailed thinking on" if enabled, else "detailed thinking off".
      If thinking is not supported on the model, returns None.

      Returns:
@@ -72,7 +72,7 @@ class ThinkingMixin(
          return "/think" if self.thinking else "/no_think"

      if model.startswith("nvidia/llama"):
-         if "v1-0" in model or "v1-1" in model:
+         if "v1-0" in model or "v1-1" in model or model.endswith("v1"):
              return f"detailed thinking {'on' if self.thinking else 'off'}"

          if "v1-5" in model:
nat/eval/evaluate.py CHANGED
@@ -104,6 +104,8 @@ class EvaluationRun:
      usage_stats_per_llm[llm_name].prompt_tokens += step.token_usage.prompt_tokens
      usage_stats_per_llm[llm_name].completion_tokens += step.token_usage.completion_tokens
      usage_stats_per_llm[llm_name].total_tokens += step.token_usage.total_tokens
+     usage_stats_per_llm[llm_name].reasoning_tokens += step.token_usage.reasoning_tokens
+     usage_stats_per_llm[llm_name].cached_tokens += step.token_usage.cached_tokens
      total_tokens += step.token_usage.total_tokens

      # find min and max event timestamps
nat/eval/usage_stats.py CHANGED
@@ -21,6 +21,8 @@ from pydantic import BaseModel
  class UsageStatsLLM(BaseModel):
      prompt_tokens: int = 0
      completion_tokens: int = 0
+     cached_tokens: int = 0
+     reasoning_tokens: int = 0
      total_tokens: int = 0

nat/front_ends/fastapi/fastapi_front_end_plugin_worker.py CHANGED
@@ -1184,6 +1184,7 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
      "server": client.server_name,
      "transport": config.server.transport,
      "session_healthy": session_healthy,
+     "protected": True if config.server.auth_provider is not None else False,
      "tools": tools_info,
      "total_tools": len(configured_short_names),
      "available_tools": available_count
@@ -1196,6 +1197,7 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
      "server": "unknown",
      "transport": config.server.transport if config.server else "unknown",
      "session_healthy": False,
+     "protected": False,
      "error": str(e),
      "tools": [],
      "total_tools": 0,
@@ -1226,6 +1228,7 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
      "server": "streamable-http:http://localhost:9901/mcp",
      "transport": "streamable-http",
      "session_healthy": True,
+     "protected": False,
      "tools": [{
          "name": "tool_a",
          "description": "Tool A description",