nvidia-nat 1.3.0rc1__py3-none-any.whl → 1.4.0a20251008__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nat/agent/prompt_optimizer/register.py +2 -2
- nat/agent/react_agent/register.py +9 -1
- nat/agent/rewoo_agent/register.py +8 -1
- nat/authentication/oauth2/oauth2_auth_code_flow_provider.py +31 -18
- nat/builder/context.py +22 -6
- nat/cli/commands/mcp/mcp.py +6 -6
- nat/cli/commands/workflow/templates/config.yml.j2 +14 -12
- nat/cli/commands/workflow/templates/register.py.j2 +2 -2
- nat/cli/commands/workflow/templates/workflow.py.j2 +35 -21
- nat/cli/commands/workflow/workflow_commands.py +54 -10
- nat/cli/main.py +3 -0
- nat/data_models/api_server.py +65 -57
- nat/data_models/span.py +41 -3
- nat/experimental/test_time_compute/functions/execute_score_select_function.py +1 -1
- nat/experimental/test_time_compute/functions/ttc_tool_wrapper_function.py +2 -2
- nat/front_ends/fastapi/fastapi_front_end_plugin_worker.py +5 -35
- nat/front_ends/fastapi/message_validator.py +3 -1
- nat/observability/exporter/span_exporter.py +34 -14
- nat/profiler/decorators/framework_wrapper.py +1 -1
- nat/profiler/forecasting/models/linear_model.py +1 -1
- nat/profiler/forecasting/models/random_forest_regressor.py +1 -1
- nat/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +1 -1
- nat/profiler/inference_optimization/experimental/prefix_span_analysis.py +1 -1
- nat/runtime/runner.py +103 -6
- nat/runtime/session.py +26 -0
- nat/tool/memory_tools/get_memory_tool.py +1 -1
- nat/utils/decorators.py +210 -0
- {nvidia_nat-1.3.0rc1.dist-info → nvidia_nat-1.4.0a20251008.dist-info}/METADATA +1 -3
- {nvidia_nat-1.3.0rc1.dist-info → nvidia_nat-1.4.0a20251008.dist-info}/RECORD +34 -33
- {nvidia_nat-1.3.0rc1.dist-info → nvidia_nat-1.4.0a20251008.dist-info}/WHEEL +0 -0
- {nvidia_nat-1.3.0rc1.dist-info → nvidia_nat-1.4.0a20251008.dist-info}/entry_points.txt +0 -0
- {nvidia_nat-1.3.0rc1.dist-info → nvidia_nat-1.4.0a20251008.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
- {nvidia_nat-1.3.0rc1.dist-info → nvidia_nat-1.4.0a20251008.dist-info}/licenses/LICENSE.md +0 -0
- {nvidia_nat-1.3.0rc1.dist-info → nvidia_nat-1.4.0a20251008.dist-info}/top_level.txt +0 -0
nat/data_models/api_server.py
CHANGED
@@ -36,6 +36,15 @@ from nat.utils.type_converter import GlobalTypeConverter
 FINISH_REASONS = frozenset({'stop', 'length', 'tool_calls', 'content_filter', 'function_call'})
 
 
+class UserMessageContentRoleType(str, Enum):
+    """
+    Enum representing chat message roles in API requests and responses.
+    """
+    USER = "user"
+    ASSISTANT = "assistant"
+    SYSTEM = "system"
+
+
 class Request(BaseModel):
     """
     Request is a data model that represents HTTP request attributes.
@@ -108,7 +117,7 @@ UserContent = typing.Annotated[TextContent | ImageContent | AudioContent, Discri
 
 class Message(BaseModel):
     content: str | list[UserContent]
-    role:
+    role: UserMessageContentRoleType
 
 
 class ChatRequest(BaseModel):
@@ -164,7 +173,7 @@ class ChatRequest(BaseModel):
                     max_tokens: int | None = None,
                     top_p: float | None = None) -> "ChatRequest":
 
-        return ChatRequest(messages=[Message(content=data, role=
+        return ChatRequest(messages=[Message(content=data, role=UserMessageContentRoleType.USER)],
                            model=model,
                            temperature=temperature,
                            max_tokens=max_tokens,
@@ -178,7 +187,7 @@ class ChatRequest(BaseModel):
                     max_tokens: int | None = None,
                     top_p: float | None = None) -> "ChatRequest":
 
-        return ChatRequest(messages=[Message(content=content, role=
+        return ChatRequest(messages=[Message(content=content, role=UserMessageContentRoleType.USER)],
                            model=model,
                            temperature=temperature,
                            max_tokens=max_tokens,
@@ -187,29 +196,40 @@ class ChatRequest(BaseModel):
 
 class ChoiceMessage(BaseModel):
     content: str | None = None
-    role:
+    role: UserMessageContentRoleType | None = None
 
 
 class ChoiceDelta(BaseModel):
     """Delta object for streaming responses (OpenAI-compatible)"""
     content: str | None = None
-    role:
+    role: UserMessageContentRoleType | None = None
 
 
-class
+class ChoiceBase(BaseModel):
+    """Base choice model with common fields for both streaming and non-streaming responses"""
     model_config = ConfigDict(extra="allow")
-
-    message: ChoiceMessage | None = None
-    delta: ChoiceDelta | None = None
     finish_reason: typing.Literal['stop', 'length', 'tool_calls', 'content_filter', 'function_call'] | None = None
     index: int
-
+
+
+class ChatResponseChoice(ChoiceBase):
+    """Choice model for non-streaming responses - contains message field"""
+    message: ChoiceMessage
+
+
+class ChatResponseChunkChoice(ChoiceBase):
+    """Choice model for streaming responses - contains delta field"""
+    delta: ChoiceDelta
+
+
+# Backward compatibility alias
+Choice = ChatResponseChoice
 
 
 class Usage(BaseModel):
-    prompt_tokens: int
-    completion_tokens: int
-    total_tokens: int
+    prompt_tokens: int | None = None
+    completion_tokens: int | None = None
+    total_tokens: int | None = None
 
 
 class ResponseSerializable(abc.ABC):
@@ -245,10 +265,10 @@ class ChatResponse(ResponseBaseModelOutput):
     model_config = ConfigDict(extra="allow")
     id: str
     object: str = "chat.completion"
-    model: str = ""
+    model: str = "unknown-model"
     created: datetime.datetime
-    choices: list[
-    usage: Usage
+    choices: list[ChatResponseChoice]
+    usage: Usage
     system_fingerprint: str | None = None
     service_tier: typing.Literal["scale", "default"] | None = None
 
@@ -264,14 +284,14 @@ class ChatResponse(ResponseBaseModelOutput):
                     object_: str | None = None,
                     model: str | None = None,
                     created: datetime.datetime | None = None,
-                    usage: Usage
+                    usage: Usage) -> "ChatResponse":
 
         if id_ is None:
             id_ = str(uuid.uuid4())
         if object_ is None:
             object_ = "chat.completion"
         if model is None:
-            model = ""
+            model = "unknown-model"
         if created is None:
             created = datetime.datetime.now(datetime.UTC)
 
@@ -279,7 +299,12 @@ class ChatResponse(ResponseBaseModelOutput):
                             object=object_,
                             model=model,
                             created=created,
-                            choices=[
+                            choices=[
+                                ChatResponseChoice(index=0,
+                                                   message=ChoiceMessage(content=data,
+                                                                         role=UserMessageContentRoleType.ASSISTANT),
+                                                   finish_reason="stop")
+                            ],
                             usage=usage)
 
 
@@ -293,9 +318,9 @@ class ChatResponseChunk(ResponseBaseModelOutput):
     model_config = ConfigDict(extra="allow")
 
     id: str
-    choices: list[
+    choices: list[ChatResponseChunkChoice]
     created: datetime.datetime
-    model: str = ""
+    model: str = "unknown-model"
     object: str = "chat.completion.chunk"
     system_fingerprint: str | None = None
     service_tier: typing.Literal["scale", "default"] | None = None
@@ -319,12 +344,18 @@ class ChatResponseChunk(ResponseBaseModelOutput):
         if created is None:
             created = datetime.datetime.now(datetime.UTC)
         if model is None:
-            model = ""
+            model = "unknown-model"
         if object_ is None:
             object_ = "chat.completion.chunk"
 
         return ChatResponseChunk(id=id_,
-                                 choices=[
+                                 choices=[
+                                     ChatResponseChunkChoice(index=0,
+                                                             delta=ChoiceDelta(
+                                                                 content=data,
+                                                                 role=UserMessageContentRoleType.ASSISTANT),
+                                                             finish_reason="stop")
+                                 ],
                                  created=created,
                                  model=model,
                                  object=object_)
@@ -335,7 +366,7 @@ class ChatResponseChunk(ResponseBaseModelOutput):
                                 id_: str | None = None,
                                 created: datetime.datetime | None = None,
                                 model: str | None = None,
-                                role:
+                                role: UserMessageContentRoleType | None = None,
                                 finish_reason: str | None = None,
                                 usage: Usage | None = None,
                                 system_fingerprint: str | None = None) -> "ChatResponseChunk":
@@ -345,7 +376,7 @@ class ChatResponseChunk(ResponseBaseModelOutput):
         if created is None:
             created = datetime.datetime.now(datetime.UTC)
         if model is None:
-            model = ""
+            model = "unknown-model"
 
         delta = ChoiceDelta(content=content, role=role) if content is not None or role is not None else ChoiceDelta()
 
@@ -353,7 +384,14 @@ class ChatResponseChunk(ResponseBaseModelOutput):
 
         return ChatResponseChunk(
             id=id_,
-            choices=[
+            choices=[
+                ChatResponseChunkChoice(
+                    index=0,
+                    delta=delta,
+                    finish_reason=typing.cast(
+                        typing.Literal['stop', 'length', 'tool_calls', 'content_filter', 'function_call'] | None,
+                        final_finish_reason))
+            ],
             created=created,
             model=model,
             object="chat.completion.chunk",
@@ -398,11 +436,6 @@ class GenerateResponse(BaseModel):
     value: str | None = "default"
 
 
-class UserMessageContentRoleType(str, Enum):
-    USER = "user"
-    ASSISTANT = "assistant"
-
-
 class WebSocketMessageType(str, Enum):
     """
     WebSocketMessageType is an Enum that represents WebSocket Message types.
@@ -622,7 +655,7 @@ GlobalTypeConverter.register_converter(_nat_chat_request_to_string)
 
 
 def _string_to_nat_chat_request(data: str) -> ChatRequest:
-    return ChatRequest.from_string(data, model="")
+    return ChatRequest.from_string(data, model="unknown-model")
 
 
 GlobalTypeConverter.register_converter(_string_to_nat_chat_request)
@@ -654,22 +687,12 @@ def _string_to_nat_chat_response(data: str) -> ChatResponse:
 GlobalTypeConverter.register_converter(_string_to_nat_chat_response)
 
 
-def _chat_response_to_chat_response_chunk(data: ChatResponse) -> ChatResponseChunk:
-    # Preserve original message structure for backward compatibility
-    return ChatResponseChunk(id=data.id, choices=data.choices, created=data.created, model=data.model)
-
-
-GlobalTypeConverter.register_converter(_chat_response_to_chat_response_chunk)
-
-
 # ======== ChatResponseChunk Converters ========
 def _chat_response_chunk_to_string(data: ChatResponseChunk) -> str:
     if data.choices and len(data.choices) > 0:
         choice = data.choices[0]
         if choice.delta and choice.delta.content:
             return choice.delta.content
-        if choice.message and choice.message.content:
-            return choice.message.content
     return ""
 
 
@@ -685,21 +708,6 @@ def _string_to_nat_chat_response_chunk(data: str) -> ChatResponseChunk:
 
 GlobalTypeConverter.register_converter(_string_to_nat_chat_response_chunk)
 
-
-# ======== AINodeMessageChunk Converters ========
-def _ai_message_chunk_to_nat_chat_response_chunk(data) -> ChatResponseChunk:
-    '''Converts LangChain/LangGraph AINodeMessageChunk to ChatResponseChunk'''
-    content = ""
-    if hasattr(data, 'content') and data.content is not None:
-        content = str(data.content)
-    elif hasattr(data, 'text') and data.text is not None:
-        content = str(data.text)
-    elif hasattr(data, 'message') and data.message is not None:
-        content = str(data.message)
-
-    return ChatResponseChunk.create_streaming_chunk(content=content, role="assistant", finish_reason=None)
-
-
 # Compatibility aliases with previous releases
 AIQChatRequest = ChatRequest
 AIQChoiceMessage = ChoiceMessage
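For orientation, a minimal sketch (not part of the diff) of how the reworked choice models compose; it assumes the classes are imported from nat.data_models.api_server as changed above:

    import datetime
    import uuid

    from nat.data_models.api_server import (ChatResponse, ChatResponseChoice, ChoiceMessage, Usage,
                                            UserMessageContentRoleType)

    # Non-streaming responses now carry ChatResponseChoice (required `message`);
    # streaming chunks carry ChatResponseChunkChoice (required `delta`).
    response = ChatResponse(
        id=str(uuid.uuid4()),
        created=datetime.datetime.now(datetime.UTC),
        model="unknown-model",  # new default used when no model name is supplied
        choices=[
            ChatResponseChoice(index=0,
                               message=ChoiceMessage(content="Hello!",
                                                     role=UserMessageContentRoleType.ASSISTANT),
                               finish_reason="stop")
        ],
        usage=Usage(prompt_tokens=5, completion_tokens=2, total_tokens=7))

    assert response.choices[0].message.content == "Hello!"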
nat/data_models/span.py
CHANGED
@@ -128,10 +128,48 @@ class SpanStatus(BaseModel):
     message: str | None = Field(default=None, description="The status message of the span.")
 
 
+def _generate_nonzero_trace_id() -> int:
+    """Generate a non-zero 128-bit trace ID."""
+    return uuid.uuid4().int
+
+
+def _generate_nonzero_span_id() -> int:
+    """Generate a non-zero 64-bit span ID."""
+    return uuid.uuid4().int >> 64
+
+
 class SpanContext(BaseModel):
-    trace_id: int = Field(default_factory=
-
-
+    trace_id: int = Field(default_factory=_generate_nonzero_trace_id,
+                          description="The OTel-syle 128-bit trace ID of the span.")
+    span_id: int = Field(default_factory=_generate_nonzero_span_id,
+                         description="The OTel-syle 64-bit span ID of the span.")
+
+    @field_validator("trace_id", mode="before")
+    @classmethod
+    def _validate_trace_id(cls, v: int | str | None) -> int:
+        """Regenerate if trace_id is None; raise an exception if trace_id is invalid;"""
+        if isinstance(v, str):
+            v = uuid.UUID(v).int
+        if isinstance(v, type(None)):
+            v = _generate_nonzero_trace_id()
+        if v <= 0 or v >> 128:
+            raise ValueError(f"Invalid trace_id: must be a non-zero 128-bit integer, got {v}")
+        return v
+
+    @field_validator("span_id", mode="before")
+    @classmethod
+    def _validate_span_id(cls, v: int | str | None) -> int:
+        """Regenerate if span_id is None; raise an exception if span_id is invalid;"""
+        if isinstance(v, str):
+            try:
+                v = int(v, 16)
+            except ValueError:
+                raise ValueError(f"span_id unable to be parsed: {v}")
+        if isinstance(v, type(None)):
+            v = _generate_nonzero_span_id()
+        if v <= 0 or v >> 64:
+            raise ValueError(f"Invalid span_id: must be a non-zero 64-bit integer, got {v}")
+        return v
 
 
 class Span(BaseModel):
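A short sketch (illustrative, not from the diff) of how the new SpanContext validators behave, assuming the class is imported from nat.data_models.span:

    import uuid

    from nat.data_models.span import SpanContext

    # With no arguments, non-zero 128-bit trace and 64-bit span IDs are generated.
    ctx = SpanContext()
    assert 0 < ctx.trace_id < 2**128
    assert 0 < ctx.span_id < 2**64

    # The before-validators accept a UUID string for trace_id and a hex string for span_id.
    ctx2 = SpanContext(trace_id=str(uuid.uuid4()), span_id="1f2e3d4c5b6a7988")
    assert isinstance(ctx2.trace_id, int) and isinstance(ctx2.span_id, int)

    # Zero or out-of-range values are rejected by the validators.
    try:
        SpanContext(trace_id=0)
    except ValueError:  # pydantic's ValidationError subclasses ValueError
        pass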
nat/experimental/test_time_compute/functions/execute_score_select_function.py
CHANGED
@@ -46,7 +46,7 @@ async def execute_score_select_function(config: ExecuteScoreSelectFunctionConfig
 
     from pydantic import BaseModel
 
-    executable_fn: Function = builder.get_function(name=config.augmented_fn)
+    executable_fn: Function = await builder.get_function(name=config.augmented_fn)
 
     if config.scorer:
         scorer = await builder.get_ttc_strategy(strategy_name=config.scorer,
nat/experimental/test_time_compute/functions/ttc_tool_wrapper_function.py
CHANGED
@@ -98,8 +98,8 @@ async def register_ttc_tool_wrapper_function(
 
     augmented_function_desc = config.tool_description
 
-    fn_input_schema: BaseModel = augmented_function.input_schema
-    fn_output_schema: BaseModel = augmented_function.single_output_schema
+    fn_input_schema: type[BaseModel] = augmented_function.input_schema
+    fn_output_schema: type[BaseModel] | type[None] = augmented_function.single_output_schema
 
     runnable_llm = input_llm.with_structured_output(schema=fn_input_schema)
 
nat/front_ends/fastapi/fastapi_front_end_plugin_worker.py
CHANGED
@@ -689,10 +689,13 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
 
         async def post_openai_api_compatible(response: Response, request: Request, payload: request_type):
             # Check if streaming is requested
+
+            response.headers["Content-Type"] = "application/json"
             stream_requested = getattr(payload, 'stream', False)
 
             async with session_manager.session(http_connection=request):
                 if stream_requested:
+
                     # Return streaming response
                     return StreamingResponse(headers={"Content-Type": "text/event-stream; charset=utf-8"},
                                              content=generate_streaming_response_as_str(
@@ -703,40 +706,7 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
                                                  result_type=ChatResponseChunk,
                                                  output_type=ChatResponseChunk))
 
-
-                try:
-                    response.headers["Content-Type"] = "application/json"
-                    return await generate_single_response(payload, session_manager, result_type=ChatResponse)
-                except ValueError as e:
-                    if "Cannot get a single output value for streaming workflows" in str(e):
-                        # Workflow only supports streaming, but client requested non-streaming
-                        # Fall back to streaming and collect the result
-                        chunks = []
-                        async for chunk_str in generate_streaming_response_as_str(
-                                payload,
-                                session_manager=session_manager,
-                                streaming=True,
-                                step_adaptor=self.get_step_adaptor(),
-                                result_type=ChatResponseChunk,
-                                output_type=ChatResponseChunk):
-                            if chunk_str.startswith("data: ") and not chunk_str.startswith("data: [DONE]"):
-                                chunk_data = chunk_str[6:].strip()  # Remove "data: " prefix
-                                if chunk_data:
-                                    try:
-                                        chunk_json = ChatResponseChunk.model_validate_json(chunk_data)
-                                        if (chunk_json.choices and len(chunk_json.choices) > 0
-                                                and chunk_json.choices[0].delta
-                                                and chunk_json.choices[0].delta.content is not None):
-                                            chunks.append(chunk_json.choices[0].delta.content)
-                                    except Exception:
-                                        continue
-
-                        # Create a single response from collected chunks
-                        content = "".join(chunks)
-                        single_response = ChatResponse.from_string(content)
-                        response.headers["Content-Type"] = "application/json"
-                        return single_response
-                    raise
+                return await generate_single_response(payload, session_manager, result_type=ChatResponse)
 
         return post_openai_api_compatible
 
@@ -1128,7 +1098,7 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
            if configured_group.config.type != "mcp_client":
                continue
 
-            from nat.plugins.mcp.
+            from nat.plugins.mcp.client_config import MCPClientConfig
 
            config = configured_group.config
            assert isinstance(config, MCPClientConfig)
nat/front_ends/fastapi/message_validator.py
CHANGED
@@ -139,8 +139,10 @@ class MessageValidator:
                text_content: str = str(data_model.payload)
                validated_message_content = SystemResponseContent(text=text_content)
 
-            elif
+            elif isinstance(data_model, ChatResponse):
                validated_message_content = SystemResponseContent(text=data_model.choices[0].message.content)
+            elif isinstance(data_model, ChatResponseChunk):
+                validated_message_content = SystemResponseContent(text=data_model.choices[0].delta.content)
 
            elif (isinstance(data_model, ResponseIntermediateStep)):
                validated_message_content = SystemIntermediateStepContent(name=data_model.name,
nat/observability/exporter/span_exporter.py
CHANGED
@@ -126,6 +126,7 @@ class SpanExporter(ProcessingExporter[InputSpanT, OutputSpanT], SerializeMixin):
 
         parent_span = None
         span_ctx = None
+        workflow_trace_id = self._context_state.workflow_trace_id.get()
 
         # Look up the parent span to establish hierarchy
         # event.parent_id is the UUID of the last START step with a different UUID from current step
@@ -141,6 +142,9 @@ class SpanExporter(ProcessingExporter[InputSpanT, OutputSpanT], SerializeMixin):
         parent_span = parent_span.model_copy() if isinstance(parent_span, Span) else None
         if parent_span and parent_span.context:
             span_ctx = SpanContext(trace_id=parent_span.context.trace_id)
+        # No parent: adopt workflow trace id if available to keep all spans in the same trace
+        if span_ctx is None and workflow_trace_id:
+            span_ctx = SpanContext(trace_id=workflow_trace_id)
 
         # Extract start/end times from the step
         # By convention, `span_event_timestamp` is the time we started, `event_timestamp` is the time we ended.
@@ -154,23 +158,39 @@ class SpanExporter(ProcessingExporter[InputSpanT, OutputSpanT], SerializeMixin):
         else:
             sub_span_name = f"{event.payload.event_type}"
 
+        # Prefer parent/context trace id for attribute, else workflow trace id
+        _attr_trace_id = None
+        if span_ctx is not None:
+            _attr_trace_id = span_ctx.trace_id
+        elif parent_span and parent_span.context:
+            _attr_trace_id = parent_span.context.trace_id
+        elif workflow_trace_id:
+            _attr_trace_id = workflow_trace_id
+
+        attributes = {
+            f"{self._span_prefix}.event_type":
+                event.payload.event_type.value,
+            f"{self._span_prefix}.function.id":
+                event.function_ancestry.function_id if event.function_ancestry else "unknown",
+            f"{self._span_prefix}.function.name":
+                event.function_ancestry.function_name if event.function_ancestry else "unknown",
+            f"{self._span_prefix}.subspan.name":
+                event.payload.name or "",
+            f"{self._span_prefix}.event_timestamp":
+                event.event_timestamp,
+            f"{self._span_prefix}.framework":
+                event.payload.framework.value if event.payload.framework else "unknown",
+            f"{self._span_prefix}.conversation.id":
+                self._context_state.conversation_id.get() or "unknown",
+            f"{self._span_prefix}.workflow.run_id":
+                self._context_state.workflow_run_id.get() or "unknown",
+            f"{self._span_prefix}.workflow.trace_id": (f"{_attr_trace_id:032x}" if _attr_trace_id else "unknown"),
+        }
+
         sub_span = Span(name=sub_span_name,
                         parent=parent_span,
                         context=span_ctx,
-                        attributes=
-                            f"{self._span_prefix}.event_type":
-                                event.payload.event_type.value,
-                            f"{self._span_prefix}.function.id":
-                                event.function_ancestry.function_id if event.function_ancestry else "unknown",
-                            f"{self._span_prefix}.function.name":
-                                event.function_ancestry.function_name if event.function_ancestry else "unknown",
-                            f"{self._span_prefix}.subspan.name":
-                                event.payload.name or "",
-                            f"{self._span_prefix}.event_timestamp":
-                                event.event_timestamp,
-                            f"{self._span_prefix}.framework":
-                                event.payload.framework.value if event.payload.framework else "unknown",
-                        },
+                        attributes=attributes,
                         start_time=start_ns)
 
         span_kind = event_type_to_span_kind(event.event_type)
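The new workflow.trace_id attribute is emitted as a zero-padded 32-character hex string; a small illustration of that formatting (not from the diff):

    import uuid

    # A 128-bit trace ID rendered the way the exporter formats the attribute above.
    trace_id = uuid.uuid4().int
    attr_value = f"{trace_id:032x}"

    assert len(attr_value) == 32
    assert int(attr_value, 16) == trace_id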
nat/profiler/decorators/framework_wrapper.py
CHANGED
@@ -123,7 +123,7 @@ def set_framework_profiler_handler(
     except ImportError as e:
         logger.warning(
             "ADK profiler not available. " +
-            "Install NAT with ADK extras: pip install
+            "Install NAT with ADK extras: pip install \"nvidia-nat[adk]\". Error: %s",
             e)
     else:
         handler = ADKProfilerHandler()
nat/profiler/forecasting/models/linear_model.py
CHANGED
@@ -36,7 +36,7 @@ class LinearModel(ForecastingBaseModel):
     except ImportError:
         logger.error(
             "scikit-learn is not installed. Please install scikit-learn to use the LinearModel "
-            "profiling model or install
+            "profiling model or install \"nvidia-nat[profiler]\" to install all necessary profiling packages.")
 
         raise
 
nat/profiler/forecasting/models/random_forest_regressor.py
CHANGED
@@ -36,7 +36,7 @@ class RandomForestModel(ForecastingBaseModel):
     except ImportError:
         logger.error(
             "scikit-learn is not installed. Please install scikit-learn to use the RandomForest "
-            "profiling model or install
+            "profiling model or install \"nvidia-nat[profiler]\" to install all necessary profiling packages.")
 
         raise
 
nat/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py
CHANGED
@@ -304,7 +304,7 @@ def save_gantt_chart(all_nodes: list[CallNode], output_path: str) -> None:
         import matplotlib.pyplot as plt
     except ImportError:
         logger.error("matplotlib is not installed. Please install matplotlib to use generate plots for the profiler "
-                     "or install
+                     "or install \"nvidia-nat[profiler]\" to install all necessary profiling packages.")
 
         raise
 
nat/profiler/inference_optimization/experimental/prefix_span_analysis.py
CHANGED
@@ -212,7 +212,7 @@ def run_prefixspan(sequences_map: dict[int, list[PrefixCallNode]],
         from prefixspan import PrefixSpan
     except ImportError:
         logger.error("prefixspan is not installed. Please install prefixspan to run the prefix analysis in the "
-                     "profiler or install
+                     "profiler or install \"nvidia-nat[profiler]\" to install all necessary profiling packages.")
 
         raise
 