nvidia-nat 1.3.0a20250910__py3-none-any.whl → 1.4.0a20251112__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nat/agent/base.py +13 -8
- nat/agent/prompt_optimizer/prompt.py +68 -0
- nat/agent/prompt_optimizer/register.py +149 -0
- nat/agent/react_agent/agent.py +6 -5
- nat/agent/react_agent/register.py +49 -39
- nat/agent/reasoning_agent/reasoning_agent.py +17 -15
- nat/agent/register.py +2 -0
- nat/agent/responses_api_agent/__init__.py +14 -0
- nat/agent/responses_api_agent/register.py +126 -0
- nat/agent/rewoo_agent/agent.py +304 -117
- nat/agent/rewoo_agent/prompt.py +19 -22
- nat/agent/rewoo_agent/register.py +51 -38
- nat/agent/tool_calling_agent/agent.py +75 -17
- nat/agent/tool_calling_agent/register.py +46 -23
- nat/authentication/api_key/api_key_auth_provider.py +6 -11
- nat/authentication/api_key/api_key_auth_provider_config.py +8 -5
- nat/authentication/credential_validator/__init__.py +14 -0
- nat/authentication/credential_validator/bearer_token_validator.py +557 -0
- nat/authentication/http_basic_auth/http_basic_auth_provider.py +1 -1
- nat/authentication/interfaces.py +5 -2
- nat/authentication/oauth2/oauth2_auth_code_flow_provider.py +69 -36
- nat/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +2 -1
- nat/authentication/oauth2/oauth2_resource_server_config.py +125 -0
- nat/builder/builder.py +55 -23
- nat/builder/component_utils.py +9 -5
- nat/builder/context.py +54 -15
- nat/builder/eval_builder.py +14 -9
- nat/builder/framework_enum.py +1 -0
- nat/builder/front_end.py +1 -1
- nat/builder/function.py +370 -0
- nat/builder/function_info.py +1 -1
- nat/builder/intermediate_step_manager.py +38 -2
- nat/builder/workflow.py +5 -0
- nat/builder/workflow_builder.py +306 -54
- nat/cli/cli_utils/config_override.py +1 -1
- nat/cli/commands/info/info.py +16 -6
- nat/cli/commands/mcp/__init__.py +14 -0
- nat/cli/commands/mcp/mcp.py +986 -0
- nat/cli/commands/optimize.py +90 -0
- nat/cli/commands/start.py +1 -1
- nat/cli/commands/workflow/templates/config.yml.j2 +14 -13
- nat/cli/commands/workflow/templates/register.py.j2 +2 -2
- nat/cli/commands/workflow/templates/workflow.py.j2 +35 -21
- nat/cli/commands/workflow/workflow_commands.py +60 -18
- nat/cli/entrypoint.py +15 -11
- nat/cli/main.py +3 -0
- nat/cli/register_workflow.py +38 -4
- nat/cli/type_registry.py +72 -1
- nat/control_flow/__init__.py +0 -0
- nat/control_flow/register.py +20 -0
- nat/control_flow/router_agent/__init__.py +0 -0
- nat/control_flow/router_agent/agent.py +329 -0
- nat/control_flow/router_agent/prompt.py +48 -0
- nat/control_flow/router_agent/register.py +91 -0
- nat/control_flow/sequential_executor.py +166 -0
- nat/data_models/agent.py +34 -0
- nat/data_models/api_server.py +199 -69
- nat/data_models/authentication.py +23 -9
- nat/data_models/common.py +47 -0
- nat/data_models/component.py +2 -0
- nat/data_models/component_ref.py +11 -0
- nat/data_models/config.py +41 -17
- nat/data_models/dataset_handler.py +4 -3
- nat/data_models/function.py +34 -0
- nat/data_models/function_dependencies.py +8 -0
- nat/data_models/intermediate_step.py +9 -1
- nat/data_models/llm.py +15 -1
- nat/data_models/openai_mcp.py +46 -0
- nat/data_models/optimizable.py +208 -0
- nat/data_models/optimizer.py +161 -0
- nat/data_models/span.py +41 -3
- nat/data_models/thinking_mixin.py +2 -2
- nat/embedder/azure_openai_embedder.py +2 -1
- nat/embedder/nim_embedder.py +3 -2
- nat/embedder/openai_embedder.py +3 -2
- nat/eval/config.py +1 -1
- nat/eval/dataset_handler/dataset_downloader.py +3 -2
- nat/eval/dataset_handler/dataset_filter.py +34 -2
- nat/eval/evaluate.py +10 -3
- nat/eval/evaluator/base_evaluator.py +1 -1
- nat/eval/rag_evaluator/evaluate.py +7 -4
- nat/eval/register.py +4 -0
- nat/eval/runtime_evaluator/__init__.py +14 -0
- nat/eval/runtime_evaluator/evaluate.py +123 -0
- nat/eval/runtime_evaluator/register.py +100 -0
- nat/eval/swe_bench_evaluator/evaluate.py +1 -1
- nat/eval/trajectory_evaluator/register.py +1 -1
- nat/eval/tunable_rag_evaluator/evaluate.py +1 -1
- nat/eval/usage_stats.py +2 -0
- nat/eval/utils/output_uploader.py +3 -2
- nat/eval/utils/weave_eval.py +17 -3
- nat/experimental/decorators/experimental_warning_decorator.py +27 -7
- nat/experimental/test_time_compute/functions/execute_score_select_function.py +1 -1
- nat/experimental/test_time_compute/functions/plan_select_execute_function.py +7 -3
- nat/experimental/test_time_compute/functions/ttc_tool_orchestration_function.py +1 -1
- nat/experimental/test_time_compute/functions/ttc_tool_wrapper_function.py +3 -3
- nat/experimental/test_time_compute/models/strategy_base.py +2 -2
- nat/experimental/test_time_compute/selection/llm_based_output_merging_selector.py +1 -1
- nat/front_ends/console/authentication_flow_handler.py +82 -30
- nat/front_ends/console/console_front_end_plugin.py +19 -7
- nat/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +1 -1
- nat/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +52 -17
- nat/front_ends/fastapi/dask_client_mixin.py +65 -0
- nat/front_ends/fastapi/fastapi_front_end_config.py +25 -3
- nat/front_ends/fastapi/fastapi_front_end_plugin.py +140 -3
- nat/front_ends/fastapi/fastapi_front_end_plugin_worker.py +445 -265
- nat/front_ends/fastapi/job_store.py +518 -99
- nat/front_ends/fastapi/main.py +11 -19
- nat/front_ends/fastapi/message_handler.py +69 -44
- nat/front_ends/fastapi/message_validator.py +8 -7
- nat/front_ends/fastapi/utils.py +57 -0
- nat/front_ends/mcp/introspection_token_verifier.py +73 -0
- nat/front_ends/mcp/mcp_front_end_config.py +71 -3
- nat/front_ends/mcp/mcp_front_end_plugin.py +85 -21
- nat/front_ends/mcp/mcp_front_end_plugin_worker.py +248 -29
- nat/front_ends/mcp/memory_profiler.py +320 -0
- nat/front_ends/mcp/tool_converter.py +78 -25
- nat/front_ends/simple_base/simple_front_end_plugin_base.py +3 -1
- nat/llm/aws_bedrock_llm.py +21 -8
- nat/llm/azure_openai_llm.py +14 -5
- nat/llm/litellm_llm.py +80 -0
- nat/llm/nim_llm.py +23 -9
- nat/llm/openai_llm.py +19 -7
- nat/llm/register.py +4 -0
- nat/llm/utils/thinking.py +1 -1
- nat/observability/exporter/base_exporter.py +1 -1
- nat/observability/exporter/processing_exporter.py +29 -55
- nat/observability/exporter/span_exporter.py +43 -15
- nat/observability/exporter_manager.py +2 -2
- nat/observability/mixin/redaction_config_mixin.py +5 -4
- nat/observability/mixin/tagging_config_mixin.py +26 -14
- nat/observability/mixin/type_introspection_mixin.py +420 -107
- nat/observability/processor/batching_processor.py +1 -1
- nat/observability/processor/processor.py +3 -0
- nat/observability/processor/redaction/__init__.py +24 -0
- nat/observability/processor/redaction/contextual_redaction_processor.py +125 -0
- nat/observability/processor/redaction/contextual_span_redaction_processor.py +66 -0
- nat/observability/processor/redaction/redaction_processor.py +177 -0
- nat/observability/processor/redaction/span_header_redaction_processor.py +92 -0
- nat/observability/processor/span_tagging_processor.py +21 -14
- nat/observability/register.py +16 -0
- nat/profiler/callbacks/langchain_callback_handler.py +32 -7
- nat/profiler/callbacks/llama_index_callback_handler.py +36 -2
- nat/profiler/callbacks/token_usage_base_model.py +2 -0
- nat/profiler/decorators/framework_wrapper.py +61 -9
- nat/profiler/decorators/function_tracking.py +35 -3
- nat/profiler/forecasting/models/linear_model.py +1 -1
- nat/profiler/forecasting/models/random_forest_regressor.py +1 -1
- nat/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +1 -1
- nat/profiler/inference_optimization/experimental/prefix_span_analysis.py +1 -1
- nat/profiler/parameter_optimization/__init__.py +0 -0
- nat/profiler/parameter_optimization/optimizable_utils.py +93 -0
- nat/profiler/parameter_optimization/optimizer_runtime.py +67 -0
- nat/profiler/parameter_optimization/parameter_optimizer.py +189 -0
- nat/profiler/parameter_optimization/parameter_selection.py +107 -0
- nat/profiler/parameter_optimization/pareto_visualizer.py +460 -0
- nat/profiler/parameter_optimization/prompt_optimizer.py +384 -0
- nat/profiler/parameter_optimization/update_helpers.py +66 -0
- nat/profiler/utils.py +3 -1
- nat/registry_handlers/pypi/register_pypi.py +5 -3
- nat/registry_handlers/rest/register_rest.py +5 -3
- nat/retriever/milvus/retriever.py +1 -1
- nat/retriever/nemo_retriever/register.py +2 -1
- nat/runtime/loader.py +1 -1
- nat/runtime/runner.py +111 -6
- nat/runtime/session.py +49 -3
- nat/settings/global_settings.py +2 -2
- nat/tool/chat_completion.py +4 -1
- nat/tool/code_execution/code_sandbox.py +3 -6
- nat/tool/code_execution/local_sandbox/Dockerfile.sandbox +19 -32
- nat/tool/code_execution/local_sandbox/local_sandbox_server.py +6 -1
- nat/tool/code_execution/local_sandbox/sandbox.requirements.txt +2 -0
- nat/tool/code_execution/local_sandbox/start_local_sandbox.sh +10 -4
- nat/tool/datetime_tools.py +1 -1
- nat/tool/github_tools.py +450 -0
- nat/tool/memory_tools/add_memory_tool.py +3 -3
- nat/tool/memory_tools/delete_memory_tool.py +3 -4
- nat/tool/memory_tools/get_memory_tool.py +4 -4
- nat/tool/register.py +2 -7
- nat/tool/server_tools.py +15 -2
- nat/utils/__init__.py +76 -0
- nat/utils/callable_utils.py +70 -0
- nat/utils/data_models/schema_validator.py +1 -1
- nat/utils/decorators.py +210 -0
- nat/utils/exception_handlers/automatic_retries.py +278 -72
- nat/utils/io/yaml_tools.py +73 -3
- nat/utils/log_levels.py +25 -0
- nat/utils/responses_api.py +26 -0
- nat/utils/string_utils.py +16 -0
- nat/utils/type_converter.py +12 -3
- nat/utils/type_utils.py +6 -2
- nvidia_nat-1.4.0a20251112.dist-info/METADATA +197 -0
- {nvidia_nat-1.3.0a20250910.dist-info → nvidia_nat-1.4.0a20251112.dist-info}/RECORD +199 -165
- {nvidia_nat-1.3.0a20250910.dist-info → nvidia_nat-1.4.0a20251112.dist-info}/entry_points.txt +1 -0
- nat/cli/commands/info/list_mcp.py +0 -461
- nat/data_models/temperature_mixin.py +0 -43
- nat/data_models/top_p_mixin.py +0 -43
- nat/observability/processor/header_redaction_processor.py +0 -123
- nat/observability/processor/redaction_processor.py +0 -77
- nat/tool/code_execution/test_code_execution_sandbox.py +0 -414
- nat/tool/github_tools/create_github_commit.py +0 -133
- nat/tool/github_tools/create_github_issue.py +0 -87
- nat/tool/github_tools/create_github_pr.py +0 -106
- nat/tool/github_tools/get_github_file.py +0 -106
- nat/tool/github_tools/get_github_issue.py +0 -166
- nat/tool/github_tools/get_github_pr.py +0 -256
- nat/tool/github_tools/update_github_issue.py +0 -100
- nvidia_nat-1.3.0a20250910.dist-info/METADATA +0 -373
- /nat/{tool/github_tools → agent/prompt_optimizer}/__init__.py +0 -0
- {nvidia_nat-1.3.0a20250910.dist-info → nvidia_nat-1.4.0a20251112.dist-info}/WHEEL +0 -0
- {nvidia_nat-1.3.0a20250910.dist-info → nvidia_nat-1.4.0a20251112.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
- {nvidia_nat-1.3.0a20250910.dist-info → nvidia_nat-1.4.0a20251112.dist-info}/licenses/LICENSE.md +0 -0
- {nvidia_nat-1.3.0a20250910.dist-info → nvidia_nat-1.4.0a20251112.dist-info}/top_level.txt +0 -0
nat/data_models/optimizer.py
ADDED

@@ -0,0 +1,161 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from enum import Enum
+from pathlib import Path
+
+from pydantic import BaseModel
+from pydantic import Field
+
+
+class OptimizerMetric(BaseModel):
+    """
+    Parameters used by the workflow optimizer to define a metric to optimize.
+    """
+    evaluator_name: str = Field(description="Name of the metric to optimize.")
+    direction: str = Field(description="Direction of the optimization. Can be 'maximize' or 'minimize'.")
+    weight: float = Field(description="Weight of the metric in the optimization process.", default=1.0)
+
+
+class SamplerType(str, Enum):
+    BAYESIAN = "bayesian"
+    GRID = "grid"
+
+
+class NumericOptimizationConfig(BaseModel):
+    """
+    Configuration for numeric/enum optimization (Optuna).
+    """
+    enabled: bool = Field(default=True, description="Enable numeric optimization")
+    n_trials: int = Field(description="Number of trials for numeric optimization.", default=20)
+    sampler: SamplerType | None = Field(
+        default=None,
+        description="Sampling strategy for numeric optimization. Options: None or 'bayesian' uses \
+the Optuna default (TPE for single-objective, NSGA-II for multi-objective) or 'grid' performs \
+exhaustive grid search over parameter combinations. Defaults to None.",
+    )
+
+
+class PromptGAOptimizationConfig(BaseModel):
+    """
+    Configuration for prompt optimization using a Genetic Algorithm.
+    """
+    enabled: bool = Field(default=False, description="Enable GA-based prompt optimization")
+
+    # Prompt optimization function hooks
+    prompt_population_init_function: str | None = Field(
+        default=None,
+        description="Optional function name to initialize/mutate candidate prompts.",
+    )
+    prompt_recombination_function: str | None = Field(
+        default=None,
+        description="Optional function name to recombine two parent prompts into a child.",
+    )
+
+    # Genetic algorithm configuration
+    ga_population_size: int = Field(
+        description="Population size for genetic algorithm prompt optimization.",
+        default=24,
+    )
+    ga_generations: int = Field(
+        description="Number of generations to evolve in GA prompt optimization.",
+        default=15,
+    )
+    ga_offspring_size: int | None = Field(
+        description="Number of offspring to produce per generation. Defaults to population_size - elitism.",
+        default=None,
+    )
+    ga_crossover_rate: float = Field(
+        description="Probability of applying crossover during reproduction.",
+        default=0.8,
+        ge=0.0,
+        le=1.0,
+    )
+    ga_mutation_rate: float = Field(
+        description="Probability of mutating a child after crossover.",
+        default=0.3,
+        ge=0.0,
+        le=1.0,
+    )
+    ga_elitism: int = Field(
+        description="Number of top individuals carried over unchanged each generation.",
+        default=2,
+    )
+    ga_selection_method: str = Field(
+        description="Parent selection strategy: 'tournament' or 'roulette'.",
+        default="tournament",
+    )
+    ga_tournament_size: int = Field(
+        description="Tournament size when using tournament selection.",
+        default=3,
+    )
+    ga_parallel_evaluations: int = Field(
+        description="Max number of individuals to evaluate concurrently per generation.",
+        default=8,
+    )
+    ga_diversity_lambda: float = Field(
+        description="Strength of diversity penalty (0 disables). Penalizes identical/near-identical prompts.",
+        default=0.0,
+        ge=0.0,
+    )
+
+
+class OptimizerConfig(BaseModel):
+    """
+    Parameters used by the workflow optimizer.
+    """
+    output_path: Path | None = Field(
+        default=None,
+        description="Path to the output directory where the results will be saved.",
+    )
+
+    eval_metrics: dict[str, OptimizerMetric] | None = Field(
+        description="List of evaluation metrics to optimize.",
+        default=None,
+    )
+
+    reps_per_param_set: int = Field(
+        default=3,
+        description="Number of repetitions per parameter set for the optimization.",
+    )
+
+    target: float | None = Field(
+        description=(
+            "Target value for the optimization. If set, the optimization will stop when this value is reached."),
+        default=None,
+    )
+
+    multi_objective_combination_mode: str = Field(
+        description="Method to combine multiple objectives into a single score.",
+        default="harmonic",
+    )
+
+    # Nested configs
+    numeric: NumericOptimizationConfig = NumericOptimizationConfig()
+    prompt: PromptGAOptimizationConfig = PromptGAOptimizationConfig()
+
+
+class OptimizerRunConfig(BaseModel):
+    """
+    Parameters used for an Optimizer run.
+    """
+    # Eval parameters
+
+    config_file: Path | BaseModel  # allow for instantiated configs to be passed in
+    dataset: str | Path | None  # dataset file path can be specified in the config file
+    result_json_path: str = "$"
+    endpoint: str | None = None  # only used when running the workflow remotely
+    endpoint_timeout: int = 300
+    override: tuple[tuple[str, str], ...] = ()
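To make the new optimizer data models concrete, here is a minimal usage sketch, assuming nvidia-nat 1.4.0a is installed so that nat.data_models.optimizer is importable as added above; the metric names are illustrative and not part of the diff:

```python
# Minimal sketch: compose an optimizer config from the models added above.
# Assumes the 1.4.0a wheel is installed; metric names are made up.
from nat.data_models.optimizer import NumericOptimizationConfig
from nat.data_models.optimizer import OptimizerConfig
from nat.data_models.optimizer import OptimizerMetric
from nat.data_models.optimizer import SamplerType

config = OptimizerConfig(
    eval_metrics={
        "accuracy": OptimizerMetric(evaluator_name="accuracy", direction="maximize", weight=2.0),
        "latency": OptimizerMetric(evaluator_name="avg_llm_latency", direction="minimize"),
    },
    reps_per_param_set=3,  # re-run each parameter set 3 times to reduce noise
    numeric=NumericOptimizationConfig(n_trials=50, sampler=SamplerType.GRID),
)
print(config.model_dump_json(indent=2))  # pydantic v2 serialization
```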
nat/data_models/span.py
CHANGED

@@ -128,10 +128,48 @@ class SpanStatus(BaseModel):
     message: str | None = Field(default=None, description="The status message of the span.")


+def _generate_nonzero_trace_id() -> int:
+    """Generate a non-zero 128-bit trace ID."""
+    return uuid.uuid4().int
+
+
+def _generate_nonzero_span_id() -> int:
+    """Generate a non-zero 64-bit span ID."""
+    return uuid.uuid4().int >> 64
+
+
 class SpanContext(BaseModel):
-    trace_id: int = Field(default_factory=
-
-
+    trace_id: int = Field(default_factory=_generate_nonzero_trace_id,
+                          description="The OTel-style 128-bit trace ID of the span.")
+    span_id: int = Field(default_factory=_generate_nonzero_span_id,
+                         description="The OTel-style 64-bit span ID of the span.")
+
+    @field_validator("trace_id", mode="before")
+    @classmethod
+    def _validate_trace_id(cls, v: int | str | None) -> int:
+        """Regenerate if trace_id is None; raise an exception if trace_id is invalid."""
+        if isinstance(v, str):
+            v = uuid.UUID(v).int
+        if isinstance(v, type(None)):
+            v = _generate_nonzero_trace_id()
+        if v <= 0 or v >> 128:
+            raise ValueError(f"Invalid trace_id: must be a non-zero 128-bit integer, got {v}")
+        return v
+
+    @field_validator("span_id", mode="before")
+    @classmethod
+    def _validate_span_id(cls, v: int | str | None) -> int:
+        """Regenerate if span_id is None; raise an exception if span_id is invalid."""
+        if isinstance(v, str):
+            try:
+                v = int(v, 16)
+            except ValueError:
+                raise ValueError(f"span_id unable to be parsed: {v}")
+        if isinstance(v, type(None)):
+            v = _generate_nonzero_span_id()
+        if v <= 0 or v >> 64:
+            raise ValueError(f"Invalid span_id: must be a non-zero 64-bit integer, got {v}")
+        return v


 class Span(BaseModel):
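A short sketch of how the new SpanContext validators behave, assuming the class is importable from nat.data_models.span as shown; note that pydantic raises ValidationError (a ValueError subclass) around the validator's error:

```python
# Sketch of the new validators, assuming `nat.data_models.span` is importable.
from nat.data_models.span import SpanContext

ctx = SpanContext()  # trace_id / span_id auto-generated, guaranteed non-zero
fixed = SpanContext(trace_id="00000000-0000-4000-8000-000000000001")  # UUID string -> int
hex_id = SpanContext(span_id="1f")  # hex string parsed via int(v, 16)

try:
    SpanContext(span_id=0)  # zero or out-of-range IDs are rejected
except ValueError as err:  # pydantic's ValidationError subclasses ValueError
    print(err)
```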
nat/data_models/thinking_mixin.py
CHANGED

@@ -51,7 +51,7 @@ class ThinkingMixin(
     Returns the system prompt to use for thinking.
     For NVIDIA Nemotron, returns "/think" if enabled, else "/no_think".
     For Llama Nemotron v1.5, returns "/think" if enabled, else "/no_think".
-    For Llama Nemotron v1.0, returns "detailed thinking on" if enabled, else "detailed thinking off".
+    For Llama Nemotron v1.0 or v1.1, returns "detailed thinking on" if enabled, else "detailed thinking off".
     If thinking is not supported on the model, returns None.

     Returns:

@@ -72,7 +72,7 @@ class ThinkingMixin(
         return "/think" if self.thinking else "/no_think"

     if model.startswith("nvidia/llama"):
-        if "v1-0" in model or "v1-1" in model:
+        if "v1-0" in model or "v1-1" in model or model.endswith("v1"):
             return f"detailed thinking {'on' if self.thinking else 'off'}"

         if "v1-5" in model:
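The broadened check means bare "...-v1" model names now also receive the "detailed thinking" system prompt. A standalone re-statement of the predicate (illustrative only, not the library code; the model names are examples):

```python
# Standalone re-statement of the broadened version check (illustrative only).
def uses_detailed_thinking_prompt(model: str) -> bool:
    return model.startswith("nvidia/llama") and (
        "v1-0" in model or "v1-1" in model or model.endswith("v1"))

assert uses_detailed_thinking_prompt("nvidia/llama-3.1-nemotron-ultra-253b-v1")
assert not uses_detailed_thinking_prompt("nvidia/llama-3.3-nemotron-super-49b-v1-5")
```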
nat/embedder/azure_openai_embedder.py
CHANGED

@@ -20,6 +20,7 @@ from pydantic import Field
 from nat.builder.builder import Builder
 from nat.builder.embedder import EmbedderProviderInfo
 from nat.cli.register_workflow import register_embedder_provider
+from nat.data_models.common import OptionalSecretStr
 from nat.data_models.embedder import EmbedderBaseConfig
 from nat.data_models.retry_mixin import RetryMixin

@@ -29,7 +30,7 @@ class AzureOpenAIEmbedderModelConfig(EmbedderBaseConfig, RetryMixin, name="azure

     model_config = ConfigDict(protected_namespaces=(), extra="allow")

-    api_key:
+    api_key: OptionalSecretStr = Field(default=None, description="Azure OpenAI API key to interact with hosted model.")
     api_version: str = Field(default="2025-04-01-preview", description="Azure OpenAI API version.")
     azure_endpoint: str | None = Field(validation_alias=AliasChoices("azure_endpoint", "base_url"),
                                        serialization_alias="azure_endpoint",
nat/embedder/nim_embedder.py
CHANGED

@@ -23,6 +23,7 @@ from pydantic import Field
 from nat.builder.builder import Builder
 from nat.builder.embedder import EmbedderProviderInfo
 from nat.cli.register_workflow import register_embedder_provider
+from nat.data_models.common import OptionalSecretStr
 from nat.data_models.embedder import EmbedderBaseConfig
 from nat.data_models.retry_mixin import RetryMixin

@@ -41,7 +42,7 @@ TruncationOption = typing.Annotated[str, AfterValidator(option_in_allowed_values
 class NIMEmbedderModelConfig(EmbedderBaseConfig, RetryMixin, name="nim"):
     """A NVIDIA Inference Microservice (NIM) embedder provider to be used with an embedder client."""

-    api_key:
+    api_key: OptionalSecretStr = Field(default=None, description="NVIDIA API key to interact with hosted NIM.")
     base_url: str | None = Field(default=None, description="Base url to the hosted NIM.")
     model_name: str = Field(validation_alias=AliasChoices("model_name", "model"),
                             serialization_alias="model",

@@ -50,7 +51,7 @@ class NIMEmbedderModelConfig(EmbedderBaseConfig, RetryMixin, name="nim"):
                             description=("The truncation strategy if the input on the "
                                          "server side if it's too large."))

-    model_config = ConfigDict(protected_namespaces=())
+    model_config = ConfigDict(protected_namespaces=(), extra="allow")


 @register_embedder_provider(config_type=NIMEmbedderModelConfig)
nat/embedder/openai_embedder.py
CHANGED

@@ -20,6 +20,7 @@ from pydantic import Field
 from nat.builder.builder import Builder
 from nat.builder.embedder import EmbedderProviderInfo
 from nat.cli.register_workflow import register_embedder_provider
+from nat.data_models.common import OptionalSecretStr
 from nat.data_models.embedder import EmbedderBaseConfig
 from nat.data_models.retry_mixin import RetryMixin

@@ -27,9 +28,9 @@ from nat.data_models.retry_mixin import RetryMixin
 class OpenAIEmbedderModelConfig(EmbedderBaseConfig, RetryMixin, name="openai"):
     """An OpenAI LLM provider to be used with an LLM client."""

-    model_config = ConfigDict(protected_namespaces=())
+    model_config = ConfigDict(protected_namespaces=(), extra="allow")

-    api_key:
+    api_key: OptionalSecretStr = Field(default=None, description="OpenAI API key to interact with hosted model.")
     base_url: str | None = Field(default=None, description="Base url to the hosted model.")
     model_name: str = Field(validation_alias=AliasChoices("model_name", "model"),
                             serialization_alias="model",
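All three embedder configs now declare api_key as OptionalSecretStr and set extra="allow". The diff imports OptionalSecretStr from nat.data_models.common without showing its definition; the sketch below assumes it is an optional pydantic SecretStr, which masks the key in reprs and logs:

```python
# Standalone sketch of the assumed behavior (OptionalSecretStr's definition
# is not shown in the diff; `SecretStr | None` is an assumption).
from pydantic import BaseModel, Field, SecretStr

OptionalSecretStr = SecretStr | None  # assumed alias

class DemoEmbedderConfig(BaseModel, extra="allow"):
    api_key: OptionalSecretStr = Field(default=None)

cfg = DemoEmbedderConfig(api_key="sk-demo", dimensions=256)  # extra field kept
print(cfg)                             # api_key=SecretStr('**********') dimensions=256
print(cfg.api_key.get_secret_value())  # raw key only on explicit request
```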
nat/eval/config.py
CHANGED

@@ -27,7 +27,7 @@ class EvaluationRunConfig(BaseModel):
     """
     Parameters used for a single evaluation run.
     """
-    config_file: Path
+    config_file: Path | BaseModel
     dataset: str | None = None  # dataset file path can be specified in the config file
     result_json_path: str = "$"
     skip_workflow: bool = False
nat/eval/dataset_handler/dataset_downloader.py
CHANGED

@@ -19,6 +19,7 @@ import boto3
 import requests
 from botocore.exceptions import NoCredentialsError

+from nat.data_models.common import get_secret_value
 from nat.data_models.dataset_handler import EvalDatasetConfig

 logger = logging.getLogger(__name__)

@@ -46,8 +47,8 @@ class DatasetDownloader:
         try:
             self._s3_client = boto3.client("s3",
                                            endpoint_url=self.s3_config.endpoint_url,
-                                           aws_access_key_id=self.s3_config.access_key,
-                                           aws_secret_access_key=self.s3_config.secret_key)
+                                           aws_access_key_id=get_secret_value(self.s3_config.access_key),
+                                           aws_secret_access_key=get_secret_value(self.s3_config.secret_key))
         except NoCredentialsError as e:
             logger.error("AWS credentials not available: %s", e)
             raise
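The downloader now routes S3 credentials through get_secret_value. The helper's definition is not part of this diff; a plausible stand-in, assuming it unwraps a pydantic SecretStr and passes plain strings or None through:

```python
# Hypothetical stand-in for nat.data_models.common.get_secret_value
# (the real definition is not shown in this diff).
from pydantic import SecretStr

def get_secret_value(value: SecretStr | str | None) -> str | None:
    # Unwrap SecretStr; leave plain strings and None untouched.
    return value.get_secret_value() if isinstance(value, SecretStr) else value
```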
nat/eval/dataset_handler/dataset_filter.py
CHANGED

@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import fnmatch
+
 import pandas as pd

 from nat.data_models.dataset_handler import EvalFilterConfig

@@ -24,6 +26,7 @@ class DatasetFilter:
     - If an allowlist is provided, only keep rows matching the filter values.
     - If a denylist is provided, remove rows matching the filter values.
     - If the filter column does not exist in the DataFrame, the filtering is skipped for that column.
+    - Supports Unix shell-style wildcards (``*``, ``?``, ``[seq]``, ``[!seq]``) for string matching.

     This is a utility class that is dataset agnostic and can be used to filter any DataFrame based on the provided
     filter configuration.

@@ -33,6 +36,33 @@ class DatasetFilter:

         self.filter_config = filter_config

+    @staticmethod
+    def _match_wildcard_patterns(series: pd.Series, patterns: list[str | int | float]) -> pd.Series:
+        """
+        Match series values against wildcard patterns and exact values.
+
+        Args:
+            series (pd.Series): pandas Series to match against
+            patterns (list[str | int | float]): List of patterns/values
+
+        Returns:
+            pd.Series: Boolean Series indicating matches
+        """
+        # Convert series to string for pattern matching
+        str_series = series.astype(str)
+
+        # Initialize boolean mask
+        matches = pd.Series([False] * len(series), index=series.index)
+
+        # Check each pattern using fnmatch with list comprehension to avoid lambda capture
+        for pattern in patterns:
+            pattern_str = str(pattern)
+            pattern_matches = pd.Series([fnmatch.fnmatch(val, pattern_str) for val in str_series],
+                                        index=str_series.index)
+            matches |= pattern_matches
+
+        return matches
+
     def apply_filters(self, df) -> pd.DataFrame:

         filtered_df = df.copy()

@@ -41,12 +71,14 @@ class DatasetFilter:
         if self.filter_config.allowlist:
             for column, values in self.filter_config.allowlist.field.items():
                 if column in filtered_df.columns:
-
+                    matches = self._match_wildcard_patterns(filtered_df[column], values)
+                    filtered_df = filtered_df[matches]

         # Apply denylist (remove specified rows)
         if self.filter_config.denylist:
             for column, values in self.filter_config.denylist.field.items():
                 if column in filtered_df.columns:
-
+                    matches = self._match_wildcard_patterns(filtered_df[column], values)
+                    filtered_df = filtered_df[~matches]

         return filtered_df
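A runnable sketch of the new wildcard semantics, mirroring _match_wildcard_patterns on a toy DataFrame (the column and values are made up):

```python
# Toy demo of fnmatch-based allowlist/denylist matching, as added above.
import fnmatch

import pandas as pd

df = pd.DataFrame({"id": ["swe_bench_1", "swe_bench_2", "hotpot_9"]})
patterns = ["swe_bench_*"]  # Unix shell-style wildcard

str_series = df["id"].astype(str)
matches = pd.Series([False] * len(df), index=df.index)
for pattern in patterns:
    matches |= pd.Series([fnmatch.fnmatch(v, str(pattern)) for v in str_series],
                         index=str_series.index)

print(df[matches])   # allowlist keeps swe_bench_1, swe_bench_2
print(df[~matches])  # denylist would instead keep hotpot_9
```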
nat/eval/evaluate.py
CHANGED

@@ -104,6 +104,8 @@ class EvaluationRun:
             usage_stats_per_llm[llm_name].prompt_tokens += step.token_usage.prompt_tokens
             usage_stats_per_llm[llm_name].completion_tokens += step.token_usage.completion_tokens
             usage_stats_per_llm[llm_name].total_tokens += step.token_usage.total_tokens
+            usage_stats_per_llm[llm_name].reasoning_tokens += step.token_usage.reasoning_tokens
+            usage_stats_per_llm[llm_name].cached_tokens += step.token_usage.cached_tokens
             total_tokens += step.token_usage.total_tokens

         # find min and max event timestamps

@@ -449,10 +451,14 @@
         from nat.runtime.loader import load_config

         # Load and override the config
-
+        config = None
+        if isinstance(self.config.config_file, BaseModel):
+            config = self.config.config_file
+        elif self.config.override:
             config = self.apply_overrides()
         else:
             config = load_config(self.config.config_file)
+
         self.eval_config = config.eval
         workflow_alias = self._get_workflow_alias(config.workflow.type)
         logger.debug("Loaded %s evaluation configuration: %s", workflow_alias, self.eval_config)

@@ -508,7 +514,7 @@
         # Run workflow and evaluate
         async with WorkflowEvalBuilder.from_config(config=config) as eval_workflow:
             # Initialize Weave integration
-            self.weave_eval.initialize_logger(workflow_alias, self.eval_input, config)
+            self.weave_eval.initialize_logger(workflow_alias, self.eval_input, config, job_id=job_id)

             with self.eval_trace_context.evaluation_context():
                 # Run workflow

@@ -516,7 +522,8 @@
                 await self.run_workflow_remote()
             elif not self.config.skip_workflow:
                 if session_manager is None:
-
+                    workflow = await eval_workflow.build()
+                    session_manager = SessionManager(workflow,
                                                      max_concurrency=self.eval_config.general.max_concurrency)
                 await self.run_workflow_local(session_manager)
nat/eval/evaluator/base_evaluator.py
CHANGED

@@ -71,7 +71,7 @@ class BaseEvaluator(ABC):
             TqdmPositionRegistry.release(tqdm_position)

         # Compute average if possible
-        numeric_scores = [item.score for item in output_items if isinstance(item.score,
+        numeric_scores = [item.score for item in output_items if isinstance(item.score, int | float)]
         avg_score = round(sum(numeric_scores) / len(numeric_scores), 2) if numeric_scores else None

         return EvalOutput(average_score=avg_score, eval_output_items=output_items)
nat/eval/rag_evaluator/evaluate.py
CHANGED

@@ -116,11 +116,14 @@ class RAGEvaluator:
             """Convert NaN or None to 0.0 for safe arithmetic/serialization."""
             return 0.0 if v is None or (isinstance(v, float) and math.isnan(v)) else v

-        #
+        # Keep original scores (preserving NaN/None) for output
+        original_scores_dict = {metric: [score.get(metric) for score in scores] for metric in scores[0]}
+
+        # Convert from list of dicts to dict of lists, coercing NaN/None to 0.0 for average calculation
         scores_dict = {metric: [_nan_to_zero(score.get(metric)) for score in scores] for metric in scores[0]}
         first_metric_name = list(scores_dict.keys())[0] if scores_dict else None

-        # Compute the average of each metric
+        # Compute the average of each metric using cleaned scores (NaN/None -> 0.0)
         average_scores = {
             metric: (sum(values) / len(values) if values else 0.0)
             for metric, values in scores_dict.items()

@@ -137,11 +140,11 @@
         else:
             ids = df["user_input"].tolist()  # Use "user_input" as ID fallback

-        # Construct EvalOutputItem list
+        # Construct EvalOutputItem list using original scores (preserving NaN/None)
         eval_output_items = [
             EvalOutputItem(
                 id=ids[i],
-                score=
+                score=original_scores_dict[first_metric_name][i] if first_metric_name else None,
                 reasoning={
                     key:
                     getattr(row, key, None)  # Use getattr to safely access attributes
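The effect of the change in miniature: averages are computed over NaN/None coerced to 0.0, while per-item output keeps the original value (the metric name is illustrative):

```python
# Sketch of the dual bookkeeping above: cleaned scores for the average,
# original scores (NaN preserved) for the per-item output.
import math

def _nan_to_zero(v):
    return 0.0 if v is None or (isinstance(v, float) and math.isnan(v)) else v

scores = [{"faithfulness": 0.8}, {"faithfulness": float("nan")}]
cleaned = [_nan_to_zero(s["faithfulness"]) for s in scores]
print(sum(cleaned) / len(cleaned))  # 0.4 -- average over coerced values
print(scores[1]["faithfulness"])    # nan -- preserved for EvalOutputItem
```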
nat/eval/register.py
CHANGED

@@ -17,6 +17,10 @@

 # Import evaluators which need to be automatically registered here
 from .rag_evaluator.register import register_ragas_evaluator
+from .runtime_evaluator.register import register_avg_llm_latency_evaluator
+from .runtime_evaluator.register import register_avg_num_llm_calls_evaluator
+from .runtime_evaluator.register import register_avg_tokens_per_llm_end_evaluator
+from .runtime_evaluator.register import register_avg_workflow_runtime_evaluator
 from .swe_bench_evaluator.register import register_swe_bench_evaluator
 from .trajectory_evaluator.register import register_trajectory_evaluator
 from .tunable_rag_evaluator.register import register_tunable_rag_evaluator
nat/eval/runtime_evaluator/__init__.py
ADDED

@@ -0,0 +1,14 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
nat/eval/runtime_evaluator/evaluate.py
ADDED

@@ -0,0 +1,123 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from collections import defaultdict
+from dataclasses import dataclass
+
+from nat.data_models.intermediate_step import IntermediateStepType
+from nat.eval.evaluator.base_evaluator import BaseEvaluator
+from nat.eval.evaluator.evaluator_model import EvalInputItem
+from nat.eval.evaluator.evaluator_model import EvalOutputItem
+from nat.profiler.intermediate_property_adapter import IntermediatePropertyAdaptor
+
+
+@dataclass
+class _CallTiming:
+    start_ts: float | None = None
+    end_ts: float | None = None
+
+    @property
+    def latency(self) -> float | None:
+        if self.start_ts is None or self.end_ts is None:
+            return None
+        return max(0.0, self.end_ts - self.start_ts)
+
+
+class AverageLLMLatencyEvaluator(BaseEvaluator):
+    """
+    Mean difference between connected LLM_START and LLM_END events (same UUID).
+    The score is the average latency in seconds for the item. Reasoning contains per-call latencies.
+    """
+
+    def __init__(self, max_concurrency: int = 8):
+        super().__init__(max_concurrency=max_concurrency, tqdm_desc="Evaluating Avg LLM Latency")
+
+    async def evaluate_item(self, item: EvalInputItem) -> EvalOutputItem:  # noqa: D401
+        calls: dict[str, _CallTiming] = defaultdict(_CallTiming)
+
+        for step in (IntermediatePropertyAdaptor.from_intermediate_step(s) for s in item.trajectory):
+            if step.event_type == IntermediateStepType.LLM_START:
+                calls[step.UUID].start_ts = step.event_timestamp
+            elif step.event_type == IntermediateStepType.LLM_END:
+                calls[step.UUID].end_ts = step.event_timestamp
+
+        latencies = [ct.latency for ct in calls.values() if ct.latency is not None]
+        avg_latency = sum(latencies) / len(latencies) if latencies else 0.0
+
+        reasoning = {
+            "num_llm_calls": len(latencies),
+            "latencies": latencies,
+        }
+        return EvalOutputItem(id=item.id, score=round(avg_latency, 4), reasoning=reasoning)
+
+
+class AverageWorkflowRuntimeEvaluator(BaseEvaluator):
+    """
+    Average workflow runtime per item: max(event_timestamp) - min(event_timestamp) across the trajectory.
+    The score is the runtime in seconds for the item.
+    """
+
+    def __init__(self, max_concurrency: int = 8):
+        super().__init__(max_concurrency=max_concurrency, tqdm_desc="Evaluating Avg Workflow Runtime")
+
+    async def evaluate_item(self, item: EvalInputItem) -> EvalOutputItem:  # noqa: D401
+        if not item.trajectory:
+            return EvalOutputItem(id=item.id, score=0.0, reasoning={"note": "no steps"})
+
+        timestamps = [s.event_timestamp for s in item.trajectory]
+        runtime = max(timestamps) - min(timestamps)
+        return EvalOutputItem(id=item.id, score=round(max(0.0, runtime), 4), reasoning={"steps": len(timestamps)})
+
+
+class AverageNumberOfLLMCallsEvaluator(BaseEvaluator):
+    """
+    Average number of LLM calls per item. The score is the count for the item.
+    """
+
+    def __init__(self, max_concurrency: int = 8):
+        super().__init__(max_concurrency=max_concurrency, tqdm_desc="Evaluating Avg # LLM Calls")
+
+    async def evaluate_item(self, item: EvalInputItem) -> EvalOutputItem:  # noqa: D401
+        num_calls = sum(1 for s in item.trajectory if s.event_type == IntermediateStepType.LLM_END)
+        return EvalOutputItem(id=item.id, score=float(num_calls), reasoning={"num_llm_end": num_calls})
+
+
+class AverageTokensPerLLMEndEvaluator(BaseEvaluator):
+    """
+    Average total tokens per LLM_END event: sum of prompt and completion tokens if available.
+    The score is the average tokens per LLM_END for the item (0 if none).
+    """
+
+    def __init__(self, max_concurrency: int = 8):
+        super().__init__(max_concurrency=max_concurrency, tqdm_desc="Evaluating Avg Tokens/LLM_END")
+
+    async def evaluate_item(self, item: EvalInputItem) -> EvalOutputItem:  # noqa: D401
+        totals: list[int] = []
+        for step in (IntermediatePropertyAdaptor.from_intermediate_step(s) for s in item.trajectory):
+            if step.event_type == IntermediateStepType.LLM_END:
+                total_tokens = step.token_usage.total_tokens
+                # If framework doesn't set total, compute from prompt+completion
+                if total_tokens == 0:
+                    total_tokens = step.token_usage.prompt_tokens + step.token_usage.completion_tokens
+                totals.append(total_tokens)
+
+        avg_tokens = (sum(totals) / len(totals)) if totals else 0.0
+        reasoning = {
+            "num_llm_end": len(totals),
+            "totals": totals,
+        }
+        return EvalOutputItem(id=item.id, score=round(avg_tokens, 2), reasoning=reasoning)