PyPI - nvidia-nat - Versions diffs - 1.4.0a20251120__py3-none-any.whl → 1.4.0a20260113__py3-none-any.whl - Mend

nvidia-nat 1.4.0a20251120__py3-none-any.whl → 1.4.0a20260113__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (492) hide show

aiq/__init__.py +1 -1
nat/{front_ends/mcp → agent/auto_memory_wrapper}/__init__.py +1 -1
nat/agent/auto_memory_wrapper/agent.py +278 -0
nat/agent/auto_memory_wrapper/register.py +227 -0
nat/agent/auto_memory_wrapper/state.py +30 -0
nat/agent/base.py +1 -1
nat/agent/dual_node.py +1 -1
nat/agent/prompt_optimizer/prompt.py +1 -1
nat/agent/prompt_optimizer/register.py +1 -1
nat/agent/react_agent/agent.py +16 -9
nat/agent/react_agent/output_parser.py +2 -2
nat/agent/react_agent/prompt.py +3 -2
nat/agent/react_agent/register.py +2 -2
nat/agent/react_agent/register_per_user_agent.py +104 -0
nat/agent/reasoning_agent/reasoning_agent.py +1 -1
nat/agent/register.py +3 -1
nat/agent/responses_api_agent/__init__.py +1 -1
nat/agent/responses_api_agent/register.py +1 -1
nat/agent/rewoo_agent/agent.py +9 -4
nat/agent/rewoo_agent/prompt.py +1 -1
nat/agent/rewoo_agent/register.py +1 -1
nat/agent/tool_calling_agent/agent.py +5 -4
nat/agent/tool_calling_agent/register.py +1 -1
nat/authentication/__init__.py +1 -1
nat/authentication/api_key/__init__.py +1 -1
nat/authentication/api_key/api_key_auth_provider.py +1 -1
nat/authentication/api_key/api_key_auth_provider_config.py +22 -7
nat/authentication/api_key/register.py +1 -1
nat/authentication/credential_validator/__init__.py +1 -1
nat/authentication/credential_validator/bearer_token_validator.py +1 -1
nat/authentication/exceptions/__init__.py +1 -1
nat/authentication/exceptions/api_key_exceptions.py +1 -1
nat/authentication/http_basic_auth/http_basic_auth_provider.py +1 -1
nat/authentication/http_basic_auth/register.py +1 -1
nat/authentication/interfaces.py +1 -1
nat/authentication/oauth2/__init__.py +1 -1
nat/authentication/oauth2/oauth2_auth_code_flow_provider.py +1 -1
nat/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +1 -1
nat/authentication/oauth2/oauth2_resource_server_config.py +1 -1
nat/authentication/oauth2/register.py +1 -1
nat/authentication/register.py +1 -1
nat/builder/builder.py +511 -1
nat/builder/child_builder.py +385 -0
nat/builder/component_utils.py +28 -4
nat/builder/context.py +17 -1
nat/builder/embedder.py +1 -1
nat/builder/eval_builder.py +19 -7
nat/builder/evaluator.py +1 -1
nat/builder/framework_enum.py +2 -1
nat/builder/front_end.py +1 -1
nat/builder/function.py +40 -3
nat/builder/function_base.py +1 -1
nat/builder/function_info.py +1 -1
nat/builder/intermediate_step_manager.py +1 -1
nat/builder/llm.py +1 -1
nat/builder/per_user_workflow_builder.py +843 -0
nat/builder/retriever.py +1 -1
nat/builder/sync_builder.py +571 -0
nat/builder/user_interaction_manager.py +1 -1
nat/builder/workflow.py +1 -1
nat/builder/workflow_builder.py +536 -424
nat/cli/__init__.py +1 -1
nat/cli/cli_utils/config_override.py +1 -1
nat/cli/cli_utils/validation.py +32 -1
nat/cli/commands/configure/channel/add.py +1 -1
nat/cli/commands/configure/channel/channel.py +1 -1
nat/cli/commands/configure/channel/remove.py +1 -1
nat/cli/commands/configure/channel/update.py +1 -1
nat/cli/commands/configure/configure.py +1 -1
nat/cli/commands/evaluate.py +87 -13
nat/cli/commands/finetune.py +132 -0
nat/cli/commands/info/__init__.py +1 -1
nat/cli/commands/info/info.py +1 -1
nat/cli/commands/info/list_channels.py +1 -1
nat/cli/commands/info/list_components.py +1 -1
nat/cli/commands/object_store/__init__.py +1 -1
nat/cli/commands/object_store/object_store.py +1 -1
nat/cli/commands/optimize.py +1 -1
nat/cli/commands/{mcp → red_teaming}/__init__.py +1 -1
nat/cli/commands/red_teaming/red_teaming.py +138 -0
nat/cli/commands/red_teaming/red_teaming_utils.py +73 -0
nat/cli/commands/registry/__init__.py +1 -1
nat/cli/commands/registry/publish.py +1 -1
nat/cli/commands/registry/pull.py +1 -1
nat/cli/commands/registry/registry.py +1 -1
nat/cli/commands/registry/remove.py +1 -1
nat/cli/commands/registry/search.py +1 -1
nat/cli/commands/sizing/__init__.py +1 -1
nat/cli/commands/sizing/calc.py +1 -1
nat/cli/commands/sizing/sizing.py +1 -1
nat/cli/commands/start.py +1 -1
nat/cli/commands/uninstall.py +1 -1
nat/cli/commands/validate.py +1 -1
nat/cli/commands/workflow/__init__.py +1 -1
nat/cli/commands/workflow/workflow.py +1 -1
nat/cli/commands/workflow/workflow_commands.py +3 -2
nat/cli/entrypoint.py +15 -37
nat/cli/main.py +2 -2
nat/cli/plugin_loader.py +69 -0
nat/cli/register_workflow.py +183 -5
nat/cli/type_registry.py +169 -3
nat/control_flow/register.py +1 -1
nat/control_flow/router_agent/agent.py +1 -1
nat/control_flow/router_agent/prompt.py +1 -1
nat/control_flow/router_agent/register.py +1 -1
nat/control_flow/sequential_executor.py +28 -7
nat/data_models/__init__.py +1 -1
nat/data_models/agent.py +1 -1
nat/data_models/api_server.py +38 -3
nat/data_models/authentication.py +1 -1
nat/data_models/common.py +1 -1
nat/data_models/component.py +7 -1
nat/data_models/component_ref.py +34 -1
nat/data_models/config.py +62 -1
nat/data_models/dataset_handler.py +15 -2
nat/data_models/discovery_metadata.py +1 -1
nat/data_models/embedder.py +1 -1
nat/data_models/evaluate.py +6 -1
nat/data_models/evaluator.py +1 -1
nat/data_models/finetuning.py +260 -0
nat/data_models/front_end.py +1 -1
nat/data_models/function.py +1 -1
nat/data_models/function_dependencies.py +1 -1
nat/data_models/gated_field_mixin.py +1 -1
nat/data_models/interactive.py +1 -1
nat/data_models/intermediate_step.py +29 -2
nat/data_models/invocation_node.py +1 -1
nat/data_models/llm.py +1 -1
nat/data_models/logging.py +1 -1
nat/data_models/memory.py +1 -1
nat/data_models/middleware.py +3 -1
nat/data_models/object_store.py +1 -1
nat/data_models/openai_mcp.py +1 -1
nat/data_models/optimizable.py +1 -1
nat/data_models/optimizer.py +1 -1
nat/data_models/profiler.py +1 -1
nat/data_models/registry_handler.py +1 -1
nat/data_models/retriever.py +1 -1
nat/data_models/retry_mixin.py +1 -1
nat/data_models/runtime_enum.py +1 -1
nat/data_models/span.py +1 -1
nat/data_models/step_adaptor.py +1 -1
nat/data_models/streaming.py +1 -1
nat/data_models/swe_bench_model.py +1 -1
nat/data_models/telemetry_exporter.py +1 -1
nat/data_models/thinking_mixin.py +1 -1
nat/data_models/ttc_strategy.py +1 -1
nat/embedder/azure_openai_embedder.py +1 -1
nat/embedder/nim_embedder.py +1 -1
nat/embedder/openai_embedder.py +1 -1
nat/embedder/register.py +1 -1
nat/eval/__init__.py +1 -1
nat/eval/config.py +8 -1
nat/eval/dataset_handler/dataset_downloader.py +1 -1
nat/eval/dataset_handler/dataset_filter.py +1 -1
nat/eval/dataset_handler/dataset_handler.py +4 -2
nat/eval/evaluate.py +217 -80
nat/eval/evaluator/__init__.py +1 -1
nat/eval/evaluator/base_evaluator.py +2 -2
nat/eval/evaluator/evaluator_model.py +3 -2
nat/eval/intermediate_step_adapter.py +1 -1
nat/eval/llm_validator.py +336 -0
nat/eval/rag_evaluator/evaluate.py +17 -10
nat/eval/rag_evaluator/register.py +1 -1
nat/eval/red_teaming_evaluator/__init__.py +14 -0
nat/eval/red_teaming_evaluator/data_models.py +66 -0
nat/eval/red_teaming_evaluator/evaluate.py +327 -0
nat/eval/red_teaming_evaluator/filter_conditions.py +75 -0
nat/eval/red_teaming_evaluator/register.py +55 -0
nat/eval/register.py +2 -1
nat/eval/remote_workflow.py +1 -1
nat/eval/runners/__init__.py +1 -1
nat/eval/runners/config.py +1 -1
nat/eval/runners/multi_eval_runner.py +1 -1
nat/eval/runners/red_teaming_runner/__init__.py +24 -0
nat/eval/runners/red_teaming_runner/config.py +282 -0
nat/eval/runners/red_teaming_runner/report_utils.py +707 -0
nat/eval/runners/red_teaming_runner/runner.py +867 -0
nat/eval/runtime_evaluator/__init__.py +1 -1
nat/eval/runtime_evaluator/evaluate.py +1 -1
nat/eval/runtime_evaluator/register.py +1 -1
nat/eval/runtime_event_subscriber.py +1 -1
nat/eval/swe_bench_evaluator/evaluate.py +1 -1
nat/eval/swe_bench_evaluator/register.py +1 -1
nat/eval/trajectory_evaluator/evaluate.py +2 -2
nat/eval/trajectory_evaluator/register.py +1 -1
nat/eval/tunable_rag_evaluator/evaluate.py +5 -5
nat/eval/tunable_rag_evaluator/register.py +1 -1
nat/eval/usage_stats.py +1 -1
nat/eval/utils/eval_trace_ctx.py +1 -1
nat/eval/utils/output_uploader.py +1 -1
nat/eval/utils/tqdm_position_registry.py +1 -1
nat/eval/utils/weave_eval.py +1 -1
nat/experimental/decorators/experimental_warning_decorator.py +1 -1
nat/experimental/test_time_compute/editing/iterative_plan_refinement_editor.py +1 -1
nat/experimental/test_time_compute/editing/llm_as_a_judge_editor.py +1 -1
nat/experimental/test_time_compute/editing/motivation_aware_summarization.py +1 -1
nat/experimental/test_time_compute/functions/execute_score_select_function.py +1 -1
nat/experimental/test_time_compute/functions/multi_llm_judge_function.py +88 -0
nat/experimental/test_time_compute/functions/plan_select_execute_function.py +1 -1
nat/experimental/test_time_compute/functions/ttc_tool_orchestration_function.py +1 -1
nat/experimental/test_time_compute/functions/ttc_tool_wrapper_function.py +1 -1
nat/experimental/test_time_compute/models/editor_config.py +1 -1
nat/experimental/test_time_compute/models/scoring_config.py +1 -1
nat/experimental/test_time_compute/models/search_config.py +20 -2
nat/experimental/test_time_compute/models/selection_config.py +33 -2
nat/experimental/test_time_compute/models/stage_enums.py +1 -1
nat/experimental/test_time_compute/models/strategy_base.py +1 -1
nat/experimental/test_time_compute/models/tool_use_config.py +1 -1
nat/experimental/test_time_compute/models/ttc_item.py +1 -1
nat/experimental/test_time_compute/register.py +4 -1
nat/experimental/test_time_compute/scoring/llm_based_agent_scorer.py +1 -1
nat/experimental/test_time_compute/scoring/llm_based_plan_scorer.py +1 -1
nat/experimental/test_time_compute/scoring/motivation_aware_scorer.py +1 -1
nat/experimental/test_time_compute/search/multi_llm_generation.py +115 -0
nat/experimental/test_time_compute/search/multi_llm_planner.py +1 -1
nat/experimental/test_time_compute/search/multi_query_retrieval_search.py +1 -1
nat/experimental/test_time_compute/search/single_shot_multi_plan_planner.py +1 -1
nat/experimental/test_time_compute/selection/best_of_n_selector.py +1 -1
nat/experimental/test_time_compute/selection/llm_based_agent_output_selector.py +1 -1
nat/experimental/test_time_compute/selection/llm_based_output_merging_selector.py +1 -1
nat/experimental/test_time_compute/selection/llm_based_plan_selector.py +1 -1
nat/experimental/test_time_compute/selection/llm_judge_selection.py +127 -0
nat/experimental/test_time_compute/selection/threshold_selector.py +1 -1
nat/finetuning/__init__.py +24 -0
nat/finetuning/finetuning_runtime.py +143 -0
nat/finetuning/interfaces/__init__.py +24 -0
nat/finetuning/interfaces/finetuning_runner.py +261 -0
nat/finetuning/interfaces/trainer_adapter.py +103 -0
nat/finetuning/interfaces/trajectory_builder.py +115 -0
nat/finetuning/utils/__init__.py +15 -0
nat/finetuning/utils/parsers/__init__.py +15 -0
nat/finetuning/utils/parsers/adk_parser.py +141 -0
nat/finetuning/utils/parsers/base_parser.py +238 -0
nat/finetuning/utils/parsers/common.py +91 -0
nat/finetuning/utils/parsers/langchain_parser.py +267 -0
nat/finetuning/utils/parsers/llama_index_parser.py +218 -0
nat/front_ends/__init__.py +1 -1
nat/front_ends/console/__init__.py +1 -1
nat/front_ends/console/authentication_flow_handler.py +1 -1
nat/front_ends/console/console_front_end_config.py +4 -1
nat/front_ends/console/console_front_end_plugin.py +5 -4
nat/front_ends/console/register.py +1 -1
nat/front_ends/cron/__init__.py +1 -1
nat/front_ends/fastapi/__init__.py +1 -1
nat/front_ends/fastapi/async_job.py +128 -0
nat/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +1 -1
nat/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +13 -9
nat/front_ends/fastapi/dask_client_mixin.py +1 -1
nat/front_ends/fastapi/fastapi_front_end_config.py +1 -1
nat/front_ends/fastapi/fastapi_front_end_controller.py +1 -1
nat/front_ends/fastapi/fastapi_front_end_plugin.py +25 -30
nat/front_ends/fastapi/fastapi_front_end_plugin_worker.py +195 -60
nat/front_ends/fastapi/html_snippets/__init__.py +1 -1
nat/front_ends/fastapi/html_snippets/auth_code_grant_success.py +1 -1
nat/front_ends/fastapi/intermediate_steps_subscriber.py +12 -1
nat/front_ends/fastapi/job_store.py +23 -11
nat/front_ends/fastapi/main.py +1 -1
nat/front_ends/fastapi/message_handler.py +27 -4
nat/front_ends/fastapi/message_validator.py +54 -2
nat/front_ends/fastapi/register.py +1 -1
nat/front_ends/fastapi/response_helpers.py +16 -15
nat/front_ends/fastapi/step_adaptor.py +1 -1
nat/front_ends/fastapi/utils.py +1 -1
nat/front_ends/register.py +1 -2
nat/front_ends/simple_base/__init__.py +1 -1
nat/front_ends/simple_base/simple_front_end_plugin_base.py +6 -4
nat/llm/aws_bedrock_llm.py +1 -1
nat/llm/azure_openai_llm.py +10 -1
nat/llm/dynamo_llm.py +363 -0
nat/llm/huggingface_llm.py +177 -0
nat/llm/litellm_llm.py +1 -1
nat/llm/nim_llm.py +1 -1
nat/llm/openai_llm.py +1 -1
nat/llm/register.py +3 -1
nat/llm/utils/__init__.py +1 -1
nat/llm/utils/env_config_value.py +1 -1
nat/llm/utils/error.py +1 -1
nat/llm/utils/thinking.py +1 -1
nat/memory/__init__.py +1 -1
nat/memory/interfaces.py +1 -1
nat/memory/models.py +1 -1
nat/meta/pypi.md +1 -1
nat/middleware/__init__.py +5 -5
nat/middleware/cache/__init__.py +14 -0
nat/middleware/{cache_middleware.py → cache/cache_middleware.py} +39 -42
nat/middleware/cache/cache_middleware_config.py +44 -0
nat/middleware/cache/register.py +33 -0
nat/middleware/defense/__init__.py +14 -0
nat/middleware/defense/defense_middleware.py +362 -0
nat/middleware/defense/defense_middleware_content_guard.py +455 -0
nat/middleware/defense/defense_middleware_data_models.py +91 -0
nat/middleware/defense/defense_middleware_output_verifier.py +440 -0
nat/middleware/defense/defense_middleware_pii.py +356 -0
nat/middleware/defense/register.py +82 -0
nat/middleware/dynamic/__init__.py +14 -0
nat/middleware/dynamic/dynamic_function_middleware.py +962 -0
nat/middleware/dynamic/dynamic_middleware_config.py +132 -0
nat/middleware/dynamic/register.py +34 -0
nat/middleware/function_middleware.py +236 -52
nat/middleware/logging/__init__.py +14 -0
nat/middleware/logging/logging_middleware.py +67 -0
nat/middleware/logging/logging_middleware_config.py +28 -0
nat/middleware/logging/register.py +33 -0
nat/middleware/middleware.py +142 -28
nat/middleware/red_teaming/__init__.py +14 -0
nat/middleware/red_teaming/red_teaming_middleware.py +344 -0
nat/middleware/red_teaming/red_teaming_middleware_config.py +112 -0
nat/middleware/red_teaming/register.py +47 -0
nat/middleware/register.py +7 -20
nat/middleware/utils/__init__.py +14 -0
nat/middleware/utils/workflow_inventory.py +155 -0
nat/object_store/__init__.py +1 -1
nat/object_store/in_memory_object_store.py +1 -1
nat/object_store/interfaces.py +1 -1
nat/object_store/models.py +1 -1
nat/object_store/register.py +1 -1
nat/observability/__init__.py +1 -1
nat/observability/exporter/__init__.py +1 -1
nat/observability/exporter/base_exporter.py +1 -1
nat/observability/exporter/exporter.py +1 -1
nat/observability/exporter/file_exporter.py +1 -1
nat/observability/exporter/processing_exporter.py +1 -1
nat/observability/exporter/raw_exporter.py +1 -1
nat/observability/exporter/span_exporter.py +7 -1
nat/observability/exporter_manager.py +1 -1
nat/observability/mixin/__init__.py +1 -1
nat/observability/mixin/batch_config_mixin.py +1 -1
nat/observability/mixin/collector_config_mixin.py +1 -1
nat/observability/mixin/file_mixin.py +1 -1
nat/observability/mixin/file_mode.py +1 -1
nat/observability/mixin/redaction_config_mixin.py +1 -1
nat/observability/mixin/resource_conflict_mixin.py +1 -1
nat/observability/mixin/serialize_mixin.py +1 -1
nat/observability/mixin/tagging_config_mixin.py +1 -1
nat/observability/mixin/type_introspection_mixin.py +1 -1
nat/observability/processor/__init__.py +1 -1
nat/observability/processor/batching_processor.py +1 -1
nat/observability/processor/callback_processor.py +1 -1
nat/observability/processor/falsy_batch_filter_processor.py +1 -1
nat/observability/processor/intermediate_step_serializer.py +1 -1
nat/observability/processor/processor.py +1 -1
nat/observability/processor/processor_factory.py +1 -1
nat/observability/processor/redaction/__init__.py +1 -1
nat/observability/processor/redaction/contextual_redaction_processor.py +1 -1
nat/observability/processor/redaction/contextual_span_redaction_processor.py +1 -1
nat/observability/processor/redaction/redaction_processor.py +1 -1
nat/observability/processor/redaction/span_header_redaction_processor.py +1 -1
nat/observability/processor/span_tagging_processor.py +1 -1
nat/observability/register.py +1 -1
nat/observability/utils/__init__.py +1 -1
nat/observability/utils/dict_utils.py +1 -1
nat/observability/utils/time_utils.py +1 -1
nat/profiler/calc/__init__.py +1 -1
nat/profiler/calc/calc_runner.py +3 -3
nat/profiler/calc/calculations.py +1 -1
nat/profiler/calc/data_models.py +1 -1
nat/profiler/calc/plot.py +30 -3
nat/profiler/callbacks/agno_callback_handler.py +1 -1
nat/profiler/callbacks/base_callback_class.py +1 -1
nat/profiler/callbacks/langchain_callback_handler.py +33 -3
nat/profiler/callbacks/llama_index_callback_handler.py +13 -10
nat/profiler/callbacks/semantic_kernel_callback_handler.py +1 -1
nat/profiler/callbacks/token_usage_base_model.py +1 -1
nat/profiler/data_frame_row.py +1 -1
nat/profiler/data_models.py +1 -1
nat/profiler/decorators/framework_wrapper.py +16 -1
nat/profiler/decorators/function_tracking.py +1 -1
nat/profiler/forecasting/config.py +1 -1
nat/profiler/forecasting/model_trainer.py +1 -1
nat/profiler/forecasting/models/__init__.py +1 -1
nat/profiler/forecasting/models/forecasting_base_model.py +1 -1
nat/profiler/forecasting/models/linear_model.py +1 -1
nat/profiler/forecasting/models/random_forest_regressor.py +1 -1
nat/profiler/inference_metrics_model.py +1 -1
nat/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +1 -1
nat/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +1 -1
nat/profiler/inference_optimization/data_models.py +1 -1
nat/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +1 -1
nat/profiler/inference_optimization/experimental/prefix_span_analysis.py +1 -1
nat/profiler/inference_optimization/llm_metrics.py +1 -1
nat/profiler/inference_optimization/prompt_caching.py +1 -1
nat/profiler/inference_optimization/token_uniqueness.py +1 -1
nat/profiler/inference_optimization/workflow_runtimes.py +1 -1
nat/profiler/intermediate_property_adapter.py +1 -1
nat/profiler/parameter_optimization/optimizable_utils.py +1 -1
nat/profiler/parameter_optimization/optimizer_runtime.py +1 -1
nat/profiler/parameter_optimization/parameter_optimizer.py +1 -1
nat/profiler/parameter_optimization/parameter_selection.py +1 -1
nat/profiler/parameter_optimization/pareto_visualizer.py +1 -1
nat/profiler/parameter_optimization/prompt_optimizer.py +1 -1
nat/profiler/parameter_optimization/update_helpers.py +1 -1
nat/profiler/profile_runner.py +1 -1
nat/profiler/utils.py +1 -1
nat/registry_handlers/local/local_handler.py +1 -1
nat/registry_handlers/local/register_local.py +1 -1
nat/registry_handlers/metadata_factory.py +1 -1
nat/registry_handlers/package_utils.py +1 -1
nat/registry_handlers/pypi/pypi_handler.py +1 -1
nat/registry_handlers/pypi/register_pypi.py +1 -1
nat/registry_handlers/register.py +1 -1
nat/registry_handlers/registry_handler_base.py +1 -1
nat/registry_handlers/rest/register_rest.py +1 -1
nat/registry_handlers/rest/rest_handler.py +1 -1
nat/registry_handlers/schemas/headers.py +1 -1
nat/registry_handlers/schemas/package.py +1 -1
nat/registry_handlers/schemas/publish.py +1 -1
nat/registry_handlers/schemas/pull.py +1 -1
nat/registry_handlers/schemas/remove.py +1 -1
nat/registry_handlers/schemas/search.py +1 -1
nat/registry_handlers/schemas/status.py +1 -1
nat/retriever/interface.py +1 -1
nat/retriever/milvus/__init__.py +1 -1
nat/retriever/milvus/register.py +1 -1
nat/retriever/milvus/retriever.py +1 -1
nat/retriever/models.py +1 -1
nat/retriever/nemo_retriever/__init__.py +1 -1
nat/retriever/nemo_retriever/register.py +1 -1
nat/retriever/nemo_retriever/retriever.py +5 -5
nat/retriever/register.py +1 -1
nat/runtime/__init__.py +1 -1
nat/runtime/loader.py +10 -3
nat/runtime/metrics.py +180 -0
nat/runtime/runner.py +1 -5
nat/runtime/session.py +451 -32
nat/runtime/user_metadata.py +1 -1
nat/settings/global_settings.py +1 -1
nat/tool/chat_completion.py +1 -1
nat/tool/code_execution/README.md +1 -1
nat/tool/code_execution/code_sandbox.py +1 -1
nat/tool/code_execution/local_sandbox/Dockerfile.sandbox +1 -1
nat/tool/code_execution/local_sandbox/__init__.py +1 -1
nat/tool/code_execution/local_sandbox/local_sandbox_server.py +1 -1
nat/tool/code_execution/local_sandbox/start_local_sandbox.sh +1 -1
nat/tool/code_execution/register.py +1 -1
nat/tool/code_execution/utils.py +1 -1
nat/tool/datetime_tools.py +1 -1
nat/tool/document_search.py +1 -1
nat/tool/github_tools.py +1 -1
nat/tool/memory_tools/add_memory_tool.py +1 -1
nat/tool/memory_tools/delete_memory_tool.py +1 -1
nat/tool/memory_tools/get_memory_tool.py +1 -1
nat/tool/nvidia_rag.py +2 -2
nat/tool/register.py +1 -1
nat/tool/retriever.py +1 -1
nat/tool/server_tools.py +1 -1
nat/utils/__init__.py +8 -5
nat/utils/callable_utils.py +1 -1
nat/utils/data_models/schema_validator.py +1 -1
nat/utils/debugging_utils.py +1 -1
nat/utils/decorators.py +1 -1
nat/utils/dump_distro_mapping.py +1 -1
nat/utils/exception_handlers/automatic_retries.py +3 -3
nat/utils/exception_handlers/schemas.py +1 -1
nat/utils/io/model_processing.py +1 -1
nat/utils/io/supress_logs.py +33 -0
nat/utils/io/yaml_tools.py +1 -1
nat/utils/log_levels.py +1 -1
nat/utils/log_utils.py +13 -1
nat/utils/metadata_utils.py +1 -1
nat/utils/optional_imports.py +1 -1
nat/utils/producer_consumer_queue.py +1 -1
nat/utils/reactive/base/observable_base.py +1 -1
nat/utils/reactive/base/observer_base.py +1 -1
nat/utils/reactive/base/subject_base.py +1 -1
nat/utils/reactive/observable.py +1 -1
nat/utils/reactive/observer.py +1 -1
nat/utils/reactive/subject.py +1 -1
nat/utils/reactive/subscription.py +1 -1
nat/utils/responses_api.py +1 -1
nat/utils/settings/global_settings.py +1 -1
nat/utils/string_utils.py +1 -1
nat/utils/type_converter.py +18 -5
nat/utils/type_utils.py +1 -1
nat/utils/url_utils.py +1 -1
{nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/METADATA +39 -14
nvidia_nat-1.4.0a20260113.dist-info/RECORD +547 -0
nvidia_nat-1.4.0a20260113.dist-info/entry_points.txt +38 -0
nat/cli/commands/mcp/mcp.py +0 -986
nat/front_ends/mcp/introspection_token_verifier.py +0 -73
nat/front_ends/mcp/mcp_front_end_config.py +0 -109
nat/front_ends/mcp/mcp_front_end_plugin.py +0 -155
nat/front_ends/mcp/mcp_front_end_plugin_worker.py +0 -388
nat/front_ends/mcp/memory_profiler.py +0 -320
nat/front_ends/mcp/register.py +0 -27
nat/front_ends/mcp/tool_converter.py +0 -321
nvidia_nat-1.4.0a20251120.dist-info/RECORD +0 -488
nvidia_nat-1.4.0a20251120.dist-info/entry_points.txt +0 -23
{nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/WHEEL +0 -0
{nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
{nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE.md +0 -0
{nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/top_level.txt +0 -0

nat/data_models/optimizable.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

nat/data_models/optimizer.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

nat/data_models/profiler.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

nat/data_models/registry_handler.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

nat/data_models/retriever.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

nat/data_models/retry_mixin.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

nat/data_models/runtime_enum.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

nat/data_models/span.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

nat/data_models/step_adaptor.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

nat/data_models/streaming.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

nat/data_models/swe_bench_model.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

nat/data_models/telemetry_exporter.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

nat/data_models/thinking_mixin.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

nat/data_models/ttc_strategy.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

nat/embedder/azure_openai_embedder.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

nat/embedder/nim_embedder.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

nat/embedder/openai_embedder.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

nat/embedder/register.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

nat/eval/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

nat/eval/config.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -46,6 +46,8 @@ class EvaluationRunConfig(BaseModel):
     num_passes: int = 0
     # timeout for waiting for trace export tasks to complete
     export_timeout: float = 60.0
+    # User ID to use for workflow session. Defaults to 'nat_eval_user_id'.
+    user_id: str = "nat_eval_user_id"
 class EvaluationRunOutput(BaseModel):
@@ -60,3 +62,8 @@ class EvaluationRunOutput(BaseModel):
     evaluation_results: list[tuple[str, EvalOutput]]
     usage_stats: UsageStats | None = None
     profiler_results: ProfilerResults
+    # Configuration files written to output directory
+    config_original_file: Path | None = None
+    config_effective_file: Path | None = None
+    config_metadata_file: Path | None = None

nat/eval/dataset_handler/dataset_downloader.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

nat/eval/dataset_handler/dataset_filter.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

nat/eval/dataset_handler/dataset_handler.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -217,7 +217,9 @@ class DatasetHandler:
         """
         # Apply filters and deduplicate
         input_df = self.dataset_filter.apply_filters(input_df)
-        input_df.drop_duplicates(subset=[self.dataset_config.id_key], inplace=True)
+        if (self.dataset_config.id_key in input_df.columns):
+            input_df.drop_duplicates(subset=[self.dataset_config.id_key], inplace=True)
         if self.reps > 1 and self.adjust_dataset_size:
             raise ValueError("reps and adjust_dataset_size are mutually exclusive")

nat/eval/evaluate.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,16 +14,21 @@
 # limitations under the License.
 import asyncio
+import json
 import logging
 import shutil
 import warnings
+from datetime import UTC
+from datetime import datetime
 from pathlib import Path
 from typing import Any
 from uuid import uuid4
+import yaml
 from pydantic import BaseModel
 from tqdm import tqdm
+from nat.data_models.config import Config
 from nat.data_models.evaluate import EvalConfig
 from nat.data_models.evaluate import JobEvictionPolicy
 from nat.data_models.runtime_enum import RuntimeTypeEnum
@@ -33,6 +38,7 @@ from nat.eval.dataset_handler.dataset_handler import DatasetHandler
 from nat.eval.evaluator.evaluator_model import EvalInput
 from nat.eval.evaluator.evaluator_model import EvalInputItem
 from nat.eval.evaluator.evaluator_model import EvalOutput
+from nat.eval.llm_validator import validate_llm_endpoints
 from nat.eval.usage_stats import UsageStats
 from nat.eval.usage_stats import UsageStatsItem
 from nat.eval.usage_stats import UsageStatsLLM
@@ -62,6 +68,7 @@ class EvaluationRun:
         # Run-specific configuration
         self.config: EvaluationRunConfig = config
         self.eval_config: EvalConfig | None = None
+        self.effective_config: Config | None = None  # Stores the complete config after applying overrides
         # Helpers
         self.intermediate_step_adapter: IntermediateStepAdapter = IntermediateStepAdapter()
@@ -97,6 +104,11 @@ class EvaluationRun:
         # evaluation output files
         self.evaluator_output_files: list[Path] = []
+        # configuration output files
+        self.config_original_file: Path | None = None
+        self.config_effective_file: Path | None = None
+        self.config_metadata_file: Path | None = None
     def _compute_usage_stats(self, item: EvalInputItem):
         """Compute usage stats for a single item using the intermediate steps"""
         # get the prompt and completion tokens from the intermediate steps
@@ -169,62 +181,65 @@ class EvaluationRun:
             if stop_event.is_set():
                 return "", []
-            async with session_manager.run(item.input_obj, runtime_type=RuntimeTypeEnum.EVALUATE) as runner:
-                if not session_manager.workflow.has_single_output:
-                    # raise an error if the workflow has multiple outputs
-                    raise NotImplementedError("Multiple outputs are not supported")
-                runner_result = None
-                intermediate_future = None
-                try:
-                    # Start usage stats and intermediate steps collection in parallel
-                    intermediate_future = pull_intermediate()
-                    runner_result = runner.result()
-                    base_output = await runner_result
-                    intermediate_steps = await intermediate_future
-                except NotImplementedError as e:
-                    logger.error("Failed to run the workflow: %s", e)
-                    # raise original error
-                    raise
-                except Exception as e:
-                    logger.exception("Failed to run the workflow: %s", e)
-                    # stop processing if a workflow error occurs
-                    self.workflow_interrupted = True
-                    # Cancel any coroutines that are still running, avoiding a warning about unawaited coroutines
-                    # (typically one of these two is what raised the exception and the other is still running)
-                    for coro in (runner_result, intermediate_future):
-                        if coro is not None:
-                            asyncio.ensure_future(coro).cancel()
-                    stop_event.set()
-                    return
-                try:
-                    base_output = runner.convert(base_output, to_type=str)
-                except ValueError:
-                    pass
-                # if base_output is a pydantic model dump it to json
-                if isinstance(base_output, BaseModel):
-                    output = base_output.model_dump_json(indent=2)
-                else:
-                    m = jsonpath_expr.find(base_output)
-                    if (not m):
-                        raise RuntimeError(f"Failed to extract output using jsonpath: {self.config.result_json_path}")
-                    if (len(m) > 1):
-                        logger.warning("Multiple matches found for jsonpath at row '%s'. Matches: %s. Using the first",
-                                       base_output,
-                                       m)
-                    output = m[0].value
-                item.output_obj = output
-                item.trajectory = self.intermediate_step_adapter.validate_intermediate_steps(intermediate_steps)
-                usage_stats_item = self._compute_usage_stats(item)
-                self.weave_eval.log_prediction(item, output)
-                await self.weave_eval.log_usage_stats(item, usage_stats_item)
+            async with session_manager.session(user_id=self.config.user_id) as session:
+                async with session.run(item.input_obj, runtime_type=RuntimeTypeEnum.EVALUATE) as runner:
+                    if not session.workflow.has_single_output:
+                        # raise an error if the workflow has multiple outputs
+                        raise NotImplementedError("Multiple outputs are not supported")
+                    runner_result = None
+                    intermediate_future = None
+                    try:
+                        # Start usage stats and intermediate steps collection in parallel
+                        intermediate_future = pull_intermediate()
+                        runner_result = runner.result()
+                        base_output = await runner_result
+                        intermediate_steps = await intermediate_future
+                    except NotImplementedError as e:
+                        logger.error("Failed to run the workflow: %s", e)
+                        # raise original error
+                        raise
+                    except Exception as e:
+                        logger.exception("Failed to run the workflow: %s", e)
+                        # stop processing if a workflow error occurs
+                        self.workflow_interrupted = True
+                        # Cancel any coroutines that are still running, avoiding a warning about unawaited coroutines
+                        # (typically one of these two is what raised the exception and the other is still running)
+                        for coro in (runner_result, intermediate_future):
+                            if coro is not None:
+                                asyncio.ensure_future(coro).cancel()
+                        stop_event.set()
+                        return
+                    try:
+                        base_output = runner.convert(base_output, to_type=str)
+                    except ValueError:
+                        pass
+                    # if base_output is a pydantic model dump it to json
+                    if isinstance(base_output, BaseModel):
+                        output = base_output.model_dump_json(indent=2)
+                    else:
+                        m = jsonpath_expr.find(base_output)
+                        if (not m):
+                            raise RuntimeError(
+                                f"Failed to extract output using jsonpath: {self.config.result_json_path}")
+                        if (len(m) > 1):
+                            logger.warning(
+                                "Multiple matches found for jsonpath at row '%s'. Matches: %s. Using the first",
+                                base_output,
+                                m)
+                        output = m[0].value
+                    item.output_obj = output
+                    item.trajectory = self.intermediate_step_adapter.validate_intermediate_steps(intermediate_steps)
+                    usage_stats_item = self._compute_usage_stats(item)
+                    self.weave_eval.log_prediction(item, output)
+                    await self.weave_eval.log_usage_stats(item, usage_stats_item)
         async def wrapped_run(item: EvalInputItem) -> None:
             await run_one(item)
@@ -329,10 +344,99 @@ class EvaluationRun:
             except Exception as e:
                 logger.exception("Failed to delete old job directory: %s: %s", dir_to_delete, e)
+    def write_configuration(self) -> None:
+        """Save the configuration used for this evaluation run to the output directory.
+        This saves three files:
+        1. config_original.yml - The original configuration file
+        2. config_effective.yml - The configuration with all overrides applied
+        3. config_metadata.json - Metadata about the evaluation run and overrides
+        """
+        output_dir = self.eval_config.general.output_dir
+        output_dir.mkdir(parents=True, exist_ok=True)
+        try:
+            # 1. Save original configuration
+            config_original_file = output_dir / "config_original.yml"
+            if isinstance(self.config.config_file, Path):
+                # Copy original file if it exists
+                if self.config.config_file.exists():
+                    shutil.copy2(self.config.config_file, config_original_file)
+                    self.config_original_file = config_original_file
+                    logger.info("Original config file copied to %s", config_original_file)
+                else:
+                    logger.warning("Original config file not found at %s", self.config.config_file)
+            elif isinstance(self.config.config_file, BaseModel):
+                # Serialize programmatic config, using mode='json' to handle special types like timedelta
+                config_dict = self.config.config_file.model_dump(mode='json')
+                with open(config_original_file, "w", encoding="utf-8") as f:
+                    yaml.safe_dump(config_dict, f, default_flow_style=False, sort_keys=False)
+                self.config_original_file = config_original_file
+                logger.info("Programmatic config saved to %s", config_original_file)
+            # 2. Save effective configuration (with overrides applied)
+            config_effective_file = output_dir / "config_effective.yml"
+            if self.effective_config is not None:
+                effective_config_dict = self.effective_config.model_dump(mode='json') if self.effective_config else {}
+                with open(config_effective_file, "w", encoding="utf-8") as f:
+                    yaml.safe_dump(effective_config_dict, f, default_flow_style=False, sort_keys=False)
+                self.config_effective_file = config_effective_file
+                logger.info("Effective config (with overrides) saved to %s", config_effective_file)
+            else:
+                logger.warning("Effective config not available, skipping config_effective.yml")
+            # 3. Save metadata about the run
+            config_metadata_file = output_dir / "config_metadata.json"
+            metadata = {
+                "config_file":
+                    str(self.config.config_file),
+                "config_file_type":
+                    "Path" if isinstance(self.config.config_file, Path) else "BaseModel",
+                "overrides": [{
+                    "path": path, "value": value
+                } for path, value in self.config.override] if self.config.override else [],
+                "dataset":
+                    self.config.dataset,
+                "result_json_path":
+                    self.config.result_json_path,
+                "skip_workflow":
+                    self.config.skip_workflow,
+                "skip_completed_entries":
+                    self.config.skip_completed_entries,
+                "reps":
+                    self.config.reps,
+                "endpoint":
+                    self.config.endpoint,
+                "endpoint_timeout":
+                    self.config.endpoint_timeout,
+                "adjust_dataset_size":
+                    self.config.adjust_dataset_size,
+                "num_passes":
+                    self.config.num_passes,
+                "export_timeout":
+                    self.config.export_timeout,
+                "user_id":
+                    self.config.user_id,
+                "timestamp":
+                    datetime.now(tz=UTC).isoformat(),
+            }
+            with open(config_metadata_file, "w", encoding="utf-8") as f:
+                json.dump(metadata, f, indent=2)
+            self.config_metadata_file = config_metadata_file
+            logger.info("Configuration metadata saved to %s", config_metadata_file)
+        except Exception:
+            logger.exception("Failed to write configuration files")
+            # Don't raise - this is not critical enough to fail the entire evaluation
     def write_output(self, dataset_handler: DatasetHandler, profiler_results: ProfilerResults):
         workflow_output_file = self.eval_config.general.output_dir / "workflow_output.json"
         workflow_output_file.parent.mkdir(parents=True, exist_ok=True)
+        # Write the configuration files (original, effective, and metadata)
+        self.write_configuration()
         # Write the workflow output to a file (this can be used for re-running the evaluation)
         step_filter = self.eval_config.general.output.workflow_output_step_filter \
@@ -459,7 +563,7 @@ class EvaluationRun:
         from nat.runtime.loader import load_config
         # Load and override the config
-        config = None
+        config: Config | None = None
         if isinstance(self.config.config_file, BaseModel):
             config = self.config.config_file
         elif self.config.override:
@@ -467,6 +571,8 @@ class EvaluationRun:
         else:
             config = load_config(self.config.config_file)
+        # Store the effective configuration for later saving to output directory
+        self.effective_config = config
         self.eval_config = config.eval
         workflow_alias = self._get_workflow_alias(config.workflow.type)
         logger.debug("Loaded %s evaluation configuration: %s", workflow_alias, self.eval_config)
@@ -498,7 +604,10 @@ class EvaluationRun:
                                        eval_input=EvalInput(eval_input_items=[]),
                                        evaluation_results=[],
                                        usage_stats=UsageStats(),
-                                       profiler_results=ProfilerResults())
+                                       profiler_results=ProfilerResults(),
+                                       config_original_file=self.config_original_file,
+                                       config_effective_file=self.config_effective_file,
+                                       config_metadata_file=self.config_metadata_file)
         custom_pre_eval_process_function = self.eval_config.general.output.custom_pre_eval_process_function \
             if self.eval_config.general.output else None
@@ -517,7 +626,25 @@ class EvaluationRun:
                                        eval_input=self.eval_input,
                                        evaluation_results=self.evaluation_results,
                                        usage_stats=self.usage_stats,
-                                       profiler_results=ProfilerResults())
+                                       profiler_results=ProfilerResults(),
+                                       config_original_file=self.config_original_file,
+                                       config_effective_file=self.config_effective_file,
+                                       config_metadata_file=self.config_metadata_file)
+        # Validate LLM endpoints before running evaluation (opt-in via config)
+        if (not self.config.skip_workflow and not self.config.endpoint and config.eval.general.validate_llm_endpoints):
+            try:
+                logger.info("Validating LLM endpoints before evaluation (enabled via config)...")
+                await validate_llm_endpoints(config)
+            except RuntimeError as e:
+                # Critical validation errors (404, connection failures) - fail fast
+                logger.error("LLM endpoint validation failed: %s", e)
+                raise
+            except Exception as e:
+                # Non-critical errors (missing packages, config issues) - warn but continue
+                logger.warning("LLM endpoint validation incomplete: %s. Continuing with evaluation...",
+                               e,
+                               exc_info=True)
         # Run workflow and evaluate
         async with WorkflowEvalBuilder.from_config(config=config) as eval_workflow:
@@ -526,25 +653,32 @@ class EvaluationRun:
             with self.eval_trace_context.evaluation_context():
                 # Run workflow
-                if self.config.endpoint:
-                    await self.run_workflow_remote()
-                elif not self.config.skip_workflow:
-                    if session_manager is None:
-                        workflow = await eval_workflow.build()
-                        session_manager = SessionManager(workflow,
-                                                         max_concurrency=self.eval_config.general.max_concurrency)
-                    await self.run_workflow_local(session_manager)
-                # Pre-evaluation process the workflow output
-                self.eval_input = dataset_handler.pre_eval_process_eval_input(self.eval_input)
-                # Evaluate
-                evaluators = {name: eval_workflow.get_evaluator(name) for name in self.eval_config.evaluators}
-                await self.run_evaluators(evaluators)
-                # Wait for all trace export tasks to complete (local workflows only)
-                if session_manager and not self.config.endpoint:
-                    await self.wait_for_all_export_tasks_local(session_manager, timeout=self.config.export_timeout)
+                local_session_manager: SessionManager | None = None
+                try:
+                    if self.config.endpoint:
+                        await self.run_workflow_remote()
+                    elif not self.config.skip_workflow:
+                        if session_manager is None:
+                            session_manager = await SessionManager.create(
+                                config=config,
+                                shared_builder=eval_workflow,
+                                max_concurrency=self.eval_config.general.max_concurrency)
+                            local_session_manager = session_manager
+                        await self.run_workflow_local(session_manager)
+                    # Pre-evaluation process the workflow output
+                    self.eval_input = dataset_handler.pre_eval_process_eval_input(self.eval_input)
+                    # Evaluate
+                    evaluators = {name: eval_workflow.get_evaluator(name) for name in self.eval_config.evaluators}
+                    await self.run_evaluators(evaluators)
+                    # Wait for all trace export tasks to complete (local workflows only)
+                    if session_manager and not self.config.endpoint:
+                        await self.wait_for_all_export_tasks_local(session_manager, timeout=self.config.export_timeout)
+                finally:
+                    if local_session_manager is not None:
+                        await local_session_manager.shutdown()
         # Profile the workflow
         profiler_results = await self.profile_workflow()
@@ -572,4 +706,7 @@ class EvaluationRun:
                                    eval_input=self.eval_input,
                                    evaluation_results=self.evaluation_results,
                                    usage_stats=self.usage_stats,
-                                   profiler_results=profiler_results)
+                                   profiler_results=profiler_results,
+                                   config_original_file=self.config_original_file,
+                                   config_effective_file=self.config_effective_file,
+                                   config_metadata_file=self.config_metadata_file)

nat/eval/evaluator/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

nat/eval/evaluator/base_evaluator.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -34,7 +34,7 @@ class BaseEvaluator(ABC):
         **Experimental Feature**: The Evaluation API is experimental and may change in future releases.
         Future versions may introduce breaking changes without notice.
-    Each custom evaluator must implement the `evaluate_item` method which is used to evaluate a
+    Each custom evaluator must implement the ``evaluate_item`` method which is used to evaluate a
     single EvalInputItem.
     """

nat/eval/evaluator/evaluator_model.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -16,6 +16,7 @@
 import typing
 from pydantic import BaseModel
+from pydantic import SerializeAsAny
 from nat.data_models.intermediate_step import IntermediateStep
@@ -55,4 +56,4 @@ class EvalOutputItem(BaseModel):
 class EvalOutput(BaseModel):
     average_score: typing.Any  # float or any serializable type
-    eval_output_items: list[EvalOutputItem]
+    eval_output_items: list[SerializeAsAny[EvalOutputItem]]

nvidia-nat 1.4.0a20251120__py3-none-any.whl → 1.4.0a20260113__py3-none-any.whl

nvidia-nat 1.4.0a20251120__py3-none-any.whl → 1.4.0a20260113__py3-none-any.whl