nvidia-nat 1.4.0a20251120__py3-none-any.whl → 1.4.0a20260113__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiq/__init__.py +1 -1
- nat/{front_ends/mcp → agent/auto_memory_wrapper}/__init__.py +1 -1
- nat/agent/auto_memory_wrapper/agent.py +278 -0
- nat/agent/auto_memory_wrapper/register.py +227 -0
- nat/agent/auto_memory_wrapper/state.py +30 -0
- nat/agent/base.py +1 -1
- nat/agent/dual_node.py +1 -1
- nat/agent/prompt_optimizer/prompt.py +1 -1
- nat/agent/prompt_optimizer/register.py +1 -1
- nat/agent/react_agent/agent.py +16 -9
- nat/agent/react_agent/output_parser.py +2 -2
- nat/agent/react_agent/prompt.py +3 -2
- nat/agent/react_agent/register.py +2 -2
- nat/agent/react_agent/register_per_user_agent.py +104 -0
- nat/agent/reasoning_agent/reasoning_agent.py +1 -1
- nat/agent/register.py +3 -1
- nat/agent/responses_api_agent/__init__.py +1 -1
- nat/agent/responses_api_agent/register.py +1 -1
- nat/agent/rewoo_agent/agent.py +9 -4
- nat/agent/rewoo_agent/prompt.py +1 -1
- nat/agent/rewoo_agent/register.py +1 -1
- nat/agent/tool_calling_agent/agent.py +5 -4
- nat/agent/tool_calling_agent/register.py +1 -1
- nat/authentication/__init__.py +1 -1
- nat/authentication/api_key/__init__.py +1 -1
- nat/authentication/api_key/api_key_auth_provider.py +1 -1
- nat/authentication/api_key/api_key_auth_provider_config.py +22 -7
- nat/authentication/api_key/register.py +1 -1
- nat/authentication/credential_validator/__init__.py +1 -1
- nat/authentication/credential_validator/bearer_token_validator.py +1 -1
- nat/authentication/exceptions/__init__.py +1 -1
- nat/authentication/exceptions/api_key_exceptions.py +1 -1
- nat/authentication/http_basic_auth/http_basic_auth_provider.py +1 -1
- nat/authentication/http_basic_auth/register.py +1 -1
- nat/authentication/interfaces.py +1 -1
- nat/authentication/oauth2/__init__.py +1 -1
- nat/authentication/oauth2/oauth2_auth_code_flow_provider.py +1 -1
- nat/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +1 -1
- nat/authentication/oauth2/oauth2_resource_server_config.py +1 -1
- nat/authentication/oauth2/register.py +1 -1
- nat/authentication/register.py +1 -1
- nat/builder/builder.py +511 -1
- nat/builder/child_builder.py +385 -0
- nat/builder/component_utils.py +28 -4
- nat/builder/context.py +17 -1
- nat/builder/embedder.py +1 -1
- nat/builder/eval_builder.py +19 -7
- nat/builder/evaluator.py +1 -1
- nat/builder/framework_enum.py +2 -1
- nat/builder/front_end.py +1 -1
- nat/builder/function.py +40 -3
- nat/builder/function_base.py +1 -1
- nat/builder/function_info.py +1 -1
- nat/builder/intermediate_step_manager.py +1 -1
- nat/builder/llm.py +1 -1
- nat/builder/per_user_workflow_builder.py +843 -0
- nat/builder/retriever.py +1 -1
- nat/builder/sync_builder.py +571 -0
- nat/builder/user_interaction_manager.py +1 -1
- nat/builder/workflow.py +1 -1
- nat/builder/workflow_builder.py +536 -424
- nat/cli/__init__.py +1 -1
- nat/cli/cli_utils/config_override.py +1 -1
- nat/cli/cli_utils/validation.py +32 -1
- nat/cli/commands/configure/channel/add.py +1 -1
- nat/cli/commands/configure/channel/channel.py +1 -1
- nat/cli/commands/configure/channel/remove.py +1 -1
- nat/cli/commands/configure/channel/update.py +1 -1
- nat/cli/commands/configure/configure.py +1 -1
- nat/cli/commands/evaluate.py +87 -13
- nat/cli/commands/finetune.py +132 -0
- nat/cli/commands/info/__init__.py +1 -1
- nat/cli/commands/info/info.py +1 -1
- nat/cli/commands/info/list_channels.py +1 -1
- nat/cli/commands/info/list_components.py +1 -1
- nat/cli/commands/object_store/__init__.py +1 -1
- nat/cli/commands/object_store/object_store.py +1 -1
- nat/cli/commands/optimize.py +1 -1
- nat/cli/commands/{mcp → red_teaming}/__init__.py +1 -1
- nat/cli/commands/red_teaming/red_teaming.py +138 -0
- nat/cli/commands/red_teaming/red_teaming_utils.py +73 -0
- nat/cli/commands/registry/__init__.py +1 -1
- nat/cli/commands/registry/publish.py +1 -1
- nat/cli/commands/registry/pull.py +1 -1
- nat/cli/commands/registry/registry.py +1 -1
- nat/cli/commands/registry/remove.py +1 -1
- nat/cli/commands/registry/search.py +1 -1
- nat/cli/commands/sizing/__init__.py +1 -1
- nat/cli/commands/sizing/calc.py +1 -1
- nat/cli/commands/sizing/sizing.py +1 -1
- nat/cli/commands/start.py +1 -1
- nat/cli/commands/uninstall.py +1 -1
- nat/cli/commands/validate.py +1 -1
- nat/cli/commands/workflow/__init__.py +1 -1
- nat/cli/commands/workflow/workflow.py +1 -1
- nat/cli/commands/workflow/workflow_commands.py +3 -2
- nat/cli/entrypoint.py +15 -37
- nat/cli/main.py +2 -2
- nat/cli/plugin_loader.py +69 -0
- nat/cli/register_workflow.py +183 -5
- nat/cli/type_registry.py +169 -3
- nat/control_flow/register.py +1 -1
- nat/control_flow/router_agent/agent.py +1 -1
- nat/control_flow/router_agent/prompt.py +1 -1
- nat/control_flow/router_agent/register.py +1 -1
- nat/control_flow/sequential_executor.py +28 -7
- nat/data_models/__init__.py +1 -1
- nat/data_models/agent.py +1 -1
- nat/data_models/api_server.py +38 -3
- nat/data_models/authentication.py +1 -1
- nat/data_models/common.py +1 -1
- nat/data_models/component.py +7 -1
- nat/data_models/component_ref.py +34 -1
- nat/data_models/config.py +62 -1
- nat/data_models/dataset_handler.py +15 -2
- nat/data_models/discovery_metadata.py +1 -1
- nat/data_models/embedder.py +1 -1
- nat/data_models/evaluate.py +6 -1
- nat/data_models/evaluator.py +1 -1
- nat/data_models/finetuning.py +260 -0
- nat/data_models/front_end.py +1 -1
- nat/data_models/function.py +1 -1
- nat/data_models/function_dependencies.py +1 -1
- nat/data_models/gated_field_mixin.py +1 -1
- nat/data_models/interactive.py +1 -1
- nat/data_models/intermediate_step.py +29 -2
- nat/data_models/invocation_node.py +1 -1
- nat/data_models/llm.py +1 -1
- nat/data_models/logging.py +1 -1
- nat/data_models/memory.py +1 -1
- nat/data_models/middleware.py +3 -1
- nat/data_models/object_store.py +1 -1
- nat/data_models/openai_mcp.py +1 -1
- nat/data_models/optimizable.py +1 -1
- nat/data_models/optimizer.py +1 -1
- nat/data_models/profiler.py +1 -1
- nat/data_models/registry_handler.py +1 -1
- nat/data_models/retriever.py +1 -1
- nat/data_models/retry_mixin.py +1 -1
- nat/data_models/runtime_enum.py +1 -1
- nat/data_models/span.py +1 -1
- nat/data_models/step_adaptor.py +1 -1
- nat/data_models/streaming.py +1 -1
- nat/data_models/swe_bench_model.py +1 -1
- nat/data_models/telemetry_exporter.py +1 -1
- nat/data_models/thinking_mixin.py +1 -1
- nat/data_models/ttc_strategy.py +1 -1
- nat/embedder/azure_openai_embedder.py +1 -1
- nat/embedder/nim_embedder.py +1 -1
- nat/embedder/openai_embedder.py +1 -1
- nat/embedder/register.py +1 -1
- nat/eval/__init__.py +1 -1
- nat/eval/config.py +8 -1
- nat/eval/dataset_handler/dataset_downloader.py +1 -1
- nat/eval/dataset_handler/dataset_filter.py +1 -1
- nat/eval/dataset_handler/dataset_handler.py +4 -2
- nat/eval/evaluate.py +217 -80
- nat/eval/evaluator/__init__.py +1 -1
- nat/eval/evaluator/base_evaluator.py +2 -2
- nat/eval/evaluator/evaluator_model.py +3 -2
- nat/eval/intermediate_step_adapter.py +1 -1
- nat/eval/llm_validator.py +336 -0
- nat/eval/rag_evaluator/evaluate.py +17 -10
- nat/eval/rag_evaluator/register.py +1 -1
- nat/eval/red_teaming_evaluator/__init__.py +14 -0
- nat/eval/red_teaming_evaluator/data_models.py +66 -0
- nat/eval/red_teaming_evaluator/evaluate.py +327 -0
- nat/eval/red_teaming_evaluator/filter_conditions.py +75 -0
- nat/eval/red_teaming_evaluator/register.py +55 -0
- nat/eval/register.py +2 -1
- nat/eval/remote_workflow.py +1 -1
- nat/eval/runners/__init__.py +1 -1
- nat/eval/runners/config.py +1 -1
- nat/eval/runners/multi_eval_runner.py +1 -1
- nat/eval/runners/red_teaming_runner/__init__.py +24 -0
- nat/eval/runners/red_teaming_runner/config.py +282 -0
- nat/eval/runners/red_teaming_runner/report_utils.py +707 -0
- nat/eval/runners/red_teaming_runner/runner.py +867 -0
- nat/eval/runtime_evaluator/__init__.py +1 -1
- nat/eval/runtime_evaluator/evaluate.py +1 -1
- nat/eval/runtime_evaluator/register.py +1 -1
- nat/eval/runtime_event_subscriber.py +1 -1
- nat/eval/swe_bench_evaluator/evaluate.py +1 -1
- nat/eval/swe_bench_evaluator/register.py +1 -1
- nat/eval/trajectory_evaluator/evaluate.py +2 -2
- nat/eval/trajectory_evaluator/register.py +1 -1
- nat/eval/tunable_rag_evaluator/evaluate.py +5 -5
- nat/eval/tunable_rag_evaluator/register.py +1 -1
- nat/eval/usage_stats.py +1 -1
- nat/eval/utils/eval_trace_ctx.py +1 -1
- nat/eval/utils/output_uploader.py +1 -1
- nat/eval/utils/tqdm_position_registry.py +1 -1
- nat/eval/utils/weave_eval.py +1 -1
- nat/experimental/decorators/experimental_warning_decorator.py +1 -1
- nat/experimental/test_time_compute/editing/iterative_plan_refinement_editor.py +1 -1
- nat/experimental/test_time_compute/editing/llm_as_a_judge_editor.py +1 -1
- nat/experimental/test_time_compute/editing/motivation_aware_summarization.py +1 -1
- nat/experimental/test_time_compute/functions/execute_score_select_function.py +1 -1
- nat/experimental/test_time_compute/functions/multi_llm_judge_function.py +88 -0
- nat/experimental/test_time_compute/functions/plan_select_execute_function.py +1 -1
- nat/experimental/test_time_compute/functions/ttc_tool_orchestration_function.py +1 -1
- nat/experimental/test_time_compute/functions/ttc_tool_wrapper_function.py +1 -1
- nat/experimental/test_time_compute/models/editor_config.py +1 -1
- nat/experimental/test_time_compute/models/scoring_config.py +1 -1
- nat/experimental/test_time_compute/models/search_config.py +20 -2
- nat/experimental/test_time_compute/models/selection_config.py +33 -2
- nat/experimental/test_time_compute/models/stage_enums.py +1 -1
- nat/experimental/test_time_compute/models/strategy_base.py +1 -1
- nat/experimental/test_time_compute/models/tool_use_config.py +1 -1
- nat/experimental/test_time_compute/models/ttc_item.py +1 -1
- nat/experimental/test_time_compute/register.py +4 -1
- nat/experimental/test_time_compute/scoring/llm_based_agent_scorer.py +1 -1
- nat/experimental/test_time_compute/scoring/llm_based_plan_scorer.py +1 -1
- nat/experimental/test_time_compute/scoring/motivation_aware_scorer.py +1 -1
- nat/experimental/test_time_compute/search/multi_llm_generation.py +115 -0
- nat/experimental/test_time_compute/search/multi_llm_planner.py +1 -1
- nat/experimental/test_time_compute/search/multi_query_retrieval_search.py +1 -1
- nat/experimental/test_time_compute/search/single_shot_multi_plan_planner.py +1 -1
- nat/experimental/test_time_compute/selection/best_of_n_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_based_agent_output_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_based_output_merging_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_based_plan_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_judge_selection.py +127 -0
- nat/experimental/test_time_compute/selection/threshold_selector.py +1 -1
- nat/finetuning/__init__.py +24 -0
- nat/finetuning/finetuning_runtime.py +143 -0
- nat/finetuning/interfaces/__init__.py +24 -0
- nat/finetuning/interfaces/finetuning_runner.py +261 -0
- nat/finetuning/interfaces/trainer_adapter.py +103 -0
- nat/finetuning/interfaces/trajectory_builder.py +115 -0
- nat/finetuning/utils/__init__.py +15 -0
- nat/finetuning/utils/parsers/__init__.py +15 -0
- nat/finetuning/utils/parsers/adk_parser.py +141 -0
- nat/finetuning/utils/parsers/base_parser.py +238 -0
- nat/finetuning/utils/parsers/common.py +91 -0
- nat/finetuning/utils/parsers/langchain_parser.py +267 -0
- nat/finetuning/utils/parsers/llama_index_parser.py +218 -0
- nat/front_ends/__init__.py +1 -1
- nat/front_ends/console/__init__.py +1 -1
- nat/front_ends/console/authentication_flow_handler.py +1 -1
- nat/front_ends/console/console_front_end_config.py +4 -1
- nat/front_ends/console/console_front_end_plugin.py +5 -4
- nat/front_ends/console/register.py +1 -1
- nat/front_ends/cron/__init__.py +1 -1
- nat/front_ends/fastapi/__init__.py +1 -1
- nat/front_ends/fastapi/async_job.py +128 -0
- nat/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +1 -1
- nat/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +13 -9
- nat/front_ends/fastapi/dask_client_mixin.py +1 -1
- nat/front_ends/fastapi/fastapi_front_end_config.py +1 -1
- nat/front_ends/fastapi/fastapi_front_end_controller.py +1 -1
- nat/front_ends/fastapi/fastapi_front_end_plugin.py +25 -30
- nat/front_ends/fastapi/fastapi_front_end_plugin_worker.py +195 -60
- nat/front_ends/fastapi/html_snippets/__init__.py +1 -1
- nat/front_ends/fastapi/html_snippets/auth_code_grant_success.py +1 -1
- nat/front_ends/fastapi/intermediate_steps_subscriber.py +12 -1
- nat/front_ends/fastapi/job_store.py +23 -11
- nat/front_ends/fastapi/main.py +1 -1
- nat/front_ends/fastapi/message_handler.py +27 -4
- nat/front_ends/fastapi/message_validator.py +54 -2
- nat/front_ends/fastapi/register.py +1 -1
- nat/front_ends/fastapi/response_helpers.py +16 -15
- nat/front_ends/fastapi/step_adaptor.py +1 -1
- nat/front_ends/fastapi/utils.py +1 -1
- nat/front_ends/register.py +1 -2
- nat/front_ends/simple_base/__init__.py +1 -1
- nat/front_ends/simple_base/simple_front_end_plugin_base.py +6 -4
- nat/llm/aws_bedrock_llm.py +1 -1
- nat/llm/azure_openai_llm.py +10 -1
- nat/llm/dynamo_llm.py +363 -0
- nat/llm/huggingface_llm.py +177 -0
- nat/llm/litellm_llm.py +1 -1
- nat/llm/nim_llm.py +1 -1
- nat/llm/openai_llm.py +1 -1
- nat/llm/register.py +3 -1
- nat/llm/utils/__init__.py +1 -1
- nat/llm/utils/env_config_value.py +1 -1
- nat/llm/utils/error.py +1 -1
- nat/llm/utils/thinking.py +1 -1
- nat/memory/__init__.py +1 -1
- nat/memory/interfaces.py +1 -1
- nat/memory/models.py +1 -1
- nat/meta/pypi.md +1 -1
- nat/middleware/__init__.py +5 -5
- nat/middleware/cache/__init__.py +14 -0
- nat/middleware/{cache_middleware.py → cache/cache_middleware.py} +39 -42
- nat/middleware/cache/cache_middleware_config.py +44 -0
- nat/middleware/cache/register.py +33 -0
- nat/middleware/defense/__init__.py +14 -0
- nat/middleware/defense/defense_middleware.py +362 -0
- nat/middleware/defense/defense_middleware_content_guard.py +455 -0
- nat/middleware/defense/defense_middleware_data_models.py +91 -0
- nat/middleware/defense/defense_middleware_output_verifier.py +440 -0
- nat/middleware/defense/defense_middleware_pii.py +356 -0
- nat/middleware/defense/register.py +82 -0
- nat/middleware/dynamic/__init__.py +14 -0
- nat/middleware/dynamic/dynamic_function_middleware.py +962 -0
- nat/middleware/dynamic/dynamic_middleware_config.py +132 -0
- nat/middleware/dynamic/register.py +34 -0
- nat/middleware/function_middleware.py +236 -52
- nat/middleware/logging/__init__.py +14 -0
- nat/middleware/logging/logging_middleware.py +67 -0
- nat/middleware/logging/logging_middleware_config.py +28 -0
- nat/middleware/logging/register.py +33 -0
- nat/middleware/middleware.py +142 -28
- nat/middleware/red_teaming/__init__.py +14 -0
- nat/middleware/red_teaming/red_teaming_middleware.py +344 -0
- nat/middleware/red_teaming/red_teaming_middleware_config.py +112 -0
- nat/middleware/red_teaming/register.py +47 -0
- nat/middleware/register.py +7 -20
- nat/middleware/utils/__init__.py +14 -0
- nat/middleware/utils/workflow_inventory.py +155 -0
- nat/object_store/__init__.py +1 -1
- nat/object_store/in_memory_object_store.py +1 -1
- nat/object_store/interfaces.py +1 -1
- nat/object_store/models.py +1 -1
- nat/object_store/register.py +1 -1
- nat/observability/__init__.py +1 -1
- nat/observability/exporter/__init__.py +1 -1
- nat/observability/exporter/base_exporter.py +1 -1
- nat/observability/exporter/exporter.py +1 -1
- nat/observability/exporter/file_exporter.py +1 -1
- nat/observability/exporter/processing_exporter.py +1 -1
- nat/observability/exporter/raw_exporter.py +1 -1
- nat/observability/exporter/span_exporter.py +7 -1
- nat/observability/exporter_manager.py +1 -1
- nat/observability/mixin/__init__.py +1 -1
- nat/observability/mixin/batch_config_mixin.py +1 -1
- nat/observability/mixin/collector_config_mixin.py +1 -1
- nat/observability/mixin/file_mixin.py +1 -1
- nat/observability/mixin/file_mode.py +1 -1
- nat/observability/mixin/redaction_config_mixin.py +1 -1
- nat/observability/mixin/resource_conflict_mixin.py +1 -1
- nat/observability/mixin/serialize_mixin.py +1 -1
- nat/observability/mixin/tagging_config_mixin.py +1 -1
- nat/observability/mixin/type_introspection_mixin.py +1 -1
- nat/observability/processor/__init__.py +1 -1
- nat/observability/processor/batching_processor.py +1 -1
- nat/observability/processor/callback_processor.py +1 -1
- nat/observability/processor/falsy_batch_filter_processor.py +1 -1
- nat/observability/processor/intermediate_step_serializer.py +1 -1
- nat/observability/processor/processor.py +1 -1
- nat/observability/processor/processor_factory.py +1 -1
- nat/observability/processor/redaction/__init__.py +1 -1
- nat/observability/processor/redaction/contextual_redaction_processor.py +1 -1
- nat/observability/processor/redaction/contextual_span_redaction_processor.py +1 -1
- nat/observability/processor/redaction/redaction_processor.py +1 -1
- nat/observability/processor/redaction/span_header_redaction_processor.py +1 -1
- nat/observability/processor/span_tagging_processor.py +1 -1
- nat/observability/register.py +1 -1
- nat/observability/utils/__init__.py +1 -1
- nat/observability/utils/dict_utils.py +1 -1
- nat/observability/utils/time_utils.py +1 -1
- nat/profiler/calc/__init__.py +1 -1
- nat/profiler/calc/calc_runner.py +3 -3
- nat/profiler/calc/calculations.py +1 -1
- nat/profiler/calc/data_models.py +1 -1
- nat/profiler/calc/plot.py +30 -3
- nat/profiler/callbacks/agno_callback_handler.py +1 -1
- nat/profiler/callbacks/base_callback_class.py +1 -1
- nat/profiler/callbacks/langchain_callback_handler.py +33 -3
- nat/profiler/callbacks/llama_index_callback_handler.py +13 -10
- nat/profiler/callbacks/semantic_kernel_callback_handler.py +1 -1
- nat/profiler/callbacks/token_usage_base_model.py +1 -1
- nat/profiler/data_frame_row.py +1 -1
- nat/profiler/data_models.py +1 -1
- nat/profiler/decorators/framework_wrapper.py +16 -1
- nat/profiler/decorators/function_tracking.py +1 -1
- nat/profiler/forecasting/config.py +1 -1
- nat/profiler/forecasting/model_trainer.py +1 -1
- nat/profiler/forecasting/models/__init__.py +1 -1
- nat/profiler/forecasting/models/forecasting_base_model.py +1 -1
- nat/profiler/forecasting/models/linear_model.py +1 -1
- nat/profiler/forecasting/models/random_forest_regressor.py +1 -1
- nat/profiler/inference_metrics_model.py +1 -1
- nat/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +1 -1
- nat/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +1 -1
- nat/profiler/inference_optimization/data_models.py +1 -1
- nat/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +1 -1
- nat/profiler/inference_optimization/experimental/prefix_span_analysis.py +1 -1
- nat/profiler/inference_optimization/llm_metrics.py +1 -1
- nat/profiler/inference_optimization/prompt_caching.py +1 -1
- nat/profiler/inference_optimization/token_uniqueness.py +1 -1
- nat/profiler/inference_optimization/workflow_runtimes.py +1 -1
- nat/profiler/intermediate_property_adapter.py +1 -1
- nat/profiler/parameter_optimization/optimizable_utils.py +1 -1
- nat/profiler/parameter_optimization/optimizer_runtime.py +1 -1
- nat/profiler/parameter_optimization/parameter_optimizer.py +1 -1
- nat/profiler/parameter_optimization/parameter_selection.py +1 -1
- nat/profiler/parameter_optimization/pareto_visualizer.py +1 -1
- nat/profiler/parameter_optimization/prompt_optimizer.py +1 -1
- nat/profiler/parameter_optimization/update_helpers.py +1 -1
- nat/profiler/profile_runner.py +1 -1
- nat/profiler/utils.py +1 -1
- nat/registry_handlers/local/local_handler.py +1 -1
- nat/registry_handlers/local/register_local.py +1 -1
- nat/registry_handlers/metadata_factory.py +1 -1
- nat/registry_handlers/package_utils.py +1 -1
- nat/registry_handlers/pypi/pypi_handler.py +1 -1
- nat/registry_handlers/pypi/register_pypi.py +1 -1
- nat/registry_handlers/register.py +1 -1
- nat/registry_handlers/registry_handler_base.py +1 -1
- nat/registry_handlers/rest/register_rest.py +1 -1
- nat/registry_handlers/rest/rest_handler.py +1 -1
- nat/registry_handlers/schemas/headers.py +1 -1
- nat/registry_handlers/schemas/package.py +1 -1
- nat/registry_handlers/schemas/publish.py +1 -1
- nat/registry_handlers/schemas/pull.py +1 -1
- nat/registry_handlers/schemas/remove.py +1 -1
- nat/registry_handlers/schemas/search.py +1 -1
- nat/registry_handlers/schemas/status.py +1 -1
- nat/retriever/interface.py +1 -1
- nat/retriever/milvus/__init__.py +1 -1
- nat/retriever/milvus/register.py +1 -1
- nat/retriever/milvus/retriever.py +1 -1
- nat/retriever/models.py +1 -1
- nat/retriever/nemo_retriever/__init__.py +1 -1
- nat/retriever/nemo_retriever/register.py +1 -1
- nat/retriever/nemo_retriever/retriever.py +5 -5
- nat/retriever/register.py +1 -1
- nat/runtime/__init__.py +1 -1
- nat/runtime/loader.py +10 -3
- nat/runtime/metrics.py +180 -0
- nat/runtime/runner.py +1 -5
- nat/runtime/session.py +451 -32
- nat/runtime/user_metadata.py +1 -1
- nat/settings/global_settings.py +1 -1
- nat/tool/chat_completion.py +1 -1
- nat/tool/code_execution/README.md +1 -1
- nat/tool/code_execution/code_sandbox.py +1 -1
- nat/tool/code_execution/local_sandbox/Dockerfile.sandbox +1 -1
- nat/tool/code_execution/local_sandbox/__init__.py +1 -1
- nat/tool/code_execution/local_sandbox/local_sandbox_server.py +1 -1
- nat/tool/code_execution/local_sandbox/start_local_sandbox.sh +1 -1
- nat/tool/code_execution/register.py +1 -1
- nat/tool/code_execution/utils.py +1 -1
- nat/tool/datetime_tools.py +1 -1
- nat/tool/document_search.py +1 -1
- nat/tool/github_tools.py +1 -1
- nat/tool/memory_tools/add_memory_tool.py +1 -1
- nat/tool/memory_tools/delete_memory_tool.py +1 -1
- nat/tool/memory_tools/get_memory_tool.py +1 -1
- nat/tool/nvidia_rag.py +2 -2
- nat/tool/register.py +1 -1
- nat/tool/retriever.py +1 -1
- nat/tool/server_tools.py +1 -1
- nat/utils/__init__.py +8 -5
- nat/utils/callable_utils.py +1 -1
- nat/utils/data_models/schema_validator.py +1 -1
- nat/utils/debugging_utils.py +1 -1
- nat/utils/decorators.py +1 -1
- nat/utils/dump_distro_mapping.py +1 -1
- nat/utils/exception_handlers/automatic_retries.py +3 -3
- nat/utils/exception_handlers/schemas.py +1 -1
- nat/utils/io/model_processing.py +1 -1
- nat/utils/io/supress_logs.py +33 -0
- nat/utils/io/yaml_tools.py +1 -1
- nat/utils/log_levels.py +1 -1
- nat/utils/log_utils.py +13 -1
- nat/utils/metadata_utils.py +1 -1
- nat/utils/optional_imports.py +1 -1
- nat/utils/producer_consumer_queue.py +1 -1
- nat/utils/reactive/base/observable_base.py +1 -1
- nat/utils/reactive/base/observer_base.py +1 -1
- nat/utils/reactive/base/subject_base.py +1 -1
- nat/utils/reactive/observable.py +1 -1
- nat/utils/reactive/observer.py +1 -1
- nat/utils/reactive/subject.py +1 -1
- nat/utils/reactive/subscription.py +1 -1
- nat/utils/responses_api.py +1 -1
- nat/utils/settings/global_settings.py +1 -1
- nat/utils/string_utils.py +1 -1
- nat/utils/type_converter.py +18 -5
- nat/utils/type_utils.py +1 -1
- nat/utils/url_utils.py +1 -1
- {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/METADATA +39 -14
- nvidia_nat-1.4.0a20260113.dist-info/RECORD +547 -0
- nvidia_nat-1.4.0a20260113.dist-info/entry_points.txt +38 -0
- nat/cli/commands/mcp/mcp.py +0 -986
- nat/front_ends/mcp/introspection_token_verifier.py +0 -73
- nat/front_ends/mcp/mcp_front_end_config.py +0 -109
- nat/front_ends/mcp/mcp_front_end_plugin.py +0 -155
- nat/front_ends/mcp/mcp_front_end_plugin_worker.py +0 -388
- nat/front_ends/mcp/memory_profiler.py +0 -320
- nat/front_ends/mcp/register.py +0 -27
- nat/front_ends/mcp/tool_converter.py +0 -321
- nvidia_nat-1.4.0a20251120.dist-info/RECORD +0 -488
- nvidia_nat-1.4.0a20251120.dist-info/entry_points.txt +0 -23
- {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/WHEEL +0 -0
- {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
- {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE.md +0 -0
- {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/top_level.txt +0 -0
nat/data_models/optimizable.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/data_models/optimizer.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/data_models/profiler.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/data_models/registry_handler.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/data_models/retriever.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/data_models/retry_mixin.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/data_models/runtime_enum.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/data_models/span.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/data_models/step_adaptor.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/data_models/streaming.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/data_models/swe_bench_model.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/data_models/telemetry_exporter.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/data_models/thinking_mixin.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/data_models/ttc_strategy.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/embedder/azure_openai_embedder.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/embedder/nim_embedder.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/embedder/openai_embedder.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/embedder/register.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/eval/__init__.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/eval/config.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -46,6 +46,8 @@ class EvaluationRunConfig(BaseModel):
|
|
|
46
46
|
num_passes: int = 0
|
|
47
47
|
# timeout for waiting for trace export tasks to complete
|
|
48
48
|
export_timeout: float = 60.0
|
|
49
|
+
# User ID to use for workflow session. Defaults to 'nat_eval_user_id'.
|
|
50
|
+
user_id: str = "nat_eval_user_id"
|
|
49
51
|
|
|
50
52
|
|
|
51
53
|
class EvaluationRunOutput(BaseModel):
|
|
@@ -60,3 +62,8 @@ class EvaluationRunOutput(BaseModel):
|
|
|
60
62
|
evaluation_results: list[tuple[str, EvalOutput]]
|
|
61
63
|
usage_stats: UsageStats | None = None
|
|
62
64
|
profiler_results: ProfilerResults
|
|
65
|
+
|
|
66
|
+
# Configuration files written to output directory
|
|
67
|
+
config_original_file: Path | None = None
|
|
68
|
+
config_effective_file: Path | None = None
|
|
69
|
+
config_metadata_file: Path | None = None
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -217,7 +217,9 @@ class DatasetHandler:
|
|
|
217
217
|
"""
|
|
218
218
|
# Apply filters and deduplicate
|
|
219
219
|
input_df = self.dataset_filter.apply_filters(input_df)
|
|
220
|
-
|
|
220
|
+
|
|
221
|
+
if (self.dataset_config.id_key in input_df.columns):
|
|
222
|
+
input_df.drop_duplicates(subset=[self.dataset_config.id_key], inplace=True)
|
|
221
223
|
|
|
222
224
|
if self.reps > 1 and self.adjust_dataset_size:
|
|
223
225
|
raise ValueError("reps and adjust_dataset_size are mutually exclusive")
|
nat/eval/evaluate.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -14,16 +14,21 @@
|
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
|
|
16
16
|
import asyncio
|
|
17
|
+
import json
|
|
17
18
|
import logging
|
|
18
19
|
import shutil
|
|
19
20
|
import warnings
|
|
21
|
+
from datetime import UTC
|
|
22
|
+
from datetime import datetime
|
|
20
23
|
from pathlib import Path
|
|
21
24
|
from typing import Any
|
|
22
25
|
from uuid import uuid4
|
|
23
26
|
|
|
27
|
+
import yaml
|
|
24
28
|
from pydantic import BaseModel
|
|
25
29
|
from tqdm import tqdm
|
|
26
30
|
|
|
31
|
+
from nat.data_models.config import Config
|
|
27
32
|
from nat.data_models.evaluate import EvalConfig
|
|
28
33
|
from nat.data_models.evaluate import JobEvictionPolicy
|
|
29
34
|
from nat.data_models.runtime_enum import RuntimeTypeEnum
|
|
@@ -33,6 +38,7 @@ from nat.eval.dataset_handler.dataset_handler import DatasetHandler
|
|
|
33
38
|
from nat.eval.evaluator.evaluator_model import EvalInput
|
|
34
39
|
from nat.eval.evaluator.evaluator_model import EvalInputItem
|
|
35
40
|
from nat.eval.evaluator.evaluator_model import EvalOutput
|
|
41
|
+
from nat.eval.llm_validator import validate_llm_endpoints
|
|
36
42
|
from nat.eval.usage_stats import UsageStats
|
|
37
43
|
from nat.eval.usage_stats import UsageStatsItem
|
|
38
44
|
from nat.eval.usage_stats import UsageStatsLLM
|
|
@@ -62,6 +68,7 @@ class EvaluationRun:
|
|
|
62
68
|
# Run-specific configuration
|
|
63
69
|
self.config: EvaluationRunConfig = config
|
|
64
70
|
self.eval_config: EvalConfig | None = None
|
|
71
|
+
self.effective_config: Config | None = None # Stores the complete config after applying overrides
|
|
65
72
|
|
|
66
73
|
# Helpers
|
|
67
74
|
self.intermediate_step_adapter: IntermediateStepAdapter = IntermediateStepAdapter()
|
|
@@ -97,6 +104,11 @@ class EvaluationRun:
|
|
|
97
104
|
# evaluation output files
|
|
98
105
|
self.evaluator_output_files: list[Path] = []
|
|
99
106
|
|
|
107
|
+
# configuration output files
|
|
108
|
+
self.config_original_file: Path | None = None
|
|
109
|
+
self.config_effective_file: Path | None = None
|
|
110
|
+
self.config_metadata_file: Path | None = None
|
|
111
|
+
|
|
100
112
|
def _compute_usage_stats(self, item: EvalInputItem):
|
|
101
113
|
"""Compute usage stats for a single item using the intermediate steps"""
|
|
102
114
|
# get the prompt and completion tokens from the intermediate steps
|
|
@@ -169,62 +181,65 @@ class EvaluationRun:
|
|
|
169
181
|
if stop_event.is_set():
|
|
170
182
|
return "", []
|
|
171
183
|
|
|
172
|
-
async with session_manager.
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
184
|
+
async with session_manager.session(user_id=self.config.user_id) as session:
|
|
185
|
+
async with session.run(item.input_obj, runtime_type=RuntimeTypeEnum.EVALUATE) as runner:
|
|
186
|
+
if not session.workflow.has_single_output:
|
|
187
|
+
# raise an error if the workflow has multiple outputs
|
|
188
|
+
raise NotImplementedError("Multiple outputs are not supported")
|
|
189
|
+
|
|
190
|
+
runner_result = None
|
|
191
|
+
intermediate_future = None
|
|
192
|
+
|
|
193
|
+
try:
|
|
194
|
+
# Start usage stats and intermediate steps collection in parallel
|
|
195
|
+
intermediate_future = pull_intermediate()
|
|
196
|
+
runner_result = runner.result()
|
|
197
|
+
base_output = await runner_result
|
|
198
|
+
intermediate_steps = await intermediate_future
|
|
199
|
+
except NotImplementedError as e:
|
|
200
|
+
logger.error("Failed to run the workflow: %s", e)
|
|
201
|
+
# raise original error
|
|
202
|
+
raise
|
|
203
|
+
except Exception as e:
|
|
204
|
+
logger.exception("Failed to run the workflow: %s", e)
|
|
205
|
+
# stop processing if a workflow error occurs
|
|
206
|
+
self.workflow_interrupted = True
|
|
207
|
+
|
|
208
|
+
# Cancel any coroutines that are still running, avoiding a warning about unawaited coroutines
|
|
209
|
+
# (typically one of these two is what raised the exception and the other is still running)
|
|
210
|
+
for coro in (runner_result, intermediate_future):
|
|
211
|
+
if coro is not None:
|
|
212
|
+
asyncio.ensure_future(coro).cancel()
|
|
213
|
+
|
|
214
|
+
stop_event.set()
|
|
215
|
+
return
|
|
216
|
+
|
|
217
|
+
try:
|
|
218
|
+
base_output = runner.convert(base_output, to_type=str)
|
|
219
|
+
except ValueError:
|
|
220
|
+
pass
|
|
221
|
+
|
|
222
|
+
# if base_output is a pydantic model dump it to json
|
|
223
|
+
if isinstance(base_output, BaseModel):
|
|
224
|
+
output = base_output.model_dump_json(indent=2)
|
|
225
|
+
else:
|
|
226
|
+
m = jsonpath_expr.find(base_output)
|
|
227
|
+
if (not m):
|
|
228
|
+
raise RuntimeError(
|
|
229
|
+
f"Failed to extract output using jsonpath: {self.config.result_json_path}")
|
|
230
|
+
if (len(m) > 1):
|
|
231
|
+
logger.warning(
|
|
232
|
+
"Multiple matches found for jsonpath at row '%s'. Matches: %s. Using the first",
|
|
233
|
+
base_output,
|
|
234
|
+
m)
|
|
235
|
+
output = m[0].value
|
|
236
|
+
|
|
237
|
+
item.output_obj = output
|
|
238
|
+
item.trajectory = self.intermediate_step_adapter.validate_intermediate_steps(intermediate_steps)
|
|
239
|
+
usage_stats_item = self._compute_usage_stats(item)
|
|
240
|
+
|
|
241
|
+
self.weave_eval.log_prediction(item, output)
|
|
242
|
+
await self.weave_eval.log_usage_stats(item, usage_stats_item)
|
|
228
243
|
|
|
229
244
|
async def wrapped_run(item: EvalInputItem) -> None:
|
|
230
245
|
await run_one(item)
|
|
@@ -329,10 +344,99 @@ class EvaluationRun:
|
|
|
329
344
|
except Exception as e:
|
|
330
345
|
logger.exception("Failed to delete old job directory: %s: %s", dir_to_delete, e)
|
|
331
346
|
|
|
347
|
+
def write_configuration(self) -> None:
|
|
348
|
+
"""Save the configuration used for this evaluation run to the output directory.
|
|
349
|
+
|
|
350
|
+
This saves three files:
|
|
351
|
+
1. config_original.yml - The original configuration file
|
|
352
|
+
2. config_effective.yml - The configuration with all overrides applied
|
|
353
|
+
3. config_metadata.json - Metadata about the evaluation run and overrides
|
|
354
|
+
"""
|
|
355
|
+
output_dir = self.eval_config.general.output_dir
|
|
356
|
+
output_dir.mkdir(parents=True, exist_ok=True)
|
|
357
|
+
|
|
358
|
+
try:
|
|
359
|
+
# 1. Save original configuration
|
|
360
|
+
config_original_file = output_dir / "config_original.yml"
|
|
361
|
+
if isinstance(self.config.config_file, Path):
|
|
362
|
+
# Copy original file if it exists
|
|
363
|
+
if self.config.config_file.exists():
|
|
364
|
+
shutil.copy2(self.config.config_file, config_original_file)
|
|
365
|
+
self.config_original_file = config_original_file
|
|
366
|
+
logger.info("Original config file copied to %s", config_original_file)
|
|
367
|
+
else:
|
|
368
|
+
logger.warning("Original config file not found at %s", self.config.config_file)
|
|
369
|
+
elif isinstance(self.config.config_file, BaseModel):
|
|
370
|
+
# Serialize programmatic config, using mode='json' to handle special types like timedelta
|
|
371
|
+
config_dict = self.config.config_file.model_dump(mode='json')
|
|
372
|
+
with open(config_original_file, "w", encoding="utf-8") as f:
|
|
373
|
+
yaml.safe_dump(config_dict, f, default_flow_style=False, sort_keys=False)
|
|
374
|
+
self.config_original_file = config_original_file
|
|
375
|
+
logger.info("Programmatic config saved to %s", config_original_file)
|
|
376
|
+
|
|
377
|
+
# 2. Save effective configuration (with overrides applied)
|
|
378
|
+
config_effective_file = output_dir / "config_effective.yml"
|
|
379
|
+
if self.effective_config is not None:
|
|
380
|
+
effective_config_dict = self.effective_config.model_dump(mode='json') if self.effective_config else {}
|
|
381
|
+
with open(config_effective_file, "w", encoding="utf-8") as f:
|
|
382
|
+
yaml.safe_dump(effective_config_dict, f, default_flow_style=False, sort_keys=False)
|
|
383
|
+
self.config_effective_file = config_effective_file
|
|
384
|
+
logger.info("Effective config (with overrides) saved to %s", config_effective_file)
|
|
385
|
+
else:
|
|
386
|
+
logger.warning("Effective config not available, skipping config_effective.yml")
|
|
387
|
+
|
|
388
|
+
# 3. Save metadata about the run
|
|
389
|
+
config_metadata_file = output_dir / "config_metadata.json"
|
|
390
|
+
metadata = {
|
|
391
|
+
"config_file":
|
|
392
|
+
str(self.config.config_file),
|
|
393
|
+
"config_file_type":
|
|
394
|
+
"Path" if isinstance(self.config.config_file, Path) else "BaseModel",
|
|
395
|
+
"overrides": [{
|
|
396
|
+
"path": path, "value": value
|
|
397
|
+
} for path, value in self.config.override] if self.config.override else [],
|
|
398
|
+
"dataset":
|
|
399
|
+
self.config.dataset,
|
|
400
|
+
"result_json_path":
|
|
401
|
+
self.config.result_json_path,
|
|
402
|
+
"skip_workflow":
|
|
403
|
+
self.config.skip_workflow,
|
|
404
|
+
"skip_completed_entries":
|
|
405
|
+
self.config.skip_completed_entries,
|
|
406
|
+
"reps":
|
|
407
|
+
self.config.reps,
|
|
408
|
+
"endpoint":
|
|
409
|
+
self.config.endpoint,
|
|
410
|
+
"endpoint_timeout":
|
|
411
|
+
self.config.endpoint_timeout,
|
|
412
|
+
"adjust_dataset_size":
|
|
413
|
+
self.config.adjust_dataset_size,
|
|
414
|
+
"num_passes":
|
|
415
|
+
self.config.num_passes,
|
|
416
|
+
"export_timeout":
|
|
417
|
+
self.config.export_timeout,
|
|
418
|
+
"user_id":
|
|
419
|
+
self.config.user_id,
|
|
420
|
+
"timestamp":
|
|
421
|
+
datetime.now(tz=UTC).isoformat(),
|
|
422
|
+
}
|
|
423
|
+
|
|
424
|
+
with open(config_metadata_file, "w", encoding="utf-8") as f:
|
|
425
|
+
json.dump(metadata, f, indent=2)
|
|
426
|
+
self.config_metadata_file = config_metadata_file
|
|
427
|
+
logger.info("Configuration metadata saved to %s", config_metadata_file)
|
|
428
|
+
|
|
429
|
+
except Exception:
|
|
430
|
+
logger.exception("Failed to write configuration files")
|
|
431
|
+
# Don't raise - this is not critical enough to fail the entire evaluation
|
|
432
|
+
|
|
332
433
|
def write_output(self, dataset_handler: DatasetHandler, profiler_results: ProfilerResults):
|
|
333
434
|
workflow_output_file = self.eval_config.general.output_dir / "workflow_output.json"
|
|
334
435
|
workflow_output_file.parent.mkdir(parents=True, exist_ok=True)
|
|
335
436
|
|
|
437
|
+
# Write the configuration files (original, effective, and metadata)
|
|
438
|
+
self.write_configuration()
|
|
439
|
+
|
|
336
440
|
# Write the workflow output to a file (this can be used for re-running the evaluation)
|
|
337
441
|
|
|
338
442
|
step_filter = self.eval_config.general.output.workflow_output_step_filter \
|
|
@@ -459,7 +563,7 @@ class EvaluationRun:
|
|
|
459
563
|
from nat.runtime.loader import load_config
|
|
460
564
|
|
|
461
565
|
# Load and override the config
|
|
462
|
-
config = None
|
|
566
|
+
config: Config | None = None
|
|
463
567
|
if isinstance(self.config.config_file, BaseModel):
|
|
464
568
|
config = self.config.config_file
|
|
465
569
|
elif self.config.override:
|
|
@@ -467,6 +571,8 @@ class EvaluationRun:
|
|
|
467
571
|
else:
|
|
468
572
|
config = load_config(self.config.config_file)
|
|
469
573
|
|
|
574
|
+
# Store the effective configuration for later saving to output directory
|
|
575
|
+
self.effective_config = config
|
|
470
576
|
self.eval_config = config.eval
|
|
471
577
|
workflow_alias = self._get_workflow_alias(config.workflow.type)
|
|
472
578
|
logger.debug("Loaded %s evaluation configuration: %s", workflow_alias, self.eval_config)
|
|
@@ -498,7 +604,10 @@ class EvaluationRun:
|
|
|
498
604
|
eval_input=EvalInput(eval_input_items=[]),
|
|
499
605
|
evaluation_results=[],
|
|
500
606
|
usage_stats=UsageStats(),
|
|
501
|
-
profiler_results=ProfilerResults()
|
|
607
|
+
profiler_results=ProfilerResults(),
|
|
608
|
+
config_original_file=self.config_original_file,
|
|
609
|
+
config_effective_file=self.config_effective_file,
|
|
610
|
+
config_metadata_file=self.config_metadata_file)
|
|
502
611
|
|
|
503
612
|
custom_pre_eval_process_function = self.eval_config.general.output.custom_pre_eval_process_function \
|
|
504
613
|
if self.eval_config.general.output else None
|
|
@@ -517,7 +626,25 @@ class EvaluationRun:
|
|
|
517
626
|
eval_input=self.eval_input,
|
|
518
627
|
evaluation_results=self.evaluation_results,
|
|
519
628
|
usage_stats=self.usage_stats,
|
|
520
|
-
profiler_results=ProfilerResults()
|
|
629
|
+
profiler_results=ProfilerResults(),
|
|
630
|
+
config_original_file=self.config_original_file,
|
|
631
|
+
config_effective_file=self.config_effective_file,
|
|
632
|
+
config_metadata_file=self.config_metadata_file)
|
|
633
|
+
|
|
634
|
+
# Validate LLM endpoints before running evaluation (opt-in via config)
|
|
635
|
+
if (not self.config.skip_workflow and not self.config.endpoint and config.eval.general.validate_llm_endpoints):
|
|
636
|
+
try:
|
|
637
|
+
logger.info("Validating LLM endpoints before evaluation (enabled via config)...")
|
|
638
|
+
await validate_llm_endpoints(config)
|
|
639
|
+
except RuntimeError as e:
|
|
640
|
+
# Critical validation errors (404, connection failures) - fail fast
|
|
641
|
+
logger.error("LLM endpoint validation failed: %s", e)
|
|
642
|
+
raise
|
|
643
|
+
except Exception as e:
|
|
644
|
+
# Non-critical errors (missing packages, config issues) - warn but continue
|
|
645
|
+
logger.warning("LLM endpoint validation incomplete: %s. Continuing with evaluation...",
|
|
646
|
+
e,
|
|
647
|
+
exc_info=True)
|
|
521
648
|
|
|
522
649
|
# Run workflow and evaluate
|
|
523
650
|
async with WorkflowEvalBuilder.from_config(config=config) as eval_workflow:
|
|
@@ -526,25 +653,32 @@ class EvaluationRun:
|
|
|
526
653
|
|
|
527
654
|
with self.eval_trace_context.evaluation_context():
|
|
528
655
|
# Run workflow
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
session_manager
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
await self.
|
|
656
|
+
local_session_manager: SessionManager | None = None
|
|
657
|
+
try:
|
|
658
|
+
if self.config.endpoint:
|
|
659
|
+
await self.run_workflow_remote()
|
|
660
|
+
elif not self.config.skip_workflow:
|
|
661
|
+
if session_manager is None:
|
|
662
|
+
session_manager = await SessionManager.create(
|
|
663
|
+
config=config,
|
|
664
|
+
shared_builder=eval_workflow,
|
|
665
|
+
max_concurrency=self.eval_config.general.max_concurrency)
|
|
666
|
+
local_session_manager = session_manager
|
|
667
|
+
await self.run_workflow_local(session_manager)
|
|
668
|
+
|
|
669
|
+
# Pre-evaluation process the workflow output
|
|
670
|
+
self.eval_input = dataset_handler.pre_eval_process_eval_input(self.eval_input)
|
|
671
|
+
|
|
672
|
+
# Evaluate
|
|
673
|
+
evaluators = {name: eval_workflow.get_evaluator(name) for name in self.eval_config.evaluators}
|
|
674
|
+
await self.run_evaluators(evaluators)
|
|
675
|
+
|
|
676
|
+
# Wait for all trace export tasks to complete (local workflows only)
|
|
677
|
+
if session_manager and not self.config.endpoint:
|
|
678
|
+
await self.wait_for_all_export_tasks_local(session_manager, timeout=self.config.export_timeout)
|
|
679
|
+
finally:
|
|
680
|
+
if local_session_manager is not None:
|
|
681
|
+
await local_session_manager.shutdown()
|
|
548
682
|
|
|
549
683
|
# Profile the workflow
|
|
550
684
|
profiler_results = await self.profile_workflow()
|
|
@@ -572,4 +706,7 @@ class EvaluationRun:
|
|
|
572
706
|
eval_input=self.eval_input,
|
|
573
707
|
evaluation_results=self.evaluation_results,
|
|
574
708
|
usage_stats=self.usage_stats,
|
|
575
|
-
profiler_results=profiler_results
|
|
709
|
+
profiler_results=profiler_results,
|
|
710
|
+
config_original_file=self.config_original_file,
|
|
711
|
+
config_effective_file=self.config_effective_file,
|
|
712
|
+
config_metadata_file=self.config_metadata_file)
|
nat/eval/evaluator/__init__.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -34,7 +34,7 @@ class BaseEvaluator(ABC):
|
|
|
34
34
|
**Experimental Feature**: The Evaluation API is experimental and may change in future releases.
|
|
35
35
|
Future versions may introduce breaking changes without notice.
|
|
36
36
|
|
|
37
|
-
Each custom evaluator must implement the
|
|
37
|
+
Each custom evaluator must implement the ``evaluate_item`` method which is used to evaluate a
|
|
38
38
|
single EvalInputItem.
|
|
39
39
|
"""
|
|
40
40
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
import typing
|
|
17
17
|
|
|
18
18
|
from pydantic import BaseModel
|
|
19
|
+
from pydantic import SerializeAsAny
|
|
19
20
|
|
|
20
21
|
from nat.data_models.intermediate_step import IntermediateStep
|
|
21
22
|
|
|
@@ -55,4 +56,4 @@ class EvalOutputItem(BaseModel):
|
|
|
55
56
|
|
|
56
57
|
class EvalOutput(BaseModel):
|
|
57
58
|
average_score: typing.Any # float or any serializable type
|
|
58
|
-
eval_output_items: list[EvalOutputItem]
|
|
59
|
+
eval_output_items: list[SerializeAsAny[EvalOutputItem]]
|