nvidia-nat 1.4.0a20251112__py3-none-any.whl → 1.4.0a20260113__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiq/__init__.py +1 -1
- nat/{front_ends/mcp → agent/auto_memory_wrapper}/__init__.py +1 -1
- nat/agent/auto_memory_wrapper/agent.py +278 -0
- nat/agent/auto_memory_wrapper/register.py +227 -0
- nat/agent/auto_memory_wrapper/state.py +30 -0
- nat/agent/base.py +1 -1
- nat/agent/dual_node.py +1 -1
- nat/agent/prompt_optimizer/prompt.py +1 -1
- nat/agent/prompt_optimizer/register.py +1 -1
- nat/agent/react_agent/agent.py +16 -9
- nat/agent/react_agent/output_parser.py +2 -2
- nat/agent/react_agent/prompt.py +3 -2
- nat/agent/react_agent/register.py +2 -2
- nat/agent/react_agent/register_per_user_agent.py +104 -0
- nat/agent/reasoning_agent/reasoning_agent.py +1 -1
- nat/agent/register.py +3 -1
- nat/agent/responses_api_agent/__init__.py +1 -1
- nat/agent/responses_api_agent/register.py +1 -1
- nat/agent/rewoo_agent/agent.py +9 -4
- nat/agent/rewoo_agent/prompt.py +1 -1
- nat/agent/rewoo_agent/register.py +1 -1
- nat/agent/tool_calling_agent/agent.py +5 -4
- nat/agent/tool_calling_agent/register.py +1 -1
- nat/authentication/__init__.py +1 -1
- nat/authentication/api_key/__init__.py +1 -1
- nat/authentication/api_key/api_key_auth_provider.py +1 -1
- nat/authentication/api_key/api_key_auth_provider_config.py +22 -7
- nat/authentication/api_key/register.py +1 -1
- nat/authentication/credential_validator/__init__.py +1 -1
- nat/authentication/credential_validator/bearer_token_validator.py +1 -1
- nat/authentication/exceptions/__init__.py +1 -1
- nat/authentication/exceptions/api_key_exceptions.py +1 -1
- nat/authentication/http_basic_auth/http_basic_auth_provider.py +1 -1
- nat/authentication/http_basic_auth/register.py +1 -1
- nat/authentication/interfaces.py +1 -1
- nat/authentication/oauth2/__init__.py +1 -1
- nat/authentication/oauth2/oauth2_auth_code_flow_provider.py +1 -1
- nat/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +1 -1
- nat/authentication/oauth2/oauth2_resource_server_config.py +1 -1
- nat/authentication/oauth2/register.py +1 -1
- nat/authentication/register.py +1 -1
- nat/builder/builder.py +563 -1
- nat/builder/child_builder.py +385 -0
- nat/builder/component_utils.py +34 -4
- nat/builder/context.py +34 -1
- nat/builder/embedder.py +1 -1
- nat/builder/eval_builder.py +19 -7
- nat/builder/evaluator.py +1 -1
- nat/builder/framework_enum.py +3 -1
- nat/builder/front_end.py +1 -1
- nat/builder/function.py +113 -5
- nat/builder/function_base.py +1 -1
- nat/builder/function_info.py +1 -1
- nat/builder/intermediate_step_manager.py +1 -1
- nat/builder/llm.py +1 -1
- nat/builder/per_user_workflow_builder.py +843 -0
- nat/builder/retriever.py +1 -1
- nat/builder/sync_builder.py +571 -0
- nat/builder/user_interaction_manager.py +1 -1
- nat/builder/workflow.py +5 -3
- nat/builder/workflow_builder.py +619 -378
- nat/cli/__init__.py +1 -1
- nat/cli/cli_utils/config_override.py +1 -1
- nat/cli/cli_utils/validation.py +32 -1
- nat/cli/commands/configure/channel/add.py +1 -1
- nat/cli/commands/configure/channel/channel.py +1 -1
- nat/cli/commands/configure/channel/remove.py +1 -1
- nat/cli/commands/configure/channel/update.py +1 -1
- nat/cli/commands/configure/configure.py +1 -1
- nat/cli/commands/evaluate.py +87 -13
- nat/cli/commands/finetune.py +132 -0
- nat/cli/commands/info/__init__.py +1 -1
- nat/cli/commands/info/info.py +1 -1
- nat/cli/commands/info/list_channels.py +1 -1
- nat/cli/commands/info/list_components.py +1 -1
- nat/cli/commands/object_store/__init__.py +1 -1
- nat/cli/commands/object_store/object_store.py +1 -1
- nat/cli/commands/optimize.py +1 -1
- nat/cli/commands/{mcp → red_teaming}/__init__.py +1 -1
- nat/cli/commands/red_teaming/red_teaming.py +138 -0
- nat/cli/commands/red_teaming/red_teaming_utils.py +73 -0
- nat/cli/commands/registry/__init__.py +1 -1
- nat/cli/commands/registry/publish.py +1 -1
- nat/cli/commands/registry/pull.py +1 -1
- nat/cli/commands/registry/registry.py +1 -1
- nat/cli/commands/registry/remove.py +1 -1
- nat/cli/commands/registry/search.py +1 -1
- nat/cli/commands/sizing/__init__.py +1 -1
- nat/cli/commands/sizing/calc.py +1 -1
- nat/cli/commands/sizing/sizing.py +1 -1
- nat/cli/commands/start.py +1 -1
- nat/cli/commands/uninstall.py +1 -1
- nat/cli/commands/validate.py +1 -1
- nat/cli/commands/workflow/__init__.py +1 -1
- nat/cli/commands/workflow/workflow.py +1 -1
- nat/cli/commands/workflow/workflow_commands.py +3 -2
- nat/cli/entrypoint.py +15 -37
- nat/cli/main.py +2 -2
- nat/cli/plugin_loader.py +69 -0
- nat/cli/register_workflow.py +233 -5
- nat/cli/type_registry.py +237 -3
- nat/control_flow/register.py +1 -1
- nat/control_flow/router_agent/agent.py +1 -1
- nat/control_flow/router_agent/prompt.py +1 -1
- nat/control_flow/router_agent/register.py +1 -1
- nat/control_flow/sequential_executor.py +28 -7
- nat/data_models/__init__.py +1 -1
- nat/data_models/agent.py +1 -1
- nat/data_models/api_server.py +38 -3
- nat/data_models/authentication.py +1 -1
- nat/data_models/common.py +1 -1
- nat/data_models/component.py +9 -1
- nat/data_models/component_ref.py +45 -1
- nat/data_models/config.py +78 -1
- nat/data_models/dataset_handler.py +15 -2
- nat/data_models/discovery_metadata.py +1 -1
- nat/data_models/embedder.py +1 -1
- nat/data_models/evaluate.py +6 -1
- nat/data_models/evaluator.py +1 -1
- nat/data_models/finetuning.py +260 -0
- nat/data_models/front_end.py +1 -1
- nat/data_models/function.py +15 -2
- nat/data_models/function_dependencies.py +1 -1
- nat/data_models/gated_field_mixin.py +1 -1
- nat/data_models/interactive.py +1 -1
- nat/data_models/intermediate_step.py +29 -2
- nat/data_models/invocation_node.py +1 -1
- nat/data_models/llm.py +1 -1
- nat/data_models/logging.py +1 -1
- nat/data_models/memory.py +1 -1
- nat/data_models/middleware.py +37 -0
- nat/data_models/object_store.py +1 -1
- nat/data_models/openai_mcp.py +1 -1
- nat/data_models/optimizable.py +1 -1
- nat/data_models/optimizer.py +1 -1
- nat/data_models/profiler.py +1 -1
- nat/data_models/registry_handler.py +1 -1
- nat/data_models/retriever.py +1 -1
- nat/data_models/retry_mixin.py +1 -1
- nat/data_models/runtime_enum.py +26 -0
- nat/data_models/span.py +1 -1
- nat/data_models/step_adaptor.py +1 -1
- nat/data_models/streaming.py +1 -1
- nat/data_models/swe_bench_model.py +1 -1
- nat/data_models/telemetry_exporter.py +1 -1
- nat/data_models/thinking_mixin.py +1 -1
- nat/data_models/ttc_strategy.py +1 -1
- nat/embedder/azure_openai_embedder.py +1 -1
- nat/embedder/nim_embedder.py +1 -1
- nat/embedder/openai_embedder.py +1 -1
- nat/embedder/register.py +1 -1
- nat/eval/__init__.py +1 -1
- nat/eval/config.py +8 -1
- nat/eval/dataset_handler/dataset_downloader.py +1 -1
- nat/eval/dataset_handler/dataset_filter.py +1 -1
- nat/eval/dataset_handler/dataset_handler.py +4 -2
- nat/eval/evaluate.py +226 -81
- nat/eval/evaluator/__init__.py +1 -1
- nat/eval/evaluator/base_evaluator.py +2 -2
- nat/eval/evaluator/evaluator_model.py +3 -2
- nat/eval/intermediate_step_adapter.py +1 -1
- nat/eval/llm_validator.py +336 -0
- nat/eval/rag_evaluator/evaluate.py +17 -10
- nat/eval/rag_evaluator/register.py +1 -1
- nat/eval/red_teaming_evaluator/__init__.py +14 -0
- nat/eval/red_teaming_evaluator/data_models.py +66 -0
- nat/eval/red_teaming_evaluator/evaluate.py +327 -0
- nat/eval/red_teaming_evaluator/filter_conditions.py +75 -0
- nat/eval/red_teaming_evaluator/register.py +55 -0
- nat/eval/register.py +2 -1
- nat/eval/remote_workflow.py +1 -1
- nat/eval/runners/__init__.py +1 -1
- nat/eval/runners/config.py +1 -1
- nat/eval/runners/multi_eval_runner.py +1 -1
- nat/eval/runners/red_teaming_runner/__init__.py +24 -0
- nat/eval/runners/red_teaming_runner/config.py +282 -0
- nat/eval/runners/red_teaming_runner/report_utils.py +707 -0
- nat/eval/runners/red_teaming_runner/runner.py +867 -0
- nat/eval/runtime_evaluator/__init__.py +1 -1
- nat/eval/runtime_evaluator/evaluate.py +1 -1
- nat/eval/runtime_evaluator/register.py +1 -1
- nat/eval/runtime_event_subscriber.py +1 -1
- nat/eval/swe_bench_evaluator/evaluate.py +1 -1
- nat/eval/swe_bench_evaluator/register.py +1 -1
- nat/eval/trajectory_evaluator/evaluate.py +2 -2
- nat/eval/trajectory_evaluator/register.py +1 -1
- nat/eval/tunable_rag_evaluator/evaluate.py +5 -5
- nat/eval/tunable_rag_evaluator/register.py +1 -1
- nat/eval/usage_stats.py +1 -1
- nat/eval/utils/eval_trace_ctx.py +1 -1
- nat/eval/utils/output_uploader.py +1 -1
- nat/eval/utils/tqdm_position_registry.py +1 -1
- nat/eval/utils/weave_eval.py +1 -1
- nat/experimental/decorators/experimental_warning_decorator.py +1 -1
- nat/experimental/test_time_compute/editing/iterative_plan_refinement_editor.py +1 -1
- nat/experimental/test_time_compute/editing/llm_as_a_judge_editor.py +1 -1
- nat/experimental/test_time_compute/editing/motivation_aware_summarization.py +1 -1
- nat/experimental/test_time_compute/functions/execute_score_select_function.py +1 -1
- nat/experimental/test_time_compute/functions/multi_llm_judge_function.py +88 -0
- nat/experimental/test_time_compute/functions/plan_select_execute_function.py +1 -1
- nat/experimental/test_time_compute/functions/ttc_tool_orchestration_function.py +1 -1
- nat/experimental/test_time_compute/functions/ttc_tool_wrapper_function.py +1 -1
- nat/experimental/test_time_compute/models/editor_config.py +1 -1
- nat/experimental/test_time_compute/models/scoring_config.py +1 -1
- nat/experimental/test_time_compute/models/search_config.py +20 -2
- nat/experimental/test_time_compute/models/selection_config.py +33 -2
- nat/experimental/test_time_compute/models/stage_enums.py +1 -1
- nat/experimental/test_time_compute/models/strategy_base.py +1 -1
- nat/experimental/test_time_compute/models/tool_use_config.py +1 -1
- nat/experimental/test_time_compute/models/ttc_item.py +1 -1
- nat/experimental/test_time_compute/register.py +4 -1
- nat/experimental/test_time_compute/scoring/llm_based_agent_scorer.py +1 -1
- nat/experimental/test_time_compute/scoring/llm_based_plan_scorer.py +1 -1
- nat/experimental/test_time_compute/scoring/motivation_aware_scorer.py +1 -1
- nat/experimental/test_time_compute/search/multi_llm_generation.py +115 -0
- nat/experimental/test_time_compute/search/multi_llm_planner.py +1 -1
- nat/experimental/test_time_compute/search/multi_query_retrieval_search.py +1 -1
- nat/experimental/test_time_compute/search/single_shot_multi_plan_planner.py +1 -1
- nat/experimental/test_time_compute/selection/best_of_n_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_based_agent_output_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_based_output_merging_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_based_plan_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_judge_selection.py +127 -0
- nat/experimental/test_time_compute/selection/threshold_selector.py +1 -1
- nat/finetuning/__init__.py +24 -0
- nat/finetuning/finetuning_runtime.py +143 -0
- nat/finetuning/interfaces/__init__.py +24 -0
- nat/finetuning/interfaces/finetuning_runner.py +261 -0
- nat/finetuning/interfaces/trainer_adapter.py +103 -0
- nat/finetuning/interfaces/trajectory_builder.py +115 -0
- nat/finetuning/utils/__init__.py +15 -0
- nat/finetuning/utils/parsers/__init__.py +15 -0
- nat/finetuning/utils/parsers/adk_parser.py +141 -0
- nat/finetuning/utils/parsers/base_parser.py +238 -0
- nat/finetuning/utils/parsers/common.py +91 -0
- nat/finetuning/utils/parsers/langchain_parser.py +267 -0
- nat/finetuning/utils/parsers/llama_index_parser.py +218 -0
- nat/front_ends/__init__.py +1 -1
- nat/front_ends/console/__init__.py +1 -1
- nat/front_ends/console/authentication_flow_handler.py +1 -1
- nat/front_ends/console/console_front_end_config.py +4 -1
- nat/front_ends/console/console_front_end_plugin.py +5 -4
- nat/front_ends/console/register.py +1 -1
- nat/front_ends/cron/__init__.py +1 -1
- nat/front_ends/fastapi/__init__.py +1 -1
- nat/front_ends/fastapi/async_job.py +128 -0
- nat/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +1 -1
- nat/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +13 -9
- nat/front_ends/fastapi/dask_client_mixin.py +1 -1
- nat/front_ends/fastapi/fastapi_front_end_config.py +23 -1
- nat/front_ends/fastapi/fastapi_front_end_controller.py +1 -1
- nat/front_ends/fastapi/fastapi_front_end_plugin.py +25 -30
- nat/front_ends/fastapi/fastapi_front_end_plugin_worker.py +318 -59
- nat/front_ends/fastapi/html_snippets/__init__.py +1 -1
- nat/front_ends/fastapi/html_snippets/auth_code_grant_success.py +1 -1
- nat/front_ends/fastapi/intermediate_steps_subscriber.py +12 -1
- nat/front_ends/fastapi/job_store.py +23 -11
- nat/front_ends/fastapi/main.py +1 -1
- nat/front_ends/fastapi/message_handler.py +27 -4
- nat/front_ends/fastapi/message_validator.py +54 -2
- nat/front_ends/fastapi/register.py +1 -1
- nat/front_ends/fastapi/response_helpers.py +16 -15
- nat/front_ends/fastapi/step_adaptor.py +1 -1
- nat/front_ends/fastapi/utils.py +1 -1
- nat/front_ends/register.py +1 -2
- nat/front_ends/simple_base/__init__.py +1 -1
- nat/front_ends/simple_base/simple_front_end_plugin_base.py +6 -4
- nat/llm/aws_bedrock_llm.py +1 -1
- nat/llm/azure_openai_llm.py +10 -1
- nat/llm/dynamo_llm.py +363 -0
- nat/llm/huggingface_llm.py +177 -0
- nat/llm/litellm_llm.py +1 -1
- nat/llm/nim_llm.py +1 -1
- nat/llm/openai_llm.py +1 -1
- nat/llm/register.py +3 -1
- nat/llm/utils/__init__.py +1 -1
- nat/llm/utils/env_config_value.py +1 -1
- nat/llm/utils/error.py +1 -1
- nat/llm/utils/thinking.py +1 -1
- nat/memory/__init__.py +1 -1
- nat/memory/interfaces.py +1 -1
- nat/memory/models.py +1 -1
- nat/meta/pypi.md +1 -1
- nat/middleware/__init__.py +35 -0
- nat/middleware/cache/__init__.py +14 -0
- nat/middleware/cache/cache_middleware.py +253 -0
- nat/middleware/cache/cache_middleware_config.py +44 -0
- nat/middleware/cache/register.py +33 -0
- nat/middleware/defense/__init__.py +14 -0
- nat/middleware/defense/defense_middleware.py +362 -0
- nat/middleware/defense/defense_middleware_content_guard.py +455 -0
- nat/middleware/defense/defense_middleware_data_models.py +91 -0
- nat/middleware/defense/defense_middleware_output_verifier.py +440 -0
- nat/middleware/defense/defense_middleware_pii.py +356 -0
- nat/middleware/defense/register.py +82 -0
- nat/middleware/dynamic/__init__.py +14 -0
- nat/middleware/dynamic/dynamic_function_middleware.py +962 -0
- nat/middleware/dynamic/dynamic_middleware_config.py +132 -0
- nat/middleware/dynamic/register.py +34 -0
- nat/middleware/function_middleware.py +370 -0
- nat/middleware/logging/__init__.py +14 -0
- nat/middleware/logging/logging_middleware.py +67 -0
- nat/middleware/logging/logging_middleware_config.py +28 -0
- nat/middleware/logging/register.py +33 -0
- nat/middleware/middleware.py +298 -0
- nat/middleware/red_teaming/__init__.py +14 -0
- nat/middleware/red_teaming/red_teaming_middleware.py +344 -0
- nat/middleware/red_teaming/red_teaming_middleware_config.py +112 -0
- nat/middleware/red_teaming/register.py +47 -0
- nat/middleware/register.py +22 -0
- nat/middleware/utils/__init__.py +14 -0
- nat/middleware/utils/workflow_inventory.py +155 -0
- nat/object_store/__init__.py +1 -1
- nat/object_store/in_memory_object_store.py +1 -1
- nat/object_store/interfaces.py +1 -1
- nat/object_store/models.py +1 -1
- nat/object_store/register.py +1 -1
- nat/observability/__init__.py +1 -1
- nat/observability/exporter/__init__.py +1 -1
- nat/observability/exporter/base_exporter.py +1 -1
- nat/observability/exporter/exporter.py +1 -1
- nat/observability/exporter/file_exporter.py +1 -1
- nat/observability/exporter/processing_exporter.py +1 -1
- nat/observability/exporter/raw_exporter.py +1 -1
- nat/observability/exporter/span_exporter.py +7 -1
- nat/observability/exporter_manager.py +1 -1
- nat/observability/mixin/__init__.py +1 -1
- nat/observability/mixin/batch_config_mixin.py +1 -1
- nat/observability/mixin/collector_config_mixin.py +1 -1
- nat/observability/mixin/file_mixin.py +1 -1
- nat/observability/mixin/file_mode.py +1 -1
- nat/observability/mixin/redaction_config_mixin.py +1 -1
- nat/observability/mixin/resource_conflict_mixin.py +1 -1
- nat/observability/mixin/serialize_mixin.py +1 -1
- nat/observability/mixin/tagging_config_mixin.py +1 -1
- nat/observability/mixin/type_introspection_mixin.py +1 -1
- nat/observability/processor/__init__.py +1 -1
- nat/observability/processor/batching_processor.py +1 -1
- nat/observability/processor/callback_processor.py +1 -1
- nat/observability/processor/falsy_batch_filter_processor.py +1 -1
- nat/observability/processor/intermediate_step_serializer.py +1 -1
- nat/observability/processor/processor.py +1 -1
- nat/observability/processor/processor_factory.py +1 -1
- nat/observability/processor/redaction/__init__.py +1 -1
- nat/observability/processor/redaction/contextual_redaction_processor.py +1 -1
- nat/observability/processor/redaction/contextual_span_redaction_processor.py +1 -1
- nat/observability/processor/redaction/redaction_processor.py +1 -1
- nat/observability/processor/redaction/span_header_redaction_processor.py +1 -1
- nat/observability/processor/span_tagging_processor.py +1 -1
- nat/observability/register.py +1 -1
- nat/observability/utils/__init__.py +1 -1
- nat/observability/utils/dict_utils.py +1 -1
- nat/observability/utils/time_utils.py +1 -1
- nat/profiler/calc/__init__.py +1 -1
- nat/profiler/calc/calc_runner.py +3 -3
- nat/profiler/calc/calculations.py +1 -1
- nat/profiler/calc/data_models.py +1 -1
- nat/profiler/calc/plot.py +30 -3
- nat/profiler/callbacks/agno_callback_handler.py +1 -1
- nat/profiler/callbacks/base_callback_class.py +1 -1
- nat/profiler/callbacks/langchain_callback_handler.py +33 -3
- nat/profiler/callbacks/llama_index_callback_handler.py +13 -10
- nat/profiler/callbacks/semantic_kernel_callback_handler.py +1 -1
- nat/profiler/callbacks/token_usage_base_model.py +1 -1
- nat/profiler/data_frame_row.py +1 -1
- nat/profiler/data_models.py +1 -1
- nat/profiler/decorators/framework_wrapper.py +32 -1
- nat/profiler/decorators/function_tracking.py +1 -1
- nat/profiler/forecasting/config.py +1 -1
- nat/profiler/forecasting/model_trainer.py +1 -1
- nat/profiler/forecasting/models/__init__.py +1 -1
- nat/profiler/forecasting/models/forecasting_base_model.py +1 -1
- nat/profiler/forecasting/models/linear_model.py +1 -1
- nat/profiler/forecasting/models/random_forest_regressor.py +1 -1
- nat/profiler/inference_metrics_model.py +1 -1
- nat/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +1 -1
- nat/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +1 -1
- nat/profiler/inference_optimization/data_models.py +1 -1
- nat/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +1 -1
- nat/profiler/inference_optimization/experimental/prefix_span_analysis.py +1 -1
- nat/profiler/inference_optimization/llm_metrics.py +1 -1
- nat/profiler/inference_optimization/prompt_caching.py +1 -1
- nat/profiler/inference_optimization/token_uniqueness.py +1 -1
- nat/profiler/inference_optimization/workflow_runtimes.py +1 -1
- nat/profiler/intermediate_property_adapter.py +1 -1
- nat/profiler/parameter_optimization/optimizable_utils.py +1 -1
- nat/profiler/parameter_optimization/optimizer_runtime.py +1 -1
- nat/profiler/parameter_optimization/parameter_optimizer.py +1 -1
- nat/profiler/parameter_optimization/parameter_selection.py +1 -1
- nat/profiler/parameter_optimization/pareto_visualizer.py +1 -1
- nat/profiler/parameter_optimization/prompt_optimizer.py +1 -1
- nat/profiler/parameter_optimization/update_helpers.py +1 -1
- nat/profiler/profile_runner.py +1 -1
- nat/profiler/utils.py +1 -1
- nat/registry_handlers/local/local_handler.py +1 -1
- nat/registry_handlers/local/register_local.py +1 -1
- nat/registry_handlers/metadata_factory.py +1 -1
- nat/registry_handlers/package_utils.py +1 -1
- nat/registry_handlers/pypi/pypi_handler.py +1 -1
- nat/registry_handlers/pypi/register_pypi.py +1 -1
- nat/registry_handlers/register.py +1 -1
- nat/registry_handlers/registry_handler_base.py +1 -1
- nat/registry_handlers/rest/register_rest.py +1 -1
- nat/registry_handlers/rest/rest_handler.py +1 -1
- nat/registry_handlers/schemas/headers.py +1 -1
- nat/registry_handlers/schemas/package.py +1 -1
- nat/registry_handlers/schemas/publish.py +1 -1
- nat/registry_handlers/schemas/pull.py +1 -1
- nat/registry_handlers/schemas/remove.py +1 -1
- nat/registry_handlers/schemas/search.py +1 -1
- nat/registry_handlers/schemas/status.py +1 -1
- nat/retriever/interface.py +1 -1
- nat/retriever/milvus/__init__.py +1 -1
- nat/retriever/milvus/register.py +12 -4
- nat/retriever/milvus/retriever.py +103 -41
- nat/retriever/models.py +1 -1
- nat/retriever/nemo_retriever/__init__.py +1 -1
- nat/retriever/nemo_retriever/register.py +1 -1
- nat/retriever/nemo_retriever/retriever.py +5 -5
- nat/retriever/register.py +1 -1
- nat/runtime/__init__.py +1 -1
- nat/runtime/loader.py +10 -3
- nat/runtime/metrics.py +180 -0
- nat/runtime/runner.py +13 -6
- nat/runtime/session.py +458 -32
- nat/runtime/user_metadata.py +1 -1
- nat/settings/global_settings.py +1 -1
- nat/tool/chat_completion.py +1 -1
- nat/tool/code_execution/README.md +1 -1
- nat/tool/code_execution/code_sandbox.py +2 -2
- nat/tool/code_execution/local_sandbox/Dockerfile.sandbox +1 -1
- nat/tool/code_execution/local_sandbox/__init__.py +1 -1
- nat/tool/code_execution/local_sandbox/local_sandbox_server.py +1 -1
- nat/tool/code_execution/local_sandbox/start_local_sandbox.sh +1 -1
- nat/tool/code_execution/register.py +1 -1
- nat/tool/code_execution/utils.py +1 -1
- nat/tool/datetime_tools.py +1 -1
- nat/tool/document_search.py +1 -1
- nat/tool/github_tools.py +1 -1
- nat/tool/memory_tools/add_memory_tool.py +1 -1
- nat/tool/memory_tools/delete_memory_tool.py +1 -1
- nat/tool/memory_tools/get_memory_tool.py +1 -1
- nat/tool/nvidia_rag.py +2 -2
- nat/tool/register.py +1 -1
- nat/tool/retriever.py +1 -1
- nat/tool/server_tools.py +1 -1
- nat/utils/__init__.py +8 -5
- nat/utils/callable_utils.py +1 -1
- nat/utils/data_models/schema_validator.py +1 -1
- nat/utils/debugging_utils.py +1 -1
- nat/utils/decorators.py +1 -1
- nat/utils/dump_distro_mapping.py +1 -1
- nat/utils/exception_handlers/automatic_retries.py +3 -3
- nat/utils/exception_handlers/schemas.py +1 -1
- nat/utils/io/model_processing.py +1 -1
- nat/utils/io/supress_logs.py +33 -0
- nat/utils/io/yaml_tools.py +1 -1
- nat/utils/log_levels.py +1 -1
- nat/utils/log_utils.py +13 -1
- nat/utils/metadata_utils.py +1 -1
- nat/utils/optional_imports.py +1 -1
- nat/utils/producer_consumer_queue.py +1 -1
- nat/utils/reactive/base/observable_base.py +1 -1
- nat/utils/reactive/base/observer_base.py +1 -1
- nat/utils/reactive/base/subject_base.py +1 -1
- nat/utils/reactive/observable.py +1 -1
- nat/utils/reactive/observer.py +1 -1
- nat/utils/reactive/subject.py +1 -1
- nat/utils/reactive/subscription.py +1 -1
- nat/utils/responses_api.py +1 -1
- nat/utils/settings/global_settings.py +1 -1
- nat/utils/string_utils.py +1 -1
- nat/utils/type_converter.py +18 -5
- nat/utils/type_utils.py +1 -1
- nat/utils/url_utils.py +1 -1
- {nvidia_nat-1.4.0a20251112.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/METADATA +46 -15
- nvidia_nat-1.4.0a20260113.dist-info/RECORD +547 -0
- nvidia_nat-1.4.0a20260113.dist-info/entry_points.txt +38 -0
- nat/cli/commands/mcp/mcp.py +0 -986
- nat/front_ends/mcp/introspection_token_verifier.py +0 -73
- nat/front_ends/mcp/mcp_front_end_config.py +0 -109
- nat/front_ends/mcp/mcp_front_end_plugin.py +0 -151
- nat/front_ends/mcp/mcp_front_end_plugin_worker.py +0 -362
- nat/front_ends/mcp/memory_profiler.py +0 -320
- nat/front_ends/mcp/register.py +0 -27
- nat/front_ends/mcp/tool_converter.py +0 -321
- nvidia_nat-1.4.0a20251112.dist-info/RECORD +0 -481
- nvidia_nat-1.4.0a20251112.dist-info/entry_points.txt +0 -22
- {nvidia_nat-1.4.0a20251112.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/WHEEL +0 -0
- {nvidia_nat-1.4.0a20251112.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
- {nvidia_nat-1.4.0a20251112.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE.md +0 -0
- {nvidia_nat-1.4.0a20251112.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/top_level.txt +0 -0
nat/eval/evaluate.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -14,23 +14,31 @@
|
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
|
|
16
16
|
import asyncio
|
|
17
|
+
import json
|
|
17
18
|
import logging
|
|
18
19
|
import shutil
|
|
20
|
+
import warnings
|
|
21
|
+
from datetime import UTC
|
|
22
|
+
from datetime import datetime
|
|
19
23
|
from pathlib import Path
|
|
20
24
|
from typing import Any
|
|
21
25
|
from uuid import uuid4
|
|
22
26
|
|
|
27
|
+
import yaml
|
|
23
28
|
from pydantic import BaseModel
|
|
24
29
|
from tqdm import tqdm
|
|
25
30
|
|
|
31
|
+
from nat.data_models.config import Config
|
|
26
32
|
from nat.data_models.evaluate import EvalConfig
|
|
27
33
|
from nat.data_models.evaluate import JobEvictionPolicy
|
|
34
|
+
from nat.data_models.runtime_enum import RuntimeTypeEnum
|
|
28
35
|
from nat.eval.config import EvaluationRunConfig
|
|
29
36
|
from nat.eval.config import EvaluationRunOutput
|
|
30
37
|
from nat.eval.dataset_handler.dataset_handler import DatasetHandler
|
|
31
38
|
from nat.eval.evaluator.evaluator_model import EvalInput
|
|
32
39
|
from nat.eval.evaluator.evaluator_model import EvalInputItem
|
|
33
40
|
from nat.eval.evaluator.evaluator_model import EvalOutput
|
|
41
|
+
from nat.eval.llm_validator import validate_llm_endpoints
|
|
34
42
|
from nat.eval.usage_stats import UsageStats
|
|
35
43
|
from nat.eval.usage_stats import UsageStatsItem
|
|
36
44
|
from nat.eval.usage_stats import UsageStatsLLM
|
|
@@ -60,6 +68,7 @@ class EvaluationRun:
|
|
|
60
68
|
# Run-specific configuration
|
|
61
69
|
self.config: EvaluationRunConfig = config
|
|
62
70
|
self.eval_config: EvalConfig | None = None
|
|
71
|
+
self.effective_config: Config | None = None # Stores the complete config after applying overrides
|
|
63
72
|
|
|
64
73
|
# Helpers
|
|
65
74
|
self.intermediate_step_adapter: IntermediateStepAdapter = IntermediateStepAdapter()
|
|
@@ -67,7 +76,13 @@ class EvaluationRun:
|
|
|
67
76
|
# Create evaluation trace context
|
|
68
77
|
try:
|
|
69
78
|
from nat.eval.utils.eval_trace_ctx import WeaveEvalTraceContext
|
|
70
|
-
|
|
79
|
+
with warnings.catch_warnings():
|
|
80
|
+
# Ignore deprecation warnings being triggered by weave. https://github.com/wandb/weave/issues/3666
|
|
81
|
+
warnings.filterwarnings("ignore",
|
|
82
|
+
category=DeprecationWarning,
|
|
83
|
+
message=r"`sentry_sdk\.Hub` is deprecated")
|
|
84
|
+
|
|
85
|
+
self.eval_trace_context = WeaveEvalTraceContext()
|
|
71
86
|
except Exception:
|
|
72
87
|
from nat.eval.utils.eval_trace_ctx import EvalTraceContext
|
|
73
88
|
self.eval_trace_context = EvalTraceContext()
|
|
@@ -89,6 +104,11 @@ class EvaluationRun:
|
|
|
89
104
|
# evaluation output files
|
|
90
105
|
self.evaluator_output_files: list[Path] = []
|
|
91
106
|
|
|
107
|
+
# configuration output files
|
|
108
|
+
self.config_original_file: Path | None = None
|
|
109
|
+
self.config_effective_file: Path | None = None
|
|
110
|
+
self.config_metadata_file: Path | None = None
|
|
111
|
+
|
|
92
112
|
def _compute_usage_stats(self, item: EvalInputItem):
|
|
93
113
|
"""Compute usage stats for a single item using the intermediate steps"""
|
|
94
114
|
# get the prompt and completion tokens from the intermediate steps
|
|
@@ -161,62 +181,65 @@ class EvaluationRun:
|
|
|
161
181
|
if stop_event.is_set():
|
|
162
182
|
return "", []
|
|
163
183
|
|
|
164
|
-
async with session_manager.
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
184
|
+
async with session_manager.session(user_id=self.config.user_id) as session:
|
|
185
|
+
async with session.run(item.input_obj, runtime_type=RuntimeTypeEnum.EVALUATE) as runner:
|
|
186
|
+
if not session.workflow.has_single_output:
|
|
187
|
+
# raise an error if the workflow has multiple outputs
|
|
188
|
+
raise NotImplementedError("Multiple outputs are not supported")
|
|
189
|
+
|
|
190
|
+
runner_result = None
|
|
191
|
+
intermediate_future = None
|
|
192
|
+
|
|
193
|
+
try:
|
|
194
|
+
# Start usage stats and intermediate steps collection in parallel
|
|
195
|
+
intermediate_future = pull_intermediate()
|
|
196
|
+
runner_result = runner.result()
|
|
197
|
+
base_output = await runner_result
|
|
198
|
+
intermediate_steps = await intermediate_future
|
|
199
|
+
except NotImplementedError as e:
|
|
200
|
+
logger.error("Failed to run the workflow: %s", e)
|
|
201
|
+
# raise original error
|
|
202
|
+
raise
|
|
203
|
+
except Exception as e:
|
|
204
|
+
logger.exception("Failed to run the workflow: %s", e)
|
|
205
|
+
# stop processing if a workflow error occurs
|
|
206
|
+
self.workflow_interrupted = True
|
|
207
|
+
|
|
208
|
+
# Cancel any coroutines that are still running, avoiding a warning about unawaited coroutines
|
|
209
|
+
# (typically one of these two is what raised the exception and the other is still running)
|
|
210
|
+
for coro in (runner_result, intermediate_future):
|
|
211
|
+
if coro is not None:
|
|
212
|
+
asyncio.ensure_future(coro).cancel()
|
|
213
|
+
|
|
214
|
+
stop_event.set()
|
|
215
|
+
return
|
|
216
|
+
|
|
217
|
+
try:
|
|
218
|
+
base_output = runner.convert(base_output, to_type=str)
|
|
219
|
+
except ValueError:
|
|
220
|
+
pass
|
|
221
|
+
|
|
222
|
+
# if base_output is a pydantic model dump it to json
|
|
223
|
+
if isinstance(base_output, BaseModel):
|
|
224
|
+
output = base_output.model_dump_json(indent=2)
|
|
225
|
+
else:
|
|
226
|
+
m = jsonpath_expr.find(base_output)
|
|
227
|
+
if (not m):
|
|
228
|
+
raise RuntimeError(
|
|
229
|
+
f"Failed to extract output using jsonpath: {self.config.result_json_path}")
|
|
230
|
+
if (len(m) > 1):
|
|
231
|
+
logger.warning(
|
|
232
|
+
"Multiple matches found for jsonpath at row '%s'. Matches: %s. Using the first",
|
|
233
|
+
base_output,
|
|
234
|
+
m)
|
|
235
|
+
output = m[0].value
|
|
236
|
+
|
|
237
|
+
item.output_obj = output
|
|
238
|
+
item.trajectory = self.intermediate_step_adapter.validate_intermediate_steps(intermediate_steps)
|
|
239
|
+
usage_stats_item = self._compute_usage_stats(item)
|
|
240
|
+
|
|
241
|
+
self.weave_eval.log_prediction(item, output)
|
|
242
|
+
await self.weave_eval.log_usage_stats(item, usage_stats_item)
|
|
220
243
|
|
|
221
244
|
async def wrapped_run(item: EvalInputItem) -> None:
|
|
222
245
|
await run_one(item)
|
|
@@ -321,10 +344,99 @@ class EvaluationRun:
|
|
|
321
344
|
except Exception as e:
|
|
322
345
|
logger.exception("Failed to delete old job directory: %s: %s", dir_to_delete, e)
|
|
323
346
|
|
|
347
|
+
def write_configuration(self) -> None:
|
|
348
|
+
"""Save the configuration used for this evaluation run to the output directory.
|
|
349
|
+
|
|
350
|
+
This saves three files:
|
|
351
|
+
1. config_original.yml - The original configuration file
|
|
352
|
+
2. config_effective.yml - The configuration with all overrides applied
|
|
353
|
+
3. config_metadata.json - Metadata about the evaluation run and overrides
|
|
354
|
+
"""
|
|
355
|
+
output_dir = self.eval_config.general.output_dir
|
|
356
|
+
output_dir.mkdir(parents=True, exist_ok=True)
|
|
357
|
+
|
|
358
|
+
try:
|
|
359
|
+
# 1. Save original configuration
|
|
360
|
+
config_original_file = output_dir / "config_original.yml"
|
|
361
|
+
if isinstance(self.config.config_file, Path):
|
|
362
|
+
# Copy original file if it exists
|
|
363
|
+
if self.config.config_file.exists():
|
|
364
|
+
shutil.copy2(self.config.config_file, config_original_file)
|
|
365
|
+
self.config_original_file = config_original_file
|
|
366
|
+
logger.info("Original config file copied to %s", config_original_file)
|
|
367
|
+
else:
|
|
368
|
+
logger.warning("Original config file not found at %s", self.config.config_file)
|
|
369
|
+
elif isinstance(self.config.config_file, BaseModel):
|
|
370
|
+
# Serialize programmatic config, using mode='json' to handle special types like timedelta
|
|
371
|
+
config_dict = self.config.config_file.model_dump(mode='json')
|
|
372
|
+
with open(config_original_file, "w", encoding="utf-8") as f:
|
|
373
|
+
yaml.safe_dump(config_dict, f, default_flow_style=False, sort_keys=False)
|
|
374
|
+
self.config_original_file = config_original_file
|
|
375
|
+
logger.info("Programmatic config saved to %s", config_original_file)
|
|
376
|
+
|
|
377
|
+
# 2. Save effective configuration (with overrides applied)
|
|
378
|
+
config_effective_file = output_dir / "config_effective.yml"
|
|
379
|
+
if self.effective_config is not None:
|
|
380
|
+
effective_config_dict = self.effective_config.model_dump(mode='json') if self.effective_config else {}
|
|
381
|
+
with open(config_effective_file, "w", encoding="utf-8") as f:
|
|
382
|
+
yaml.safe_dump(effective_config_dict, f, default_flow_style=False, sort_keys=False)
|
|
383
|
+
self.config_effective_file = config_effective_file
|
|
384
|
+
logger.info("Effective config (with overrides) saved to %s", config_effective_file)
|
|
385
|
+
else:
|
|
386
|
+
logger.warning("Effective config not available, skipping config_effective.yml")
|
|
387
|
+
|
|
388
|
+
# 3. Save metadata about the run
|
|
389
|
+
config_metadata_file = output_dir / "config_metadata.json"
|
|
390
|
+
metadata = {
|
|
391
|
+
"config_file":
|
|
392
|
+
str(self.config.config_file),
|
|
393
|
+
"config_file_type":
|
|
394
|
+
"Path" if isinstance(self.config.config_file, Path) else "BaseModel",
|
|
395
|
+
"overrides": [{
|
|
396
|
+
"path": path, "value": value
|
|
397
|
+
} for path, value in self.config.override] if self.config.override else [],
|
|
398
|
+
"dataset":
|
|
399
|
+
self.config.dataset,
|
|
400
|
+
"result_json_path":
|
|
401
|
+
self.config.result_json_path,
|
|
402
|
+
"skip_workflow":
|
|
403
|
+
self.config.skip_workflow,
|
|
404
|
+
"skip_completed_entries":
|
|
405
|
+
self.config.skip_completed_entries,
|
|
406
|
+
"reps":
|
|
407
|
+
self.config.reps,
|
|
408
|
+
"endpoint":
|
|
409
|
+
self.config.endpoint,
|
|
410
|
+
"endpoint_timeout":
|
|
411
|
+
self.config.endpoint_timeout,
|
|
412
|
+
"adjust_dataset_size":
|
|
413
|
+
self.config.adjust_dataset_size,
|
|
414
|
+
"num_passes":
|
|
415
|
+
self.config.num_passes,
|
|
416
|
+
"export_timeout":
|
|
417
|
+
self.config.export_timeout,
|
|
418
|
+
"user_id":
|
|
419
|
+
self.config.user_id,
|
|
420
|
+
"timestamp":
|
|
421
|
+
datetime.now(tz=UTC).isoformat(),
|
|
422
|
+
}
|
|
423
|
+
|
|
424
|
+
with open(config_metadata_file, "w", encoding="utf-8") as f:
|
|
425
|
+
json.dump(metadata, f, indent=2)
|
|
426
|
+
self.config_metadata_file = config_metadata_file
|
|
427
|
+
logger.info("Configuration metadata saved to %s", config_metadata_file)
|
|
428
|
+
|
|
429
|
+
except Exception:
|
|
430
|
+
logger.exception("Failed to write configuration files")
|
|
431
|
+
# Don't raise - this is not critical enough to fail the entire evaluation
|
|
432
|
+
|
|
324
433
|
def write_output(self, dataset_handler: DatasetHandler, profiler_results: ProfilerResults):
|
|
325
434
|
workflow_output_file = self.eval_config.general.output_dir / "workflow_output.json"
|
|
326
435
|
workflow_output_file.parent.mkdir(parents=True, exist_ok=True)
|
|
327
436
|
|
|
437
|
+
# Write the configuration files (original, effective, and metadata)
|
|
438
|
+
self.write_configuration()
|
|
439
|
+
|
|
328
440
|
# Write the workflow output to a file (this can be used for re-running the evaluation)
|
|
329
441
|
|
|
330
442
|
step_filter = self.eval_config.general.output.workflow_output_step_filter \
|
|
@@ -451,7 +563,7 @@ class EvaluationRun:
|
|
|
451
563
|
from nat.runtime.loader import load_config
|
|
452
564
|
|
|
453
565
|
# Load and override the config
|
|
454
|
-
config = None
|
|
566
|
+
config: Config | None = None
|
|
455
567
|
if isinstance(self.config.config_file, BaseModel):
|
|
456
568
|
config = self.config.config_file
|
|
457
569
|
elif self.config.override:
|
|
@@ -459,6 +571,8 @@ class EvaluationRun:
|
|
|
459
571
|
else:
|
|
460
572
|
config = load_config(self.config.config_file)
|
|
461
573
|
|
|
574
|
+
# Store the effective configuration for later saving to output directory
|
|
575
|
+
self.effective_config = config
|
|
462
576
|
self.eval_config = config.eval
|
|
463
577
|
workflow_alias = self._get_workflow_alias(config.workflow.type)
|
|
464
578
|
logger.debug("Loaded %s evaluation configuration: %s", workflow_alias, self.eval_config)
|
|
@@ -490,7 +604,10 @@ class EvaluationRun:
|
|
|
490
604
|
eval_input=EvalInput(eval_input_items=[]),
|
|
491
605
|
evaluation_results=[],
|
|
492
606
|
usage_stats=UsageStats(),
|
|
493
|
-
profiler_results=ProfilerResults()
|
|
607
|
+
profiler_results=ProfilerResults(),
|
|
608
|
+
config_original_file=self.config_original_file,
|
|
609
|
+
config_effective_file=self.config_effective_file,
|
|
610
|
+
config_metadata_file=self.config_metadata_file)
|
|
494
611
|
|
|
495
612
|
custom_pre_eval_process_function = self.eval_config.general.output.custom_pre_eval_process_function \
|
|
496
613
|
if self.eval_config.general.output else None
|
|
@@ -509,7 +626,25 @@ class EvaluationRun:
|
|
|
509
626
|
eval_input=self.eval_input,
|
|
510
627
|
evaluation_results=self.evaluation_results,
|
|
511
628
|
usage_stats=self.usage_stats,
|
|
512
|
-
profiler_results=ProfilerResults()
|
|
629
|
+
profiler_results=ProfilerResults(),
|
|
630
|
+
config_original_file=self.config_original_file,
|
|
631
|
+
config_effective_file=self.config_effective_file,
|
|
632
|
+
config_metadata_file=self.config_metadata_file)
|
|
633
|
+
|
|
634
|
+
# Validate LLM endpoints before running evaluation (opt-in via config)
|
|
635
|
+
if (not self.config.skip_workflow and not self.config.endpoint and config.eval.general.validate_llm_endpoints):
|
|
636
|
+
try:
|
|
637
|
+
logger.info("Validating LLM endpoints before evaluation (enabled via config)...")
|
|
638
|
+
await validate_llm_endpoints(config)
|
|
639
|
+
except RuntimeError as e:
|
|
640
|
+
# Critical validation errors (404, connection failures) - fail fast
|
|
641
|
+
logger.error("LLM endpoint validation failed: %s", e)
|
|
642
|
+
raise
|
|
643
|
+
except Exception as e:
|
|
644
|
+
# Non-critical errors (missing packages, config issues) - warn but continue
|
|
645
|
+
logger.warning("LLM endpoint validation incomplete: %s. Continuing with evaluation...",
|
|
646
|
+
e,
|
|
647
|
+
exc_info=True)
|
|
513
648
|
|
|
514
649
|
# Run workflow and evaluate
|
|
515
650
|
async with WorkflowEvalBuilder.from_config(config=config) as eval_workflow:
|
|
@@ -518,25 +653,32 @@ class EvaluationRun:
|
|
|
518
653
|
|
|
519
654
|
with self.eval_trace_context.evaluation_context():
|
|
520
655
|
# Run workflow
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
session_manager
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
await self.
|
|
656
|
+
local_session_manager: SessionManager | None = None
|
|
657
|
+
try:
|
|
658
|
+
if self.config.endpoint:
|
|
659
|
+
await self.run_workflow_remote()
|
|
660
|
+
elif not self.config.skip_workflow:
|
|
661
|
+
if session_manager is None:
|
|
662
|
+
session_manager = await SessionManager.create(
|
|
663
|
+
config=config,
|
|
664
|
+
shared_builder=eval_workflow,
|
|
665
|
+
max_concurrency=self.eval_config.general.max_concurrency)
|
|
666
|
+
local_session_manager = session_manager
|
|
667
|
+
await self.run_workflow_local(session_manager)
|
|
668
|
+
|
|
669
|
+
# Pre-evaluation process the workflow output
|
|
670
|
+
self.eval_input = dataset_handler.pre_eval_process_eval_input(self.eval_input)
|
|
671
|
+
|
|
672
|
+
# Evaluate
|
|
673
|
+
evaluators = {name: eval_workflow.get_evaluator(name) for name in self.eval_config.evaluators}
|
|
674
|
+
await self.run_evaluators(evaluators)
|
|
675
|
+
|
|
676
|
+
# Wait for all trace export tasks to complete (local workflows only)
|
|
677
|
+
if session_manager and not self.config.endpoint:
|
|
678
|
+
await self.wait_for_all_export_tasks_local(session_manager, timeout=self.config.export_timeout)
|
|
679
|
+
finally:
|
|
680
|
+
if local_session_manager is not None:
|
|
681
|
+
await local_session_manager.shutdown()
|
|
540
682
|
|
|
541
683
|
# Profile the workflow
|
|
542
684
|
profiler_results = await self.profile_workflow()
|
|
@@ -564,4 +706,7 @@ class EvaluationRun:
|
|
|
564
706
|
eval_input=self.eval_input,
|
|
565
707
|
evaluation_results=self.evaluation_results,
|
|
566
708
|
usage_stats=self.usage_stats,
|
|
567
|
-
profiler_results=profiler_results
|
|
709
|
+
profiler_results=profiler_results,
|
|
710
|
+
config_original_file=self.config_original_file,
|
|
711
|
+
config_effective_file=self.config_effective_file,
|
|
712
|
+
config_metadata_file=self.config_metadata_file)
|
nat/eval/evaluator/__init__.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -34,7 +34,7 @@ class BaseEvaluator(ABC):
|
|
|
34
34
|
**Experimental Feature**: The Evaluation API is experimental and may change in future releases.
|
|
35
35
|
Future versions may introduce breaking changes without notice.
|
|
36
36
|
|
|
37
|
-
Each custom evaluator must implement the
|
|
37
|
+
Each custom evaluator must implement the ``evaluate_item`` method which is used to evaluate a
|
|
38
38
|
single EvalInputItem.
|
|
39
39
|
"""
|
|
40
40
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
import typing
|
|
17
17
|
|
|
18
18
|
from pydantic import BaseModel
|
|
19
|
+
from pydantic import SerializeAsAny
|
|
19
20
|
|
|
20
21
|
from nat.data_models.intermediate_step import IntermediateStep
|
|
21
22
|
|
|
@@ -55,4 +56,4 @@ class EvalOutputItem(BaseModel):
|
|
|
55
56
|
|
|
56
57
|
class EvalOutput(BaseModel):
|
|
57
58
|
average_score: typing.Any # float or any serializable type
|
|
58
|
-
eval_output_items: list[EvalOutputItem]
|
|
59
|
+
eval_output_items: list[SerializeAsAny[EvalOutputItem]]
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|