nvidia-nat 1.4.0a20251112__py3-none-any.whl → 1.4.0a20260113__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiq/__init__.py +1 -1
- nat/{front_ends/mcp → agent/auto_memory_wrapper}/__init__.py +1 -1
- nat/agent/auto_memory_wrapper/agent.py +278 -0
- nat/agent/auto_memory_wrapper/register.py +227 -0
- nat/agent/auto_memory_wrapper/state.py +30 -0
- nat/agent/base.py +1 -1
- nat/agent/dual_node.py +1 -1
- nat/agent/prompt_optimizer/prompt.py +1 -1
- nat/agent/prompt_optimizer/register.py +1 -1
- nat/agent/react_agent/agent.py +16 -9
- nat/agent/react_agent/output_parser.py +2 -2
- nat/agent/react_agent/prompt.py +3 -2
- nat/agent/react_agent/register.py +2 -2
- nat/agent/react_agent/register_per_user_agent.py +104 -0
- nat/agent/reasoning_agent/reasoning_agent.py +1 -1
- nat/agent/register.py +3 -1
- nat/agent/responses_api_agent/__init__.py +1 -1
- nat/agent/responses_api_agent/register.py +1 -1
- nat/agent/rewoo_agent/agent.py +9 -4
- nat/agent/rewoo_agent/prompt.py +1 -1
- nat/agent/rewoo_agent/register.py +1 -1
- nat/agent/tool_calling_agent/agent.py +5 -4
- nat/agent/tool_calling_agent/register.py +1 -1
- nat/authentication/__init__.py +1 -1
- nat/authentication/api_key/__init__.py +1 -1
- nat/authentication/api_key/api_key_auth_provider.py +1 -1
- nat/authentication/api_key/api_key_auth_provider_config.py +22 -7
- nat/authentication/api_key/register.py +1 -1
- nat/authentication/credential_validator/__init__.py +1 -1
- nat/authentication/credential_validator/bearer_token_validator.py +1 -1
- nat/authentication/exceptions/__init__.py +1 -1
- nat/authentication/exceptions/api_key_exceptions.py +1 -1
- nat/authentication/http_basic_auth/http_basic_auth_provider.py +1 -1
- nat/authentication/http_basic_auth/register.py +1 -1
- nat/authentication/interfaces.py +1 -1
- nat/authentication/oauth2/__init__.py +1 -1
- nat/authentication/oauth2/oauth2_auth_code_flow_provider.py +1 -1
- nat/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +1 -1
- nat/authentication/oauth2/oauth2_resource_server_config.py +1 -1
- nat/authentication/oauth2/register.py +1 -1
- nat/authentication/register.py +1 -1
- nat/builder/builder.py +563 -1
- nat/builder/child_builder.py +385 -0
- nat/builder/component_utils.py +34 -4
- nat/builder/context.py +34 -1
- nat/builder/embedder.py +1 -1
- nat/builder/eval_builder.py +19 -7
- nat/builder/evaluator.py +1 -1
- nat/builder/framework_enum.py +3 -1
- nat/builder/front_end.py +1 -1
- nat/builder/function.py +113 -5
- nat/builder/function_base.py +1 -1
- nat/builder/function_info.py +1 -1
- nat/builder/intermediate_step_manager.py +1 -1
- nat/builder/llm.py +1 -1
- nat/builder/per_user_workflow_builder.py +843 -0
- nat/builder/retriever.py +1 -1
- nat/builder/sync_builder.py +571 -0
- nat/builder/user_interaction_manager.py +1 -1
- nat/builder/workflow.py +5 -3
- nat/builder/workflow_builder.py +619 -378
- nat/cli/__init__.py +1 -1
- nat/cli/cli_utils/config_override.py +1 -1
- nat/cli/cli_utils/validation.py +32 -1
- nat/cli/commands/configure/channel/add.py +1 -1
- nat/cli/commands/configure/channel/channel.py +1 -1
- nat/cli/commands/configure/channel/remove.py +1 -1
- nat/cli/commands/configure/channel/update.py +1 -1
- nat/cli/commands/configure/configure.py +1 -1
- nat/cli/commands/evaluate.py +87 -13
- nat/cli/commands/finetune.py +132 -0
- nat/cli/commands/info/__init__.py +1 -1
- nat/cli/commands/info/info.py +1 -1
- nat/cli/commands/info/list_channels.py +1 -1
- nat/cli/commands/info/list_components.py +1 -1
- nat/cli/commands/object_store/__init__.py +1 -1
- nat/cli/commands/object_store/object_store.py +1 -1
- nat/cli/commands/optimize.py +1 -1
- nat/cli/commands/{mcp → red_teaming}/__init__.py +1 -1
- nat/cli/commands/red_teaming/red_teaming.py +138 -0
- nat/cli/commands/red_teaming/red_teaming_utils.py +73 -0
- nat/cli/commands/registry/__init__.py +1 -1
- nat/cli/commands/registry/publish.py +1 -1
- nat/cli/commands/registry/pull.py +1 -1
- nat/cli/commands/registry/registry.py +1 -1
- nat/cli/commands/registry/remove.py +1 -1
- nat/cli/commands/registry/search.py +1 -1
- nat/cli/commands/sizing/__init__.py +1 -1
- nat/cli/commands/sizing/calc.py +1 -1
- nat/cli/commands/sizing/sizing.py +1 -1
- nat/cli/commands/start.py +1 -1
- nat/cli/commands/uninstall.py +1 -1
- nat/cli/commands/validate.py +1 -1
- nat/cli/commands/workflow/__init__.py +1 -1
- nat/cli/commands/workflow/workflow.py +1 -1
- nat/cli/commands/workflow/workflow_commands.py +3 -2
- nat/cli/entrypoint.py +15 -37
- nat/cli/main.py +2 -2
- nat/cli/plugin_loader.py +69 -0
- nat/cli/register_workflow.py +233 -5
- nat/cli/type_registry.py +237 -3
- nat/control_flow/register.py +1 -1
- nat/control_flow/router_agent/agent.py +1 -1
- nat/control_flow/router_agent/prompt.py +1 -1
- nat/control_flow/router_agent/register.py +1 -1
- nat/control_flow/sequential_executor.py +28 -7
- nat/data_models/__init__.py +1 -1
- nat/data_models/agent.py +1 -1
- nat/data_models/api_server.py +38 -3
- nat/data_models/authentication.py +1 -1
- nat/data_models/common.py +1 -1
- nat/data_models/component.py +9 -1
- nat/data_models/component_ref.py +45 -1
- nat/data_models/config.py +78 -1
- nat/data_models/dataset_handler.py +15 -2
- nat/data_models/discovery_metadata.py +1 -1
- nat/data_models/embedder.py +1 -1
- nat/data_models/evaluate.py +6 -1
- nat/data_models/evaluator.py +1 -1
- nat/data_models/finetuning.py +260 -0
- nat/data_models/front_end.py +1 -1
- nat/data_models/function.py +15 -2
- nat/data_models/function_dependencies.py +1 -1
- nat/data_models/gated_field_mixin.py +1 -1
- nat/data_models/interactive.py +1 -1
- nat/data_models/intermediate_step.py +29 -2
- nat/data_models/invocation_node.py +1 -1
- nat/data_models/llm.py +1 -1
- nat/data_models/logging.py +1 -1
- nat/data_models/memory.py +1 -1
- nat/data_models/middleware.py +37 -0
- nat/data_models/object_store.py +1 -1
- nat/data_models/openai_mcp.py +1 -1
- nat/data_models/optimizable.py +1 -1
- nat/data_models/optimizer.py +1 -1
- nat/data_models/profiler.py +1 -1
- nat/data_models/registry_handler.py +1 -1
- nat/data_models/retriever.py +1 -1
- nat/data_models/retry_mixin.py +1 -1
- nat/data_models/runtime_enum.py +26 -0
- nat/data_models/span.py +1 -1
- nat/data_models/step_adaptor.py +1 -1
- nat/data_models/streaming.py +1 -1
- nat/data_models/swe_bench_model.py +1 -1
- nat/data_models/telemetry_exporter.py +1 -1
- nat/data_models/thinking_mixin.py +1 -1
- nat/data_models/ttc_strategy.py +1 -1
- nat/embedder/azure_openai_embedder.py +1 -1
- nat/embedder/nim_embedder.py +1 -1
- nat/embedder/openai_embedder.py +1 -1
- nat/embedder/register.py +1 -1
- nat/eval/__init__.py +1 -1
- nat/eval/config.py +8 -1
- nat/eval/dataset_handler/dataset_downloader.py +1 -1
- nat/eval/dataset_handler/dataset_filter.py +1 -1
- nat/eval/dataset_handler/dataset_handler.py +4 -2
- nat/eval/evaluate.py +226 -81
- nat/eval/evaluator/__init__.py +1 -1
- nat/eval/evaluator/base_evaluator.py +2 -2
- nat/eval/evaluator/evaluator_model.py +3 -2
- nat/eval/intermediate_step_adapter.py +1 -1
- nat/eval/llm_validator.py +336 -0
- nat/eval/rag_evaluator/evaluate.py +17 -10
- nat/eval/rag_evaluator/register.py +1 -1
- nat/eval/red_teaming_evaluator/__init__.py +14 -0
- nat/eval/red_teaming_evaluator/data_models.py +66 -0
- nat/eval/red_teaming_evaluator/evaluate.py +327 -0
- nat/eval/red_teaming_evaluator/filter_conditions.py +75 -0
- nat/eval/red_teaming_evaluator/register.py +55 -0
- nat/eval/register.py +2 -1
- nat/eval/remote_workflow.py +1 -1
- nat/eval/runners/__init__.py +1 -1
- nat/eval/runners/config.py +1 -1
- nat/eval/runners/multi_eval_runner.py +1 -1
- nat/eval/runners/red_teaming_runner/__init__.py +24 -0
- nat/eval/runners/red_teaming_runner/config.py +282 -0
- nat/eval/runners/red_teaming_runner/report_utils.py +707 -0
- nat/eval/runners/red_teaming_runner/runner.py +867 -0
- nat/eval/runtime_evaluator/__init__.py +1 -1
- nat/eval/runtime_evaluator/evaluate.py +1 -1
- nat/eval/runtime_evaluator/register.py +1 -1
- nat/eval/runtime_event_subscriber.py +1 -1
- nat/eval/swe_bench_evaluator/evaluate.py +1 -1
- nat/eval/swe_bench_evaluator/register.py +1 -1
- nat/eval/trajectory_evaluator/evaluate.py +2 -2
- nat/eval/trajectory_evaluator/register.py +1 -1
- nat/eval/tunable_rag_evaluator/evaluate.py +5 -5
- nat/eval/tunable_rag_evaluator/register.py +1 -1
- nat/eval/usage_stats.py +1 -1
- nat/eval/utils/eval_trace_ctx.py +1 -1
- nat/eval/utils/output_uploader.py +1 -1
- nat/eval/utils/tqdm_position_registry.py +1 -1
- nat/eval/utils/weave_eval.py +1 -1
- nat/experimental/decorators/experimental_warning_decorator.py +1 -1
- nat/experimental/test_time_compute/editing/iterative_plan_refinement_editor.py +1 -1
- nat/experimental/test_time_compute/editing/llm_as_a_judge_editor.py +1 -1
- nat/experimental/test_time_compute/editing/motivation_aware_summarization.py +1 -1
- nat/experimental/test_time_compute/functions/execute_score_select_function.py +1 -1
- nat/experimental/test_time_compute/functions/multi_llm_judge_function.py +88 -0
- nat/experimental/test_time_compute/functions/plan_select_execute_function.py +1 -1
- nat/experimental/test_time_compute/functions/ttc_tool_orchestration_function.py +1 -1
- nat/experimental/test_time_compute/functions/ttc_tool_wrapper_function.py +1 -1
- nat/experimental/test_time_compute/models/editor_config.py +1 -1
- nat/experimental/test_time_compute/models/scoring_config.py +1 -1
- nat/experimental/test_time_compute/models/search_config.py +20 -2
- nat/experimental/test_time_compute/models/selection_config.py +33 -2
- nat/experimental/test_time_compute/models/stage_enums.py +1 -1
- nat/experimental/test_time_compute/models/strategy_base.py +1 -1
- nat/experimental/test_time_compute/models/tool_use_config.py +1 -1
- nat/experimental/test_time_compute/models/ttc_item.py +1 -1
- nat/experimental/test_time_compute/register.py +4 -1
- nat/experimental/test_time_compute/scoring/llm_based_agent_scorer.py +1 -1
- nat/experimental/test_time_compute/scoring/llm_based_plan_scorer.py +1 -1
- nat/experimental/test_time_compute/scoring/motivation_aware_scorer.py +1 -1
- nat/experimental/test_time_compute/search/multi_llm_generation.py +115 -0
- nat/experimental/test_time_compute/search/multi_llm_planner.py +1 -1
- nat/experimental/test_time_compute/search/multi_query_retrieval_search.py +1 -1
- nat/experimental/test_time_compute/search/single_shot_multi_plan_planner.py +1 -1
- nat/experimental/test_time_compute/selection/best_of_n_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_based_agent_output_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_based_output_merging_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_based_plan_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_judge_selection.py +127 -0
- nat/experimental/test_time_compute/selection/threshold_selector.py +1 -1
- nat/finetuning/__init__.py +24 -0
- nat/finetuning/finetuning_runtime.py +143 -0
- nat/finetuning/interfaces/__init__.py +24 -0
- nat/finetuning/interfaces/finetuning_runner.py +261 -0
- nat/finetuning/interfaces/trainer_adapter.py +103 -0
- nat/finetuning/interfaces/trajectory_builder.py +115 -0
- nat/finetuning/utils/__init__.py +15 -0
- nat/finetuning/utils/parsers/__init__.py +15 -0
- nat/finetuning/utils/parsers/adk_parser.py +141 -0
- nat/finetuning/utils/parsers/base_parser.py +238 -0
- nat/finetuning/utils/parsers/common.py +91 -0
- nat/finetuning/utils/parsers/langchain_parser.py +267 -0
- nat/finetuning/utils/parsers/llama_index_parser.py +218 -0
- nat/front_ends/__init__.py +1 -1
- nat/front_ends/console/__init__.py +1 -1
- nat/front_ends/console/authentication_flow_handler.py +1 -1
- nat/front_ends/console/console_front_end_config.py +4 -1
- nat/front_ends/console/console_front_end_plugin.py +5 -4
- nat/front_ends/console/register.py +1 -1
- nat/front_ends/cron/__init__.py +1 -1
- nat/front_ends/fastapi/__init__.py +1 -1
- nat/front_ends/fastapi/async_job.py +128 -0
- nat/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +1 -1
- nat/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +13 -9
- nat/front_ends/fastapi/dask_client_mixin.py +1 -1
- nat/front_ends/fastapi/fastapi_front_end_config.py +23 -1
- nat/front_ends/fastapi/fastapi_front_end_controller.py +1 -1
- nat/front_ends/fastapi/fastapi_front_end_plugin.py +25 -30
- nat/front_ends/fastapi/fastapi_front_end_plugin_worker.py +318 -59
- nat/front_ends/fastapi/html_snippets/__init__.py +1 -1
- nat/front_ends/fastapi/html_snippets/auth_code_grant_success.py +1 -1
- nat/front_ends/fastapi/intermediate_steps_subscriber.py +12 -1
- nat/front_ends/fastapi/job_store.py +23 -11
- nat/front_ends/fastapi/main.py +1 -1
- nat/front_ends/fastapi/message_handler.py +27 -4
- nat/front_ends/fastapi/message_validator.py +54 -2
- nat/front_ends/fastapi/register.py +1 -1
- nat/front_ends/fastapi/response_helpers.py +16 -15
- nat/front_ends/fastapi/step_adaptor.py +1 -1
- nat/front_ends/fastapi/utils.py +1 -1
- nat/front_ends/register.py +1 -2
- nat/front_ends/simple_base/__init__.py +1 -1
- nat/front_ends/simple_base/simple_front_end_plugin_base.py +6 -4
- nat/llm/aws_bedrock_llm.py +1 -1
- nat/llm/azure_openai_llm.py +10 -1
- nat/llm/dynamo_llm.py +363 -0
- nat/llm/huggingface_llm.py +177 -0
- nat/llm/litellm_llm.py +1 -1
- nat/llm/nim_llm.py +1 -1
- nat/llm/openai_llm.py +1 -1
- nat/llm/register.py +3 -1
- nat/llm/utils/__init__.py +1 -1
- nat/llm/utils/env_config_value.py +1 -1
- nat/llm/utils/error.py +1 -1
- nat/llm/utils/thinking.py +1 -1
- nat/memory/__init__.py +1 -1
- nat/memory/interfaces.py +1 -1
- nat/memory/models.py +1 -1
- nat/meta/pypi.md +1 -1
- nat/middleware/__init__.py +35 -0
- nat/middleware/cache/__init__.py +14 -0
- nat/middleware/cache/cache_middleware.py +253 -0
- nat/middleware/cache/cache_middleware_config.py +44 -0
- nat/middleware/cache/register.py +33 -0
- nat/middleware/defense/__init__.py +14 -0
- nat/middleware/defense/defense_middleware.py +362 -0
- nat/middleware/defense/defense_middleware_content_guard.py +455 -0
- nat/middleware/defense/defense_middleware_data_models.py +91 -0
- nat/middleware/defense/defense_middleware_output_verifier.py +440 -0
- nat/middleware/defense/defense_middleware_pii.py +356 -0
- nat/middleware/defense/register.py +82 -0
- nat/middleware/dynamic/__init__.py +14 -0
- nat/middleware/dynamic/dynamic_function_middleware.py +962 -0
- nat/middleware/dynamic/dynamic_middleware_config.py +132 -0
- nat/middleware/dynamic/register.py +34 -0
- nat/middleware/function_middleware.py +370 -0
- nat/middleware/logging/__init__.py +14 -0
- nat/middleware/logging/logging_middleware.py +67 -0
- nat/middleware/logging/logging_middleware_config.py +28 -0
- nat/middleware/logging/register.py +33 -0
- nat/middleware/middleware.py +298 -0
- nat/middleware/red_teaming/__init__.py +14 -0
- nat/middleware/red_teaming/red_teaming_middleware.py +344 -0
- nat/middleware/red_teaming/red_teaming_middleware_config.py +112 -0
- nat/middleware/red_teaming/register.py +47 -0
- nat/middleware/register.py +22 -0
- nat/middleware/utils/__init__.py +14 -0
- nat/middleware/utils/workflow_inventory.py +155 -0
- nat/object_store/__init__.py +1 -1
- nat/object_store/in_memory_object_store.py +1 -1
- nat/object_store/interfaces.py +1 -1
- nat/object_store/models.py +1 -1
- nat/object_store/register.py +1 -1
- nat/observability/__init__.py +1 -1
- nat/observability/exporter/__init__.py +1 -1
- nat/observability/exporter/base_exporter.py +1 -1
- nat/observability/exporter/exporter.py +1 -1
- nat/observability/exporter/file_exporter.py +1 -1
- nat/observability/exporter/processing_exporter.py +1 -1
- nat/observability/exporter/raw_exporter.py +1 -1
- nat/observability/exporter/span_exporter.py +7 -1
- nat/observability/exporter_manager.py +1 -1
- nat/observability/mixin/__init__.py +1 -1
- nat/observability/mixin/batch_config_mixin.py +1 -1
- nat/observability/mixin/collector_config_mixin.py +1 -1
- nat/observability/mixin/file_mixin.py +1 -1
- nat/observability/mixin/file_mode.py +1 -1
- nat/observability/mixin/redaction_config_mixin.py +1 -1
- nat/observability/mixin/resource_conflict_mixin.py +1 -1
- nat/observability/mixin/serialize_mixin.py +1 -1
- nat/observability/mixin/tagging_config_mixin.py +1 -1
- nat/observability/mixin/type_introspection_mixin.py +1 -1
- nat/observability/processor/__init__.py +1 -1
- nat/observability/processor/batching_processor.py +1 -1
- nat/observability/processor/callback_processor.py +1 -1
- nat/observability/processor/falsy_batch_filter_processor.py +1 -1
- nat/observability/processor/intermediate_step_serializer.py +1 -1
- nat/observability/processor/processor.py +1 -1
- nat/observability/processor/processor_factory.py +1 -1
- nat/observability/processor/redaction/__init__.py +1 -1
- nat/observability/processor/redaction/contextual_redaction_processor.py +1 -1
- nat/observability/processor/redaction/contextual_span_redaction_processor.py +1 -1
- nat/observability/processor/redaction/redaction_processor.py +1 -1
- nat/observability/processor/redaction/span_header_redaction_processor.py +1 -1
- nat/observability/processor/span_tagging_processor.py +1 -1
- nat/observability/register.py +1 -1
- nat/observability/utils/__init__.py +1 -1
- nat/observability/utils/dict_utils.py +1 -1
- nat/observability/utils/time_utils.py +1 -1
- nat/profiler/calc/__init__.py +1 -1
- nat/profiler/calc/calc_runner.py +3 -3
- nat/profiler/calc/calculations.py +1 -1
- nat/profiler/calc/data_models.py +1 -1
- nat/profiler/calc/plot.py +30 -3
- nat/profiler/callbacks/agno_callback_handler.py +1 -1
- nat/profiler/callbacks/base_callback_class.py +1 -1
- nat/profiler/callbacks/langchain_callback_handler.py +33 -3
- nat/profiler/callbacks/llama_index_callback_handler.py +13 -10
- nat/profiler/callbacks/semantic_kernel_callback_handler.py +1 -1
- nat/profiler/callbacks/token_usage_base_model.py +1 -1
- nat/profiler/data_frame_row.py +1 -1
- nat/profiler/data_models.py +1 -1
- nat/profiler/decorators/framework_wrapper.py +32 -1
- nat/profiler/decorators/function_tracking.py +1 -1
- nat/profiler/forecasting/config.py +1 -1
- nat/profiler/forecasting/model_trainer.py +1 -1
- nat/profiler/forecasting/models/__init__.py +1 -1
- nat/profiler/forecasting/models/forecasting_base_model.py +1 -1
- nat/profiler/forecasting/models/linear_model.py +1 -1
- nat/profiler/forecasting/models/random_forest_regressor.py +1 -1
- nat/profiler/inference_metrics_model.py +1 -1
- nat/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +1 -1
- nat/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +1 -1
- nat/profiler/inference_optimization/data_models.py +1 -1
- nat/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +1 -1
- nat/profiler/inference_optimization/experimental/prefix_span_analysis.py +1 -1
- nat/profiler/inference_optimization/llm_metrics.py +1 -1
- nat/profiler/inference_optimization/prompt_caching.py +1 -1
- nat/profiler/inference_optimization/token_uniqueness.py +1 -1
- nat/profiler/inference_optimization/workflow_runtimes.py +1 -1
- nat/profiler/intermediate_property_adapter.py +1 -1
- nat/profiler/parameter_optimization/optimizable_utils.py +1 -1
- nat/profiler/parameter_optimization/optimizer_runtime.py +1 -1
- nat/profiler/parameter_optimization/parameter_optimizer.py +1 -1
- nat/profiler/parameter_optimization/parameter_selection.py +1 -1
- nat/profiler/parameter_optimization/pareto_visualizer.py +1 -1
- nat/profiler/parameter_optimization/prompt_optimizer.py +1 -1
- nat/profiler/parameter_optimization/update_helpers.py +1 -1
- nat/profiler/profile_runner.py +1 -1
- nat/profiler/utils.py +1 -1
- nat/registry_handlers/local/local_handler.py +1 -1
- nat/registry_handlers/local/register_local.py +1 -1
- nat/registry_handlers/metadata_factory.py +1 -1
- nat/registry_handlers/package_utils.py +1 -1
- nat/registry_handlers/pypi/pypi_handler.py +1 -1
- nat/registry_handlers/pypi/register_pypi.py +1 -1
- nat/registry_handlers/register.py +1 -1
- nat/registry_handlers/registry_handler_base.py +1 -1
- nat/registry_handlers/rest/register_rest.py +1 -1
- nat/registry_handlers/rest/rest_handler.py +1 -1
- nat/registry_handlers/schemas/headers.py +1 -1
- nat/registry_handlers/schemas/package.py +1 -1
- nat/registry_handlers/schemas/publish.py +1 -1
- nat/registry_handlers/schemas/pull.py +1 -1
- nat/registry_handlers/schemas/remove.py +1 -1
- nat/registry_handlers/schemas/search.py +1 -1
- nat/registry_handlers/schemas/status.py +1 -1
- nat/retriever/interface.py +1 -1
- nat/retriever/milvus/__init__.py +1 -1
- nat/retriever/milvus/register.py +12 -4
- nat/retriever/milvus/retriever.py +103 -41
- nat/retriever/models.py +1 -1
- nat/retriever/nemo_retriever/__init__.py +1 -1
- nat/retriever/nemo_retriever/register.py +1 -1
- nat/retriever/nemo_retriever/retriever.py +5 -5
- nat/retriever/register.py +1 -1
- nat/runtime/__init__.py +1 -1
- nat/runtime/loader.py +10 -3
- nat/runtime/metrics.py +180 -0
- nat/runtime/runner.py +13 -6
- nat/runtime/session.py +458 -32
- nat/runtime/user_metadata.py +1 -1
- nat/settings/global_settings.py +1 -1
- nat/tool/chat_completion.py +1 -1
- nat/tool/code_execution/README.md +1 -1
- nat/tool/code_execution/code_sandbox.py +2 -2
- nat/tool/code_execution/local_sandbox/Dockerfile.sandbox +1 -1
- nat/tool/code_execution/local_sandbox/__init__.py +1 -1
- nat/tool/code_execution/local_sandbox/local_sandbox_server.py +1 -1
- nat/tool/code_execution/local_sandbox/start_local_sandbox.sh +1 -1
- nat/tool/code_execution/register.py +1 -1
- nat/tool/code_execution/utils.py +1 -1
- nat/tool/datetime_tools.py +1 -1
- nat/tool/document_search.py +1 -1
- nat/tool/github_tools.py +1 -1
- nat/tool/memory_tools/add_memory_tool.py +1 -1
- nat/tool/memory_tools/delete_memory_tool.py +1 -1
- nat/tool/memory_tools/get_memory_tool.py +1 -1
- nat/tool/nvidia_rag.py +2 -2
- nat/tool/register.py +1 -1
- nat/tool/retriever.py +1 -1
- nat/tool/server_tools.py +1 -1
- nat/utils/__init__.py +8 -5
- nat/utils/callable_utils.py +1 -1
- nat/utils/data_models/schema_validator.py +1 -1
- nat/utils/debugging_utils.py +1 -1
- nat/utils/decorators.py +1 -1
- nat/utils/dump_distro_mapping.py +1 -1
- nat/utils/exception_handlers/automatic_retries.py +3 -3
- nat/utils/exception_handlers/schemas.py +1 -1
- nat/utils/io/model_processing.py +1 -1
- nat/utils/io/supress_logs.py +33 -0
- nat/utils/io/yaml_tools.py +1 -1
- nat/utils/log_levels.py +1 -1
- nat/utils/log_utils.py +13 -1
- nat/utils/metadata_utils.py +1 -1
- nat/utils/optional_imports.py +1 -1
- nat/utils/producer_consumer_queue.py +1 -1
- nat/utils/reactive/base/observable_base.py +1 -1
- nat/utils/reactive/base/observer_base.py +1 -1
- nat/utils/reactive/base/subject_base.py +1 -1
- nat/utils/reactive/observable.py +1 -1
- nat/utils/reactive/observer.py +1 -1
- nat/utils/reactive/subject.py +1 -1
- nat/utils/reactive/subscription.py +1 -1
- nat/utils/responses_api.py +1 -1
- nat/utils/settings/global_settings.py +1 -1
- nat/utils/string_utils.py +1 -1
- nat/utils/type_converter.py +18 -5
- nat/utils/type_utils.py +1 -1
- nat/utils/url_utils.py +1 -1
- {nvidia_nat-1.4.0a20251112.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/METADATA +46 -15
- nvidia_nat-1.4.0a20260113.dist-info/RECORD +547 -0
- nvidia_nat-1.4.0a20260113.dist-info/entry_points.txt +38 -0
- nat/cli/commands/mcp/mcp.py +0 -986
- nat/front_ends/mcp/introspection_token_verifier.py +0 -73
- nat/front_ends/mcp/mcp_front_end_config.py +0 -109
- nat/front_ends/mcp/mcp_front_end_plugin.py +0 -151
- nat/front_ends/mcp/mcp_front_end_plugin_worker.py +0 -362
- nat/front_ends/mcp/memory_profiler.py +0 -320
- nat/front_ends/mcp/register.py +0 -27
- nat/front_ends/mcp/tool_converter.py +0 -321
- nvidia_nat-1.4.0a20251112.dist-info/RECORD +0 -481
- nvidia_nat-1.4.0a20251112.dist-info/entry_points.txt +0 -22
- {nvidia_nat-1.4.0a20251112.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/WHEEL +0 -0
- {nvidia_nat-1.4.0a20251112.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
- {nvidia_nat-1.4.0a20251112.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE.md +0 -0
- {nvidia_nat-1.4.0a20251112.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,336 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
"""
|
|
16
|
+
LLM Endpoint Validator for NeMo Agent Toolkit evaluation.
|
|
17
|
+
|
|
18
|
+
This module provides functionality to validate LLM endpoints before running evaluation
|
|
19
|
+
workflows. This helps catch deployment issues early (e.g., models not deployed after
|
|
20
|
+
training cancellation) and provides actionable error messages.
|
|
21
|
+
|
|
22
|
+
The validation uses the NeMo Agent Toolkit `WorkflowBuilder` to instantiate LLMs in a framework-agnostic way,
|
|
23
|
+
then tests them with a minimal `ainvoke()` call. This approach works for all LLM types
|
|
24
|
+
(OpenAI, NIM, AWS Bedrock, vLLM, etc.) and respects the auth and config system.
|
|
25
|
+
|
|
26
|
+
Note: Validation invokes actual LLM endpoints with minimal test prompts. This may incur
|
|
27
|
+
small API costs for cloud-hosted models.
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
import asyncio
|
|
31
|
+
import logging
|
|
32
|
+
import time
|
|
33
|
+
from typing import TYPE_CHECKING
|
|
34
|
+
|
|
35
|
+
from nat.builder.framework_enum import LLMFrameworkEnum
|
|
36
|
+
from nat.builder.workflow_builder import WorkflowBuilder
|
|
37
|
+
from nat.data_models.llm import LLMBaseConfig
|
|
38
|
+
|
|
39
|
+
if TYPE_CHECKING:
|
|
40
|
+
from nat.data_models.config import Config
|
|
41
|
+
|
|
42
|
+
logger = logging.getLogger(__name__)
|
|
43
|
+
|
|
44
|
+
# Constants
|
|
45
|
+
VALIDATION_TIMEOUT_SECONDS = 30  # Timeout, in seconds, for each LLM validation
|
|
46
|
+
MAX_ERROR_MESSAGE_LENGTH = 500  # Default character budget used by _truncate_error_message() for long error messages
|
|
47
|
+
CONCURRENT_VALIDATION_BATCH_SIZE = 5  # Max number of LLMs to validate in parallel (one batch)
|
|
48
|
+
VALIDATION_PROMPT = "test"  # Minimal prompt for endpoint validation; kept tiny because validation calls real endpoints
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _is_404_error(exception: Exception) -> bool:
|
|
52
|
+
"""
|
|
53
|
+
Detect if an exception represents a 404 (model not found) error.
|
|
54
|
+
|
|
55
|
+
This handles various 404 error formats from different LLM providers:
|
|
56
|
+
- OpenAI SDK: openai.NotFoundError
|
|
57
|
+
- HTTP responses: HTTP 404 or status code 404
|
|
58
|
+
- LangChain wrappers: Various wrapped 404s
|
|
59
|
+
|
|
60
|
+
Args:
|
|
61
|
+
exception: The exception to check.
|
|
62
|
+
|
|
63
|
+
Returns:
|
|
64
|
+
True if this is a 404 error, False otherwise.
|
|
65
|
+
"""
|
|
66
|
+
exception_str = str(exception).lower()
|
|
67
|
+
exception_type = type(exception).__name__
|
|
68
|
+
|
|
69
|
+
# Check for NotFoundError type (OpenAI SDK)
|
|
70
|
+
if "notfounderror" in exception_type.lower():
|
|
71
|
+
return True
|
|
72
|
+
|
|
73
|
+
# Check for HTTP 404 specifically (not just "404" which could appear in other contexts)
|
|
74
|
+
if any(pattern in exception_str for pattern in ["http 404", "status code 404", "status_code=404"]):
|
|
75
|
+
return True
|
|
76
|
+
|
|
77
|
+
# Check for model-specific not found errors
|
|
78
|
+
if "model" in exception_str and any(phrase in exception_str
|
|
79
|
+
for phrase in ["not found", "does not exist", "not deployed", "not available"]):
|
|
80
|
+
return True
|
|
81
|
+
|
|
82
|
+
return False
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _get_llm_endpoint_info(llm_config: LLMBaseConfig) -> tuple[str | None, str | None]:
|
|
86
|
+
"""
|
|
87
|
+
Extract endpoint and model information from an LLM config.
|
|
88
|
+
|
|
89
|
+
Args:
|
|
90
|
+
llm_config: The LLM configuration object.
|
|
91
|
+
|
|
92
|
+
Returns:
|
|
93
|
+
Tuple of (base_url, model_name), either may be None.
|
|
94
|
+
"""
|
|
95
|
+
base_url = getattr(llm_config, "base_url", None)
|
|
96
|
+
|
|
97
|
+
# Try multiple attributes for model name
|
|
98
|
+
model_name = getattr(llm_config, "model_name", None)
|
|
99
|
+
if model_name is None:
|
|
100
|
+
model_name = getattr(llm_config, "model", None)
|
|
101
|
+
|
|
102
|
+
return base_url, model_name
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _truncate_error_message(message: str, max_length: int = MAX_ERROR_MESSAGE_LENGTH) -> str:
    """
    Truncate error messages to prevent memory issues with large stack traces.

    Keeps both the start and end of the message to preserve context from both
    the error description (start) and the stack trace (end). The result never
    exceeds ``max_length`` characters.

    Args:
        message: The error message to truncate.
        max_length: Maximum length to keep. A value of zero or less yields an
            empty string.

    Returns:
        The original message if it already fits; otherwise the head and tail of
        the message joined by a truncation marker, or a plain prefix when
        ``max_length`` is too small to fit the marker.
    """
    # A non-positive budget leaves no room for any text. It must be handled before the
    # slicing below: a negative slice bound counts from the end of the string, so
    # `message[:max_length]` would return almost the whole message instead of shortening it.
    if max_length <= 0:
        return ""

    if len(message) <= max_length:
        return message

    # Keep first and last portions to preserve both error description and stack trace
    separator = " ... (truncated) ... "

    # Guard for very small max_length values: there is no room for the separator plus
    # at least one character on each side, so fall back to a plain prefix.
    if max_length <= len(separator) + 2:
        return message[:max_length]

    # Split the remaining budget evenly between head and tail; integer division keeps the
    # total at or below max_length. keep_length is at least 1 here because of the guard above.
    keep_length = (max_length - len(separator)) // 2
    return f"{message[:keep_length]}{separator}{message[-keep_length:]}"
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
async def _validate_single_llm(builder: WorkflowBuilder, llm_name: str,
                               llm_config: LLMBaseConfig) -> tuple[str | None, str | None]:
    """
    Validate a single LLM endpoint.

    The LLM is registered on ``builder``, instantiated with the first framework in
    ``LLMFrameworkEnum`` that accepts it, and then invoked once with
    ``VALIDATION_PROMPT`` under a ``VALIDATION_TIMEOUT_SECONDS`` timeout.

    Args:
        builder: The WorkflowBuilder instance.
        llm_name: Name of the LLM to validate.
        llm_config: Configuration for the LLM.

    Returns:
        Tuple of (error_type, error_message):
        - error_type: "404" for model not found, "warning" for non-critical, None for success
        - error_message: Description of the error, or None if successful

    Raises:
        KeyboardInterrupt, SystemExit: Propagated unchanged; every other ``Exception``
            is converted into a "404" or "warning" result.
    """
    try:
        logger.info("Validating LLM '%s' (type: %s)", llm_name, llm_config.type)
        # perf_counter is monotonic, so the reported duration cannot be skewed by
        # wall-clock adjustments happening while the request is in flight.
        start_time = time.perf_counter()

        # Add LLM to builder (handles all LLM types)
        await builder.add_llm(llm_name, llm_config)

        # Try all frameworks to find one that works with this LLM
        llm = None
        for framework in LLMFrameworkEnum:
            try:
                llm = await builder.get_llm(llm_name, framework)
                logger.debug("LLM '%s' successfully loaded with framework '%s'", llm_name, framework.value)
                break  # Found a working framework
            except Exception as e:
                logger.debug("LLM '%s' failed with framework '%s': %s", llm_name, framework.value, e)
                continue  # Try next framework

        if llm is None:
            # Log all attempted frameworks for debugging
            attempted = [f.value for f in LLMFrameworkEnum]
            error_msg = (f"Could not instantiate LLM '{llm_name}' with any known framework. "
                         f"Attempted: {', '.join(attempted)}. "
                         f"If this LLM uses a custom framework, this warning can be safely ignored. "
                         f"Otherwise, verify the LLM type '{llm_config.type}' is supported and configured correctly.")
            logger.warning("LLM '%s' - Framework instantiation failed: %s", llm_name, error_msg)
            return ("warning", error_msg)

        # Test with minimal prompt - this will hit the endpoint
        await asyncio.wait_for(llm.ainvoke(VALIDATION_PROMPT), timeout=VALIDATION_TIMEOUT_SECONDS)

        duration = time.perf_counter() - start_time
        logger.info("LLM '%s' validated successfully in %.2fs", llm_name, duration)
        return (None, None)

    except (asyncio.TimeoutError, TimeoutError):
        # asyncio.TimeoutError only became an alias of the builtin TimeoutError in
        # Python 3.11; naming both keeps the timeout path working on 3.10 as well.
        error_msg = f"Validation timed out after {VALIDATION_TIMEOUT_SECONDS}s"
        logger.warning("LLM '%s' validation timed out", llm_name)
        return ("warning", _truncate_error_message(error_msg))

    except (KeyboardInterrupt, SystemExit):
        # Don't catch system-level interrupts
        raise

    except Exception as invoke_error:
        # Check if this is a 404 error (model not deployed)
        if _is_404_error(invoke_error):
            base_url, model_name = _get_llm_endpoint_info(llm_config)

            error_msg = (f"LLM '{llm_name}' validation failed: Model not found (404).\n"
                         f"\nThis typically means:\n"
                         f" 1. The model has not been deployed yet\n"
                         f" 2. The model name is incorrect\n"
                         f" 3. A training job was canceled and the model was never deployed\n"
                         f"\nLLM Configuration:\n"
                         f" Type: {str(llm_config.type)}\n"
                         f" Endpoint: {base_url or 'N/A'}\n"
                         f" Model: {model_name or 'N/A'}\n"
                         f"\nACTION REQUIRED:\n"
                         f" 1. Verify the model is deployed (check your deployment service)\n"
                         f" 2. If using NeMo Customizer, ensure training completed successfully\n"
                         f" 3. Check model deployment status in your platform\n"
                         f" 4. Verify the model name matches the deployed model\n"
                         f"\nOriginal error: {_truncate_error_message(str(invoke_error))}")
            logger.exception(error_msg)
            return ("404", error_msg)

        else:
            # Non-404 error - might be auth, rate limit, temporary issue, etc.
            error_msg = (f"Could not fully validate LLM '{llm_name}': {_truncate_error_message(str(invoke_error))}. "
                         f"This might be due to auth requirements, rate limits, or temporary issues. "
                         f"Evaluation will proceed, but may fail if the LLM is truly inaccessible.")
            logger.exception(error_msg)
            return ("warning", _truncate_error_message(error_msg))
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
async def validate_llm_endpoints(config: "Config") -> None:
    """
    Validate that all LLM endpoints in the config are accessible.

    This function uses NAT's WorkflowBuilder to instantiate each configured LLM
    and tests it with a minimal ainvoke() call. This approach is framework-agnostic
    and works for all LLM types (OpenAI, NIM, AWS Bedrock, vLLM, etc.).

    The validation distinguishes between critical errors (404s indicating model not
    deployed) and non-critical errors (auth issues, rate limits, etc.):
    - 404 errors: Fail fast with detailed troubleshooting guidance
    - Other errors: Log warning but continue (to avoid false positives)

    LLMs are validated in parallel batches of ``CONCURRENT_VALIDATION_BATCH_SIZE``
    to improve performance while respecting rate limits. Each validation has a
    timeout to prevent hanging.

    Note: This function invokes actual LLM endpoints, which may incur small API costs.

    Args:
        config: The NAT configuration object containing LLM definitions.

    Raises:
        RuntimeError: If any LLM endpoint has a 404 error (model not deployed).
        ValueError: If config.llms is not properly structured.
    """

    # Validate config structure
    if not hasattr(config, "llms"):
        raise ValueError("Config does not have 'llms' attribute. Cannot validate LLM endpoints.")

    if not isinstance(config.llms, dict):
        raise ValueError(
            f"Config.llms must be a dict, got {type(config.llms).__name__}. Cannot validate LLM endpoints.")

    if not config.llms:
        logger.info("No LLMs configured - skipping endpoint validation")
        return

    failed_llms = []  # List of (llm_name, error_message) tuples for 404 errors
    validation_warnings = []  # List of (llm_name, warning_message) tuples for non-critical errors

    # Get list of LLMs to validate
    llm_items = list(config.llms.items())

    # Use WorkflowBuilder to instantiate and test LLMs
    async with WorkflowBuilder() as builder:
        # Validate in batches to respect rate limits
        for batch_start in range(0, len(llm_items), CONCURRENT_VALIDATION_BATCH_SIZE):
            batch = llm_items[batch_start:batch_start + CONCURRENT_VALIDATION_BATCH_SIZE]

            # Validate batch in parallel
            validation_tasks = [_validate_single_llm(builder, llm_name, llm_config) for llm_name, llm_config in batch]

            # return_exceptions=True keeps one failing validation from discarding
            # the results of the other LLMs in the same batch.
            results = await asyncio.gather(*validation_tasks, return_exceptions=True)

            # Process results - zip with batch to maintain llm_name association
            for (llm_name, _llm_config), result in zip(batch, results, strict=True):
                if isinstance(result, BaseException):
                    # Re-raise system interrupts if they somehow got through
                    if isinstance(result, KeyboardInterrupt | SystemExit):
                        raise result

                    # Unexpected exception during validation. Name the LLM in the log
                    # line: validations run in parallel, so without it the message
                    # cannot be attributed to a specific endpoint.
                    unexpected = _truncate_error_message(str(result))
                    logger.warning("Unexpected error during validation of LLM '%s': %s", llm_name, unexpected)
                    validation_warnings.append((llm_name, unexpected))
                else:
                    # Normal result: (error_type, error_message)
                    error_type, error_message = result

                    if error_type == "404":
                        failed_llms.append((llm_name, error_message))
                    elif error_type == "warning":
                        validation_warnings.append((llm_name, error_message))
                    # If error_type is None, validation succeeded (no action needed)

    # Calculate validation metrics
    total_llms = len(llm_items)
    succeeded_count = total_llms - len(failed_llms) - len(validation_warnings)

    # Report non-critical warnings
    if validation_warnings:
        warning_summary = "\n".join(f" - {name}: {msg}" for name, msg in validation_warnings)
        logger.warning(
            "LLM validation completed with %d warning(s):\n%s\nThese LLMs may still work during evaluation.",
            len(validation_warnings),
            warning_summary,
        )

    # If any LLMs have 404 errors, fail validation
    if failed_llms:
        error_summary = "\n\n".join(f"LLM '{name}':\n{msg}" for name, msg in failed_llms)

        # Log metrics before raising error
        logger.error(
            "Validation summary: %d total, %d succeeded, %d warned, %d failed (404)",
            total_llms,
            succeeded_count,
            len(validation_warnings),
            len(failed_llms),
        )

        raise RuntimeError(f"LLM endpoint validation failed for {len(failed_llms)} LLM(s) with 404 errors:\n\n"
                           f"{error_summary}\n\n"
                           f"Evaluation cannot proceed with undeployed models. "
                           f"Please resolve the deployment issues above before retrying.")

    # Log success metrics
    logger.info(
        "All LLM endpoints validated successfully - %d total, %d succeeded, %d warned",
        total_llms,
        succeeded_count,
        len(validation_warnings),
    )
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -15,14 +15,10 @@
|
|
|
15
15
|
|
|
16
16
|
import logging
|
|
17
17
|
import math
|
|
18
|
+
import typing
|
|
18
19
|
from collections.abc import Sequence
|
|
19
20
|
|
|
20
21
|
from pydantic import BaseModel
|
|
21
|
-
from ragas import EvaluationDataset
|
|
22
|
-
from ragas import SingleTurnSample
|
|
23
|
-
from ragas.dataset_schema import EvaluationResult
|
|
24
|
-
from ragas.llms import LangchainLLMWrapper
|
|
25
|
-
from ragas.metrics import Metric
|
|
26
22
|
from tqdm import tqdm
|
|
27
23
|
|
|
28
24
|
from nat.data_models.intermediate_step import IntermediateStepType
|
|
@@ -32,14 +28,22 @@ from nat.eval.evaluator.evaluator_model import EvalOutput
|
|
|
32
28
|
from nat.eval.evaluator.evaluator_model import EvalOutputItem
|
|
33
29
|
from nat.eval.utils.tqdm_position_registry import TqdmPositionRegistry
|
|
34
30
|
|
|
31
|
+
if typing.TYPE_CHECKING:
|
|
32
|
+
# We are lazily importing ragas to avoid import-time side effects such as applying the nest_asyncio patch, which is
|
|
33
|
+
# not compatible with Python 3.12+; we want to ensure that we are able to apply the nest_asyncio2 patch instead.
|
|
34
|
+
from ragas import EvaluationDataset
|
|
35
|
+
from ragas.dataset_schema import EvaluationResult
|
|
36
|
+
from ragas.llms import LangchainLLMWrapper
|
|
37
|
+
from ragas.metrics import Metric
|
|
38
|
+
|
|
35
39
|
logger = logging.getLogger(__name__)
|
|
36
40
|
|
|
37
41
|
|
|
38
42
|
class RAGEvaluator:
|
|
39
43
|
|
|
40
44
|
def __init__(self,
|
|
41
|
-
evaluator_llm: LangchainLLMWrapper,
|
|
42
|
-
metrics: Sequence[Metric],
|
|
45
|
+
evaluator_llm: "LangchainLLMWrapper",
|
|
46
|
+
metrics: Sequence["Metric"],
|
|
43
47
|
max_concurrency=8,
|
|
44
48
|
input_obj_field: str | None = None):
|
|
45
49
|
self.evaluator_llm = evaluator_llm
|
|
@@ -66,8 +70,11 @@ class RAGEvaluator:
|
|
|
66
70
|
|
|
67
71
|
return str(input_obj) # Fallback to string representation of the dict
|
|
68
72
|
|
|
69
|
-
def eval_input_to_ragas(self, eval_input: EvalInput) -> EvaluationDataset:
|
|
73
|
+
def eval_input_to_ragas(self, eval_input: EvalInput) -> "EvaluationDataset":
|
|
70
74
|
"""Converts EvalInput into a Ragas-compatible EvaluationDataset."""
|
|
75
|
+
from ragas import EvaluationDataset
|
|
76
|
+
from ragas import SingleTurnSample
|
|
77
|
+
|
|
71
78
|
from nat.eval.intermediate_step_adapter import IntermediateStepAdapter
|
|
72
79
|
event_filter = [IntermediateStepType.TOOL_END, IntermediateStepType.LLM_END, IntermediateStepType.CUSTOM_END]
|
|
73
80
|
samples = []
|
|
@@ -98,7 +105,7 @@ class RAGEvaluator:
|
|
|
98
105
|
|
|
99
106
|
return EvaluationDataset(samples=samples)
|
|
100
107
|
|
|
101
|
-
def ragas_to_eval_output(self, eval_input: EvalInput, results_dataset: EvaluationResult | None) -> EvalOutput:
|
|
108
|
+
def ragas_to_eval_output(self, eval_input: EvalInput, results_dataset: "EvaluationResult | None") -> EvalOutput:
|
|
102
109
|
"""Converts the ragas EvaluationResult to nat EvalOutput"""
|
|
103
110
|
|
|
104
111
|
if not results_dataset:
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
"""Data models for red teaming evaluation output."""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
from pydantic import Field
|
|
20
|
+
|
|
21
|
+
from nat.data_models.intermediate_step import IntermediateStep
|
|
22
|
+
from nat.eval.evaluator.evaluator_model import EvalOutputItem
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ConditionEvalOutputItem(EvalOutputItem):
    """Outcome of evaluating one filter condition against a single IntermediateStep.

    Extends ``EvalOutputItem`` with the step that was evaluated and an optional
    error description.

    Attributes:
        id: Identifier from the input item.
        score: Average score across all filter conditions.
        reasoning: Reasoning for given score.
        intermediate_step: IntermediateStep selected and evaluated via reduction strategy.
        error_message: Error message if any step of the evaluation has failed.
    """

    intermediate_step: IntermediateStep | None = Field(
        default=None,
        description="The single IntermediateStep that was selected and evaluated (based on reduction strategy)",
    )
    error_message: str | None = Field(
        default=None,
        description="Error message if any step of the evaluation has failed",
    )

    @classmethod
    def empty(cls, id: str, error: str | None = None) -> ConditionEvalOutputItem:
        """Build a placeholder item that carries no evaluated step.

        Args:
            id: Identifier to assign to the item.
            error: Optional error description stored as ``error_message``.

        Returns:
            A ConditionEvalOutputItem with a score of 0.0, empty reasoning and no
            intermediate step.
        """
        placeholder = cls(
            id=id,
            score=0.0,
            reasoning={},
            error_message=error,
            intermediate_step=None,
        )
        return placeholder
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class RedTeamingEvalOutputItem(EvalOutputItem):
    """Extended evaluation output item for red teaming evaluations.

    Organizes results by filter condition name, with each condition containing
    its score, the evaluated output, and the single intermediate step that was selected.

    Attributes:
        id: Identifier from the input item.
        score: Average score across all filter conditions.
        reasoning: Summary information for compatibility.
        results_by_condition: Map from condition name to that condition's
            ConditionEvalOutputItem.
    """

    # No default is supplied, so this mapping must be provided by whoever
    # constructs the item. Keys are filter condition names.
    results_by_condition: dict[str, ConditionEvalOutputItem] = Field(
        description="Results organized by filter condition name")
|