nvidia-nat 1.4.0a20251120__py3-none-any.whl → 1.4.0a20260113__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiq/__init__.py +1 -1
- nat/{front_ends/mcp → agent/auto_memory_wrapper}/__init__.py +1 -1
- nat/agent/auto_memory_wrapper/agent.py +278 -0
- nat/agent/auto_memory_wrapper/register.py +227 -0
- nat/agent/auto_memory_wrapper/state.py +30 -0
- nat/agent/base.py +1 -1
- nat/agent/dual_node.py +1 -1
- nat/agent/prompt_optimizer/prompt.py +1 -1
- nat/agent/prompt_optimizer/register.py +1 -1
- nat/agent/react_agent/agent.py +16 -9
- nat/agent/react_agent/output_parser.py +2 -2
- nat/agent/react_agent/prompt.py +3 -2
- nat/agent/react_agent/register.py +2 -2
- nat/agent/react_agent/register_per_user_agent.py +104 -0
- nat/agent/reasoning_agent/reasoning_agent.py +1 -1
- nat/agent/register.py +3 -1
- nat/agent/responses_api_agent/__init__.py +1 -1
- nat/agent/responses_api_agent/register.py +1 -1
- nat/agent/rewoo_agent/agent.py +9 -4
- nat/agent/rewoo_agent/prompt.py +1 -1
- nat/agent/rewoo_agent/register.py +1 -1
- nat/agent/tool_calling_agent/agent.py +5 -4
- nat/agent/tool_calling_agent/register.py +1 -1
- nat/authentication/__init__.py +1 -1
- nat/authentication/api_key/__init__.py +1 -1
- nat/authentication/api_key/api_key_auth_provider.py +1 -1
- nat/authentication/api_key/api_key_auth_provider_config.py +22 -7
- nat/authentication/api_key/register.py +1 -1
- nat/authentication/credential_validator/__init__.py +1 -1
- nat/authentication/credential_validator/bearer_token_validator.py +1 -1
- nat/authentication/exceptions/__init__.py +1 -1
- nat/authentication/exceptions/api_key_exceptions.py +1 -1
- nat/authentication/http_basic_auth/http_basic_auth_provider.py +1 -1
- nat/authentication/http_basic_auth/register.py +1 -1
- nat/authentication/interfaces.py +1 -1
- nat/authentication/oauth2/__init__.py +1 -1
- nat/authentication/oauth2/oauth2_auth_code_flow_provider.py +1 -1
- nat/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +1 -1
- nat/authentication/oauth2/oauth2_resource_server_config.py +1 -1
- nat/authentication/oauth2/register.py +1 -1
- nat/authentication/register.py +1 -1
- nat/builder/builder.py +511 -1
- nat/builder/child_builder.py +385 -0
- nat/builder/component_utils.py +28 -4
- nat/builder/context.py +17 -1
- nat/builder/embedder.py +1 -1
- nat/builder/eval_builder.py +19 -7
- nat/builder/evaluator.py +1 -1
- nat/builder/framework_enum.py +2 -1
- nat/builder/front_end.py +1 -1
- nat/builder/function.py +40 -3
- nat/builder/function_base.py +1 -1
- nat/builder/function_info.py +1 -1
- nat/builder/intermediate_step_manager.py +1 -1
- nat/builder/llm.py +1 -1
- nat/builder/per_user_workflow_builder.py +843 -0
- nat/builder/retriever.py +1 -1
- nat/builder/sync_builder.py +571 -0
- nat/builder/user_interaction_manager.py +1 -1
- nat/builder/workflow.py +1 -1
- nat/builder/workflow_builder.py +536 -424
- nat/cli/__init__.py +1 -1
- nat/cli/cli_utils/config_override.py +1 -1
- nat/cli/cli_utils/validation.py +32 -1
- nat/cli/commands/configure/channel/add.py +1 -1
- nat/cli/commands/configure/channel/channel.py +1 -1
- nat/cli/commands/configure/channel/remove.py +1 -1
- nat/cli/commands/configure/channel/update.py +1 -1
- nat/cli/commands/configure/configure.py +1 -1
- nat/cli/commands/evaluate.py +87 -13
- nat/cli/commands/finetune.py +132 -0
- nat/cli/commands/info/__init__.py +1 -1
- nat/cli/commands/info/info.py +1 -1
- nat/cli/commands/info/list_channels.py +1 -1
- nat/cli/commands/info/list_components.py +1 -1
- nat/cli/commands/object_store/__init__.py +1 -1
- nat/cli/commands/object_store/object_store.py +1 -1
- nat/cli/commands/optimize.py +1 -1
- nat/cli/commands/{mcp → red_teaming}/__init__.py +1 -1
- nat/cli/commands/red_teaming/red_teaming.py +138 -0
- nat/cli/commands/red_teaming/red_teaming_utils.py +73 -0
- nat/cli/commands/registry/__init__.py +1 -1
- nat/cli/commands/registry/publish.py +1 -1
- nat/cli/commands/registry/pull.py +1 -1
- nat/cli/commands/registry/registry.py +1 -1
- nat/cli/commands/registry/remove.py +1 -1
- nat/cli/commands/registry/search.py +1 -1
- nat/cli/commands/sizing/__init__.py +1 -1
- nat/cli/commands/sizing/calc.py +1 -1
- nat/cli/commands/sizing/sizing.py +1 -1
- nat/cli/commands/start.py +1 -1
- nat/cli/commands/uninstall.py +1 -1
- nat/cli/commands/validate.py +1 -1
- nat/cli/commands/workflow/__init__.py +1 -1
- nat/cli/commands/workflow/workflow.py +1 -1
- nat/cli/commands/workflow/workflow_commands.py +3 -2
- nat/cli/entrypoint.py +15 -37
- nat/cli/main.py +2 -2
- nat/cli/plugin_loader.py +69 -0
- nat/cli/register_workflow.py +183 -5
- nat/cli/type_registry.py +169 -3
- nat/control_flow/register.py +1 -1
- nat/control_flow/router_agent/agent.py +1 -1
- nat/control_flow/router_agent/prompt.py +1 -1
- nat/control_flow/router_agent/register.py +1 -1
- nat/control_flow/sequential_executor.py +28 -7
- nat/data_models/__init__.py +1 -1
- nat/data_models/agent.py +1 -1
- nat/data_models/api_server.py +38 -3
- nat/data_models/authentication.py +1 -1
- nat/data_models/common.py +1 -1
- nat/data_models/component.py +7 -1
- nat/data_models/component_ref.py +34 -1
- nat/data_models/config.py +62 -1
- nat/data_models/dataset_handler.py +15 -2
- nat/data_models/discovery_metadata.py +1 -1
- nat/data_models/embedder.py +1 -1
- nat/data_models/evaluate.py +6 -1
- nat/data_models/evaluator.py +1 -1
- nat/data_models/finetuning.py +260 -0
- nat/data_models/front_end.py +1 -1
- nat/data_models/function.py +1 -1
- nat/data_models/function_dependencies.py +1 -1
- nat/data_models/gated_field_mixin.py +1 -1
- nat/data_models/interactive.py +1 -1
- nat/data_models/intermediate_step.py +29 -2
- nat/data_models/invocation_node.py +1 -1
- nat/data_models/llm.py +1 -1
- nat/data_models/logging.py +1 -1
- nat/data_models/memory.py +1 -1
- nat/data_models/middleware.py +3 -1
- nat/data_models/object_store.py +1 -1
- nat/data_models/openai_mcp.py +1 -1
- nat/data_models/optimizable.py +1 -1
- nat/data_models/optimizer.py +1 -1
- nat/data_models/profiler.py +1 -1
- nat/data_models/registry_handler.py +1 -1
- nat/data_models/retriever.py +1 -1
- nat/data_models/retry_mixin.py +1 -1
- nat/data_models/runtime_enum.py +1 -1
- nat/data_models/span.py +1 -1
- nat/data_models/step_adaptor.py +1 -1
- nat/data_models/streaming.py +1 -1
- nat/data_models/swe_bench_model.py +1 -1
- nat/data_models/telemetry_exporter.py +1 -1
- nat/data_models/thinking_mixin.py +1 -1
- nat/data_models/ttc_strategy.py +1 -1
- nat/embedder/azure_openai_embedder.py +1 -1
- nat/embedder/nim_embedder.py +1 -1
- nat/embedder/openai_embedder.py +1 -1
- nat/embedder/register.py +1 -1
- nat/eval/__init__.py +1 -1
- nat/eval/config.py +8 -1
- nat/eval/dataset_handler/dataset_downloader.py +1 -1
- nat/eval/dataset_handler/dataset_filter.py +1 -1
- nat/eval/dataset_handler/dataset_handler.py +4 -2
- nat/eval/evaluate.py +217 -80
- nat/eval/evaluator/__init__.py +1 -1
- nat/eval/evaluator/base_evaluator.py +2 -2
- nat/eval/evaluator/evaluator_model.py +3 -2
- nat/eval/intermediate_step_adapter.py +1 -1
- nat/eval/llm_validator.py +336 -0
- nat/eval/rag_evaluator/evaluate.py +17 -10
- nat/eval/rag_evaluator/register.py +1 -1
- nat/eval/red_teaming_evaluator/__init__.py +14 -0
- nat/eval/red_teaming_evaluator/data_models.py +66 -0
- nat/eval/red_teaming_evaluator/evaluate.py +327 -0
- nat/eval/red_teaming_evaluator/filter_conditions.py +75 -0
- nat/eval/red_teaming_evaluator/register.py +55 -0
- nat/eval/register.py +2 -1
- nat/eval/remote_workflow.py +1 -1
- nat/eval/runners/__init__.py +1 -1
- nat/eval/runners/config.py +1 -1
- nat/eval/runners/multi_eval_runner.py +1 -1
- nat/eval/runners/red_teaming_runner/__init__.py +24 -0
- nat/eval/runners/red_teaming_runner/config.py +282 -0
- nat/eval/runners/red_teaming_runner/report_utils.py +707 -0
- nat/eval/runners/red_teaming_runner/runner.py +867 -0
- nat/eval/runtime_evaluator/__init__.py +1 -1
- nat/eval/runtime_evaluator/evaluate.py +1 -1
- nat/eval/runtime_evaluator/register.py +1 -1
- nat/eval/runtime_event_subscriber.py +1 -1
- nat/eval/swe_bench_evaluator/evaluate.py +1 -1
- nat/eval/swe_bench_evaluator/register.py +1 -1
- nat/eval/trajectory_evaluator/evaluate.py +2 -2
- nat/eval/trajectory_evaluator/register.py +1 -1
- nat/eval/tunable_rag_evaluator/evaluate.py +5 -5
- nat/eval/tunable_rag_evaluator/register.py +1 -1
- nat/eval/usage_stats.py +1 -1
- nat/eval/utils/eval_trace_ctx.py +1 -1
- nat/eval/utils/output_uploader.py +1 -1
- nat/eval/utils/tqdm_position_registry.py +1 -1
- nat/eval/utils/weave_eval.py +1 -1
- nat/experimental/decorators/experimental_warning_decorator.py +1 -1
- nat/experimental/test_time_compute/editing/iterative_plan_refinement_editor.py +1 -1
- nat/experimental/test_time_compute/editing/llm_as_a_judge_editor.py +1 -1
- nat/experimental/test_time_compute/editing/motivation_aware_summarization.py +1 -1
- nat/experimental/test_time_compute/functions/execute_score_select_function.py +1 -1
- nat/experimental/test_time_compute/functions/multi_llm_judge_function.py +88 -0
- nat/experimental/test_time_compute/functions/plan_select_execute_function.py +1 -1
- nat/experimental/test_time_compute/functions/ttc_tool_orchestration_function.py +1 -1
- nat/experimental/test_time_compute/functions/ttc_tool_wrapper_function.py +1 -1
- nat/experimental/test_time_compute/models/editor_config.py +1 -1
- nat/experimental/test_time_compute/models/scoring_config.py +1 -1
- nat/experimental/test_time_compute/models/search_config.py +20 -2
- nat/experimental/test_time_compute/models/selection_config.py +33 -2
- nat/experimental/test_time_compute/models/stage_enums.py +1 -1
- nat/experimental/test_time_compute/models/strategy_base.py +1 -1
- nat/experimental/test_time_compute/models/tool_use_config.py +1 -1
- nat/experimental/test_time_compute/models/ttc_item.py +1 -1
- nat/experimental/test_time_compute/register.py +4 -1
- nat/experimental/test_time_compute/scoring/llm_based_agent_scorer.py +1 -1
- nat/experimental/test_time_compute/scoring/llm_based_plan_scorer.py +1 -1
- nat/experimental/test_time_compute/scoring/motivation_aware_scorer.py +1 -1
- nat/experimental/test_time_compute/search/multi_llm_generation.py +115 -0
- nat/experimental/test_time_compute/search/multi_llm_planner.py +1 -1
- nat/experimental/test_time_compute/search/multi_query_retrieval_search.py +1 -1
- nat/experimental/test_time_compute/search/single_shot_multi_plan_planner.py +1 -1
- nat/experimental/test_time_compute/selection/best_of_n_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_based_agent_output_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_based_output_merging_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_based_plan_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_judge_selection.py +127 -0
- nat/experimental/test_time_compute/selection/threshold_selector.py +1 -1
- nat/finetuning/__init__.py +24 -0
- nat/finetuning/finetuning_runtime.py +143 -0
- nat/finetuning/interfaces/__init__.py +24 -0
- nat/finetuning/interfaces/finetuning_runner.py +261 -0
- nat/finetuning/interfaces/trainer_adapter.py +103 -0
- nat/finetuning/interfaces/trajectory_builder.py +115 -0
- nat/finetuning/utils/__init__.py +15 -0
- nat/finetuning/utils/parsers/__init__.py +15 -0
- nat/finetuning/utils/parsers/adk_parser.py +141 -0
- nat/finetuning/utils/parsers/base_parser.py +238 -0
- nat/finetuning/utils/parsers/common.py +91 -0
- nat/finetuning/utils/parsers/langchain_parser.py +267 -0
- nat/finetuning/utils/parsers/llama_index_parser.py +218 -0
- nat/front_ends/__init__.py +1 -1
- nat/front_ends/console/__init__.py +1 -1
- nat/front_ends/console/authentication_flow_handler.py +1 -1
- nat/front_ends/console/console_front_end_config.py +4 -1
- nat/front_ends/console/console_front_end_plugin.py +5 -4
- nat/front_ends/console/register.py +1 -1
- nat/front_ends/cron/__init__.py +1 -1
- nat/front_ends/fastapi/__init__.py +1 -1
- nat/front_ends/fastapi/async_job.py +128 -0
- nat/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +1 -1
- nat/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +13 -9
- nat/front_ends/fastapi/dask_client_mixin.py +1 -1
- nat/front_ends/fastapi/fastapi_front_end_config.py +1 -1
- nat/front_ends/fastapi/fastapi_front_end_controller.py +1 -1
- nat/front_ends/fastapi/fastapi_front_end_plugin.py +25 -30
- nat/front_ends/fastapi/fastapi_front_end_plugin_worker.py +195 -60
- nat/front_ends/fastapi/html_snippets/__init__.py +1 -1
- nat/front_ends/fastapi/html_snippets/auth_code_grant_success.py +1 -1
- nat/front_ends/fastapi/intermediate_steps_subscriber.py +12 -1
- nat/front_ends/fastapi/job_store.py +23 -11
- nat/front_ends/fastapi/main.py +1 -1
- nat/front_ends/fastapi/message_handler.py +27 -4
- nat/front_ends/fastapi/message_validator.py +54 -2
- nat/front_ends/fastapi/register.py +1 -1
- nat/front_ends/fastapi/response_helpers.py +16 -15
- nat/front_ends/fastapi/step_adaptor.py +1 -1
- nat/front_ends/fastapi/utils.py +1 -1
- nat/front_ends/register.py +1 -2
- nat/front_ends/simple_base/__init__.py +1 -1
- nat/front_ends/simple_base/simple_front_end_plugin_base.py +6 -4
- nat/llm/aws_bedrock_llm.py +1 -1
- nat/llm/azure_openai_llm.py +10 -1
- nat/llm/dynamo_llm.py +363 -0
- nat/llm/huggingface_llm.py +177 -0
- nat/llm/litellm_llm.py +1 -1
- nat/llm/nim_llm.py +1 -1
- nat/llm/openai_llm.py +1 -1
- nat/llm/register.py +3 -1
- nat/llm/utils/__init__.py +1 -1
- nat/llm/utils/env_config_value.py +1 -1
- nat/llm/utils/error.py +1 -1
- nat/llm/utils/thinking.py +1 -1
- nat/memory/__init__.py +1 -1
- nat/memory/interfaces.py +1 -1
- nat/memory/models.py +1 -1
- nat/meta/pypi.md +1 -1
- nat/middleware/__init__.py +5 -5
- nat/middleware/cache/__init__.py +14 -0
- nat/middleware/{cache_middleware.py → cache/cache_middleware.py} +39 -42
- nat/middleware/cache/cache_middleware_config.py +44 -0
- nat/middleware/cache/register.py +33 -0
- nat/middleware/defense/__init__.py +14 -0
- nat/middleware/defense/defense_middleware.py +362 -0
- nat/middleware/defense/defense_middleware_content_guard.py +455 -0
- nat/middleware/defense/defense_middleware_data_models.py +91 -0
- nat/middleware/defense/defense_middleware_output_verifier.py +440 -0
- nat/middleware/defense/defense_middleware_pii.py +356 -0
- nat/middleware/defense/register.py +82 -0
- nat/middleware/dynamic/__init__.py +14 -0
- nat/middleware/dynamic/dynamic_function_middleware.py +962 -0
- nat/middleware/dynamic/dynamic_middleware_config.py +132 -0
- nat/middleware/dynamic/register.py +34 -0
- nat/middleware/function_middleware.py +236 -52
- nat/middleware/logging/__init__.py +14 -0
- nat/middleware/logging/logging_middleware.py +67 -0
- nat/middleware/logging/logging_middleware_config.py +28 -0
- nat/middleware/logging/register.py +33 -0
- nat/middleware/middleware.py +142 -28
- nat/middleware/red_teaming/__init__.py +14 -0
- nat/middleware/red_teaming/red_teaming_middleware.py +344 -0
- nat/middleware/red_teaming/red_teaming_middleware_config.py +112 -0
- nat/middleware/red_teaming/register.py +47 -0
- nat/middleware/register.py +7 -20
- nat/middleware/utils/__init__.py +14 -0
- nat/middleware/utils/workflow_inventory.py +155 -0
- nat/object_store/__init__.py +1 -1
- nat/object_store/in_memory_object_store.py +1 -1
- nat/object_store/interfaces.py +1 -1
- nat/object_store/models.py +1 -1
- nat/object_store/register.py +1 -1
- nat/observability/__init__.py +1 -1
- nat/observability/exporter/__init__.py +1 -1
- nat/observability/exporter/base_exporter.py +1 -1
- nat/observability/exporter/exporter.py +1 -1
- nat/observability/exporter/file_exporter.py +1 -1
- nat/observability/exporter/processing_exporter.py +1 -1
- nat/observability/exporter/raw_exporter.py +1 -1
- nat/observability/exporter/span_exporter.py +7 -1
- nat/observability/exporter_manager.py +1 -1
- nat/observability/mixin/__init__.py +1 -1
- nat/observability/mixin/batch_config_mixin.py +1 -1
- nat/observability/mixin/collector_config_mixin.py +1 -1
- nat/observability/mixin/file_mixin.py +1 -1
- nat/observability/mixin/file_mode.py +1 -1
- nat/observability/mixin/redaction_config_mixin.py +1 -1
- nat/observability/mixin/resource_conflict_mixin.py +1 -1
- nat/observability/mixin/serialize_mixin.py +1 -1
- nat/observability/mixin/tagging_config_mixin.py +1 -1
- nat/observability/mixin/type_introspection_mixin.py +1 -1
- nat/observability/processor/__init__.py +1 -1
- nat/observability/processor/batching_processor.py +1 -1
- nat/observability/processor/callback_processor.py +1 -1
- nat/observability/processor/falsy_batch_filter_processor.py +1 -1
- nat/observability/processor/intermediate_step_serializer.py +1 -1
- nat/observability/processor/processor.py +1 -1
- nat/observability/processor/processor_factory.py +1 -1
- nat/observability/processor/redaction/__init__.py +1 -1
- nat/observability/processor/redaction/contextual_redaction_processor.py +1 -1
- nat/observability/processor/redaction/contextual_span_redaction_processor.py +1 -1
- nat/observability/processor/redaction/redaction_processor.py +1 -1
- nat/observability/processor/redaction/span_header_redaction_processor.py +1 -1
- nat/observability/processor/span_tagging_processor.py +1 -1
- nat/observability/register.py +1 -1
- nat/observability/utils/__init__.py +1 -1
- nat/observability/utils/dict_utils.py +1 -1
- nat/observability/utils/time_utils.py +1 -1
- nat/profiler/calc/__init__.py +1 -1
- nat/profiler/calc/calc_runner.py +3 -3
- nat/profiler/calc/calculations.py +1 -1
- nat/profiler/calc/data_models.py +1 -1
- nat/profiler/calc/plot.py +30 -3
- nat/profiler/callbacks/agno_callback_handler.py +1 -1
- nat/profiler/callbacks/base_callback_class.py +1 -1
- nat/profiler/callbacks/langchain_callback_handler.py +33 -3
- nat/profiler/callbacks/llama_index_callback_handler.py +13 -10
- nat/profiler/callbacks/semantic_kernel_callback_handler.py +1 -1
- nat/profiler/callbacks/token_usage_base_model.py +1 -1
- nat/profiler/data_frame_row.py +1 -1
- nat/profiler/data_models.py +1 -1
- nat/profiler/decorators/framework_wrapper.py +16 -1
- nat/profiler/decorators/function_tracking.py +1 -1
- nat/profiler/forecasting/config.py +1 -1
- nat/profiler/forecasting/model_trainer.py +1 -1
- nat/profiler/forecasting/models/__init__.py +1 -1
- nat/profiler/forecasting/models/forecasting_base_model.py +1 -1
- nat/profiler/forecasting/models/linear_model.py +1 -1
- nat/profiler/forecasting/models/random_forest_regressor.py +1 -1
- nat/profiler/inference_metrics_model.py +1 -1
- nat/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +1 -1
- nat/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +1 -1
- nat/profiler/inference_optimization/data_models.py +1 -1
- nat/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +1 -1
- nat/profiler/inference_optimization/experimental/prefix_span_analysis.py +1 -1
- nat/profiler/inference_optimization/llm_metrics.py +1 -1
- nat/profiler/inference_optimization/prompt_caching.py +1 -1
- nat/profiler/inference_optimization/token_uniqueness.py +1 -1
- nat/profiler/inference_optimization/workflow_runtimes.py +1 -1
- nat/profiler/intermediate_property_adapter.py +1 -1
- nat/profiler/parameter_optimization/optimizable_utils.py +1 -1
- nat/profiler/parameter_optimization/optimizer_runtime.py +1 -1
- nat/profiler/parameter_optimization/parameter_optimizer.py +1 -1
- nat/profiler/parameter_optimization/parameter_selection.py +1 -1
- nat/profiler/parameter_optimization/pareto_visualizer.py +1 -1
- nat/profiler/parameter_optimization/prompt_optimizer.py +1 -1
- nat/profiler/parameter_optimization/update_helpers.py +1 -1
- nat/profiler/profile_runner.py +1 -1
- nat/profiler/utils.py +1 -1
- nat/registry_handlers/local/local_handler.py +1 -1
- nat/registry_handlers/local/register_local.py +1 -1
- nat/registry_handlers/metadata_factory.py +1 -1
- nat/registry_handlers/package_utils.py +1 -1
- nat/registry_handlers/pypi/pypi_handler.py +1 -1
- nat/registry_handlers/pypi/register_pypi.py +1 -1
- nat/registry_handlers/register.py +1 -1
- nat/registry_handlers/registry_handler_base.py +1 -1
- nat/registry_handlers/rest/register_rest.py +1 -1
- nat/registry_handlers/rest/rest_handler.py +1 -1
- nat/registry_handlers/schemas/headers.py +1 -1
- nat/registry_handlers/schemas/package.py +1 -1
- nat/registry_handlers/schemas/publish.py +1 -1
- nat/registry_handlers/schemas/pull.py +1 -1
- nat/registry_handlers/schemas/remove.py +1 -1
- nat/registry_handlers/schemas/search.py +1 -1
- nat/registry_handlers/schemas/status.py +1 -1
- nat/retriever/interface.py +1 -1
- nat/retriever/milvus/__init__.py +1 -1
- nat/retriever/milvus/register.py +1 -1
- nat/retriever/milvus/retriever.py +1 -1
- nat/retriever/models.py +1 -1
- nat/retriever/nemo_retriever/__init__.py +1 -1
- nat/retriever/nemo_retriever/register.py +1 -1
- nat/retriever/nemo_retriever/retriever.py +5 -5
- nat/retriever/register.py +1 -1
- nat/runtime/__init__.py +1 -1
- nat/runtime/loader.py +10 -3
- nat/runtime/metrics.py +180 -0
- nat/runtime/runner.py +1 -5
- nat/runtime/session.py +451 -32
- nat/runtime/user_metadata.py +1 -1
- nat/settings/global_settings.py +1 -1
- nat/tool/chat_completion.py +1 -1
- nat/tool/code_execution/README.md +1 -1
- nat/tool/code_execution/code_sandbox.py +1 -1
- nat/tool/code_execution/local_sandbox/Dockerfile.sandbox +1 -1
- nat/tool/code_execution/local_sandbox/__init__.py +1 -1
- nat/tool/code_execution/local_sandbox/local_sandbox_server.py +1 -1
- nat/tool/code_execution/local_sandbox/start_local_sandbox.sh +1 -1
- nat/tool/code_execution/register.py +1 -1
- nat/tool/code_execution/utils.py +1 -1
- nat/tool/datetime_tools.py +1 -1
- nat/tool/document_search.py +1 -1
- nat/tool/github_tools.py +1 -1
- nat/tool/memory_tools/add_memory_tool.py +1 -1
- nat/tool/memory_tools/delete_memory_tool.py +1 -1
- nat/tool/memory_tools/get_memory_tool.py +1 -1
- nat/tool/nvidia_rag.py +2 -2
- nat/tool/register.py +1 -1
- nat/tool/retriever.py +1 -1
- nat/tool/server_tools.py +1 -1
- nat/utils/__init__.py +8 -5
- nat/utils/callable_utils.py +1 -1
- nat/utils/data_models/schema_validator.py +1 -1
- nat/utils/debugging_utils.py +1 -1
- nat/utils/decorators.py +1 -1
- nat/utils/dump_distro_mapping.py +1 -1
- nat/utils/exception_handlers/automatic_retries.py +3 -3
- nat/utils/exception_handlers/schemas.py +1 -1
- nat/utils/io/model_processing.py +1 -1
- nat/utils/io/supress_logs.py +33 -0
- nat/utils/io/yaml_tools.py +1 -1
- nat/utils/log_levels.py +1 -1
- nat/utils/log_utils.py +13 -1
- nat/utils/metadata_utils.py +1 -1
- nat/utils/optional_imports.py +1 -1
- nat/utils/producer_consumer_queue.py +1 -1
- nat/utils/reactive/base/observable_base.py +1 -1
- nat/utils/reactive/base/observer_base.py +1 -1
- nat/utils/reactive/base/subject_base.py +1 -1
- nat/utils/reactive/observable.py +1 -1
- nat/utils/reactive/observer.py +1 -1
- nat/utils/reactive/subject.py +1 -1
- nat/utils/reactive/subscription.py +1 -1
- nat/utils/responses_api.py +1 -1
- nat/utils/settings/global_settings.py +1 -1
- nat/utils/string_utils.py +1 -1
- nat/utils/type_converter.py +18 -5
- nat/utils/type_utils.py +1 -1
- nat/utils/url_utils.py +1 -1
- {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/METADATA +39 -14
- nvidia_nat-1.4.0a20260113.dist-info/RECORD +547 -0
- nvidia_nat-1.4.0a20260113.dist-info/entry_points.txt +38 -0
- nat/cli/commands/mcp/mcp.py +0 -986
- nat/front_ends/mcp/introspection_token_verifier.py +0 -73
- nat/front_ends/mcp/mcp_front_end_config.py +0 -109
- nat/front_ends/mcp/mcp_front_end_plugin.py +0 -155
- nat/front_ends/mcp/mcp_front_end_plugin_worker.py +0 -388
- nat/front_ends/mcp/memory_profiler.py +0 -320
- nat/front_ends/mcp/register.py +0 -27
- nat/front_ends/mcp/tool_converter.py +0 -321
- nvidia_nat-1.4.0a20251120.dist-info/RECORD +0 -488
- nvidia_nat-1.4.0a20251120.dist-info/entry_points.txt +0 -23
- {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/WHEEL +0 -0
- {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
- {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE.md +0 -0
- {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,707 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
"""Report utilities for red teaming evaluation results."""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import logging
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import Any
|
|
22
|
+
|
|
23
|
+
import pandas as pd
|
|
24
|
+
import plotly.graph_objects as go
|
|
25
|
+
import plotly.io as pio
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _validate_columns(df: pd.DataFrame, required_columns: list[str], context: str = "") -> None:
    """Check that ``df`` contains every column named in ``required_columns``.

    Args:
        df: DataFrame whose columns are inspected.
        required_columns: Column names that have to be present.
        context: Optional label (e.g., the calling function's name) that is
            embedded in the error message when given.

    Raises:
        ValueError: If at least one required column is absent. The message
            lists the missing columns and the columns that are available.
    """
    present = df.columns

    # Collect absent names in the order the caller listed them.
    absent = []
    for name in required_columns:
        if name not in present:
            absent.append(name)

    # Nothing missing: validation passes silently.
    if not absent:
        return

    location = f" in {context}" if context else ""
    raise ValueError(f"Missing required column(s){location}: {absent}. Available columns: {list(present)}")
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def plot_score_boxplot(
    df: pd.DataFrame,
    x: str,
    y: str = "score",
    title: str | None = None,
    x_label: str | None = None,
    y_label: str = "Risk Score",
    y_range: tuple[float, float] = (-0.05, 1.05),  # Start below 0 to show full box when min=0
    box_color: str = "rgb(55, 126, 184)",
    box_fill_opacity: float = 0.3,
    point_color: str = "rgba(255, 50, 0, 0.5)",
    point_size: int = 6,
    jitter: float = 0.3,
) -> go.Figure:
    """Create a box plot with data points overlaid inside the boxes.

    One box is drawn per distinct value of ``df[x]``, in order of first
    appearance. Each box gets a dotted red line at the group's mean and a
    "mean: <value>" annotation to the left of the box.

    Args:
        df: DataFrame containing the data. Its index is shown as ``uid`` in
            the hover text of each point.
        x: Column name for x-axis (grouping variable).
        y: Column name for y-axis (score values).
        title: Plot title. Defaults to "Score Distribution by {x}".
        x_label: X-axis label. Defaults to the column name.
        y_label: Y-axis label.
        y_range: Tuple of (min, max) for y-axis range.
        box_color: Color for the box outline. When given in ``rgb(...)`` form
            it is also used, with ``box_fill_opacity``, for the fill.
        box_fill_opacity: Opacity for box fill (0-1). Only applied when
            ``box_color`` is in ``rgb(...)`` form.
        point_color: RGBA color for data points.
        point_size: Size of data points.
        jitter: Horizontal jitter for points (0-1).

    Returns:
        The Plotly Figure object. If ``df`` has no rows, the figure contains
        no traces but still carries the title, axis labels and y-range.

    Raises:
        ValueError: If required columns are missing from the DataFrame.
    """
    _validate_columns(df, [x, y], "plot_score_boxplot")

    if title is None:
        title = f"Score Distribution by {x}"
    if x_label is None:
        x_label = x

    # Derive a translucent fill from an "rgb(...)" outline color. Any other
    # format (already rgba, hex, named color, ...) is used unchanged.
    if box_color.startswith("rgb("):
        box_fill_color = box_color.replace("rgb(", "rgba(").replace(")", f", {box_fill_opacity})")
    else:
        box_fill_color = box_color

    # Use go.Box directly for explicit control over data
    fig = go.Figure()

    unique_x_values = df[x].unique()
    n_categories = len(unique_x_values)

    # Wider boxes for fewer categories, narrower for more, clamped to
    # [0.2, 0.67]. An empty DataFrame has zero categories; fall back to the
    # maximum width instead of dividing by zero (no boxes are drawn anyway).
    box_width = max(0.2, min(0.67, 1.5 / n_categories)) if n_categories else 0.67
    half_width = box_width / 2

    # (ordinal x position of the box, mean of its y values)
    means: list[tuple[int, float]] = []

    for i, x_val in enumerate(unique_x_values):
        subset = df.loc[df[x] == x_val]
        y_values = subset[y].tolist()  # Explicitly convert to list
        hover_text = subset.index.tolist()

        mean_val = sum(y_values) / len(y_values) if y_values else 0
        means.append((i, mean_val))

        fig.add_trace(
            go.Box(
                y=y_values,
                name=str(x_val),
                boxpoints="all",
                pointpos=0,  # center the points on the box rather than beside it
                jitter=jitter,
                width=box_width,
                marker=dict(color=point_color, size=point_size),
                line=dict(color=box_color),
                fillcolor=box_fill_color,
                text=hover_text,
                hovertemplate="uid: %{text}<br>score: %{y}<extra></extra>",
            ))

    # Add mean lines and annotations for each box. Boxes are addressed by
    # their ordinal position (0, 1, ...) along the x-axis.
    for x_pos, mean_val in means:
        # Dotted horizontal line at the mean, spanning exactly the box width
        fig.add_shape(
            type="line",
            x0=x_pos - half_width,
            x1=x_pos + half_width,
            y0=mean_val,
            y1=mean_val,
            line=dict(color="red", width=2, dash="dot"),
        )
        # Mean label right next to the box (anchored at its left edge)
        fig.add_annotation(
            x=x_pos - half_width,
            y=mean_val,
            text=f"mean: {mean_val:.2f}",
            showarrow=False,
            font=dict(size=12, color="red", weight="bold"),
            xanchor="right",
            yanchor="middle",
            xshift=-5,  # Small gap from the box edge
        )

    fig.update_layout(
        title=title,
        xaxis_title=x_label,
        yaxis_title=y_label,
        yaxis_range=list(y_range),
        showlegend=False,
    )

    return fig
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def plot_success_rate_bar(
    df: pd.DataFrame,
    x: str,
    y: str = "score",
    threshold: float = 0.5,
    title: str | None = None,
    x_label: str | None = None,
    y_label: str = "Attack Success Rate (%)",
    bar_color: str = "rgb(55, 126, 184)",
) -> go.Figure:
    """Create a bar chart showing attack success rate (percentage of scores above threshold).

    One bar is drawn per distinct non-null value of ``df[x]``, in order of first
    appearance. A row counts as a success when ``df[y]`` is strictly greater than
    ``threshold``.

    Args:
        df: DataFrame containing the data.
        x: Column name for x-axis (grouping variable).
        y: Column name for the score values.
        threshold: Score threshold to consider as "success" (default 0.5).
        title: Plot title. Defaults to
            "Attack Success Rate by {x} (threshold={threshold})".
        x_label: X-axis label. Defaults to the column name.
        y_label: Y-axis label.
        bar_color: Color for the bars.

    Returns:
        The Plotly Figure object.

    Raises:
        ValueError: If required columns are missing from the DataFrame.
    """
    _validate_columns(df, [x, y], "plot_success_rate_bar")

    if title is None:
        title = f"Attack Success Rate by {x} (threshold={threshold})"
    if x_label is None:
        x_label = x

    # Calculate success rate for each group
    x_labels: list[str] = []
    success_rates: list[float] = []
    counts: list[str] = []

    # NaN never compares equal to itself, so a NaN group key would always yield
    # an empty mask and a misleading "nan" bar with "0/0"; skip such keys.
    for x_val in df[x].dropna().unique():
        subset = df.loc[df[x] == x_val]
        total = len(subset)
        successes = int((subset[y] > threshold).sum())
        rate = (successes / total * 100) if total > 0 else 0.0

        x_labels.append(str(x_val))
        success_rates.append(rate)
        counts.append(f"{successes}/{total}")

    fig = go.Figure()

    fig.add_trace(
        go.Bar(
            x=x_labels,
            y=success_rates,
            # On-bar label: rate on the first line, raw count on the second.
            text=[f"{rate:.1f}%<br>({count})" for rate, count in zip(success_rates, counts, strict=True)],
            textposition="outside",
            marker_color=bar_color,
            # The raw counts travel separately so the hover can show them
            # without repeating the rate that is already part of `text`.
            customdata=counts,
            hovertemplate="%{x}<br>Attack Success Rate: %{y:.1f}%<br>Count: %{customdata}<extra></extra>",
        ))

    fig.update_layout(
        title=title,
        xaxis_title=x_label,
        yaxis_title=y_label,
        yaxis_range=[-10, 125],  # Leave room for labels above bars
        showlegend=False,
    )

    return fig
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def generate_standard_plots(df: pd.DataFrame) -> list[tuple[str, str, go.Figure | None]]:
    """Build the standard set of red teaming plots, one section per grouping column.

    Sections are emitted in this order: scenario ID, scenario group (only when the
    ``scenario_group`` column exists), output filtering condition, and tags (only
    when the ``tags`` column exists and has at least one non-null exploded value).
    Each section consists of a header entry followed by a score box plot and an
    attack-success-rate bar chart.

    Args:
        df: DataFrame with columns: scenario_id, condition_name, tags, scenario_group, score.

    Returns:
        List of tuples (filename, title, figure) for each plot.
        Section headers use the filename "_section" and have figure=None.

    Raises:
        ValueError: If required columns are missing from the DataFrame.
    """
    _validate_columns(df, ["scenario_id", "score", "condition_name"], "generate_standard_plots")

    plots: list[tuple[str, str, go.Figure | None]] = []

    def _add_group(
        frame: pd.DataFrame,
        column: str,
        section_title: str,
        box_title: str,
        bar_title: str,
        axis_label: str,
    ) -> None:
        # Append the section header, then the box plot, then the bar chart.
        plots.append(("_section", section_title, None))

        box_fig = plot_score_boxplot(frame, x=column, y="score", title=box_title, x_label=axis_label)
        plots.append((f"{column}_boxplot", "Score Distribution", box_fig))

        bar_fig = plot_success_rate_bar(frame, x=column, y="score", title=bar_title, x_label=axis_label)
        plots.append((f"{column}_success_rate", "Attack Success Rate", bar_fig))

    # Results by scenario ID (always present; validated above)
    _add_group(
        df,
        "scenario_id",
        "Results by group: Scenario ID",
        "Score Distribution by Scenario",
        "Attack Success Rate by Scenario",
        "Scenario",
    )

    # Results by scenario group (optional column)
    if "scenario_group" in df.columns:
        _add_group(
            df,
            "scenario_group",
            "Results by group: Scenario Group",
            "Risk Score Distribution by Scenario Group",
            "Attack Success Rate by Scenario Group",
            "Scenario Group",
        )

    # Results by output filtering condition (always present; validated above)
    _add_group(
        df,
        "condition_name",
        "Results by group: Output Filtering Condition",
        "Score Distribution by Output Filtering Condition",
        "Attack Success Rate by Output Filtering Condition",
        "Condition",
    )

    # Results by tag (optional column): one row per (result, tag) pair
    if "tags" in df.columns:
        tagged = df.explode("tags").dropna(subset=["tags"])
        if not tagged.empty:
            _add_group(
                tagged,
                "tags",
                "Results by group: Tags",
                "Risk Score Distribution by Tag",
                "Attack Success Rate by Tag",
                "Tag",
            )

    return plots
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
def _get_risk_color(value: float, max_value: float = 1.0) -> str:
    """Get a color that transitions from low to high risk based on value.

    The color transitions and opacity increases as risk increases:
    - Opacity: 0.3 (at 0) -> 1.0 (at max)
    - Color: muted -> intense red

    Args:
        value: The risk value (0 to max_value). Values outside that range are
            clamped; NaN is treated as 0.
        max_value: The maximum value (1.0 for scores, 100.0 for percentages).
            A max_value of 0 is treated as "no risk" instead of dividing by zero.

    Returns:
        RGBA color string.
    """
    import math

    # Normalize to 0-1 range. Guard the division, and map NaN to 0: NaN would
    # otherwise pass through min/max unchanged and make int() raise below.
    ratio = value / max_value if max_value else 0.0
    if math.isnan(ratio):
        ratio = 0.0
    normalized = min(max(ratio, 0.0), 1.0)

    # Interpolate color; the lower half of the range is squared so that
    # low-risk values stay visibly muted.
    normalized = normalized if normalized >= 0.5 else normalized**2
    r = int(30 + (255 - 30) * normalized)
    g = int(10 + (0 - 10) * normalized)
    b = int(10 + (0 - 10) * normalized)

    # Interpolate opacity from 0.3 to 1.0
    opacity = 0.3 + 0.7 * normalized

    return f"rgba({r}, {g}, {b}, {opacity})"
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
def _render_summary_html(summary: dict[str, Any] | None) -> str:
|
|
386
|
+
"""Render the summary section as HTML.
|
|
387
|
+
|
|
388
|
+
Args:
|
|
389
|
+
summary: The summary dictionary from _compute_result_summary.
|
|
390
|
+
|
|
391
|
+
Returns:
|
|
392
|
+
HTML string for the summary section.
|
|
393
|
+
"""
|
|
394
|
+
if not summary:
|
|
395
|
+
return ""
|
|
396
|
+
|
|
397
|
+
overall_score = summary.get("overall_score", 0.0)
|
|
398
|
+
attack_success_rate = summary.get("attack_success_rate", 0.0)
|
|
399
|
+
num_scenarios = summary.get("num_scenarios", 0)
|
|
400
|
+
total_workflow_runs = summary.get("total_workflow_runs", 0)
|
|
401
|
+
total_evaluations = summary.get("total_evaluations", 0)
|
|
402
|
+
evaluation_successes = summary.get("evaluation_successes", 0)
|
|
403
|
+
evaluation_failures = summary.get("evaluation_failures", 0)
|
|
404
|
+
per_scenario = summary.get("per_scenario_summary", {})
|
|
405
|
+
|
|
406
|
+
# Get dynamic colors based on risk values
|
|
407
|
+
score_color = _get_risk_color(overall_score, 1.0)
|
|
408
|
+
asr_color = _get_risk_color(attack_success_rate, 1.0)
|
|
409
|
+
|
|
410
|
+
# Build per-scenario rows with ASR as first data column
|
|
411
|
+
scenario_rows = ""
|
|
412
|
+
for scenario_id, stats in per_scenario.items():
|
|
413
|
+
scenario_asr = stats.get("attack_success_rate", 0.0)
|
|
414
|
+
mean_score = stats.get("mean_score", 0.0)
|
|
415
|
+
min_score = stats.get("min_score", 0.0)
|
|
416
|
+
max_score = stats.get("max_score", 0.0)
|
|
417
|
+
row_asr_color = _get_risk_color(scenario_asr, 1.0)
|
|
418
|
+
scenario_rows += f"""
|
|
419
|
+
<tr>
|
|
420
|
+
<td>{scenario_id}</td>
|
|
421
|
+
<td style="background-color: {row_asr_color}; color: white; font-weight: bold;">{scenario_asr:.1%}</td>
|
|
422
|
+
<td>{mean_score:.3f}</td>
|
|
423
|
+
<td>{min_score:.3f}</td>
|
|
424
|
+
<td>{max_score:.3f}</td>
|
|
425
|
+
</tr>"""
|
|
426
|
+
|
|
427
|
+
return f"""
|
|
428
|
+
<div class="summary-section">
|
|
429
|
+
<h2 class="section-header">Summary</h2>
|
|
430
|
+
<div class="summary-container">
|
|
431
|
+
<div class="summary-stats">
|
|
432
|
+
<div class="stat-card risk-score" style="background-color: {score_color}; border: none;">
|
|
433
|
+
<div class="stat-label" style="color: rgba(255,255,255,0.9);">Overall Risk Score ↓</div>
|
|
434
|
+
<div class="stat-value" style="color: white;">{overall_score:.3f}</div>
|
|
435
|
+
</div>
|
|
436
|
+
<div class="stat-card risk-score" style="background-color: {asr_color}; border: none;">
|
|
437
|
+
<div class="stat-label" style="color: rgba(255,255,255,0.9);">Attack Success Rate ↓</div>
|
|
438
|
+
<div class="stat-value" style="color: white;">{attack_success_rate:.1%}</div>
|
|
439
|
+
</div>
|
|
440
|
+
<div class="stat-card">
|
|
441
|
+
<div class="stat-label">Scenarios</div>
|
|
442
|
+
<div class="stat-value">{num_scenarios}</div>
|
|
443
|
+
</div>
|
|
444
|
+
<div class="stat-card">
|
|
445
|
+
<div class="stat-label">Workflow Runs</div>
|
|
446
|
+
<div class="stat-value">{total_workflow_runs}</div>
|
|
447
|
+
</div>
|
|
448
|
+
<div class="stat-card">
|
|
449
|
+
<div class="stat-label">Total Evaluations</div>
|
|
450
|
+
<div class="stat-value">{total_evaluations}</div>
|
|
451
|
+
</div>
|
|
452
|
+
<div class="stat-card success">
|
|
453
|
+
<div class="stat-label">Successes</div>
|
|
454
|
+
<div class="stat-value">{evaluation_successes}</div>
|
|
455
|
+
</div>
|
|
456
|
+
<div class="stat-card failure">
|
|
457
|
+
<div class="stat-label">Failures</div>
|
|
458
|
+
<div class="stat-value">{evaluation_failures}</div>
|
|
459
|
+
</div>
|
|
460
|
+
</div>
|
|
461
|
+
<h3 class="plot-title">Per-Scenario Results</h3>
|
|
462
|
+
<table class="scenario-table">
|
|
463
|
+
<thead>
|
|
464
|
+
<tr>
|
|
465
|
+
<th>Scenario</th>
|
|
466
|
+
<th>ASR</th>
|
|
467
|
+
<th>Mean Score</th>
|
|
468
|
+
<th>Min Score</th>
|
|
469
|
+
<th>Max Score</th>
|
|
470
|
+
</tr>
|
|
471
|
+
</thead>
|
|
472
|
+
<tbody>
|
|
473
|
+
{scenario_rows}
|
|
474
|
+
</tbody>
|
|
475
|
+
</table>
|
|
476
|
+
</div>
|
|
477
|
+
</div>
|
|
478
|
+
"""
|
|
479
|
+
|
|
480
|
+
|
|
481
|
+
def save_combined_html(
    plots: list[tuple[str, str, go.Figure | None]],
    output_path: Path,
    page_title: str = "Red Teaming Evaluation Results",
    summary: dict[str, Any] | None = None,
) -> Path:
    """Save all plots in a single interactive HTML document.

    The document consists of a styled header, the optional summary section, and
    then each entry of ``plots`` in order. Entries whose figure is None are
    rendered as section headers; all others are rendered as a titled plot.
    Titles are HTML-escaped. The plotly.js ``<script>`` tag is emitted once, by
    the first rendered figure, so the loaded library version matches the
    installed plotly package.

    Args:
        plots: List of (filename, title, figure) tuples.
        output_path: Path for the combined HTML file. Parent directories are
            created if needed.
        page_title: Title for the HTML page.
        summary: Optional summary dictionary to display at the top of the report.

    Returns:
        Path to the saved HTML file.
    """
    from html import escape

    # The page title may contain arbitrary text (e.g. a directory name).
    safe_page_title = escape(page_title)

    html_parts: list[str] = []

    # HTML header with styling
    html_parts.append(f"""<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>{safe_page_title}</title>
    <style>
        body {{
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, sans-serif;
            max-width: 1200px;
            margin: 0 auto;
            padding: 20px;
            background-color: #f5f5f5;
        }}
        h1 {{
            color: #333;
            border-bottom: 2px solid #76b900;
            padding-bottom: 10px;
        }}
        h2.section-header {{
            margin-top: 50px;
            padding: 15px 20px;
            background-color: #76b900;
            color: white;
            border-radius: 8px;
            font-size: 1.4em;
        }}
        h3.plot-title {{
            color: #555;
            margin-top: 20px;
            margin-bottom: 10px;
            font-size: 1.1em;
        }}
        .plot-container {{
            background-color: white;
            border-radius: 8px;
            padding: 20px;
            margin-bottom: 30px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }}
        .summary-container {{
            background-color: white;
            border-radius: 8px;
            padding: 20px;
            margin-bottom: 30px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }}
        .summary-stats {{
            display: flex;
            flex-wrap: nowrap;
            gap: 10px;
            margin-bottom: 20px;
        }}
        .stat-card {{
            background-color: #f8f9fa;
            border-radius: 6px;
            padding: 10px 12px;
            min-width: 90px;
            flex: 1;
            text-align: center;
            border: 1px solid #e9ecef;
        }}
        .stat-card.risk-score {{
            color: white;
            border: none;
        }}
        .stat-card.risk-score .stat-label {{
            color: rgba(255,255,255,0.9);
        }}
        .stat-card.success {{
            border-left: 4px solid #28a745;
        }}
        .stat-card.failure {{
            border-left: 4px solid #dc3545;
        }}
        .stat-label {{
            font-size: 0.75em;
            color: #666;
            margin-bottom: 3px;
        }}
        .stat-value {{
            font-size: 1.2em;
            font-weight: bold;
        }}
        .scenario-table {{
            width: 100%;
            border-collapse: collapse;
            margin-top: 10px;
        }}
        .scenario-table th,
        .scenario-table td {{
            padding: 10px 15px;
            text-align: left;
            border-bottom: 1px solid #e9ecef;
        }}
        .scenario-table th {{
            background-color: #f8f9fa;
            font-weight: 600;
            color: #333;
        }}
        .scenario-table tr:hover {{
            background-color: #f8f9fa;
        }}
    </style>
</head>
<body>
    <h1>{safe_page_title}</h1>
""")

    # Add summary section at the top
    html_parts.append(_render_summary_html(summary))

    # "plotly-latest.min.js" on the Plotly CDN is frozen at an old 1.x release,
    # so let the first figure emit a CDN tag for the plotly.js version that
    # matches the installed plotly package; later figures reuse that script.
    plotlyjs_pending = True

    # Add each plot with its title (or section header)
    for _filename, title, fig in plots:
        safe_title = escape(title)
        if fig is None:
            # This is a section header
            html_parts.append(f"""
    <h2 class="section-header">{safe_title}</h2>
""")
        else:
            # This is a regular plot
            plot_html = pio.to_html(
                fig,
                full_html=False,
                include_plotlyjs="cdn" if plotlyjs_pending else False,
            )
            plotlyjs_pending = False
            html_parts.append(f"""
    <h3 class="plot-title">{safe_title}</h3>
    <div class="plot-container">
        {plot_html}
    </div>
""")

    # HTML footer
    html_parts.append("""
</body>
</html>
""")

    # Write combined HTML
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text("".join(html_parts), encoding="utf-8")
    logger.debug("Saved combined HTML: %s", output_path)

    return output_path
|
|
643
|
+
|
|
644
|
+
|
|
645
|
+
def generate_and_save_report(
    flat_results: list[dict[str, Any]] | pd.DataFrame,
    output_dir: Path,
    summary: dict[str, Any] | None = None,
) -> Path | None:
    """Turn flat red teaming results into a combined HTML report.

    Main entry point for plotting. Steps:
    1. Bail out early when there are no results.
    2. Convert the results to a DataFrame (a DataFrame input is used as is).
    3. Discard rows that carry an ``error_message`` (failed evaluations).
    4. Index the frame by ``uid`` (when present) so plots can show it on hover.
    5. Generate the standard plots and write them, with the summary, to
       ``output_dir / "report.html"``.

    Args:
        flat_results: List of flat result dictionaries from _build_flat_results,
            or an equivalent DataFrame.
        output_dir: Directory that receives ``report.html``; its name is used in
            the page title.
        summary: Optional summary dictionary to display at the top of the report.

    Returns:
        Path to the combined HTML report, or None when there is nothing to plot.
    """
    report_path = output_dir / "report.html"

    # Nothing to do for an empty DataFrame or an empty list.
    if isinstance(flat_results, pd.DataFrame):
        nothing_to_plot = flat_results.empty
    else:
        nothing_to_plot = isinstance(flat_results, list) and not flat_results
    if nothing_to_plot:
        logger.warning("No results to plot")
        return None

    # Convert to DataFrame
    df = flat_results if isinstance(flat_results, pd.DataFrame) else pd.DataFrame(flat_results)

    # Failed evaluations are marked by a non-null error_message; drop them.
    if "error_message" in df.columns:
        has_error = df["error_message"].notna()
        error_count = int(has_error.sum())
        if error_count > 0:
            logger.info("Dropping %d rows with error_message from plotting", error_count)
            df = df[~has_error]

    if df.empty:
        logger.warning("No valid results to plot after filtering errors")
        return None

    # The plot helpers use the index as hover text, so index by uid if available.
    if "uid" in df.columns:
        df = df.set_index("uid")

    plots = generate_standard_plots(df)
    if not plots:
        logger.warning("No plots generated")
        return None

    # Save combined HTML report
    return save_combined_html(
        plots,
        report_path,
        page_title=f"Red Teaming Evaluation Results for run: {output_dir.name}",
        summary=summary,
    )
|