nvidia-nat 1.4.0a20251120__py3-none-any.whl → 1.4.0a20260113__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (492)
  1. aiq/__init__.py +1 -1
  2. nat/{front_ends/mcp → agent/auto_memory_wrapper}/__init__.py +1 -1
  3. nat/agent/auto_memory_wrapper/agent.py +278 -0
  4. nat/agent/auto_memory_wrapper/register.py +227 -0
  5. nat/agent/auto_memory_wrapper/state.py +30 -0
  6. nat/agent/base.py +1 -1
  7. nat/agent/dual_node.py +1 -1
  8. nat/agent/prompt_optimizer/prompt.py +1 -1
  9. nat/agent/prompt_optimizer/register.py +1 -1
  10. nat/agent/react_agent/agent.py +16 -9
  11. nat/agent/react_agent/output_parser.py +2 -2
  12. nat/agent/react_agent/prompt.py +3 -2
  13. nat/agent/react_agent/register.py +2 -2
  14. nat/agent/react_agent/register_per_user_agent.py +104 -0
  15. nat/agent/reasoning_agent/reasoning_agent.py +1 -1
  16. nat/agent/register.py +3 -1
  17. nat/agent/responses_api_agent/__init__.py +1 -1
  18. nat/agent/responses_api_agent/register.py +1 -1
  19. nat/agent/rewoo_agent/agent.py +9 -4
  20. nat/agent/rewoo_agent/prompt.py +1 -1
  21. nat/agent/rewoo_agent/register.py +1 -1
  22. nat/agent/tool_calling_agent/agent.py +5 -4
  23. nat/agent/tool_calling_agent/register.py +1 -1
  24. nat/authentication/__init__.py +1 -1
  25. nat/authentication/api_key/__init__.py +1 -1
  26. nat/authentication/api_key/api_key_auth_provider.py +1 -1
  27. nat/authentication/api_key/api_key_auth_provider_config.py +22 -7
  28. nat/authentication/api_key/register.py +1 -1
  29. nat/authentication/credential_validator/__init__.py +1 -1
  30. nat/authentication/credential_validator/bearer_token_validator.py +1 -1
  31. nat/authentication/exceptions/__init__.py +1 -1
  32. nat/authentication/exceptions/api_key_exceptions.py +1 -1
  33. nat/authentication/http_basic_auth/http_basic_auth_provider.py +1 -1
  34. nat/authentication/http_basic_auth/register.py +1 -1
  35. nat/authentication/interfaces.py +1 -1
  36. nat/authentication/oauth2/__init__.py +1 -1
  37. nat/authentication/oauth2/oauth2_auth_code_flow_provider.py +1 -1
  38. nat/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +1 -1
  39. nat/authentication/oauth2/oauth2_resource_server_config.py +1 -1
  40. nat/authentication/oauth2/register.py +1 -1
  41. nat/authentication/register.py +1 -1
  42. nat/builder/builder.py +511 -1
  43. nat/builder/child_builder.py +385 -0
  44. nat/builder/component_utils.py +28 -4
  45. nat/builder/context.py +17 -1
  46. nat/builder/embedder.py +1 -1
  47. nat/builder/eval_builder.py +19 -7
  48. nat/builder/evaluator.py +1 -1
  49. nat/builder/framework_enum.py +2 -1
  50. nat/builder/front_end.py +1 -1
  51. nat/builder/function.py +40 -3
  52. nat/builder/function_base.py +1 -1
  53. nat/builder/function_info.py +1 -1
  54. nat/builder/intermediate_step_manager.py +1 -1
  55. nat/builder/llm.py +1 -1
  56. nat/builder/per_user_workflow_builder.py +843 -0
  57. nat/builder/retriever.py +1 -1
  58. nat/builder/sync_builder.py +571 -0
  59. nat/builder/user_interaction_manager.py +1 -1
  60. nat/builder/workflow.py +1 -1
  61. nat/builder/workflow_builder.py +536 -424
  62. nat/cli/__init__.py +1 -1
  63. nat/cli/cli_utils/config_override.py +1 -1
  64. nat/cli/cli_utils/validation.py +32 -1
  65. nat/cli/commands/configure/channel/add.py +1 -1
  66. nat/cli/commands/configure/channel/channel.py +1 -1
  67. nat/cli/commands/configure/channel/remove.py +1 -1
  68. nat/cli/commands/configure/channel/update.py +1 -1
  69. nat/cli/commands/configure/configure.py +1 -1
  70. nat/cli/commands/evaluate.py +87 -13
  71. nat/cli/commands/finetune.py +132 -0
  72. nat/cli/commands/info/__init__.py +1 -1
  73. nat/cli/commands/info/info.py +1 -1
  74. nat/cli/commands/info/list_channels.py +1 -1
  75. nat/cli/commands/info/list_components.py +1 -1
  76. nat/cli/commands/object_store/__init__.py +1 -1
  77. nat/cli/commands/object_store/object_store.py +1 -1
  78. nat/cli/commands/optimize.py +1 -1
  79. nat/cli/commands/{mcp → red_teaming}/__init__.py +1 -1
  80. nat/cli/commands/red_teaming/red_teaming.py +138 -0
  81. nat/cli/commands/red_teaming/red_teaming_utils.py +73 -0
  82. nat/cli/commands/registry/__init__.py +1 -1
  83. nat/cli/commands/registry/publish.py +1 -1
  84. nat/cli/commands/registry/pull.py +1 -1
  85. nat/cli/commands/registry/registry.py +1 -1
  86. nat/cli/commands/registry/remove.py +1 -1
  87. nat/cli/commands/registry/search.py +1 -1
  88. nat/cli/commands/sizing/__init__.py +1 -1
  89. nat/cli/commands/sizing/calc.py +1 -1
  90. nat/cli/commands/sizing/sizing.py +1 -1
  91. nat/cli/commands/start.py +1 -1
  92. nat/cli/commands/uninstall.py +1 -1
  93. nat/cli/commands/validate.py +1 -1
  94. nat/cli/commands/workflow/__init__.py +1 -1
  95. nat/cli/commands/workflow/workflow.py +1 -1
  96. nat/cli/commands/workflow/workflow_commands.py +3 -2
  97. nat/cli/entrypoint.py +15 -37
  98. nat/cli/main.py +2 -2
  99. nat/cli/plugin_loader.py +69 -0
  100. nat/cli/register_workflow.py +183 -5
  101. nat/cli/type_registry.py +169 -3
  102. nat/control_flow/register.py +1 -1
  103. nat/control_flow/router_agent/agent.py +1 -1
  104. nat/control_flow/router_agent/prompt.py +1 -1
  105. nat/control_flow/router_agent/register.py +1 -1
  106. nat/control_flow/sequential_executor.py +28 -7
  107. nat/data_models/__init__.py +1 -1
  108. nat/data_models/agent.py +1 -1
  109. nat/data_models/api_server.py +38 -3
  110. nat/data_models/authentication.py +1 -1
  111. nat/data_models/common.py +1 -1
  112. nat/data_models/component.py +7 -1
  113. nat/data_models/component_ref.py +34 -1
  114. nat/data_models/config.py +62 -1
  115. nat/data_models/dataset_handler.py +15 -2
  116. nat/data_models/discovery_metadata.py +1 -1
  117. nat/data_models/embedder.py +1 -1
  118. nat/data_models/evaluate.py +6 -1
  119. nat/data_models/evaluator.py +1 -1
  120. nat/data_models/finetuning.py +260 -0
  121. nat/data_models/front_end.py +1 -1
  122. nat/data_models/function.py +1 -1
  123. nat/data_models/function_dependencies.py +1 -1
  124. nat/data_models/gated_field_mixin.py +1 -1
  125. nat/data_models/interactive.py +1 -1
  126. nat/data_models/intermediate_step.py +29 -2
  127. nat/data_models/invocation_node.py +1 -1
  128. nat/data_models/llm.py +1 -1
  129. nat/data_models/logging.py +1 -1
  130. nat/data_models/memory.py +1 -1
  131. nat/data_models/middleware.py +3 -1
  132. nat/data_models/object_store.py +1 -1
  133. nat/data_models/openai_mcp.py +1 -1
  134. nat/data_models/optimizable.py +1 -1
  135. nat/data_models/optimizer.py +1 -1
  136. nat/data_models/profiler.py +1 -1
  137. nat/data_models/registry_handler.py +1 -1
  138. nat/data_models/retriever.py +1 -1
  139. nat/data_models/retry_mixin.py +1 -1
  140. nat/data_models/runtime_enum.py +1 -1
  141. nat/data_models/span.py +1 -1
  142. nat/data_models/step_adaptor.py +1 -1
  143. nat/data_models/streaming.py +1 -1
  144. nat/data_models/swe_bench_model.py +1 -1
  145. nat/data_models/telemetry_exporter.py +1 -1
  146. nat/data_models/thinking_mixin.py +1 -1
  147. nat/data_models/ttc_strategy.py +1 -1
  148. nat/embedder/azure_openai_embedder.py +1 -1
  149. nat/embedder/nim_embedder.py +1 -1
  150. nat/embedder/openai_embedder.py +1 -1
  151. nat/embedder/register.py +1 -1
  152. nat/eval/__init__.py +1 -1
  153. nat/eval/config.py +8 -1
  154. nat/eval/dataset_handler/dataset_downloader.py +1 -1
  155. nat/eval/dataset_handler/dataset_filter.py +1 -1
  156. nat/eval/dataset_handler/dataset_handler.py +4 -2
  157. nat/eval/evaluate.py +217 -80
  158. nat/eval/evaluator/__init__.py +1 -1
  159. nat/eval/evaluator/base_evaluator.py +2 -2
  160. nat/eval/evaluator/evaluator_model.py +3 -2
  161. nat/eval/intermediate_step_adapter.py +1 -1
  162. nat/eval/llm_validator.py +336 -0
  163. nat/eval/rag_evaluator/evaluate.py +17 -10
  164. nat/eval/rag_evaluator/register.py +1 -1
  165. nat/eval/red_teaming_evaluator/__init__.py +14 -0
  166. nat/eval/red_teaming_evaluator/data_models.py +66 -0
  167. nat/eval/red_teaming_evaluator/evaluate.py +327 -0
  168. nat/eval/red_teaming_evaluator/filter_conditions.py +75 -0
  169. nat/eval/red_teaming_evaluator/register.py +55 -0
  170. nat/eval/register.py +2 -1
  171. nat/eval/remote_workflow.py +1 -1
  172. nat/eval/runners/__init__.py +1 -1
  173. nat/eval/runners/config.py +1 -1
  174. nat/eval/runners/multi_eval_runner.py +1 -1
  175. nat/eval/runners/red_teaming_runner/__init__.py +24 -0
  176. nat/eval/runners/red_teaming_runner/config.py +282 -0
  177. nat/eval/runners/red_teaming_runner/report_utils.py +707 -0
  178. nat/eval/runners/red_teaming_runner/runner.py +867 -0
  179. nat/eval/runtime_evaluator/__init__.py +1 -1
  180. nat/eval/runtime_evaluator/evaluate.py +1 -1
  181. nat/eval/runtime_evaluator/register.py +1 -1
  182. nat/eval/runtime_event_subscriber.py +1 -1
  183. nat/eval/swe_bench_evaluator/evaluate.py +1 -1
  184. nat/eval/swe_bench_evaluator/register.py +1 -1
  185. nat/eval/trajectory_evaluator/evaluate.py +2 -2
  186. nat/eval/trajectory_evaluator/register.py +1 -1
  187. nat/eval/tunable_rag_evaluator/evaluate.py +5 -5
  188. nat/eval/tunable_rag_evaluator/register.py +1 -1
  189. nat/eval/usage_stats.py +1 -1
  190. nat/eval/utils/eval_trace_ctx.py +1 -1
  191. nat/eval/utils/output_uploader.py +1 -1
  192. nat/eval/utils/tqdm_position_registry.py +1 -1
  193. nat/eval/utils/weave_eval.py +1 -1
  194. nat/experimental/decorators/experimental_warning_decorator.py +1 -1
  195. nat/experimental/test_time_compute/editing/iterative_plan_refinement_editor.py +1 -1
  196. nat/experimental/test_time_compute/editing/llm_as_a_judge_editor.py +1 -1
  197. nat/experimental/test_time_compute/editing/motivation_aware_summarization.py +1 -1
  198. nat/experimental/test_time_compute/functions/execute_score_select_function.py +1 -1
  199. nat/experimental/test_time_compute/functions/multi_llm_judge_function.py +88 -0
  200. nat/experimental/test_time_compute/functions/plan_select_execute_function.py +1 -1
  201. nat/experimental/test_time_compute/functions/ttc_tool_orchestration_function.py +1 -1
  202. nat/experimental/test_time_compute/functions/ttc_tool_wrapper_function.py +1 -1
  203. nat/experimental/test_time_compute/models/editor_config.py +1 -1
  204. nat/experimental/test_time_compute/models/scoring_config.py +1 -1
  205. nat/experimental/test_time_compute/models/search_config.py +20 -2
  206. nat/experimental/test_time_compute/models/selection_config.py +33 -2
  207. nat/experimental/test_time_compute/models/stage_enums.py +1 -1
  208. nat/experimental/test_time_compute/models/strategy_base.py +1 -1
  209. nat/experimental/test_time_compute/models/tool_use_config.py +1 -1
  210. nat/experimental/test_time_compute/models/ttc_item.py +1 -1
  211. nat/experimental/test_time_compute/register.py +4 -1
  212. nat/experimental/test_time_compute/scoring/llm_based_agent_scorer.py +1 -1
  213. nat/experimental/test_time_compute/scoring/llm_based_plan_scorer.py +1 -1
  214. nat/experimental/test_time_compute/scoring/motivation_aware_scorer.py +1 -1
  215. nat/experimental/test_time_compute/search/multi_llm_generation.py +115 -0
  216. nat/experimental/test_time_compute/search/multi_llm_planner.py +1 -1
  217. nat/experimental/test_time_compute/search/multi_query_retrieval_search.py +1 -1
  218. nat/experimental/test_time_compute/search/single_shot_multi_plan_planner.py +1 -1
  219. nat/experimental/test_time_compute/selection/best_of_n_selector.py +1 -1
  220. nat/experimental/test_time_compute/selection/llm_based_agent_output_selector.py +1 -1
  221. nat/experimental/test_time_compute/selection/llm_based_output_merging_selector.py +1 -1
  222. nat/experimental/test_time_compute/selection/llm_based_plan_selector.py +1 -1
  223. nat/experimental/test_time_compute/selection/llm_judge_selection.py +127 -0
  224. nat/experimental/test_time_compute/selection/threshold_selector.py +1 -1
  225. nat/finetuning/__init__.py +24 -0
  226. nat/finetuning/finetuning_runtime.py +143 -0
  227. nat/finetuning/interfaces/__init__.py +24 -0
  228. nat/finetuning/interfaces/finetuning_runner.py +261 -0
  229. nat/finetuning/interfaces/trainer_adapter.py +103 -0
  230. nat/finetuning/interfaces/trajectory_builder.py +115 -0
  231. nat/finetuning/utils/__init__.py +15 -0
  232. nat/finetuning/utils/parsers/__init__.py +15 -0
  233. nat/finetuning/utils/parsers/adk_parser.py +141 -0
  234. nat/finetuning/utils/parsers/base_parser.py +238 -0
  235. nat/finetuning/utils/parsers/common.py +91 -0
  236. nat/finetuning/utils/parsers/langchain_parser.py +267 -0
  237. nat/finetuning/utils/parsers/llama_index_parser.py +218 -0
  238. nat/front_ends/__init__.py +1 -1
  239. nat/front_ends/console/__init__.py +1 -1
  240. nat/front_ends/console/authentication_flow_handler.py +1 -1
  241. nat/front_ends/console/console_front_end_config.py +4 -1
  242. nat/front_ends/console/console_front_end_plugin.py +5 -4
  243. nat/front_ends/console/register.py +1 -1
  244. nat/front_ends/cron/__init__.py +1 -1
  245. nat/front_ends/fastapi/__init__.py +1 -1
  246. nat/front_ends/fastapi/async_job.py +128 -0
  247. nat/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +1 -1
  248. nat/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +13 -9
  249. nat/front_ends/fastapi/dask_client_mixin.py +1 -1
  250. nat/front_ends/fastapi/fastapi_front_end_config.py +1 -1
  251. nat/front_ends/fastapi/fastapi_front_end_controller.py +1 -1
  252. nat/front_ends/fastapi/fastapi_front_end_plugin.py +25 -30
  253. nat/front_ends/fastapi/fastapi_front_end_plugin_worker.py +195 -60
  254. nat/front_ends/fastapi/html_snippets/__init__.py +1 -1
  255. nat/front_ends/fastapi/html_snippets/auth_code_grant_success.py +1 -1
  256. nat/front_ends/fastapi/intermediate_steps_subscriber.py +12 -1
  257. nat/front_ends/fastapi/job_store.py +23 -11
  258. nat/front_ends/fastapi/main.py +1 -1
  259. nat/front_ends/fastapi/message_handler.py +27 -4
  260. nat/front_ends/fastapi/message_validator.py +54 -2
  261. nat/front_ends/fastapi/register.py +1 -1
  262. nat/front_ends/fastapi/response_helpers.py +16 -15
  263. nat/front_ends/fastapi/step_adaptor.py +1 -1
  264. nat/front_ends/fastapi/utils.py +1 -1
  265. nat/front_ends/register.py +1 -2
  266. nat/front_ends/simple_base/__init__.py +1 -1
  267. nat/front_ends/simple_base/simple_front_end_plugin_base.py +6 -4
  268. nat/llm/aws_bedrock_llm.py +1 -1
  269. nat/llm/azure_openai_llm.py +10 -1
  270. nat/llm/dynamo_llm.py +363 -0
  271. nat/llm/huggingface_llm.py +177 -0
  272. nat/llm/litellm_llm.py +1 -1
  273. nat/llm/nim_llm.py +1 -1
  274. nat/llm/openai_llm.py +1 -1
  275. nat/llm/register.py +3 -1
  276. nat/llm/utils/__init__.py +1 -1
  277. nat/llm/utils/env_config_value.py +1 -1
  278. nat/llm/utils/error.py +1 -1
  279. nat/llm/utils/thinking.py +1 -1
  280. nat/memory/__init__.py +1 -1
  281. nat/memory/interfaces.py +1 -1
  282. nat/memory/models.py +1 -1
  283. nat/meta/pypi.md +1 -1
  284. nat/middleware/__init__.py +5 -5
  285. nat/middleware/cache/__init__.py +14 -0
  286. nat/middleware/{cache_middleware.py → cache/cache_middleware.py} +39 -42
  287. nat/middleware/cache/cache_middleware_config.py +44 -0
  288. nat/middleware/cache/register.py +33 -0
  289. nat/middleware/defense/__init__.py +14 -0
  290. nat/middleware/defense/defense_middleware.py +362 -0
  291. nat/middleware/defense/defense_middleware_content_guard.py +455 -0
  292. nat/middleware/defense/defense_middleware_data_models.py +91 -0
  293. nat/middleware/defense/defense_middleware_output_verifier.py +440 -0
  294. nat/middleware/defense/defense_middleware_pii.py +356 -0
  295. nat/middleware/defense/register.py +82 -0
  296. nat/middleware/dynamic/__init__.py +14 -0
  297. nat/middleware/dynamic/dynamic_function_middleware.py +962 -0
  298. nat/middleware/dynamic/dynamic_middleware_config.py +132 -0
  299. nat/middleware/dynamic/register.py +34 -0
  300. nat/middleware/function_middleware.py +236 -52
  301. nat/middleware/logging/__init__.py +14 -0
  302. nat/middleware/logging/logging_middleware.py +67 -0
  303. nat/middleware/logging/logging_middleware_config.py +28 -0
  304. nat/middleware/logging/register.py +33 -0
  305. nat/middleware/middleware.py +142 -28
  306. nat/middleware/red_teaming/__init__.py +14 -0
  307. nat/middleware/red_teaming/red_teaming_middleware.py +344 -0
  308. nat/middleware/red_teaming/red_teaming_middleware_config.py +112 -0
  309. nat/middleware/red_teaming/register.py +47 -0
  310. nat/middleware/register.py +7 -20
  311. nat/middleware/utils/__init__.py +14 -0
  312. nat/middleware/utils/workflow_inventory.py +155 -0
  313. nat/object_store/__init__.py +1 -1
  314. nat/object_store/in_memory_object_store.py +1 -1
  315. nat/object_store/interfaces.py +1 -1
  316. nat/object_store/models.py +1 -1
  317. nat/object_store/register.py +1 -1
  318. nat/observability/__init__.py +1 -1
  319. nat/observability/exporter/__init__.py +1 -1
  320. nat/observability/exporter/base_exporter.py +1 -1
  321. nat/observability/exporter/exporter.py +1 -1
  322. nat/observability/exporter/file_exporter.py +1 -1
  323. nat/observability/exporter/processing_exporter.py +1 -1
  324. nat/observability/exporter/raw_exporter.py +1 -1
  325. nat/observability/exporter/span_exporter.py +7 -1
  326. nat/observability/exporter_manager.py +1 -1
  327. nat/observability/mixin/__init__.py +1 -1
  328. nat/observability/mixin/batch_config_mixin.py +1 -1
  329. nat/observability/mixin/collector_config_mixin.py +1 -1
  330. nat/observability/mixin/file_mixin.py +1 -1
  331. nat/observability/mixin/file_mode.py +1 -1
  332. nat/observability/mixin/redaction_config_mixin.py +1 -1
  333. nat/observability/mixin/resource_conflict_mixin.py +1 -1
  334. nat/observability/mixin/serialize_mixin.py +1 -1
  335. nat/observability/mixin/tagging_config_mixin.py +1 -1
  336. nat/observability/mixin/type_introspection_mixin.py +1 -1
  337. nat/observability/processor/__init__.py +1 -1
  338. nat/observability/processor/batching_processor.py +1 -1
  339. nat/observability/processor/callback_processor.py +1 -1
  340. nat/observability/processor/falsy_batch_filter_processor.py +1 -1
  341. nat/observability/processor/intermediate_step_serializer.py +1 -1
  342. nat/observability/processor/processor.py +1 -1
  343. nat/observability/processor/processor_factory.py +1 -1
  344. nat/observability/processor/redaction/__init__.py +1 -1
  345. nat/observability/processor/redaction/contextual_redaction_processor.py +1 -1
  346. nat/observability/processor/redaction/contextual_span_redaction_processor.py +1 -1
  347. nat/observability/processor/redaction/redaction_processor.py +1 -1
  348. nat/observability/processor/redaction/span_header_redaction_processor.py +1 -1
  349. nat/observability/processor/span_tagging_processor.py +1 -1
  350. nat/observability/register.py +1 -1
  351. nat/observability/utils/__init__.py +1 -1
  352. nat/observability/utils/dict_utils.py +1 -1
  353. nat/observability/utils/time_utils.py +1 -1
  354. nat/profiler/calc/__init__.py +1 -1
  355. nat/profiler/calc/calc_runner.py +3 -3
  356. nat/profiler/calc/calculations.py +1 -1
  357. nat/profiler/calc/data_models.py +1 -1
  358. nat/profiler/calc/plot.py +30 -3
  359. nat/profiler/callbacks/agno_callback_handler.py +1 -1
  360. nat/profiler/callbacks/base_callback_class.py +1 -1
  361. nat/profiler/callbacks/langchain_callback_handler.py +33 -3
  362. nat/profiler/callbacks/llama_index_callback_handler.py +13 -10
  363. nat/profiler/callbacks/semantic_kernel_callback_handler.py +1 -1
  364. nat/profiler/callbacks/token_usage_base_model.py +1 -1
  365. nat/profiler/data_frame_row.py +1 -1
  366. nat/profiler/data_models.py +1 -1
  367. nat/profiler/decorators/framework_wrapper.py +16 -1
  368. nat/profiler/decorators/function_tracking.py +1 -1
  369. nat/profiler/forecasting/config.py +1 -1
  370. nat/profiler/forecasting/model_trainer.py +1 -1
  371. nat/profiler/forecasting/models/__init__.py +1 -1
  372. nat/profiler/forecasting/models/forecasting_base_model.py +1 -1
  373. nat/profiler/forecasting/models/linear_model.py +1 -1
  374. nat/profiler/forecasting/models/random_forest_regressor.py +1 -1
  375. nat/profiler/inference_metrics_model.py +1 -1
  376. nat/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +1 -1
  377. nat/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +1 -1
  378. nat/profiler/inference_optimization/data_models.py +1 -1
  379. nat/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +1 -1
  380. nat/profiler/inference_optimization/experimental/prefix_span_analysis.py +1 -1
  381. nat/profiler/inference_optimization/llm_metrics.py +1 -1
  382. nat/profiler/inference_optimization/prompt_caching.py +1 -1
  383. nat/profiler/inference_optimization/token_uniqueness.py +1 -1
  384. nat/profiler/inference_optimization/workflow_runtimes.py +1 -1
  385. nat/profiler/intermediate_property_adapter.py +1 -1
  386. nat/profiler/parameter_optimization/optimizable_utils.py +1 -1
  387. nat/profiler/parameter_optimization/optimizer_runtime.py +1 -1
  388. nat/profiler/parameter_optimization/parameter_optimizer.py +1 -1
  389. nat/profiler/parameter_optimization/parameter_selection.py +1 -1
  390. nat/profiler/parameter_optimization/pareto_visualizer.py +1 -1
  391. nat/profiler/parameter_optimization/prompt_optimizer.py +1 -1
  392. nat/profiler/parameter_optimization/update_helpers.py +1 -1
  393. nat/profiler/profile_runner.py +1 -1
  394. nat/profiler/utils.py +1 -1
  395. nat/registry_handlers/local/local_handler.py +1 -1
  396. nat/registry_handlers/local/register_local.py +1 -1
  397. nat/registry_handlers/metadata_factory.py +1 -1
  398. nat/registry_handlers/package_utils.py +1 -1
  399. nat/registry_handlers/pypi/pypi_handler.py +1 -1
  400. nat/registry_handlers/pypi/register_pypi.py +1 -1
  401. nat/registry_handlers/register.py +1 -1
  402. nat/registry_handlers/registry_handler_base.py +1 -1
  403. nat/registry_handlers/rest/register_rest.py +1 -1
  404. nat/registry_handlers/rest/rest_handler.py +1 -1
  405. nat/registry_handlers/schemas/headers.py +1 -1
  406. nat/registry_handlers/schemas/package.py +1 -1
  407. nat/registry_handlers/schemas/publish.py +1 -1
  408. nat/registry_handlers/schemas/pull.py +1 -1
  409. nat/registry_handlers/schemas/remove.py +1 -1
  410. nat/registry_handlers/schemas/search.py +1 -1
  411. nat/registry_handlers/schemas/status.py +1 -1
  412. nat/retriever/interface.py +1 -1
  413. nat/retriever/milvus/__init__.py +1 -1
  414. nat/retriever/milvus/register.py +1 -1
  415. nat/retriever/milvus/retriever.py +1 -1
  416. nat/retriever/models.py +1 -1
  417. nat/retriever/nemo_retriever/__init__.py +1 -1
  418. nat/retriever/nemo_retriever/register.py +1 -1
  419. nat/retriever/nemo_retriever/retriever.py +5 -5
  420. nat/retriever/register.py +1 -1
  421. nat/runtime/__init__.py +1 -1
  422. nat/runtime/loader.py +10 -3
  423. nat/runtime/metrics.py +180 -0
  424. nat/runtime/runner.py +1 -5
  425. nat/runtime/session.py +451 -32
  426. nat/runtime/user_metadata.py +1 -1
  427. nat/settings/global_settings.py +1 -1
  428. nat/tool/chat_completion.py +1 -1
  429. nat/tool/code_execution/README.md +1 -1
  430. nat/tool/code_execution/code_sandbox.py +1 -1
  431. nat/tool/code_execution/local_sandbox/Dockerfile.sandbox +1 -1
  432. nat/tool/code_execution/local_sandbox/__init__.py +1 -1
  433. nat/tool/code_execution/local_sandbox/local_sandbox_server.py +1 -1
  434. nat/tool/code_execution/local_sandbox/start_local_sandbox.sh +1 -1
  435. nat/tool/code_execution/register.py +1 -1
  436. nat/tool/code_execution/utils.py +1 -1
  437. nat/tool/datetime_tools.py +1 -1
  438. nat/tool/document_search.py +1 -1
  439. nat/tool/github_tools.py +1 -1
  440. nat/tool/memory_tools/add_memory_tool.py +1 -1
  441. nat/tool/memory_tools/delete_memory_tool.py +1 -1
  442. nat/tool/memory_tools/get_memory_tool.py +1 -1
  443. nat/tool/nvidia_rag.py +2 -2
  444. nat/tool/register.py +1 -1
  445. nat/tool/retriever.py +1 -1
  446. nat/tool/server_tools.py +1 -1
  447. nat/utils/__init__.py +8 -5
  448. nat/utils/callable_utils.py +1 -1
  449. nat/utils/data_models/schema_validator.py +1 -1
  450. nat/utils/debugging_utils.py +1 -1
  451. nat/utils/decorators.py +1 -1
  452. nat/utils/dump_distro_mapping.py +1 -1
  453. nat/utils/exception_handlers/automatic_retries.py +3 -3
  454. nat/utils/exception_handlers/schemas.py +1 -1
  455. nat/utils/io/model_processing.py +1 -1
  456. nat/utils/io/supress_logs.py +33 -0
  457. nat/utils/io/yaml_tools.py +1 -1
  458. nat/utils/log_levels.py +1 -1
  459. nat/utils/log_utils.py +13 -1
  460. nat/utils/metadata_utils.py +1 -1
  461. nat/utils/optional_imports.py +1 -1
  462. nat/utils/producer_consumer_queue.py +1 -1
  463. nat/utils/reactive/base/observable_base.py +1 -1
  464. nat/utils/reactive/base/observer_base.py +1 -1
  465. nat/utils/reactive/base/subject_base.py +1 -1
  466. nat/utils/reactive/observable.py +1 -1
  467. nat/utils/reactive/observer.py +1 -1
  468. nat/utils/reactive/subject.py +1 -1
  469. nat/utils/reactive/subscription.py +1 -1
  470. nat/utils/responses_api.py +1 -1
  471. nat/utils/settings/global_settings.py +1 -1
  472. nat/utils/string_utils.py +1 -1
  473. nat/utils/type_converter.py +18 -5
  474. nat/utils/type_utils.py +1 -1
  475. nat/utils/url_utils.py +1 -1
  476. {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/METADATA +39 -14
  477. nvidia_nat-1.4.0a20260113.dist-info/RECORD +547 -0
  478. nvidia_nat-1.4.0a20260113.dist-info/entry_points.txt +38 -0
  479. nat/cli/commands/mcp/mcp.py +0 -986
  480. nat/front_ends/mcp/introspection_token_verifier.py +0 -73
  481. nat/front_ends/mcp/mcp_front_end_config.py +0 -109
  482. nat/front_ends/mcp/mcp_front_end_plugin.py +0 -155
  483. nat/front_ends/mcp/mcp_front_end_plugin_worker.py +0 -388
  484. nat/front_ends/mcp/memory_profiler.py +0 -320
  485. nat/front_ends/mcp/register.py +0 -27
  486. nat/front_ends/mcp/tool_converter.py +0 -321
  487. nvidia_nat-1.4.0a20251120.dist-info/RECORD +0 -488
  488. nvidia_nat-1.4.0a20251120.dist-info/entry_points.txt +0 -23
  489. {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/WHEEL +0 -0
  490. {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
  491. {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE.md +0 -0
  492. {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,707 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Report utilities for red teaming evaluation results."""
16
+
17
+ from __future__ import annotations
18
+
19
+ import logging
20
+ from pathlib import Path
21
+ from typing import Any
22
+
23
+ import pandas as pd
24
+ import plotly.graph_objects as go
25
+ import plotly.io as pio
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
def _validate_columns(df: pd.DataFrame, required_columns: list[str], context: str = "") -> None:
    """Check that ``df`` contains every column named in ``required_columns``.

    Args:
        df: DataFrame whose columns are inspected.
        required_columns: Column names that must all be present.
        context: Optional label (e.g., the calling function's name) that is
            woven into the error message when a column is missing.

    Raises:
        ValueError: If one or more required columns are absent. The message
            lists the missing names and the columns that are available.
    """
    present = df.columns

    absent = []
    for name in required_columns:
        if name not in present:
            absent.append(name)

    # Nothing missing: the frame is usable as-is.
    if not absent:
        return

    where = f" in {context}" if context else ""
    raise ValueError(f"Missing required column(s){where}: {absent}. Available columns: {list(present)}")
46
+
47
+
48
def plot_score_boxplot(
    df: pd.DataFrame,
    x: str,
    y: str = "score",
    title: str | None = None,
    x_label: str | None = None,
    y_label: str = "Risk Score",
    y_range: tuple[float, float] = (-0.05, 1.05),  # Start below 0 to show full box when min=0
    box_color: str = "rgb(55, 126, 184)",
    box_fill_opacity: float = 0.3,
    point_color: str = "rgba(255, 50, 0, 0.5)",
    point_size: int = 6,
    jitter: float = 0.3,
) -> go.Figure:
    """Create a box plot with data points overlaid inside the boxes.

    One box is drawn per distinct value of ``df[x]``, in order of first
    appearance. Every data point is plotted on top of its box, and a dotted
    red line with a ``mean: <value>`` label marks each group's mean. The
    DataFrame index is attached to each point and shown as ``uid`` on hover.

    Args:
        df: DataFrame containing the data.
        x: Column name for x-axis (grouping variable).
        y: Column name for y-axis (score values).
        title: Plot title. Defaults to "Score Distribution by {x}".
        x_label: X-axis label. Defaults to the column name.
        y_label: Y-axis label.
        y_range: Tuple of (min, max) for y-axis range.
        box_color: Color for the box outline. A plain ``rgb(...)`` string is
            also used to derive the translucent fill; any other format is
            used for the fill unchanged.
        box_fill_opacity: Opacity for box fill (0-1).
        point_color: RGBA color for data points.
        point_size: Size of data points.
        jitter: Horizontal jitter for points (0-1).

    Returns:
        The Plotly Figure object. If ``df`` has no rows, the figure contains
        no traces but still carries the title and axis settings.

    Raises:
        ValueError: If required columns are missing from the DataFrame.
    """
    _validate_columns(df, [x, y], "plot_score_boxplot")

    if title is None:
        title = f"Score Distribution by {x}"
    if x_label is None:
        x_label = x

    # Derive the fill color by adding an alpha channel to a plain "rgb(...)" string.
    # A string starting with "rgb(" can never start with "rgba(", so one check suffices.
    if box_color.startswith("rgb("):
        box_fill_color = box_color.replace("rgb(", "rgba(").replace(")", f", {box_fill_opacity})")
    else:
        box_fill_color = box_color  # Use as-is if already rgba or different format

    # Use go.Box directly for explicit control over data
    fig = go.Figure()

    unique_x_values = df[x].unique()
    n_categories = len(unique_x_values)

    # Wider boxes for fewer categories, narrower for more. The divisor is clamped
    # to 1 so that a DataFrame without rows does not raise ZeroDivisionError.
    box_width = max(0.2, min(0.67, 1.5 / max(n_categories, 1)))
    half_width = box_width / 2

    group_means: list[tuple[int, float]] = []  # (x_position, mean_value)

    for position, x_val in enumerate(unique_x_values):
        subset = df.loc[df[x] == x_val]
        y_values = subset[y].tolist()  # Explicit list so Plotly receives plain values
        # Use index (uid) for hover text
        hover_text = subset.index.tolist()

        mean_val = sum(y_values) / len(y_values) if y_values else 0
        group_means.append((position, mean_val))

        fig.add_trace(
            go.Box(
                y=y_values,
                name=str(x_val),
                boxpoints="all",
                pointpos=0,
                jitter=jitter,
                width=box_width,
                marker=dict(color=point_color, size=point_size),
                line=dict(color=box_color),
                fillcolor=box_fill_color,
                text=hover_text,
                hovertemplate="uid: %{text}<br>score: %{y}<extra></extra>",
            ))

    # Add mean lines and annotations for each box. Each box is addressed by its
    # integer position in the order the traces were added.
    for x_pos, mean_val in group_means:
        # Dotted horizontal line at the mean, spanning exactly the box width
        fig.add_shape(
            type="line",
            x0=x_pos - half_width,
            x1=x_pos + half_width,
            y0=mean_val,
            y1=mean_val,
            line=dict(color="red", width=2, dash="dot"),
        )
        # Mean label right next to the box (at its left edge)
        fig.add_annotation(
            x=x_pos - half_width,
            y=mean_val,
            text=f"mean: {mean_val:.2f}",
            showarrow=False,
            font=dict(size=12, color="red", weight="bold"),
            xanchor="right",
            yanchor="middle",
            xshift=-5,  # Small gap from the box edge
        )

    fig.update_layout(
        title=title,
        xaxis_title=x_label,
        yaxis_title=y_label,
        yaxis_range=list(y_range),
        showlegend=False,
    )

    return fig
169
+
170
+
171
def plot_success_rate_bar(
    df: pd.DataFrame,
    x: str,
    y: str = "score",
    threshold: float = 0.5,
    title: str | None = None,
    x_label: str | None = None,
    y_label: str = "Attack Success Rate (%)",
    bar_color: str = "rgb(55, 126, 184)",
) -> go.Figure:
    """Create a bar chart of attack success rate per group.

    A row counts as a "success" when its score is strictly greater than
    ``threshold``. One bar is drawn per distinct non-null value of ``x``, in
    order of first appearance.

    Args:
        df: DataFrame containing the data.
        x: Column name for x-axis (grouping variable).
        y: Column name for the score values.
        threshold: Score threshold to consider as "success" (default 0.5).
        title: Plot title. Defaults to
            "Attack Success Rate by {x} (threshold={threshold})".
        x_label: X-axis label. Defaults to the column name.
        y_label: Y-axis label.
        bar_color: Color for the bars.

    Returns:
        The Plotly Figure object.

    Raises:
        ValueError: If required columns are missing from the DataFrame.
    """
    _validate_columns(df, [x, y], "plot_success_rate_bar")

    if title is None:
        title = f"Attack Success Rate by {x} (threshold={threshold})"
    if x_label is None:
        x_label = x

    x_labels: list[str] = []
    success_rates: list[float] = []
    counts: list[str] = []

    # Null group values can never match an equality mask, so they would only
    # ever yield an empty "nan"/"None" bar labelled 0/0; skip them up front.
    for x_val in df[x].dropna().unique():
        scores = df.loc[df[x] == x_val, y]
        total = len(scores)
        successes = int((scores > threshold).sum())
        rate = (successes / total * 100) if total > 0 else 0

        x_labels.append(str(x_val))
        success_rates.append(rate)
        counts.append(f"{successes}/{total}")

    bar_text = [f"{rate:.1f}%<br>({count})" for rate, count in zip(success_rates, counts, strict=True)]

    fig = go.Figure()

    fig.add_trace(
        go.Bar(
            x=x_labels,
            y=success_rates,
            text=bar_text,
            textposition="outside",
            marker_color=bar_color,
            # The bar label already contains the rate, so the hover "Count"
            # line reads the raw "successes/total" string from customdata
            # instead of repeating the whole label.
            customdata=counts,
            hovertemplate="%{x}<br>Attack Success Rate: %{y:.1f}%<br>Count: %{customdata}<extra></extra>",
        ))

    fig.update_layout(
        title=title,
        xaxis_title=x_label,
        yaxis_title=y_label,
        yaxis_range=[-10, 125],  # Leave room for labels above bars
        showlegend=False,
    )

    return fig
244
+
245
+
246
def generate_standard_plots(df: pd.DataFrame) -> list[tuple[str, str, go.Figure | None]]:
    """Build the standard set of red teaming plots, one section per grouping.

    Sections are emitted in a fixed order: scenario id, scenario group (only
    when the column exists), condition name, and tags (only when the column
    exists and at least one non-null tag remains after exploding).

    Args:
        df: DataFrame with columns: scenario_id, condition_name, tags, scenario_group, score.

    Returns:
        List of tuples (filename, title, figure) for each plot.
        Section headers have figure=None and are rendered as section titles.

    Raises:
        ValueError: If required columns are missing from the DataFrame.
    """
    # Validate required columns upfront
    _validate_columns(df, ["scenario_id", "score", "condition_name"], "generate_standard_plots")

    plots: list[tuple[str, str, go.Figure | None]] = []

    def _add_section(
        frame: pd.DataFrame,
        column: str,
        section_title: str,
        boxplot_title: str,
        bar_title: str,
        axis_label: str,
    ) -> None:
        """Append a section header, then the box plot and the bar chart for one grouping column."""
        plots.append(("_section", section_title, None))

        box_fig = plot_score_boxplot(
            frame,
            x=column,
            y="score",
            title=boxplot_title,
            x_label=axis_label,
        )
        plots.append((f"{column}_boxplot", "Score Distribution", box_fig))

        bar_fig = plot_success_rate_bar(
            frame,
            x=column,
            y="score",
            title=bar_title,
            x_label=axis_label,
        )
        plots.append((f"{column}_success_rate", "Attack Success Rate", bar_fig))

    # ---- Results by scenario id ----
    _add_section(
        df,
        "scenario_id",
        "Results by group: Scenario ID",
        "Score Distribution by Scenario",
        "Attack Success Rate by Scenario",
        "Scenario",
    )

    # ---- Results by scenario group (optional column) ----
    if "scenario_group" in df.columns:
        _add_section(
            df,
            "scenario_group",
            "Results by group: Scenario Group",
            "Risk Score Distribution by Scenario Group",
            "Attack Success Rate by Scenario Group",
            "Scenario Group",
        )

    # ---- Results by condition ----
    _add_section(
        df,
        "condition_name",
        "Results by group: Output Filtering Condition",
        "Score Distribution by Output Filtering Condition",
        "Attack Success Rate by Output Filtering Condition",
        "Condition",
    )

    # ---- Results by tag (optional column) ----
    if "tags" in df.columns:
        # One row per (result, tag) pair; rows left without a tag are dropped.
        df_tags = df.explode("tags").dropna(subset=["tags"])
        if not df_tags.empty:
            _add_section(
                df_tags,
                "tags",
                "Results by group: Tags",
                "Risk Score Distribution by Tag",
                "Attack Success Rate by Tag",
                "Tag",
            )

    return plots
354
+
355
+
356
+ def _get_risk_color(value: float, max_value: float = 1.0) -> str:
357
+ """Get a color that transitions from low to high risk based on value.
358
+
359
+ The color transitions and opacity increases as risk increases:
360
+ - Opacity: 0.3 (at 0) -> 1.0 (at max)
361
+ - Color: muted -> intense red
362
+
363
+ Args:
364
+ value: The risk value (0 to max_value).
365
+ max_value: The maximum value (1.0 for scores, 100.0 for percentages).
366
+
367
+ Returns:
368
+ RGBA color string.
369
+ """
370
+ # Normalize to 0-1 range
371
+ normalized = min(max(value / max_value, 0.0), 1.0)
372
+
373
+ # Interpolate color
374
+ normalized = normalized if normalized >= 0.5 else normalized**2
375
+ r = int(30 + (255 - 30) * normalized)
376
+ g = int(10 + (0 - 10) * normalized)
377
+ b = int(10 + (0 - 10) * normalized)
378
+
379
+ # Interpolate opacity from 0.5 to 1.0
380
+ opacity = 0.3 + 0.7 * normalized
381
+
382
+ return f"rgba({r}, {g}, {b}, {opacity})"
383
+
384
+
385
+ def _render_summary_html(summary: dict[str, Any] | None) -> str:
386
+ """Render the summary section as HTML.
387
+
388
+ Args:
389
+ summary: The summary dictionary from _compute_result_summary.
390
+
391
+ Returns:
392
+ HTML string for the summary section.
393
+ """
394
+ if not summary:
395
+ return ""
396
+
397
+ overall_score = summary.get("overall_score", 0.0)
398
+ attack_success_rate = summary.get("attack_success_rate", 0.0)
399
+ num_scenarios = summary.get("num_scenarios", 0)
400
+ total_workflow_runs = summary.get("total_workflow_runs", 0)
401
+ total_evaluations = summary.get("total_evaluations", 0)
402
+ evaluation_successes = summary.get("evaluation_successes", 0)
403
+ evaluation_failures = summary.get("evaluation_failures", 0)
404
+ per_scenario = summary.get("per_scenario_summary", {})
405
+
406
+ # Get dynamic colors based on risk values
407
+ score_color = _get_risk_color(overall_score, 1.0)
408
+ asr_color = _get_risk_color(attack_success_rate, 1.0)
409
+
410
+ # Build per-scenario rows with ASR as first data column
411
+ scenario_rows = ""
412
+ for scenario_id, stats in per_scenario.items():
413
+ scenario_asr = stats.get("attack_success_rate", 0.0)
414
+ mean_score = stats.get("mean_score", 0.0)
415
+ min_score = stats.get("min_score", 0.0)
416
+ max_score = stats.get("max_score", 0.0)
417
+ row_asr_color = _get_risk_color(scenario_asr, 1.0)
418
+ scenario_rows += f"""
419
+ <tr>
420
+ <td>{scenario_id}</td>
421
+ <td style="background-color: {row_asr_color}; color: white; font-weight: bold;">{scenario_asr:.1%}</td>
422
+ <td>{mean_score:.3f}</td>
423
+ <td>{min_score:.3f}</td>
424
+ <td>{max_score:.3f}</td>
425
+ </tr>"""
426
+
427
+ return f"""
428
+ <div class="summary-section">
429
+ <h2 class="section-header">Summary</h2>
430
+ <div class="summary-container">
431
+ <div class="summary-stats">
432
+ <div class="stat-card risk-score" style="background-color: {score_color}; border: none;">
433
+ <div class="stat-label" style="color: rgba(255,255,255,0.9);">Overall Risk Score ↓</div>
434
+ <div class="stat-value" style="color: white;">{overall_score:.3f}</div>
435
+ </div>
436
+ <div class="stat-card risk-score" style="background-color: {asr_color}; border: none;">
437
+ <div class="stat-label" style="color: rgba(255,255,255,0.9);">Attack Success Rate ↓</div>
438
+ <div class="stat-value" style="color: white;">{attack_success_rate:.1%}</div>
439
+ </div>
440
+ <div class="stat-card">
441
+ <div class="stat-label">Scenarios</div>
442
+ <div class="stat-value">{num_scenarios}</div>
443
+ </div>
444
+ <div class="stat-card">
445
+ <div class="stat-label">Workflow Runs</div>
446
+ <div class="stat-value">{total_workflow_runs}</div>
447
+ </div>
448
+ <div class="stat-card">
449
+ <div class="stat-label">Total Evaluations</div>
450
+ <div class="stat-value">{total_evaluations}</div>
451
+ </div>
452
+ <div class="stat-card success">
453
+ <div class="stat-label">Successes</div>
454
+ <div class="stat-value">{evaluation_successes}</div>
455
+ </div>
456
+ <div class="stat-card failure">
457
+ <div class="stat-label">Failures</div>
458
+ <div class="stat-value">{evaluation_failures}</div>
459
+ </div>
460
+ </div>
461
+ <h3 class="plot-title">Per-Scenario Results</h3>
462
+ <table class="scenario-table">
463
+ <thead>
464
+ <tr>
465
+ <th>Scenario</th>
466
+ <th>ASR</th>
467
+ <th>Mean Score</th>
468
+ <th>Min Score</th>
469
+ <th>Max Score</th>
470
+ </tr>
471
+ </thead>
472
+ <tbody>
473
+ {scenario_rows}
474
+ </tbody>
475
+ </table>
476
+ </div>
477
+ </div>
478
+ """
479
+
480
+
481
def save_combined_html(
    plots: list[tuple[str, str, go.Figure | None]],
    output_path: Path,
    page_title: str = "Red Teaming Evaluation Results",
    summary: dict[str, Any] | None = None,
) -> Path:
    """Save all plots in a single interactive HTML document.

    The page consists of a styled header, the optional summary section, and
    then each entry of ``plots`` in order: entries whose figure is None become
    section headers, all others become a titled plot container.

    plotly.js is loaded from the CDN by the first rendered figure, so the
    script version matches the installed plotly.py that serialized the
    figures. The generic ``plotly-latest`` bundle is not used because it is
    pinned to an old plotly.js release.

    Args:
        plots: List of (filename, title, figure) tuples.
        output_path: Path for the combined HTML file. Missing parent
            directories are created.
        page_title: Title for the HTML page.
        summary: Optional summary dictionary to display at the top of the report.

    Returns:
        Path to the saved HTML file.
    """
    from html import escape

    # Titles may contain run or directory names; escape them before embedding.
    safe_page_title = escape(page_title)

    html_parts: list[str] = []

    # HTML header with styling
    html_parts.append(f"""<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>{safe_page_title}</title>
    <style>
        body {{
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, sans-serif;
            max-width: 1200px;
            margin: 0 auto;
            padding: 20px;
            background-color: #f5f5f5;
        }}
        h1 {{
            color: #333;
            border-bottom: 2px solid #76b900;
            padding-bottom: 10px;
        }}
        h2.section-header {{
            margin-top: 50px;
            padding: 15px 20px;
            background-color: #76b900;
            color: white;
            border-radius: 8px;
            font-size: 1.4em;
        }}
        h3.plot-title {{
            color: #555;
            margin-top: 20px;
            margin-bottom: 10px;
            font-size: 1.1em;
        }}
        .plot-container {{
            background-color: white;
            border-radius: 8px;
            padding: 20px;
            margin-bottom: 30px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }}
        .summary-container {{
            background-color: white;
            border-radius: 8px;
            padding: 20px;
            margin-bottom: 30px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }}
        .summary-stats {{
            display: flex;
            flex-wrap: nowrap;
            gap: 10px;
            margin-bottom: 20px;
        }}
        .stat-card {{
            background-color: #f8f9fa;
            border-radius: 6px;
            padding: 10px 12px;
            min-width: 90px;
            flex: 1;
            text-align: center;
            border: 1px solid #e9ecef;
        }}
        .stat-card.risk-score {{
            color: white;
            border: none;
        }}
        .stat-card.risk-score .stat-label {{
            color: rgba(255,255,255,0.9);
        }}
        .stat-card.success {{
            border-left: 4px solid #28a745;
        }}
        .stat-card.failure {{
            border-left: 4px solid #dc3545;
        }}
        .stat-label {{
            font-size: 0.75em;
            color: #666;
            margin-bottom: 3px;
        }}
        .stat-value {{
            font-size: 1.2em;
            font-weight: bold;
        }}
        .scenario-table {{
            width: 100%;
            border-collapse: collapse;
            margin-top: 10px;
        }}
        .scenario-table th,
        .scenario-table td {{
            padding: 10px 15px;
            text-align: left;
            border-bottom: 1px solid #e9ecef;
        }}
        .scenario-table th {{
            background-color: #f8f9fa;
            font-weight: 600;
            color: #333;
        }}
        .scenario-table tr:hover {{
            background-color: #f8f9fa;
        }}
    </style>
</head>
<body>
    <h1>{safe_page_title}</h1>
""")

    # Add summary section at the top
    html_parts.append(_render_summary_html(summary))

    # Only the first figure needs to pull in plotly.js; later figures reuse it.
    plotlyjs_needed = True

    # Add each plot with its title (or section header)
    for _filename, title, fig in plots:
        safe_title = escape(title)
        if fig is None:
            # This is a section header
            html_parts.append(f"""
    <h2 class="section-header">{safe_title}</h2>
""")
        else:
            # This is a regular plot
            plot_html = pio.to_html(
                fig,
                full_html=False,
                include_plotlyjs="cdn" if plotlyjs_needed else False,
            )
            plotlyjs_needed = False
            html_parts.append(f"""
    <h3 class="plot-title">{safe_title}</h3>
    <div class="plot-container">
        {plot_html}
    </div>
""")

    # HTML footer
    html_parts.append("""
</body>
</html>
""")

    # Write combined HTML
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text("".join(html_parts), encoding="utf-8")
    logger.debug("Saved combined HTML: %s", output_path)

    return output_path
643
+
644
+
645
def generate_and_save_report(
    flat_results: list[dict[str, Any]] | pd.DataFrame,
    output_dir: Path,
    summary: dict[str, Any] | None = None,
) -> Path | None:
    """Turn flat evaluation results into a single HTML report on disk.

    Main entry point for plotting. The steps are:
    1. Normalize the flat results into a DataFrame.
    2. Discard rows that carry an ``error_message`` (failed evaluations).
    3. Index the rows by ``uid`` (when present) so hover text can identify them.
    4. Build the standard plots (by scenario, group, condition, tags).
    5. Write everything, together with the summary, to ``report.html``
       directly inside ``output_dir``.

    Args:
        flat_results: List of flat result dictionaries from _build_flat_results,
            or an equivalent DataFrame.
        output_dir: Directory that receives ``report.html``. Its name is also
            used in the page title.
        summary: Optional summary dictionary to display at the top of the report.

    Returns:
        Path to the combined HTML report, or None when there is nothing to
        plot (no input rows, no rows left after dropping errors, or no plots).
    """
    # Bail out early on empty input, before doing any conversion work.
    if isinstance(flat_results, pd.DataFrame):
        if flat_results.empty:
            logger.warning("No results to plot")
            return None
        df = flat_results
    else:
        if isinstance(flat_results, list) and not flat_results:
            logger.warning("No results to plot")
            return None
        df = pd.DataFrame(flat_results)

    # Failed evaluations are marked by a non-null error_message; exclude them.
    if "error_message" in df.columns:
        has_error = df["error_message"].notna()
        error_count = int(has_error.sum())
        if error_count > 0:
            logger.info("Dropping %d rows with error_message from plotting", error_count)
        df = df[~has_error]

    if df.empty:
        logger.warning("No valid results to plot after filtering errors")
        return None

    # The plots use the frame index as hover text, so promote uid to the index.
    if "uid" in df.columns:
        df = df.set_index("uid")

    plots = generate_standard_plots(df)
    if not plots:
        logger.warning("No plots generated")
        return None

    return save_combined_html(
        plots,
        output_dir / "report.html",
        page_title=f"Red Teaming Evaluation Results for run: {output_dir.name}",
        summary=summary,
    )