nvidia-nat 1.4.0a20251120__py3-none-any.whl → 1.4.0a20260113__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (492) hide show
  1. aiq/__init__.py +1 -1
  2. nat/{front_ends/mcp → agent/auto_memory_wrapper}/__init__.py +1 -1
  3. nat/agent/auto_memory_wrapper/agent.py +278 -0
  4. nat/agent/auto_memory_wrapper/register.py +227 -0
  5. nat/agent/auto_memory_wrapper/state.py +30 -0
  6. nat/agent/base.py +1 -1
  7. nat/agent/dual_node.py +1 -1
  8. nat/agent/prompt_optimizer/prompt.py +1 -1
  9. nat/agent/prompt_optimizer/register.py +1 -1
  10. nat/agent/react_agent/agent.py +16 -9
  11. nat/agent/react_agent/output_parser.py +2 -2
  12. nat/agent/react_agent/prompt.py +3 -2
  13. nat/agent/react_agent/register.py +2 -2
  14. nat/agent/react_agent/register_per_user_agent.py +104 -0
  15. nat/agent/reasoning_agent/reasoning_agent.py +1 -1
  16. nat/agent/register.py +3 -1
  17. nat/agent/responses_api_agent/__init__.py +1 -1
  18. nat/agent/responses_api_agent/register.py +1 -1
  19. nat/agent/rewoo_agent/agent.py +9 -4
  20. nat/agent/rewoo_agent/prompt.py +1 -1
  21. nat/agent/rewoo_agent/register.py +1 -1
  22. nat/agent/tool_calling_agent/agent.py +5 -4
  23. nat/agent/tool_calling_agent/register.py +1 -1
  24. nat/authentication/__init__.py +1 -1
  25. nat/authentication/api_key/__init__.py +1 -1
  26. nat/authentication/api_key/api_key_auth_provider.py +1 -1
  27. nat/authentication/api_key/api_key_auth_provider_config.py +22 -7
  28. nat/authentication/api_key/register.py +1 -1
  29. nat/authentication/credential_validator/__init__.py +1 -1
  30. nat/authentication/credential_validator/bearer_token_validator.py +1 -1
  31. nat/authentication/exceptions/__init__.py +1 -1
  32. nat/authentication/exceptions/api_key_exceptions.py +1 -1
  33. nat/authentication/http_basic_auth/http_basic_auth_provider.py +1 -1
  34. nat/authentication/http_basic_auth/register.py +1 -1
  35. nat/authentication/interfaces.py +1 -1
  36. nat/authentication/oauth2/__init__.py +1 -1
  37. nat/authentication/oauth2/oauth2_auth_code_flow_provider.py +1 -1
  38. nat/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +1 -1
  39. nat/authentication/oauth2/oauth2_resource_server_config.py +1 -1
  40. nat/authentication/oauth2/register.py +1 -1
  41. nat/authentication/register.py +1 -1
  42. nat/builder/builder.py +511 -1
  43. nat/builder/child_builder.py +385 -0
  44. nat/builder/component_utils.py +28 -4
  45. nat/builder/context.py +17 -1
  46. nat/builder/embedder.py +1 -1
  47. nat/builder/eval_builder.py +19 -7
  48. nat/builder/evaluator.py +1 -1
  49. nat/builder/framework_enum.py +2 -1
  50. nat/builder/front_end.py +1 -1
  51. nat/builder/function.py +40 -3
  52. nat/builder/function_base.py +1 -1
  53. nat/builder/function_info.py +1 -1
  54. nat/builder/intermediate_step_manager.py +1 -1
  55. nat/builder/llm.py +1 -1
  56. nat/builder/per_user_workflow_builder.py +843 -0
  57. nat/builder/retriever.py +1 -1
  58. nat/builder/sync_builder.py +571 -0
  59. nat/builder/user_interaction_manager.py +1 -1
  60. nat/builder/workflow.py +1 -1
  61. nat/builder/workflow_builder.py +536 -424
  62. nat/cli/__init__.py +1 -1
  63. nat/cli/cli_utils/config_override.py +1 -1
  64. nat/cli/cli_utils/validation.py +32 -1
  65. nat/cli/commands/configure/channel/add.py +1 -1
  66. nat/cli/commands/configure/channel/channel.py +1 -1
  67. nat/cli/commands/configure/channel/remove.py +1 -1
  68. nat/cli/commands/configure/channel/update.py +1 -1
  69. nat/cli/commands/configure/configure.py +1 -1
  70. nat/cli/commands/evaluate.py +87 -13
  71. nat/cli/commands/finetune.py +132 -0
  72. nat/cli/commands/info/__init__.py +1 -1
  73. nat/cli/commands/info/info.py +1 -1
  74. nat/cli/commands/info/list_channels.py +1 -1
  75. nat/cli/commands/info/list_components.py +1 -1
  76. nat/cli/commands/object_store/__init__.py +1 -1
  77. nat/cli/commands/object_store/object_store.py +1 -1
  78. nat/cli/commands/optimize.py +1 -1
  79. nat/cli/commands/{mcp → red_teaming}/__init__.py +1 -1
  80. nat/cli/commands/red_teaming/red_teaming.py +138 -0
  81. nat/cli/commands/red_teaming/red_teaming_utils.py +73 -0
  82. nat/cli/commands/registry/__init__.py +1 -1
  83. nat/cli/commands/registry/publish.py +1 -1
  84. nat/cli/commands/registry/pull.py +1 -1
  85. nat/cli/commands/registry/registry.py +1 -1
  86. nat/cli/commands/registry/remove.py +1 -1
  87. nat/cli/commands/registry/search.py +1 -1
  88. nat/cli/commands/sizing/__init__.py +1 -1
  89. nat/cli/commands/sizing/calc.py +1 -1
  90. nat/cli/commands/sizing/sizing.py +1 -1
  91. nat/cli/commands/start.py +1 -1
  92. nat/cli/commands/uninstall.py +1 -1
  93. nat/cli/commands/validate.py +1 -1
  94. nat/cli/commands/workflow/__init__.py +1 -1
  95. nat/cli/commands/workflow/workflow.py +1 -1
  96. nat/cli/commands/workflow/workflow_commands.py +3 -2
  97. nat/cli/entrypoint.py +15 -37
  98. nat/cli/main.py +2 -2
  99. nat/cli/plugin_loader.py +69 -0
  100. nat/cli/register_workflow.py +183 -5
  101. nat/cli/type_registry.py +169 -3
  102. nat/control_flow/register.py +1 -1
  103. nat/control_flow/router_agent/agent.py +1 -1
  104. nat/control_flow/router_agent/prompt.py +1 -1
  105. nat/control_flow/router_agent/register.py +1 -1
  106. nat/control_flow/sequential_executor.py +28 -7
  107. nat/data_models/__init__.py +1 -1
  108. nat/data_models/agent.py +1 -1
  109. nat/data_models/api_server.py +38 -3
  110. nat/data_models/authentication.py +1 -1
  111. nat/data_models/common.py +1 -1
  112. nat/data_models/component.py +7 -1
  113. nat/data_models/component_ref.py +34 -1
  114. nat/data_models/config.py +62 -1
  115. nat/data_models/dataset_handler.py +15 -2
  116. nat/data_models/discovery_metadata.py +1 -1
  117. nat/data_models/embedder.py +1 -1
  118. nat/data_models/evaluate.py +6 -1
  119. nat/data_models/evaluator.py +1 -1
  120. nat/data_models/finetuning.py +260 -0
  121. nat/data_models/front_end.py +1 -1
  122. nat/data_models/function.py +1 -1
  123. nat/data_models/function_dependencies.py +1 -1
  124. nat/data_models/gated_field_mixin.py +1 -1
  125. nat/data_models/interactive.py +1 -1
  126. nat/data_models/intermediate_step.py +29 -2
  127. nat/data_models/invocation_node.py +1 -1
  128. nat/data_models/llm.py +1 -1
  129. nat/data_models/logging.py +1 -1
  130. nat/data_models/memory.py +1 -1
  131. nat/data_models/middleware.py +3 -1
  132. nat/data_models/object_store.py +1 -1
  133. nat/data_models/openai_mcp.py +1 -1
  134. nat/data_models/optimizable.py +1 -1
  135. nat/data_models/optimizer.py +1 -1
  136. nat/data_models/profiler.py +1 -1
  137. nat/data_models/registry_handler.py +1 -1
  138. nat/data_models/retriever.py +1 -1
  139. nat/data_models/retry_mixin.py +1 -1
  140. nat/data_models/runtime_enum.py +1 -1
  141. nat/data_models/span.py +1 -1
  142. nat/data_models/step_adaptor.py +1 -1
  143. nat/data_models/streaming.py +1 -1
  144. nat/data_models/swe_bench_model.py +1 -1
  145. nat/data_models/telemetry_exporter.py +1 -1
  146. nat/data_models/thinking_mixin.py +1 -1
  147. nat/data_models/ttc_strategy.py +1 -1
  148. nat/embedder/azure_openai_embedder.py +1 -1
  149. nat/embedder/nim_embedder.py +1 -1
  150. nat/embedder/openai_embedder.py +1 -1
  151. nat/embedder/register.py +1 -1
  152. nat/eval/__init__.py +1 -1
  153. nat/eval/config.py +8 -1
  154. nat/eval/dataset_handler/dataset_downloader.py +1 -1
  155. nat/eval/dataset_handler/dataset_filter.py +1 -1
  156. nat/eval/dataset_handler/dataset_handler.py +4 -2
  157. nat/eval/evaluate.py +217 -80
  158. nat/eval/evaluator/__init__.py +1 -1
  159. nat/eval/evaluator/base_evaluator.py +2 -2
  160. nat/eval/evaluator/evaluator_model.py +3 -2
  161. nat/eval/intermediate_step_adapter.py +1 -1
  162. nat/eval/llm_validator.py +336 -0
  163. nat/eval/rag_evaluator/evaluate.py +17 -10
  164. nat/eval/rag_evaluator/register.py +1 -1
  165. nat/eval/red_teaming_evaluator/__init__.py +14 -0
  166. nat/eval/red_teaming_evaluator/data_models.py +66 -0
  167. nat/eval/red_teaming_evaluator/evaluate.py +327 -0
  168. nat/eval/red_teaming_evaluator/filter_conditions.py +75 -0
  169. nat/eval/red_teaming_evaluator/register.py +55 -0
  170. nat/eval/register.py +2 -1
  171. nat/eval/remote_workflow.py +1 -1
  172. nat/eval/runners/__init__.py +1 -1
  173. nat/eval/runners/config.py +1 -1
  174. nat/eval/runners/multi_eval_runner.py +1 -1
  175. nat/eval/runners/red_teaming_runner/__init__.py +24 -0
  176. nat/eval/runners/red_teaming_runner/config.py +282 -0
  177. nat/eval/runners/red_teaming_runner/report_utils.py +707 -0
  178. nat/eval/runners/red_teaming_runner/runner.py +867 -0
  179. nat/eval/runtime_evaluator/__init__.py +1 -1
  180. nat/eval/runtime_evaluator/evaluate.py +1 -1
  181. nat/eval/runtime_evaluator/register.py +1 -1
  182. nat/eval/runtime_event_subscriber.py +1 -1
  183. nat/eval/swe_bench_evaluator/evaluate.py +1 -1
  184. nat/eval/swe_bench_evaluator/register.py +1 -1
  185. nat/eval/trajectory_evaluator/evaluate.py +2 -2
  186. nat/eval/trajectory_evaluator/register.py +1 -1
  187. nat/eval/tunable_rag_evaluator/evaluate.py +5 -5
  188. nat/eval/tunable_rag_evaluator/register.py +1 -1
  189. nat/eval/usage_stats.py +1 -1
  190. nat/eval/utils/eval_trace_ctx.py +1 -1
  191. nat/eval/utils/output_uploader.py +1 -1
  192. nat/eval/utils/tqdm_position_registry.py +1 -1
  193. nat/eval/utils/weave_eval.py +1 -1
  194. nat/experimental/decorators/experimental_warning_decorator.py +1 -1
  195. nat/experimental/test_time_compute/editing/iterative_plan_refinement_editor.py +1 -1
  196. nat/experimental/test_time_compute/editing/llm_as_a_judge_editor.py +1 -1
  197. nat/experimental/test_time_compute/editing/motivation_aware_summarization.py +1 -1
  198. nat/experimental/test_time_compute/functions/execute_score_select_function.py +1 -1
  199. nat/experimental/test_time_compute/functions/multi_llm_judge_function.py +88 -0
  200. nat/experimental/test_time_compute/functions/plan_select_execute_function.py +1 -1
  201. nat/experimental/test_time_compute/functions/ttc_tool_orchestration_function.py +1 -1
  202. nat/experimental/test_time_compute/functions/ttc_tool_wrapper_function.py +1 -1
  203. nat/experimental/test_time_compute/models/editor_config.py +1 -1
  204. nat/experimental/test_time_compute/models/scoring_config.py +1 -1
  205. nat/experimental/test_time_compute/models/search_config.py +20 -2
  206. nat/experimental/test_time_compute/models/selection_config.py +33 -2
  207. nat/experimental/test_time_compute/models/stage_enums.py +1 -1
  208. nat/experimental/test_time_compute/models/strategy_base.py +1 -1
  209. nat/experimental/test_time_compute/models/tool_use_config.py +1 -1
  210. nat/experimental/test_time_compute/models/ttc_item.py +1 -1
  211. nat/experimental/test_time_compute/register.py +4 -1
  212. nat/experimental/test_time_compute/scoring/llm_based_agent_scorer.py +1 -1
  213. nat/experimental/test_time_compute/scoring/llm_based_plan_scorer.py +1 -1
  214. nat/experimental/test_time_compute/scoring/motivation_aware_scorer.py +1 -1
  215. nat/experimental/test_time_compute/search/multi_llm_generation.py +115 -0
  216. nat/experimental/test_time_compute/search/multi_llm_planner.py +1 -1
  217. nat/experimental/test_time_compute/search/multi_query_retrieval_search.py +1 -1
  218. nat/experimental/test_time_compute/search/single_shot_multi_plan_planner.py +1 -1
  219. nat/experimental/test_time_compute/selection/best_of_n_selector.py +1 -1
  220. nat/experimental/test_time_compute/selection/llm_based_agent_output_selector.py +1 -1
  221. nat/experimental/test_time_compute/selection/llm_based_output_merging_selector.py +1 -1
  222. nat/experimental/test_time_compute/selection/llm_based_plan_selector.py +1 -1
  223. nat/experimental/test_time_compute/selection/llm_judge_selection.py +127 -0
  224. nat/experimental/test_time_compute/selection/threshold_selector.py +1 -1
  225. nat/finetuning/__init__.py +24 -0
  226. nat/finetuning/finetuning_runtime.py +143 -0
  227. nat/finetuning/interfaces/__init__.py +24 -0
  228. nat/finetuning/interfaces/finetuning_runner.py +261 -0
  229. nat/finetuning/interfaces/trainer_adapter.py +103 -0
  230. nat/finetuning/interfaces/trajectory_builder.py +115 -0
  231. nat/finetuning/utils/__init__.py +15 -0
  232. nat/finetuning/utils/parsers/__init__.py +15 -0
  233. nat/finetuning/utils/parsers/adk_parser.py +141 -0
  234. nat/finetuning/utils/parsers/base_parser.py +238 -0
  235. nat/finetuning/utils/parsers/common.py +91 -0
  236. nat/finetuning/utils/parsers/langchain_parser.py +267 -0
  237. nat/finetuning/utils/parsers/llama_index_parser.py +218 -0
  238. nat/front_ends/__init__.py +1 -1
  239. nat/front_ends/console/__init__.py +1 -1
  240. nat/front_ends/console/authentication_flow_handler.py +1 -1
  241. nat/front_ends/console/console_front_end_config.py +4 -1
  242. nat/front_ends/console/console_front_end_plugin.py +5 -4
  243. nat/front_ends/console/register.py +1 -1
  244. nat/front_ends/cron/__init__.py +1 -1
  245. nat/front_ends/fastapi/__init__.py +1 -1
  246. nat/front_ends/fastapi/async_job.py +128 -0
  247. nat/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +1 -1
  248. nat/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +13 -9
  249. nat/front_ends/fastapi/dask_client_mixin.py +1 -1
  250. nat/front_ends/fastapi/fastapi_front_end_config.py +1 -1
  251. nat/front_ends/fastapi/fastapi_front_end_controller.py +1 -1
  252. nat/front_ends/fastapi/fastapi_front_end_plugin.py +25 -30
  253. nat/front_ends/fastapi/fastapi_front_end_plugin_worker.py +195 -60
  254. nat/front_ends/fastapi/html_snippets/__init__.py +1 -1
  255. nat/front_ends/fastapi/html_snippets/auth_code_grant_success.py +1 -1
  256. nat/front_ends/fastapi/intermediate_steps_subscriber.py +12 -1
  257. nat/front_ends/fastapi/job_store.py +23 -11
  258. nat/front_ends/fastapi/main.py +1 -1
  259. nat/front_ends/fastapi/message_handler.py +27 -4
  260. nat/front_ends/fastapi/message_validator.py +54 -2
  261. nat/front_ends/fastapi/register.py +1 -1
  262. nat/front_ends/fastapi/response_helpers.py +16 -15
  263. nat/front_ends/fastapi/step_adaptor.py +1 -1
  264. nat/front_ends/fastapi/utils.py +1 -1
  265. nat/front_ends/register.py +1 -2
  266. nat/front_ends/simple_base/__init__.py +1 -1
  267. nat/front_ends/simple_base/simple_front_end_plugin_base.py +6 -4
  268. nat/llm/aws_bedrock_llm.py +1 -1
  269. nat/llm/azure_openai_llm.py +10 -1
  270. nat/llm/dynamo_llm.py +363 -0
  271. nat/llm/huggingface_llm.py +177 -0
  272. nat/llm/litellm_llm.py +1 -1
  273. nat/llm/nim_llm.py +1 -1
  274. nat/llm/openai_llm.py +1 -1
  275. nat/llm/register.py +3 -1
  276. nat/llm/utils/__init__.py +1 -1
  277. nat/llm/utils/env_config_value.py +1 -1
  278. nat/llm/utils/error.py +1 -1
  279. nat/llm/utils/thinking.py +1 -1
  280. nat/memory/__init__.py +1 -1
  281. nat/memory/interfaces.py +1 -1
  282. nat/memory/models.py +1 -1
  283. nat/meta/pypi.md +1 -1
  284. nat/middleware/__init__.py +5 -5
  285. nat/middleware/cache/__init__.py +14 -0
  286. nat/middleware/{cache_middleware.py → cache/cache_middleware.py} +39 -42
  287. nat/middleware/cache/cache_middleware_config.py +44 -0
  288. nat/middleware/cache/register.py +33 -0
  289. nat/middleware/defense/__init__.py +14 -0
  290. nat/middleware/defense/defense_middleware.py +362 -0
  291. nat/middleware/defense/defense_middleware_content_guard.py +455 -0
  292. nat/middleware/defense/defense_middleware_data_models.py +91 -0
  293. nat/middleware/defense/defense_middleware_output_verifier.py +440 -0
  294. nat/middleware/defense/defense_middleware_pii.py +356 -0
  295. nat/middleware/defense/register.py +82 -0
  296. nat/middleware/dynamic/__init__.py +14 -0
  297. nat/middleware/dynamic/dynamic_function_middleware.py +962 -0
  298. nat/middleware/dynamic/dynamic_middleware_config.py +132 -0
  299. nat/middleware/dynamic/register.py +34 -0
  300. nat/middleware/function_middleware.py +236 -52
  301. nat/middleware/logging/__init__.py +14 -0
  302. nat/middleware/logging/logging_middleware.py +67 -0
  303. nat/middleware/logging/logging_middleware_config.py +28 -0
  304. nat/middleware/logging/register.py +33 -0
  305. nat/middleware/middleware.py +142 -28
  306. nat/middleware/red_teaming/__init__.py +14 -0
  307. nat/middleware/red_teaming/red_teaming_middleware.py +344 -0
  308. nat/middleware/red_teaming/red_teaming_middleware_config.py +112 -0
  309. nat/middleware/red_teaming/register.py +47 -0
  310. nat/middleware/register.py +7 -20
  311. nat/middleware/utils/__init__.py +14 -0
  312. nat/middleware/utils/workflow_inventory.py +155 -0
  313. nat/object_store/__init__.py +1 -1
  314. nat/object_store/in_memory_object_store.py +1 -1
  315. nat/object_store/interfaces.py +1 -1
  316. nat/object_store/models.py +1 -1
  317. nat/object_store/register.py +1 -1
  318. nat/observability/__init__.py +1 -1
  319. nat/observability/exporter/__init__.py +1 -1
  320. nat/observability/exporter/base_exporter.py +1 -1
  321. nat/observability/exporter/exporter.py +1 -1
  322. nat/observability/exporter/file_exporter.py +1 -1
  323. nat/observability/exporter/processing_exporter.py +1 -1
  324. nat/observability/exporter/raw_exporter.py +1 -1
  325. nat/observability/exporter/span_exporter.py +7 -1
  326. nat/observability/exporter_manager.py +1 -1
  327. nat/observability/mixin/__init__.py +1 -1
  328. nat/observability/mixin/batch_config_mixin.py +1 -1
  329. nat/observability/mixin/collector_config_mixin.py +1 -1
  330. nat/observability/mixin/file_mixin.py +1 -1
  331. nat/observability/mixin/file_mode.py +1 -1
  332. nat/observability/mixin/redaction_config_mixin.py +1 -1
  333. nat/observability/mixin/resource_conflict_mixin.py +1 -1
  334. nat/observability/mixin/serialize_mixin.py +1 -1
  335. nat/observability/mixin/tagging_config_mixin.py +1 -1
  336. nat/observability/mixin/type_introspection_mixin.py +1 -1
  337. nat/observability/processor/__init__.py +1 -1
  338. nat/observability/processor/batching_processor.py +1 -1
  339. nat/observability/processor/callback_processor.py +1 -1
  340. nat/observability/processor/falsy_batch_filter_processor.py +1 -1
  341. nat/observability/processor/intermediate_step_serializer.py +1 -1
  342. nat/observability/processor/processor.py +1 -1
  343. nat/observability/processor/processor_factory.py +1 -1
  344. nat/observability/processor/redaction/__init__.py +1 -1
  345. nat/observability/processor/redaction/contextual_redaction_processor.py +1 -1
  346. nat/observability/processor/redaction/contextual_span_redaction_processor.py +1 -1
  347. nat/observability/processor/redaction/redaction_processor.py +1 -1
  348. nat/observability/processor/redaction/span_header_redaction_processor.py +1 -1
  349. nat/observability/processor/span_tagging_processor.py +1 -1
  350. nat/observability/register.py +1 -1
  351. nat/observability/utils/__init__.py +1 -1
  352. nat/observability/utils/dict_utils.py +1 -1
  353. nat/observability/utils/time_utils.py +1 -1
  354. nat/profiler/calc/__init__.py +1 -1
  355. nat/profiler/calc/calc_runner.py +3 -3
  356. nat/profiler/calc/calculations.py +1 -1
  357. nat/profiler/calc/data_models.py +1 -1
  358. nat/profiler/calc/plot.py +30 -3
  359. nat/profiler/callbacks/agno_callback_handler.py +1 -1
  360. nat/profiler/callbacks/base_callback_class.py +1 -1
  361. nat/profiler/callbacks/langchain_callback_handler.py +33 -3
  362. nat/profiler/callbacks/llama_index_callback_handler.py +13 -10
  363. nat/profiler/callbacks/semantic_kernel_callback_handler.py +1 -1
  364. nat/profiler/callbacks/token_usage_base_model.py +1 -1
  365. nat/profiler/data_frame_row.py +1 -1
  366. nat/profiler/data_models.py +1 -1
  367. nat/profiler/decorators/framework_wrapper.py +16 -1
  368. nat/profiler/decorators/function_tracking.py +1 -1
  369. nat/profiler/forecasting/config.py +1 -1
  370. nat/profiler/forecasting/model_trainer.py +1 -1
  371. nat/profiler/forecasting/models/__init__.py +1 -1
  372. nat/profiler/forecasting/models/forecasting_base_model.py +1 -1
  373. nat/profiler/forecasting/models/linear_model.py +1 -1
  374. nat/profiler/forecasting/models/random_forest_regressor.py +1 -1
  375. nat/profiler/inference_metrics_model.py +1 -1
  376. nat/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +1 -1
  377. nat/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +1 -1
  378. nat/profiler/inference_optimization/data_models.py +1 -1
  379. nat/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +1 -1
  380. nat/profiler/inference_optimization/experimental/prefix_span_analysis.py +1 -1
  381. nat/profiler/inference_optimization/llm_metrics.py +1 -1
  382. nat/profiler/inference_optimization/prompt_caching.py +1 -1
  383. nat/profiler/inference_optimization/token_uniqueness.py +1 -1
  384. nat/profiler/inference_optimization/workflow_runtimes.py +1 -1
  385. nat/profiler/intermediate_property_adapter.py +1 -1
  386. nat/profiler/parameter_optimization/optimizable_utils.py +1 -1
  387. nat/profiler/parameter_optimization/optimizer_runtime.py +1 -1
  388. nat/profiler/parameter_optimization/parameter_optimizer.py +1 -1
  389. nat/profiler/parameter_optimization/parameter_selection.py +1 -1
  390. nat/profiler/parameter_optimization/pareto_visualizer.py +1 -1
  391. nat/profiler/parameter_optimization/prompt_optimizer.py +1 -1
  392. nat/profiler/parameter_optimization/update_helpers.py +1 -1
  393. nat/profiler/profile_runner.py +1 -1
  394. nat/profiler/utils.py +1 -1
  395. nat/registry_handlers/local/local_handler.py +1 -1
  396. nat/registry_handlers/local/register_local.py +1 -1
  397. nat/registry_handlers/metadata_factory.py +1 -1
  398. nat/registry_handlers/package_utils.py +1 -1
  399. nat/registry_handlers/pypi/pypi_handler.py +1 -1
  400. nat/registry_handlers/pypi/register_pypi.py +1 -1
  401. nat/registry_handlers/register.py +1 -1
  402. nat/registry_handlers/registry_handler_base.py +1 -1
  403. nat/registry_handlers/rest/register_rest.py +1 -1
  404. nat/registry_handlers/rest/rest_handler.py +1 -1
  405. nat/registry_handlers/schemas/headers.py +1 -1
  406. nat/registry_handlers/schemas/package.py +1 -1
  407. nat/registry_handlers/schemas/publish.py +1 -1
  408. nat/registry_handlers/schemas/pull.py +1 -1
  409. nat/registry_handlers/schemas/remove.py +1 -1
  410. nat/registry_handlers/schemas/search.py +1 -1
  411. nat/registry_handlers/schemas/status.py +1 -1
  412. nat/retriever/interface.py +1 -1
  413. nat/retriever/milvus/__init__.py +1 -1
  414. nat/retriever/milvus/register.py +1 -1
  415. nat/retriever/milvus/retriever.py +1 -1
  416. nat/retriever/models.py +1 -1
  417. nat/retriever/nemo_retriever/__init__.py +1 -1
  418. nat/retriever/nemo_retriever/register.py +1 -1
  419. nat/retriever/nemo_retriever/retriever.py +5 -5
  420. nat/retriever/register.py +1 -1
  421. nat/runtime/__init__.py +1 -1
  422. nat/runtime/loader.py +10 -3
  423. nat/runtime/metrics.py +180 -0
  424. nat/runtime/runner.py +1 -5
  425. nat/runtime/session.py +451 -32
  426. nat/runtime/user_metadata.py +1 -1
  427. nat/settings/global_settings.py +1 -1
  428. nat/tool/chat_completion.py +1 -1
  429. nat/tool/code_execution/README.md +1 -1
  430. nat/tool/code_execution/code_sandbox.py +1 -1
  431. nat/tool/code_execution/local_sandbox/Dockerfile.sandbox +1 -1
  432. nat/tool/code_execution/local_sandbox/__init__.py +1 -1
  433. nat/tool/code_execution/local_sandbox/local_sandbox_server.py +1 -1
  434. nat/tool/code_execution/local_sandbox/start_local_sandbox.sh +1 -1
  435. nat/tool/code_execution/register.py +1 -1
  436. nat/tool/code_execution/utils.py +1 -1
  437. nat/tool/datetime_tools.py +1 -1
  438. nat/tool/document_search.py +1 -1
  439. nat/tool/github_tools.py +1 -1
  440. nat/tool/memory_tools/add_memory_tool.py +1 -1
  441. nat/tool/memory_tools/delete_memory_tool.py +1 -1
  442. nat/tool/memory_tools/get_memory_tool.py +1 -1
  443. nat/tool/nvidia_rag.py +2 -2
  444. nat/tool/register.py +1 -1
  445. nat/tool/retriever.py +1 -1
  446. nat/tool/server_tools.py +1 -1
  447. nat/utils/__init__.py +8 -5
  448. nat/utils/callable_utils.py +1 -1
  449. nat/utils/data_models/schema_validator.py +1 -1
  450. nat/utils/debugging_utils.py +1 -1
  451. nat/utils/decorators.py +1 -1
  452. nat/utils/dump_distro_mapping.py +1 -1
  453. nat/utils/exception_handlers/automatic_retries.py +3 -3
  454. nat/utils/exception_handlers/schemas.py +1 -1
  455. nat/utils/io/model_processing.py +1 -1
  456. nat/utils/io/supress_logs.py +33 -0
  457. nat/utils/io/yaml_tools.py +1 -1
  458. nat/utils/log_levels.py +1 -1
  459. nat/utils/log_utils.py +13 -1
  460. nat/utils/metadata_utils.py +1 -1
  461. nat/utils/optional_imports.py +1 -1
  462. nat/utils/producer_consumer_queue.py +1 -1
  463. nat/utils/reactive/base/observable_base.py +1 -1
  464. nat/utils/reactive/base/observer_base.py +1 -1
  465. nat/utils/reactive/base/subject_base.py +1 -1
  466. nat/utils/reactive/observable.py +1 -1
  467. nat/utils/reactive/observer.py +1 -1
  468. nat/utils/reactive/subject.py +1 -1
  469. nat/utils/reactive/subscription.py +1 -1
  470. nat/utils/responses_api.py +1 -1
  471. nat/utils/settings/global_settings.py +1 -1
  472. nat/utils/string_utils.py +1 -1
  473. nat/utils/type_converter.py +18 -5
  474. nat/utils/type_utils.py +1 -1
  475. nat/utils/url_utils.py +1 -1
  476. {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/METADATA +39 -14
  477. nvidia_nat-1.4.0a20260113.dist-info/RECORD +547 -0
  478. nvidia_nat-1.4.0a20260113.dist-info/entry_points.txt +38 -0
  479. nat/cli/commands/mcp/mcp.py +0 -986
  480. nat/front_ends/mcp/introspection_token_verifier.py +0 -73
  481. nat/front_ends/mcp/mcp_front_end_config.py +0 -109
  482. nat/front_ends/mcp/mcp_front_end_plugin.py +0 -155
  483. nat/front_ends/mcp/mcp_front_end_plugin_worker.py +0 -388
  484. nat/front_ends/mcp/memory_profiler.py +0 -320
  485. nat/front_ends/mcp/register.py +0 -27
  486. nat/front_ends/mcp/tool_converter.py +0 -321
  487. nvidia_nat-1.4.0a20251120.dist-info/RECORD +0 -488
  488. nvidia_nat-1.4.0a20251120.dist-info/entry_points.txt +0 -23
  489. {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/WHEEL +0 -0
  490. {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
  491. {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE.md +0 -0
  492. {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -0,0 +1,336 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """
16
+ LLM Endpoint Validator for NeMo Agent Toolkit evaluation.
17
+
18
+ This module provides functionality to validate LLM endpoints before running evaluation
19
+ workflows. This helps catch deployment issues early (e.g., models not deployed after
20
+ training cancellation) and provides actionable error messages.
21
+
22
+ The validation uses the NeMo Agent Toolkit `WorkflowBuilder` to instantiate LLMs in a framework-agnostic way,
23
+ then tests them with a minimal `ainvoke()` call. This approach works for all LLM types
24
+ (OpenAI, NIM, AWS Bedrock, vLLM, etc.) and respects the auth and config system.
25
+
26
+ Note: Validation invokes actual LLM endpoints with minimal test prompts. This may incur
27
+ small API costs for cloud-hosted models.
28
+ """
29
+
30
+ import asyncio
31
+ import logging
32
+ import time
33
+ from typing import TYPE_CHECKING
34
+
35
+ from nat.builder.framework_enum import LLMFrameworkEnum
36
+ from nat.builder.workflow_builder import WorkflowBuilder
37
+ from nat.data_models.llm import LLMBaseConfig
38
+
39
+ if TYPE_CHECKING:
40
+ from nat.data_models.config import Config
41
+
42
logger = logging.getLogger(__name__)

# Tunables for LLM endpoint validation
VALIDATION_TIMEOUT_SECONDS = 30  # Timeout, in seconds, for each LLM validation
MAX_ERROR_MESSAGE_LENGTH = 500  # Character budget for error messages; longer ones are truncated
CONCURRENT_VALIDATION_BATCH_SIZE = 5  # Max LLMs to validate in parallel
VALIDATION_PROMPT = "test"  # Minimal prompt for endpoint validation
49
+
50
+
51
+ def _is_404_error(exception: Exception) -> bool:
52
+ """
53
+ Detect if an exception represents a 404 (model not found) error.
54
+
55
+ This handles various 404 error formats from different LLM providers:
56
+ - OpenAI SDK: openai.NotFoundError
57
+ - HTTP responses: HTTP 404 or status code 404
58
+ - LangChain wrappers: Various wrapped 404s
59
+
60
+ Args:
61
+ exception: The exception to check.
62
+
63
+ Returns:
64
+ True if this is a 404 error, False otherwise.
65
+ """
66
+ exception_str = str(exception).lower()
67
+ exception_type = type(exception).__name__
68
+
69
+ # Check for NotFoundError type (OpenAI SDK)
70
+ if "notfounderror" in exception_type.lower():
71
+ return True
72
+
73
+ # Check for HTTP 404 specifically (not just "404" which could appear in other contexts)
74
+ if any(pattern in exception_str for pattern in ["http 404", "status code 404", "status_code=404"]):
75
+ return True
76
+
77
+ # Check for model-specific not found errors
78
+ if "model" in exception_str and any(phrase in exception_str
79
+ for phrase in ["not found", "does not exist", "not deployed", "not available"]):
80
+ return True
81
+
82
+ return False
83
+
84
+
85
def _get_llm_endpoint_info(llm_config: LLMBaseConfig) -> tuple[str | None, str | None]:
    """
    Read the endpoint URL and model identifier off an LLM config.

    Args:
        llm_config: The LLM configuration object to inspect.

    Returns:
        A ``(base_url, model_name)`` tuple. Each element is None when the
        config has no such attribute (or the attribute itself is None).
    """
    endpoint = getattr(llm_config, "base_url", None)

    # The model may be stored under either attribute; `model_name` wins when set.
    model = None
    for attribute in ("model_name", "model"):
        model = getattr(llm_config, attribute, None)
        if model is not None:
            break

    return endpoint, model
103
+
104
+
105
def _truncate_error_message(message: str, max_length: int = MAX_ERROR_MESSAGE_LENGTH) -> str:
    """
    Shorten an over-long error message while keeping its head and its tail.

    Messages within the limit are returned unchanged. Longer ones are rebuilt
    as ``<head> ... (truncated) ... <tail>`` so that both the beginning of the
    message and its end survive.

    Args:
        message: The error message to truncate.
        max_length: Maximum length to keep.

    Returns:
        The original message if it fits, otherwise a truncated version.
    """
    if len(message) > max_length:
        marker = " ... (truncated) ... "

        if max_length > len(marker) + 2:
            # Split what is left after the marker evenly between head and tail.
            half = (max_length - len(marker)) // 2
            head, tail = message[:half], message[-half:]
            return head + marker + tail

        # The limit is too small to fit the marker plus context: plain prefix cut.
        return message[:max_length]

    return message
131
+
132
+
133
async def _validate_single_llm(builder: WorkflowBuilder, llm_name: str,
                               llm_config: LLMBaseConfig) -> tuple[str | None, str | None]:
    """
    Validate a single LLM endpoint by registering it and sending one minimal prompt.

    The LLM is added to ``builder``, then every member of ``LLMFrameworkEnum`` is tried in
    turn until ``builder.get_llm`` succeeds. The resulting client is invoked once with
    ``VALIDATION_PROMPT`` under a ``VALIDATION_TIMEOUT_SECONDS`` timeout. Any exception
    raised along the way (other than a timeout) is classified with ``_is_404_error``.

    Args:
        builder: The WorkflowBuilder instance.
        llm_name: Name of the LLM to validate.
        llm_config: Configuration for the LLM.

    Returns:
        Tuple of (error_type, error_message):
            - error_type: "404" for model not found, "warning" for non-critical, None for success
            - error_message: Description of the error, or None if successful
    """
    try:
        logger.info("Validating LLM '%s' (type: %s)", llm_name, llm_config.type)
        # perf_counter is monotonic, so the reported duration cannot be skewed (or go
        # negative) if the system wall clock is adjusted while the request is in flight.
        start_time = time.perf_counter()

        # Add LLM to builder (handles all LLM types)
        await builder.add_llm(llm_name, llm_config)

        # Try all frameworks to find one that works with this LLM
        llm = None
        for framework in LLMFrameworkEnum:
            try:
                llm = await builder.get_llm(llm_name, framework)
                logger.debug("LLM '%s' successfully loaded with framework '%s'", llm_name, framework.value)
                break  # Found a working framework
            except Exception as e:
                logger.debug("LLM '%s' failed with framework '%s': %s", llm_name, framework.value, e)
                continue  # Try next framework

        if llm is None:
            # Log all attempted frameworks for debugging
            attempted = [f.value for f in LLMFrameworkEnum]
            error_msg = (f"Could not instantiate LLM '{llm_name}' with any known framework. "
                         f"Attempted: {', '.join(attempted)}. "
                         f"If this LLM uses a custom framework, this warning can be safely ignored. "
                         f"Otherwise, verify the LLM type '{llm_config.type}' is supported and configured correctly.")
            logger.warning("LLM '%s' - Framework instantiation failed: %s", llm_name, error_msg)
            return ("warning", error_msg)

        # Test with minimal prompt - this will hit the endpoint
        await asyncio.wait_for(llm.ainvoke(VALIDATION_PROMPT), timeout=VALIDATION_TIMEOUT_SECONDS)

        duration = time.perf_counter() - start_time
        logger.info("LLM '%s' validated successfully in %.2fs", llm_name, duration)
        return (None, None)

    except (TimeoutError, asyncio.TimeoutError):
        # asyncio.TimeoutError is an alias of the builtin TimeoutError on Python 3.11+; naming
        # both keeps timeouts out of the generic handler below on older interpreters too.
        error_msg = f"Validation timed out after {VALIDATION_TIMEOUT_SECONDS}s"
        logger.warning("LLM '%s' validation timed out", llm_name)
        return ("warning", _truncate_error_message(error_msg))

    except (KeyboardInterrupt, SystemExit):
        # Don't catch system-level interrupts
        raise

    except Exception as invoke_error:
        # Check if this is a 404 error (model not deployed)
        if _is_404_error(invoke_error):
            base_url, model_name = _get_llm_endpoint_info(llm_config)

            error_msg = (f"LLM '{llm_name}' validation failed: Model not found (404).\n"
                         f"\nThis typically means:\n"
                         f" 1. The model has not been deployed yet\n"
                         f" 2. The model name is incorrect\n"
                         f" 3. A training job was canceled and the model was never deployed\n"
                         f"\nLLM Configuration:\n"
                         f" Type: {str(llm_config.type)}\n"
                         f" Endpoint: {base_url or 'N/A'}\n"
                         f" Model: {model_name or 'N/A'}\n"
                         f"\nACTION REQUIRED:\n"
                         f" 1. Verify the model is deployed (check your deployment service)\n"
                         f" 2. If using NeMo Customizer, ensure training completed successfully\n"
                         f" 3. Check model deployment status in your platform\n"
                         f" 4. Verify the model name matches the deployed model\n"
                         f"\nOriginal error: {_truncate_error_message(str(invoke_error))}")
            logger.exception(error_msg)
            return ("404", error_msg)

        else:
            # Non-404 error - might be auth, rate limit, temporary issue, etc.
            error_msg = (f"Could not fully validate LLM '{llm_name}': {_truncate_error_message(str(invoke_error))}. "
                         f"This might be due to auth requirements, rate limits, or temporary issues. "
                         f"Evaluation will proceed, but may fail if the LLM is truly inaccessible.")
            logger.exception(error_msg)
            return ("warning", _truncate_error_message(error_msg))
222
+
223
+
224
async def validate_llm_endpoints(config: "Config") -> None:
    """
    Check that every LLM declared in the config can actually be reached.

    Each configured LLM is handed to ``_validate_single_llm`` together with a shared
    ``WorkflowBuilder``. The per-LLM outcome is sorted into one of three buckets:

    - "404" outcomes are collected and cause this function to raise once all LLMs have
      been checked.
    - "warning" outcomes, and any exception object returned by ``asyncio.gather``, are
      collected and reported in a single warning log entry; they do not stop evaluation.
    - Everything else counts as success.

    LLMs are processed in slices of ``CONCURRENT_VALIDATION_BATCH_SIZE``; the members of a
    slice are validated concurrently and the slices run one after another.

    Note: This function invokes actual LLM endpoints, which may incur small API costs.

    Args:
        config: The NAT configuration object containing LLM definitions.

    Raises:
        RuntimeError: If any LLM endpoint has a 404 error (model not deployed).
        ValueError: If ``config`` has no ``llms`` attribute or ``config.llms`` is not a dict.
    """
    # Guard clauses: reject structurally unusable configs before doing any work.
    if not hasattr(config, "llms"):
        raise ValueError("Config does not have 'llms' attribute. Cannot validate LLM endpoints.")

    if not isinstance(config.llms, dict):
        raise ValueError(
            f"Config.llms must be a dict, got {type(config.llms).__name__}. Cannot validate LLM endpoints.")

    if not config.llms:
        logger.info("No LLMs configured - skipping endpoint validation")
        return

    not_found = []  # (llm_name, error_message) pairs for 404 outcomes
    soft_failures = []  # (llm_name, warning_message) pairs for non-critical outcomes

    async with WorkflowBuilder() as builder:
        pending = list(config.llms.items())

        # Walk the LLM list slice by slice so only a bounded number of requests is in flight.
        for offset in range(0, len(pending), CONCURRENT_VALIDATION_BATCH_SIZE):
            chunk = pending[offset:offset + CONCURRENT_VALIDATION_BATCH_SIZE]

            outcomes = await asyncio.gather(
                *[_validate_single_llm(builder, name, cfg) for name, cfg in chunk],
                return_exceptions=True,
            )

            # gather preserves input order, so pairing with the chunk recovers each LLM name.
            for (name, _cfg), outcome in zip(chunk, outcomes, strict=True):
                if not isinstance(outcome, BaseException):
                    kind, detail = outcome
                    if kind == "404":
                        not_found.append((name, detail))
                    elif kind == "warning":
                        soft_failures.append((name, detail))
                    # kind is None -> validation succeeded, nothing to record
                    continue

                # System-level interrupts must not be downgraded to a warning.
                if isinstance(outcome, (KeyboardInterrupt, SystemExit)):
                    raise outcome

                logger.warning("Unexpected error during validation: %s", _truncate_error_message(str(outcome)))
                soft_failures.append((name, _truncate_error_message(str(outcome))))

    # Summary figures used by the log lines below.
    total_llms = len(pending)
    warned_count = len(soft_failures)
    succeeded_count = total_llms - len(not_found) - warned_count

    if soft_failures:
        warning_lines = [f" - {name}: {msg}" for name, msg in soft_failures]
        logger.warning(
            "LLM validation completed with %d warning(s):\n%s\nThese LLMs may still work during evaluation.",
            warned_count,
            "\n".join(warning_lines),
        )

    if not not_found:
        logger.info(
            "All LLM endpoints validated successfully - %d total, %d succeeded, %d warned",
            total_llms,
            succeeded_count,
            warned_count,
        )
        return

    # At least one model is missing: log the metrics, then abort with the full details.
    error_summary = "\n\n".join(f"LLM '{name}':\n{msg}" for name, msg in not_found)

    logger.error(
        "Validation summary: %d total, %d succeeded, %d warned, %d failed (404)",
        total_llms,
        succeeded_count,
        warned_count,
        len(not_found),
    )

    raise RuntimeError(f"LLM endpoint validation failed for {len(not_found)} LLM(s) with 404 errors:\n\n"
                       f"{error_summary}\n\n"
                       f"Evaluation cannot proceed with undeployed models. "
                       f"Please resolve the deployment issues above before retrying.")
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -15,14 +15,10 @@
15
15
 
16
16
  import logging
17
17
  import math
18
+ import typing
18
19
  from collections.abc import Sequence
19
20
 
20
21
  from pydantic import BaseModel
21
- from ragas import EvaluationDataset
22
- from ragas import SingleTurnSample
23
- from ragas.dataset_schema import EvaluationResult
24
- from ragas.llms import LangchainLLMWrapper
25
- from ragas.metrics import Metric
26
22
  from tqdm import tqdm
27
23
 
28
24
  from nat.data_models.intermediate_step import IntermediateStepType
@@ -32,14 +28,22 @@ from nat.eval.evaluator.evaluator_model import EvalOutput
32
28
  from nat.eval.evaluator.evaluator_model import EvalOutputItem
33
29
  from nat.eval.utils.tqdm_position_registry import TqdmPositionRegistry
34
30
 
31
+ if typing.TYPE_CHECKING:
32
+ # We import ragas lazily to avoid import-time side effects such as applying the nest_asyncio patch, which is
33
+ # not compatible with Python 3.12+. This ensures that we are able to apply the nest_asyncio2 patch instead.
34
+ from ragas import EvaluationDataset
35
+ from ragas.dataset_schema import EvaluationResult
36
+ from ragas.llms import LangchainLLMWrapper
37
+ from ragas.metrics import Metric
38
+
35
39
  logger = logging.getLogger(__name__)
36
40
 
37
41
 
38
42
  class RAGEvaluator:
39
43
 
40
44
  def __init__(self,
41
- evaluator_llm: LangchainLLMWrapper,
42
- metrics: Sequence[Metric],
45
+ evaluator_llm: "LangchainLLMWrapper",
46
+ metrics: Sequence["Metric"],
43
47
  max_concurrency=8,
44
48
  input_obj_field: str | None = None):
45
49
  self.evaluator_llm = evaluator_llm
@@ -66,8 +70,11 @@ class RAGEvaluator:
66
70
 
67
71
  return str(input_obj) # Fallback to string representation of the dict
68
72
 
69
- def eval_input_to_ragas(self, eval_input: EvalInput) -> EvaluationDataset:
73
+ def eval_input_to_ragas(self, eval_input: EvalInput) -> "EvaluationDataset":
70
74
  """Converts EvalInput into a Ragas-compatible EvaluationDataset."""
75
+ from ragas import EvaluationDataset
76
+ from ragas import SingleTurnSample
77
+
71
78
  from nat.eval.intermediate_step_adapter import IntermediateStepAdapter
72
79
  event_filter = [IntermediateStepType.TOOL_END, IntermediateStepType.LLM_END, IntermediateStepType.CUSTOM_END]
73
80
  samples = []
@@ -98,7 +105,7 @@ class RAGEvaluator:
98
105
 
99
106
  return EvaluationDataset(samples=samples)
100
107
 
101
- def ragas_to_eval_output(self, eval_input: EvalInput, results_dataset: EvaluationResult | None) -> EvalOutput:
108
+ def ragas_to_eval_output(self, eval_input: EvalInput, results_dataset: "EvaluationResult | None") -> EvalOutput:
102
109
  """Converts the ragas EvaluationResult to nat EvalOutput"""
103
110
 
104
111
  if not results_dataset:
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -0,0 +1,14 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
@@ -0,0 +1,66 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Data models for red teaming evaluation output."""
16
+
17
+ from __future__ import annotations
18
+
19
+ from pydantic import Field
20
+
21
+ from nat.data_models.intermediate_step import IntermediateStep
22
+ from nat.eval.evaluator.evaluator_model import EvalOutputItem
23
+
24
+
25
class ConditionEvalOutputItem(EvalOutputItem):
    """Outcome of evaluating the one IntermediateStep that matched a filter condition.

    Attributes:
        id: Identifier from the input item.
        score: Average score across all filter conditions.
        reasoning: Reasoning for given score.
        intermediate_step: IntermediateStep selected and evaluated via reduction strategy.
        error_message: Error message if any step of the evaluation has failed.
    """

    intermediate_step: IntermediateStep | None = Field(
        description="The single IntermediateStep that was selected and evaluated (based on reduction strategy)",
        default=None,
    )
    error_message: str | None = Field(
        description="Error message if any step of the evaluation has failed",
        default=None,
    )

    @classmethod
    def empty(cls, id: str, error: str | None = None) -> ConditionEvalOutputItem:
        """Build a placeholder item with a zero score and no intermediate step.

        Args:
            id: Identifier to store on the item.
            error: Optional error text stored as ``error_message``.

        Returns:
            A ConditionEvalOutputItem with ``score=0.0``, empty ``reasoning`` and
            ``intermediate_step=None``.
        """
        placeholder_fields = {
            "id": id,
            "score": 0.0,
            "reasoning": {},
            "error_message": error,
            "intermediate_step": None,
        }
        return cls(**placeholder_fields)
50
+
51
+
52
class RedTeamingEvalOutputItem(EvalOutputItem):
    """Evaluation output item for red teaming runs, grouped per filter condition.

    Results are keyed by filter condition name; each entry is a
    ConditionEvalOutputItem carrying that condition's score, the evaluated output,
    and the single intermediate step that was selected.

    Attributes:
        id: Identifier from the input item
        score: Average score across all filter conditions
        reasoning: Summary information for compatibility
        results_by_condition: Map from condition name to evaluation results
    """

    # Required field: no default is supplied, so callers must always provide the mapping.
    results_by_condition: dict[str, ConditionEvalOutputItem] = Field(
        description="Results organized by filter condition name",
    )