nvidia-nat 1.4.0a20251120__py3-none-any.whl → 1.4.0a20260113__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (492) hide show
  1. aiq/__init__.py +1 -1
  2. nat/{front_ends/mcp → agent/auto_memory_wrapper}/__init__.py +1 -1
  3. nat/agent/auto_memory_wrapper/agent.py +278 -0
  4. nat/agent/auto_memory_wrapper/register.py +227 -0
  5. nat/agent/auto_memory_wrapper/state.py +30 -0
  6. nat/agent/base.py +1 -1
  7. nat/agent/dual_node.py +1 -1
  8. nat/agent/prompt_optimizer/prompt.py +1 -1
  9. nat/agent/prompt_optimizer/register.py +1 -1
  10. nat/agent/react_agent/agent.py +16 -9
  11. nat/agent/react_agent/output_parser.py +2 -2
  12. nat/agent/react_agent/prompt.py +3 -2
  13. nat/agent/react_agent/register.py +2 -2
  14. nat/agent/react_agent/register_per_user_agent.py +104 -0
  15. nat/agent/reasoning_agent/reasoning_agent.py +1 -1
  16. nat/agent/register.py +3 -1
  17. nat/agent/responses_api_agent/__init__.py +1 -1
  18. nat/agent/responses_api_agent/register.py +1 -1
  19. nat/agent/rewoo_agent/agent.py +9 -4
  20. nat/agent/rewoo_agent/prompt.py +1 -1
  21. nat/agent/rewoo_agent/register.py +1 -1
  22. nat/agent/tool_calling_agent/agent.py +5 -4
  23. nat/agent/tool_calling_agent/register.py +1 -1
  24. nat/authentication/__init__.py +1 -1
  25. nat/authentication/api_key/__init__.py +1 -1
  26. nat/authentication/api_key/api_key_auth_provider.py +1 -1
  27. nat/authentication/api_key/api_key_auth_provider_config.py +22 -7
  28. nat/authentication/api_key/register.py +1 -1
  29. nat/authentication/credential_validator/__init__.py +1 -1
  30. nat/authentication/credential_validator/bearer_token_validator.py +1 -1
  31. nat/authentication/exceptions/__init__.py +1 -1
  32. nat/authentication/exceptions/api_key_exceptions.py +1 -1
  33. nat/authentication/http_basic_auth/http_basic_auth_provider.py +1 -1
  34. nat/authentication/http_basic_auth/register.py +1 -1
  35. nat/authentication/interfaces.py +1 -1
  36. nat/authentication/oauth2/__init__.py +1 -1
  37. nat/authentication/oauth2/oauth2_auth_code_flow_provider.py +1 -1
  38. nat/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +1 -1
  39. nat/authentication/oauth2/oauth2_resource_server_config.py +1 -1
  40. nat/authentication/oauth2/register.py +1 -1
  41. nat/authentication/register.py +1 -1
  42. nat/builder/builder.py +511 -1
  43. nat/builder/child_builder.py +385 -0
  44. nat/builder/component_utils.py +28 -4
  45. nat/builder/context.py +17 -1
  46. nat/builder/embedder.py +1 -1
  47. nat/builder/eval_builder.py +19 -7
  48. nat/builder/evaluator.py +1 -1
  49. nat/builder/framework_enum.py +2 -1
  50. nat/builder/front_end.py +1 -1
  51. nat/builder/function.py +40 -3
  52. nat/builder/function_base.py +1 -1
  53. nat/builder/function_info.py +1 -1
  54. nat/builder/intermediate_step_manager.py +1 -1
  55. nat/builder/llm.py +1 -1
  56. nat/builder/per_user_workflow_builder.py +843 -0
  57. nat/builder/retriever.py +1 -1
  58. nat/builder/sync_builder.py +571 -0
  59. nat/builder/user_interaction_manager.py +1 -1
  60. nat/builder/workflow.py +1 -1
  61. nat/builder/workflow_builder.py +536 -424
  62. nat/cli/__init__.py +1 -1
  63. nat/cli/cli_utils/config_override.py +1 -1
  64. nat/cli/cli_utils/validation.py +32 -1
  65. nat/cli/commands/configure/channel/add.py +1 -1
  66. nat/cli/commands/configure/channel/channel.py +1 -1
  67. nat/cli/commands/configure/channel/remove.py +1 -1
  68. nat/cli/commands/configure/channel/update.py +1 -1
  69. nat/cli/commands/configure/configure.py +1 -1
  70. nat/cli/commands/evaluate.py +87 -13
  71. nat/cli/commands/finetune.py +132 -0
  72. nat/cli/commands/info/__init__.py +1 -1
  73. nat/cli/commands/info/info.py +1 -1
  74. nat/cli/commands/info/list_channels.py +1 -1
  75. nat/cli/commands/info/list_components.py +1 -1
  76. nat/cli/commands/object_store/__init__.py +1 -1
  77. nat/cli/commands/object_store/object_store.py +1 -1
  78. nat/cli/commands/optimize.py +1 -1
  79. nat/cli/commands/{mcp → red_teaming}/__init__.py +1 -1
  80. nat/cli/commands/red_teaming/red_teaming.py +138 -0
  81. nat/cli/commands/red_teaming/red_teaming_utils.py +73 -0
  82. nat/cli/commands/registry/__init__.py +1 -1
  83. nat/cli/commands/registry/publish.py +1 -1
  84. nat/cli/commands/registry/pull.py +1 -1
  85. nat/cli/commands/registry/registry.py +1 -1
  86. nat/cli/commands/registry/remove.py +1 -1
  87. nat/cli/commands/registry/search.py +1 -1
  88. nat/cli/commands/sizing/__init__.py +1 -1
  89. nat/cli/commands/sizing/calc.py +1 -1
  90. nat/cli/commands/sizing/sizing.py +1 -1
  91. nat/cli/commands/start.py +1 -1
  92. nat/cli/commands/uninstall.py +1 -1
  93. nat/cli/commands/validate.py +1 -1
  94. nat/cli/commands/workflow/__init__.py +1 -1
  95. nat/cli/commands/workflow/workflow.py +1 -1
  96. nat/cli/commands/workflow/workflow_commands.py +3 -2
  97. nat/cli/entrypoint.py +15 -37
  98. nat/cli/main.py +2 -2
  99. nat/cli/plugin_loader.py +69 -0
  100. nat/cli/register_workflow.py +183 -5
  101. nat/cli/type_registry.py +169 -3
  102. nat/control_flow/register.py +1 -1
  103. nat/control_flow/router_agent/agent.py +1 -1
  104. nat/control_flow/router_agent/prompt.py +1 -1
  105. nat/control_flow/router_agent/register.py +1 -1
  106. nat/control_flow/sequential_executor.py +28 -7
  107. nat/data_models/__init__.py +1 -1
  108. nat/data_models/agent.py +1 -1
  109. nat/data_models/api_server.py +38 -3
  110. nat/data_models/authentication.py +1 -1
  111. nat/data_models/common.py +1 -1
  112. nat/data_models/component.py +7 -1
  113. nat/data_models/component_ref.py +34 -1
  114. nat/data_models/config.py +62 -1
  115. nat/data_models/dataset_handler.py +15 -2
  116. nat/data_models/discovery_metadata.py +1 -1
  117. nat/data_models/embedder.py +1 -1
  118. nat/data_models/evaluate.py +6 -1
  119. nat/data_models/evaluator.py +1 -1
  120. nat/data_models/finetuning.py +260 -0
  121. nat/data_models/front_end.py +1 -1
  122. nat/data_models/function.py +1 -1
  123. nat/data_models/function_dependencies.py +1 -1
  124. nat/data_models/gated_field_mixin.py +1 -1
  125. nat/data_models/interactive.py +1 -1
  126. nat/data_models/intermediate_step.py +29 -2
  127. nat/data_models/invocation_node.py +1 -1
  128. nat/data_models/llm.py +1 -1
  129. nat/data_models/logging.py +1 -1
  130. nat/data_models/memory.py +1 -1
  131. nat/data_models/middleware.py +3 -1
  132. nat/data_models/object_store.py +1 -1
  133. nat/data_models/openai_mcp.py +1 -1
  134. nat/data_models/optimizable.py +1 -1
  135. nat/data_models/optimizer.py +1 -1
  136. nat/data_models/profiler.py +1 -1
  137. nat/data_models/registry_handler.py +1 -1
  138. nat/data_models/retriever.py +1 -1
  139. nat/data_models/retry_mixin.py +1 -1
  140. nat/data_models/runtime_enum.py +1 -1
  141. nat/data_models/span.py +1 -1
  142. nat/data_models/step_adaptor.py +1 -1
  143. nat/data_models/streaming.py +1 -1
  144. nat/data_models/swe_bench_model.py +1 -1
  145. nat/data_models/telemetry_exporter.py +1 -1
  146. nat/data_models/thinking_mixin.py +1 -1
  147. nat/data_models/ttc_strategy.py +1 -1
  148. nat/embedder/azure_openai_embedder.py +1 -1
  149. nat/embedder/nim_embedder.py +1 -1
  150. nat/embedder/openai_embedder.py +1 -1
  151. nat/embedder/register.py +1 -1
  152. nat/eval/__init__.py +1 -1
  153. nat/eval/config.py +8 -1
  154. nat/eval/dataset_handler/dataset_downloader.py +1 -1
  155. nat/eval/dataset_handler/dataset_filter.py +1 -1
  156. nat/eval/dataset_handler/dataset_handler.py +4 -2
  157. nat/eval/evaluate.py +217 -80
  158. nat/eval/evaluator/__init__.py +1 -1
  159. nat/eval/evaluator/base_evaluator.py +2 -2
  160. nat/eval/evaluator/evaluator_model.py +3 -2
  161. nat/eval/intermediate_step_adapter.py +1 -1
  162. nat/eval/llm_validator.py +336 -0
  163. nat/eval/rag_evaluator/evaluate.py +17 -10
  164. nat/eval/rag_evaluator/register.py +1 -1
  165. nat/eval/red_teaming_evaluator/__init__.py +14 -0
  166. nat/eval/red_teaming_evaluator/data_models.py +66 -0
  167. nat/eval/red_teaming_evaluator/evaluate.py +327 -0
  168. nat/eval/red_teaming_evaluator/filter_conditions.py +75 -0
  169. nat/eval/red_teaming_evaluator/register.py +55 -0
  170. nat/eval/register.py +2 -1
  171. nat/eval/remote_workflow.py +1 -1
  172. nat/eval/runners/__init__.py +1 -1
  173. nat/eval/runners/config.py +1 -1
  174. nat/eval/runners/multi_eval_runner.py +1 -1
  175. nat/eval/runners/red_teaming_runner/__init__.py +24 -0
  176. nat/eval/runners/red_teaming_runner/config.py +282 -0
  177. nat/eval/runners/red_teaming_runner/report_utils.py +707 -0
  178. nat/eval/runners/red_teaming_runner/runner.py +867 -0
  179. nat/eval/runtime_evaluator/__init__.py +1 -1
  180. nat/eval/runtime_evaluator/evaluate.py +1 -1
  181. nat/eval/runtime_evaluator/register.py +1 -1
  182. nat/eval/runtime_event_subscriber.py +1 -1
  183. nat/eval/swe_bench_evaluator/evaluate.py +1 -1
  184. nat/eval/swe_bench_evaluator/register.py +1 -1
  185. nat/eval/trajectory_evaluator/evaluate.py +2 -2
  186. nat/eval/trajectory_evaluator/register.py +1 -1
  187. nat/eval/tunable_rag_evaluator/evaluate.py +5 -5
  188. nat/eval/tunable_rag_evaluator/register.py +1 -1
  189. nat/eval/usage_stats.py +1 -1
  190. nat/eval/utils/eval_trace_ctx.py +1 -1
  191. nat/eval/utils/output_uploader.py +1 -1
  192. nat/eval/utils/tqdm_position_registry.py +1 -1
  193. nat/eval/utils/weave_eval.py +1 -1
  194. nat/experimental/decorators/experimental_warning_decorator.py +1 -1
  195. nat/experimental/test_time_compute/editing/iterative_plan_refinement_editor.py +1 -1
  196. nat/experimental/test_time_compute/editing/llm_as_a_judge_editor.py +1 -1
  197. nat/experimental/test_time_compute/editing/motivation_aware_summarization.py +1 -1
  198. nat/experimental/test_time_compute/functions/execute_score_select_function.py +1 -1
  199. nat/experimental/test_time_compute/functions/multi_llm_judge_function.py +88 -0
  200. nat/experimental/test_time_compute/functions/plan_select_execute_function.py +1 -1
  201. nat/experimental/test_time_compute/functions/ttc_tool_orchestration_function.py +1 -1
  202. nat/experimental/test_time_compute/functions/ttc_tool_wrapper_function.py +1 -1
  203. nat/experimental/test_time_compute/models/editor_config.py +1 -1
  204. nat/experimental/test_time_compute/models/scoring_config.py +1 -1
  205. nat/experimental/test_time_compute/models/search_config.py +20 -2
  206. nat/experimental/test_time_compute/models/selection_config.py +33 -2
  207. nat/experimental/test_time_compute/models/stage_enums.py +1 -1
  208. nat/experimental/test_time_compute/models/strategy_base.py +1 -1
  209. nat/experimental/test_time_compute/models/tool_use_config.py +1 -1
  210. nat/experimental/test_time_compute/models/ttc_item.py +1 -1
  211. nat/experimental/test_time_compute/register.py +4 -1
  212. nat/experimental/test_time_compute/scoring/llm_based_agent_scorer.py +1 -1
  213. nat/experimental/test_time_compute/scoring/llm_based_plan_scorer.py +1 -1
  214. nat/experimental/test_time_compute/scoring/motivation_aware_scorer.py +1 -1
  215. nat/experimental/test_time_compute/search/multi_llm_generation.py +115 -0
  216. nat/experimental/test_time_compute/search/multi_llm_planner.py +1 -1
  217. nat/experimental/test_time_compute/search/multi_query_retrieval_search.py +1 -1
  218. nat/experimental/test_time_compute/search/single_shot_multi_plan_planner.py +1 -1
  219. nat/experimental/test_time_compute/selection/best_of_n_selector.py +1 -1
  220. nat/experimental/test_time_compute/selection/llm_based_agent_output_selector.py +1 -1
  221. nat/experimental/test_time_compute/selection/llm_based_output_merging_selector.py +1 -1
  222. nat/experimental/test_time_compute/selection/llm_based_plan_selector.py +1 -1
  223. nat/experimental/test_time_compute/selection/llm_judge_selection.py +127 -0
  224. nat/experimental/test_time_compute/selection/threshold_selector.py +1 -1
  225. nat/finetuning/__init__.py +24 -0
  226. nat/finetuning/finetuning_runtime.py +143 -0
  227. nat/finetuning/interfaces/__init__.py +24 -0
  228. nat/finetuning/interfaces/finetuning_runner.py +261 -0
  229. nat/finetuning/interfaces/trainer_adapter.py +103 -0
  230. nat/finetuning/interfaces/trajectory_builder.py +115 -0
  231. nat/finetuning/utils/__init__.py +15 -0
  232. nat/finetuning/utils/parsers/__init__.py +15 -0
  233. nat/finetuning/utils/parsers/adk_parser.py +141 -0
  234. nat/finetuning/utils/parsers/base_parser.py +238 -0
  235. nat/finetuning/utils/parsers/common.py +91 -0
  236. nat/finetuning/utils/parsers/langchain_parser.py +267 -0
  237. nat/finetuning/utils/parsers/llama_index_parser.py +218 -0
  238. nat/front_ends/__init__.py +1 -1
  239. nat/front_ends/console/__init__.py +1 -1
  240. nat/front_ends/console/authentication_flow_handler.py +1 -1
  241. nat/front_ends/console/console_front_end_config.py +4 -1
  242. nat/front_ends/console/console_front_end_plugin.py +5 -4
  243. nat/front_ends/console/register.py +1 -1
  244. nat/front_ends/cron/__init__.py +1 -1
  245. nat/front_ends/fastapi/__init__.py +1 -1
  246. nat/front_ends/fastapi/async_job.py +128 -0
  247. nat/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +1 -1
  248. nat/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +13 -9
  249. nat/front_ends/fastapi/dask_client_mixin.py +1 -1
  250. nat/front_ends/fastapi/fastapi_front_end_config.py +1 -1
  251. nat/front_ends/fastapi/fastapi_front_end_controller.py +1 -1
  252. nat/front_ends/fastapi/fastapi_front_end_plugin.py +25 -30
  253. nat/front_ends/fastapi/fastapi_front_end_plugin_worker.py +195 -60
  254. nat/front_ends/fastapi/html_snippets/__init__.py +1 -1
  255. nat/front_ends/fastapi/html_snippets/auth_code_grant_success.py +1 -1
  256. nat/front_ends/fastapi/intermediate_steps_subscriber.py +12 -1
  257. nat/front_ends/fastapi/job_store.py +23 -11
  258. nat/front_ends/fastapi/main.py +1 -1
  259. nat/front_ends/fastapi/message_handler.py +27 -4
  260. nat/front_ends/fastapi/message_validator.py +54 -2
  261. nat/front_ends/fastapi/register.py +1 -1
  262. nat/front_ends/fastapi/response_helpers.py +16 -15
  263. nat/front_ends/fastapi/step_adaptor.py +1 -1
  264. nat/front_ends/fastapi/utils.py +1 -1
  265. nat/front_ends/register.py +1 -2
  266. nat/front_ends/simple_base/__init__.py +1 -1
  267. nat/front_ends/simple_base/simple_front_end_plugin_base.py +6 -4
  268. nat/llm/aws_bedrock_llm.py +1 -1
  269. nat/llm/azure_openai_llm.py +10 -1
  270. nat/llm/dynamo_llm.py +363 -0
  271. nat/llm/huggingface_llm.py +177 -0
  272. nat/llm/litellm_llm.py +1 -1
  273. nat/llm/nim_llm.py +1 -1
  274. nat/llm/openai_llm.py +1 -1
  275. nat/llm/register.py +3 -1
  276. nat/llm/utils/__init__.py +1 -1
  277. nat/llm/utils/env_config_value.py +1 -1
  278. nat/llm/utils/error.py +1 -1
  279. nat/llm/utils/thinking.py +1 -1
  280. nat/memory/__init__.py +1 -1
  281. nat/memory/interfaces.py +1 -1
  282. nat/memory/models.py +1 -1
  283. nat/meta/pypi.md +1 -1
  284. nat/middleware/__init__.py +5 -5
  285. nat/middleware/cache/__init__.py +14 -0
  286. nat/middleware/{cache_middleware.py → cache/cache_middleware.py} +39 -42
  287. nat/middleware/cache/cache_middleware_config.py +44 -0
  288. nat/middleware/cache/register.py +33 -0
  289. nat/middleware/defense/__init__.py +14 -0
  290. nat/middleware/defense/defense_middleware.py +362 -0
  291. nat/middleware/defense/defense_middleware_content_guard.py +455 -0
  292. nat/middleware/defense/defense_middleware_data_models.py +91 -0
  293. nat/middleware/defense/defense_middleware_output_verifier.py +440 -0
  294. nat/middleware/defense/defense_middleware_pii.py +356 -0
  295. nat/middleware/defense/register.py +82 -0
  296. nat/middleware/dynamic/__init__.py +14 -0
  297. nat/middleware/dynamic/dynamic_function_middleware.py +962 -0
  298. nat/middleware/dynamic/dynamic_middleware_config.py +132 -0
  299. nat/middleware/dynamic/register.py +34 -0
  300. nat/middleware/function_middleware.py +236 -52
  301. nat/middleware/logging/__init__.py +14 -0
  302. nat/middleware/logging/logging_middleware.py +67 -0
  303. nat/middleware/logging/logging_middleware_config.py +28 -0
  304. nat/middleware/logging/register.py +33 -0
  305. nat/middleware/middleware.py +142 -28
  306. nat/middleware/red_teaming/__init__.py +14 -0
  307. nat/middleware/red_teaming/red_teaming_middleware.py +344 -0
  308. nat/middleware/red_teaming/red_teaming_middleware_config.py +112 -0
  309. nat/middleware/red_teaming/register.py +47 -0
  310. nat/middleware/register.py +7 -20
  311. nat/middleware/utils/__init__.py +14 -0
  312. nat/middleware/utils/workflow_inventory.py +155 -0
  313. nat/object_store/__init__.py +1 -1
  314. nat/object_store/in_memory_object_store.py +1 -1
  315. nat/object_store/interfaces.py +1 -1
  316. nat/object_store/models.py +1 -1
  317. nat/object_store/register.py +1 -1
  318. nat/observability/__init__.py +1 -1
  319. nat/observability/exporter/__init__.py +1 -1
  320. nat/observability/exporter/base_exporter.py +1 -1
  321. nat/observability/exporter/exporter.py +1 -1
  322. nat/observability/exporter/file_exporter.py +1 -1
  323. nat/observability/exporter/processing_exporter.py +1 -1
  324. nat/observability/exporter/raw_exporter.py +1 -1
  325. nat/observability/exporter/span_exporter.py +7 -1
  326. nat/observability/exporter_manager.py +1 -1
  327. nat/observability/mixin/__init__.py +1 -1
  328. nat/observability/mixin/batch_config_mixin.py +1 -1
  329. nat/observability/mixin/collector_config_mixin.py +1 -1
  330. nat/observability/mixin/file_mixin.py +1 -1
  331. nat/observability/mixin/file_mode.py +1 -1
  332. nat/observability/mixin/redaction_config_mixin.py +1 -1
  333. nat/observability/mixin/resource_conflict_mixin.py +1 -1
  334. nat/observability/mixin/serialize_mixin.py +1 -1
  335. nat/observability/mixin/tagging_config_mixin.py +1 -1
  336. nat/observability/mixin/type_introspection_mixin.py +1 -1
  337. nat/observability/processor/__init__.py +1 -1
  338. nat/observability/processor/batching_processor.py +1 -1
  339. nat/observability/processor/callback_processor.py +1 -1
  340. nat/observability/processor/falsy_batch_filter_processor.py +1 -1
  341. nat/observability/processor/intermediate_step_serializer.py +1 -1
  342. nat/observability/processor/processor.py +1 -1
  343. nat/observability/processor/processor_factory.py +1 -1
  344. nat/observability/processor/redaction/__init__.py +1 -1
  345. nat/observability/processor/redaction/contextual_redaction_processor.py +1 -1
  346. nat/observability/processor/redaction/contextual_span_redaction_processor.py +1 -1
  347. nat/observability/processor/redaction/redaction_processor.py +1 -1
  348. nat/observability/processor/redaction/span_header_redaction_processor.py +1 -1
  349. nat/observability/processor/span_tagging_processor.py +1 -1
  350. nat/observability/register.py +1 -1
  351. nat/observability/utils/__init__.py +1 -1
  352. nat/observability/utils/dict_utils.py +1 -1
  353. nat/observability/utils/time_utils.py +1 -1
  354. nat/profiler/calc/__init__.py +1 -1
  355. nat/profiler/calc/calc_runner.py +3 -3
  356. nat/profiler/calc/calculations.py +1 -1
  357. nat/profiler/calc/data_models.py +1 -1
  358. nat/profiler/calc/plot.py +30 -3
  359. nat/profiler/callbacks/agno_callback_handler.py +1 -1
  360. nat/profiler/callbacks/base_callback_class.py +1 -1
  361. nat/profiler/callbacks/langchain_callback_handler.py +33 -3
  362. nat/profiler/callbacks/llama_index_callback_handler.py +13 -10
  363. nat/profiler/callbacks/semantic_kernel_callback_handler.py +1 -1
  364. nat/profiler/callbacks/token_usage_base_model.py +1 -1
  365. nat/profiler/data_frame_row.py +1 -1
  366. nat/profiler/data_models.py +1 -1
  367. nat/profiler/decorators/framework_wrapper.py +16 -1
  368. nat/profiler/decorators/function_tracking.py +1 -1
  369. nat/profiler/forecasting/config.py +1 -1
  370. nat/profiler/forecasting/model_trainer.py +1 -1
  371. nat/profiler/forecasting/models/__init__.py +1 -1
  372. nat/profiler/forecasting/models/forecasting_base_model.py +1 -1
  373. nat/profiler/forecasting/models/linear_model.py +1 -1
  374. nat/profiler/forecasting/models/random_forest_regressor.py +1 -1
  375. nat/profiler/inference_metrics_model.py +1 -1
  376. nat/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +1 -1
  377. nat/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +1 -1
  378. nat/profiler/inference_optimization/data_models.py +1 -1
  379. nat/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +1 -1
  380. nat/profiler/inference_optimization/experimental/prefix_span_analysis.py +1 -1
  381. nat/profiler/inference_optimization/llm_metrics.py +1 -1
  382. nat/profiler/inference_optimization/prompt_caching.py +1 -1
  383. nat/profiler/inference_optimization/token_uniqueness.py +1 -1
  384. nat/profiler/inference_optimization/workflow_runtimes.py +1 -1
  385. nat/profiler/intermediate_property_adapter.py +1 -1
  386. nat/profiler/parameter_optimization/optimizable_utils.py +1 -1
  387. nat/profiler/parameter_optimization/optimizer_runtime.py +1 -1
  388. nat/profiler/parameter_optimization/parameter_optimizer.py +1 -1
  389. nat/profiler/parameter_optimization/parameter_selection.py +1 -1
  390. nat/profiler/parameter_optimization/pareto_visualizer.py +1 -1
  391. nat/profiler/parameter_optimization/prompt_optimizer.py +1 -1
  392. nat/profiler/parameter_optimization/update_helpers.py +1 -1
  393. nat/profiler/profile_runner.py +1 -1
  394. nat/profiler/utils.py +1 -1
  395. nat/registry_handlers/local/local_handler.py +1 -1
  396. nat/registry_handlers/local/register_local.py +1 -1
  397. nat/registry_handlers/metadata_factory.py +1 -1
  398. nat/registry_handlers/package_utils.py +1 -1
  399. nat/registry_handlers/pypi/pypi_handler.py +1 -1
  400. nat/registry_handlers/pypi/register_pypi.py +1 -1
  401. nat/registry_handlers/register.py +1 -1
  402. nat/registry_handlers/registry_handler_base.py +1 -1
  403. nat/registry_handlers/rest/register_rest.py +1 -1
  404. nat/registry_handlers/rest/rest_handler.py +1 -1
  405. nat/registry_handlers/schemas/headers.py +1 -1
  406. nat/registry_handlers/schemas/package.py +1 -1
  407. nat/registry_handlers/schemas/publish.py +1 -1
  408. nat/registry_handlers/schemas/pull.py +1 -1
  409. nat/registry_handlers/schemas/remove.py +1 -1
  410. nat/registry_handlers/schemas/search.py +1 -1
  411. nat/registry_handlers/schemas/status.py +1 -1
  412. nat/retriever/interface.py +1 -1
  413. nat/retriever/milvus/__init__.py +1 -1
  414. nat/retriever/milvus/register.py +1 -1
  415. nat/retriever/milvus/retriever.py +1 -1
  416. nat/retriever/models.py +1 -1
  417. nat/retriever/nemo_retriever/__init__.py +1 -1
  418. nat/retriever/nemo_retriever/register.py +1 -1
  419. nat/retriever/nemo_retriever/retriever.py +5 -5
  420. nat/retriever/register.py +1 -1
  421. nat/runtime/__init__.py +1 -1
  422. nat/runtime/loader.py +10 -3
  423. nat/runtime/metrics.py +180 -0
  424. nat/runtime/runner.py +1 -5
  425. nat/runtime/session.py +451 -32
  426. nat/runtime/user_metadata.py +1 -1
  427. nat/settings/global_settings.py +1 -1
  428. nat/tool/chat_completion.py +1 -1
  429. nat/tool/code_execution/README.md +1 -1
  430. nat/tool/code_execution/code_sandbox.py +1 -1
  431. nat/tool/code_execution/local_sandbox/Dockerfile.sandbox +1 -1
  432. nat/tool/code_execution/local_sandbox/__init__.py +1 -1
  433. nat/tool/code_execution/local_sandbox/local_sandbox_server.py +1 -1
  434. nat/tool/code_execution/local_sandbox/start_local_sandbox.sh +1 -1
  435. nat/tool/code_execution/register.py +1 -1
  436. nat/tool/code_execution/utils.py +1 -1
  437. nat/tool/datetime_tools.py +1 -1
  438. nat/tool/document_search.py +1 -1
  439. nat/tool/github_tools.py +1 -1
  440. nat/tool/memory_tools/add_memory_tool.py +1 -1
  441. nat/tool/memory_tools/delete_memory_tool.py +1 -1
  442. nat/tool/memory_tools/get_memory_tool.py +1 -1
  443. nat/tool/nvidia_rag.py +2 -2
  444. nat/tool/register.py +1 -1
  445. nat/tool/retriever.py +1 -1
  446. nat/tool/server_tools.py +1 -1
  447. nat/utils/__init__.py +8 -5
  448. nat/utils/callable_utils.py +1 -1
  449. nat/utils/data_models/schema_validator.py +1 -1
  450. nat/utils/debugging_utils.py +1 -1
  451. nat/utils/decorators.py +1 -1
  452. nat/utils/dump_distro_mapping.py +1 -1
  453. nat/utils/exception_handlers/automatic_retries.py +3 -3
  454. nat/utils/exception_handlers/schemas.py +1 -1
  455. nat/utils/io/model_processing.py +1 -1
  456. nat/utils/io/supress_logs.py +33 -0
  457. nat/utils/io/yaml_tools.py +1 -1
  458. nat/utils/log_levels.py +1 -1
  459. nat/utils/log_utils.py +13 -1
  460. nat/utils/metadata_utils.py +1 -1
  461. nat/utils/optional_imports.py +1 -1
  462. nat/utils/producer_consumer_queue.py +1 -1
  463. nat/utils/reactive/base/observable_base.py +1 -1
  464. nat/utils/reactive/base/observer_base.py +1 -1
  465. nat/utils/reactive/base/subject_base.py +1 -1
  466. nat/utils/reactive/observable.py +1 -1
  467. nat/utils/reactive/observer.py +1 -1
  468. nat/utils/reactive/subject.py +1 -1
  469. nat/utils/reactive/subscription.py +1 -1
  470. nat/utils/responses_api.py +1 -1
  471. nat/utils/settings/global_settings.py +1 -1
  472. nat/utils/string_utils.py +1 -1
  473. nat/utils/type_converter.py +18 -5
  474. nat/utils/type_utils.py +1 -1
  475. nat/utils/url_utils.py +1 -1
  476. {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/METADATA +39 -14
  477. nvidia_nat-1.4.0a20260113.dist-info/RECORD +547 -0
  478. nvidia_nat-1.4.0a20260113.dist-info/entry_points.txt +38 -0
  479. nat/cli/commands/mcp/mcp.py +0 -986
  480. nat/front_ends/mcp/introspection_token_verifier.py +0 -73
  481. nat/front_ends/mcp/mcp_front_end_config.py +0 -109
  482. nat/front_ends/mcp/mcp_front_end_plugin.py +0 -155
  483. nat/front_ends/mcp/mcp_front_end_plugin_worker.py +0 -388
  484. nat/front_ends/mcp/memory_profiler.py +0 -320
  485. nat/front_ends/mcp/register.py +0 -27
  486. nat/front_ends/mcp/tool_converter.py +0 -321
  487. nvidia_nat-1.4.0a20251120.dist-info/RECORD +0 -488
  488. nvidia_nat-1.4.0a20251120.dist-info/entry_points.txt +0 -23
  489. {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/WHEEL +0 -0
  490. {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
  491. {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE.md +0 -0
  492. {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
nat/data_models/span.py CHANGED
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
nat/embedder/register.py CHANGED
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
nat/eval/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
nat/eval/config.py CHANGED
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -46,6 +46,8 @@ class EvaluationRunConfig(BaseModel):
46
46
  num_passes: int = 0
47
47
  # timeout for waiting for trace export tasks to complete
48
48
  export_timeout: float = 60.0
49
+ # User ID to use for workflow session. Defaults to 'nat_eval_user_id'.
50
+ user_id: str = "nat_eval_user_id"
49
51
 
50
52
 
51
53
  class EvaluationRunOutput(BaseModel):
@@ -60,3 +62,8 @@ class EvaluationRunOutput(BaseModel):
60
62
  evaluation_results: list[tuple[str, EvalOutput]]
61
63
  usage_stats: UsageStats | None = None
62
64
  profiler_results: ProfilerResults
65
+
66
+ # Configuration files written to output directory
67
+ config_original_file: Path | None = None
68
+ config_effective_file: Path | None = None
69
+ config_metadata_file: Path | None = None
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -217,7 +217,9 @@ class DatasetHandler:
217
217
  """
218
218
  # Apply filters and deduplicate
219
219
  input_df = self.dataset_filter.apply_filters(input_df)
220
- input_df.drop_duplicates(subset=[self.dataset_config.id_key], inplace=True)
220
+
221
+ if (self.dataset_config.id_key in input_df.columns):
222
+ input_df.drop_duplicates(subset=[self.dataset_config.id_key], inplace=True)
221
223
 
222
224
  if self.reps > 1 and self.adjust_dataset_size:
223
225
  raise ValueError("reps and adjust_dataset_size are mutually exclusive")
nat/eval/evaluate.py CHANGED
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,16 +14,21 @@
14
14
  # limitations under the License.
15
15
 
16
16
  import asyncio
17
+ import json
17
18
  import logging
18
19
  import shutil
19
20
  import warnings
21
+ from datetime import UTC
22
+ from datetime import datetime
20
23
  from pathlib import Path
21
24
  from typing import Any
22
25
  from uuid import uuid4
23
26
 
27
+ import yaml
24
28
  from pydantic import BaseModel
25
29
  from tqdm import tqdm
26
30
 
31
+ from nat.data_models.config import Config
27
32
  from nat.data_models.evaluate import EvalConfig
28
33
  from nat.data_models.evaluate import JobEvictionPolicy
29
34
  from nat.data_models.runtime_enum import RuntimeTypeEnum
@@ -33,6 +38,7 @@ from nat.eval.dataset_handler.dataset_handler import DatasetHandler
33
38
  from nat.eval.evaluator.evaluator_model import EvalInput
34
39
  from nat.eval.evaluator.evaluator_model import EvalInputItem
35
40
  from nat.eval.evaluator.evaluator_model import EvalOutput
41
+ from nat.eval.llm_validator import validate_llm_endpoints
36
42
  from nat.eval.usage_stats import UsageStats
37
43
  from nat.eval.usage_stats import UsageStatsItem
38
44
  from nat.eval.usage_stats import UsageStatsLLM
@@ -62,6 +68,7 @@ class EvaluationRun:
62
68
  # Run-specific configuration
63
69
  self.config: EvaluationRunConfig = config
64
70
  self.eval_config: EvalConfig | None = None
71
+ self.effective_config: Config | None = None # Stores the complete config after applying overrides
65
72
 
66
73
  # Helpers
67
74
  self.intermediate_step_adapter: IntermediateStepAdapter = IntermediateStepAdapter()
@@ -97,6 +104,11 @@ class EvaluationRun:
97
104
  # evaluation output files
98
105
  self.evaluator_output_files: list[Path] = []
99
106
 
107
+ # configuration output files
108
+ self.config_original_file: Path | None = None
109
+ self.config_effective_file: Path | None = None
110
+ self.config_metadata_file: Path | None = None
111
+
100
112
  def _compute_usage_stats(self, item: EvalInputItem):
101
113
  """Compute usage stats for a single item using the intermediate steps"""
102
114
  # get the prompt and completion tokens from the intermediate steps
@@ -169,62 +181,65 @@ class EvaluationRun:
169
181
  if stop_event.is_set():
170
182
  return "", []
171
183
 
172
- async with session_manager.run(item.input_obj, runtime_type=RuntimeTypeEnum.EVALUATE) as runner:
173
- if not session_manager.workflow.has_single_output:
174
- # raise an error if the workflow has multiple outputs
175
- raise NotImplementedError("Multiple outputs are not supported")
176
-
177
- runner_result = None
178
- intermediate_future = None
179
-
180
- try:
181
- # Start usage stats and intermediate steps collection in parallel
182
- intermediate_future = pull_intermediate()
183
- runner_result = runner.result()
184
- base_output = await runner_result
185
- intermediate_steps = await intermediate_future
186
- except NotImplementedError as e:
187
- logger.error("Failed to run the workflow: %s", e)
188
- # raise original error
189
- raise
190
- except Exception as e:
191
- logger.exception("Failed to run the workflow: %s", e)
192
- # stop processing if a workflow error occurs
193
- self.workflow_interrupted = True
194
-
195
- # Cancel any coroutines that are still running, avoiding a warning about unawaited coroutines
196
- # (typically one of these two is what raised the exception and the other is still running)
197
- for coro in (runner_result, intermediate_future):
198
- if coro is not None:
199
- asyncio.ensure_future(coro).cancel()
200
-
201
- stop_event.set()
202
- return
203
-
204
- try:
205
- base_output = runner.convert(base_output, to_type=str)
206
- except ValueError:
207
- pass
208
-
209
- # if base_output is a pydantic model dump it to json
210
- if isinstance(base_output, BaseModel):
211
- output = base_output.model_dump_json(indent=2)
212
- else:
213
- m = jsonpath_expr.find(base_output)
214
- if (not m):
215
- raise RuntimeError(f"Failed to extract output using jsonpath: {self.config.result_json_path}")
216
- if (len(m) > 1):
217
- logger.warning("Multiple matches found for jsonpath at row '%s'. Matches: %s. Using the first",
218
- base_output,
219
- m)
220
- output = m[0].value
221
-
222
- item.output_obj = output
223
- item.trajectory = self.intermediate_step_adapter.validate_intermediate_steps(intermediate_steps)
224
- usage_stats_item = self._compute_usage_stats(item)
225
-
226
- self.weave_eval.log_prediction(item, output)
227
- await self.weave_eval.log_usage_stats(item, usage_stats_item)
184
+ async with session_manager.session(user_id=self.config.user_id) as session:
185
+ async with session.run(item.input_obj, runtime_type=RuntimeTypeEnum.EVALUATE) as runner:
186
+ if not session.workflow.has_single_output:
187
+ # raise an error if the workflow has multiple outputs
188
+ raise NotImplementedError("Multiple outputs are not supported")
189
+
190
+ runner_result = None
191
+ intermediate_future = None
192
+
193
+ try:
194
+ # Start usage stats and intermediate steps collection in parallel
195
+ intermediate_future = pull_intermediate()
196
+ runner_result = runner.result()
197
+ base_output = await runner_result
198
+ intermediate_steps = await intermediate_future
199
+ except NotImplementedError as e:
200
+ logger.error("Failed to run the workflow: %s", e)
201
+ # raise original error
202
+ raise
203
+ except Exception as e:
204
+ logger.exception("Failed to run the workflow: %s", e)
205
+ # stop processing if a workflow error occurs
206
+ self.workflow_interrupted = True
207
+
208
+ # Cancel any coroutines that are still running, avoiding a warning about unawaited coroutines
209
+ # (typically one of these two is what raised the exception and the other is still running)
210
+ for coro in (runner_result, intermediate_future):
211
+ if coro is not None:
212
+ asyncio.ensure_future(coro).cancel()
213
+
214
+ stop_event.set()
215
+ return
216
+
217
+ try:
218
+ base_output = runner.convert(base_output, to_type=str)
219
+ except ValueError:
220
+ pass
221
+
222
+ # if base_output is a pydantic model dump it to json
223
+ if isinstance(base_output, BaseModel):
224
+ output = base_output.model_dump_json(indent=2)
225
+ else:
226
+ m = jsonpath_expr.find(base_output)
227
+ if (not m):
228
+ raise RuntimeError(
229
+ f"Failed to extract output using jsonpath: {self.config.result_json_path}")
230
+ if (len(m) > 1):
231
+ logger.warning(
232
+ "Multiple matches found for jsonpath at row '%s'. Matches: %s. Using the first",
233
+ base_output,
234
+ m)
235
+ output = m[0].value
236
+
237
+ item.output_obj = output
238
+ item.trajectory = self.intermediate_step_adapter.validate_intermediate_steps(intermediate_steps)
239
+ usage_stats_item = self._compute_usage_stats(item)
240
+
241
+ self.weave_eval.log_prediction(item, output)
242
+ await self.weave_eval.log_usage_stats(item, usage_stats_item)
228
243
 
229
244
  async def wrapped_run(item: EvalInputItem) -> None:
230
245
  await run_one(item)
@@ -329,10 +344,99 @@ class EvaluationRun:
329
344
  except Exception as e:
330
345
  logger.exception("Failed to delete old job directory: %s: %s", dir_to_delete, e)
331
346
 
347
+ def write_configuration(self) -> None:
348
+ """Save the configuration used for this evaluation run to the output directory.
349
+
350
+ This saves three files:
351
+ 1. config_original.yml - The original configuration file
352
+ 2. config_effective.yml - The configuration with all overrides applied
353
+ 3. config_metadata.json - Metadata about the evaluation run and overrides
354
+ """
355
+ output_dir = self.eval_config.general.output_dir
356
+ output_dir.mkdir(parents=True, exist_ok=True)
357
+
358
+ try:
359
+ # 1. Save original configuration
360
+ config_original_file = output_dir / "config_original.yml"
361
+ if isinstance(self.config.config_file, Path):
362
+ # Copy original file if it exists
363
+ if self.config.config_file.exists():
364
+ shutil.copy2(self.config.config_file, config_original_file)
365
+ self.config_original_file = config_original_file
366
+ logger.info("Original config file copied to %s", config_original_file)
367
+ else:
368
+ logger.warning("Original config file not found at %s", self.config.config_file)
369
+ elif isinstance(self.config.config_file, BaseModel):
370
+ # Serialize programmatic config, using mode='json' to handle special types like timedelta
371
+ config_dict = self.config.config_file.model_dump(mode='json')
372
+ with open(config_original_file, "w", encoding="utf-8") as f:
373
+ yaml.safe_dump(config_dict, f, default_flow_style=False, sort_keys=False)
374
+ self.config_original_file = config_original_file
375
+ logger.info("Programmatic config saved to %s", config_original_file)
376
+
377
+ # 2. Save effective configuration (with overrides applied)
378
+ config_effective_file = output_dir / "config_effective.yml"
379
+ if self.effective_config is not None:
380
+ effective_config_dict = self.effective_config.model_dump(mode='json') if self.effective_config else {}
381
+ with open(config_effective_file, "w", encoding="utf-8") as f:
382
+ yaml.safe_dump(effective_config_dict, f, default_flow_style=False, sort_keys=False)
383
+ self.config_effective_file = config_effective_file
384
+ logger.info("Effective config (with overrides) saved to %s", config_effective_file)
385
+ else:
386
+ logger.warning("Effective config not available, skipping config_effective.yml")
387
+
388
+ # 3. Save metadata about the run
389
+ config_metadata_file = output_dir / "config_metadata.json"
390
+ metadata = {
391
+ "config_file":
392
+ str(self.config.config_file),
393
+ "config_file_type":
394
+ "Path" if isinstance(self.config.config_file, Path) else "BaseModel",
395
+ "overrides": [{
396
+ "path": path, "value": value
397
+ } for path, value in self.config.override] if self.config.override else [],
398
+ "dataset":
399
+ self.config.dataset,
400
+ "result_json_path":
401
+ self.config.result_json_path,
402
+ "skip_workflow":
403
+ self.config.skip_workflow,
404
+ "skip_completed_entries":
405
+ self.config.skip_completed_entries,
406
+ "reps":
407
+ self.config.reps,
408
+ "endpoint":
409
+ self.config.endpoint,
410
+ "endpoint_timeout":
411
+ self.config.endpoint_timeout,
412
+ "adjust_dataset_size":
413
+ self.config.adjust_dataset_size,
414
+ "num_passes":
415
+ self.config.num_passes,
416
+ "export_timeout":
417
+ self.config.export_timeout,
418
+ "user_id":
419
+ self.config.user_id,
420
+ "timestamp":
421
+ datetime.now(tz=UTC).isoformat(),
422
+ }
423
+
424
+ with open(config_metadata_file, "w", encoding="utf-8") as f:
425
+ json.dump(metadata, f, indent=2)
426
+ self.config_metadata_file = config_metadata_file
427
+ logger.info("Configuration metadata saved to %s", config_metadata_file)
428
+
429
+ except Exception:
430
+ logger.exception("Failed to write configuration files")
431
+ # Don't raise - this is not critical enough to fail the entire evaluation
432
+
332
433
  def write_output(self, dataset_handler: DatasetHandler, profiler_results: ProfilerResults):
333
434
  workflow_output_file = self.eval_config.general.output_dir / "workflow_output.json"
334
435
  workflow_output_file.parent.mkdir(parents=True, exist_ok=True)
335
436
 
437
+ # Write the configuration files (original, effective, and metadata)
438
+ self.write_configuration()
439
+
336
440
  # Write the workflow output to a file (this can be used for re-running the evaluation)
337
441
 
338
442
  step_filter = self.eval_config.general.output.workflow_output_step_filter \
@@ -459,7 +563,7 @@ class EvaluationRun:
459
563
  from nat.runtime.loader import load_config
460
564
 
461
565
  # Load and override the config
462
- config = None
566
+ config: Config | None = None
463
567
  if isinstance(self.config.config_file, BaseModel):
464
568
  config = self.config.config_file
465
569
  elif self.config.override:
@@ -467,6 +571,8 @@ class EvaluationRun:
467
571
  else:
468
572
  config = load_config(self.config.config_file)
469
573
 
574
+ # Store the effective configuration for later saving to output directory
575
+ self.effective_config = config
470
576
  self.eval_config = config.eval
471
577
  workflow_alias = self._get_workflow_alias(config.workflow.type)
472
578
  logger.debug("Loaded %s evaluation configuration: %s", workflow_alias, self.eval_config)
@@ -498,7 +604,10 @@ class EvaluationRun:
498
604
  eval_input=EvalInput(eval_input_items=[]),
499
605
  evaluation_results=[],
500
606
  usage_stats=UsageStats(),
501
- profiler_results=ProfilerResults())
607
+ profiler_results=ProfilerResults(),
608
+ config_original_file=self.config_original_file,
609
+ config_effective_file=self.config_effective_file,
610
+ config_metadata_file=self.config_metadata_file)
502
611
 
503
612
  custom_pre_eval_process_function = self.eval_config.general.output.custom_pre_eval_process_function \
504
613
  if self.eval_config.general.output else None
@@ -517,7 +626,25 @@ class EvaluationRun:
517
626
  eval_input=self.eval_input,
518
627
  evaluation_results=self.evaluation_results,
519
628
  usage_stats=self.usage_stats,
520
- profiler_results=ProfilerResults())
629
+ profiler_results=ProfilerResults(),
630
+ config_original_file=self.config_original_file,
631
+ config_effective_file=self.config_effective_file,
632
+ config_metadata_file=self.config_metadata_file)
633
+
634
+ # Validate LLM endpoints before running evaluation (opt-in via config)
635
+ if (not self.config.skip_workflow and not self.config.endpoint and config.eval.general.validate_llm_endpoints):
636
+ try:
637
+ logger.info("Validating LLM endpoints before evaluation (enabled via config)...")
638
+ await validate_llm_endpoints(config)
639
+ except RuntimeError as e:
640
+ # Critical validation errors (404, connection failures) - fail fast
641
+ logger.error("LLM endpoint validation failed: %s", e)
642
+ raise
643
+ except Exception as e:
644
+ # Non-critical errors (missing packages, config issues) - warn but continue
645
+ logger.warning("LLM endpoint validation incomplete: %s. Continuing with evaluation...",
646
+ e,
647
+ exc_info=True)
521
648
 
522
649
  # Run workflow and evaluate
523
650
  async with WorkflowEvalBuilder.from_config(config=config) as eval_workflow:
@@ -526,25 +653,32 @@ class EvaluationRun:
526
653
 
527
654
  with self.eval_trace_context.evaluation_context():
528
655
  # Run workflow
529
- if self.config.endpoint:
530
- await self.run_workflow_remote()
531
- elif not self.config.skip_workflow:
532
- if session_manager is None:
533
- workflow = await eval_workflow.build()
534
- session_manager = SessionManager(workflow,
535
- max_concurrency=self.eval_config.general.max_concurrency)
536
- await self.run_workflow_local(session_manager)
537
-
538
- # Pre-evaluation process the workflow output
539
- self.eval_input = dataset_handler.pre_eval_process_eval_input(self.eval_input)
540
-
541
- # Evaluate
542
- evaluators = {name: eval_workflow.get_evaluator(name) for name in self.eval_config.evaluators}
543
- await self.run_evaluators(evaluators)
544
-
545
- # Wait for all trace export tasks to complete (local workflows only)
546
- if session_manager and not self.config.endpoint:
547
- await self.wait_for_all_export_tasks_local(session_manager, timeout=self.config.export_timeout)
656
+ local_session_manager: SessionManager | None = None
657
+ try:
658
+ if self.config.endpoint:
659
+ await self.run_workflow_remote()
660
+ elif not self.config.skip_workflow:
661
+ if session_manager is None:
662
+ session_manager = await SessionManager.create(
663
+ config=config,
664
+ shared_builder=eval_workflow,
665
+ max_concurrency=self.eval_config.general.max_concurrency)
666
+ local_session_manager = session_manager
667
+ await self.run_workflow_local(session_manager)
668
+
669
+ # Pre-evaluation process the workflow output
670
+ self.eval_input = dataset_handler.pre_eval_process_eval_input(self.eval_input)
671
+
672
+ # Evaluate
673
+ evaluators = {name: eval_workflow.get_evaluator(name) for name in self.eval_config.evaluators}
674
+ await self.run_evaluators(evaluators)
675
+
676
+ # Wait for all trace export tasks to complete (local workflows only)
677
+ if session_manager and not self.config.endpoint:
678
+ await self.wait_for_all_export_tasks_local(session_manager, timeout=self.config.export_timeout)
679
+ finally:
680
+ if local_session_manager is not None:
681
+ await local_session_manager.shutdown()
548
682
 
549
683
  # Profile the workflow
550
684
  profiler_results = await self.profile_workflow()
@@ -572,4 +706,7 @@ class EvaluationRun:
572
706
  eval_input=self.eval_input,
573
707
  evaluation_results=self.evaluation_results,
574
708
  usage_stats=self.usage_stats,
575
- profiler_results=profiler_results)
709
+ profiler_results=profiler_results,
710
+ config_original_file=self.config_original_file,
711
+ config_effective_file=self.config_effective_file,
712
+ config_metadata_file=self.config_metadata_file)
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -34,7 +34,7 @@ class BaseEvaluator(ABC):
34
34
  **Experimental Feature**: The Evaluation API is experimental and may change in future releases.
35
35
  Future versions may introduce breaking changes without notice.
36
36
 
37
- Each custom evaluator must implement the `evaluate_item` method which is used to evaluate a
37
+ Each custom evaluator must implement the ``evaluate_item`` method which is used to evaluate a
38
38
  single EvalInputItem.
39
39
  """
40
40
 
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -16,6 +16,7 @@
16
16
  import typing
17
17
 
18
18
  from pydantic import BaseModel
19
+ from pydantic import SerializeAsAny
19
20
 
20
21
  from nat.data_models.intermediate_step import IntermediateStep
21
22
 
@@ -55,4 +56,4 @@ class EvalOutputItem(BaseModel):
55
56
 
56
57
  class EvalOutput(BaseModel):
57
58
  average_score: typing.Any # float or any serializable type
58
- eval_output_items: list[EvalOutputItem]
59
+ eval_output_items: list[SerializeAsAny[EvalOutputItem]]