nvidia-nat 1.4.0a20251112__py3-none-any.whl → 1.4.0a20260113__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (492)
  1. aiq/__init__.py +1 -1
  2. nat/{front_ends/mcp → agent/auto_memory_wrapper}/__init__.py +1 -1
  3. nat/agent/auto_memory_wrapper/agent.py +278 -0
  4. nat/agent/auto_memory_wrapper/register.py +227 -0
  5. nat/agent/auto_memory_wrapper/state.py +30 -0
  6. nat/agent/base.py +1 -1
  7. nat/agent/dual_node.py +1 -1
  8. nat/agent/prompt_optimizer/prompt.py +1 -1
  9. nat/agent/prompt_optimizer/register.py +1 -1
  10. nat/agent/react_agent/agent.py +16 -9
  11. nat/agent/react_agent/output_parser.py +2 -2
  12. nat/agent/react_agent/prompt.py +3 -2
  13. nat/agent/react_agent/register.py +2 -2
  14. nat/agent/react_agent/register_per_user_agent.py +104 -0
  15. nat/agent/reasoning_agent/reasoning_agent.py +1 -1
  16. nat/agent/register.py +3 -1
  17. nat/agent/responses_api_agent/__init__.py +1 -1
  18. nat/agent/responses_api_agent/register.py +1 -1
  19. nat/agent/rewoo_agent/agent.py +9 -4
  20. nat/agent/rewoo_agent/prompt.py +1 -1
  21. nat/agent/rewoo_agent/register.py +1 -1
  22. nat/agent/tool_calling_agent/agent.py +5 -4
  23. nat/agent/tool_calling_agent/register.py +1 -1
  24. nat/authentication/__init__.py +1 -1
  25. nat/authentication/api_key/__init__.py +1 -1
  26. nat/authentication/api_key/api_key_auth_provider.py +1 -1
  27. nat/authentication/api_key/api_key_auth_provider_config.py +22 -7
  28. nat/authentication/api_key/register.py +1 -1
  29. nat/authentication/credential_validator/__init__.py +1 -1
  30. nat/authentication/credential_validator/bearer_token_validator.py +1 -1
  31. nat/authentication/exceptions/__init__.py +1 -1
  32. nat/authentication/exceptions/api_key_exceptions.py +1 -1
  33. nat/authentication/http_basic_auth/http_basic_auth_provider.py +1 -1
  34. nat/authentication/http_basic_auth/register.py +1 -1
  35. nat/authentication/interfaces.py +1 -1
  36. nat/authentication/oauth2/__init__.py +1 -1
  37. nat/authentication/oauth2/oauth2_auth_code_flow_provider.py +1 -1
  38. nat/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +1 -1
  39. nat/authentication/oauth2/oauth2_resource_server_config.py +1 -1
  40. nat/authentication/oauth2/register.py +1 -1
  41. nat/authentication/register.py +1 -1
  42. nat/builder/builder.py +563 -1
  43. nat/builder/child_builder.py +385 -0
  44. nat/builder/component_utils.py +34 -4
  45. nat/builder/context.py +34 -1
  46. nat/builder/embedder.py +1 -1
  47. nat/builder/eval_builder.py +19 -7
  48. nat/builder/evaluator.py +1 -1
  49. nat/builder/framework_enum.py +3 -1
  50. nat/builder/front_end.py +1 -1
  51. nat/builder/function.py +113 -5
  52. nat/builder/function_base.py +1 -1
  53. nat/builder/function_info.py +1 -1
  54. nat/builder/intermediate_step_manager.py +1 -1
  55. nat/builder/llm.py +1 -1
  56. nat/builder/per_user_workflow_builder.py +843 -0
  57. nat/builder/retriever.py +1 -1
  58. nat/builder/sync_builder.py +571 -0
  59. nat/builder/user_interaction_manager.py +1 -1
  60. nat/builder/workflow.py +5 -3
  61. nat/builder/workflow_builder.py +619 -378
  62. nat/cli/__init__.py +1 -1
  63. nat/cli/cli_utils/config_override.py +1 -1
  64. nat/cli/cli_utils/validation.py +32 -1
  65. nat/cli/commands/configure/channel/add.py +1 -1
  66. nat/cli/commands/configure/channel/channel.py +1 -1
  67. nat/cli/commands/configure/channel/remove.py +1 -1
  68. nat/cli/commands/configure/channel/update.py +1 -1
  69. nat/cli/commands/configure/configure.py +1 -1
  70. nat/cli/commands/evaluate.py +87 -13
  71. nat/cli/commands/finetune.py +132 -0
  72. nat/cli/commands/info/__init__.py +1 -1
  73. nat/cli/commands/info/info.py +1 -1
  74. nat/cli/commands/info/list_channels.py +1 -1
  75. nat/cli/commands/info/list_components.py +1 -1
  76. nat/cli/commands/object_store/__init__.py +1 -1
  77. nat/cli/commands/object_store/object_store.py +1 -1
  78. nat/cli/commands/optimize.py +1 -1
  79. nat/cli/commands/{mcp → red_teaming}/__init__.py +1 -1
  80. nat/cli/commands/red_teaming/red_teaming.py +138 -0
  81. nat/cli/commands/red_teaming/red_teaming_utils.py +73 -0
  82. nat/cli/commands/registry/__init__.py +1 -1
  83. nat/cli/commands/registry/publish.py +1 -1
  84. nat/cli/commands/registry/pull.py +1 -1
  85. nat/cli/commands/registry/registry.py +1 -1
  86. nat/cli/commands/registry/remove.py +1 -1
  87. nat/cli/commands/registry/search.py +1 -1
  88. nat/cli/commands/sizing/__init__.py +1 -1
  89. nat/cli/commands/sizing/calc.py +1 -1
  90. nat/cli/commands/sizing/sizing.py +1 -1
  91. nat/cli/commands/start.py +1 -1
  92. nat/cli/commands/uninstall.py +1 -1
  93. nat/cli/commands/validate.py +1 -1
  94. nat/cli/commands/workflow/__init__.py +1 -1
  95. nat/cli/commands/workflow/workflow.py +1 -1
  96. nat/cli/commands/workflow/workflow_commands.py +3 -2
  97. nat/cli/entrypoint.py +15 -37
  98. nat/cli/main.py +2 -2
  99. nat/cli/plugin_loader.py +69 -0
  100. nat/cli/register_workflow.py +233 -5
  101. nat/cli/type_registry.py +237 -3
  102. nat/control_flow/register.py +1 -1
  103. nat/control_flow/router_agent/agent.py +1 -1
  104. nat/control_flow/router_agent/prompt.py +1 -1
  105. nat/control_flow/router_agent/register.py +1 -1
  106. nat/control_flow/sequential_executor.py +28 -7
  107. nat/data_models/__init__.py +1 -1
  108. nat/data_models/agent.py +1 -1
  109. nat/data_models/api_server.py +38 -3
  110. nat/data_models/authentication.py +1 -1
  111. nat/data_models/common.py +1 -1
  112. nat/data_models/component.py +9 -1
  113. nat/data_models/component_ref.py +45 -1
  114. nat/data_models/config.py +78 -1
  115. nat/data_models/dataset_handler.py +15 -2
  116. nat/data_models/discovery_metadata.py +1 -1
  117. nat/data_models/embedder.py +1 -1
  118. nat/data_models/evaluate.py +6 -1
  119. nat/data_models/evaluator.py +1 -1
  120. nat/data_models/finetuning.py +260 -0
  121. nat/data_models/front_end.py +1 -1
  122. nat/data_models/function.py +15 -2
  123. nat/data_models/function_dependencies.py +1 -1
  124. nat/data_models/gated_field_mixin.py +1 -1
  125. nat/data_models/interactive.py +1 -1
  126. nat/data_models/intermediate_step.py +29 -2
  127. nat/data_models/invocation_node.py +1 -1
  128. nat/data_models/llm.py +1 -1
  129. nat/data_models/logging.py +1 -1
  130. nat/data_models/memory.py +1 -1
  131. nat/data_models/middleware.py +37 -0
  132. nat/data_models/object_store.py +1 -1
  133. nat/data_models/openai_mcp.py +1 -1
  134. nat/data_models/optimizable.py +1 -1
  135. nat/data_models/optimizer.py +1 -1
  136. nat/data_models/profiler.py +1 -1
  137. nat/data_models/registry_handler.py +1 -1
  138. nat/data_models/retriever.py +1 -1
  139. nat/data_models/retry_mixin.py +1 -1
  140. nat/data_models/runtime_enum.py +26 -0
  141. nat/data_models/span.py +1 -1
  142. nat/data_models/step_adaptor.py +1 -1
  143. nat/data_models/streaming.py +1 -1
  144. nat/data_models/swe_bench_model.py +1 -1
  145. nat/data_models/telemetry_exporter.py +1 -1
  146. nat/data_models/thinking_mixin.py +1 -1
  147. nat/data_models/ttc_strategy.py +1 -1
  148. nat/embedder/azure_openai_embedder.py +1 -1
  149. nat/embedder/nim_embedder.py +1 -1
  150. nat/embedder/openai_embedder.py +1 -1
  151. nat/embedder/register.py +1 -1
  152. nat/eval/__init__.py +1 -1
  153. nat/eval/config.py +8 -1
  154. nat/eval/dataset_handler/dataset_downloader.py +1 -1
  155. nat/eval/dataset_handler/dataset_filter.py +1 -1
  156. nat/eval/dataset_handler/dataset_handler.py +4 -2
  157. nat/eval/evaluate.py +226 -81
  158. nat/eval/evaluator/__init__.py +1 -1
  159. nat/eval/evaluator/base_evaluator.py +2 -2
  160. nat/eval/evaluator/evaluator_model.py +3 -2
  161. nat/eval/intermediate_step_adapter.py +1 -1
  162. nat/eval/llm_validator.py +336 -0
  163. nat/eval/rag_evaluator/evaluate.py +17 -10
  164. nat/eval/rag_evaluator/register.py +1 -1
  165. nat/eval/red_teaming_evaluator/__init__.py +14 -0
  166. nat/eval/red_teaming_evaluator/data_models.py +66 -0
  167. nat/eval/red_teaming_evaluator/evaluate.py +327 -0
  168. nat/eval/red_teaming_evaluator/filter_conditions.py +75 -0
  169. nat/eval/red_teaming_evaluator/register.py +55 -0
  170. nat/eval/register.py +2 -1
  171. nat/eval/remote_workflow.py +1 -1
  172. nat/eval/runners/__init__.py +1 -1
  173. nat/eval/runners/config.py +1 -1
  174. nat/eval/runners/multi_eval_runner.py +1 -1
  175. nat/eval/runners/red_teaming_runner/__init__.py +24 -0
  176. nat/eval/runners/red_teaming_runner/config.py +282 -0
  177. nat/eval/runners/red_teaming_runner/report_utils.py +707 -0
  178. nat/eval/runners/red_teaming_runner/runner.py +867 -0
  179. nat/eval/runtime_evaluator/__init__.py +1 -1
  180. nat/eval/runtime_evaluator/evaluate.py +1 -1
  181. nat/eval/runtime_evaluator/register.py +1 -1
  182. nat/eval/runtime_event_subscriber.py +1 -1
  183. nat/eval/swe_bench_evaluator/evaluate.py +1 -1
  184. nat/eval/swe_bench_evaluator/register.py +1 -1
  185. nat/eval/trajectory_evaluator/evaluate.py +2 -2
  186. nat/eval/trajectory_evaluator/register.py +1 -1
  187. nat/eval/tunable_rag_evaluator/evaluate.py +5 -5
  188. nat/eval/tunable_rag_evaluator/register.py +1 -1
  189. nat/eval/usage_stats.py +1 -1
  190. nat/eval/utils/eval_trace_ctx.py +1 -1
  191. nat/eval/utils/output_uploader.py +1 -1
  192. nat/eval/utils/tqdm_position_registry.py +1 -1
  193. nat/eval/utils/weave_eval.py +1 -1
  194. nat/experimental/decorators/experimental_warning_decorator.py +1 -1
  195. nat/experimental/test_time_compute/editing/iterative_plan_refinement_editor.py +1 -1
  196. nat/experimental/test_time_compute/editing/llm_as_a_judge_editor.py +1 -1
  197. nat/experimental/test_time_compute/editing/motivation_aware_summarization.py +1 -1
  198. nat/experimental/test_time_compute/functions/execute_score_select_function.py +1 -1
  199. nat/experimental/test_time_compute/functions/multi_llm_judge_function.py +88 -0
  200. nat/experimental/test_time_compute/functions/plan_select_execute_function.py +1 -1
  201. nat/experimental/test_time_compute/functions/ttc_tool_orchestration_function.py +1 -1
  202. nat/experimental/test_time_compute/functions/ttc_tool_wrapper_function.py +1 -1
  203. nat/experimental/test_time_compute/models/editor_config.py +1 -1
  204. nat/experimental/test_time_compute/models/scoring_config.py +1 -1
  205. nat/experimental/test_time_compute/models/search_config.py +20 -2
  206. nat/experimental/test_time_compute/models/selection_config.py +33 -2
  207. nat/experimental/test_time_compute/models/stage_enums.py +1 -1
  208. nat/experimental/test_time_compute/models/strategy_base.py +1 -1
  209. nat/experimental/test_time_compute/models/tool_use_config.py +1 -1
  210. nat/experimental/test_time_compute/models/ttc_item.py +1 -1
  211. nat/experimental/test_time_compute/register.py +4 -1
  212. nat/experimental/test_time_compute/scoring/llm_based_agent_scorer.py +1 -1
  213. nat/experimental/test_time_compute/scoring/llm_based_plan_scorer.py +1 -1
  214. nat/experimental/test_time_compute/scoring/motivation_aware_scorer.py +1 -1
  215. nat/experimental/test_time_compute/search/multi_llm_generation.py +115 -0
  216. nat/experimental/test_time_compute/search/multi_llm_planner.py +1 -1
  217. nat/experimental/test_time_compute/search/multi_query_retrieval_search.py +1 -1
  218. nat/experimental/test_time_compute/search/single_shot_multi_plan_planner.py +1 -1
  219. nat/experimental/test_time_compute/selection/best_of_n_selector.py +1 -1
  220. nat/experimental/test_time_compute/selection/llm_based_agent_output_selector.py +1 -1
  221. nat/experimental/test_time_compute/selection/llm_based_output_merging_selector.py +1 -1
  222. nat/experimental/test_time_compute/selection/llm_based_plan_selector.py +1 -1
  223. nat/experimental/test_time_compute/selection/llm_judge_selection.py +127 -0
  224. nat/experimental/test_time_compute/selection/threshold_selector.py +1 -1
  225. nat/finetuning/__init__.py +24 -0
  226. nat/finetuning/finetuning_runtime.py +143 -0
  227. nat/finetuning/interfaces/__init__.py +24 -0
  228. nat/finetuning/interfaces/finetuning_runner.py +261 -0
  229. nat/finetuning/interfaces/trainer_adapter.py +103 -0
  230. nat/finetuning/interfaces/trajectory_builder.py +115 -0
  231. nat/finetuning/utils/__init__.py +15 -0
  232. nat/finetuning/utils/parsers/__init__.py +15 -0
  233. nat/finetuning/utils/parsers/adk_parser.py +141 -0
  234. nat/finetuning/utils/parsers/base_parser.py +238 -0
  235. nat/finetuning/utils/parsers/common.py +91 -0
  236. nat/finetuning/utils/parsers/langchain_parser.py +267 -0
  237. nat/finetuning/utils/parsers/llama_index_parser.py +218 -0
  238. nat/front_ends/__init__.py +1 -1
  239. nat/front_ends/console/__init__.py +1 -1
  240. nat/front_ends/console/authentication_flow_handler.py +1 -1
  241. nat/front_ends/console/console_front_end_config.py +4 -1
  242. nat/front_ends/console/console_front_end_plugin.py +5 -4
  243. nat/front_ends/console/register.py +1 -1
  244. nat/front_ends/cron/__init__.py +1 -1
  245. nat/front_ends/fastapi/__init__.py +1 -1
  246. nat/front_ends/fastapi/async_job.py +128 -0
  247. nat/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +1 -1
  248. nat/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +13 -9
  249. nat/front_ends/fastapi/dask_client_mixin.py +1 -1
  250. nat/front_ends/fastapi/fastapi_front_end_config.py +23 -1
  251. nat/front_ends/fastapi/fastapi_front_end_controller.py +1 -1
  252. nat/front_ends/fastapi/fastapi_front_end_plugin.py +25 -30
  253. nat/front_ends/fastapi/fastapi_front_end_plugin_worker.py +318 -59
  254. nat/front_ends/fastapi/html_snippets/__init__.py +1 -1
  255. nat/front_ends/fastapi/html_snippets/auth_code_grant_success.py +1 -1
  256. nat/front_ends/fastapi/intermediate_steps_subscriber.py +12 -1
  257. nat/front_ends/fastapi/job_store.py +23 -11
  258. nat/front_ends/fastapi/main.py +1 -1
  259. nat/front_ends/fastapi/message_handler.py +27 -4
  260. nat/front_ends/fastapi/message_validator.py +54 -2
  261. nat/front_ends/fastapi/register.py +1 -1
  262. nat/front_ends/fastapi/response_helpers.py +16 -15
  263. nat/front_ends/fastapi/step_adaptor.py +1 -1
  264. nat/front_ends/fastapi/utils.py +1 -1
  265. nat/front_ends/register.py +1 -2
  266. nat/front_ends/simple_base/__init__.py +1 -1
  267. nat/front_ends/simple_base/simple_front_end_plugin_base.py +6 -4
  268. nat/llm/aws_bedrock_llm.py +1 -1
  269. nat/llm/azure_openai_llm.py +10 -1
  270. nat/llm/dynamo_llm.py +363 -0
  271. nat/llm/huggingface_llm.py +177 -0
  272. nat/llm/litellm_llm.py +1 -1
  273. nat/llm/nim_llm.py +1 -1
  274. nat/llm/openai_llm.py +1 -1
  275. nat/llm/register.py +3 -1
  276. nat/llm/utils/__init__.py +1 -1
  277. nat/llm/utils/env_config_value.py +1 -1
  278. nat/llm/utils/error.py +1 -1
  279. nat/llm/utils/thinking.py +1 -1
  280. nat/memory/__init__.py +1 -1
  281. nat/memory/interfaces.py +1 -1
  282. nat/memory/models.py +1 -1
  283. nat/meta/pypi.md +1 -1
  284. nat/middleware/__init__.py +35 -0
  285. nat/middleware/cache/__init__.py +14 -0
  286. nat/middleware/cache/cache_middleware.py +253 -0
  287. nat/middleware/cache/cache_middleware_config.py +44 -0
  288. nat/middleware/cache/register.py +33 -0
  289. nat/middleware/defense/__init__.py +14 -0
  290. nat/middleware/defense/defense_middleware.py +362 -0
  291. nat/middleware/defense/defense_middleware_content_guard.py +455 -0
  292. nat/middleware/defense/defense_middleware_data_models.py +91 -0
  293. nat/middleware/defense/defense_middleware_output_verifier.py +440 -0
  294. nat/middleware/defense/defense_middleware_pii.py +356 -0
  295. nat/middleware/defense/register.py +82 -0
  296. nat/middleware/dynamic/__init__.py +14 -0
  297. nat/middleware/dynamic/dynamic_function_middleware.py +962 -0
  298. nat/middleware/dynamic/dynamic_middleware_config.py +132 -0
  299. nat/middleware/dynamic/register.py +34 -0
  300. nat/middleware/function_middleware.py +370 -0
  301. nat/middleware/logging/__init__.py +14 -0
  302. nat/middleware/logging/logging_middleware.py +67 -0
  303. nat/middleware/logging/logging_middleware_config.py +28 -0
  304. nat/middleware/logging/register.py +33 -0
  305. nat/middleware/middleware.py +298 -0
  306. nat/middleware/red_teaming/__init__.py +14 -0
  307. nat/middleware/red_teaming/red_teaming_middleware.py +344 -0
  308. nat/middleware/red_teaming/red_teaming_middleware_config.py +112 -0
  309. nat/middleware/red_teaming/register.py +47 -0
  310. nat/middleware/register.py +22 -0
  311. nat/middleware/utils/__init__.py +14 -0
  312. nat/middleware/utils/workflow_inventory.py +155 -0
  313. nat/object_store/__init__.py +1 -1
  314. nat/object_store/in_memory_object_store.py +1 -1
  315. nat/object_store/interfaces.py +1 -1
  316. nat/object_store/models.py +1 -1
  317. nat/object_store/register.py +1 -1
  318. nat/observability/__init__.py +1 -1
  319. nat/observability/exporter/__init__.py +1 -1
  320. nat/observability/exporter/base_exporter.py +1 -1
  321. nat/observability/exporter/exporter.py +1 -1
  322. nat/observability/exporter/file_exporter.py +1 -1
  323. nat/observability/exporter/processing_exporter.py +1 -1
  324. nat/observability/exporter/raw_exporter.py +1 -1
  325. nat/observability/exporter/span_exporter.py +7 -1
  326. nat/observability/exporter_manager.py +1 -1
  327. nat/observability/mixin/__init__.py +1 -1
  328. nat/observability/mixin/batch_config_mixin.py +1 -1
  329. nat/observability/mixin/collector_config_mixin.py +1 -1
  330. nat/observability/mixin/file_mixin.py +1 -1
  331. nat/observability/mixin/file_mode.py +1 -1
  332. nat/observability/mixin/redaction_config_mixin.py +1 -1
  333. nat/observability/mixin/resource_conflict_mixin.py +1 -1
  334. nat/observability/mixin/serialize_mixin.py +1 -1
  335. nat/observability/mixin/tagging_config_mixin.py +1 -1
  336. nat/observability/mixin/type_introspection_mixin.py +1 -1
  337. nat/observability/processor/__init__.py +1 -1
  338. nat/observability/processor/batching_processor.py +1 -1
  339. nat/observability/processor/callback_processor.py +1 -1
  340. nat/observability/processor/falsy_batch_filter_processor.py +1 -1
  341. nat/observability/processor/intermediate_step_serializer.py +1 -1
  342. nat/observability/processor/processor.py +1 -1
  343. nat/observability/processor/processor_factory.py +1 -1
  344. nat/observability/processor/redaction/__init__.py +1 -1
  345. nat/observability/processor/redaction/contextual_redaction_processor.py +1 -1
  346. nat/observability/processor/redaction/contextual_span_redaction_processor.py +1 -1
  347. nat/observability/processor/redaction/redaction_processor.py +1 -1
  348. nat/observability/processor/redaction/span_header_redaction_processor.py +1 -1
  349. nat/observability/processor/span_tagging_processor.py +1 -1
  350. nat/observability/register.py +1 -1
  351. nat/observability/utils/__init__.py +1 -1
  352. nat/observability/utils/dict_utils.py +1 -1
  353. nat/observability/utils/time_utils.py +1 -1
  354. nat/profiler/calc/__init__.py +1 -1
  355. nat/profiler/calc/calc_runner.py +3 -3
  356. nat/profiler/calc/calculations.py +1 -1
  357. nat/profiler/calc/data_models.py +1 -1
  358. nat/profiler/calc/plot.py +30 -3
  359. nat/profiler/callbacks/agno_callback_handler.py +1 -1
  360. nat/profiler/callbacks/base_callback_class.py +1 -1
  361. nat/profiler/callbacks/langchain_callback_handler.py +33 -3
  362. nat/profiler/callbacks/llama_index_callback_handler.py +13 -10
  363. nat/profiler/callbacks/semantic_kernel_callback_handler.py +1 -1
  364. nat/profiler/callbacks/token_usage_base_model.py +1 -1
  365. nat/profiler/data_frame_row.py +1 -1
  366. nat/profiler/data_models.py +1 -1
  367. nat/profiler/decorators/framework_wrapper.py +32 -1
  368. nat/profiler/decorators/function_tracking.py +1 -1
  369. nat/profiler/forecasting/config.py +1 -1
  370. nat/profiler/forecasting/model_trainer.py +1 -1
  371. nat/profiler/forecasting/models/__init__.py +1 -1
  372. nat/profiler/forecasting/models/forecasting_base_model.py +1 -1
  373. nat/profiler/forecasting/models/linear_model.py +1 -1
  374. nat/profiler/forecasting/models/random_forest_regressor.py +1 -1
  375. nat/profiler/inference_metrics_model.py +1 -1
  376. nat/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +1 -1
  377. nat/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +1 -1
  378. nat/profiler/inference_optimization/data_models.py +1 -1
  379. nat/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +1 -1
  380. nat/profiler/inference_optimization/experimental/prefix_span_analysis.py +1 -1
  381. nat/profiler/inference_optimization/llm_metrics.py +1 -1
  382. nat/profiler/inference_optimization/prompt_caching.py +1 -1
  383. nat/profiler/inference_optimization/token_uniqueness.py +1 -1
  384. nat/profiler/inference_optimization/workflow_runtimes.py +1 -1
  385. nat/profiler/intermediate_property_adapter.py +1 -1
  386. nat/profiler/parameter_optimization/optimizable_utils.py +1 -1
  387. nat/profiler/parameter_optimization/optimizer_runtime.py +1 -1
  388. nat/profiler/parameter_optimization/parameter_optimizer.py +1 -1
  389. nat/profiler/parameter_optimization/parameter_selection.py +1 -1
  390. nat/profiler/parameter_optimization/pareto_visualizer.py +1 -1
  391. nat/profiler/parameter_optimization/prompt_optimizer.py +1 -1
  392. nat/profiler/parameter_optimization/update_helpers.py +1 -1
  393. nat/profiler/profile_runner.py +1 -1
  394. nat/profiler/utils.py +1 -1
  395. nat/registry_handlers/local/local_handler.py +1 -1
  396. nat/registry_handlers/local/register_local.py +1 -1
  397. nat/registry_handlers/metadata_factory.py +1 -1
  398. nat/registry_handlers/package_utils.py +1 -1
  399. nat/registry_handlers/pypi/pypi_handler.py +1 -1
  400. nat/registry_handlers/pypi/register_pypi.py +1 -1
  401. nat/registry_handlers/register.py +1 -1
  402. nat/registry_handlers/registry_handler_base.py +1 -1
  403. nat/registry_handlers/rest/register_rest.py +1 -1
  404. nat/registry_handlers/rest/rest_handler.py +1 -1
  405. nat/registry_handlers/schemas/headers.py +1 -1
  406. nat/registry_handlers/schemas/package.py +1 -1
  407. nat/registry_handlers/schemas/publish.py +1 -1
  408. nat/registry_handlers/schemas/pull.py +1 -1
  409. nat/registry_handlers/schemas/remove.py +1 -1
  410. nat/registry_handlers/schemas/search.py +1 -1
  411. nat/registry_handlers/schemas/status.py +1 -1
  412. nat/retriever/interface.py +1 -1
  413. nat/retriever/milvus/__init__.py +1 -1
  414. nat/retriever/milvus/register.py +12 -4
  415. nat/retriever/milvus/retriever.py +103 -41
  416. nat/retriever/models.py +1 -1
  417. nat/retriever/nemo_retriever/__init__.py +1 -1
  418. nat/retriever/nemo_retriever/register.py +1 -1
  419. nat/retriever/nemo_retriever/retriever.py +5 -5
  420. nat/retriever/register.py +1 -1
  421. nat/runtime/__init__.py +1 -1
  422. nat/runtime/loader.py +10 -3
  423. nat/runtime/metrics.py +180 -0
  424. nat/runtime/runner.py +13 -6
  425. nat/runtime/session.py +458 -32
  426. nat/runtime/user_metadata.py +1 -1
  427. nat/settings/global_settings.py +1 -1
  428. nat/tool/chat_completion.py +1 -1
  429. nat/tool/code_execution/README.md +1 -1
  430. nat/tool/code_execution/code_sandbox.py +2 -2
  431. nat/tool/code_execution/local_sandbox/Dockerfile.sandbox +1 -1
  432. nat/tool/code_execution/local_sandbox/__init__.py +1 -1
  433. nat/tool/code_execution/local_sandbox/local_sandbox_server.py +1 -1
  434. nat/tool/code_execution/local_sandbox/start_local_sandbox.sh +1 -1
  435. nat/tool/code_execution/register.py +1 -1
  436. nat/tool/code_execution/utils.py +1 -1
  437. nat/tool/datetime_tools.py +1 -1
  438. nat/tool/document_search.py +1 -1
  439. nat/tool/github_tools.py +1 -1
  440. nat/tool/memory_tools/add_memory_tool.py +1 -1
  441. nat/tool/memory_tools/delete_memory_tool.py +1 -1
  442. nat/tool/memory_tools/get_memory_tool.py +1 -1
  443. nat/tool/nvidia_rag.py +2 -2
  444. nat/tool/register.py +1 -1
  445. nat/tool/retriever.py +1 -1
  446. nat/tool/server_tools.py +1 -1
  447. nat/utils/__init__.py +8 -5
  448. nat/utils/callable_utils.py +1 -1
  449. nat/utils/data_models/schema_validator.py +1 -1
  450. nat/utils/debugging_utils.py +1 -1
  451. nat/utils/decorators.py +1 -1
  452. nat/utils/dump_distro_mapping.py +1 -1
  453. nat/utils/exception_handlers/automatic_retries.py +3 -3
  454. nat/utils/exception_handlers/schemas.py +1 -1
  455. nat/utils/io/model_processing.py +1 -1
  456. nat/utils/io/supress_logs.py +33 -0
  457. nat/utils/io/yaml_tools.py +1 -1
  458. nat/utils/log_levels.py +1 -1
  459. nat/utils/log_utils.py +13 -1
  460. nat/utils/metadata_utils.py +1 -1
  461. nat/utils/optional_imports.py +1 -1
  462. nat/utils/producer_consumer_queue.py +1 -1
  463. nat/utils/reactive/base/observable_base.py +1 -1
  464. nat/utils/reactive/base/observer_base.py +1 -1
  465. nat/utils/reactive/base/subject_base.py +1 -1
  466. nat/utils/reactive/observable.py +1 -1
  467. nat/utils/reactive/observer.py +1 -1
  468. nat/utils/reactive/subject.py +1 -1
  469. nat/utils/reactive/subscription.py +1 -1
  470. nat/utils/responses_api.py +1 -1
  471. nat/utils/settings/global_settings.py +1 -1
  472. nat/utils/string_utils.py +1 -1
  473. nat/utils/type_converter.py +18 -5
  474. nat/utils/type_utils.py +1 -1
  475. nat/utils/url_utils.py +1 -1
  476. {nvidia_nat-1.4.0a20251112.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/METADATA +46 -15
  477. nvidia_nat-1.4.0a20260113.dist-info/RECORD +547 -0
  478. nvidia_nat-1.4.0a20260113.dist-info/entry_points.txt +38 -0
  479. nat/cli/commands/mcp/mcp.py +0 -986
  480. nat/front_ends/mcp/introspection_token_verifier.py +0 -73
  481. nat/front_ends/mcp/mcp_front_end_config.py +0 -109
  482. nat/front_ends/mcp/mcp_front_end_plugin.py +0 -151
  483. nat/front_ends/mcp/mcp_front_end_plugin_worker.py +0 -362
  484. nat/front_ends/mcp/memory_profiler.py +0 -320
  485. nat/front_ends/mcp/register.py +0 -27
  486. nat/front_ends/mcp/tool_converter.py +0 -321
  487. nvidia_nat-1.4.0a20251112.dist-info/RECORD +0 -481
  488. nvidia_nat-1.4.0a20251112.dist-info/entry_points.txt +0 -22
  489. {nvidia_nat-1.4.0a20251112.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/WHEEL +0 -0
  490. {nvidia_nat-1.4.0a20251112.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
  491. {nvidia_nat-1.4.0a20251112.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE.md +0 -0
  492. {nvidia_nat-1.4.0a20251112.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/top_level.txt +0 -0
nat/eval/evaluate.py CHANGED
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,23 +14,31 @@
14
14
  # limitations under the License.
15
15
 
16
16
  import asyncio
17
+ import json
17
18
  import logging
18
19
  import shutil
20
+ import warnings
21
+ from datetime import UTC
22
+ from datetime import datetime
19
23
  from pathlib import Path
20
24
  from typing import Any
21
25
  from uuid import uuid4
22
26
 
27
+ import yaml
23
28
  from pydantic import BaseModel
24
29
  from tqdm import tqdm
25
30
 
31
+ from nat.data_models.config import Config
26
32
  from nat.data_models.evaluate import EvalConfig
27
33
  from nat.data_models.evaluate import JobEvictionPolicy
34
+ from nat.data_models.runtime_enum import RuntimeTypeEnum
28
35
  from nat.eval.config import EvaluationRunConfig
29
36
  from nat.eval.config import EvaluationRunOutput
30
37
  from nat.eval.dataset_handler.dataset_handler import DatasetHandler
31
38
  from nat.eval.evaluator.evaluator_model import EvalInput
32
39
  from nat.eval.evaluator.evaluator_model import EvalInputItem
33
40
  from nat.eval.evaluator.evaluator_model import EvalOutput
41
+ from nat.eval.llm_validator import validate_llm_endpoints
34
42
  from nat.eval.usage_stats import UsageStats
35
43
  from nat.eval.usage_stats import UsageStatsItem
36
44
  from nat.eval.usage_stats import UsageStatsLLM
@@ -60,6 +68,7 @@ class EvaluationRun:
60
68
  # Run-specific configuration
61
69
  self.config: EvaluationRunConfig = config
62
70
  self.eval_config: EvalConfig | None = None
71
+ self.effective_config: Config | None = None # Stores the complete config after applying overrides
63
72
 
64
73
  # Helpers
65
74
  self.intermediate_step_adapter: IntermediateStepAdapter = IntermediateStepAdapter()
@@ -67,7 +76,13 @@ class EvaluationRun:
67
76
  # Create evaluation trace context
68
77
  try:
69
78
  from nat.eval.utils.eval_trace_ctx import WeaveEvalTraceContext
70
- self.eval_trace_context = WeaveEvalTraceContext()
79
+ with warnings.catch_warnings():
80
+ # Ignore deprecation warnings being triggered by weave. https://github.com/wandb/weave/issues/3666
81
+ warnings.filterwarnings("ignore",
82
+ category=DeprecationWarning,
83
+ message=r"`sentry_sdk\.Hub` is deprecated")
84
+
85
+ self.eval_trace_context = WeaveEvalTraceContext()
71
86
  except Exception:
72
87
  from nat.eval.utils.eval_trace_ctx import EvalTraceContext
73
88
  self.eval_trace_context = EvalTraceContext()
@@ -89,6 +104,11 @@ class EvaluationRun:
89
104
  # evaluation output files
90
105
  self.evaluator_output_files: list[Path] = []
91
106
 
107
+ # configuration output files
108
+ self.config_original_file: Path | None = None
109
+ self.config_effective_file: Path | None = None
110
+ self.config_metadata_file: Path | None = None
111
+
92
112
  def _compute_usage_stats(self, item: EvalInputItem):
93
113
  """Compute usage stats for a single item using the intermediate steps"""
94
114
  # get the prompt and completion tokens from the intermediate steps
@@ -161,62 +181,65 @@ class EvaluationRun:
161
181
  if stop_event.is_set():
162
182
  return "", []
163
183
 
164
- async with session_manager.run(item.input_obj) as runner:
165
- if not session_manager.workflow.has_single_output:
166
- # raise an error if the workflow has multiple outputs
167
- raise NotImplementedError("Multiple outputs are not supported")
168
-
169
- runner_result = None
170
- intermediate_future = None
171
-
172
- try:
173
- # Start usage stats and intermediate steps collection in parallel
174
- intermediate_future = pull_intermediate()
175
- runner_result = runner.result()
176
- base_output = await runner_result
177
- intermediate_steps = await intermediate_future
178
- except NotImplementedError as e:
179
- logger.error("Failed to run the workflow: %s", e)
180
- # raise original error
181
- raise
182
- except Exception as e:
183
- logger.exception("Failed to run the workflow: %s", e)
184
- # stop processing if a workflow error occurs
185
- self.workflow_interrupted = True
186
-
187
- # Cancel any coroutines that are still running, avoiding a warning about unawaited coroutines
188
- # (typically one of these two is what raised the exception and the other is still running)
189
- for coro in (runner_result, intermediate_future):
190
- if coro is not None:
191
- asyncio.ensure_future(coro).cancel()
192
-
193
- stop_event.set()
194
- return
195
-
196
- try:
197
- base_output = runner.convert(base_output, to_type=str)
198
- except ValueError:
199
- pass
200
-
201
- # if base_output is a pydantic model dump it to json
202
- if isinstance(base_output, BaseModel):
203
- output = base_output.model_dump_json(indent=2)
204
- else:
205
- m = jsonpath_expr.find(base_output)
206
- if (not m):
207
- raise RuntimeError(f"Failed to extract output using jsonpath: {self.config.result_json_path}")
208
- if (len(m) > 1):
209
- logger.warning("Multiple matches found for jsonpath at row '%s'. Matches: %s. Using the first",
210
- base_output,
211
- m)
212
- output = m[0].value
213
-
214
- item.output_obj = output
215
- item.trajectory = self.intermediate_step_adapter.validate_intermediate_steps(intermediate_steps)
216
- usage_stats_item = self._compute_usage_stats(item)
217
-
218
- self.weave_eval.log_prediction(item, output)
219
- await self.weave_eval.log_usage_stats(item, usage_stats_item)
184
+ async with session_manager.session(user_id=self.config.user_id) as session:
185
+ async with session.run(item.input_obj, runtime_type=RuntimeTypeEnum.EVALUATE) as runner:
186
+ if not session.workflow.has_single_output:
187
+ # raise an error if the workflow has multiple outputs
188
+ raise NotImplementedError("Multiple outputs are not supported")
189
+
190
+ runner_result = None
191
+ intermediate_future = None
192
+
193
+ try:
194
+ # Start usage stats and intermediate steps collection in parallel
195
+ intermediate_future = pull_intermediate()
196
+ runner_result = runner.result()
197
+ base_output = await runner_result
198
+ intermediate_steps = await intermediate_future
199
+ except NotImplementedError as e:
200
+ logger.error("Failed to run the workflow: %s", e)
201
+ # raise original error
202
+ raise
203
+ except Exception as e:
204
+ logger.exception("Failed to run the workflow: %s", e)
205
+ # stop processing if a workflow error occurs
206
+ self.workflow_interrupted = True
207
+
208
+ # Cancel any coroutines that are still running, avoiding a warning about unawaited coroutines
209
+ # (typically one of these two is what raised the exception and the other is still running)
210
+ for coro in (runner_result, intermediate_future):
211
+ if coro is not None:
212
+ asyncio.ensure_future(coro).cancel()
213
+
214
+ stop_event.set()
215
+ return
216
+
217
+ try:
218
+ base_output = runner.convert(base_output, to_type=str)
219
+ except ValueError:
220
+ pass
221
+
222
+ # if base_output is a pydantic model dump it to json
223
+ if isinstance(base_output, BaseModel):
224
+ output = base_output.model_dump_json(indent=2)
225
+ else:
226
+ m = jsonpath_expr.find(base_output)
227
+ if (not m):
228
+ raise RuntimeError(
229
+ f"Failed to extract output using jsonpath: {self.config.result_json_path}")
230
+ if (len(m) > 1):
231
+ logger.warning(
232
+ "Multiple matches found for jsonpath at row '%s'. Matches: %s. Using the first",
233
+ base_output,
234
+ m)
235
+ output = m[0].value
236
+
237
+ item.output_obj = output
238
+ item.trajectory = self.intermediate_step_adapter.validate_intermediate_steps(intermediate_steps)
239
+ usage_stats_item = self._compute_usage_stats(item)
240
+
241
+ self.weave_eval.log_prediction(item, output)
242
+ await self.weave_eval.log_usage_stats(item, usage_stats_item)
220
243
 
221
244
  async def wrapped_run(item: EvalInputItem) -> None:
222
245
  await run_one(item)
@@ -321,10 +344,99 @@ class EvaluationRun:
321
344
  except Exception as e:
322
345
  logger.exception("Failed to delete old job directory: %s: %s", dir_to_delete, e)
323
346
 
347
+ def write_configuration(self) -> None:
348
+ """Save the configuration used for this evaluation run to the output directory.
349
+
350
+ This saves three files:
351
+ 1. config_original.yml - The original configuration file
352
+ 2. config_effective.yml - The configuration with all overrides applied
353
+ 3. config_metadata.json - Metadata about the evaluation run and overrides
354
+ """
355
+ output_dir = self.eval_config.general.output_dir
356
+ output_dir.mkdir(parents=True, exist_ok=True)
357
+
358
+ try:
359
+ # 1. Save original configuration
360
+ config_original_file = output_dir / "config_original.yml"
361
+ if isinstance(self.config.config_file, Path):
362
+ # Copy original file if it exists
363
+ if self.config.config_file.exists():
364
+ shutil.copy2(self.config.config_file, config_original_file)
365
+ self.config_original_file = config_original_file
366
+ logger.info("Original config file copied to %s", config_original_file)
367
+ else:
368
+ logger.warning("Original config file not found at %s", self.config.config_file)
369
+ elif isinstance(self.config.config_file, BaseModel):
370
+ # Serialize programmatic config, using mode='json' to handle special types like timedelta
371
+ config_dict = self.config.config_file.model_dump(mode='json')
372
+ with open(config_original_file, "w", encoding="utf-8") as f:
373
+ yaml.safe_dump(config_dict, f, default_flow_style=False, sort_keys=False)
374
+ self.config_original_file = config_original_file
375
+ logger.info("Programmatic config saved to %s", config_original_file)
376
+
377
+ # 2. Save effective configuration (with overrides applied)
378
+ config_effective_file = output_dir / "config_effective.yml"
379
+ if self.effective_config is not None:
380
+ effective_config_dict = self.effective_config.model_dump(mode='json') if self.effective_config else {}
381
+ with open(config_effective_file, "w", encoding="utf-8") as f:
382
+ yaml.safe_dump(effective_config_dict, f, default_flow_style=False, sort_keys=False)
383
+ self.config_effective_file = config_effective_file
384
+ logger.info("Effective config (with overrides) saved to %s", config_effective_file)
385
+ else:
386
+ logger.warning("Effective config not available, skipping config_effective.yml")
387
+
388
+ # 3. Save metadata about the run
389
+ config_metadata_file = output_dir / "config_metadata.json"
390
+ metadata = {
391
+ "config_file":
392
+ str(self.config.config_file),
393
+ "config_file_type":
394
+ "Path" if isinstance(self.config.config_file, Path) else "BaseModel",
395
+ "overrides": [{
396
+ "path": path, "value": value
397
+ } for path, value in self.config.override] if self.config.override else [],
398
+ "dataset":
399
+ self.config.dataset,
400
+ "result_json_path":
401
+ self.config.result_json_path,
402
+ "skip_workflow":
403
+ self.config.skip_workflow,
404
+ "skip_completed_entries":
405
+ self.config.skip_completed_entries,
406
+ "reps":
407
+ self.config.reps,
408
+ "endpoint":
409
+ self.config.endpoint,
410
+ "endpoint_timeout":
411
+ self.config.endpoint_timeout,
412
+ "adjust_dataset_size":
413
+ self.config.adjust_dataset_size,
414
+ "num_passes":
415
+ self.config.num_passes,
416
+ "export_timeout":
417
+ self.config.export_timeout,
418
+ "user_id":
419
+ self.config.user_id,
420
+ "timestamp":
421
+ datetime.now(tz=UTC).isoformat(),
422
+ }
423
+
424
+ with open(config_metadata_file, "w", encoding="utf-8") as f:
425
+ json.dump(metadata, f, indent=2)
426
+ self.config_metadata_file = config_metadata_file
427
+ logger.info("Configuration metadata saved to %s", config_metadata_file)
428
+
429
+ except Exception:
430
+ logger.exception("Failed to write configuration files")
431
+ # Don't raise - this is not critical enough to fail the entire evaluation
432
+
324
433
  def write_output(self, dataset_handler: DatasetHandler, profiler_results: ProfilerResults):
325
434
  workflow_output_file = self.eval_config.general.output_dir / "workflow_output.json"
326
435
  workflow_output_file.parent.mkdir(parents=True, exist_ok=True)
327
436
 
437
+ # Write the configuration files (original, effective, and metadata)
438
+ self.write_configuration()
439
+
328
440
  # Write the workflow output to a file (this can be used for re-running the evaluation)
329
441
 
330
442
  step_filter = self.eval_config.general.output.workflow_output_step_filter \
@@ -451,7 +563,7 @@ class EvaluationRun:
451
563
  from nat.runtime.loader import load_config
452
564
 
453
565
  # Load and override the config
454
- config = None
566
+ config: Config | None = None
455
567
  if isinstance(self.config.config_file, BaseModel):
456
568
  config = self.config.config_file
457
569
  elif self.config.override:
@@ -459,6 +571,8 @@ class EvaluationRun:
459
571
  else:
460
572
  config = load_config(self.config.config_file)
461
573
 
574
+ # Store the effective configuration for later saving to output directory
575
+ self.effective_config = config
462
576
  self.eval_config = config.eval
463
577
  workflow_alias = self._get_workflow_alias(config.workflow.type)
464
578
  logger.debug("Loaded %s evaluation configuration: %s", workflow_alias, self.eval_config)
@@ -490,7 +604,10 @@ class EvaluationRun:
490
604
  eval_input=EvalInput(eval_input_items=[]),
491
605
  evaluation_results=[],
492
606
  usage_stats=UsageStats(),
493
- profiler_results=ProfilerResults())
607
+ profiler_results=ProfilerResults(),
608
+ config_original_file=self.config_original_file,
609
+ config_effective_file=self.config_effective_file,
610
+ config_metadata_file=self.config_metadata_file)
494
611
 
495
612
  custom_pre_eval_process_function = self.eval_config.general.output.custom_pre_eval_process_function \
496
613
  if self.eval_config.general.output else None
@@ -509,7 +626,25 @@ class EvaluationRun:
509
626
  eval_input=self.eval_input,
510
627
  evaluation_results=self.evaluation_results,
511
628
  usage_stats=self.usage_stats,
512
- profiler_results=ProfilerResults())
629
+ profiler_results=ProfilerResults(),
630
+ config_original_file=self.config_original_file,
631
+ config_effective_file=self.config_effective_file,
632
+ config_metadata_file=self.config_metadata_file)
633
+
634
+ # Validate LLM endpoints before running evaluation (opt-in via config)
635
+ if (not self.config.skip_workflow and not self.config.endpoint and config.eval.general.validate_llm_endpoints):
636
+ try:
637
+ logger.info("Validating LLM endpoints before evaluation (enabled via config)...")
638
+ await validate_llm_endpoints(config)
639
+ except RuntimeError as e:
640
+ # Critical validation errors (404, connection failures) - fail fast
641
+ logger.error("LLM endpoint validation failed: %s", e)
642
+ raise
643
+ except Exception as e:
644
+ # Non-critical errors (missing packages, config issues) - warn but continue
645
+ logger.warning("LLM endpoint validation incomplete: %s. Continuing with evaluation...",
646
+ e,
647
+ exc_info=True)
513
648
 
514
649
  # Run workflow and evaluate
515
650
  async with WorkflowEvalBuilder.from_config(config=config) as eval_workflow:
@@ -518,25 +653,32 @@ class EvaluationRun:
518
653
 
519
654
  with self.eval_trace_context.evaluation_context():
520
655
  # Run workflow
521
- if self.config.endpoint:
522
- await self.run_workflow_remote()
523
- elif not self.config.skip_workflow:
524
- if session_manager is None:
525
- workflow = await eval_workflow.build()
526
- session_manager = SessionManager(workflow,
527
- max_concurrency=self.eval_config.general.max_concurrency)
528
- await self.run_workflow_local(session_manager)
529
-
530
- # Pre-evaluation process the workflow output
531
- self.eval_input = dataset_handler.pre_eval_process_eval_input(self.eval_input)
532
-
533
- # Evaluate
534
- evaluators = {name: eval_workflow.get_evaluator(name) for name in self.eval_config.evaluators}
535
- await self.run_evaluators(evaluators)
536
-
537
- # Wait for all trace export tasks to complete (local workflows only)
538
- if session_manager and not self.config.endpoint:
539
- await self.wait_for_all_export_tasks_local(session_manager, timeout=self.config.export_timeout)
656
+ local_session_manager: SessionManager | None = None
657
+ try:
658
+ if self.config.endpoint:
659
+ await self.run_workflow_remote()
660
+ elif not self.config.skip_workflow:
661
+ if session_manager is None:
662
+ session_manager = await SessionManager.create(
663
+ config=config,
664
+ shared_builder=eval_workflow,
665
+ max_concurrency=self.eval_config.general.max_concurrency)
666
+ local_session_manager = session_manager
667
+ await self.run_workflow_local(session_manager)
668
+
669
+ # Pre-evaluation process the workflow output
670
+ self.eval_input = dataset_handler.pre_eval_process_eval_input(self.eval_input)
671
+
672
+ # Evaluate
673
+ evaluators = {name: eval_workflow.get_evaluator(name) for name in self.eval_config.evaluators}
674
+ await self.run_evaluators(evaluators)
675
+
676
+ # Wait for all trace export tasks to complete (local workflows only)
677
+ if session_manager and not self.config.endpoint:
678
+ await self.wait_for_all_export_tasks_local(session_manager, timeout=self.config.export_timeout)
679
+ finally:
680
+ if local_session_manager is not None:
681
+ await local_session_manager.shutdown()
540
682
 
541
683
  # Profile the workflow
542
684
  profiler_results = await self.profile_workflow()
@@ -564,4 +706,7 @@ class EvaluationRun:
564
706
  eval_input=self.eval_input,
565
707
  evaluation_results=self.evaluation_results,
566
708
  usage_stats=self.usage_stats,
567
- profiler_results=profiler_results)
709
+ profiler_results=profiler_results,
710
+ config_original_file=self.config_original_file,
711
+ config_effective_file=self.config_effective_file,
712
+ config_metadata_file=self.config_metadata_file)
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -34,7 +34,7 @@ class BaseEvaluator(ABC):
34
34
  **Experimental Feature**: The Evaluation API is experimental and may change in future releases.
35
35
  Future versions may introduce breaking changes without notice.
36
36
 
37
- Each custom evaluator must implement the `evaluate_item` method which is used to evaluate a
37
+ Each custom evaluator must implement the ``evaluate_item`` method which is used to evaluate a
38
38
  single EvalInputItem.
39
39
  """
40
40
 
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -16,6 +16,7 @@
16
16
  import typing
17
17
 
18
18
  from pydantic import BaseModel
19
+ from pydantic import SerializeAsAny
19
20
 
20
21
  from nat.data_models.intermediate_step import IntermediateStep
21
22
 
@@ -55,4 +56,4 @@ class EvalOutputItem(BaseModel):
55
56
 
56
57
  class EvalOutput(BaseModel):
57
58
  average_score: typing.Any # float or any serializable type
58
- eval_output_items: list[EvalOutputItem]
59
+ eval_output_items: list[SerializeAsAny[EvalOutputItem]]
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");