nvidia-nat 1.4.0a20251120__py3-none-any.whl → 1.4.0a20260113__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (492) hide show
  1. aiq/__init__.py +1 -1
  2. nat/{front_ends/mcp → agent/auto_memory_wrapper}/__init__.py +1 -1
  3. nat/agent/auto_memory_wrapper/agent.py +278 -0
  4. nat/agent/auto_memory_wrapper/register.py +227 -0
  5. nat/agent/auto_memory_wrapper/state.py +30 -0
  6. nat/agent/base.py +1 -1
  7. nat/agent/dual_node.py +1 -1
  8. nat/agent/prompt_optimizer/prompt.py +1 -1
  9. nat/agent/prompt_optimizer/register.py +1 -1
  10. nat/agent/react_agent/agent.py +16 -9
  11. nat/agent/react_agent/output_parser.py +2 -2
  12. nat/agent/react_agent/prompt.py +3 -2
  13. nat/agent/react_agent/register.py +2 -2
  14. nat/agent/react_agent/register_per_user_agent.py +104 -0
  15. nat/agent/reasoning_agent/reasoning_agent.py +1 -1
  16. nat/agent/register.py +3 -1
  17. nat/agent/responses_api_agent/__init__.py +1 -1
  18. nat/agent/responses_api_agent/register.py +1 -1
  19. nat/agent/rewoo_agent/agent.py +9 -4
  20. nat/agent/rewoo_agent/prompt.py +1 -1
  21. nat/agent/rewoo_agent/register.py +1 -1
  22. nat/agent/tool_calling_agent/agent.py +5 -4
  23. nat/agent/tool_calling_agent/register.py +1 -1
  24. nat/authentication/__init__.py +1 -1
  25. nat/authentication/api_key/__init__.py +1 -1
  26. nat/authentication/api_key/api_key_auth_provider.py +1 -1
  27. nat/authentication/api_key/api_key_auth_provider_config.py +22 -7
  28. nat/authentication/api_key/register.py +1 -1
  29. nat/authentication/credential_validator/__init__.py +1 -1
  30. nat/authentication/credential_validator/bearer_token_validator.py +1 -1
  31. nat/authentication/exceptions/__init__.py +1 -1
  32. nat/authentication/exceptions/api_key_exceptions.py +1 -1
  33. nat/authentication/http_basic_auth/http_basic_auth_provider.py +1 -1
  34. nat/authentication/http_basic_auth/register.py +1 -1
  35. nat/authentication/interfaces.py +1 -1
  36. nat/authentication/oauth2/__init__.py +1 -1
  37. nat/authentication/oauth2/oauth2_auth_code_flow_provider.py +1 -1
  38. nat/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +1 -1
  39. nat/authentication/oauth2/oauth2_resource_server_config.py +1 -1
  40. nat/authentication/oauth2/register.py +1 -1
  41. nat/authentication/register.py +1 -1
  42. nat/builder/builder.py +511 -1
  43. nat/builder/child_builder.py +385 -0
  44. nat/builder/component_utils.py +28 -4
  45. nat/builder/context.py +17 -1
  46. nat/builder/embedder.py +1 -1
  47. nat/builder/eval_builder.py +19 -7
  48. nat/builder/evaluator.py +1 -1
  49. nat/builder/framework_enum.py +2 -1
  50. nat/builder/front_end.py +1 -1
  51. nat/builder/function.py +40 -3
  52. nat/builder/function_base.py +1 -1
  53. nat/builder/function_info.py +1 -1
  54. nat/builder/intermediate_step_manager.py +1 -1
  55. nat/builder/llm.py +1 -1
  56. nat/builder/per_user_workflow_builder.py +843 -0
  57. nat/builder/retriever.py +1 -1
  58. nat/builder/sync_builder.py +571 -0
  59. nat/builder/user_interaction_manager.py +1 -1
  60. nat/builder/workflow.py +1 -1
  61. nat/builder/workflow_builder.py +536 -424
  62. nat/cli/__init__.py +1 -1
  63. nat/cli/cli_utils/config_override.py +1 -1
  64. nat/cli/cli_utils/validation.py +32 -1
  65. nat/cli/commands/configure/channel/add.py +1 -1
  66. nat/cli/commands/configure/channel/channel.py +1 -1
  67. nat/cli/commands/configure/channel/remove.py +1 -1
  68. nat/cli/commands/configure/channel/update.py +1 -1
  69. nat/cli/commands/configure/configure.py +1 -1
  70. nat/cli/commands/evaluate.py +87 -13
  71. nat/cli/commands/finetune.py +132 -0
  72. nat/cli/commands/info/__init__.py +1 -1
  73. nat/cli/commands/info/info.py +1 -1
  74. nat/cli/commands/info/list_channels.py +1 -1
  75. nat/cli/commands/info/list_components.py +1 -1
  76. nat/cli/commands/object_store/__init__.py +1 -1
  77. nat/cli/commands/object_store/object_store.py +1 -1
  78. nat/cli/commands/optimize.py +1 -1
  79. nat/cli/commands/{mcp → red_teaming}/__init__.py +1 -1
  80. nat/cli/commands/red_teaming/red_teaming.py +138 -0
  81. nat/cli/commands/red_teaming/red_teaming_utils.py +73 -0
  82. nat/cli/commands/registry/__init__.py +1 -1
  83. nat/cli/commands/registry/publish.py +1 -1
  84. nat/cli/commands/registry/pull.py +1 -1
  85. nat/cli/commands/registry/registry.py +1 -1
  86. nat/cli/commands/registry/remove.py +1 -1
  87. nat/cli/commands/registry/search.py +1 -1
  88. nat/cli/commands/sizing/__init__.py +1 -1
  89. nat/cli/commands/sizing/calc.py +1 -1
  90. nat/cli/commands/sizing/sizing.py +1 -1
  91. nat/cli/commands/start.py +1 -1
  92. nat/cli/commands/uninstall.py +1 -1
  93. nat/cli/commands/validate.py +1 -1
  94. nat/cli/commands/workflow/__init__.py +1 -1
  95. nat/cli/commands/workflow/workflow.py +1 -1
  96. nat/cli/commands/workflow/workflow_commands.py +3 -2
  97. nat/cli/entrypoint.py +15 -37
  98. nat/cli/main.py +2 -2
  99. nat/cli/plugin_loader.py +69 -0
  100. nat/cli/register_workflow.py +183 -5
  101. nat/cli/type_registry.py +169 -3
  102. nat/control_flow/register.py +1 -1
  103. nat/control_flow/router_agent/agent.py +1 -1
  104. nat/control_flow/router_agent/prompt.py +1 -1
  105. nat/control_flow/router_agent/register.py +1 -1
  106. nat/control_flow/sequential_executor.py +28 -7
  107. nat/data_models/__init__.py +1 -1
  108. nat/data_models/agent.py +1 -1
  109. nat/data_models/api_server.py +38 -3
  110. nat/data_models/authentication.py +1 -1
  111. nat/data_models/common.py +1 -1
  112. nat/data_models/component.py +7 -1
  113. nat/data_models/component_ref.py +34 -1
  114. nat/data_models/config.py +62 -1
  115. nat/data_models/dataset_handler.py +15 -2
  116. nat/data_models/discovery_metadata.py +1 -1
  117. nat/data_models/embedder.py +1 -1
  118. nat/data_models/evaluate.py +6 -1
  119. nat/data_models/evaluator.py +1 -1
  120. nat/data_models/finetuning.py +260 -0
  121. nat/data_models/front_end.py +1 -1
  122. nat/data_models/function.py +1 -1
  123. nat/data_models/function_dependencies.py +1 -1
  124. nat/data_models/gated_field_mixin.py +1 -1
  125. nat/data_models/interactive.py +1 -1
  126. nat/data_models/intermediate_step.py +29 -2
  127. nat/data_models/invocation_node.py +1 -1
  128. nat/data_models/llm.py +1 -1
  129. nat/data_models/logging.py +1 -1
  130. nat/data_models/memory.py +1 -1
  131. nat/data_models/middleware.py +3 -1
  132. nat/data_models/object_store.py +1 -1
  133. nat/data_models/openai_mcp.py +1 -1
  134. nat/data_models/optimizable.py +1 -1
  135. nat/data_models/optimizer.py +1 -1
  136. nat/data_models/profiler.py +1 -1
  137. nat/data_models/registry_handler.py +1 -1
  138. nat/data_models/retriever.py +1 -1
  139. nat/data_models/retry_mixin.py +1 -1
  140. nat/data_models/runtime_enum.py +1 -1
  141. nat/data_models/span.py +1 -1
  142. nat/data_models/step_adaptor.py +1 -1
  143. nat/data_models/streaming.py +1 -1
  144. nat/data_models/swe_bench_model.py +1 -1
  145. nat/data_models/telemetry_exporter.py +1 -1
  146. nat/data_models/thinking_mixin.py +1 -1
  147. nat/data_models/ttc_strategy.py +1 -1
  148. nat/embedder/azure_openai_embedder.py +1 -1
  149. nat/embedder/nim_embedder.py +1 -1
  150. nat/embedder/openai_embedder.py +1 -1
  151. nat/embedder/register.py +1 -1
  152. nat/eval/__init__.py +1 -1
  153. nat/eval/config.py +8 -1
  154. nat/eval/dataset_handler/dataset_downloader.py +1 -1
  155. nat/eval/dataset_handler/dataset_filter.py +1 -1
  156. nat/eval/dataset_handler/dataset_handler.py +4 -2
  157. nat/eval/evaluate.py +217 -80
  158. nat/eval/evaluator/__init__.py +1 -1
  159. nat/eval/evaluator/base_evaluator.py +2 -2
  160. nat/eval/evaluator/evaluator_model.py +3 -2
  161. nat/eval/intermediate_step_adapter.py +1 -1
  162. nat/eval/llm_validator.py +336 -0
  163. nat/eval/rag_evaluator/evaluate.py +17 -10
  164. nat/eval/rag_evaluator/register.py +1 -1
  165. nat/eval/red_teaming_evaluator/__init__.py +14 -0
  166. nat/eval/red_teaming_evaluator/data_models.py +66 -0
  167. nat/eval/red_teaming_evaluator/evaluate.py +327 -0
  168. nat/eval/red_teaming_evaluator/filter_conditions.py +75 -0
  169. nat/eval/red_teaming_evaluator/register.py +55 -0
  170. nat/eval/register.py +2 -1
  171. nat/eval/remote_workflow.py +1 -1
  172. nat/eval/runners/__init__.py +1 -1
  173. nat/eval/runners/config.py +1 -1
  174. nat/eval/runners/multi_eval_runner.py +1 -1
  175. nat/eval/runners/red_teaming_runner/__init__.py +24 -0
  176. nat/eval/runners/red_teaming_runner/config.py +282 -0
  177. nat/eval/runners/red_teaming_runner/report_utils.py +707 -0
  178. nat/eval/runners/red_teaming_runner/runner.py +867 -0
  179. nat/eval/runtime_evaluator/__init__.py +1 -1
  180. nat/eval/runtime_evaluator/evaluate.py +1 -1
  181. nat/eval/runtime_evaluator/register.py +1 -1
  182. nat/eval/runtime_event_subscriber.py +1 -1
  183. nat/eval/swe_bench_evaluator/evaluate.py +1 -1
  184. nat/eval/swe_bench_evaluator/register.py +1 -1
  185. nat/eval/trajectory_evaluator/evaluate.py +2 -2
  186. nat/eval/trajectory_evaluator/register.py +1 -1
  187. nat/eval/tunable_rag_evaluator/evaluate.py +5 -5
  188. nat/eval/tunable_rag_evaluator/register.py +1 -1
  189. nat/eval/usage_stats.py +1 -1
  190. nat/eval/utils/eval_trace_ctx.py +1 -1
  191. nat/eval/utils/output_uploader.py +1 -1
  192. nat/eval/utils/tqdm_position_registry.py +1 -1
  193. nat/eval/utils/weave_eval.py +1 -1
  194. nat/experimental/decorators/experimental_warning_decorator.py +1 -1
  195. nat/experimental/test_time_compute/editing/iterative_plan_refinement_editor.py +1 -1
  196. nat/experimental/test_time_compute/editing/llm_as_a_judge_editor.py +1 -1
  197. nat/experimental/test_time_compute/editing/motivation_aware_summarization.py +1 -1
  198. nat/experimental/test_time_compute/functions/execute_score_select_function.py +1 -1
  199. nat/experimental/test_time_compute/functions/multi_llm_judge_function.py +88 -0
  200. nat/experimental/test_time_compute/functions/plan_select_execute_function.py +1 -1
  201. nat/experimental/test_time_compute/functions/ttc_tool_orchestration_function.py +1 -1
  202. nat/experimental/test_time_compute/functions/ttc_tool_wrapper_function.py +1 -1
  203. nat/experimental/test_time_compute/models/editor_config.py +1 -1
  204. nat/experimental/test_time_compute/models/scoring_config.py +1 -1
  205. nat/experimental/test_time_compute/models/search_config.py +20 -2
  206. nat/experimental/test_time_compute/models/selection_config.py +33 -2
  207. nat/experimental/test_time_compute/models/stage_enums.py +1 -1
  208. nat/experimental/test_time_compute/models/strategy_base.py +1 -1
  209. nat/experimental/test_time_compute/models/tool_use_config.py +1 -1
  210. nat/experimental/test_time_compute/models/ttc_item.py +1 -1
  211. nat/experimental/test_time_compute/register.py +4 -1
  212. nat/experimental/test_time_compute/scoring/llm_based_agent_scorer.py +1 -1
  213. nat/experimental/test_time_compute/scoring/llm_based_plan_scorer.py +1 -1
  214. nat/experimental/test_time_compute/scoring/motivation_aware_scorer.py +1 -1
  215. nat/experimental/test_time_compute/search/multi_llm_generation.py +115 -0
  216. nat/experimental/test_time_compute/search/multi_llm_planner.py +1 -1
  217. nat/experimental/test_time_compute/search/multi_query_retrieval_search.py +1 -1
  218. nat/experimental/test_time_compute/search/single_shot_multi_plan_planner.py +1 -1
  219. nat/experimental/test_time_compute/selection/best_of_n_selector.py +1 -1
  220. nat/experimental/test_time_compute/selection/llm_based_agent_output_selector.py +1 -1
  221. nat/experimental/test_time_compute/selection/llm_based_output_merging_selector.py +1 -1
  222. nat/experimental/test_time_compute/selection/llm_based_plan_selector.py +1 -1
  223. nat/experimental/test_time_compute/selection/llm_judge_selection.py +127 -0
  224. nat/experimental/test_time_compute/selection/threshold_selector.py +1 -1
  225. nat/finetuning/__init__.py +24 -0
  226. nat/finetuning/finetuning_runtime.py +143 -0
  227. nat/finetuning/interfaces/__init__.py +24 -0
  228. nat/finetuning/interfaces/finetuning_runner.py +261 -0
  229. nat/finetuning/interfaces/trainer_adapter.py +103 -0
  230. nat/finetuning/interfaces/trajectory_builder.py +115 -0
  231. nat/finetuning/utils/__init__.py +15 -0
  232. nat/finetuning/utils/parsers/__init__.py +15 -0
  233. nat/finetuning/utils/parsers/adk_parser.py +141 -0
  234. nat/finetuning/utils/parsers/base_parser.py +238 -0
  235. nat/finetuning/utils/parsers/common.py +91 -0
  236. nat/finetuning/utils/parsers/langchain_parser.py +267 -0
  237. nat/finetuning/utils/parsers/llama_index_parser.py +218 -0
  238. nat/front_ends/__init__.py +1 -1
  239. nat/front_ends/console/__init__.py +1 -1
  240. nat/front_ends/console/authentication_flow_handler.py +1 -1
  241. nat/front_ends/console/console_front_end_config.py +4 -1
  242. nat/front_ends/console/console_front_end_plugin.py +5 -4
  243. nat/front_ends/console/register.py +1 -1
  244. nat/front_ends/cron/__init__.py +1 -1
  245. nat/front_ends/fastapi/__init__.py +1 -1
  246. nat/front_ends/fastapi/async_job.py +128 -0
  247. nat/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +1 -1
  248. nat/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +13 -9
  249. nat/front_ends/fastapi/dask_client_mixin.py +1 -1
  250. nat/front_ends/fastapi/fastapi_front_end_config.py +1 -1
  251. nat/front_ends/fastapi/fastapi_front_end_controller.py +1 -1
  252. nat/front_ends/fastapi/fastapi_front_end_plugin.py +25 -30
  253. nat/front_ends/fastapi/fastapi_front_end_plugin_worker.py +195 -60
  254. nat/front_ends/fastapi/html_snippets/__init__.py +1 -1
  255. nat/front_ends/fastapi/html_snippets/auth_code_grant_success.py +1 -1
  256. nat/front_ends/fastapi/intermediate_steps_subscriber.py +12 -1
  257. nat/front_ends/fastapi/job_store.py +23 -11
  258. nat/front_ends/fastapi/main.py +1 -1
  259. nat/front_ends/fastapi/message_handler.py +27 -4
  260. nat/front_ends/fastapi/message_validator.py +54 -2
  261. nat/front_ends/fastapi/register.py +1 -1
  262. nat/front_ends/fastapi/response_helpers.py +16 -15
  263. nat/front_ends/fastapi/step_adaptor.py +1 -1
  264. nat/front_ends/fastapi/utils.py +1 -1
  265. nat/front_ends/register.py +1 -2
  266. nat/front_ends/simple_base/__init__.py +1 -1
  267. nat/front_ends/simple_base/simple_front_end_plugin_base.py +6 -4
  268. nat/llm/aws_bedrock_llm.py +1 -1
  269. nat/llm/azure_openai_llm.py +10 -1
  270. nat/llm/dynamo_llm.py +363 -0
  271. nat/llm/huggingface_llm.py +177 -0
  272. nat/llm/litellm_llm.py +1 -1
  273. nat/llm/nim_llm.py +1 -1
  274. nat/llm/openai_llm.py +1 -1
  275. nat/llm/register.py +3 -1
  276. nat/llm/utils/__init__.py +1 -1
  277. nat/llm/utils/env_config_value.py +1 -1
  278. nat/llm/utils/error.py +1 -1
  279. nat/llm/utils/thinking.py +1 -1
  280. nat/memory/__init__.py +1 -1
  281. nat/memory/interfaces.py +1 -1
  282. nat/memory/models.py +1 -1
  283. nat/meta/pypi.md +1 -1
  284. nat/middleware/__init__.py +5 -5
  285. nat/middleware/cache/__init__.py +14 -0
  286. nat/middleware/{cache_middleware.py → cache/cache_middleware.py} +39 -42
  287. nat/middleware/cache/cache_middleware_config.py +44 -0
  288. nat/middleware/cache/register.py +33 -0
  289. nat/middleware/defense/__init__.py +14 -0
  290. nat/middleware/defense/defense_middleware.py +362 -0
  291. nat/middleware/defense/defense_middleware_content_guard.py +455 -0
  292. nat/middleware/defense/defense_middleware_data_models.py +91 -0
  293. nat/middleware/defense/defense_middleware_output_verifier.py +440 -0
  294. nat/middleware/defense/defense_middleware_pii.py +356 -0
  295. nat/middleware/defense/register.py +82 -0
  296. nat/middleware/dynamic/__init__.py +14 -0
  297. nat/middleware/dynamic/dynamic_function_middleware.py +962 -0
  298. nat/middleware/dynamic/dynamic_middleware_config.py +132 -0
  299. nat/middleware/dynamic/register.py +34 -0
  300. nat/middleware/function_middleware.py +236 -52
  301. nat/middleware/logging/__init__.py +14 -0
  302. nat/middleware/logging/logging_middleware.py +67 -0
  303. nat/middleware/logging/logging_middleware_config.py +28 -0
  304. nat/middleware/logging/register.py +33 -0
  305. nat/middleware/middleware.py +142 -28
  306. nat/middleware/red_teaming/__init__.py +14 -0
  307. nat/middleware/red_teaming/red_teaming_middleware.py +344 -0
  308. nat/middleware/red_teaming/red_teaming_middleware_config.py +112 -0
  309. nat/middleware/red_teaming/register.py +47 -0
  310. nat/middleware/register.py +7 -20
  311. nat/middleware/utils/__init__.py +14 -0
  312. nat/middleware/utils/workflow_inventory.py +155 -0
  313. nat/object_store/__init__.py +1 -1
  314. nat/object_store/in_memory_object_store.py +1 -1
  315. nat/object_store/interfaces.py +1 -1
  316. nat/object_store/models.py +1 -1
  317. nat/object_store/register.py +1 -1
  318. nat/observability/__init__.py +1 -1
  319. nat/observability/exporter/__init__.py +1 -1
  320. nat/observability/exporter/base_exporter.py +1 -1
  321. nat/observability/exporter/exporter.py +1 -1
  322. nat/observability/exporter/file_exporter.py +1 -1
  323. nat/observability/exporter/processing_exporter.py +1 -1
  324. nat/observability/exporter/raw_exporter.py +1 -1
  325. nat/observability/exporter/span_exporter.py +7 -1
  326. nat/observability/exporter_manager.py +1 -1
  327. nat/observability/mixin/__init__.py +1 -1
  328. nat/observability/mixin/batch_config_mixin.py +1 -1
  329. nat/observability/mixin/collector_config_mixin.py +1 -1
  330. nat/observability/mixin/file_mixin.py +1 -1
  331. nat/observability/mixin/file_mode.py +1 -1
  332. nat/observability/mixin/redaction_config_mixin.py +1 -1
  333. nat/observability/mixin/resource_conflict_mixin.py +1 -1
  334. nat/observability/mixin/serialize_mixin.py +1 -1
  335. nat/observability/mixin/tagging_config_mixin.py +1 -1
  336. nat/observability/mixin/type_introspection_mixin.py +1 -1
  337. nat/observability/processor/__init__.py +1 -1
  338. nat/observability/processor/batching_processor.py +1 -1
  339. nat/observability/processor/callback_processor.py +1 -1
  340. nat/observability/processor/falsy_batch_filter_processor.py +1 -1
  341. nat/observability/processor/intermediate_step_serializer.py +1 -1
  342. nat/observability/processor/processor.py +1 -1
  343. nat/observability/processor/processor_factory.py +1 -1
  344. nat/observability/processor/redaction/__init__.py +1 -1
  345. nat/observability/processor/redaction/contextual_redaction_processor.py +1 -1
  346. nat/observability/processor/redaction/contextual_span_redaction_processor.py +1 -1
  347. nat/observability/processor/redaction/redaction_processor.py +1 -1
  348. nat/observability/processor/redaction/span_header_redaction_processor.py +1 -1
  349. nat/observability/processor/span_tagging_processor.py +1 -1
  350. nat/observability/register.py +1 -1
  351. nat/observability/utils/__init__.py +1 -1
  352. nat/observability/utils/dict_utils.py +1 -1
  353. nat/observability/utils/time_utils.py +1 -1
  354. nat/profiler/calc/__init__.py +1 -1
  355. nat/profiler/calc/calc_runner.py +3 -3
  356. nat/profiler/calc/calculations.py +1 -1
  357. nat/profiler/calc/data_models.py +1 -1
  358. nat/profiler/calc/plot.py +30 -3
  359. nat/profiler/callbacks/agno_callback_handler.py +1 -1
  360. nat/profiler/callbacks/base_callback_class.py +1 -1
  361. nat/profiler/callbacks/langchain_callback_handler.py +33 -3
  362. nat/profiler/callbacks/llama_index_callback_handler.py +13 -10
  363. nat/profiler/callbacks/semantic_kernel_callback_handler.py +1 -1
  364. nat/profiler/callbacks/token_usage_base_model.py +1 -1
  365. nat/profiler/data_frame_row.py +1 -1
  366. nat/profiler/data_models.py +1 -1
  367. nat/profiler/decorators/framework_wrapper.py +16 -1
  368. nat/profiler/decorators/function_tracking.py +1 -1
  369. nat/profiler/forecasting/config.py +1 -1
  370. nat/profiler/forecasting/model_trainer.py +1 -1
  371. nat/profiler/forecasting/models/__init__.py +1 -1
  372. nat/profiler/forecasting/models/forecasting_base_model.py +1 -1
  373. nat/profiler/forecasting/models/linear_model.py +1 -1
  374. nat/profiler/forecasting/models/random_forest_regressor.py +1 -1
  375. nat/profiler/inference_metrics_model.py +1 -1
  376. nat/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +1 -1
  377. nat/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +1 -1
  378. nat/profiler/inference_optimization/data_models.py +1 -1
  379. nat/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +1 -1
  380. nat/profiler/inference_optimization/experimental/prefix_span_analysis.py +1 -1
  381. nat/profiler/inference_optimization/llm_metrics.py +1 -1
  382. nat/profiler/inference_optimization/prompt_caching.py +1 -1
  383. nat/profiler/inference_optimization/token_uniqueness.py +1 -1
  384. nat/profiler/inference_optimization/workflow_runtimes.py +1 -1
  385. nat/profiler/intermediate_property_adapter.py +1 -1
  386. nat/profiler/parameter_optimization/optimizable_utils.py +1 -1
  387. nat/profiler/parameter_optimization/optimizer_runtime.py +1 -1
  388. nat/profiler/parameter_optimization/parameter_optimizer.py +1 -1
  389. nat/profiler/parameter_optimization/parameter_selection.py +1 -1
  390. nat/profiler/parameter_optimization/pareto_visualizer.py +1 -1
  391. nat/profiler/parameter_optimization/prompt_optimizer.py +1 -1
  392. nat/profiler/parameter_optimization/update_helpers.py +1 -1
  393. nat/profiler/profile_runner.py +1 -1
  394. nat/profiler/utils.py +1 -1
  395. nat/registry_handlers/local/local_handler.py +1 -1
  396. nat/registry_handlers/local/register_local.py +1 -1
  397. nat/registry_handlers/metadata_factory.py +1 -1
  398. nat/registry_handlers/package_utils.py +1 -1
  399. nat/registry_handlers/pypi/pypi_handler.py +1 -1
  400. nat/registry_handlers/pypi/register_pypi.py +1 -1
  401. nat/registry_handlers/register.py +1 -1
  402. nat/registry_handlers/registry_handler_base.py +1 -1
  403. nat/registry_handlers/rest/register_rest.py +1 -1
  404. nat/registry_handlers/rest/rest_handler.py +1 -1
  405. nat/registry_handlers/schemas/headers.py +1 -1
  406. nat/registry_handlers/schemas/package.py +1 -1
  407. nat/registry_handlers/schemas/publish.py +1 -1
  408. nat/registry_handlers/schemas/pull.py +1 -1
  409. nat/registry_handlers/schemas/remove.py +1 -1
  410. nat/registry_handlers/schemas/search.py +1 -1
  411. nat/registry_handlers/schemas/status.py +1 -1
  412. nat/retriever/interface.py +1 -1
  413. nat/retriever/milvus/__init__.py +1 -1
  414. nat/retriever/milvus/register.py +1 -1
  415. nat/retriever/milvus/retriever.py +1 -1
  416. nat/retriever/models.py +1 -1
  417. nat/retriever/nemo_retriever/__init__.py +1 -1
  418. nat/retriever/nemo_retriever/register.py +1 -1
  419. nat/retriever/nemo_retriever/retriever.py +5 -5
  420. nat/retriever/register.py +1 -1
  421. nat/runtime/__init__.py +1 -1
  422. nat/runtime/loader.py +10 -3
  423. nat/runtime/metrics.py +180 -0
  424. nat/runtime/runner.py +1 -5
  425. nat/runtime/session.py +451 -32
  426. nat/runtime/user_metadata.py +1 -1
  427. nat/settings/global_settings.py +1 -1
  428. nat/tool/chat_completion.py +1 -1
  429. nat/tool/code_execution/README.md +1 -1
  430. nat/tool/code_execution/code_sandbox.py +1 -1
  431. nat/tool/code_execution/local_sandbox/Dockerfile.sandbox +1 -1
  432. nat/tool/code_execution/local_sandbox/__init__.py +1 -1
  433. nat/tool/code_execution/local_sandbox/local_sandbox_server.py +1 -1
  434. nat/tool/code_execution/local_sandbox/start_local_sandbox.sh +1 -1
  435. nat/tool/code_execution/register.py +1 -1
  436. nat/tool/code_execution/utils.py +1 -1
  437. nat/tool/datetime_tools.py +1 -1
  438. nat/tool/document_search.py +1 -1
  439. nat/tool/github_tools.py +1 -1
  440. nat/tool/memory_tools/add_memory_tool.py +1 -1
  441. nat/tool/memory_tools/delete_memory_tool.py +1 -1
  442. nat/tool/memory_tools/get_memory_tool.py +1 -1
  443. nat/tool/nvidia_rag.py +2 -2
  444. nat/tool/register.py +1 -1
  445. nat/tool/retriever.py +1 -1
  446. nat/tool/server_tools.py +1 -1
  447. nat/utils/__init__.py +8 -5
  448. nat/utils/callable_utils.py +1 -1
  449. nat/utils/data_models/schema_validator.py +1 -1
  450. nat/utils/debugging_utils.py +1 -1
  451. nat/utils/decorators.py +1 -1
  452. nat/utils/dump_distro_mapping.py +1 -1
  453. nat/utils/exception_handlers/automatic_retries.py +3 -3
  454. nat/utils/exception_handlers/schemas.py +1 -1
  455. nat/utils/io/model_processing.py +1 -1
  456. nat/utils/io/supress_logs.py +33 -0
  457. nat/utils/io/yaml_tools.py +1 -1
  458. nat/utils/log_levels.py +1 -1
  459. nat/utils/log_utils.py +13 -1
  460. nat/utils/metadata_utils.py +1 -1
  461. nat/utils/optional_imports.py +1 -1
  462. nat/utils/producer_consumer_queue.py +1 -1
  463. nat/utils/reactive/base/observable_base.py +1 -1
  464. nat/utils/reactive/base/observer_base.py +1 -1
  465. nat/utils/reactive/base/subject_base.py +1 -1
  466. nat/utils/reactive/observable.py +1 -1
  467. nat/utils/reactive/observer.py +1 -1
  468. nat/utils/reactive/subject.py +1 -1
  469. nat/utils/reactive/subscription.py +1 -1
  470. nat/utils/responses_api.py +1 -1
  471. nat/utils/settings/global_settings.py +1 -1
  472. nat/utils/string_utils.py +1 -1
  473. nat/utils/type_converter.py +18 -5
  474. nat/utils/type_utils.py +1 -1
  475. nat/utils/url_utils.py +1 -1
  476. {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/METADATA +39 -14
  477. nvidia_nat-1.4.0a20260113.dist-info/RECORD +547 -0
  478. nvidia_nat-1.4.0a20260113.dist-info/entry_points.txt +38 -0
  479. nat/cli/commands/mcp/mcp.py +0 -986
  480. nat/front_ends/mcp/introspection_token_verifier.py +0 -73
  481. nat/front_ends/mcp/mcp_front_end_config.py +0 -109
  482. nat/front_ends/mcp/mcp_front_end_plugin.py +0 -155
  483. nat/front_ends/mcp/mcp_front_end_plugin_worker.py +0 -388
  484. nat/front_ends/mcp/memory_profiler.py +0 -320
  485. nat/front_ends/mcp/register.py +0 -27
  486. nat/front_ends/mcp/tool_converter.py +0 -321
  487. nvidia_nat-1.4.0a20251120.dist-info/RECORD +0 -488
  488. nvidia_nat-1.4.0a20251120.dist-info/entry_points.txt +0 -23
  489. {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/WHEEL +0 -0
  490. {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
  491. {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE.md +0 -0
  492. {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -39,6 +39,7 @@ from pydantic import BaseModel
39
39
  from pydantic import Field
40
40
  from starlette.websockets import WebSocket
41
41
 
42
+ from nat.builder.context import Context
42
43
  from nat.builder.eval_builder import WorkflowEvalBuilder
43
44
  from nat.builder.evaluator import EvaluatorInfo
44
45
  from nat.builder.function import Function
@@ -54,6 +55,7 @@ from nat.eval.config import EvaluationRunOutput
54
55
  from nat.eval.evaluate import EvaluationRun
55
56
  from nat.eval.evaluate import EvaluationRunConfig
56
57
  from nat.eval.evaluator.evaluator_model import EvalInput
58
+ from nat.front_ends.fastapi.async_job import run_generation
57
59
  from nat.front_ends.fastapi.auth_flow_handlers.http_flow_handler import HTTPAuthenticationFlowHandler
58
60
  from nat.front_ends.fastapi.auth_flow_handlers.websocket_flow_handler import FlowState
59
61
  from nat.front_ends.fastapi.auth_flow_handlers.websocket_flow_handler import WebSocketAuthenticationFlowHandler
@@ -74,6 +76,7 @@ from nat.front_ends.fastapi.utils import get_config_file_path
74
76
  from nat.object_store.models import ObjectStoreItem
75
77
  from nat.runtime.loader import load_workflow
76
78
  from nat.runtime.session import SessionManager
79
+ from nat.utils.log_utils import setup_logging
77
80
 
78
81
  logger = logging.getLogger(__name__)
79
82
 
@@ -105,6 +108,9 @@ class FastApiFrontEndPluginWorkerBase(ABC):
105
108
  self._scheduler_address = os.environ.get("NAT_DASK_SCHEDULER_ADDRESS")
106
109
  self._db_url = os.environ.get("NAT_JOB_STORE_DB_URL")
107
110
  self._config_file_path = get_config_file_path()
111
+ self._use_dask_threads = os.environ.get("NAT_USE_DASK_THREADS", "0") == "1"
112
+ self._log_level = int(os.environ.get("NAT_FASTAPI_LOG_LEVEL", logging.INFO))
113
+ setup_logging(self._log_level)
108
114
 
109
115
  if self._scheduler_address is not None:
110
116
  if not _DASK_AVAILABLE:
@@ -232,6 +238,9 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
232
238
  self._outstanding_flows: dict[str, FlowState] = {}
233
239
  self._outstanding_flows_lock = asyncio.Lock()
234
240
 
241
+ # Track session managers for each route
242
+ self._session_managers: list[SessionManager] = []
243
+
235
244
  # Evaluator storage for single-item evaluation
236
245
  self._evaluators: dict[str, EvaluatorInfo] = {}
237
246
  self._eval_builder: WorkflowEvalBuilder | None = None
@@ -268,6 +277,27 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
268
277
  # Don't fail startup, just log the error
269
278
  self._evaluators = {}
270
279
 
280
+ async def _create_session_manager(self,
281
+ builder: WorkflowBuilder,
282
+ entry_function: str | None = None) -> SessionManager:
283
+ """Create and register a SessionManager."""
284
+
285
+ sm = await SessionManager.create(config=self._config, shared_builder=builder, entry_function=entry_function)
286
+ self._session_managers.append(sm)
287
+
288
+ return sm
289
+
290
+ async def cleanup_session_managers(self):
291
+ """Clean up all SessionManager resources on shutdown."""
292
+ for sm in self._session_managers:
293
+ try:
294
+ await sm.shutdown()
295
+ except Exception as e:
296
+ logger.error(f"Error cleaning up SessionManager: {e}")
297
+
298
+ self._session_managers.clear()
299
+ logger.info("All SessionManagers cleaned up")
300
+
271
301
  async def cleanup_evaluators(self):
272
302
  """Clean up evaluator resources on shutdown."""
273
303
  if self._eval_builder:
@@ -293,6 +323,9 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
293
323
  # TODO: we need config control over this as it's not always needed
294
324
  await self.initialize_evaluators(self._config)
295
325
 
326
+ # Ensure session manager resources are cleaned up when the app shuts down
327
+ app.add_event_handler("shutdown", self.cleanup_session_managers)
328
+
296
329
  # Ensure evaluator resources are cleaned up when the app shuts down
297
330
  app.add_event_handler("shutdown", self.cleanup_evaluators)
298
331
 
@@ -300,18 +333,20 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
300
333
 
301
334
  async def add_routes(self, app: FastAPI, builder: WorkflowBuilder):
302
335
 
303
- await self.add_default_route(app, SessionManager(await builder.build()))
304
- await self.add_evaluate_route(app, SessionManager(await builder.build()))
305
- await self.add_evaluate_item_route(app, SessionManager(await builder.build()))
336
+ await self.add_default_route(app, await self._create_session_manager(builder))
337
+ await self.add_evaluate_route(app, await self._create_session_manager(builder))
338
+ await self.add_evaluate_item_route(app, await self._create_session_manager(builder))
339
+
306
340
  await self.add_static_files_route(app, builder)
307
341
  await self.add_authorization_route(app)
308
342
  await self.add_mcp_client_tool_list_route(app, builder)
343
+ await self.add_monitor_route(app)
309
344
 
310
345
  for ep in self.front_end_config.endpoints:
311
346
 
312
- entry_workflow = await builder.build(entry_function=ep.function_name)
313
-
314
- await self.add_route(app, endpoint=ep, session_manager=SessionManager(entry_workflow))
347
+ await self.add_route(app,
348
+ endpoint=ep,
349
+ session_manager=await self._create_session_manager(builder, ep.function_name))
315
350
 
316
351
  async def add_default_route(self, app: FastAPI, session_manager: SessionManager):
317
352
 
@@ -662,11 +697,15 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
662
697
  endpoint: FastApiFrontEndConfig.EndpointBase,
663
698
  session_manager: SessionManager):
664
699
 
665
- workflow = session_manager.workflow
700
+ GenerateBodyType = session_manager.get_workflow_input_schema()
701
+ GenerateStreamResponseType = session_manager.get_workflow_streaming_output_schema()
702
+ GenerateSingleResponseType = session_manager.get_workflow_single_output_schema()
666
703
 
667
- GenerateBodyType = workflow.input_schema
668
- GenerateStreamResponseType = workflow.streaming_output_schema
669
- GenerateSingleResponseType = workflow.single_output_schema
704
+ def add_context_headers_to_response(response: Response) -> None:
705
+ """Add context-based headers to response if available."""
706
+ observability_trace_id = Context.get().observability_trace_id
707
+ if observability_trace_id:
708
+ response.headers["Observability-Trace-Id"] = observability_trace_id
670
709
 
671
710
  # Skip async generation for custom routes (those with function_name)
672
711
  if self._dask_available and not hasattr(endpoint, 'function_name'):
@@ -715,10 +754,13 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
715
754
 
716
755
  response.headers["Content-Type"] = "application/json"
717
756
 
718
- async with session_manager.session(http_connection=request,
719
- user_authentication_callback=self._http_flow_handler.authenticate):
757
+ async with session_manager.session(
758
+ http_connection=request,
759
+ user_authentication_callback=self._http_flow_handler.authenticate) as session:
720
760
 
721
- return await generate_single_response(None, session_manager, result_type=result_type)
761
+ result = await generate_single_response(None, session, result_type=result_type)
762
+ add_context_headers_to_response(response)
763
+ return result
722
764
 
723
765
  return get_single
724
766
 
@@ -726,13 +768,14 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
726
768
 
727
769
  async def get_stream(request: Request):
728
770
 
729
- async with session_manager.session(http_connection=request,
730
- user_authentication_callback=self._http_flow_handler.authenticate):
771
+ async with session_manager.session(
772
+ http_connection=request,
773
+ user_authentication_callback=self._http_flow_handler.authenticate) as session:
731
774
 
732
775
  return StreamingResponse(headers={"Content-Type": "text/event-stream; charset=utf-8"},
733
776
  content=generate_streaming_response_as_str(
734
777
  None,
735
- session_manager=session_manager,
778
+ session=session,
736
779
  streaming=streaming,
737
780
  step_adaptor=self.get_step_adaptor(),
738
781
  result_type=result_type,
@@ -744,14 +787,14 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
744
787
 
745
788
  async def get_stream(filter_steps: str | None = None):
746
789
 
747
- return StreamingResponse(headers={"Content-Type": "text/event-stream; charset=utf-8"},
748
- content=generate_streaming_response_full_as_str(
749
- None,
750
- session_manager=session_manager,
751
- streaming=streaming,
752
- result_type=result_type,
753
- output_type=output_type,
754
- filter_steps=filter_steps))
790
+ async with session_manager.session(http_connection=None) as session:
791
+ return StreamingResponse(headers={"Content-Type": "text/event-stream; charset=utf-8"},
792
+ content=generate_streaming_response_full_as_str(None,
793
+ session=session,
794
+ streaming=streaming,
795
+ result_type=result_type,
796
+ output_type=output_type,
797
+ filter_steps=filter_steps))
755
798
 
756
799
  return get_stream
757
800
 
@@ -761,10 +804,13 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
761
804
 
762
805
  response.headers["Content-Type"] = "application/json"
763
806
 
764
- async with session_manager.session(http_connection=request,
765
- user_authentication_callback=self._http_flow_handler.authenticate):
807
+ async with session_manager.session(
808
+ http_connection=request,
809
+ user_authentication_callback=self._http_flow_handler.authenticate) as session:
766
810
 
767
- return await generate_single_response(payload, session_manager, result_type=result_type)
811
+ result = await generate_single_response(payload, session, result_type=result_type)
812
+ add_context_headers_to_response(response)
813
+ return result
768
814
 
769
815
  return post_single
770
816
 
@@ -775,13 +821,14 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
775
821
 
776
822
  async def post_stream(request: Request, payload: request_type):
777
823
 
778
- async with session_manager.session(http_connection=request,
779
- user_authentication_callback=self._http_flow_handler.authenticate):
824
+ async with session_manager.session(
825
+ http_connection=request,
826
+ user_authentication_callback=self._http_flow_handler.authenticate) as session:
780
827
 
781
828
  return StreamingResponse(headers={"Content-Type": "text/event-stream; charset=utf-8"},
782
829
  content=generate_streaming_response_as_str(
783
830
  payload,
784
- session_manager=session_manager,
831
+ session=session,
785
832
  streaming=streaming,
786
833
  step_adaptor=self.get_step_adaptor(),
787
834
  result_type=result_type,
@@ -799,14 +846,14 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
799
846
 
800
847
  async def post_stream(payload: request_type, filter_steps: str | None = None):
801
848
 
802
- return StreamingResponse(headers={"Content-Type": "text/event-stream; charset=utf-8"},
803
- content=generate_streaming_response_full_as_str(
804
- payload,
805
- session_manager=session_manager,
806
- streaming=streaming,
807
- result_type=result_type,
808
- output_type=output_type,
809
- filter_steps=filter_steps))
849
+ async with session_manager.session(http_connection=None) as session:
850
+ return StreamingResponse(headers={"Content-Type": "text/event-stream; charset=utf-8"},
851
+ content=generate_streaming_response_full_as_str(payload,
852
+ session=session,
853
+ streaming=streaming,
854
+ result_type=result_type,
855
+ output_type=output_type,
856
+ filter_steps=filter_steps))
810
857
 
811
858
  return post_stream
812
859
 
@@ -822,20 +869,22 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
822
869
  response.headers["Content-Type"] = "application/json"
823
870
  stream_requested = getattr(payload, 'stream', False)
824
871
 
825
- async with session_manager.session(http_connection=request):
872
+ async with session_manager.session(http_connection=request) as session:
826
873
  if stream_requested:
827
874
 
828
875
  # Return streaming response
829
876
  return StreamingResponse(headers={"Content-Type": "text/event-stream; charset=utf-8"},
830
877
  content=generate_streaming_response_as_str(
831
878
  payload,
832
- session_manager=session_manager,
879
+ session=session,
833
880
  streaming=True,
834
881
  step_adaptor=self.get_step_adaptor(),
835
882
  result_type=ChatResponseChunk,
836
883
  output_type=ChatResponseChunk))
837
884
 
838
- return await generate_single_response(payload, session_manager, result_type=ChatResponse)
885
+ result = await generate_single_response(payload, session, result_type=ChatResponse)
886
+ add_context_headers_to_response(response)
887
+ return result
839
888
 
840
889
  return post_openai_api_compatible
841
890
 
@@ -856,23 +905,6 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
856
905
  updated_at=job.updated_at,
857
906
  expires_at=self._job_store.get_expires_at(job))
858
907
 
859
- async def run_generation(scheduler_address: str,
860
- db_url: str,
861
- config_file_path: str,
862
- job_id: str,
863
- payload: typing.Any):
864
- """Background task to run the workflow."""
865
- job_store = JobStore(scheduler_address=scheduler_address, db_url=db_url)
866
- try:
867
- async with load_workflow(config_file_path) as local_session_manager:
868
- result = await generate_single_response(
869
- payload, local_session_manager, result_type=local_session_manager.workflow.single_output_schema)
870
-
871
- await job_store.update_status(job_id, JobStatus.SUCCESS, output=result)
872
- except Exception as e:
873
- logger.exception("Error in async job %s", job_id)
874
- await job_store.update_status(job_id, JobStatus.FAILURE, error=str(e))
875
-
876
908
  def post_async_generation(request_type: type):
877
909
 
878
910
  async def start_async_generation(
@@ -895,6 +927,8 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
895
927
  job_fn=run_generation,
896
928
  sync_timeout=request.sync_timeout,
897
929
  job_args=[
930
+ not self._use_dask_threads,
931
+ self._log_level,
898
932
  self._scheduler_address,
899
933
  self._db_url,
900
934
  self._config_file_path,
@@ -1228,7 +1262,7 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
1228
1262
  if configured_group.config.type != "mcp_client":
1229
1263
  continue
1230
1264
 
1231
- from nat.plugins.mcp.client_config import MCPClientConfig
1265
+ from nat.plugins.mcp.client.client_config import MCPClientConfig
1232
1266
 
1233
1267
  config = configured_group.config
1234
1268
  assert isinstance(config, MCPClientConfig)
@@ -1377,6 +1411,107 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
1377
1411
  }
1378
1412
  })
1379
1413
 
1414
+ async def add_monitor_route(self, app: FastAPI):
1415
+ """Add the per-user monitoring endpoint to the FastAPI app.
1416
+
1417
+ Security Warning:
1418
+ This endpoint exposes per-user identifiers and usage metrics. It should be
1419
+ protected by deploying behind an internal network, a reverse proxy with
1420
+ authentication, or similar access controls to prevent exposure to untrusted callers.
1421
+ """
1422
+ # Check if monitoring is enabled in config
1423
+ if not self._config.general.enable_per_user_monitoring:
1424
+ logger.debug("Per-user monitoring disabled, skipping /monitor/users endpoint")
1425
+ return
1426
+
1427
+ from nat.runtime.metrics import PerUserMetricsCollector
1428
+ from nat.runtime.metrics import PerUserMonitorResponse
1429
+ from nat.runtime.metrics import PerUserResourceUsage
1430
+
1431
+ async def get_per_user_metrics(user_id: str | None = None) -> PerUserMonitorResponse:
1432
+ """
1433
+ Get resource usage metrics for per-user workflows.
1434
+
1435
+ Args:
1436
+ user_id: Optional user ID to filter metrics for a specific user
1437
+
1438
+ Returns:
1439
+ PerUserMonitorResponse with metrics for all or specified users
1440
+ """
1441
+ # Collect metrics from all session managers that have per-user workflows
1442
+ all_users: list[PerUserResourceUsage] = []
1443
+
1444
+ for session_manager in self._session_managers:
1445
+ if not session_manager.is_workflow_per_user:
1446
+ continue
1447
+
1448
+ collector = PerUserMetricsCollector(session_manager)
1449
+
1450
+ if user_id is not None:
1451
+ # Filter for specific user
1452
+ user_metrics = await collector.collect_user_metrics(user_id)
1453
+ if user_metrics:
1454
+ all_users.append(user_metrics)
1455
+ else:
1456
+ # Get all users
1457
+ response = await collector.collect_all_metrics()
1458
+ all_users.extend(response.users)
1459
+
1460
+ from datetime import datetime
1461
+ return PerUserMonitorResponse(
1462
+ timestamp=datetime.now(),
1463
+ total_active_users=len(all_users),
1464
+ users=all_users,
1465
+ )
1466
+
1467
+ # Register the monitoring endpoint
1468
+ app.add_api_route(path="/monitor/users",
1469
+ endpoint=get_per_user_metrics,
1470
+ methods=["GET"],
1471
+ response_model=PerUserMonitorResponse,
1472
+ description="Get resource usage metrics for per-user workflows",
1473
+ tags=["Monitoring"],
1474
+ responses={
1475
+ 200: {
1476
+ "description": "Successfully retrieved per-user metrics",
1477
+ "content": {
1478
+ "application/json": {
1479
+ "example": {
1480
+ "timestamp":
1481
+ "2025-12-16T10:30:00Z",
1482
+ "total_active_users":
1483
+ 2,
1484
+ "users": [{
1485
+ "user_id": "alice",
1486
+ "session": {
1487
+ "created_at": "2025-12-16T09:00:00Z",
1488
+ "last_activity": "2025-12-16T10:29:55Z",
1489
+ "ref_count": 1,
1490
+ "is_active": True
1491
+ },
1492
+ "requests": {
1493
+ "total_requests": 42,
1494
+ "active_requests": 1,
1495
+ "avg_latency_ms": 1250.5,
1496
+ "error_count": 2
1497
+ },
1498
+ "memory": {
1499
+ "per_user_functions_count": 2,
1500
+ "per_user_function_groups_count": 1,
1501
+ "exit_stack_size": 3
1502
+ }
1503
+ }]
1504
+ }
1505
+ }
1506
+ }
1507
+ },
1508
+ 500: {
1509
+ "description": "Internal Server Error"
1510
+ }
1511
+ })
1512
+
1513
+ logger.info("Added per-user monitoring endpoint at /monitor/users")
1514
+
1380
1515
  async def _add_flow(self, state: str, flow_state: FlowState):
1381
1516
  async with self._outstanding_flows_lock:
1382
1517
  self._outstanding_flows[state] = flow_state
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -32,6 +32,7 @@ async def pull_intermediate(_q, adapter):
32
32
  intermediate_done = asyncio.Event()
33
33
  context = Context.get()
34
34
  loop = asyncio.get_running_loop()
35
+ trace_id_emitted = False
35
36
 
36
37
  async def set_intermediate_done():
37
38
  intermediate_done.set()
@@ -43,6 +44,16 @@ async def pull_intermediate(_q, adapter):
43
44
  If adapter is None, convert the raw IntermediateStep into the complete
44
45
  ResponseIntermediateStep and place it into the queue.
45
46
  """
47
+ nonlocal trace_id_emitted
48
+
49
+ # Check if trace ID is now available and emit it once
50
+ if not trace_id_emitted:
51
+ observability_trace_id = context.observability_trace_id
52
+ if observability_trace_id:
53
+ from nat.data_models.api_server import ResponseObservabilityTrace
54
+ loop.create_task(_q.put(ResponseObservabilityTrace(observability_trace_id=observability_trace_id)))
55
+ trace_id_emitted = True
56
+
46
57
  if adapter is None:
47
58
  adapted = ResponseIntermediateStep(id=item.UUID,
48
59
  type=item.event_type,
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -208,13 +208,14 @@ class JobStore(DaskClientMixin):
208
208
  AsyncSession
209
209
  An active SQLAlchemy async session with an open transaction.
210
210
  """
211
- async with self._session() as session:
212
- async with session.begin():
213
- yield session
214
-
215
- # Removes the current task key from the session registry, preventing
216
- # potential memory leaks
217
- await self._session.remove()
211
+ try:
212
+ async with self._session() as session:
213
+ async with session.begin():
214
+ yield session
215
+ finally:
216
+ # Removes the current task key from the session registry, preventing
217
+ # potential memory leaks
218
+ await self._session.remove()
218
219
 
219
220
  def ensure_job_id(self, job_id: str | None) -> str:
220
221
  """
@@ -502,13 +503,14 @@ class JobStore(DaskClientMixin):
502
503
 
503
504
  return updated_at + timedelta(seconds=job.expiry_seconds)
504
505
 
505
- async def cleanup_expired_jobs(self):
506
+ async def cleanup_expired_jobs(self) -> int:
506
507
  """
507
508
  Cleanup expired jobs, keeping the most recent one.
508
509
 
509
510
  Updated_at is used instead of created_at to determine the most recent job. This is because jobs may not be
510
511
  processed in the order they are created.
511
512
  """
513
+ logger.info("Starting cleanup of expired jobs")
512
514
  now = datetime.now(UTC)
513
515
 
514
516
  stmt = select(JobInfo).where(
@@ -536,9 +538,11 @@ class JobStore(DaskClientMixin):
536
538
  elif os.path.isdir(job.output_path):
537
539
  shutil.rmtree(job.output_path)
538
540
 
539
- if len(expired_ids) > 0:
541
+ num_expired = len(expired_ids)
542
+ if num_expired > 0:
540
543
  successfully_expired = []
541
544
  for job_id in expired_ids:
545
+ var = None
542
546
  try:
543
547
  var = Variable(name=job_id, client=client)
544
548
  try:
@@ -549,14 +553,22 @@ class JobStore(DaskClientMixin):
549
553
  except TimeoutError:
550
554
  pass
551
555
 
552
- var.delete()
553
556
  successfully_expired.append(job_id)
554
557
  except Exception:
555
558
  logger.exception("Failed to expire %s", job_id)
556
559
 
560
+ finally:
561
+ if var is not None:
562
+ try:
563
+ var.delete()
564
+ except Exception:
565
+ logger.exception("Failed to delete variable %s", job_id)
566
+
557
567
  await session.execute(
558
568
  update(JobInfo).where(JobInfo.job_id.in_(successfully_expired)).values(is_expired=True))
559
569
 
570
+ return num_expired
571
+
560
572
 
561
573
  def get_db_engine(db_url: str | None = None, echo: bool = False, use_async: bool = True) -> "Engine | AsyncEngine":
562
574
  """
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -30,6 +30,7 @@ from nat.data_models.api_server import ChatResponse
30
30
  from nat.data_models.api_server import ChatResponseChunk
31
31
  from nat.data_models.api_server import Error
32
32
  from nat.data_models.api_server import ErrorTypes
33
+ from nat.data_models.api_server import ResponseObservabilityTrace
33
34
  from nat.data_models.api_server import ResponsePayloadOutput
34
35
  from nat.data_models.api_server import ResponseSerializable
35
36
  from nat.data_models.api_server import SystemResponseContent
@@ -38,6 +39,7 @@ from nat.data_models.api_server import UserMessageContentRoleType
38
39
  from nat.data_models.api_server import UserMessages
39
40
  from nat.data_models.api_server import WebSocketMessageStatus
40
41
  from nat.data_models.api_server import WebSocketMessageType
42
+ from nat.data_models.api_server import WebSocketObservabilityTraceMessage
41
43
  from nat.data_models.api_server import WebSocketSystemInteractionMessage
42
44
  from nat.data_models.api_server import WebSocketSystemIntermediateStepMessage
43
45
  from nat.data_models.api_server import WebSocketSystemResponseTokenMessage
@@ -69,14 +71,15 @@ class WebSocketMessageHandler:
69
71
  self._conversation_id: str | None = None
70
72
  self._workflow_schema_type: str | None = None
71
73
  self._user_interaction_response: asyncio.Future[TextContent] | None = None
74
+ self._pending_observability_trace: ResponseObservabilityTrace | None = None
72
75
 
73
76
  self._flow_handler: FlowHandlerBase | None = None
74
77
 
75
78
  self._schema_output_mapping: dict[str, type[BaseModel] | type[None]] = {
76
- WorkflowSchemaType.GENERATE: self._session_manager.workflow.single_output_schema,
79
+ WorkflowSchemaType.GENERATE: self._session_manager.get_workflow_single_output_schema(),
77
80
  WorkflowSchemaType.CHAT: ChatResponse,
78
81
  WorkflowSchemaType.CHAT_STREAM: ChatResponseChunk,
79
- WorkflowSchemaType.GENERATE_STREAM: self._session_manager.workflow.streaming_output_schema,
82
+ WorkflowSchemaType.GENERATE_STREAM: self._session_manager.get_workflow_streaming_output_schema(),
80
83
  }
81
84
 
82
85
  def set_flow_handler(self, flow_handler: FlowHandlerBase) -> None:
@@ -175,6 +178,7 @@ class WebSocketMessageHandler:
175
178
  self._message_parent_id = user_message_as_validated_type.id
176
179
  self._workflow_schema_type = user_message_as_validated_type.schema_type
177
180
  self._conversation_id = user_message_as_validated_type.conversation_id
181
+ self._pending_observability_trace = None
178
182
 
179
183
  message_content: typing.Any = await self._process_websocket_user_message(user_message_as_validated_type)
180
184
 
@@ -250,6 +254,13 @@ class WebSocketMessageHandler:
250
254
  content=content,
251
255
  status=status)
252
256
 
257
+ elif issubclass(message_schema, WebSocketObservabilityTraceMessage):
258
+ message = await self._message_validator.create_observability_trace_message(
259
+ message_id=message_id,
260
+ parent_id=self._message_parent_id,
261
+ conversation_id=self._conversation_id,
262
+ content=content)
263
+
253
264
  elif isinstance(content, Error):
254
265
  raise ValidationError(f"Invalid input data creating websocket message. {data_model.model_dump_json()}")
255
266
 
@@ -327,12 +338,18 @@ class WebSocketMessageHandler:
327
338
  user_authentication_callback=auth_callback) as session:
328
339
 
329
340
  async for value in generate_streaming_response(payload,
330
- session_manager=session,
341
+ session=session,
331
342
  streaming=True,
332
343
  step_adaptor=self._step_adaptor,
333
344
  result_type=result_type,
334
345
  output_type=output_type):
335
346
 
347
+ # Store observability trace to send after completion message
348
+ if isinstance(value, ResponseObservabilityTrace):
349
+ if self._pending_observability_trace is None:
350
+ self._pending_observability_trace = value
351
+ continue
352
+
336
353
  if not isinstance(value, ResponseSerializable):
337
354
  value = ResponsePayloadOutput(payload=value)
338
355
 
@@ -342,3 +359,9 @@ class WebSocketMessageHandler:
342
359
  await self.create_websocket_message(data_model=SystemResponseContent(),
343
360
  message_type=WebSocketMessageType.RESPONSE_MESSAGE,
344
361
  status=WebSocketMessageStatus.COMPLETE)
362
+
363
+ # Send observability trace after completion message
364
+ if self._pending_observability_trace is not None:
365
+ await self.create_websocket_message(data_model=self._pending_observability_trace,
366
+ message_type=WebSocketMessageType.OBSERVABILITY_TRACE_MESSAGE)
367
+ self._pending_observability_trace = None