nvidia-nat 1.4.0a20251120__py3-none-any.whl → 1.4.0a20260113__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (492) hide show
  1. aiq/__init__.py +1 -1
  2. nat/{front_ends/mcp → agent/auto_memory_wrapper}/__init__.py +1 -1
  3. nat/agent/auto_memory_wrapper/agent.py +278 -0
  4. nat/agent/auto_memory_wrapper/register.py +227 -0
  5. nat/agent/auto_memory_wrapper/state.py +30 -0
  6. nat/agent/base.py +1 -1
  7. nat/agent/dual_node.py +1 -1
  8. nat/agent/prompt_optimizer/prompt.py +1 -1
  9. nat/agent/prompt_optimizer/register.py +1 -1
  10. nat/agent/react_agent/agent.py +16 -9
  11. nat/agent/react_agent/output_parser.py +2 -2
  12. nat/agent/react_agent/prompt.py +3 -2
  13. nat/agent/react_agent/register.py +2 -2
  14. nat/agent/react_agent/register_per_user_agent.py +104 -0
  15. nat/agent/reasoning_agent/reasoning_agent.py +1 -1
  16. nat/agent/register.py +3 -1
  17. nat/agent/responses_api_agent/__init__.py +1 -1
  18. nat/agent/responses_api_agent/register.py +1 -1
  19. nat/agent/rewoo_agent/agent.py +9 -4
  20. nat/agent/rewoo_agent/prompt.py +1 -1
  21. nat/agent/rewoo_agent/register.py +1 -1
  22. nat/agent/tool_calling_agent/agent.py +5 -4
  23. nat/agent/tool_calling_agent/register.py +1 -1
  24. nat/authentication/__init__.py +1 -1
  25. nat/authentication/api_key/__init__.py +1 -1
  26. nat/authentication/api_key/api_key_auth_provider.py +1 -1
  27. nat/authentication/api_key/api_key_auth_provider_config.py +22 -7
  28. nat/authentication/api_key/register.py +1 -1
  29. nat/authentication/credential_validator/__init__.py +1 -1
  30. nat/authentication/credential_validator/bearer_token_validator.py +1 -1
  31. nat/authentication/exceptions/__init__.py +1 -1
  32. nat/authentication/exceptions/api_key_exceptions.py +1 -1
  33. nat/authentication/http_basic_auth/http_basic_auth_provider.py +1 -1
  34. nat/authentication/http_basic_auth/register.py +1 -1
  35. nat/authentication/interfaces.py +1 -1
  36. nat/authentication/oauth2/__init__.py +1 -1
  37. nat/authentication/oauth2/oauth2_auth_code_flow_provider.py +1 -1
  38. nat/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +1 -1
  39. nat/authentication/oauth2/oauth2_resource_server_config.py +1 -1
  40. nat/authentication/oauth2/register.py +1 -1
  41. nat/authentication/register.py +1 -1
  42. nat/builder/builder.py +511 -1
  43. nat/builder/child_builder.py +385 -0
  44. nat/builder/component_utils.py +28 -4
  45. nat/builder/context.py +17 -1
  46. nat/builder/embedder.py +1 -1
  47. nat/builder/eval_builder.py +19 -7
  48. nat/builder/evaluator.py +1 -1
  49. nat/builder/framework_enum.py +2 -1
  50. nat/builder/front_end.py +1 -1
  51. nat/builder/function.py +40 -3
  52. nat/builder/function_base.py +1 -1
  53. nat/builder/function_info.py +1 -1
  54. nat/builder/intermediate_step_manager.py +1 -1
  55. nat/builder/llm.py +1 -1
  56. nat/builder/per_user_workflow_builder.py +843 -0
  57. nat/builder/retriever.py +1 -1
  58. nat/builder/sync_builder.py +571 -0
  59. nat/builder/user_interaction_manager.py +1 -1
  60. nat/builder/workflow.py +1 -1
  61. nat/builder/workflow_builder.py +536 -424
  62. nat/cli/__init__.py +1 -1
  63. nat/cli/cli_utils/config_override.py +1 -1
  64. nat/cli/cli_utils/validation.py +32 -1
  65. nat/cli/commands/configure/channel/add.py +1 -1
  66. nat/cli/commands/configure/channel/channel.py +1 -1
  67. nat/cli/commands/configure/channel/remove.py +1 -1
  68. nat/cli/commands/configure/channel/update.py +1 -1
  69. nat/cli/commands/configure/configure.py +1 -1
  70. nat/cli/commands/evaluate.py +87 -13
  71. nat/cli/commands/finetune.py +132 -0
  72. nat/cli/commands/info/__init__.py +1 -1
  73. nat/cli/commands/info/info.py +1 -1
  74. nat/cli/commands/info/list_channels.py +1 -1
  75. nat/cli/commands/info/list_components.py +1 -1
  76. nat/cli/commands/object_store/__init__.py +1 -1
  77. nat/cli/commands/object_store/object_store.py +1 -1
  78. nat/cli/commands/optimize.py +1 -1
  79. nat/cli/commands/{mcp → red_teaming}/__init__.py +1 -1
  80. nat/cli/commands/red_teaming/red_teaming.py +138 -0
  81. nat/cli/commands/red_teaming/red_teaming_utils.py +73 -0
  82. nat/cli/commands/registry/__init__.py +1 -1
  83. nat/cli/commands/registry/publish.py +1 -1
  84. nat/cli/commands/registry/pull.py +1 -1
  85. nat/cli/commands/registry/registry.py +1 -1
  86. nat/cli/commands/registry/remove.py +1 -1
  87. nat/cli/commands/registry/search.py +1 -1
  88. nat/cli/commands/sizing/__init__.py +1 -1
  89. nat/cli/commands/sizing/calc.py +1 -1
  90. nat/cli/commands/sizing/sizing.py +1 -1
  91. nat/cli/commands/start.py +1 -1
  92. nat/cli/commands/uninstall.py +1 -1
  93. nat/cli/commands/validate.py +1 -1
  94. nat/cli/commands/workflow/__init__.py +1 -1
  95. nat/cli/commands/workflow/workflow.py +1 -1
  96. nat/cli/commands/workflow/workflow_commands.py +3 -2
  97. nat/cli/entrypoint.py +15 -37
  98. nat/cli/main.py +2 -2
  99. nat/cli/plugin_loader.py +69 -0
  100. nat/cli/register_workflow.py +183 -5
  101. nat/cli/type_registry.py +169 -3
  102. nat/control_flow/register.py +1 -1
  103. nat/control_flow/router_agent/agent.py +1 -1
  104. nat/control_flow/router_agent/prompt.py +1 -1
  105. nat/control_flow/router_agent/register.py +1 -1
  106. nat/control_flow/sequential_executor.py +28 -7
  107. nat/data_models/__init__.py +1 -1
  108. nat/data_models/agent.py +1 -1
  109. nat/data_models/api_server.py +38 -3
  110. nat/data_models/authentication.py +1 -1
  111. nat/data_models/common.py +1 -1
  112. nat/data_models/component.py +7 -1
  113. nat/data_models/component_ref.py +34 -1
  114. nat/data_models/config.py +62 -1
  115. nat/data_models/dataset_handler.py +15 -2
  116. nat/data_models/discovery_metadata.py +1 -1
  117. nat/data_models/embedder.py +1 -1
  118. nat/data_models/evaluate.py +6 -1
  119. nat/data_models/evaluator.py +1 -1
  120. nat/data_models/finetuning.py +260 -0
  121. nat/data_models/front_end.py +1 -1
  122. nat/data_models/function.py +1 -1
  123. nat/data_models/function_dependencies.py +1 -1
  124. nat/data_models/gated_field_mixin.py +1 -1
  125. nat/data_models/interactive.py +1 -1
  126. nat/data_models/intermediate_step.py +29 -2
  127. nat/data_models/invocation_node.py +1 -1
  128. nat/data_models/llm.py +1 -1
  129. nat/data_models/logging.py +1 -1
  130. nat/data_models/memory.py +1 -1
  131. nat/data_models/middleware.py +3 -1
  132. nat/data_models/object_store.py +1 -1
  133. nat/data_models/openai_mcp.py +1 -1
  134. nat/data_models/optimizable.py +1 -1
  135. nat/data_models/optimizer.py +1 -1
  136. nat/data_models/profiler.py +1 -1
  137. nat/data_models/registry_handler.py +1 -1
  138. nat/data_models/retriever.py +1 -1
  139. nat/data_models/retry_mixin.py +1 -1
  140. nat/data_models/runtime_enum.py +1 -1
  141. nat/data_models/span.py +1 -1
  142. nat/data_models/step_adaptor.py +1 -1
  143. nat/data_models/streaming.py +1 -1
  144. nat/data_models/swe_bench_model.py +1 -1
  145. nat/data_models/telemetry_exporter.py +1 -1
  146. nat/data_models/thinking_mixin.py +1 -1
  147. nat/data_models/ttc_strategy.py +1 -1
  148. nat/embedder/azure_openai_embedder.py +1 -1
  149. nat/embedder/nim_embedder.py +1 -1
  150. nat/embedder/openai_embedder.py +1 -1
  151. nat/embedder/register.py +1 -1
  152. nat/eval/__init__.py +1 -1
  153. nat/eval/config.py +8 -1
  154. nat/eval/dataset_handler/dataset_downloader.py +1 -1
  155. nat/eval/dataset_handler/dataset_filter.py +1 -1
  156. nat/eval/dataset_handler/dataset_handler.py +4 -2
  157. nat/eval/evaluate.py +217 -80
  158. nat/eval/evaluator/__init__.py +1 -1
  159. nat/eval/evaluator/base_evaluator.py +2 -2
  160. nat/eval/evaluator/evaluator_model.py +3 -2
  161. nat/eval/intermediate_step_adapter.py +1 -1
  162. nat/eval/llm_validator.py +336 -0
  163. nat/eval/rag_evaluator/evaluate.py +17 -10
  164. nat/eval/rag_evaluator/register.py +1 -1
  165. nat/eval/red_teaming_evaluator/__init__.py +14 -0
  166. nat/eval/red_teaming_evaluator/data_models.py +66 -0
  167. nat/eval/red_teaming_evaluator/evaluate.py +327 -0
  168. nat/eval/red_teaming_evaluator/filter_conditions.py +75 -0
  169. nat/eval/red_teaming_evaluator/register.py +55 -0
  170. nat/eval/register.py +2 -1
  171. nat/eval/remote_workflow.py +1 -1
  172. nat/eval/runners/__init__.py +1 -1
  173. nat/eval/runners/config.py +1 -1
  174. nat/eval/runners/multi_eval_runner.py +1 -1
  175. nat/eval/runners/red_teaming_runner/__init__.py +24 -0
  176. nat/eval/runners/red_teaming_runner/config.py +282 -0
  177. nat/eval/runners/red_teaming_runner/report_utils.py +707 -0
  178. nat/eval/runners/red_teaming_runner/runner.py +867 -0
  179. nat/eval/runtime_evaluator/__init__.py +1 -1
  180. nat/eval/runtime_evaluator/evaluate.py +1 -1
  181. nat/eval/runtime_evaluator/register.py +1 -1
  182. nat/eval/runtime_event_subscriber.py +1 -1
  183. nat/eval/swe_bench_evaluator/evaluate.py +1 -1
  184. nat/eval/swe_bench_evaluator/register.py +1 -1
  185. nat/eval/trajectory_evaluator/evaluate.py +2 -2
  186. nat/eval/trajectory_evaluator/register.py +1 -1
  187. nat/eval/tunable_rag_evaluator/evaluate.py +5 -5
  188. nat/eval/tunable_rag_evaluator/register.py +1 -1
  189. nat/eval/usage_stats.py +1 -1
  190. nat/eval/utils/eval_trace_ctx.py +1 -1
  191. nat/eval/utils/output_uploader.py +1 -1
  192. nat/eval/utils/tqdm_position_registry.py +1 -1
  193. nat/eval/utils/weave_eval.py +1 -1
  194. nat/experimental/decorators/experimental_warning_decorator.py +1 -1
  195. nat/experimental/test_time_compute/editing/iterative_plan_refinement_editor.py +1 -1
  196. nat/experimental/test_time_compute/editing/llm_as_a_judge_editor.py +1 -1
  197. nat/experimental/test_time_compute/editing/motivation_aware_summarization.py +1 -1
  198. nat/experimental/test_time_compute/functions/execute_score_select_function.py +1 -1
  199. nat/experimental/test_time_compute/functions/multi_llm_judge_function.py +88 -0
  200. nat/experimental/test_time_compute/functions/plan_select_execute_function.py +1 -1
  201. nat/experimental/test_time_compute/functions/ttc_tool_orchestration_function.py +1 -1
  202. nat/experimental/test_time_compute/functions/ttc_tool_wrapper_function.py +1 -1
  203. nat/experimental/test_time_compute/models/editor_config.py +1 -1
  204. nat/experimental/test_time_compute/models/scoring_config.py +1 -1
  205. nat/experimental/test_time_compute/models/search_config.py +20 -2
  206. nat/experimental/test_time_compute/models/selection_config.py +33 -2
  207. nat/experimental/test_time_compute/models/stage_enums.py +1 -1
  208. nat/experimental/test_time_compute/models/strategy_base.py +1 -1
  209. nat/experimental/test_time_compute/models/tool_use_config.py +1 -1
  210. nat/experimental/test_time_compute/models/ttc_item.py +1 -1
  211. nat/experimental/test_time_compute/register.py +4 -1
  212. nat/experimental/test_time_compute/scoring/llm_based_agent_scorer.py +1 -1
  213. nat/experimental/test_time_compute/scoring/llm_based_plan_scorer.py +1 -1
  214. nat/experimental/test_time_compute/scoring/motivation_aware_scorer.py +1 -1
  215. nat/experimental/test_time_compute/search/multi_llm_generation.py +115 -0
  216. nat/experimental/test_time_compute/search/multi_llm_planner.py +1 -1
  217. nat/experimental/test_time_compute/search/multi_query_retrieval_search.py +1 -1
  218. nat/experimental/test_time_compute/search/single_shot_multi_plan_planner.py +1 -1
  219. nat/experimental/test_time_compute/selection/best_of_n_selector.py +1 -1
  220. nat/experimental/test_time_compute/selection/llm_based_agent_output_selector.py +1 -1
  221. nat/experimental/test_time_compute/selection/llm_based_output_merging_selector.py +1 -1
  222. nat/experimental/test_time_compute/selection/llm_based_plan_selector.py +1 -1
  223. nat/experimental/test_time_compute/selection/llm_judge_selection.py +127 -0
  224. nat/experimental/test_time_compute/selection/threshold_selector.py +1 -1
  225. nat/finetuning/__init__.py +24 -0
  226. nat/finetuning/finetuning_runtime.py +143 -0
  227. nat/finetuning/interfaces/__init__.py +24 -0
  228. nat/finetuning/interfaces/finetuning_runner.py +261 -0
  229. nat/finetuning/interfaces/trainer_adapter.py +103 -0
  230. nat/finetuning/interfaces/trajectory_builder.py +115 -0
  231. nat/finetuning/utils/__init__.py +15 -0
  232. nat/finetuning/utils/parsers/__init__.py +15 -0
  233. nat/finetuning/utils/parsers/adk_parser.py +141 -0
  234. nat/finetuning/utils/parsers/base_parser.py +238 -0
  235. nat/finetuning/utils/parsers/common.py +91 -0
  236. nat/finetuning/utils/parsers/langchain_parser.py +267 -0
  237. nat/finetuning/utils/parsers/llama_index_parser.py +218 -0
  238. nat/front_ends/__init__.py +1 -1
  239. nat/front_ends/console/__init__.py +1 -1
  240. nat/front_ends/console/authentication_flow_handler.py +1 -1
  241. nat/front_ends/console/console_front_end_config.py +4 -1
  242. nat/front_ends/console/console_front_end_plugin.py +5 -4
  243. nat/front_ends/console/register.py +1 -1
  244. nat/front_ends/cron/__init__.py +1 -1
  245. nat/front_ends/fastapi/__init__.py +1 -1
  246. nat/front_ends/fastapi/async_job.py +128 -0
  247. nat/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +1 -1
  248. nat/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +13 -9
  249. nat/front_ends/fastapi/dask_client_mixin.py +1 -1
  250. nat/front_ends/fastapi/fastapi_front_end_config.py +1 -1
  251. nat/front_ends/fastapi/fastapi_front_end_controller.py +1 -1
  252. nat/front_ends/fastapi/fastapi_front_end_plugin.py +25 -30
  253. nat/front_ends/fastapi/fastapi_front_end_plugin_worker.py +195 -60
  254. nat/front_ends/fastapi/html_snippets/__init__.py +1 -1
  255. nat/front_ends/fastapi/html_snippets/auth_code_grant_success.py +1 -1
  256. nat/front_ends/fastapi/intermediate_steps_subscriber.py +12 -1
  257. nat/front_ends/fastapi/job_store.py +23 -11
  258. nat/front_ends/fastapi/main.py +1 -1
  259. nat/front_ends/fastapi/message_handler.py +27 -4
  260. nat/front_ends/fastapi/message_validator.py +54 -2
  261. nat/front_ends/fastapi/register.py +1 -1
  262. nat/front_ends/fastapi/response_helpers.py +16 -15
  263. nat/front_ends/fastapi/step_adaptor.py +1 -1
  264. nat/front_ends/fastapi/utils.py +1 -1
  265. nat/front_ends/register.py +1 -2
  266. nat/front_ends/simple_base/__init__.py +1 -1
  267. nat/front_ends/simple_base/simple_front_end_plugin_base.py +6 -4
  268. nat/llm/aws_bedrock_llm.py +1 -1
  269. nat/llm/azure_openai_llm.py +10 -1
  270. nat/llm/dynamo_llm.py +363 -0
  271. nat/llm/huggingface_llm.py +177 -0
  272. nat/llm/litellm_llm.py +1 -1
  273. nat/llm/nim_llm.py +1 -1
  274. nat/llm/openai_llm.py +1 -1
  275. nat/llm/register.py +3 -1
  276. nat/llm/utils/__init__.py +1 -1
  277. nat/llm/utils/env_config_value.py +1 -1
  278. nat/llm/utils/error.py +1 -1
  279. nat/llm/utils/thinking.py +1 -1
  280. nat/memory/__init__.py +1 -1
  281. nat/memory/interfaces.py +1 -1
  282. nat/memory/models.py +1 -1
  283. nat/meta/pypi.md +1 -1
  284. nat/middleware/__init__.py +5 -5
  285. nat/middleware/cache/__init__.py +14 -0
  286. nat/middleware/{cache_middleware.py → cache/cache_middleware.py} +39 -42
  287. nat/middleware/cache/cache_middleware_config.py +44 -0
  288. nat/middleware/cache/register.py +33 -0
  289. nat/middleware/defense/__init__.py +14 -0
  290. nat/middleware/defense/defense_middleware.py +362 -0
  291. nat/middleware/defense/defense_middleware_content_guard.py +455 -0
  292. nat/middleware/defense/defense_middleware_data_models.py +91 -0
  293. nat/middleware/defense/defense_middleware_output_verifier.py +440 -0
  294. nat/middleware/defense/defense_middleware_pii.py +356 -0
  295. nat/middleware/defense/register.py +82 -0
  296. nat/middleware/dynamic/__init__.py +14 -0
  297. nat/middleware/dynamic/dynamic_function_middleware.py +962 -0
  298. nat/middleware/dynamic/dynamic_middleware_config.py +132 -0
  299. nat/middleware/dynamic/register.py +34 -0
  300. nat/middleware/function_middleware.py +236 -52
  301. nat/middleware/logging/__init__.py +14 -0
  302. nat/middleware/logging/logging_middleware.py +67 -0
  303. nat/middleware/logging/logging_middleware_config.py +28 -0
  304. nat/middleware/logging/register.py +33 -0
  305. nat/middleware/middleware.py +142 -28
  306. nat/middleware/red_teaming/__init__.py +14 -0
  307. nat/middleware/red_teaming/red_teaming_middleware.py +344 -0
  308. nat/middleware/red_teaming/red_teaming_middleware_config.py +112 -0
  309. nat/middleware/red_teaming/register.py +47 -0
  310. nat/middleware/register.py +7 -20
  311. nat/middleware/utils/__init__.py +14 -0
  312. nat/middleware/utils/workflow_inventory.py +155 -0
  313. nat/object_store/__init__.py +1 -1
  314. nat/object_store/in_memory_object_store.py +1 -1
  315. nat/object_store/interfaces.py +1 -1
  316. nat/object_store/models.py +1 -1
  317. nat/object_store/register.py +1 -1
  318. nat/observability/__init__.py +1 -1
  319. nat/observability/exporter/__init__.py +1 -1
  320. nat/observability/exporter/base_exporter.py +1 -1
  321. nat/observability/exporter/exporter.py +1 -1
  322. nat/observability/exporter/file_exporter.py +1 -1
  323. nat/observability/exporter/processing_exporter.py +1 -1
  324. nat/observability/exporter/raw_exporter.py +1 -1
  325. nat/observability/exporter/span_exporter.py +7 -1
  326. nat/observability/exporter_manager.py +1 -1
  327. nat/observability/mixin/__init__.py +1 -1
  328. nat/observability/mixin/batch_config_mixin.py +1 -1
  329. nat/observability/mixin/collector_config_mixin.py +1 -1
  330. nat/observability/mixin/file_mixin.py +1 -1
  331. nat/observability/mixin/file_mode.py +1 -1
  332. nat/observability/mixin/redaction_config_mixin.py +1 -1
  333. nat/observability/mixin/resource_conflict_mixin.py +1 -1
  334. nat/observability/mixin/serialize_mixin.py +1 -1
  335. nat/observability/mixin/tagging_config_mixin.py +1 -1
  336. nat/observability/mixin/type_introspection_mixin.py +1 -1
  337. nat/observability/processor/__init__.py +1 -1
  338. nat/observability/processor/batching_processor.py +1 -1
  339. nat/observability/processor/callback_processor.py +1 -1
  340. nat/observability/processor/falsy_batch_filter_processor.py +1 -1
  341. nat/observability/processor/intermediate_step_serializer.py +1 -1
  342. nat/observability/processor/processor.py +1 -1
  343. nat/observability/processor/processor_factory.py +1 -1
  344. nat/observability/processor/redaction/__init__.py +1 -1
  345. nat/observability/processor/redaction/contextual_redaction_processor.py +1 -1
  346. nat/observability/processor/redaction/contextual_span_redaction_processor.py +1 -1
  347. nat/observability/processor/redaction/redaction_processor.py +1 -1
  348. nat/observability/processor/redaction/span_header_redaction_processor.py +1 -1
  349. nat/observability/processor/span_tagging_processor.py +1 -1
  350. nat/observability/register.py +1 -1
  351. nat/observability/utils/__init__.py +1 -1
  352. nat/observability/utils/dict_utils.py +1 -1
  353. nat/observability/utils/time_utils.py +1 -1
  354. nat/profiler/calc/__init__.py +1 -1
  355. nat/profiler/calc/calc_runner.py +3 -3
  356. nat/profiler/calc/calculations.py +1 -1
  357. nat/profiler/calc/data_models.py +1 -1
  358. nat/profiler/calc/plot.py +30 -3
  359. nat/profiler/callbacks/agno_callback_handler.py +1 -1
  360. nat/profiler/callbacks/base_callback_class.py +1 -1
  361. nat/profiler/callbacks/langchain_callback_handler.py +33 -3
  362. nat/profiler/callbacks/llama_index_callback_handler.py +13 -10
  363. nat/profiler/callbacks/semantic_kernel_callback_handler.py +1 -1
  364. nat/profiler/callbacks/token_usage_base_model.py +1 -1
  365. nat/profiler/data_frame_row.py +1 -1
  366. nat/profiler/data_models.py +1 -1
  367. nat/profiler/decorators/framework_wrapper.py +16 -1
  368. nat/profiler/decorators/function_tracking.py +1 -1
  369. nat/profiler/forecasting/config.py +1 -1
  370. nat/profiler/forecasting/model_trainer.py +1 -1
  371. nat/profiler/forecasting/models/__init__.py +1 -1
  372. nat/profiler/forecasting/models/forecasting_base_model.py +1 -1
  373. nat/profiler/forecasting/models/linear_model.py +1 -1
  374. nat/profiler/forecasting/models/random_forest_regressor.py +1 -1
  375. nat/profiler/inference_metrics_model.py +1 -1
  376. nat/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +1 -1
  377. nat/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +1 -1
  378. nat/profiler/inference_optimization/data_models.py +1 -1
  379. nat/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +1 -1
  380. nat/profiler/inference_optimization/experimental/prefix_span_analysis.py +1 -1
  381. nat/profiler/inference_optimization/llm_metrics.py +1 -1
  382. nat/profiler/inference_optimization/prompt_caching.py +1 -1
  383. nat/profiler/inference_optimization/token_uniqueness.py +1 -1
  384. nat/profiler/inference_optimization/workflow_runtimes.py +1 -1
  385. nat/profiler/intermediate_property_adapter.py +1 -1
  386. nat/profiler/parameter_optimization/optimizable_utils.py +1 -1
  387. nat/profiler/parameter_optimization/optimizer_runtime.py +1 -1
  388. nat/profiler/parameter_optimization/parameter_optimizer.py +1 -1
  389. nat/profiler/parameter_optimization/parameter_selection.py +1 -1
  390. nat/profiler/parameter_optimization/pareto_visualizer.py +1 -1
  391. nat/profiler/parameter_optimization/prompt_optimizer.py +1 -1
  392. nat/profiler/parameter_optimization/update_helpers.py +1 -1
  393. nat/profiler/profile_runner.py +1 -1
  394. nat/profiler/utils.py +1 -1
  395. nat/registry_handlers/local/local_handler.py +1 -1
  396. nat/registry_handlers/local/register_local.py +1 -1
  397. nat/registry_handlers/metadata_factory.py +1 -1
  398. nat/registry_handlers/package_utils.py +1 -1
  399. nat/registry_handlers/pypi/pypi_handler.py +1 -1
  400. nat/registry_handlers/pypi/register_pypi.py +1 -1
  401. nat/registry_handlers/register.py +1 -1
  402. nat/registry_handlers/registry_handler_base.py +1 -1
  403. nat/registry_handlers/rest/register_rest.py +1 -1
  404. nat/registry_handlers/rest/rest_handler.py +1 -1
  405. nat/registry_handlers/schemas/headers.py +1 -1
  406. nat/registry_handlers/schemas/package.py +1 -1
  407. nat/registry_handlers/schemas/publish.py +1 -1
  408. nat/registry_handlers/schemas/pull.py +1 -1
  409. nat/registry_handlers/schemas/remove.py +1 -1
  410. nat/registry_handlers/schemas/search.py +1 -1
  411. nat/registry_handlers/schemas/status.py +1 -1
  412. nat/retriever/interface.py +1 -1
  413. nat/retriever/milvus/__init__.py +1 -1
  414. nat/retriever/milvus/register.py +1 -1
  415. nat/retriever/milvus/retriever.py +1 -1
  416. nat/retriever/models.py +1 -1
  417. nat/retriever/nemo_retriever/__init__.py +1 -1
  418. nat/retriever/nemo_retriever/register.py +1 -1
  419. nat/retriever/nemo_retriever/retriever.py +5 -5
  420. nat/retriever/register.py +1 -1
  421. nat/runtime/__init__.py +1 -1
  422. nat/runtime/loader.py +10 -3
  423. nat/runtime/metrics.py +180 -0
  424. nat/runtime/runner.py +1 -5
  425. nat/runtime/session.py +451 -32
  426. nat/runtime/user_metadata.py +1 -1
  427. nat/settings/global_settings.py +1 -1
  428. nat/tool/chat_completion.py +1 -1
  429. nat/tool/code_execution/README.md +1 -1
  430. nat/tool/code_execution/code_sandbox.py +1 -1
  431. nat/tool/code_execution/local_sandbox/Dockerfile.sandbox +1 -1
  432. nat/tool/code_execution/local_sandbox/__init__.py +1 -1
  433. nat/tool/code_execution/local_sandbox/local_sandbox_server.py +1 -1
  434. nat/tool/code_execution/local_sandbox/start_local_sandbox.sh +1 -1
  435. nat/tool/code_execution/register.py +1 -1
  436. nat/tool/code_execution/utils.py +1 -1
  437. nat/tool/datetime_tools.py +1 -1
  438. nat/tool/document_search.py +1 -1
  439. nat/tool/github_tools.py +1 -1
  440. nat/tool/memory_tools/add_memory_tool.py +1 -1
  441. nat/tool/memory_tools/delete_memory_tool.py +1 -1
  442. nat/tool/memory_tools/get_memory_tool.py +1 -1
  443. nat/tool/nvidia_rag.py +2 -2
  444. nat/tool/register.py +1 -1
  445. nat/tool/retriever.py +1 -1
  446. nat/tool/server_tools.py +1 -1
  447. nat/utils/__init__.py +8 -5
  448. nat/utils/callable_utils.py +1 -1
  449. nat/utils/data_models/schema_validator.py +1 -1
  450. nat/utils/debugging_utils.py +1 -1
  451. nat/utils/decorators.py +1 -1
  452. nat/utils/dump_distro_mapping.py +1 -1
  453. nat/utils/exception_handlers/automatic_retries.py +3 -3
  454. nat/utils/exception_handlers/schemas.py +1 -1
  455. nat/utils/io/model_processing.py +1 -1
  456. nat/utils/io/supress_logs.py +33 -0
  457. nat/utils/io/yaml_tools.py +1 -1
  458. nat/utils/log_levels.py +1 -1
  459. nat/utils/log_utils.py +13 -1
  460. nat/utils/metadata_utils.py +1 -1
  461. nat/utils/optional_imports.py +1 -1
  462. nat/utils/producer_consumer_queue.py +1 -1
  463. nat/utils/reactive/base/observable_base.py +1 -1
  464. nat/utils/reactive/base/observer_base.py +1 -1
  465. nat/utils/reactive/base/subject_base.py +1 -1
  466. nat/utils/reactive/observable.py +1 -1
  467. nat/utils/reactive/observer.py +1 -1
  468. nat/utils/reactive/subject.py +1 -1
  469. nat/utils/reactive/subscription.py +1 -1
  470. nat/utils/responses_api.py +1 -1
  471. nat/utils/settings/global_settings.py +1 -1
  472. nat/utils/string_utils.py +1 -1
  473. nat/utils/type_converter.py +18 -5
  474. nat/utils/type_utils.py +1 -1
  475. nat/utils/url_utils.py +1 -1
  476. {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/METADATA +39 -14
  477. nvidia_nat-1.4.0a20260113.dist-info/RECORD +547 -0
  478. nvidia_nat-1.4.0a20260113.dist-info/entry_points.txt +38 -0
  479. nat/cli/commands/mcp/mcp.py +0 -986
  480. nat/front_ends/mcp/introspection_token_verifier.py +0 -73
  481. nat/front_ends/mcp/mcp_front_end_config.py +0 -109
  482. nat/front_ends/mcp/mcp_front_end_plugin.py +0 -155
  483. nat/front_ends/mcp/mcp_front_end_plugin_worker.py +0 -388
  484. nat/front_ends/mcp/memory_profiler.py +0 -320
  485. nat/front_ends/mcp/register.py +0 -27
  486. nat/front_ends/mcp/tool_converter.py +0 -321
  487. nvidia_nat-1.4.0a20251120.dist-info/RECORD +0 -488
  488. nvidia_nat-1.4.0a20251120.dist-info/entry_points.txt +0 -23
  489. {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/WHEEL +0 -0
  490. {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
  491. {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE.md +0 -0
  492. {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -27,13 +27,16 @@ from nat.data_models.api_server import ChatResponse
27
27
  from nat.data_models.api_server import ChatResponseChunk
28
28
  from nat.data_models.api_server import Error
29
29
  from nat.data_models.api_server import ErrorTypes
30
+ from nat.data_models.api_server import ObservabilityTraceContent
30
31
  from nat.data_models.api_server import ResponseIntermediateStep
32
+ from nat.data_models.api_server import ResponseObservabilityTrace
31
33
  from nat.data_models.api_server import ResponsePayloadOutput
32
34
  from nat.data_models.api_server import SystemIntermediateStepContent
33
35
  from nat.data_models.api_server import SystemResponseContent
34
36
  from nat.data_models.api_server import TextContent
35
37
  from nat.data_models.api_server import WebSocketMessageStatus
36
38
  from nat.data_models.api_server import WebSocketMessageType
39
+ from nat.data_models.api_server import WebSocketObservabilityTraceMessage
37
40
  from nat.data_models.api_server import WebSocketSystemInteractionMessage
38
41
  from nat.data_models.api_server import WebSocketSystemIntermediateStepMessage
39
42
  from nat.data_models.api_server import WebSocketSystemResponseTokenMessage
@@ -67,11 +70,24 @@ class MessageValidator:
67
70
  WebSocketMessageType.INTERMEDIATE_STEP_MESSAGE: WebSocketSystemIntermediateStepMessage,
68
71
  WebSocketMessageType.SYSTEM_INTERACTION_MESSAGE: WebSocketSystemInteractionMessage,
69
72
  WebSocketMessageType.USER_INTERACTION_MESSAGE: WebSocketUserInteractionResponseMessage,
70
- WebSocketMessageType.ERROR_MESSAGE: Error
73
+ WebSocketMessageType.OBSERVABILITY_TRACE_MESSAGE: WebSocketObservabilityTraceMessage,
74
+ WebSocketMessageType.ERROR_MESSAGE: Error,
71
75
  }
72
76
 
73
77
  self._message_parent_id: str = "default_id"
74
78
 
79
+ def _get_observability_trace_id_from_context(self) -> str | None:
80
+ """
81
+ Retrieves observability_trace_id from Context
82
+
83
+ :return: observability_trace_id if available, None otherwise.
84
+ """
85
+ try:
86
+ from nat.builder.context import Context
87
+ return Context.get().observability_trace_id
88
+ except (ImportError, AttributeError, KeyError):
89
+ return None
90
+
75
91
  async def validate_message(self, message: dict[str, Any]) -> BaseModel:
76
92
  """
77
93
  Validates an incoming WebSocket message against its expected schema.
@@ -147,6 +163,9 @@ class MessageValidator:
147
163
  elif (isinstance(data_model, ResponseIntermediateStep)):
148
164
  validated_message_content = SystemIntermediateStepContent(name=data_model.name,
149
165
  payload=data_model.payload)
166
+ elif (isinstance(data_model, ResponseObservabilityTrace)):
167
+ validated_message_content = ObservabilityTraceContent(
168
+ observability_trace_id=data_model.observability_trace_id)
150
169
  elif (isinstance(data_model, HumanPromptBase)):
151
170
  validated_message_content = data_model
152
171
  elif (isinstance(data_model, SystemResponseContent)):
@@ -212,6 +231,9 @@ class MessageValidator:
212
231
  elif (isinstance(data_model, ResponseIntermediateStep)):
213
232
  validated_message_type = WebSocketMessageType.INTERMEDIATE_STEP_MESSAGE
214
233
 
234
+ elif (isinstance(data_model, ResponseObservabilityTrace)):
235
+ validated_message_type = WebSocketMessageType.OBSERVABILITY_TRACE_MESSAGE
236
+
215
237
  elif (isinstance(data_model, HumanPromptBase)):
216
238
  validated_message_type = WebSocketMessageType.SYSTEM_INTERACTION_MESSAGE
217
239
  else:
@@ -349,3 +371,33 @@ class MessageValidator:
349
371
  except Exception as e:
350
372
  logger.exception("Error creating system interaction message: %s", str(e))
351
373
  return None
374
+
375
+ async def create_observability_trace_message(
376
+ self,
377
+ *,
378
+ message_id: str | None = str(uuid.uuid4()),
379
+ parent_id: str = "default",
380
+ conversation_id: str | None = None,
381
+ content: ObservabilityTraceContent,
382
+ timestamp: str = str(datetime.datetime.now(datetime.UTC))
383
+ ) -> WebSocketObservabilityTraceMessage | None:
384
+ """
385
+ Creates an observability trace message.
386
+
387
+ :param message_id: Unique identifier for the message (default: generated UUID).
388
+ :param parent_id: ID of the user message that spawned child messages.
389
+ :param conversation_id: ID of the conversation this message belongs to (default: None).
390
+ :param content: Message content.
391
+ :param timestamp: Timestamp of the message (default: current UTC time).
392
+ :return: A WebSocketObservabilityTraceMessage instance.
393
+ """
394
+ try:
395
+ return WebSocketObservabilityTraceMessage(id=message_id,
396
+ parent_id=parent_id,
397
+ conversation_id=conversation_id,
398
+ content=content,
399
+ timestamp=timestamp)
400
+
401
+ except Exception as e:
402
+ logger.exception("Error creating observability trace message: %s", str(e))
403
+ return None
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -23,20 +23,20 @@ from nat.data_models.api_server import ResponseSerializable
23
23
  from nat.data_models.step_adaptor import StepAdaptorConfig
24
24
  from nat.front_ends.fastapi.intermediate_steps_subscriber import pull_intermediate
25
25
  from nat.front_ends.fastapi.step_adaptor import StepAdaptor
26
- from nat.runtime.session import SessionManager
26
+ from nat.runtime.session import Session
27
27
  from nat.utils.producer_consumer_queue import AsyncIOProducerConsumerQueue
28
28
 
29
29
 
30
30
  async def generate_streaming_response_as_str(payload: typing.Any,
31
31
  *,
32
- session_manager: SessionManager,
32
+ session: Session,
33
33
  streaming: bool,
34
34
  step_adaptor: StepAdaptor = StepAdaptor(StepAdaptorConfig()),
35
35
  result_type: type | None = None,
36
36
  output_type: type | None = None) -> AsyncGenerator[str]:
37
37
 
38
38
  async for item in generate_streaming_response(payload,
39
- session_manager=session_manager,
39
+ session=session,
40
40
  streaming=streaming,
41
41
  step_adaptor=step_adaptor,
42
42
  result_type=result_type,
@@ -51,13 +51,13 @@ async def generate_streaming_response_as_str(payload: typing.Any,
51
51
 
52
52
  async def generate_streaming_response(payload: typing.Any,
53
53
  *,
54
- session_manager: SessionManager,
54
+ session: Session,
55
55
  streaming: bool,
56
56
  step_adaptor: StepAdaptor = StepAdaptor(StepAdaptorConfig()),
57
57
  result_type: type | None = None,
58
58
  output_type: type | None = None) -> AsyncGenerator[ResponseSerializable]:
59
59
 
60
- async with session_manager.run(payload) as runner:
60
+ async with session.run(payload) as runner:
61
61
 
62
62
  q: AsyncIOProducerConsumerQueue[ResponseSerializable] = AsyncIOProducerConsumerQueue()
63
63
 
@@ -65,7 +65,7 @@ async def generate_streaming_response(payload: typing.Any,
65
65
  intermediate_complete = await pull_intermediate(q, step_adaptor)
66
66
 
67
67
  async def pull_result():
68
- if session_manager.workflow.has_streaming_output and streaming:
68
+ if session.workflow.has_streaming_output and streaming:
69
69
  async for chunk in runner.result_stream(to_type=output_type):
70
70
  await q.put(chunk)
71
71
  else:
@@ -107,19 +107,20 @@ async def generate_streaming_response(payload: typing.Any,
107
107
 
108
108
  async def generate_single_response(
109
109
  payload: typing.Any,
110
- session_manager: SessionManager,
110
+ session: Session,
111
111
  result_type: type | None = None,
112
112
  ) -> typing.Any:
113
- if (not session_manager.workflow.has_single_output):
113
+
114
+ if not session.workflow.has_single_output:
114
115
  raise ValueError("Cannot get a single output value for streaming workflows")
115
116
 
116
- async with session_manager.run(payload) as runner:
117
+ async with session.run(payload) as runner:
117
118
  return await runner.result(to_type=result_type)
118
119
 
119
120
 
120
121
  async def generate_streaming_response_full(payload: typing.Any,
121
122
  *,
122
- session_manager: SessionManager,
123
+ session: Session,
123
124
  streaming: bool,
124
125
  result_type: type | None = None,
125
126
  output_type: type | None = None,
@@ -137,14 +138,14 @@ async def generate_streaming_response_full(payload: typing.Any,
137
138
  else:
138
139
  allowed_types = set(filter_steps.split(','))
139
140
 
140
- async with session_manager.run(payload) as runner:
141
+ async with session.run(payload) as runner:
141
142
  q: AsyncIOProducerConsumerQueue[ResponseSerializable] = AsyncIOProducerConsumerQueue()
142
143
 
143
144
  # Start the intermediate stream without step adaptor
144
145
  intermediate_complete = await pull_intermediate(q, None)
145
146
 
146
147
  async def pull_result():
147
- if session_manager.workflow.has_streaming_output and streaming:
148
+ if session.workflow.has_streaming_output and streaming:
148
149
  async for chunk in runner.result_stream(to_type=output_type):
149
150
  await q.put(chunk)
150
151
  else:
@@ -174,7 +175,7 @@ async def generate_streaming_response_full(payload: typing.Any,
174
175
 
175
176
  async def generate_streaming_response_full_as_str(payload: typing.Any,
176
177
  *,
177
- session_manager: SessionManager,
178
+ session: Session,
178
179
  streaming: bool,
179
180
  result_type: type | None = None,
180
181
  output_type: type | None = None,
@@ -183,7 +184,7 @@ async def generate_streaming_response_full_as_str(payload: typing.Any,
183
184
  Similar to generate_streaming_response but converts the response to a string format.
184
185
  """
185
186
  async for item in generate_streaming_response_full(payload,
186
- session_manager=session_manager,
187
+ session=session,
187
188
  streaming=streaming,
188
189
  result_type=result_type,
189
190
  output_type=output_type,
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -18,4 +18,3 @@
18
18
 
19
19
  from .console import register as console_register
20
20
  from .fastapi import register as fastapi_register
21
- from .mcp import register as mcp_register
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -47,9 +47,11 @@ class SimpleFrontEndPluginBase(FrontEndBase[FrontEndConfigT], ABC):
47
47
 
48
48
  click.echo(stream.getvalue())
49
49
 
50
- workflow = await builder.build()
51
- session_manager = SessionManager(workflow)
52
- await self.run_workflow(session_manager)
50
+ session_manager = await SessionManager.create(config=self.full_config, shared_builder=builder)
51
+ try:
52
+ await self.run_workflow(session_manager)
53
+ finally:
54
+ await session_manager.shutdown()
53
55
 
54
56
  @abstractmethod
55
57
  async def run_workflow(self, session_manager: SessionManager):
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -16,6 +16,7 @@
16
16
  from pydantic import AliasChoices
17
17
  from pydantic import ConfigDict
18
18
  from pydantic import Field
19
+ from pydantic import computed_field
19
20
 
20
21
  from nat.builder.builder import Builder
21
22
  from nat.builder.llm import LLMProviderInfo
@@ -59,6 +60,14 @@ class AzureOpenAIModelConfig(
59
60
  description="Top-p for distribution sampling.",
60
61
  space=SearchSpace(high=1.0, low=0.5, step=0.1))
61
62
 
63
@computed_field
@property
def model_name(self) -> str:
    """
    Alias exposing ``azure_deployment`` under the name ``model_name``.

    Provided for compatibility with other parts of the code base which expect a
    ``model_name`` attribute on the config.
    """
    deployment_name = self.azure_deployment
    return deployment_name
70
+
62
71
 
63
72
  @register_llm_provider(config_type=AzureOpenAIModelConfig)
64
73
  async def azure_openai_llm(config: AzureOpenAIModelConfig, _builder: Builder):
nat/llm/dynamo_llm.py ADDED
@@ -0,0 +1,363 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """
16
+ Dynamo LLM provider with automatic prefix header injection for KV cache optimization.
17
+
18
+ This module provides a specialized OpenAI-compatible LLM that sends Dynamo prefix headers
19
+ for optimal KV cache management and request routing. The prefix parameters are optimizable
20
+ via the NAT optimizer.
21
+
22
+ The implementation uses httpx event hooks to inject headers at the HTTP transport level,
23
+ making it framework-agnostic (works with LangChain, LlamaIndex, etc.).
24
+
25
+ Dynamo Prefix Parameters
26
+ -------------------------
27
+
28
+ prefix_osl (Output Sequence Length)
29
+ Hint for expected response length:
30
+
31
+ - LOW: decode_cost=1.0, short responses
32
+ - MEDIUM: decode_cost=2.0, typical responses
33
+ - HIGH: decode_cost=3.0, long responses
34
+
35
+ prefix_iat (Inter-Arrival Time)
36
+ Hint for request pacing:
37
+
38
+ - LOW: iat_factor=1.5, rapid bursts -> high worker stickiness
39
+ - MEDIUM: iat_factor=1.0, normal pacing
40
+ - HIGH: iat_factor=0.6, slow requests -> more exploration
41
+
42
+ prefix_total_requests
43
+ Expected requests per conversation:
44
+
45
+ - Higher values increase KV cache affinity and worker stickiness
46
+ - Lower values allow more load balancing
47
+ """
48
+
49
+ import logging
50
+ import uuid
51
+ from collections.abc import Callable
52
+ from collections.abc import Coroutine
53
+ from collections.abc import Iterator
54
+ from contextlib import contextmanager
55
+ from contextvars import ContextVar
56
+ from typing import TYPE_CHECKING
57
+ from typing import Any
58
+ from typing import Literal
59
+
60
+ if TYPE_CHECKING:
61
+ import httpx
62
+
63
+ from pydantic import Field
64
+
65
+ from nat.builder.builder import Builder
66
+ from nat.builder.llm import LLMProviderInfo
67
+ from nat.cli.register_workflow import register_llm_provider
68
+ from nat.data_models.optimizable import OptimizableField
69
+ from nat.data_models.optimizable import SearchSpace
70
+ from nat.llm.openai_llm import OpenAIModelConfig
71
+
72
+ logger = logging.getLogger(__name__)
73
+
74
+ # Define valid prefix hint values
75
+ PrefixLevel = Literal["LOW", "MEDIUM", "HIGH"]
76
+
77
+ # =============================================================================
78
+ # CONTEXT MANAGEMENT FOR DYNAMO PREFIX ID
79
+ # =============================================================================
80
+
81
+
82
+ class DynamoPrefixContext:
83
+ """
84
+ Singleton class for managing Dynamo prefix IDs across LLM calls.
85
+
86
+ This allows evaluation code to set a prefix ID that persists across all LLM
87
+ calls for a single evaluation question (multi-turn conversation).
88
+
89
+ Usage::
90
+
91
+ from nat.llm.dynamo_llm import DynamoPrefixContext
92
+
93
+ # Set prefix ID at the start of each evaluation question
94
+ DynamoPrefixContext.set("eval-q001-abc123")
95
+
96
+ # ... perform LLM calls ...
97
+
98
+ # Clear when done
99
+ DynamoPrefixContext.clear()
100
+
101
+ # Or use as a context manager
102
+ with DynamoPrefixContext.scope("eval-q001-abc123"):
103
+ # ... perform LLM calls ...
104
+ """
105
+
106
+ _current_prefix_id: ContextVar[str | None] = ContextVar('dynamo_prefix_id', default=None)
107
+
108
+ @classmethod
109
+ def set(cls, prefix_id: str) -> None:
110
+ """
111
+ Set the Dynamo prefix ID for the current context.
112
+
113
+ Call this at the start of each evaluation question to ensure all LLM calls
114
+ for that question share the same prefix ID (enabling KV cache reuse).
115
+
116
+ Args:
117
+ prefix_id: The unique prefix ID (e.g., "eval-q001-abc123")
118
+ """
119
+ cls._current_prefix_id.set(prefix_id)
120
+ logger.debug("Set Dynamo prefix ID: %s", prefix_id)
121
+
122
+ @classmethod
123
+ def clear(cls) -> None:
124
+ """Clear the current Dynamo prefix ID context."""
125
+ cls._current_prefix_id.set(None)
126
+ logger.debug("Cleared Dynamo prefix ID")
127
+
128
+ @classmethod
129
+ def get(cls) -> str | None:
130
+ """Get the current Dynamo prefix ID from context, if any."""
131
+ return cls._current_prefix_id.get()
132
+
133
+ @classmethod
134
+ @contextmanager
135
+ def scope(cls, prefix_id: str) -> Iterator[None]:
136
+ """
137
+ Context manager for scoped prefix ID usage.
138
+
139
+ Automatically sets the prefix ID on entry and clears it on exit,
140
+ ensuring proper cleanup even if exceptions occur.
141
+
142
+ Args:
143
+ prefix_id: The unique prefix ID for this scope
144
+
145
+ Yields:
146
+ None
147
+
148
+ Usage:
149
+ with DynamoPrefixContext.scope("eval-q001"):
150
+ # All LLM calls here will use "eval-q001" prefix
151
+ await llm.ainvoke(...)
152
+ """
153
+ cls.set(prefix_id)
154
+ try:
155
+ yield
156
+ finally:
157
+ cls.clear()
158
+
159
+
160
+ # =============================================================================
161
+ # DYNAMO MODEL CONFIGURATION
162
+ # =============================================================================
163
+
164
+
165
class DynamoModelConfig(OpenAIModelConfig, name="dynamo"):
    """
    A Dynamo LLM provider with automatic prefix header injection for KV cache optimization.

    This is a specialized OpenAI-compatible LLM that sends Dynamo prefix headers
    for optimal KV cache management and request routing. Prefix headers are enabled
    by default using the template "nat-dynamo-{uuid}". The prefix routing parameters
    (prefix_total_requests, prefix_osl, prefix_iat) are optimizable via the NAT optimizer.

    To disable prefix headers, set prefix_template to null/None in your config.
    """

    # NOTE(review): the class docstring wording is kept verbatim because pydantic may
    # surface it in generated schemas - confirm before rewording.

    # ---- Dynamo prefix parameters ------------------------------------------------

    # Template from which the prefix ID is built; per its description, None disables
    # header injection.
    prefix_template: str | None = Field(
        default="nat-dynamo-{uuid}",
        description=("Template for prefix ID. The {uuid} placeholder will be replaced with a unique ID. "
                     "Prefix headers are sent by default for KV cache optimization. "
                     "Set to null/None to disable prefix header injection."),
    )

    # Validated to 1..50; the optimizer search space is declared separately as 1..20, step 5.
    prefix_total_requests: int = OptimizableField(
        default=10,
        ge=1,
        le=50,
        description=("Expected number of requests for this conversation/prefix. "
                     "Higher values increase worker stickiness and KV cache locality. "
                     "Lower values allow more load balancing across workers."),
        space=SearchSpace(low=1, high=20, step=5),
    )

    # Output-sequence-length hint, one of LOW / MEDIUM / HIGH.
    prefix_osl: PrefixLevel = OptimizableField(
        default="MEDIUM",
        description=("Output Sequence Length hint for the Dynamo router. "
                     "LOW=short responses (decode_cost=1.0), "
                     "MEDIUM=typical (decode_cost=2.0), "
                     "HIGH=long responses (decode_cost=3.0)."),
        space=SearchSpace(values=["LOW", "MEDIUM", "HIGH"]),
    )

    # Inter-arrival-time hint, one of LOW / MEDIUM / HIGH.
    prefix_iat: PrefixLevel = OptimizableField(
        default="MEDIUM",
        description=("Inter-Arrival Time hint for the Dynamo router. "
                     "LOW=rapid bursts (iat_factor=1.5, high stickiness), "
                     "MEDIUM=normal (iat_factor=1.0), "
                     "HIGH=slow requests (iat_factor=0.6, more exploration)."),
        space=SearchSpace(values=["LOW", "MEDIUM", "HIGH"]),
    )

    # Must be strictly positive (gt=0.0).
    request_timeout: float = Field(
        default=600.0,
        gt=0.0,
        description="HTTP request timeout in seconds for LLM requests.",
    )

    # ---- Utility methods ---------------------------------------------------------

    @staticmethod
    def get_dynamo_field_names() -> frozenset[str]:
        """
        Return the names of the fields this class adds for Dynamo.

        Intended for ``model_dump`` exclusion when building config dicts for framework
        clients, so that Dynamo-only parameters are not forwarded to the underlying client.

        Returns:
            A frozenset of Dynamo-specific field names.

        Example::

            config_dict = config.model_dump(
                exclude={"type", "thinking", *DynamoModelConfig.get_dynamo_field_names()},
                ...
            )
        """
        dynamo_only_fields = (
            "prefix_template",
            "prefix_total_requests",
            "prefix_osl",
            "prefix_iat",
            "request_timeout",
        )
        return frozenset(dynamo_only_fields)
246
+
247
+
248
+ # =============================================================================
249
+ # HTTPX EVENT HOOK FOR HEADER INJECTION
250
+ # =============================================================================
251
+
252
+
253
def _create_dynamo_request_hook(
    prefix_template: str | None,
    total_requests: int,
    osl: str,
    iat: str,
) -> Callable[["httpx.Request"], Coroutine[Any, Any, None]]:
    """
    Build an async httpx request hook that stamps Dynamo prefix headers on each request.

    A default prefix ID is generated exactly once, when this factory runs, so every
    request sent through the returned hook carries the same ID unless overridden. At
    request time a non-empty value from ``DynamoPrefixContext.get()`` takes precedence
    over that default (e.g. for a per-question ID in batch evaluation).

    Args:
        prefix_template: Template string with {uuid} placeholder; when falsy, the
            built-in "nat-dynamo-{uuid}" form is used for the default ID
        total_requests: Expected number of requests for this prefix
        osl: Output sequence length hint (LOW/MEDIUM/HIGH)
        iat: Inter-arrival time hint (LOW/MEDIUM/HIGH)

    Returns:
        An async function suitable for use as an httpx event hook.
    """
    # 16 hex characters of a random UUID, fixed for the lifetime of this hook.
    id_suffix = uuid.uuid4().hex[:16]
    template = prefix_template if prefix_template else "nat-dynamo-{uuid}"
    default_prefix_id = template.format(uuid=id_suffix)

    logger.debug("Created Dynamo request hook with default prefix ID: %s", default_prefix_id)

    async def on_request(request):
        """Inject Dynamo prefix headers before each request."""
        # The context variable wins over the per-hook default when it holds a value.
        override_id = DynamoPrefixContext.get()

        if not override_id:
            prefix_id = default_prefix_id
            logger.debug("Using default prefix ID: %s", prefix_id)
        else:
            prefix_id = override_id
            logger.debug("Using context prefix ID: %s", prefix_id)

        headers = request.headers
        headers["x-prefix-id"] = prefix_id
        headers["x-prefix-total-requests"] = str(total_requests)
        osl_hint = osl.upper()
        headers["x-prefix-osl"] = osl_hint
        iat_hint = iat.upper()
        headers["x-prefix-iat"] = iat_hint

        logger.debug("Injected Dynamo headers: prefix_id=%s, total_requests=%d, osl=%s, iat=%s",
                     prefix_id,
                     total_requests,
                     osl_hint,
                     iat_hint)

    return on_request
315
+
316
+
317
def create_httpx_client_with_dynamo_hooks(
    prefix_template: str | None,
    total_requests: int,
    osl: str,
    iat: str,
    timeout: float = 600.0,
) -> "httpx.AsyncClient":
    """
    Build an ``httpx.AsyncClient`` whose requests pass through the Dynamo header hook.

    The returned client has a single "request" event hook, produced by
    ``_create_dynamo_request_hook`` from the given arguments, and uses ``timeout``
    as its httpx timeout. It can be handed to the OpenAI SDK so headers are injected
    at the HTTP level, independent of the calling framework.

    Args:
        prefix_template: Template string with {uuid} placeholder
        total_requests: Expected number of requests for this prefix
        osl: Output sequence length hint (LOW/MEDIUM/HIGH)
        iat: Inter-arrival time hint (LOW/MEDIUM/HIGH)
        timeout: HTTP request timeout in seconds

    Returns:
        An httpx.AsyncClient configured with Dynamo header injection.
    """
    # httpx is imported inside the function; at module level it is only imported
    # under TYPE_CHECKING.
    import httpx

    inject_headers = _create_dynamo_request_hook(prefix_template, total_requests, osl, iat)
    hooks = {"request": [inject_headers]}
    client_timeout = httpx.Timeout(timeout)

    return httpx.AsyncClient(event_hooks=hooks, timeout=client_timeout)
348
+
349
+
350
+ # =============================================================================
351
+ # PROVIDER REGISTRATION
352
+ # =============================================================================
353
+ # Note: Client registrations for each framework (LangChain, LlamaIndex, etc.)
354
+ # are in the respective plugin packages under packages/nvidia_nat_<framework>/
355
+
356
+
357
@register_llm_provider(config_type=DynamoModelConfig)
async def dynamo_llm(config: DynamoModelConfig, _builder: Builder):
    """
    Register the Dynamo LLM provider.

    Yields a single ``LLMProviderInfo`` wrapping ``config``; ``_builder`` is not used here.
    """
    provider_info = LLMProviderInfo(
        config=config,
        description="A Dynamo-optimized model with automatic prefix headers for KV cache management.",
    )
    yield provider_info
+ )