nvidia-nat 1.4.0a20251112__py3-none-any.whl → 1.4.0a20260113__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (492) hide show
  1. aiq/__init__.py +1 -1
  2. nat/{front_ends/mcp → agent/auto_memory_wrapper}/__init__.py +1 -1
  3. nat/agent/auto_memory_wrapper/agent.py +278 -0
  4. nat/agent/auto_memory_wrapper/register.py +227 -0
  5. nat/agent/auto_memory_wrapper/state.py +30 -0
  6. nat/agent/base.py +1 -1
  7. nat/agent/dual_node.py +1 -1
  8. nat/agent/prompt_optimizer/prompt.py +1 -1
  9. nat/agent/prompt_optimizer/register.py +1 -1
  10. nat/agent/react_agent/agent.py +16 -9
  11. nat/agent/react_agent/output_parser.py +2 -2
  12. nat/agent/react_agent/prompt.py +3 -2
  13. nat/agent/react_agent/register.py +2 -2
  14. nat/agent/react_agent/register_per_user_agent.py +104 -0
  15. nat/agent/reasoning_agent/reasoning_agent.py +1 -1
  16. nat/agent/register.py +3 -1
  17. nat/agent/responses_api_agent/__init__.py +1 -1
  18. nat/agent/responses_api_agent/register.py +1 -1
  19. nat/agent/rewoo_agent/agent.py +9 -4
  20. nat/agent/rewoo_agent/prompt.py +1 -1
  21. nat/agent/rewoo_agent/register.py +1 -1
  22. nat/agent/tool_calling_agent/agent.py +5 -4
  23. nat/agent/tool_calling_agent/register.py +1 -1
  24. nat/authentication/__init__.py +1 -1
  25. nat/authentication/api_key/__init__.py +1 -1
  26. nat/authentication/api_key/api_key_auth_provider.py +1 -1
  27. nat/authentication/api_key/api_key_auth_provider_config.py +22 -7
  28. nat/authentication/api_key/register.py +1 -1
  29. nat/authentication/credential_validator/__init__.py +1 -1
  30. nat/authentication/credential_validator/bearer_token_validator.py +1 -1
  31. nat/authentication/exceptions/__init__.py +1 -1
  32. nat/authentication/exceptions/api_key_exceptions.py +1 -1
  33. nat/authentication/http_basic_auth/http_basic_auth_provider.py +1 -1
  34. nat/authentication/http_basic_auth/register.py +1 -1
  35. nat/authentication/interfaces.py +1 -1
  36. nat/authentication/oauth2/__init__.py +1 -1
  37. nat/authentication/oauth2/oauth2_auth_code_flow_provider.py +1 -1
  38. nat/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +1 -1
  39. nat/authentication/oauth2/oauth2_resource_server_config.py +1 -1
  40. nat/authentication/oauth2/register.py +1 -1
  41. nat/authentication/register.py +1 -1
  42. nat/builder/builder.py +563 -1
  43. nat/builder/child_builder.py +385 -0
  44. nat/builder/component_utils.py +34 -4
  45. nat/builder/context.py +34 -1
  46. nat/builder/embedder.py +1 -1
  47. nat/builder/eval_builder.py +19 -7
  48. nat/builder/evaluator.py +1 -1
  49. nat/builder/framework_enum.py +3 -1
  50. nat/builder/front_end.py +1 -1
  51. nat/builder/function.py +113 -5
  52. nat/builder/function_base.py +1 -1
  53. nat/builder/function_info.py +1 -1
  54. nat/builder/intermediate_step_manager.py +1 -1
  55. nat/builder/llm.py +1 -1
  56. nat/builder/per_user_workflow_builder.py +843 -0
  57. nat/builder/retriever.py +1 -1
  58. nat/builder/sync_builder.py +571 -0
  59. nat/builder/user_interaction_manager.py +1 -1
  60. nat/builder/workflow.py +5 -3
  61. nat/builder/workflow_builder.py +619 -378
  62. nat/cli/__init__.py +1 -1
  63. nat/cli/cli_utils/config_override.py +1 -1
  64. nat/cli/cli_utils/validation.py +32 -1
  65. nat/cli/commands/configure/channel/add.py +1 -1
  66. nat/cli/commands/configure/channel/channel.py +1 -1
  67. nat/cli/commands/configure/channel/remove.py +1 -1
  68. nat/cli/commands/configure/channel/update.py +1 -1
  69. nat/cli/commands/configure/configure.py +1 -1
  70. nat/cli/commands/evaluate.py +87 -13
  71. nat/cli/commands/finetune.py +132 -0
  72. nat/cli/commands/info/__init__.py +1 -1
  73. nat/cli/commands/info/info.py +1 -1
  74. nat/cli/commands/info/list_channels.py +1 -1
  75. nat/cli/commands/info/list_components.py +1 -1
  76. nat/cli/commands/object_store/__init__.py +1 -1
  77. nat/cli/commands/object_store/object_store.py +1 -1
  78. nat/cli/commands/optimize.py +1 -1
  79. nat/cli/commands/{mcp → red_teaming}/__init__.py +1 -1
  80. nat/cli/commands/red_teaming/red_teaming.py +138 -0
  81. nat/cli/commands/red_teaming/red_teaming_utils.py +73 -0
  82. nat/cli/commands/registry/__init__.py +1 -1
  83. nat/cli/commands/registry/publish.py +1 -1
  84. nat/cli/commands/registry/pull.py +1 -1
  85. nat/cli/commands/registry/registry.py +1 -1
  86. nat/cli/commands/registry/remove.py +1 -1
  87. nat/cli/commands/registry/search.py +1 -1
  88. nat/cli/commands/sizing/__init__.py +1 -1
  89. nat/cli/commands/sizing/calc.py +1 -1
  90. nat/cli/commands/sizing/sizing.py +1 -1
  91. nat/cli/commands/start.py +1 -1
  92. nat/cli/commands/uninstall.py +1 -1
  93. nat/cli/commands/validate.py +1 -1
  94. nat/cli/commands/workflow/__init__.py +1 -1
  95. nat/cli/commands/workflow/workflow.py +1 -1
  96. nat/cli/commands/workflow/workflow_commands.py +3 -2
  97. nat/cli/entrypoint.py +15 -37
  98. nat/cli/main.py +2 -2
  99. nat/cli/plugin_loader.py +69 -0
  100. nat/cli/register_workflow.py +233 -5
  101. nat/cli/type_registry.py +237 -3
  102. nat/control_flow/register.py +1 -1
  103. nat/control_flow/router_agent/agent.py +1 -1
  104. nat/control_flow/router_agent/prompt.py +1 -1
  105. nat/control_flow/router_agent/register.py +1 -1
  106. nat/control_flow/sequential_executor.py +28 -7
  107. nat/data_models/__init__.py +1 -1
  108. nat/data_models/agent.py +1 -1
  109. nat/data_models/api_server.py +38 -3
  110. nat/data_models/authentication.py +1 -1
  111. nat/data_models/common.py +1 -1
  112. nat/data_models/component.py +9 -1
  113. nat/data_models/component_ref.py +45 -1
  114. nat/data_models/config.py +78 -1
  115. nat/data_models/dataset_handler.py +15 -2
  116. nat/data_models/discovery_metadata.py +1 -1
  117. nat/data_models/embedder.py +1 -1
  118. nat/data_models/evaluate.py +6 -1
  119. nat/data_models/evaluator.py +1 -1
  120. nat/data_models/finetuning.py +260 -0
  121. nat/data_models/front_end.py +1 -1
  122. nat/data_models/function.py +15 -2
  123. nat/data_models/function_dependencies.py +1 -1
  124. nat/data_models/gated_field_mixin.py +1 -1
  125. nat/data_models/interactive.py +1 -1
  126. nat/data_models/intermediate_step.py +29 -2
  127. nat/data_models/invocation_node.py +1 -1
  128. nat/data_models/llm.py +1 -1
  129. nat/data_models/logging.py +1 -1
  130. nat/data_models/memory.py +1 -1
  131. nat/data_models/middleware.py +37 -0
  132. nat/data_models/object_store.py +1 -1
  133. nat/data_models/openai_mcp.py +1 -1
  134. nat/data_models/optimizable.py +1 -1
  135. nat/data_models/optimizer.py +1 -1
  136. nat/data_models/profiler.py +1 -1
  137. nat/data_models/registry_handler.py +1 -1
  138. nat/data_models/retriever.py +1 -1
  139. nat/data_models/retry_mixin.py +1 -1
  140. nat/data_models/runtime_enum.py +26 -0
  141. nat/data_models/span.py +1 -1
  142. nat/data_models/step_adaptor.py +1 -1
  143. nat/data_models/streaming.py +1 -1
  144. nat/data_models/swe_bench_model.py +1 -1
  145. nat/data_models/telemetry_exporter.py +1 -1
  146. nat/data_models/thinking_mixin.py +1 -1
  147. nat/data_models/ttc_strategy.py +1 -1
  148. nat/embedder/azure_openai_embedder.py +1 -1
  149. nat/embedder/nim_embedder.py +1 -1
  150. nat/embedder/openai_embedder.py +1 -1
  151. nat/embedder/register.py +1 -1
  152. nat/eval/__init__.py +1 -1
  153. nat/eval/config.py +8 -1
  154. nat/eval/dataset_handler/dataset_downloader.py +1 -1
  155. nat/eval/dataset_handler/dataset_filter.py +1 -1
  156. nat/eval/dataset_handler/dataset_handler.py +4 -2
  157. nat/eval/evaluate.py +226 -81
  158. nat/eval/evaluator/__init__.py +1 -1
  159. nat/eval/evaluator/base_evaluator.py +2 -2
  160. nat/eval/evaluator/evaluator_model.py +3 -2
  161. nat/eval/intermediate_step_adapter.py +1 -1
  162. nat/eval/llm_validator.py +336 -0
  163. nat/eval/rag_evaluator/evaluate.py +17 -10
  164. nat/eval/rag_evaluator/register.py +1 -1
  165. nat/eval/red_teaming_evaluator/__init__.py +14 -0
  166. nat/eval/red_teaming_evaluator/data_models.py +66 -0
  167. nat/eval/red_teaming_evaluator/evaluate.py +327 -0
  168. nat/eval/red_teaming_evaluator/filter_conditions.py +75 -0
  169. nat/eval/red_teaming_evaluator/register.py +55 -0
  170. nat/eval/register.py +2 -1
  171. nat/eval/remote_workflow.py +1 -1
  172. nat/eval/runners/__init__.py +1 -1
  173. nat/eval/runners/config.py +1 -1
  174. nat/eval/runners/multi_eval_runner.py +1 -1
  175. nat/eval/runners/red_teaming_runner/__init__.py +24 -0
  176. nat/eval/runners/red_teaming_runner/config.py +282 -0
  177. nat/eval/runners/red_teaming_runner/report_utils.py +707 -0
  178. nat/eval/runners/red_teaming_runner/runner.py +867 -0
  179. nat/eval/runtime_evaluator/__init__.py +1 -1
  180. nat/eval/runtime_evaluator/evaluate.py +1 -1
  181. nat/eval/runtime_evaluator/register.py +1 -1
  182. nat/eval/runtime_event_subscriber.py +1 -1
  183. nat/eval/swe_bench_evaluator/evaluate.py +1 -1
  184. nat/eval/swe_bench_evaluator/register.py +1 -1
  185. nat/eval/trajectory_evaluator/evaluate.py +2 -2
  186. nat/eval/trajectory_evaluator/register.py +1 -1
  187. nat/eval/tunable_rag_evaluator/evaluate.py +5 -5
  188. nat/eval/tunable_rag_evaluator/register.py +1 -1
  189. nat/eval/usage_stats.py +1 -1
  190. nat/eval/utils/eval_trace_ctx.py +1 -1
  191. nat/eval/utils/output_uploader.py +1 -1
  192. nat/eval/utils/tqdm_position_registry.py +1 -1
  193. nat/eval/utils/weave_eval.py +1 -1
  194. nat/experimental/decorators/experimental_warning_decorator.py +1 -1
  195. nat/experimental/test_time_compute/editing/iterative_plan_refinement_editor.py +1 -1
  196. nat/experimental/test_time_compute/editing/llm_as_a_judge_editor.py +1 -1
  197. nat/experimental/test_time_compute/editing/motivation_aware_summarization.py +1 -1
  198. nat/experimental/test_time_compute/functions/execute_score_select_function.py +1 -1
  199. nat/experimental/test_time_compute/functions/multi_llm_judge_function.py +88 -0
  200. nat/experimental/test_time_compute/functions/plan_select_execute_function.py +1 -1
  201. nat/experimental/test_time_compute/functions/ttc_tool_orchestration_function.py +1 -1
  202. nat/experimental/test_time_compute/functions/ttc_tool_wrapper_function.py +1 -1
  203. nat/experimental/test_time_compute/models/editor_config.py +1 -1
  204. nat/experimental/test_time_compute/models/scoring_config.py +1 -1
  205. nat/experimental/test_time_compute/models/search_config.py +20 -2
  206. nat/experimental/test_time_compute/models/selection_config.py +33 -2
  207. nat/experimental/test_time_compute/models/stage_enums.py +1 -1
  208. nat/experimental/test_time_compute/models/strategy_base.py +1 -1
  209. nat/experimental/test_time_compute/models/tool_use_config.py +1 -1
  210. nat/experimental/test_time_compute/models/ttc_item.py +1 -1
  211. nat/experimental/test_time_compute/register.py +4 -1
  212. nat/experimental/test_time_compute/scoring/llm_based_agent_scorer.py +1 -1
  213. nat/experimental/test_time_compute/scoring/llm_based_plan_scorer.py +1 -1
  214. nat/experimental/test_time_compute/scoring/motivation_aware_scorer.py +1 -1
  215. nat/experimental/test_time_compute/search/multi_llm_generation.py +115 -0
  216. nat/experimental/test_time_compute/search/multi_llm_planner.py +1 -1
  217. nat/experimental/test_time_compute/search/multi_query_retrieval_search.py +1 -1
  218. nat/experimental/test_time_compute/search/single_shot_multi_plan_planner.py +1 -1
  219. nat/experimental/test_time_compute/selection/best_of_n_selector.py +1 -1
  220. nat/experimental/test_time_compute/selection/llm_based_agent_output_selector.py +1 -1
  221. nat/experimental/test_time_compute/selection/llm_based_output_merging_selector.py +1 -1
  222. nat/experimental/test_time_compute/selection/llm_based_plan_selector.py +1 -1
  223. nat/experimental/test_time_compute/selection/llm_judge_selection.py +127 -0
  224. nat/experimental/test_time_compute/selection/threshold_selector.py +1 -1
  225. nat/finetuning/__init__.py +24 -0
  226. nat/finetuning/finetuning_runtime.py +143 -0
  227. nat/finetuning/interfaces/__init__.py +24 -0
  228. nat/finetuning/interfaces/finetuning_runner.py +261 -0
  229. nat/finetuning/interfaces/trainer_adapter.py +103 -0
  230. nat/finetuning/interfaces/trajectory_builder.py +115 -0
  231. nat/finetuning/utils/__init__.py +15 -0
  232. nat/finetuning/utils/parsers/__init__.py +15 -0
  233. nat/finetuning/utils/parsers/adk_parser.py +141 -0
  234. nat/finetuning/utils/parsers/base_parser.py +238 -0
  235. nat/finetuning/utils/parsers/common.py +91 -0
  236. nat/finetuning/utils/parsers/langchain_parser.py +267 -0
  237. nat/finetuning/utils/parsers/llama_index_parser.py +218 -0
  238. nat/front_ends/__init__.py +1 -1
  239. nat/front_ends/console/__init__.py +1 -1
  240. nat/front_ends/console/authentication_flow_handler.py +1 -1
  241. nat/front_ends/console/console_front_end_config.py +4 -1
  242. nat/front_ends/console/console_front_end_plugin.py +5 -4
  243. nat/front_ends/console/register.py +1 -1
  244. nat/front_ends/cron/__init__.py +1 -1
  245. nat/front_ends/fastapi/__init__.py +1 -1
  246. nat/front_ends/fastapi/async_job.py +128 -0
  247. nat/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +1 -1
  248. nat/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +13 -9
  249. nat/front_ends/fastapi/dask_client_mixin.py +1 -1
  250. nat/front_ends/fastapi/fastapi_front_end_config.py +23 -1
  251. nat/front_ends/fastapi/fastapi_front_end_controller.py +1 -1
  252. nat/front_ends/fastapi/fastapi_front_end_plugin.py +25 -30
  253. nat/front_ends/fastapi/fastapi_front_end_plugin_worker.py +318 -59
  254. nat/front_ends/fastapi/html_snippets/__init__.py +1 -1
  255. nat/front_ends/fastapi/html_snippets/auth_code_grant_success.py +1 -1
  256. nat/front_ends/fastapi/intermediate_steps_subscriber.py +12 -1
  257. nat/front_ends/fastapi/job_store.py +23 -11
  258. nat/front_ends/fastapi/main.py +1 -1
  259. nat/front_ends/fastapi/message_handler.py +27 -4
  260. nat/front_ends/fastapi/message_validator.py +54 -2
  261. nat/front_ends/fastapi/register.py +1 -1
  262. nat/front_ends/fastapi/response_helpers.py +16 -15
  263. nat/front_ends/fastapi/step_adaptor.py +1 -1
  264. nat/front_ends/fastapi/utils.py +1 -1
  265. nat/front_ends/register.py +1 -2
  266. nat/front_ends/simple_base/__init__.py +1 -1
  267. nat/front_ends/simple_base/simple_front_end_plugin_base.py +6 -4
  268. nat/llm/aws_bedrock_llm.py +1 -1
  269. nat/llm/azure_openai_llm.py +10 -1
  270. nat/llm/dynamo_llm.py +363 -0
  271. nat/llm/huggingface_llm.py +177 -0
  272. nat/llm/litellm_llm.py +1 -1
  273. nat/llm/nim_llm.py +1 -1
  274. nat/llm/openai_llm.py +1 -1
  275. nat/llm/register.py +3 -1
  276. nat/llm/utils/__init__.py +1 -1
  277. nat/llm/utils/env_config_value.py +1 -1
  278. nat/llm/utils/error.py +1 -1
  279. nat/llm/utils/thinking.py +1 -1
  280. nat/memory/__init__.py +1 -1
  281. nat/memory/interfaces.py +1 -1
  282. nat/memory/models.py +1 -1
  283. nat/meta/pypi.md +1 -1
  284. nat/middleware/__init__.py +35 -0
  285. nat/middleware/cache/__init__.py +14 -0
  286. nat/middleware/cache/cache_middleware.py +253 -0
  287. nat/middleware/cache/cache_middleware_config.py +44 -0
  288. nat/middleware/cache/register.py +33 -0
  289. nat/middleware/defense/__init__.py +14 -0
  290. nat/middleware/defense/defense_middleware.py +362 -0
  291. nat/middleware/defense/defense_middleware_content_guard.py +455 -0
  292. nat/middleware/defense/defense_middleware_data_models.py +91 -0
  293. nat/middleware/defense/defense_middleware_output_verifier.py +440 -0
  294. nat/middleware/defense/defense_middleware_pii.py +356 -0
  295. nat/middleware/defense/register.py +82 -0
  296. nat/middleware/dynamic/__init__.py +14 -0
  297. nat/middleware/dynamic/dynamic_function_middleware.py +962 -0
  298. nat/middleware/dynamic/dynamic_middleware_config.py +132 -0
  299. nat/middleware/dynamic/register.py +34 -0
  300. nat/middleware/function_middleware.py +370 -0
  301. nat/middleware/logging/__init__.py +14 -0
  302. nat/middleware/logging/logging_middleware.py +67 -0
  303. nat/middleware/logging/logging_middleware_config.py +28 -0
  304. nat/middleware/logging/register.py +33 -0
  305. nat/middleware/middleware.py +298 -0
  306. nat/middleware/red_teaming/__init__.py +14 -0
  307. nat/middleware/red_teaming/red_teaming_middleware.py +344 -0
  308. nat/middleware/red_teaming/red_teaming_middleware_config.py +112 -0
  309. nat/middleware/red_teaming/register.py +47 -0
  310. nat/middleware/register.py +22 -0
  311. nat/middleware/utils/__init__.py +14 -0
  312. nat/middleware/utils/workflow_inventory.py +155 -0
  313. nat/object_store/__init__.py +1 -1
  314. nat/object_store/in_memory_object_store.py +1 -1
  315. nat/object_store/interfaces.py +1 -1
  316. nat/object_store/models.py +1 -1
  317. nat/object_store/register.py +1 -1
  318. nat/observability/__init__.py +1 -1
  319. nat/observability/exporter/__init__.py +1 -1
  320. nat/observability/exporter/base_exporter.py +1 -1
  321. nat/observability/exporter/exporter.py +1 -1
  322. nat/observability/exporter/file_exporter.py +1 -1
  323. nat/observability/exporter/processing_exporter.py +1 -1
  324. nat/observability/exporter/raw_exporter.py +1 -1
  325. nat/observability/exporter/span_exporter.py +7 -1
  326. nat/observability/exporter_manager.py +1 -1
  327. nat/observability/mixin/__init__.py +1 -1
  328. nat/observability/mixin/batch_config_mixin.py +1 -1
  329. nat/observability/mixin/collector_config_mixin.py +1 -1
  330. nat/observability/mixin/file_mixin.py +1 -1
  331. nat/observability/mixin/file_mode.py +1 -1
  332. nat/observability/mixin/redaction_config_mixin.py +1 -1
  333. nat/observability/mixin/resource_conflict_mixin.py +1 -1
  334. nat/observability/mixin/serialize_mixin.py +1 -1
  335. nat/observability/mixin/tagging_config_mixin.py +1 -1
  336. nat/observability/mixin/type_introspection_mixin.py +1 -1
  337. nat/observability/processor/__init__.py +1 -1
  338. nat/observability/processor/batching_processor.py +1 -1
  339. nat/observability/processor/callback_processor.py +1 -1
  340. nat/observability/processor/falsy_batch_filter_processor.py +1 -1
  341. nat/observability/processor/intermediate_step_serializer.py +1 -1
  342. nat/observability/processor/processor.py +1 -1
  343. nat/observability/processor/processor_factory.py +1 -1
  344. nat/observability/processor/redaction/__init__.py +1 -1
  345. nat/observability/processor/redaction/contextual_redaction_processor.py +1 -1
  346. nat/observability/processor/redaction/contextual_span_redaction_processor.py +1 -1
  347. nat/observability/processor/redaction/redaction_processor.py +1 -1
  348. nat/observability/processor/redaction/span_header_redaction_processor.py +1 -1
  349. nat/observability/processor/span_tagging_processor.py +1 -1
  350. nat/observability/register.py +1 -1
  351. nat/observability/utils/__init__.py +1 -1
  352. nat/observability/utils/dict_utils.py +1 -1
  353. nat/observability/utils/time_utils.py +1 -1
  354. nat/profiler/calc/__init__.py +1 -1
  355. nat/profiler/calc/calc_runner.py +3 -3
  356. nat/profiler/calc/calculations.py +1 -1
  357. nat/profiler/calc/data_models.py +1 -1
  358. nat/profiler/calc/plot.py +30 -3
  359. nat/profiler/callbacks/agno_callback_handler.py +1 -1
  360. nat/profiler/callbacks/base_callback_class.py +1 -1
  361. nat/profiler/callbacks/langchain_callback_handler.py +33 -3
  362. nat/profiler/callbacks/llama_index_callback_handler.py +13 -10
  363. nat/profiler/callbacks/semantic_kernel_callback_handler.py +1 -1
  364. nat/profiler/callbacks/token_usage_base_model.py +1 -1
  365. nat/profiler/data_frame_row.py +1 -1
  366. nat/profiler/data_models.py +1 -1
  367. nat/profiler/decorators/framework_wrapper.py +32 -1
  368. nat/profiler/decorators/function_tracking.py +1 -1
  369. nat/profiler/forecasting/config.py +1 -1
  370. nat/profiler/forecasting/model_trainer.py +1 -1
  371. nat/profiler/forecasting/models/__init__.py +1 -1
  372. nat/profiler/forecasting/models/forecasting_base_model.py +1 -1
  373. nat/profiler/forecasting/models/linear_model.py +1 -1
  374. nat/profiler/forecasting/models/random_forest_regressor.py +1 -1
  375. nat/profiler/inference_metrics_model.py +1 -1
  376. nat/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +1 -1
  377. nat/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +1 -1
  378. nat/profiler/inference_optimization/data_models.py +1 -1
  379. nat/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +1 -1
  380. nat/profiler/inference_optimization/experimental/prefix_span_analysis.py +1 -1
  381. nat/profiler/inference_optimization/llm_metrics.py +1 -1
  382. nat/profiler/inference_optimization/prompt_caching.py +1 -1
  383. nat/profiler/inference_optimization/token_uniqueness.py +1 -1
  384. nat/profiler/inference_optimization/workflow_runtimes.py +1 -1
  385. nat/profiler/intermediate_property_adapter.py +1 -1
  386. nat/profiler/parameter_optimization/optimizable_utils.py +1 -1
  387. nat/profiler/parameter_optimization/optimizer_runtime.py +1 -1
  388. nat/profiler/parameter_optimization/parameter_optimizer.py +1 -1
  389. nat/profiler/parameter_optimization/parameter_selection.py +1 -1
  390. nat/profiler/parameter_optimization/pareto_visualizer.py +1 -1
  391. nat/profiler/parameter_optimization/prompt_optimizer.py +1 -1
  392. nat/profiler/parameter_optimization/update_helpers.py +1 -1
  393. nat/profiler/profile_runner.py +1 -1
  394. nat/profiler/utils.py +1 -1
  395. nat/registry_handlers/local/local_handler.py +1 -1
  396. nat/registry_handlers/local/register_local.py +1 -1
  397. nat/registry_handlers/metadata_factory.py +1 -1
  398. nat/registry_handlers/package_utils.py +1 -1
  399. nat/registry_handlers/pypi/pypi_handler.py +1 -1
  400. nat/registry_handlers/pypi/register_pypi.py +1 -1
  401. nat/registry_handlers/register.py +1 -1
  402. nat/registry_handlers/registry_handler_base.py +1 -1
  403. nat/registry_handlers/rest/register_rest.py +1 -1
  404. nat/registry_handlers/rest/rest_handler.py +1 -1
  405. nat/registry_handlers/schemas/headers.py +1 -1
  406. nat/registry_handlers/schemas/package.py +1 -1
  407. nat/registry_handlers/schemas/publish.py +1 -1
  408. nat/registry_handlers/schemas/pull.py +1 -1
  409. nat/registry_handlers/schemas/remove.py +1 -1
  410. nat/registry_handlers/schemas/search.py +1 -1
  411. nat/registry_handlers/schemas/status.py +1 -1
  412. nat/retriever/interface.py +1 -1
  413. nat/retriever/milvus/__init__.py +1 -1
  414. nat/retriever/milvus/register.py +12 -4
  415. nat/retriever/milvus/retriever.py +103 -41
  416. nat/retriever/models.py +1 -1
  417. nat/retriever/nemo_retriever/__init__.py +1 -1
  418. nat/retriever/nemo_retriever/register.py +1 -1
  419. nat/retriever/nemo_retriever/retriever.py +5 -5
  420. nat/retriever/register.py +1 -1
  421. nat/runtime/__init__.py +1 -1
  422. nat/runtime/loader.py +10 -3
  423. nat/runtime/metrics.py +180 -0
  424. nat/runtime/runner.py +13 -6
  425. nat/runtime/session.py +458 -32
  426. nat/runtime/user_metadata.py +1 -1
  427. nat/settings/global_settings.py +1 -1
  428. nat/tool/chat_completion.py +1 -1
  429. nat/tool/code_execution/README.md +1 -1
  430. nat/tool/code_execution/code_sandbox.py +2 -2
  431. nat/tool/code_execution/local_sandbox/Dockerfile.sandbox +1 -1
  432. nat/tool/code_execution/local_sandbox/__init__.py +1 -1
  433. nat/tool/code_execution/local_sandbox/local_sandbox_server.py +1 -1
  434. nat/tool/code_execution/local_sandbox/start_local_sandbox.sh +1 -1
  435. nat/tool/code_execution/register.py +1 -1
  436. nat/tool/code_execution/utils.py +1 -1
  437. nat/tool/datetime_tools.py +1 -1
  438. nat/tool/document_search.py +1 -1
  439. nat/tool/github_tools.py +1 -1
  440. nat/tool/memory_tools/add_memory_tool.py +1 -1
  441. nat/tool/memory_tools/delete_memory_tool.py +1 -1
  442. nat/tool/memory_tools/get_memory_tool.py +1 -1
  443. nat/tool/nvidia_rag.py +2 -2
  444. nat/tool/register.py +1 -1
  445. nat/tool/retriever.py +1 -1
  446. nat/tool/server_tools.py +1 -1
  447. nat/utils/__init__.py +8 -5
  448. nat/utils/callable_utils.py +1 -1
  449. nat/utils/data_models/schema_validator.py +1 -1
  450. nat/utils/debugging_utils.py +1 -1
  451. nat/utils/decorators.py +1 -1
  452. nat/utils/dump_distro_mapping.py +1 -1
  453. nat/utils/exception_handlers/automatic_retries.py +3 -3
  454. nat/utils/exception_handlers/schemas.py +1 -1
  455. nat/utils/io/model_processing.py +1 -1
  456. nat/utils/io/supress_logs.py +33 -0
  457. nat/utils/io/yaml_tools.py +1 -1
  458. nat/utils/log_levels.py +1 -1
  459. nat/utils/log_utils.py +13 -1
  460. nat/utils/metadata_utils.py +1 -1
  461. nat/utils/optional_imports.py +1 -1
  462. nat/utils/producer_consumer_queue.py +1 -1
  463. nat/utils/reactive/base/observable_base.py +1 -1
  464. nat/utils/reactive/base/observer_base.py +1 -1
  465. nat/utils/reactive/base/subject_base.py +1 -1
  466. nat/utils/reactive/observable.py +1 -1
  467. nat/utils/reactive/observer.py +1 -1
  468. nat/utils/reactive/subject.py +1 -1
  469. nat/utils/reactive/subscription.py +1 -1
  470. nat/utils/responses_api.py +1 -1
  471. nat/utils/settings/global_settings.py +1 -1
  472. nat/utils/string_utils.py +1 -1
  473. nat/utils/type_converter.py +18 -5
  474. nat/utils/type_utils.py +1 -1
  475. nat/utils/url_utils.py +1 -1
  476. {nvidia_nat-1.4.0a20251112.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/METADATA +46 -15
  477. nvidia_nat-1.4.0a20260113.dist-info/RECORD +547 -0
  478. nvidia_nat-1.4.0a20260113.dist-info/entry_points.txt +38 -0
  479. nat/cli/commands/mcp/mcp.py +0 -986
  480. nat/front_ends/mcp/introspection_token_verifier.py +0 -73
  481. nat/front_ends/mcp/mcp_front_end_config.py +0 -109
  482. nat/front_ends/mcp/mcp_front_end_plugin.py +0 -151
  483. nat/front_ends/mcp/mcp_front_end_plugin_worker.py +0 -362
  484. nat/front_ends/mcp/memory_profiler.py +0 -320
  485. nat/front_ends/mcp/register.py +0 -27
  486. nat/front_ends/mcp/tool_converter.py +0 -321
  487. nvidia_nat-1.4.0a20251112.dist-info/RECORD +0 -481
  488. nvidia_nat-1.4.0a20251112.dist-info/entry_points.txt +0 -22
  489. {nvidia_nat-1.4.0a20251112.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/WHEEL +0 -0
  490. {nvidia_nat-1.4.0a20251112.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
  491. {nvidia_nat-1.4.0a20251112.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE.md +0 -0
  492. {nvidia_nat-1.4.0a20251112.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/top_level.txt +0 -0
nat/llm/dynamo_llm.py ADDED
@@ -0,0 +1,363 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """
16
+ Dynamo LLM provider with automatic prefix header injection for KV cache optimization.
17
+
18
+ This module provides a specialized OpenAI-compatible LLM that sends Dynamo prefix headers
19
+ for optimal KV cache management and request routing. The prefix parameters are optimizable
20
+ via the NAT optimizer.
21
+
22
+ The implementation uses httpx event hooks to inject headers at the HTTP transport level,
23
+ making it framework-agnostic (works with LangChain, LlamaIndex, etc.).
24
+
25
+ Dynamo Prefix Parameters
26
+ -------------------------
27
+
28
+ prefix_osl (Output Sequence Length)
29
+ Hint for expected response length:
30
+
31
+ - LOW: decode_cost=1.0, short responses
32
+ - MEDIUM: decode_cost=2.0, typical responses
33
+ - HIGH: decode_cost=3.0, long responses
34
+
35
+ prefix_iat (Inter-Arrival Time)
36
+ Hint for request pacing:
37
+
38
+ - LOW: iat_factor=1.5, rapid bursts -> high worker stickiness
39
+ - MEDIUM: iat_factor=1.0, normal pacing
40
+ - HIGH: iat_factor=0.6, slow requests -> more exploration
41
+
42
+ prefix_total_requests
43
+ Expected requests per conversation:
44
+
45
+ - Higher values increase KV cache affinity and worker stickiness
46
+ - Lower values allow more load balancing
47
+ """
48
+
49
+ import logging
50
+ import uuid
51
+ from collections.abc import Callable
52
+ from collections.abc import Coroutine
53
+ from collections.abc import Iterator
54
+ from contextlib import contextmanager
55
+ from contextvars import ContextVar
56
+ from typing import TYPE_CHECKING
57
+ from typing import Any
58
+ from typing import Literal
59
+
60
+ if TYPE_CHECKING:
61
+ import httpx
62
+
63
+ from pydantic import Field
64
+
65
+ from nat.builder.builder import Builder
66
+ from nat.builder.llm import LLMProviderInfo
67
+ from nat.cli.register_workflow import register_llm_provider
68
+ from nat.data_models.optimizable import OptimizableField
69
+ from nat.data_models.optimizable import SearchSpace
70
+ from nat.llm.openai_llm import OpenAIModelConfig
71
+
72
+ logger = logging.getLogger(__name__)
73
+
74
+ # Define valid prefix hint values
75
+ PrefixLevel = Literal["LOW", "MEDIUM", "HIGH"]
76
+
77
+ # =============================================================================
78
+ # CONTEXT MANAGEMENT FOR DYNAMO PREFIX ID
79
+ # =============================================================================
80
+
81
+
82
+ class DynamoPrefixContext:
83
+ """
84
+ Singleton class for managing Dynamo prefix IDs across LLM calls.
85
+
86
+ This allows evaluation code to set a prefix ID that persists across all LLM
87
+ calls for a single evaluation question (multi-turn conversation).
88
+
89
+ Usage::
90
+
91
+ from nat.llm.dynamo_llm import DynamoPrefixContext
92
+
93
+ # Set prefix ID at the start of each evaluation question
94
+ DynamoPrefixContext.set("eval-q001-abc123")
95
+
96
+ # ... perform LLM calls ...
97
+
98
+ # Clear when done
99
+ DynamoPrefixContext.clear()
100
+
101
+ # Or use as a context manager
102
+ with DynamoPrefixContext.scope("eval-q001-abc123"):
103
+ # ... perform LLM calls ...
104
+ """
105
+
106
+ _current_prefix_id: ContextVar[str | None] = ContextVar('dynamo_prefix_id', default=None)
107
+
108
+ @classmethod
109
+ def set(cls, prefix_id: str) -> None:
110
+ """
111
+ Set the Dynamo prefix ID for the current context.
112
+
113
+ Call this at the start of each evaluation question to ensure all LLM calls
114
+ for that question share the same prefix ID (enabling KV cache reuse).
115
+
116
+ Args:
117
+ prefix_id: The unique prefix ID (e.g., "eval-q001-abc123")
118
+ """
119
+ cls._current_prefix_id.set(prefix_id)
120
+ logger.debug("Set Dynamo prefix ID: %s", prefix_id)
121
+
122
+ @classmethod
123
+ def clear(cls) -> None:
124
+ """Clear the current Dynamo prefix ID context."""
125
+ cls._current_prefix_id.set(None)
126
+ logger.debug("Cleared Dynamo prefix ID")
127
+
128
+ @classmethod
129
+ def get(cls) -> str | None:
130
+ """Get the current Dynamo prefix ID from context, if any."""
131
+ return cls._current_prefix_id.get()
132
+
133
+ @classmethod
134
+ @contextmanager
135
+ def scope(cls, prefix_id: str) -> Iterator[None]:
136
+ """
137
+ Context manager for scoped prefix ID usage.
138
+
139
+ Automatically sets the prefix ID on entry and clears it on exit,
140
+ ensuring proper cleanup even if exceptions occur.
141
+
142
+ Args:
143
+ prefix_id: The unique prefix ID for this scope
144
+
145
+ Yields:
146
+ None
147
+
148
+ Usage:
149
+ with DynamoPrefixContext.scope("eval-q001"):
150
+ # All LLM calls here will use "eval-q001" prefix
151
+ await llm.ainvoke(...)
152
+ """
153
+ cls.set(prefix_id)
154
+ try:
155
+ yield
156
+ finally:
157
+ cls.clear()
158
+
159
+
160
+ # =============================================================================
161
+ # DYNAMO MODEL CONFIGURATION
162
+ # =============================================================================
163
+
164
+
165
class DynamoModelConfig(OpenAIModelConfig, name="dynamo"):
    """
    A Dynamo LLM provider with automatic prefix header injection for KV cache optimization.

    This is a specialized OpenAI-compatible LLM that sends Dynamo prefix headers
    for optimal KV cache management and request routing. Prefix headers are enabled
    by default using the template "nat-dynamo-{uuid}". The prefix routing parameters
    (prefix_total_requests, prefix_osl, prefix_iat) are optimizable via the NAT optimizer.

    To disable prefix headers, set prefix_template to null/None in your config.
    """

    # ---- Dynamo prefix parameters -------------------------------------------

    # Template used to build the prefix ID; "{uuid}" is the only placeholder.
    prefix_template: str | None = Field(
        default="nat-dynamo-{uuid}",
        description=("Template for prefix ID. The {uuid} placeholder will be replaced with a unique ID. "
                     "Prefix headers are sent by default for KV cache optimization. "
                     "Set to null/None to disable prefix header injection."),
    )

    # Validated to 1..50; the optimizer search space covers 1..20 in steps of 5.
    prefix_total_requests: int = OptimizableField(
        default=10,
        ge=1,
        le=50,
        description=("Expected number of requests for this conversation/prefix. "
                     "Higher values increase worker stickiness and KV cache locality. "
                     "Lower values allow more load balancing across workers."),
        space=SearchSpace(low=1, high=20, step=5),
    )

    prefix_osl: PrefixLevel = OptimizableField(
        default="MEDIUM",
        description=("Output Sequence Length hint for the Dynamo router. "
                     "LOW=short responses (decode_cost=1.0), "
                     "MEDIUM=typical (decode_cost=2.0), "
                     "HIGH=long responses (decode_cost=3.0)."),
        space=SearchSpace(values=["LOW", "MEDIUM", "HIGH"]),
    )

    prefix_iat: PrefixLevel = OptimizableField(
        default="MEDIUM",
        description=("Inter-Arrival Time hint for the Dynamo router. "
                     "LOW=rapid bursts (iat_factor=1.5, high stickiness), "
                     "MEDIUM=normal (iat_factor=1.0), "
                     "HIGH=slow requests (iat_factor=0.6, more exploration)."),
        space=SearchSpace(values=["LOW", "MEDIUM", "HIGH"]),
    )

    # Must be strictly positive (gt=0.0).
    request_timeout: float = Field(
        default=600.0,
        gt=0.0,
        description="HTTP request timeout in seconds for LLM requests.",
    )

    # ---- Utility methods ----------------------------------------------------

    @staticmethod
    def get_dynamo_field_names() -> frozenset[str]:
        """
        Return the names of the fields declared by this class for Dynamo.

        Intended for ``model_dump(exclude=...)`` when building config dicts for
        framework clients, so Dynamo-specific parameters are not forwarded to
        the underlying client.

        Returns:
            A frozenset of Dynamo-specific field names.

        Example::

            config_dict = config.model_dump(
                exclude={"type", "thinking", *DynamoModelConfig.get_dynamo_field_names()},
                ...
            )
        """
        dynamo_only_fields = (
            "prefix_template",
            "prefix_total_requests",
            "prefix_osl",
            "prefix_iat",
            "request_timeout",
        )
        return frozenset(dynamo_only_fields)
246
+
247
+
248
+ # =============================================================================
249
+ # HTTPX EVENT HOOK FOR HEADER INJECTION
250
+ # =============================================================================
251
+
252
+
253
def _create_dynamo_request_hook(
    prefix_template: str | None,
    total_requests: int,
    osl: str,
    iat: str,
) -> Callable[["httpx.Request"], Coroutine[Any, Any, None]]:
    """
    Build the async httpx "request" event hook that stamps Dynamo prefix headers.

    A default prefix ID is computed exactly once, when this factory runs, so
    every request sent through the same hook carries the same ID unless a
    prefix ID is active in ``DynamoPrefixContext``, which takes precedence
    (e.g. per-question IDs in batch evaluation).

    Args:
        prefix_template: Template string with {uuid} placeholder. A falsy value
            falls back to the built-in "nat-dynamo-<id>" form.
        total_requests: Expected number of requests for this prefix
        osl: Output sequence length hint (LOW/MEDIUM/HIGH)
        iat: Inter-arrival time hint (LOW/MEDIUM/HIGH)

    Returns:
        An async function suitable for use as an httpx event hook.
    """
    # 16 hex characters of a random UUID, fixed for the lifetime of this hook.
    random_suffix = uuid.uuid4().hex[:16]
    if not prefix_template:
        default_prefix_id = f"nat-dynamo-{random_suffix}"
    else:
        default_prefix_id = prefix_template.format(uuid=random_suffix)

    logger.debug("Created Dynamo request hook with default prefix ID: %s", default_prefix_id)

    async def on_request(request):
        """Inject Dynamo prefix headers before each request."""
        # A context-level ID (if any) wins over the hook's pre-generated one.
        override_id = DynamoPrefixContext.get()
        if not override_id:
            prefix_id = default_prefix_id
            logger.debug("Using default prefix ID: %s", prefix_id)
        else:
            prefix_id = override_id
            logger.debug("Using context prefix ID: %s", prefix_id)

        osl_hint = osl.upper()
        iat_hint = iat.upper()

        dynamo_headers = (
            ("x-prefix-id", prefix_id),
            ("x-prefix-total-requests", str(total_requests)),
            ("x-prefix-osl", osl_hint),
            ("x-prefix-iat", iat_hint),
        )
        for header_name, header_value in dynamo_headers:
            request.headers[header_name] = header_value

        logger.debug("Injected Dynamo headers: prefix_id=%s, total_requests=%d, osl=%s, iat=%s",
                     prefix_id,
                     total_requests,
                     osl_hint,
                     iat_hint)

    return on_request
315
+
316
+
317
def create_httpx_client_with_dynamo_hooks(
    prefix_template: str | None,
    total_requests: int,
    osl: str,
    iat: str,
    timeout: float = 600.0,
) -> "httpx.AsyncClient":
    """
    Build an ``httpx.AsyncClient`` whose requests carry Dynamo prefix headers.

    The headers are added by a "request" event hook, i.e. at the HTTP layer,
    so the client can be handed to the OpenAI SDK regardless of the agent
    framework in use.

    Args:
        prefix_template: Template string with {uuid} placeholder
        total_requests: Expected number of requests for this prefix
        osl: Output sequence length hint (LOW/MEDIUM/HIGH)
        iat: Inter-arrival time hint (LOW/MEDIUM/HIGH)
        timeout: HTTP request timeout in seconds

    Returns:
        An httpx.AsyncClient configured with Dynamo header injection.
    """
    # Imported lazily so httpx is only needed when a client is actually built.
    import httpx

    event_hooks = {"request": [_create_dynamo_request_hook(prefix_template, total_requests, osl, iat)]}
    return httpx.AsyncClient(event_hooks=event_hooks, timeout=httpx.Timeout(timeout))
348
+
349
+
350
+ # =============================================================================
351
+ # PROVIDER REGISTRATION
352
+ # =============================================================================
353
+ # Note: Client registrations for each framework (LangChain, LlamaIndex, etc.)
354
+ # are in the respective plugin packages under packages/nvidia_nat_<framework>/
355
+
356
+
357
@register_llm_provider(config_type=DynamoModelConfig)
async def dynamo_llm(config: DynamoModelConfig, _builder: Builder):
    """Register the Dynamo LLM provider.

    Yields a single ``LLMProviderInfo`` wrapping ``config``; the builder
    argument is not used.
    """
    provider_info = LLMProviderInfo(
        config=config,
        description="A Dynamo-optimized model with automatic prefix headers for KV cache management.",
    )
    yield provider_info
@@ -0,0 +1,177 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """HuggingFace Transformers LLM Provider - Local in-process model execution."""
16
+
17
+ import logging
18
+ from collections.abc import AsyncIterator
19
+ from dataclasses import dataclass
20
+ from typing import Any
21
+
22
+ from pydantic import Field
23
+
24
+ from nat.builder.builder import Builder
25
+ from nat.builder.llm import LLMProviderInfo
26
+ from nat.cli.register_workflow import register_llm_provider
27
+ from nat.data_models.llm import LLMBaseConfig
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+
32
@dataclass
class ModelCacheEntry:
    """Mutable record of everything cached for one loaded HuggingFace model."""

    # The loaded model object.
    model: Any
    # The tokenizer loaded alongside the model.
    tokenizer: Any
    # Reference to the imported ``torch`` module, kept for later cleanup.
    torch: Any
37
+
38
+
39
class ModelCache:
    """Process-wide singleton mapping model names to loaded HuggingFace models.

    Entries stay cached for the provider's lifetime (not per-query!) so models can be reused:
    - During nat serve: Cached while server runs, cleaned up on shutdown
    - During nat red-team: Cached across all evaluation queries, cleaned up when complete
    - During nat run: Cached for single workflow execution, cleaned up when done
    """

    _instance: "ModelCache | None" = None
    _cache: dict[str, ModelCacheEntry]

    def __new__(cls) -> "ModelCache":
        # Every construction returns the same object; the backing dict is
        # created only the first time.
        existing = cls._instance
        if existing is not None:
            return existing
        created = super().__new__(cls)
        created._cache = {}
        cls._instance = created
        return created

    def get(self, model_name: str) -> ModelCacheEntry | None:
        """Look up the entry for ``model_name``; ``None`` when it is not cached."""
        try:
            return self._cache[model_name]
        except KeyError:
            return None

    def set(self, model_name: str, data: ModelCacheEntry) -> None:
        """Store ``data`` under ``model_name``, replacing any existing entry."""
        self._cache[model_name] = data

    def remove(self, model_name: str) -> None:
        """Drop ``model_name`` from the cache; a missing name is ignored."""
        if model_name in self._cache:
            del self._cache[model_name]

    def __contains__(self, model_name: str) -> bool:
        """Support ``name in cache`` membership tests."""
        return model_name in self._cache
72
+
73
+
74
class HuggingFaceConfig(LLMBaseConfig, name="huggingface"):
    """Configuration for HuggingFace LLM - loads model directly for local execution."""

    # Required: no default is provided for the model identifier.
    model_name: str = Field(
        description="HuggingFace model name (e.g. 'Qwen/Qwen3Guard-Gen-0.6B')",
    )

    device: str = Field(
        default="auto",
        description="Device: 'cpu', 'cuda', 'cuda:0', or 'auto'",
    )

    dtype: str | None = Field(
        default="auto",
        description="Torch dtype: 'float16', 'bfloat16', 'float32', or 'auto'",
    )

    max_new_tokens: int = Field(
        default=128,
        description="Maximum number of new tokens to generate",
    )

    temperature: float = Field(
        default=0.0,
        description="Sampling temperature (0 = deterministic greedy, > 0 = sampling enabled)",
    )

    trust_remote_code: bool = Field(
        default=False,
        description="Trust remote code when loading model",
    )
89
+
90
+
91
def get_cached_model(model_name: str) -> ModelCacheEntry | None:
    """Fetch the cached entry (model, tokenizer, torch) for ``model_name``.

    Returns:
        The cached ``ModelCacheEntry``, or ``None`` if the model is not loaded.
    """
    shared_cache = ModelCache()
    return shared_cache.get(model_name)
94
+
95
+
96
async def _cleanup_model(model_name: str) -> None:
    """Clean up a loaded model and free GPU memory.

    Best-effort: any exception is logged and swallowed so shutdown can
    proceed. The cache entry is evicted even when moving the model off the GPU
    fails, so a later load never picks up a half-cleaned entry.

    Args:
        model_name: Name of the model to clean up.
    """
    try:
        cache = ModelCache()
        cached = cache.get(model_name)

        if cached is not None:
            # Keep a handle on torch; the entry's own reference is dropped below.
            torch_module = cached.torch
            try:
                # Move model to CPU to free GPU memory
                if cached.model is not None:
                    cached.model.to("cpu")
            finally:
                # Always release references and evict, even if the move failed;
                # otherwise the stale entry would keep the model alive.
                cached.model = None
                cached.tokenizer = None
                cached.torch = None
                cache.remove(model_name)

            # Clear CUDA cache if available (after the model reference is gone)
            if torch_module is not None and hasattr(torch_module.cuda, "empty_cache"):
                torch_module.cuda.empty_cache()

        logger.debug("Model cleaned up: %s", model_name)
    except Exception:
        logger.exception("Error cleaning up HuggingFace model '%s'", model_name)
125
+
126
+
127
@register_llm_provider(config_type=HuggingFaceConfig)
async def huggingface_provider(
    config: HuggingFaceConfig,
    builder: Builder,  # noqa: ARG001 - kept for provider interface, currently unused
) -> AsyncIterator[LLMProviderInfo]:
    """HuggingFace model provider - loads models locally for in-process execution.

    The model and tokenizer are loaded on first use and stored in the shared
    ``ModelCache`` keyed by ``config.model_name``; later calls for the same
    name reuse the cached entry. The model is cleaned up when the generator
    is finalized.

    Args:
        config: Configuration for the HuggingFace model.
        builder: The NAT builder instance.

    Yields:
        LLMProviderInfo: Provider information for the loaded model.

    Raises:
        ValueError: If ``torch`` or ``transformers`` cannot be imported. The
            original ``ImportError`` is attached as the cause.
    """
    try:
        import torch
        from transformers import AutoModelForCausalLM
        from transformers import AutoTokenizer
    except ImportError as err:
        # Chain the ImportError so the traceback shows which package is missing.
        raise ValueError("HuggingFace dependencies not installed. \n"
                         "Install with:\n"
                         " `pip install nvidia-nat[huggingface]` (package) or\n"
                         " `uv pip install -e '.[huggingface]'` (source)\n") from err

    cache = ModelCache()

    # Load model if not cached
    if config.model_name not in cache:
        logger.debug("Loading model from HuggingFace: %s", config.model_name)

        # Load tokenizer
        tokenizer = AutoTokenizer.from_pretrained(config.model_name, trust_remote_code=config.trust_remote_code)

        # Load model
        model = AutoModelForCausalLM.from_pretrained(config.model_name,
                                                     dtype=config.dtype,
                                                     device_map=config.device,
                                                     trust_remote_code=config.trust_remote_code)

        # Cache it (torch is stored so cleanup can reach it without re-importing)
        cache.set(config.model_name, ModelCacheEntry(model=model, tokenizer=tokenizer, torch=torch))

        logger.debug("Model loaded: %s on device: %s", config.model_name, config.device)
    else:
        logger.debug("Using cached model: %s", config.model_name)

    try:
        yield LLMProviderInfo(config=config, description=f"HuggingFace model: {config.model_name}")
    finally:
        # Cleanup when workflow/application shuts down
        await _cleanup_model(config.model_name)
nat/llm/litellm_llm.py CHANGED
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
nat/llm/nim_llm.py CHANGED
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
nat/llm/openai_llm.py CHANGED
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
nat/llm/register.py CHANGED
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -22,6 +22,8 @@ This module is imported by the NeMo Agent Toolkit runtime to ensure providers ar
22
22
  # Import any providers which need to be automatically registered here
23
23
  from . import aws_bedrock_llm
24
24
  from . import azure_openai_llm
25
+ from . import dynamo_llm
26
+ from . import huggingface_llm
25
27
  from . import litellm_llm
26
28
  from . import nim_llm
27
29
  from . import openai_llm
nat/llm/utils/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
nat/llm/utils/error.py CHANGED
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
nat/llm/utils/thinking.py CHANGED
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
nat/memory/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
nat/memory/interfaces.py CHANGED
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
nat/memory/models.py CHANGED
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
nat/meta/pypi.md CHANGED
@@ -1,5 +1,5 @@
1
1
  <!--
2
- SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
3
  SPDX-License-Identifier: Apache-2.0
4
4
 
5
5
  Licensed under the Apache License, Version 2.0 (the "License");
@@ -0,0 +1,35 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Middleware implementations for NeMo Agent Toolkit."""
16
+
17
+ from nat.middleware.function_middleware import FunctionMiddleware
18
+ from nat.middleware.function_middleware import FunctionMiddlewareChain
19
+ from nat.middleware.function_middleware import validate_middleware
20
+ from nat.middleware.middleware import CallNext
21
+ from nat.middleware.middleware import CallNextStream
22
+ from nat.middleware.middleware import FunctionMiddlewareContext
23
+ from nat.middleware.middleware import Middleware
24
+ from nat.middleware.red_teaming.red_teaming_middleware import RedTeamingMiddleware
25
+
26
+ __all__ = [
27
+ "CallNext",
28
+ "CallNextStream",
29
+ "FunctionMiddleware",
30
+ "FunctionMiddlewareChain",
31
+ "FunctionMiddlewareContext",
32
+ "Middleware",
33
+ "RedTeamingMiddleware",
34
+ "validate_middleware",
35
+ ]