nvidia-nat 1.4.0a20251112__py3-none-any.whl → 1.4.0a20260113__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiq/__init__.py +1 -1
- nat/{front_ends/mcp → agent/auto_memory_wrapper}/__init__.py +1 -1
- nat/agent/auto_memory_wrapper/agent.py +278 -0
- nat/agent/auto_memory_wrapper/register.py +227 -0
- nat/agent/auto_memory_wrapper/state.py +30 -0
- nat/agent/base.py +1 -1
- nat/agent/dual_node.py +1 -1
- nat/agent/prompt_optimizer/prompt.py +1 -1
- nat/agent/prompt_optimizer/register.py +1 -1
- nat/agent/react_agent/agent.py +16 -9
- nat/agent/react_agent/output_parser.py +2 -2
- nat/agent/react_agent/prompt.py +3 -2
- nat/agent/react_agent/register.py +2 -2
- nat/agent/react_agent/register_per_user_agent.py +104 -0
- nat/agent/reasoning_agent/reasoning_agent.py +1 -1
- nat/agent/register.py +3 -1
- nat/agent/responses_api_agent/__init__.py +1 -1
- nat/agent/responses_api_agent/register.py +1 -1
- nat/agent/rewoo_agent/agent.py +9 -4
- nat/agent/rewoo_agent/prompt.py +1 -1
- nat/agent/rewoo_agent/register.py +1 -1
- nat/agent/tool_calling_agent/agent.py +5 -4
- nat/agent/tool_calling_agent/register.py +1 -1
- nat/authentication/__init__.py +1 -1
- nat/authentication/api_key/__init__.py +1 -1
- nat/authentication/api_key/api_key_auth_provider.py +1 -1
- nat/authentication/api_key/api_key_auth_provider_config.py +22 -7
- nat/authentication/api_key/register.py +1 -1
- nat/authentication/credential_validator/__init__.py +1 -1
- nat/authentication/credential_validator/bearer_token_validator.py +1 -1
- nat/authentication/exceptions/__init__.py +1 -1
- nat/authentication/exceptions/api_key_exceptions.py +1 -1
- nat/authentication/http_basic_auth/http_basic_auth_provider.py +1 -1
- nat/authentication/http_basic_auth/register.py +1 -1
- nat/authentication/interfaces.py +1 -1
- nat/authentication/oauth2/__init__.py +1 -1
- nat/authentication/oauth2/oauth2_auth_code_flow_provider.py +1 -1
- nat/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +1 -1
- nat/authentication/oauth2/oauth2_resource_server_config.py +1 -1
- nat/authentication/oauth2/register.py +1 -1
- nat/authentication/register.py +1 -1
- nat/builder/builder.py +563 -1
- nat/builder/child_builder.py +385 -0
- nat/builder/component_utils.py +34 -4
- nat/builder/context.py +34 -1
- nat/builder/embedder.py +1 -1
- nat/builder/eval_builder.py +19 -7
- nat/builder/evaluator.py +1 -1
- nat/builder/framework_enum.py +3 -1
- nat/builder/front_end.py +1 -1
- nat/builder/function.py +113 -5
- nat/builder/function_base.py +1 -1
- nat/builder/function_info.py +1 -1
- nat/builder/intermediate_step_manager.py +1 -1
- nat/builder/llm.py +1 -1
- nat/builder/per_user_workflow_builder.py +843 -0
- nat/builder/retriever.py +1 -1
- nat/builder/sync_builder.py +571 -0
- nat/builder/user_interaction_manager.py +1 -1
- nat/builder/workflow.py +5 -3
- nat/builder/workflow_builder.py +619 -378
- nat/cli/__init__.py +1 -1
- nat/cli/cli_utils/config_override.py +1 -1
- nat/cli/cli_utils/validation.py +32 -1
- nat/cli/commands/configure/channel/add.py +1 -1
- nat/cli/commands/configure/channel/channel.py +1 -1
- nat/cli/commands/configure/channel/remove.py +1 -1
- nat/cli/commands/configure/channel/update.py +1 -1
- nat/cli/commands/configure/configure.py +1 -1
- nat/cli/commands/evaluate.py +87 -13
- nat/cli/commands/finetune.py +132 -0
- nat/cli/commands/info/__init__.py +1 -1
- nat/cli/commands/info/info.py +1 -1
- nat/cli/commands/info/list_channels.py +1 -1
- nat/cli/commands/info/list_components.py +1 -1
- nat/cli/commands/object_store/__init__.py +1 -1
- nat/cli/commands/object_store/object_store.py +1 -1
- nat/cli/commands/optimize.py +1 -1
- nat/cli/commands/{mcp → red_teaming}/__init__.py +1 -1
- nat/cli/commands/red_teaming/red_teaming.py +138 -0
- nat/cli/commands/red_teaming/red_teaming_utils.py +73 -0
- nat/cli/commands/registry/__init__.py +1 -1
- nat/cli/commands/registry/publish.py +1 -1
- nat/cli/commands/registry/pull.py +1 -1
- nat/cli/commands/registry/registry.py +1 -1
- nat/cli/commands/registry/remove.py +1 -1
- nat/cli/commands/registry/search.py +1 -1
- nat/cli/commands/sizing/__init__.py +1 -1
- nat/cli/commands/sizing/calc.py +1 -1
- nat/cli/commands/sizing/sizing.py +1 -1
- nat/cli/commands/start.py +1 -1
- nat/cli/commands/uninstall.py +1 -1
- nat/cli/commands/validate.py +1 -1
- nat/cli/commands/workflow/__init__.py +1 -1
- nat/cli/commands/workflow/workflow.py +1 -1
- nat/cli/commands/workflow/workflow_commands.py +3 -2
- nat/cli/entrypoint.py +15 -37
- nat/cli/main.py +2 -2
- nat/cli/plugin_loader.py +69 -0
- nat/cli/register_workflow.py +233 -5
- nat/cli/type_registry.py +237 -3
- nat/control_flow/register.py +1 -1
- nat/control_flow/router_agent/agent.py +1 -1
- nat/control_flow/router_agent/prompt.py +1 -1
- nat/control_flow/router_agent/register.py +1 -1
- nat/control_flow/sequential_executor.py +28 -7
- nat/data_models/__init__.py +1 -1
- nat/data_models/agent.py +1 -1
- nat/data_models/api_server.py +38 -3
- nat/data_models/authentication.py +1 -1
- nat/data_models/common.py +1 -1
- nat/data_models/component.py +9 -1
- nat/data_models/component_ref.py +45 -1
- nat/data_models/config.py +78 -1
- nat/data_models/dataset_handler.py +15 -2
- nat/data_models/discovery_metadata.py +1 -1
- nat/data_models/embedder.py +1 -1
- nat/data_models/evaluate.py +6 -1
- nat/data_models/evaluator.py +1 -1
- nat/data_models/finetuning.py +260 -0
- nat/data_models/front_end.py +1 -1
- nat/data_models/function.py +15 -2
- nat/data_models/function_dependencies.py +1 -1
- nat/data_models/gated_field_mixin.py +1 -1
- nat/data_models/interactive.py +1 -1
- nat/data_models/intermediate_step.py +29 -2
- nat/data_models/invocation_node.py +1 -1
- nat/data_models/llm.py +1 -1
- nat/data_models/logging.py +1 -1
- nat/data_models/memory.py +1 -1
- nat/data_models/middleware.py +37 -0
- nat/data_models/object_store.py +1 -1
- nat/data_models/openai_mcp.py +1 -1
- nat/data_models/optimizable.py +1 -1
- nat/data_models/optimizer.py +1 -1
- nat/data_models/profiler.py +1 -1
- nat/data_models/registry_handler.py +1 -1
- nat/data_models/retriever.py +1 -1
- nat/data_models/retry_mixin.py +1 -1
- nat/data_models/runtime_enum.py +26 -0
- nat/data_models/span.py +1 -1
- nat/data_models/step_adaptor.py +1 -1
- nat/data_models/streaming.py +1 -1
- nat/data_models/swe_bench_model.py +1 -1
- nat/data_models/telemetry_exporter.py +1 -1
- nat/data_models/thinking_mixin.py +1 -1
- nat/data_models/ttc_strategy.py +1 -1
- nat/embedder/azure_openai_embedder.py +1 -1
- nat/embedder/nim_embedder.py +1 -1
- nat/embedder/openai_embedder.py +1 -1
- nat/embedder/register.py +1 -1
- nat/eval/__init__.py +1 -1
- nat/eval/config.py +8 -1
- nat/eval/dataset_handler/dataset_downloader.py +1 -1
- nat/eval/dataset_handler/dataset_filter.py +1 -1
- nat/eval/dataset_handler/dataset_handler.py +4 -2
- nat/eval/evaluate.py +226 -81
- nat/eval/evaluator/__init__.py +1 -1
- nat/eval/evaluator/base_evaluator.py +2 -2
- nat/eval/evaluator/evaluator_model.py +3 -2
- nat/eval/intermediate_step_adapter.py +1 -1
- nat/eval/llm_validator.py +336 -0
- nat/eval/rag_evaluator/evaluate.py +17 -10
- nat/eval/rag_evaluator/register.py +1 -1
- nat/eval/red_teaming_evaluator/__init__.py +14 -0
- nat/eval/red_teaming_evaluator/data_models.py +66 -0
- nat/eval/red_teaming_evaluator/evaluate.py +327 -0
- nat/eval/red_teaming_evaluator/filter_conditions.py +75 -0
- nat/eval/red_teaming_evaluator/register.py +55 -0
- nat/eval/register.py +2 -1
- nat/eval/remote_workflow.py +1 -1
- nat/eval/runners/__init__.py +1 -1
- nat/eval/runners/config.py +1 -1
- nat/eval/runners/multi_eval_runner.py +1 -1
- nat/eval/runners/red_teaming_runner/__init__.py +24 -0
- nat/eval/runners/red_teaming_runner/config.py +282 -0
- nat/eval/runners/red_teaming_runner/report_utils.py +707 -0
- nat/eval/runners/red_teaming_runner/runner.py +867 -0
- nat/eval/runtime_evaluator/__init__.py +1 -1
- nat/eval/runtime_evaluator/evaluate.py +1 -1
- nat/eval/runtime_evaluator/register.py +1 -1
- nat/eval/runtime_event_subscriber.py +1 -1
- nat/eval/swe_bench_evaluator/evaluate.py +1 -1
- nat/eval/swe_bench_evaluator/register.py +1 -1
- nat/eval/trajectory_evaluator/evaluate.py +2 -2
- nat/eval/trajectory_evaluator/register.py +1 -1
- nat/eval/tunable_rag_evaluator/evaluate.py +5 -5
- nat/eval/tunable_rag_evaluator/register.py +1 -1
- nat/eval/usage_stats.py +1 -1
- nat/eval/utils/eval_trace_ctx.py +1 -1
- nat/eval/utils/output_uploader.py +1 -1
- nat/eval/utils/tqdm_position_registry.py +1 -1
- nat/eval/utils/weave_eval.py +1 -1
- nat/experimental/decorators/experimental_warning_decorator.py +1 -1
- nat/experimental/test_time_compute/editing/iterative_plan_refinement_editor.py +1 -1
- nat/experimental/test_time_compute/editing/llm_as_a_judge_editor.py +1 -1
- nat/experimental/test_time_compute/editing/motivation_aware_summarization.py +1 -1
- nat/experimental/test_time_compute/functions/execute_score_select_function.py +1 -1
- nat/experimental/test_time_compute/functions/multi_llm_judge_function.py +88 -0
- nat/experimental/test_time_compute/functions/plan_select_execute_function.py +1 -1
- nat/experimental/test_time_compute/functions/ttc_tool_orchestration_function.py +1 -1
- nat/experimental/test_time_compute/functions/ttc_tool_wrapper_function.py +1 -1
- nat/experimental/test_time_compute/models/editor_config.py +1 -1
- nat/experimental/test_time_compute/models/scoring_config.py +1 -1
- nat/experimental/test_time_compute/models/search_config.py +20 -2
- nat/experimental/test_time_compute/models/selection_config.py +33 -2
- nat/experimental/test_time_compute/models/stage_enums.py +1 -1
- nat/experimental/test_time_compute/models/strategy_base.py +1 -1
- nat/experimental/test_time_compute/models/tool_use_config.py +1 -1
- nat/experimental/test_time_compute/models/ttc_item.py +1 -1
- nat/experimental/test_time_compute/register.py +4 -1
- nat/experimental/test_time_compute/scoring/llm_based_agent_scorer.py +1 -1
- nat/experimental/test_time_compute/scoring/llm_based_plan_scorer.py +1 -1
- nat/experimental/test_time_compute/scoring/motivation_aware_scorer.py +1 -1
- nat/experimental/test_time_compute/search/multi_llm_generation.py +115 -0
- nat/experimental/test_time_compute/search/multi_llm_planner.py +1 -1
- nat/experimental/test_time_compute/search/multi_query_retrieval_search.py +1 -1
- nat/experimental/test_time_compute/search/single_shot_multi_plan_planner.py +1 -1
- nat/experimental/test_time_compute/selection/best_of_n_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_based_agent_output_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_based_output_merging_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_based_plan_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_judge_selection.py +127 -0
- nat/experimental/test_time_compute/selection/threshold_selector.py +1 -1
- nat/finetuning/__init__.py +24 -0
- nat/finetuning/finetuning_runtime.py +143 -0
- nat/finetuning/interfaces/__init__.py +24 -0
- nat/finetuning/interfaces/finetuning_runner.py +261 -0
- nat/finetuning/interfaces/trainer_adapter.py +103 -0
- nat/finetuning/interfaces/trajectory_builder.py +115 -0
- nat/finetuning/utils/__init__.py +15 -0
- nat/finetuning/utils/parsers/__init__.py +15 -0
- nat/finetuning/utils/parsers/adk_parser.py +141 -0
- nat/finetuning/utils/parsers/base_parser.py +238 -0
- nat/finetuning/utils/parsers/common.py +91 -0
- nat/finetuning/utils/parsers/langchain_parser.py +267 -0
- nat/finetuning/utils/parsers/llama_index_parser.py +218 -0
- nat/front_ends/__init__.py +1 -1
- nat/front_ends/console/__init__.py +1 -1
- nat/front_ends/console/authentication_flow_handler.py +1 -1
- nat/front_ends/console/console_front_end_config.py +4 -1
- nat/front_ends/console/console_front_end_plugin.py +5 -4
- nat/front_ends/console/register.py +1 -1
- nat/front_ends/cron/__init__.py +1 -1
- nat/front_ends/fastapi/__init__.py +1 -1
- nat/front_ends/fastapi/async_job.py +128 -0
- nat/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +1 -1
- nat/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +13 -9
- nat/front_ends/fastapi/dask_client_mixin.py +1 -1
- nat/front_ends/fastapi/fastapi_front_end_config.py +23 -1
- nat/front_ends/fastapi/fastapi_front_end_controller.py +1 -1
- nat/front_ends/fastapi/fastapi_front_end_plugin.py +25 -30
- nat/front_ends/fastapi/fastapi_front_end_plugin_worker.py +318 -59
- nat/front_ends/fastapi/html_snippets/__init__.py +1 -1
- nat/front_ends/fastapi/html_snippets/auth_code_grant_success.py +1 -1
- nat/front_ends/fastapi/intermediate_steps_subscriber.py +12 -1
- nat/front_ends/fastapi/job_store.py +23 -11
- nat/front_ends/fastapi/main.py +1 -1
- nat/front_ends/fastapi/message_handler.py +27 -4
- nat/front_ends/fastapi/message_validator.py +54 -2
- nat/front_ends/fastapi/register.py +1 -1
- nat/front_ends/fastapi/response_helpers.py +16 -15
- nat/front_ends/fastapi/step_adaptor.py +1 -1
- nat/front_ends/fastapi/utils.py +1 -1
- nat/front_ends/register.py +1 -2
- nat/front_ends/simple_base/__init__.py +1 -1
- nat/front_ends/simple_base/simple_front_end_plugin_base.py +6 -4
- nat/llm/aws_bedrock_llm.py +1 -1
- nat/llm/azure_openai_llm.py +10 -1
- nat/llm/dynamo_llm.py +363 -0
- nat/llm/huggingface_llm.py +177 -0
- nat/llm/litellm_llm.py +1 -1
- nat/llm/nim_llm.py +1 -1
- nat/llm/openai_llm.py +1 -1
- nat/llm/register.py +3 -1
- nat/llm/utils/__init__.py +1 -1
- nat/llm/utils/env_config_value.py +1 -1
- nat/llm/utils/error.py +1 -1
- nat/llm/utils/thinking.py +1 -1
- nat/memory/__init__.py +1 -1
- nat/memory/interfaces.py +1 -1
- nat/memory/models.py +1 -1
- nat/meta/pypi.md +1 -1
- nat/middleware/__init__.py +35 -0
- nat/middleware/cache/__init__.py +14 -0
- nat/middleware/cache/cache_middleware.py +253 -0
- nat/middleware/cache/cache_middleware_config.py +44 -0
- nat/middleware/cache/register.py +33 -0
- nat/middleware/defense/__init__.py +14 -0
- nat/middleware/defense/defense_middleware.py +362 -0
- nat/middleware/defense/defense_middleware_content_guard.py +455 -0
- nat/middleware/defense/defense_middleware_data_models.py +91 -0
- nat/middleware/defense/defense_middleware_output_verifier.py +440 -0
- nat/middleware/defense/defense_middleware_pii.py +356 -0
- nat/middleware/defense/register.py +82 -0
- nat/middleware/dynamic/__init__.py +14 -0
- nat/middleware/dynamic/dynamic_function_middleware.py +962 -0
- nat/middleware/dynamic/dynamic_middleware_config.py +132 -0
- nat/middleware/dynamic/register.py +34 -0
- nat/middleware/function_middleware.py +370 -0
- nat/middleware/logging/__init__.py +14 -0
- nat/middleware/logging/logging_middleware.py +67 -0
- nat/middleware/logging/logging_middleware_config.py +28 -0
- nat/middleware/logging/register.py +33 -0
- nat/middleware/middleware.py +298 -0
- nat/middleware/red_teaming/__init__.py +14 -0
- nat/middleware/red_teaming/red_teaming_middleware.py +344 -0
- nat/middleware/red_teaming/red_teaming_middleware_config.py +112 -0
- nat/middleware/red_teaming/register.py +47 -0
- nat/middleware/register.py +22 -0
- nat/middleware/utils/__init__.py +14 -0
- nat/middleware/utils/workflow_inventory.py +155 -0
- nat/object_store/__init__.py +1 -1
- nat/object_store/in_memory_object_store.py +1 -1
- nat/object_store/interfaces.py +1 -1
- nat/object_store/models.py +1 -1
- nat/object_store/register.py +1 -1
- nat/observability/__init__.py +1 -1
- nat/observability/exporter/__init__.py +1 -1
- nat/observability/exporter/base_exporter.py +1 -1
- nat/observability/exporter/exporter.py +1 -1
- nat/observability/exporter/file_exporter.py +1 -1
- nat/observability/exporter/processing_exporter.py +1 -1
- nat/observability/exporter/raw_exporter.py +1 -1
- nat/observability/exporter/span_exporter.py +7 -1
- nat/observability/exporter_manager.py +1 -1
- nat/observability/mixin/__init__.py +1 -1
- nat/observability/mixin/batch_config_mixin.py +1 -1
- nat/observability/mixin/collector_config_mixin.py +1 -1
- nat/observability/mixin/file_mixin.py +1 -1
- nat/observability/mixin/file_mode.py +1 -1
- nat/observability/mixin/redaction_config_mixin.py +1 -1
- nat/observability/mixin/resource_conflict_mixin.py +1 -1
- nat/observability/mixin/serialize_mixin.py +1 -1
- nat/observability/mixin/tagging_config_mixin.py +1 -1
- nat/observability/mixin/type_introspection_mixin.py +1 -1
- nat/observability/processor/__init__.py +1 -1
- nat/observability/processor/batching_processor.py +1 -1
- nat/observability/processor/callback_processor.py +1 -1
- nat/observability/processor/falsy_batch_filter_processor.py +1 -1
- nat/observability/processor/intermediate_step_serializer.py +1 -1
- nat/observability/processor/processor.py +1 -1
- nat/observability/processor/processor_factory.py +1 -1
- nat/observability/processor/redaction/__init__.py +1 -1
- nat/observability/processor/redaction/contextual_redaction_processor.py +1 -1
- nat/observability/processor/redaction/contextual_span_redaction_processor.py +1 -1
- nat/observability/processor/redaction/redaction_processor.py +1 -1
- nat/observability/processor/redaction/span_header_redaction_processor.py +1 -1
- nat/observability/processor/span_tagging_processor.py +1 -1
- nat/observability/register.py +1 -1
- nat/observability/utils/__init__.py +1 -1
- nat/observability/utils/dict_utils.py +1 -1
- nat/observability/utils/time_utils.py +1 -1
- nat/profiler/calc/__init__.py +1 -1
- nat/profiler/calc/calc_runner.py +3 -3
- nat/profiler/calc/calculations.py +1 -1
- nat/profiler/calc/data_models.py +1 -1
- nat/profiler/calc/plot.py +30 -3
- nat/profiler/callbacks/agno_callback_handler.py +1 -1
- nat/profiler/callbacks/base_callback_class.py +1 -1
- nat/profiler/callbacks/langchain_callback_handler.py +33 -3
- nat/profiler/callbacks/llama_index_callback_handler.py +13 -10
- nat/profiler/callbacks/semantic_kernel_callback_handler.py +1 -1
- nat/profiler/callbacks/token_usage_base_model.py +1 -1
- nat/profiler/data_frame_row.py +1 -1
- nat/profiler/data_models.py +1 -1
- nat/profiler/decorators/framework_wrapper.py +32 -1
- nat/profiler/decorators/function_tracking.py +1 -1
- nat/profiler/forecasting/config.py +1 -1
- nat/profiler/forecasting/model_trainer.py +1 -1
- nat/profiler/forecasting/models/__init__.py +1 -1
- nat/profiler/forecasting/models/forecasting_base_model.py +1 -1
- nat/profiler/forecasting/models/linear_model.py +1 -1
- nat/profiler/forecasting/models/random_forest_regressor.py +1 -1
- nat/profiler/inference_metrics_model.py +1 -1
- nat/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +1 -1
- nat/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +1 -1
- nat/profiler/inference_optimization/data_models.py +1 -1
- nat/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +1 -1
- nat/profiler/inference_optimization/experimental/prefix_span_analysis.py +1 -1
- nat/profiler/inference_optimization/llm_metrics.py +1 -1
- nat/profiler/inference_optimization/prompt_caching.py +1 -1
- nat/profiler/inference_optimization/token_uniqueness.py +1 -1
- nat/profiler/inference_optimization/workflow_runtimes.py +1 -1
- nat/profiler/intermediate_property_adapter.py +1 -1
- nat/profiler/parameter_optimization/optimizable_utils.py +1 -1
- nat/profiler/parameter_optimization/optimizer_runtime.py +1 -1
- nat/profiler/parameter_optimization/parameter_optimizer.py +1 -1
- nat/profiler/parameter_optimization/parameter_selection.py +1 -1
- nat/profiler/parameter_optimization/pareto_visualizer.py +1 -1
- nat/profiler/parameter_optimization/prompt_optimizer.py +1 -1
- nat/profiler/parameter_optimization/update_helpers.py +1 -1
- nat/profiler/profile_runner.py +1 -1
- nat/profiler/utils.py +1 -1
- nat/registry_handlers/local/local_handler.py +1 -1
- nat/registry_handlers/local/register_local.py +1 -1
- nat/registry_handlers/metadata_factory.py +1 -1
- nat/registry_handlers/package_utils.py +1 -1
- nat/registry_handlers/pypi/pypi_handler.py +1 -1
- nat/registry_handlers/pypi/register_pypi.py +1 -1
- nat/registry_handlers/register.py +1 -1
- nat/registry_handlers/registry_handler_base.py +1 -1
- nat/registry_handlers/rest/register_rest.py +1 -1
- nat/registry_handlers/rest/rest_handler.py +1 -1
- nat/registry_handlers/schemas/headers.py +1 -1
- nat/registry_handlers/schemas/package.py +1 -1
- nat/registry_handlers/schemas/publish.py +1 -1
- nat/registry_handlers/schemas/pull.py +1 -1
- nat/registry_handlers/schemas/remove.py +1 -1
- nat/registry_handlers/schemas/search.py +1 -1
- nat/registry_handlers/schemas/status.py +1 -1
- nat/retriever/interface.py +1 -1
- nat/retriever/milvus/__init__.py +1 -1
- nat/retriever/milvus/register.py +12 -4
- nat/retriever/milvus/retriever.py +103 -41
- nat/retriever/models.py +1 -1
- nat/retriever/nemo_retriever/__init__.py +1 -1
- nat/retriever/nemo_retriever/register.py +1 -1
- nat/retriever/nemo_retriever/retriever.py +5 -5
- nat/retriever/register.py +1 -1
- nat/runtime/__init__.py +1 -1
- nat/runtime/loader.py +10 -3
- nat/runtime/metrics.py +180 -0
- nat/runtime/runner.py +13 -6
- nat/runtime/session.py +458 -32
- nat/runtime/user_metadata.py +1 -1
- nat/settings/global_settings.py +1 -1
- nat/tool/chat_completion.py +1 -1
- nat/tool/code_execution/README.md +1 -1
- nat/tool/code_execution/code_sandbox.py +2 -2
- nat/tool/code_execution/local_sandbox/Dockerfile.sandbox +1 -1
- nat/tool/code_execution/local_sandbox/__init__.py +1 -1
- nat/tool/code_execution/local_sandbox/local_sandbox_server.py +1 -1
- nat/tool/code_execution/local_sandbox/start_local_sandbox.sh +1 -1
- nat/tool/code_execution/register.py +1 -1
- nat/tool/code_execution/utils.py +1 -1
- nat/tool/datetime_tools.py +1 -1
- nat/tool/document_search.py +1 -1
- nat/tool/github_tools.py +1 -1
- nat/tool/memory_tools/add_memory_tool.py +1 -1
- nat/tool/memory_tools/delete_memory_tool.py +1 -1
- nat/tool/memory_tools/get_memory_tool.py +1 -1
- nat/tool/nvidia_rag.py +2 -2
- nat/tool/register.py +1 -1
- nat/tool/retriever.py +1 -1
- nat/tool/server_tools.py +1 -1
- nat/utils/__init__.py +8 -5
- nat/utils/callable_utils.py +1 -1
- nat/utils/data_models/schema_validator.py +1 -1
- nat/utils/debugging_utils.py +1 -1
- nat/utils/decorators.py +1 -1
- nat/utils/dump_distro_mapping.py +1 -1
- nat/utils/exception_handlers/automatic_retries.py +3 -3
- nat/utils/exception_handlers/schemas.py +1 -1
- nat/utils/io/model_processing.py +1 -1
- nat/utils/io/supress_logs.py +33 -0
- nat/utils/io/yaml_tools.py +1 -1
- nat/utils/log_levels.py +1 -1
- nat/utils/log_utils.py +13 -1
- nat/utils/metadata_utils.py +1 -1
- nat/utils/optional_imports.py +1 -1
- nat/utils/producer_consumer_queue.py +1 -1
- nat/utils/reactive/base/observable_base.py +1 -1
- nat/utils/reactive/base/observer_base.py +1 -1
- nat/utils/reactive/base/subject_base.py +1 -1
- nat/utils/reactive/observable.py +1 -1
- nat/utils/reactive/observer.py +1 -1
- nat/utils/reactive/subject.py +1 -1
- nat/utils/reactive/subscription.py +1 -1
- nat/utils/responses_api.py +1 -1
- nat/utils/settings/global_settings.py +1 -1
- nat/utils/string_utils.py +1 -1
- nat/utils/type_converter.py +18 -5
- nat/utils/type_utils.py +1 -1
- nat/utils/url_utils.py +1 -1
- {nvidia_nat-1.4.0a20251112.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/METADATA +46 -15
- nvidia_nat-1.4.0a20260113.dist-info/RECORD +547 -0
- nvidia_nat-1.4.0a20260113.dist-info/entry_points.txt +38 -0
- nat/cli/commands/mcp/mcp.py +0 -986
- nat/front_ends/mcp/introspection_token_verifier.py +0 -73
- nat/front_ends/mcp/mcp_front_end_config.py +0 -109
- nat/front_ends/mcp/mcp_front_end_plugin.py +0 -151
- nat/front_ends/mcp/mcp_front_end_plugin_worker.py +0 -362
- nat/front_ends/mcp/memory_profiler.py +0 -320
- nat/front_ends/mcp/register.py +0 -27
- nat/front_ends/mcp/tool_converter.py +0 -321
- nvidia_nat-1.4.0a20251112.dist-info/RECORD +0 -481
- nvidia_nat-1.4.0a20251112.dist-info/entry_points.txt +0 -22
- {nvidia_nat-1.4.0a20251112.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/WHEEL +0 -0
- {nvidia_nat-1.4.0a20251112.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
- {nvidia_nat-1.4.0a20251112.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE.md +0 -0
- {nvidia_nat-1.4.0a20251112.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/top_level.txt +0 -0
nat/llm/dynamo_llm.py
ADDED
|
@@ -0,0 +1,363 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
"""
|
|
16
|
+
Dynamo LLM provider with automatic prefix header injection for KV cache optimization.
|
|
17
|
+
|
|
18
|
+
This module provides a specialized OpenAI-compatible LLM that sends Dynamo prefix headers
|
|
19
|
+
for optimal KV cache management and request routing. The prefix parameters are optimizable
|
|
20
|
+
via the NAT optimizer.
|
|
21
|
+
|
|
22
|
+
The implementation uses httpx event hooks to inject headers at the HTTP transport level,
|
|
23
|
+
making it framework-agnostic (works with LangChain, LlamaIndex, etc.).
|
|
24
|
+
|
|
25
|
+
Dynamo Prefix Parameters
|
|
26
|
+
-------------------------
|
|
27
|
+
|
|
28
|
+
prefix_osl (Output Sequence Length)
|
|
29
|
+
Hint for expected response length:
|
|
30
|
+
|
|
31
|
+
- LOW: decode_cost=1.0, short responses
|
|
32
|
+
- MEDIUM: decode_cost=2.0, typical responses
|
|
33
|
+
- HIGH: decode_cost=3.0, long responses
|
|
34
|
+
|
|
35
|
+
prefix_iat (Inter-Arrival Time)
|
|
36
|
+
Hint for request pacing:
|
|
37
|
+
|
|
38
|
+
- LOW: iat_factor=1.5, rapid bursts -> high worker stickiness
|
|
39
|
+
- MEDIUM: iat_factor=1.0, normal pacing
|
|
40
|
+
- HIGH: iat_factor=0.6, slow requests -> more exploration
|
|
41
|
+
|
|
42
|
+
prefix_total_requests
|
|
43
|
+
Expected requests per conversation:
|
|
44
|
+
|
|
45
|
+
- Higher values increase KV cache affinity and worker stickiness
|
|
46
|
+
- Lower values allow more load balancing
|
|
47
|
+
"""
|
|
48
|
+
|
|
49
|
+
import logging
|
|
50
|
+
import uuid
|
|
51
|
+
from collections.abc import Callable
|
|
52
|
+
from collections.abc import Coroutine
|
|
53
|
+
from collections.abc import Iterator
|
|
54
|
+
from contextlib import contextmanager
|
|
55
|
+
from contextvars import ContextVar
|
|
56
|
+
from typing import TYPE_CHECKING
|
|
57
|
+
from typing import Any
|
|
58
|
+
from typing import Literal
|
|
59
|
+
|
|
60
|
+
if TYPE_CHECKING:
|
|
61
|
+
import httpx
|
|
62
|
+
|
|
63
|
+
from pydantic import Field
|
|
64
|
+
|
|
65
|
+
from nat.builder.builder import Builder
|
|
66
|
+
from nat.builder.llm import LLMProviderInfo
|
|
67
|
+
from nat.cli.register_workflow import register_llm_provider
|
|
68
|
+
from nat.data_models.optimizable import OptimizableField
|
|
69
|
+
from nat.data_models.optimizable import SearchSpace
|
|
70
|
+
from nat.llm.openai_llm import OpenAIModelConfig
|
|
71
|
+
|
|
72
|
+
logger = logging.getLogger(__name__)
|
|
73
|
+
|
|
74
|
+
# Define valid prefix hint values
|
|
75
|
+
PrefixLevel = Literal["LOW", "MEDIUM", "HIGH"]
|
|
76
|
+
|
|
77
|
+
# =============================================================================
|
|
78
|
+
# CONTEXT MANAGEMENT FOR DYNAMO PREFIX ID
|
|
79
|
+
# =============================================================================
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class DynamoPrefixContext:
|
|
83
|
+
"""
|
|
84
|
+
Singleton class for managing Dynamo prefix IDs across LLM calls.
|
|
85
|
+
|
|
86
|
+
This allows evaluation code to set a prefix ID that persists across all LLM
|
|
87
|
+
calls for a single evaluation question (multi-turn conversation).
|
|
88
|
+
|
|
89
|
+
Usage::
|
|
90
|
+
|
|
91
|
+
from nat.llm.dynamo_llm import DynamoPrefixContext
|
|
92
|
+
|
|
93
|
+
# Set prefix ID at the start of each evaluation question
|
|
94
|
+
DynamoPrefixContext.set("eval-q001-abc123")
|
|
95
|
+
|
|
96
|
+
# ... perform LLM calls ...
|
|
97
|
+
|
|
98
|
+
# Clear when done
|
|
99
|
+
DynamoPrefixContext.clear()
|
|
100
|
+
|
|
101
|
+
# Or use as a context manager
|
|
102
|
+
with DynamoPrefixContext.scope("eval-q001-abc123"):
|
|
103
|
+
# ... perform LLM calls ...
|
|
104
|
+
"""
|
|
105
|
+
|
|
106
|
+
_current_prefix_id: ContextVar[str | None] = ContextVar('dynamo_prefix_id', default=None)
|
|
107
|
+
|
|
108
|
+
@classmethod
|
|
109
|
+
def set(cls, prefix_id: str) -> None:
|
|
110
|
+
"""
|
|
111
|
+
Set the Dynamo prefix ID for the current context.
|
|
112
|
+
|
|
113
|
+
Call this at the start of each evaluation question to ensure all LLM calls
|
|
114
|
+
for that question share the same prefix ID (enabling KV cache reuse).
|
|
115
|
+
|
|
116
|
+
Args:
|
|
117
|
+
prefix_id: The unique prefix ID (e.g., "eval-q001-abc123")
|
|
118
|
+
"""
|
|
119
|
+
cls._current_prefix_id.set(prefix_id)
|
|
120
|
+
logger.debug("Set Dynamo prefix ID: %s", prefix_id)
|
|
121
|
+
|
|
122
|
+
@classmethod
|
|
123
|
+
def clear(cls) -> None:
|
|
124
|
+
"""Clear the current Dynamo prefix ID context."""
|
|
125
|
+
cls._current_prefix_id.set(None)
|
|
126
|
+
logger.debug("Cleared Dynamo prefix ID")
|
|
127
|
+
|
|
128
|
+
@classmethod
|
|
129
|
+
def get(cls) -> str | None:
|
|
130
|
+
"""Get the current Dynamo prefix ID from context, if any."""
|
|
131
|
+
return cls._current_prefix_id.get()
|
|
132
|
+
|
|
133
|
+
@classmethod
|
|
134
|
+
@contextmanager
|
|
135
|
+
def scope(cls, prefix_id: str) -> Iterator[None]:
|
|
136
|
+
"""
|
|
137
|
+
Context manager for scoped prefix ID usage.
|
|
138
|
+
|
|
139
|
+
Automatically sets the prefix ID on entry and clears it on exit,
|
|
140
|
+
ensuring proper cleanup even if exceptions occur.
|
|
141
|
+
|
|
142
|
+
Args:
|
|
143
|
+
prefix_id: The unique prefix ID for this scope
|
|
144
|
+
|
|
145
|
+
Yields:
|
|
146
|
+
None
|
|
147
|
+
|
|
148
|
+
Usage:
|
|
149
|
+
with DynamoPrefixContext.scope("eval-q001"):
|
|
150
|
+
# All LLM calls here will use "eval-q001" prefix
|
|
151
|
+
await llm.ainvoke(...)
|
|
152
|
+
"""
|
|
153
|
+
cls.set(prefix_id)
|
|
154
|
+
try:
|
|
155
|
+
yield
|
|
156
|
+
finally:
|
|
157
|
+
cls.clear()
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
# =============================================================================
|
|
161
|
+
# DYNAMO MODEL CONFIGURATION
|
|
162
|
+
# =============================================================================
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
class DynamoModelConfig(OpenAIModelConfig, name="dynamo"):
    """
    A Dynamo LLM provider with automatic prefix header injection for KV cache optimization.

    This is a specialized OpenAI-compatible LLM that sends Dynamo prefix headers
    for optimal KV cache management and request routing. Prefix headers are enabled
    by default using the template "nat-dynamo-{uuid}". The prefix routing parameters
    (prefix_total_requests, prefix_osl, prefix_iat) are optimizable via the NAT optimizer.

    To disable prefix headers, set prefix_template to null/None in your config.
    """

    # ---- Dynamo prefix parameters ------------------------------------------

    # Template from which the prefix ID is built; per its description, a
    # null/None value disables prefix header injection.
    prefix_template: str | None = Field(
        default="nat-dynamo-{uuid}",
        description=(
            "Template for prefix ID. The {uuid} placeholder will be replaced with a unique ID. "
            "Prefix headers are sent by default for KV cache optimization. "
            "Set to null/None to disable prefix header injection."
        ),
    )

    # Validated to lie in [1, 50]; the optimizer search space is declared
    # separately via SearchSpace(low=1, high=20, step=5).
    prefix_total_requests: int = OptimizableField(
        default=10,
        ge=1,
        le=50,
        description=(
            "Expected number of requests for this conversation/prefix. "
            "Higher values increase worker stickiness and KV cache locality. "
            "Lower values allow more load balancing across workers."
        ),
        space=SearchSpace(low=1, high=20, step=5),
    )

    # Output-sequence-length hint, searched over LOW / MEDIUM / HIGH.
    prefix_osl: PrefixLevel = OptimizableField(
        default="MEDIUM",
        description=(
            "Output Sequence Length hint for the Dynamo router. "
            "LOW=short responses (decode_cost=1.0), "
            "MEDIUM=typical (decode_cost=2.0), "
            "HIGH=long responses (decode_cost=3.0)."
        ),
        space=SearchSpace(values=["LOW", "MEDIUM", "HIGH"]),
    )

    # Inter-arrival-time hint, searched over LOW / MEDIUM / HIGH.
    prefix_iat: PrefixLevel = OptimizableField(
        default="MEDIUM",
        description=(
            "Inter-Arrival Time hint for the Dynamo router. "
            "LOW=rapid bursts (iat_factor=1.5, high stickiness), "
            "MEDIUM=normal (iat_factor=1.0), "
            "HIGH=slow requests (iat_factor=0.6, more exploration)."
        ),
        space=SearchSpace(values=["LOW", "MEDIUM", "HIGH"]),
    )

    # Must be strictly positive (gt=0.0).
    request_timeout: float = Field(
        default=600.0,
        gt=0.0,
        description="HTTP request timeout in seconds for LLM requests.",
    )

    # ---- Utility methods ---------------------------------------------------

    @staticmethod
    def get_dynamo_field_names() -> frozenset[str]:
        """
        Return the names of the fields this class declares on top of its base config.

        Intended for ``model_dump(exclude=...)`` when building a config dict for a
        framework client, so the Dynamo-only parameters are not forwarded to the
        underlying client.

        Returns:
            A frozenset of Dynamo-specific field names.

        Example::

            config_dict = config.model_dump(
                exclude={"type", "thinking", *DynamoModelConfig.get_dynamo_field_names()},
                ...
            )
        """
        dynamo_only_fields = (
            "prefix_template",
            "prefix_total_requests",
            "prefix_osl",
            "prefix_iat",
            "request_timeout",
        )
        return frozenset(dynamo_only_fields)
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
# =============================================================================
|
|
249
|
+
# HTTPX EVENT HOOK FOR HEADER INJECTION
|
|
250
|
+
# =============================================================================
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def _create_dynamo_request_hook(
    prefix_template: str | None,
    total_requests: int,
    osl: str,
    iat: str,
) -> Callable[["httpx.Request"], Coroutine[Any, Any, None]]:
    """
    Create an httpx event hook that injects Dynamo prefix headers into requests.

    The returned coroutine function is meant to be registered as an httpx
    ``request`` event hook. A default prefix ID is generated ONCE, when the hook
    is created, so every request handled by this hook shares that prefix ID
    unless ``DynamoPrefixContext`` currently holds a non-empty prefix ID, in
    which case the context value is used for that request (e.g. per-question
    IDs in batch evaluation).

    The header values derived from ``total_requests``, ``osl`` and ``iat`` never
    change for a given hook, so they are computed once here rather than on
    every request.

    Args:
        prefix_template: Template string with {uuid} placeholder. When falsy,
            the default prefix ID falls back to ``nat-dynamo-<unique id>``.
        total_requests: Expected number of requests for this prefix
        osl: Output sequence length hint (LOW/MEDIUM/HIGH); sent upper-cased
        iat: Inter-arrival time hint (LOW/MEDIUM/HIGH); sent upper-cased

    Returns:
        An async function suitable for use as an httpx event hook.
    """
    # Generate the default prefix ID ONCE so all requests from this hook share it.
    unique_id = uuid.uuid4().hex[:16]
    if prefix_template:
        default_prefix_id = prefix_template.format(uuid=unique_id)
    else:
        default_prefix_id = f"nat-dynamo-{unique_id}"

    # Per-hook invariants: build the header strings once, not per request.
    total_requests_header = str(total_requests)
    osl_header = osl.upper()
    iat_header = iat.upper()

    logger.debug("Created Dynamo request hook with default prefix ID: %s", default_prefix_id)

    async def on_request(request):
        """Inject Dynamo prefix headers before each request."""
        # The context variable wins over the pre-generated default when it is set.
        context_prefix_id = DynamoPrefixContext.get()

        if context_prefix_id:
            prefix_id = context_prefix_id
            logger.debug("Using context prefix ID: %s", prefix_id)
        else:
            prefix_id = default_prefix_id
            logger.debug("Using default prefix ID: %s", prefix_id)

        request.headers["x-prefix-id"] = prefix_id
        request.headers["x-prefix-total-requests"] = total_requests_header
        request.headers["x-prefix-osl"] = osl_header
        request.headers["x-prefix-iat"] = iat_header

        logger.debug("Injected Dynamo headers: prefix_id=%s, total_requests=%d, osl=%s, iat=%s",
                     prefix_id,
                     total_requests,
                     osl_header,
                     iat_header)

    return on_request
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def create_httpx_client_with_dynamo_hooks(
    prefix_template: str | None,
    total_requests: int,
    osl: str,
    iat: str,
    timeout: float = 600.0,
) -> "httpx.AsyncClient":
    """
    Build an ``httpx.AsyncClient`` whose requests carry Dynamo prefix headers.

    Header injection happens in an httpx ``request`` event hook, i.e. at the
    HTTP level, so the client can be handed to the OpenAI SDK regardless of
    which framework sits on top of it.

    Args:
        prefix_template: Template string with {uuid} placeholder
        total_requests: Expected number of requests for this prefix
        osl: Output sequence length hint (LOW/MEDIUM/HIGH)
        iat: Inter-arrival time hint (LOW/MEDIUM/HIGH)
        timeout: HTTP request timeout in seconds

    Returns:
        An httpx.AsyncClient configured with Dynamo header injection.
    """
    # Imported lazily, inside the function, as in the rest of this module.
    import httpx

    event_hooks = {
        "request": [_create_dynamo_request_hook(prefix_template, total_requests, osl, iat)],
    }
    client_timeout = httpx.Timeout(timeout)
    return httpx.AsyncClient(event_hooks=event_hooks, timeout=client_timeout)
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
# =============================================================================
|
|
351
|
+
# PROVIDER REGISTRATION
|
|
352
|
+
# =============================================================================
|
|
353
|
+
# Note: Client registrations for each framework (LangChain, LlamaIndex, etc.)
|
|
354
|
+
# are in the respective plugin packages under packages/nvidia_nat_<framework>/
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
@register_llm_provider(config_type=DynamoModelConfig)
async def dynamo_llm(config: DynamoModelConfig, _builder: Builder):
    """Yield the provider info that registers ``DynamoModelConfig`` as an LLM provider.

    Args:
        config: The Dynamo model configuration to expose.
        _builder: Builder instance required by the provider interface; not used here.

    Yields:
        LLMProviderInfo wrapping ``config`` together with a short description.
    """
    provider_info = LLMProviderInfo(
        config=config,
        description="A Dynamo-optimized model with automatic prefix headers for KV cache management.",
    )
    yield provider_info
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
"""HuggingFace Transformers LLM Provider - Local in-process model execution."""
|
|
16
|
+
|
|
17
|
+
import logging
|
|
18
|
+
from collections.abc import AsyncIterator
|
|
19
|
+
from dataclasses import dataclass
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
from pydantic import Field
|
|
23
|
+
|
|
24
|
+
from nat.builder.builder import Builder
|
|
25
|
+
from nat.builder.llm import LLMProviderInfo
|
|
26
|
+
from nat.cli.register_workflow import register_llm_provider
|
|
27
|
+
from nat.data_models.llm import LLMBaseConfig
|
|
28
|
+
|
|
29
|
+
logger = logging.getLogger(__name__)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class ModelCacheEntry:
    """Objects kept together in the model cache for one loaded HuggingFace model."""

    # The loaded model object.
    model: Any
    # The tokenizer loaded for the same model name.
    tokenizer: Any
    # The imported ``torch`` module, stored so cleanup code can reach ``torch.cuda``.
    torch: Any
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class ModelCache:
    """Singleton mapping of model name to its loaded HuggingFace model data.

    Entries are kept for the lifetime of the provider rather than per query, so
    repeated use of the same model is fast:
    - During nat serve: cached while the server runs, cleaned up on shutdown
    - During nat red-team: cached across all evaluation queries, cleaned up when complete
    - During nat run: cached for the single workflow execution, cleaned up when done
    """

    # The one shared instance, created lazily by __new__.
    _instance: "ModelCache | None" = None
    # Per-instance storage; assigned in __new__ when the singleton is first built.
    _cache: dict[str, ModelCacheEntry]

    def __new__(cls) -> "ModelCache":
        """Return the shared instance, building it (with an empty cache) on first use."""
        existing = cls._instance
        if existing is not None:
            return existing

        created = super().__new__(cls)
        created._cache = {}
        cls._instance = created
        return created

    def get(self, model_name: str) -> ModelCacheEntry | None:
        """Look up the entry stored under *model_name*; ``None`` when there is none."""
        entry = self._cache.get(model_name)
        return entry

    def set(self, model_name: str, data: ModelCacheEntry) -> None:
        """Store *data* under *model_name*, replacing any previous entry."""
        self._cache[model_name] = data

    def remove(self, model_name: str) -> None:
        """Drop the entry for *model_name*; a missing name is silently ignored."""
        if model_name in self._cache:
            del self._cache[model_name]

    def __contains__(self, model_name: str) -> bool:
        """Support ``model_name in cache`` membership tests."""
        is_cached = model_name in self._cache
        return is_cached
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class HuggingFaceConfig(LLMBaseConfig, name="huggingface"):
    """Configuration for HuggingFace LLM - loads model directly for local execution."""

    # Required: there is no default model.
    model_name: str = Field(
        description="HuggingFace model name (e.g. 'Qwen/Qwen3Guard-Gen-0.6B')",
    )

    # Placement and numeric precision used when the model is loaded.
    device: str = Field(
        default="auto",
        description="Device: 'cpu', 'cuda', 'cuda:0', or 'auto'",
    )
    dtype: str | None = Field(
        default="auto",
        description="Torch dtype: 'float16', 'bfloat16', 'float32', or 'auto'",
    )

    # Generation settings.
    max_new_tokens: int = Field(
        default=128,
        description="Maximum number of new tokens to generate",
    )
    temperature: float = Field(
        default=0.0,
        description="Sampling temperature (0 = deterministic greedy, > 0 = sampling enabled)",
    )

    # Off by default; forwarded to the ``from_pretrained`` loaders.
    trust_remote_code: bool = Field(
        default=False,
        description="Trust remote code when loading model",
    )
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def get_cached_model(model_name: str) -> ModelCacheEntry | None:
    """Look up *model_name* in the shared model cache.

    Returns:
        The cached entry (model, tokenizer, torch), or ``None`` if the model is not loaded.
    """
    shared_cache = ModelCache()
    return shared_cache.get(model_name)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
async def _cleanup_model(model_name: str) -> None:
    """Clean up a loaded model and free GPU memory.

    Best-effort: any error raised while cleaning up is logged and swallowed, so
    this never raises to the caller. The cache entry is always emptied and
    removed, even when moving the model to the CPU fails, so a failed cleanup
    cannot leave a stale entry that keeps the model referenced.

    Args:
        model_name: Name of the model to clean up.
    """
    try:
        cache = ModelCache()
        cached = cache.get(model_name)

        if cached is None:
            # Nothing is cached under this name.
            return

        try:
            # Move model to CPU to free GPU memory. Compare against None
            # explicitly rather than relying on the object's truthiness.
            if cached.model is not None:
                cached.model.to("cpu")
        finally:
            # Runs whether or not the move succeeded: drop the references held
            # by the entry and evict it from the cache.
            cached.model = None
            cached.tokenizer = None
            cache.remove(model_name)

            # Clear CUDA cache if available, after the model reference is gone.
            torch_module = cached.torch
            cached.torch = None
            if torch_module is not None and hasattr(torch_module.cuda, "empty_cache"):
                torch_module.cuda.empty_cache()

        logger.debug("Model cleaned up: %s", model_name)
    except Exception:
        logger.exception("Error cleaning up HuggingFace model '%s'", model_name)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
@register_llm_provider(config_type=HuggingFaceConfig)
async def huggingface_provider(
    config: HuggingFaceConfig,
    builder: Builder,  # noqa: ARG001 - kept for provider interface, currently unused
) -> AsyncIterator[LLMProviderInfo]:
    """HuggingFace model provider - loads models locally for in-process execution.

    The tokenizer and model are loaded only when ``config.model_name`` is not
    already in the shared ``ModelCache``; otherwise the cached entry is reused.
    When the provider is torn down, the model is cleaned up and removed from
    the cache.

    Args:
        config: Configuration for the HuggingFace model.
        builder: The NAT builder instance.

    Yields:
        LLMProviderInfo: Provider information for the loaded model.

    Raises:
        ValueError: If ``torch`` or ``transformers`` cannot be imported. The
            original ``ImportError`` is attached as the cause.
    """
    try:
        import torch
        from transformers import AutoModelForCausalLM
        from transformers import AutoTokenizer
    except ImportError as err:
        # Chain explicitly so the traceback shows which import actually failed.
        raise ValueError("HuggingFace dependencies not installed. \n"
                         "Install with:\n"
                         "  `pip install nvidia-nat[huggingface]` (package) or\n"
                         "  `uv pip install -e '.[huggingface]'` (source)\n") from err

    cache = ModelCache()

    # Load model if not cached
    if config.model_name not in cache:
        logger.debug("Loading model from HuggingFace: %s", config.model_name)

        # Load tokenizer
        tokenizer = AutoTokenizer.from_pretrained(config.model_name, trust_remote_code=config.trust_remote_code)

        # Load model
        model = AutoModelForCausalLM.from_pretrained(config.model_name,
                                                     dtype=config.dtype,
                                                     device_map=config.device,
                                                     trust_remote_code=config.trust_remote_code)

        # Cache it, keeping the torch module so cleanup can reach torch.cuda
        cache.set(config.model_name, ModelCacheEntry(model=model, tokenizer=tokenizer, torch=torch))

        logger.debug("Model loaded: %s on device: %s", config.model_name, config.device)
    else:
        logger.debug("Using cached model: %s", config.model_name)

    try:
        yield LLMProviderInfo(config=config, description=f"HuggingFace model: {config.model_name}")
    finally:
        # Cleanup when workflow/application shuts down
        await _cleanup_model(config.model_name)
|
nat/llm/litellm_llm.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/llm/nim_llm.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/llm/openai_llm.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/llm/register.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -22,6 +22,8 @@ This module is imported by the NeMo Agent Toolkit runtime to ensure providers ar
|
|
|
22
22
|
# Import any providers which need to be automatically registered here
|
|
23
23
|
from . import aws_bedrock_llm
|
|
24
24
|
from . import azure_openai_llm
|
|
25
|
+
from . import dynamo_llm
|
|
26
|
+
from . import huggingface_llm
|
|
25
27
|
from . import litellm_llm
|
|
26
28
|
from . import nim_llm
|
|
27
29
|
from . import openai_llm
|
nat/llm/utils/__init__.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/llm/utils/error.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/llm/utils/thinking.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/memory/__init__.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/memory/interfaces.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/memory/models.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/meta/pypi.md
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
<!--
|
|
2
|
-
SPDX-FileCopyrightText: Copyright (c) 2024-
|
|
2
|
+
SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
3
|
SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
5
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
"""Middleware implementations for NeMo Agent Toolkit."""
|
|
16
|
+
|
|
17
|
+
from nat.middleware.function_middleware import FunctionMiddleware
|
|
18
|
+
from nat.middleware.function_middleware import FunctionMiddlewareChain
|
|
19
|
+
from nat.middleware.function_middleware import validate_middleware
|
|
20
|
+
from nat.middleware.middleware import CallNext
|
|
21
|
+
from nat.middleware.middleware import CallNextStream
|
|
22
|
+
from nat.middleware.middleware import FunctionMiddlewareContext
|
|
23
|
+
from nat.middleware.middleware import Middleware
|
|
24
|
+
from nat.middleware.red_teaming.red_teaming_middleware import RedTeamingMiddleware
|
|
25
|
+
|
|
26
|
+
__all__ = [
|
|
27
|
+
"CallNext",
|
|
28
|
+
"CallNextStream",
|
|
29
|
+
"FunctionMiddleware",
|
|
30
|
+
"FunctionMiddlewareChain",
|
|
31
|
+
"FunctionMiddlewareContext",
|
|
32
|
+
"Middleware",
|
|
33
|
+
"RedTeamingMiddleware",
|
|
34
|
+
"validate_middleware",
|
|
35
|
+
]
|