nvidia-nat 1.4.0a20251120__py3-none-any.whl → 1.4.0a20260113__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiq/__init__.py +1 -1
- nat/{front_ends/mcp → agent/auto_memory_wrapper}/__init__.py +1 -1
- nat/agent/auto_memory_wrapper/agent.py +278 -0
- nat/agent/auto_memory_wrapper/register.py +227 -0
- nat/agent/auto_memory_wrapper/state.py +30 -0
- nat/agent/base.py +1 -1
- nat/agent/dual_node.py +1 -1
- nat/agent/prompt_optimizer/prompt.py +1 -1
- nat/agent/prompt_optimizer/register.py +1 -1
- nat/agent/react_agent/agent.py +16 -9
- nat/agent/react_agent/output_parser.py +2 -2
- nat/agent/react_agent/prompt.py +3 -2
- nat/agent/react_agent/register.py +2 -2
- nat/agent/react_agent/register_per_user_agent.py +104 -0
- nat/agent/reasoning_agent/reasoning_agent.py +1 -1
- nat/agent/register.py +3 -1
- nat/agent/responses_api_agent/__init__.py +1 -1
- nat/agent/responses_api_agent/register.py +1 -1
- nat/agent/rewoo_agent/agent.py +9 -4
- nat/agent/rewoo_agent/prompt.py +1 -1
- nat/agent/rewoo_agent/register.py +1 -1
- nat/agent/tool_calling_agent/agent.py +5 -4
- nat/agent/tool_calling_agent/register.py +1 -1
- nat/authentication/__init__.py +1 -1
- nat/authentication/api_key/__init__.py +1 -1
- nat/authentication/api_key/api_key_auth_provider.py +1 -1
- nat/authentication/api_key/api_key_auth_provider_config.py +22 -7
- nat/authentication/api_key/register.py +1 -1
- nat/authentication/credential_validator/__init__.py +1 -1
- nat/authentication/credential_validator/bearer_token_validator.py +1 -1
- nat/authentication/exceptions/__init__.py +1 -1
- nat/authentication/exceptions/api_key_exceptions.py +1 -1
- nat/authentication/http_basic_auth/http_basic_auth_provider.py +1 -1
- nat/authentication/http_basic_auth/register.py +1 -1
- nat/authentication/interfaces.py +1 -1
- nat/authentication/oauth2/__init__.py +1 -1
- nat/authentication/oauth2/oauth2_auth_code_flow_provider.py +1 -1
- nat/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +1 -1
- nat/authentication/oauth2/oauth2_resource_server_config.py +1 -1
- nat/authentication/oauth2/register.py +1 -1
- nat/authentication/register.py +1 -1
- nat/builder/builder.py +511 -1
- nat/builder/child_builder.py +385 -0
- nat/builder/component_utils.py +28 -4
- nat/builder/context.py +17 -1
- nat/builder/embedder.py +1 -1
- nat/builder/eval_builder.py +19 -7
- nat/builder/evaluator.py +1 -1
- nat/builder/framework_enum.py +2 -1
- nat/builder/front_end.py +1 -1
- nat/builder/function.py +40 -3
- nat/builder/function_base.py +1 -1
- nat/builder/function_info.py +1 -1
- nat/builder/intermediate_step_manager.py +1 -1
- nat/builder/llm.py +1 -1
- nat/builder/per_user_workflow_builder.py +843 -0
- nat/builder/retriever.py +1 -1
- nat/builder/sync_builder.py +571 -0
- nat/builder/user_interaction_manager.py +1 -1
- nat/builder/workflow.py +1 -1
- nat/builder/workflow_builder.py +536 -424
- nat/cli/__init__.py +1 -1
- nat/cli/cli_utils/config_override.py +1 -1
- nat/cli/cli_utils/validation.py +32 -1
- nat/cli/commands/configure/channel/add.py +1 -1
- nat/cli/commands/configure/channel/channel.py +1 -1
- nat/cli/commands/configure/channel/remove.py +1 -1
- nat/cli/commands/configure/channel/update.py +1 -1
- nat/cli/commands/configure/configure.py +1 -1
- nat/cli/commands/evaluate.py +87 -13
- nat/cli/commands/finetune.py +132 -0
- nat/cli/commands/info/__init__.py +1 -1
- nat/cli/commands/info/info.py +1 -1
- nat/cli/commands/info/list_channels.py +1 -1
- nat/cli/commands/info/list_components.py +1 -1
- nat/cli/commands/object_store/__init__.py +1 -1
- nat/cli/commands/object_store/object_store.py +1 -1
- nat/cli/commands/optimize.py +1 -1
- nat/cli/commands/{mcp → red_teaming}/__init__.py +1 -1
- nat/cli/commands/red_teaming/red_teaming.py +138 -0
- nat/cli/commands/red_teaming/red_teaming_utils.py +73 -0
- nat/cli/commands/registry/__init__.py +1 -1
- nat/cli/commands/registry/publish.py +1 -1
- nat/cli/commands/registry/pull.py +1 -1
- nat/cli/commands/registry/registry.py +1 -1
- nat/cli/commands/registry/remove.py +1 -1
- nat/cli/commands/registry/search.py +1 -1
- nat/cli/commands/sizing/__init__.py +1 -1
- nat/cli/commands/sizing/calc.py +1 -1
- nat/cli/commands/sizing/sizing.py +1 -1
- nat/cli/commands/start.py +1 -1
- nat/cli/commands/uninstall.py +1 -1
- nat/cli/commands/validate.py +1 -1
- nat/cli/commands/workflow/__init__.py +1 -1
- nat/cli/commands/workflow/workflow.py +1 -1
- nat/cli/commands/workflow/workflow_commands.py +3 -2
- nat/cli/entrypoint.py +15 -37
- nat/cli/main.py +2 -2
- nat/cli/plugin_loader.py +69 -0
- nat/cli/register_workflow.py +183 -5
- nat/cli/type_registry.py +169 -3
- nat/control_flow/register.py +1 -1
- nat/control_flow/router_agent/agent.py +1 -1
- nat/control_flow/router_agent/prompt.py +1 -1
- nat/control_flow/router_agent/register.py +1 -1
- nat/control_flow/sequential_executor.py +28 -7
- nat/data_models/__init__.py +1 -1
- nat/data_models/agent.py +1 -1
- nat/data_models/api_server.py +38 -3
- nat/data_models/authentication.py +1 -1
- nat/data_models/common.py +1 -1
- nat/data_models/component.py +7 -1
- nat/data_models/component_ref.py +34 -1
- nat/data_models/config.py +62 -1
- nat/data_models/dataset_handler.py +15 -2
- nat/data_models/discovery_metadata.py +1 -1
- nat/data_models/embedder.py +1 -1
- nat/data_models/evaluate.py +6 -1
- nat/data_models/evaluator.py +1 -1
- nat/data_models/finetuning.py +260 -0
- nat/data_models/front_end.py +1 -1
- nat/data_models/function.py +1 -1
- nat/data_models/function_dependencies.py +1 -1
- nat/data_models/gated_field_mixin.py +1 -1
- nat/data_models/interactive.py +1 -1
- nat/data_models/intermediate_step.py +29 -2
- nat/data_models/invocation_node.py +1 -1
- nat/data_models/llm.py +1 -1
- nat/data_models/logging.py +1 -1
- nat/data_models/memory.py +1 -1
- nat/data_models/middleware.py +3 -1
- nat/data_models/object_store.py +1 -1
- nat/data_models/openai_mcp.py +1 -1
- nat/data_models/optimizable.py +1 -1
- nat/data_models/optimizer.py +1 -1
- nat/data_models/profiler.py +1 -1
- nat/data_models/registry_handler.py +1 -1
- nat/data_models/retriever.py +1 -1
- nat/data_models/retry_mixin.py +1 -1
- nat/data_models/runtime_enum.py +1 -1
- nat/data_models/span.py +1 -1
- nat/data_models/step_adaptor.py +1 -1
- nat/data_models/streaming.py +1 -1
- nat/data_models/swe_bench_model.py +1 -1
- nat/data_models/telemetry_exporter.py +1 -1
- nat/data_models/thinking_mixin.py +1 -1
- nat/data_models/ttc_strategy.py +1 -1
- nat/embedder/azure_openai_embedder.py +1 -1
- nat/embedder/nim_embedder.py +1 -1
- nat/embedder/openai_embedder.py +1 -1
- nat/embedder/register.py +1 -1
- nat/eval/__init__.py +1 -1
- nat/eval/config.py +8 -1
- nat/eval/dataset_handler/dataset_downloader.py +1 -1
- nat/eval/dataset_handler/dataset_filter.py +1 -1
- nat/eval/dataset_handler/dataset_handler.py +4 -2
- nat/eval/evaluate.py +217 -80
- nat/eval/evaluator/__init__.py +1 -1
- nat/eval/evaluator/base_evaluator.py +2 -2
- nat/eval/evaluator/evaluator_model.py +3 -2
- nat/eval/intermediate_step_adapter.py +1 -1
- nat/eval/llm_validator.py +336 -0
- nat/eval/rag_evaluator/evaluate.py +17 -10
- nat/eval/rag_evaluator/register.py +1 -1
- nat/eval/red_teaming_evaluator/__init__.py +14 -0
- nat/eval/red_teaming_evaluator/data_models.py +66 -0
- nat/eval/red_teaming_evaluator/evaluate.py +327 -0
- nat/eval/red_teaming_evaluator/filter_conditions.py +75 -0
- nat/eval/red_teaming_evaluator/register.py +55 -0
- nat/eval/register.py +2 -1
- nat/eval/remote_workflow.py +1 -1
- nat/eval/runners/__init__.py +1 -1
- nat/eval/runners/config.py +1 -1
- nat/eval/runners/multi_eval_runner.py +1 -1
- nat/eval/runners/red_teaming_runner/__init__.py +24 -0
- nat/eval/runners/red_teaming_runner/config.py +282 -0
- nat/eval/runners/red_teaming_runner/report_utils.py +707 -0
- nat/eval/runners/red_teaming_runner/runner.py +867 -0
- nat/eval/runtime_evaluator/__init__.py +1 -1
- nat/eval/runtime_evaluator/evaluate.py +1 -1
- nat/eval/runtime_evaluator/register.py +1 -1
- nat/eval/runtime_event_subscriber.py +1 -1
- nat/eval/swe_bench_evaluator/evaluate.py +1 -1
- nat/eval/swe_bench_evaluator/register.py +1 -1
- nat/eval/trajectory_evaluator/evaluate.py +2 -2
- nat/eval/trajectory_evaluator/register.py +1 -1
- nat/eval/tunable_rag_evaluator/evaluate.py +5 -5
- nat/eval/tunable_rag_evaluator/register.py +1 -1
- nat/eval/usage_stats.py +1 -1
- nat/eval/utils/eval_trace_ctx.py +1 -1
- nat/eval/utils/output_uploader.py +1 -1
- nat/eval/utils/tqdm_position_registry.py +1 -1
- nat/eval/utils/weave_eval.py +1 -1
- nat/experimental/decorators/experimental_warning_decorator.py +1 -1
- nat/experimental/test_time_compute/editing/iterative_plan_refinement_editor.py +1 -1
- nat/experimental/test_time_compute/editing/llm_as_a_judge_editor.py +1 -1
- nat/experimental/test_time_compute/editing/motivation_aware_summarization.py +1 -1
- nat/experimental/test_time_compute/functions/execute_score_select_function.py +1 -1
- nat/experimental/test_time_compute/functions/multi_llm_judge_function.py +88 -0
- nat/experimental/test_time_compute/functions/plan_select_execute_function.py +1 -1
- nat/experimental/test_time_compute/functions/ttc_tool_orchestration_function.py +1 -1
- nat/experimental/test_time_compute/functions/ttc_tool_wrapper_function.py +1 -1
- nat/experimental/test_time_compute/models/editor_config.py +1 -1
- nat/experimental/test_time_compute/models/scoring_config.py +1 -1
- nat/experimental/test_time_compute/models/search_config.py +20 -2
- nat/experimental/test_time_compute/models/selection_config.py +33 -2
- nat/experimental/test_time_compute/models/stage_enums.py +1 -1
- nat/experimental/test_time_compute/models/strategy_base.py +1 -1
- nat/experimental/test_time_compute/models/tool_use_config.py +1 -1
- nat/experimental/test_time_compute/models/ttc_item.py +1 -1
- nat/experimental/test_time_compute/register.py +4 -1
- nat/experimental/test_time_compute/scoring/llm_based_agent_scorer.py +1 -1
- nat/experimental/test_time_compute/scoring/llm_based_plan_scorer.py +1 -1
- nat/experimental/test_time_compute/scoring/motivation_aware_scorer.py +1 -1
- nat/experimental/test_time_compute/search/multi_llm_generation.py +115 -0
- nat/experimental/test_time_compute/search/multi_llm_planner.py +1 -1
- nat/experimental/test_time_compute/search/multi_query_retrieval_search.py +1 -1
- nat/experimental/test_time_compute/search/single_shot_multi_plan_planner.py +1 -1
- nat/experimental/test_time_compute/selection/best_of_n_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_based_agent_output_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_based_output_merging_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_based_plan_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_judge_selection.py +127 -0
- nat/experimental/test_time_compute/selection/threshold_selector.py +1 -1
- nat/finetuning/__init__.py +24 -0
- nat/finetuning/finetuning_runtime.py +143 -0
- nat/finetuning/interfaces/__init__.py +24 -0
- nat/finetuning/interfaces/finetuning_runner.py +261 -0
- nat/finetuning/interfaces/trainer_adapter.py +103 -0
- nat/finetuning/interfaces/trajectory_builder.py +115 -0
- nat/finetuning/utils/__init__.py +15 -0
- nat/finetuning/utils/parsers/__init__.py +15 -0
- nat/finetuning/utils/parsers/adk_parser.py +141 -0
- nat/finetuning/utils/parsers/base_parser.py +238 -0
- nat/finetuning/utils/parsers/common.py +91 -0
- nat/finetuning/utils/parsers/langchain_parser.py +267 -0
- nat/finetuning/utils/parsers/llama_index_parser.py +218 -0
- nat/front_ends/__init__.py +1 -1
- nat/front_ends/console/__init__.py +1 -1
- nat/front_ends/console/authentication_flow_handler.py +1 -1
- nat/front_ends/console/console_front_end_config.py +4 -1
- nat/front_ends/console/console_front_end_plugin.py +5 -4
- nat/front_ends/console/register.py +1 -1
- nat/front_ends/cron/__init__.py +1 -1
- nat/front_ends/fastapi/__init__.py +1 -1
- nat/front_ends/fastapi/async_job.py +128 -0
- nat/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +1 -1
- nat/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +13 -9
- nat/front_ends/fastapi/dask_client_mixin.py +1 -1
- nat/front_ends/fastapi/fastapi_front_end_config.py +1 -1
- nat/front_ends/fastapi/fastapi_front_end_controller.py +1 -1
- nat/front_ends/fastapi/fastapi_front_end_plugin.py +25 -30
- nat/front_ends/fastapi/fastapi_front_end_plugin_worker.py +195 -60
- nat/front_ends/fastapi/html_snippets/__init__.py +1 -1
- nat/front_ends/fastapi/html_snippets/auth_code_grant_success.py +1 -1
- nat/front_ends/fastapi/intermediate_steps_subscriber.py +12 -1
- nat/front_ends/fastapi/job_store.py +23 -11
- nat/front_ends/fastapi/main.py +1 -1
- nat/front_ends/fastapi/message_handler.py +27 -4
- nat/front_ends/fastapi/message_validator.py +54 -2
- nat/front_ends/fastapi/register.py +1 -1
- nat/front_ends/fastapi/response_helpers.py +16 -15
- nat/front_ends/fastapi/step_adaptor.py +1 -1
- nat/front_ends/fastapi/utils.py +1 -1
- nat/front_ends/register.py +1 -2
- nat/front_ends/simple_base/__init__.py +1 -1
- nat/front_ends/simple_base/simple_front_end_plugin_base.py +6 -4
- nat/llm/aws_bedrock_llm.py +1 -1
- nat/llm/azure_openai_llm.py +10 -1
- nat/llm/dynamo_llm.py +363 -0
- nat/llm/huggingface_llm.py +177 -0
- nat/llm/litellm_llm.py +1 -1
- nat/llm/nim_llm.py +1 -1
- nat/llm/openai_llm.py +1 -1
- nat/llm/register.py +3 -1
- nat/llm/utils/__init__.py +1 -1
- nat/llm/utils/env_config_value.py +1 -1
- nat/llm/utils/error.py +1 -1
- nat/llm/utils/thinking.py +1 -1
- nat/memory/__init__.py +1 -1
- nat/memory/interfaces.py +1 -1
- nat/memory/models.py +1 -1
- nat/meta/pypi.md +1 -1
- nat/middleware/__init__.py +5 -5
- nat/middleware/cache/__init__.py +14 -0
- nat/middleware/{cache_middleware.py → cache/cache_middleware.py} +39 -42
- nat/middleware/cache/cache_middleware_config.py +44 -0
- nat/middleware/cache/register.py +33 -0
- nat/middleware/defense/__init__.py +14 -0
- nat/middleware/defense/defense_middleware.py +362 -0
- nat/middleware/defense/defense_middleware_content_guard.py +455 -0
- nat/middleware/defense/defense_middleware_data_models.py +91 -0
- nat/middleware/defense/defense_middleware_output_verifier.py +440 -0
- nat/middleware/defense/defense_middleware_pii.py +356 -0
- nat/middleware/defense/register.py +82 -0
- nat/middleware/dynamic/__init__.py +14 -0
- nat/middleware/dynamic/dynamic_function_middleware.py +962 -0
- nat/middleware/dynamic/dynamic_middleware_config.py +132 -0
- nat/middleware/dynamic/register.py +34 -0
- nat/middleware/function_middleware.py +236 -52
- nat/middleware/logging/__init__.py +14 -0
- nat/middleware/logging/logging_middleware.py +67 -0
- nat/middleware/logging/logging_middleware_config.py +28 -0
- nat/middleware/logging/register.py +33 -0
- nat/middleware/middleware.py +142 -28
- nat/middleware/red_teaming/__init__.py +14 -0
- nat/middleware/red_teaming/red_teaming_middleware.py +344 -0
- nat/middleware/red_teaming/red_teaming_middleware_config.py +112 -0
- nat/middleware/red_teaming/register.py +47 -0
- nat/middleware/register.py +7 -20
- nat/middleware/utils/__init__.py +14 -0
- nat/middleware/utils/workflow_inventory.py +155 -0
- nat/object_store/__init__.py +1 -1
- nat/object_store/in_memory_object_store.py +1 -1
- nat/object_store/interfaces.py +1 -1
- nat/object_store/models.py +1 -1
- nat/object_store/register.py +1 -1
- nat/observability/__init__.py +1 -1
- nat/observability/exporter/__init__.py +1 -1
- nat/observability/exporter/base_exporter.py +1 -1
- nat/observability/exporter/exporter.py +1 -1
- nat/observability/exporter/file_exporter.py +1 -1
- nat/observability/exporter/processing_exporter.py +1 -1
- nat/observability/exporter/raw_exporter.py +1 -1
- nat/observability/exporter/span_exporter.py +7 -1
- nat/observability/exporter_manager.py +1 -1
- nat/observability/mixin/__init__.py +1 -1
- nat/observability/mixin/batch_config_mixin.py +1 -1
- nat/observability/mixin/collector_config_mixin.py +1 -1
- nat/observability/mixin/file_mixin.py +1 -1
- nat/observability/mixin/file_mode.py +1 -1
- nat/observability/mixin/redaction_config_mixin.py +1 -1
- nat/observability/mixin/resource_conflict_mixin.py +1 -1
- nat/observability/mixin/serialize_mixin.py +1 -1
- nat/observability/mixin/tagging_config_mixin.py +1 -1
- nat/observability/mixin/type_introspection_mixin.py +1 -1
- nat/observability/processor/__init__.py +1 -1
- nat/observability/processor/batching_processor.py +1 -1
- nat/observability/processor/callback_processor.py +1 -1
- nat/observability/processor/falsy_batch_filter_processor.py +1 -1
- nat/observability/processor/intermediate_step_serializer.py +1 -1
- nat/observability/processor/processor.py +1 -1
- nat/observability/processor/processor_factory.py +1 -1
- nat/observability/processor/redaction/__init__.py +1 -1
- nat/observability/processor/redaction/contextual_redaction_processor.py +1 -1
- nat/observability/processor/redaction/contextual_span_redaction_processor.py +1 -1
- nat/observability/processor/redaction/redaction_processor.py +1 -1
- nat/observability/processor/redaction/span_header_redaction_processor.py +1 -1
- nat/observability/processor/span_tagging_processor.py +1 -1
- nat/observability/register.py +1 -1
- nat/observability/utils/__init__.py +1 -1
- nat/observability/utils/dict_utils.py +1 -1
- nat/observability/utils/time_utils.py +1 -1
- nat/profiler/calc/__init__.py +1 -1
- nat/profiler/calc/calc_runner.py +3 -3
- nat/profiler/calc/calculations.py +1 -1
- nat/profiler/calc/data_models.py +1 -1
- nat/profiler/calc/plot.py +30 -3
- nat/profiler/callbacks/agno_callback_handler.py +1 -1
- nat/profiler/callbacks/base_callback_class.py +1 -1
- nat/profiler/callbacks/langchain_callback_handler.py +33 -3
- nat/profiler/callbacks/llama_index_callback_handler.py +13 -10
- nat/profiler/callbacks/semantic_kernel_callback_handler.py +1 -1
- nat/profiler/callbacks/token_usage_base_model.py +1 -1
- nat/profiler/data_frame_row.py +1 -1
- nat/profiler/data_models.py +1 -1
- nat/profiler/decorators/framework_wrapper.py +16 -1
- nat/profiler/decorators/function_tracking.py +1 -1
- nat/profiler/forecasting/config.py +1 -1
- nat/profiler/forecasting/model_trainer.py +1 -1
- nat/profiler/forecasting/models/__init__.py +1 -1
- nat/profiler/forecasting/models/forecasting_base_model.py +1 -1
- nat/profiler/forecasting/models/linear_model.py +1 -1
- nat/profiler/forecasting/models/random_forest_regressor.py +1 -1
- nat/profiler/inference_metrics_model.py +1 -1
- nat/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +1 -1
- nat/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +1 -1
- nat/profiler/inference_optimization/data_models.py +1 -1
- nat/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +1 -1
- nat/profiler/inference_optimization/experimental/prefix_span_analysis.py +1 -1
- nat/profiler/inference_optimization/llm_metrics.py +1 -1
- nat/profiler/inference_optimization/prompt_caching.py +1 -1
- nat/profiler/inference_optimization/token_uniqueness.py +1 -1
- nat/profiler/inference_optimization/workflow_runtimes.py +1 -1
- nat/profiler/intermediate_property_adapter.py +1 -1
- nat/profiler/parameter_optimization/optimizable_utils.py +1 -1
- nat/profiler/parameter_optimization/optimizer_runtime.py +1 -1
- nat/profiler/parameter_optimization/parameter_optimizer.py +1 -1
- nat/profiler/parameter_optimization/parameter_selection.py +1 -1
- nat/profiler/parameter_optimization/pareto_visualizer.py +1 -1
- nat/profiler/parameter_optimization/prompt_optimizer.py +1 -1
- nat/profiler/parameter_optimization/update_helpers.py +1 -1
- nat/profiler/profile_runner.py +1 -1
- nat/profiler/utils.py +1 -1
- nat/registry_handlers/local/local_handler.py +1 -1
- nat/registry_handlers/local/register_local.py +1 -1
- nat/registry_handlers/metadata_factory.py +1 -1
- nat/registry_handlers/package_utils.py +1 -1
- nat/registry_handlers/pypi/pypi_handler.py +1 -1
- nat/registry_handlers/pypi/register_pypi.py +1 -1
- nat/registry_handlers/register.py +1 -1
- nat/registry_handlers/registry_handler_base.py +1 -1
- nat/registry_handlers/rest/register_rest.py +1 -1
- nat/registry_handlers/rest/rest_handler.py +1 -1
- nat/registry_handlers/schemas/headers.py +1 -1
- nat/registry_handlers/schemas/package.py +1 -1
- nat/registry_handlers/schemas/publish.py +1 -1
- nat/registry_handlers/schemas/pull.py +1 -1
- nat/registry_handlers/schemas/remove.py +1 -1
- nat/registry_handlers/schemas/search.py +1 -1
- nat/registry_handlers/schemas/status.py +1 -1
- nat/retriever/interface.py +1 -1
- nat/retriever/milvus/__init__.py +1 -1
- nat/retriever/milvus/register.py +1 -1
- nat/retriever/milvus/retriever.py +1 -1
- nat/retriever/models.py +1 -1
- nat/retriever/nemo_retriever/__init__.py +1 -1
- nat/retriever/nemo_retriever/register.py +1 -1
- nat/retriever/nemo_retriever/retriever.py +5 -5
- nat/retriever/register.py +1 -1
- nat/runtime/__init__.py +1 -1
- nat/runtime/loader.py +10 -3
- nat/runtime/metrics.py +180 -0
- nat/runtime/runner.py +1 -5
- nat/runtime/session.py +451 -32
- nat/runtime/user_metadata.py +1 -1
- nat/settings/global_settings.py +1 -1
- nat/tool/chat_completion.py +1 -1
- nat/tool/code_execution/README.md +1 -1
- nat/tool/code_execution/code_sandbox.py +1 -1
- nat/tool/code_execution/local_sandbox/Dockerfile.sandbox +1 -1
- nat/tool/code_execution/local_sandbox/__init__.py +1 -1
- nat/tool/code_execution/local_sandbox/local_sandbox_server.py +1 -1
- nat/tool/code_execution/local_sandbox/start_local_sandbox.sh +1 -1
- nat/tool/code_execution/register.py +1 -1
- nat/tool/code_execution/utils.py +1 -1
- nat/tool/datetime_tools.py +1 -1
- nat/tool/document_search.py +1 -1
- nat/tool/github_tools.py +1 -1
- nat/tool/memory_tools/add_memory_tool.py +1 -1
- nat/tool/memory_tools/delete_memory_tool.py +1 -1
- nat/tool/memory_tools/get_memory_tool.py +1 -1
- nat/tool/nvidia_rag.py +2 -2
- nat/tool/register.py +1 -1
- nat/tool/retriever.py +1 -1
- nat/tool/server_tools.py +1 -1
- nat/utils/__init__.py +8 -5
- nat/utils/callable_utils.py +1 -1
- nat/utils/data_models/schema_validator.py +1 -1
- nat/utils/debugging_utils.py +1 -1
- nat/utils/decorators.py +1 -1
- nat/utils/dump_distro_mapping.py +1 -1
- nat/utils/exception_handlers/automatic_retries.py +3 -3
- nat/utils/exception_handlers/schemas.py +1 -1
- nat/utils/io/model_processing.py +1 -1
- nat/utils/io/supress_logs.py +33 -0
- nat/utils/io/yaml_tools.py +1 -1
- nat/utils/log_levels.py +1 -1
- nat/utils/log_utils.py +13 -1
- nat/utils/metadata_utils.py +1 -1
- nat/utils/optional_imports.py +1 -1
- nat/utils/producer_consumer_queue.py +1 -1
- nat/utils/reactive/base/observable_base.py +1 -1
- nat/utils/reactive/base/observer_base.py +1 -1
- nat/utils/reactive/base/subject_base.py +1 -1
- nat/utils/reactive/observable.py +1 -1
- nat/utils/reactive/observer.py +1 -1
- nat/utils/reactive/subject.py +1 -1
- nat/utils/reactive/subscription.py +1 -1
- nat/utils/responses_api.py +1 -1
- nat/utils/settings/global_settings.py +1 -1
- nat/utils/string_utils.py +1 -1
- nat/utils/type_converter.py +18 -5
- nat/utils/type_utils.py +1 -1
- nat/utils/url_utils.py +1 -1
- {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/METADATA +39 -14
- nvidia_nat-1.4.0a20260113.dist-info/RECORD +547 -0
- nvidia_nat-1.4.0a20260113.dist-info/entry_points.txt +38 -0
- nat/cli/commands/mcp/mcp.py +0 -986
- nat/front_ends/mcp/introspection_token_verifier.py +0 -73
- nat/front_ends/mcp/mcp_front_end_config.py +0 -109
- nat/front_ends/mcp/mcp_front_end_plugin.py +0 -155
- nat/front_ends/mcp/mcp_front_end_plugin_worker.py +0 -388
- nat/front_ends/mcp/memory_profiler.py +0 -320
- nat/front_ends/mcp/register.py +0 -27
- nat/front_ends/mcp/tool_converter.py +0 -321
- nvidia_nat-1.4.0a20251120.dist-info/RECORD +0 -488
- nvidia_nat-1.4.0a20251120.dist-info/entry_points.txt +0 -23
- {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/WHEEL +0 -0
- {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
- {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE.md +0 -0
- {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -27,13 +27,16 @@ from nat.data_models.api_server import ChatResponse
|
|
|
27
27
|
from nat.data_models.api_server import ChatResponseChunk
|
|
28
28
|
from nat.data_models.api_server import Error
|
|
29
29
|
from nat.data_models.api_server import ErrorTypes
|
|
30
|
+
from nat.data_models.api_server import ObservabilityTraceContent
|
|
30
31
|
from nat.data_models.api_server import ResponseIntermediateStep
|
|
32
|
+
from nat.data_models.api_server import ResponseObservabilityTrace
|
|
31
33
|
from nat.data_models.api_server import ResponsePayloadOutput
|
|
32
34
|
from nat.data_models.api_server import SystemIntermediateStepContent
|
|
33
35
|
from nat.data_models.api_server import SystemResponseContent
|
|
34
36
|
from nat.data_models.api_server import TextContent
|
|
35
37
|
from nat.data_models.api_server import WebSocketMessageStatus
|
|
36
38
|
from nat.data_models.api_server import WebSocketMessageType
|
|
39
|
+
from nat.data_models.api_server import WebSocketObservabilityTraceMessage
|
|
37
40
|
from nat.data_models.api_server import WebSocketSystemInteractionMessage
|
|
38
41
|
from nat.data_models.api_server import WebSocketSystemIntermediateStepMessage
|
|
39
42
|
from nat.data_models.api_server import WebSocketSystemResponseTokenMessage
|
|
@@ -67,11 +70,24 @@ class MessageValidator:
|
|
|
67
70
|
WebSocketMessageType.INTERMEDIATE_STEP_MESSAGE: WebSocketSystemIntermediateStepMessage,
|
|
68
71
|
WebSocketMessageType.SYSTEM_INTERACTION_MESSAGE: WebSocketSystemInteractionMessage,
|
|
69
72
|
WebSocketMessageType.USER_INTERACTION_MESSAGE: WebSocketUserInteractionResponseMessage,
|
|
70
|
-
WebSocketMessageType.
|
|
73
|
+
WebSocketMessageType.OBSERVABILITY_TRACE_MESSAGE: WebSocketObservabilityTraceMessage,
|
|
74
|
+
WebSocketMessageType.ERROR_MESSAGE: Error,
|
|
71
75
|
}
|
|
72
76
|
|
|
73
77
|
self._message_parent_id: str = "default_id"
|
|
74
78
|
|
|
79
|
+
def _get_observability_trace_id_from_context(self) -> str | None:
|
|
80
|
+
"""
|
|
81
|
+
Retrieves observability_trace_id from Context
|
|
82
|
+
|
|
83
|
+
:return: observability_trace_id if available, None otherwise.
|
|
84
|
+
"""
|
|
85
|
+
try:
|
|
86
|
+
from nat.builder.context import Context
|
|
87
|
+
return Context.get().observability_trace_id
|
|
88
|
+
except (ImportError, AttributeError, KeyError):
|
|
89
|
+
return None
|
|
90
|
+
|
|
75
91
|
async def validate_message(self, message: dict[str, Any]) -> BaseModel:
|
|
76
92
|
"""
|
|
77
93
|
Validates an incoming WebSocket message against its expected schema.
|
|
@@ -147,6 +163,9 @@ class MessageValidator:
|
|
|
147
163
|
elif (isinstance(data_model, ResponseIntermediateStep)):
|
|
148
164
|
validated_message_content = SystemIntermediateStepContent(name=data_model.name,
|
|
149
165
|
payload=data_model.payload)
|
|
166
|
+
elif (isinstance(data_model, ResponseObservabilityTrace)):
|
|
167
|
+
validated_message_content = ObservabilityTraceContent(
|
|
168
|
+
observability_trace_id=data_model.observability_trace_id)
|
|
150
169
|
elif (isinstance(data_model, HumanPromptBase)):
|
|
151
170
|
validated_message_content = data_model
|
|
152
171
|
elif (isinstance(data_model, SystemResponseContent)):
|
|
@@ -212,6 +231,9 @@ class MessageValidator:
|
|
|
212
231
|
elif (isinstance(data_model, ResponseIntermediateStep)):
|
|
213
232
|
validated_message_type = WebSocketMessageType.INTERMEDIATE_STEP_MESSAGE
|
|
214
233
|
|
|
234
|
+
elif (isinstance(data_model, ResponseObservabilityTrace)):
|
|
235
|
+
validated_message_type = WebSocketMessageType.OBSERVABILITY_TRACE_MESSAGE
|
|
236
|
+
|
|
215
237
|
elif (isinstance(data_model, HumanPromptBase)):
|
|
216
238
|
validated_message_type = WebSocketMessageType.SYSTEM_INTERACTION_MESSAGE
|
|
217
239
|
else:
|
|
@@ -349,3 +371,33 @@ class MessageValidator:
|
|
|
349
371
|
except Exception as e:
|
|
350
372
|
logger.exception("Error creating system interaction message: %s", str(e))
|
|
351
373
|
return None
|
|
374
|
+
|
|
375
|
+
async def create_observability_trace_message(
|
|
376
|
+
self,
|
|
377
|
+
*,
|
|
378
|
+
message_id: str | None = str(uuid.uuid4()),
|
|
379
|
+
parent_id: str = "default",
|
|
380
|
+
conversation_id: str | None = None,
|
|
381
|
+
content: ObservabilityTraceContent,
|
|
382
|
+
timestamp: str = str(datetime.datetime.now(datetime.UTC))
|
|
383
|
+
) -> WebSocketObservabilityTraceMessage | None:
|
|
384
|
+
"""
|
|
385
|
+
Creates an observability trace message.
|
|
386
|
+
|
|
387
|
+
:param message_id: Unique identifier for the message (default: generated UUID).
|
|
388
|
+
:param parent_id: ID of the user message that spawned child messages.
|
|
389
|
+
:param conversation_id: ID of the conversation this message belongs to (default: None).
|
|
390
|
+
:param content: Message content.
|
|
391
|
+
:param timestamp: Timestamp of the message (default: current UTC time).
|
|
392
|
+
:return: A WebSocketObservabilityTraceMessage instance.
|
|
393
|
+
"""
|
|
394
|
+
try:
|
|
395
|
+
return WebSocketObservabilityTraceMessage(id=message_id,
|
|
396
|
+
parent_id=parent_id,
|
|
397
|
+
conversation_id=conversation_id,
|
|
398
|
+
content=content,
|
|
399
|
+
timestamp=timestamp)
|
|
400
|
+
|
|
401
|
+
except Exception as e:
|
|
402
|
+
logger.exception("Error creating observability trace message: %s", str(e))
|
|
403
|
+
return None
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -23,20 +23,20 @@ from nat.data_models.api_server import ResponseSerializable
|
|
|
23
23
|
from nat.data_models.step_adaptor import StepAdaptorConfig
|
|
24
24
|
from nat.front_ends.fastapi.intermediate_steps_subscriber import pull_intermediate
|
|
25
25
|
from nat.front_ends.fastapi.step_adaptor import StepAdaptor
|
|
26
|
-
from nat.runtime.session import
|
|
26
|
+
from nat.runtime.session import Session
|
|
27
27
|
from nat.utils.producer_consumer_queue import AsyncIOProducerConsumerQueue
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
async def generate_streaming_response_as_str(payload: typing.Any,
|
|
31
31
|
*,
|
|
32
|
-
|
|
32
|
+
session: Session,
|
|
33
33
|
streaming: bool,
|
|
34
34
|
step_adaptor: StepAdaptor = StepAdaptor(StepAdaptorConfig()),
|
|
35
35
|
result_type: type | None = None,
|
|
36
36
|
output_type: type | None = None) -> AsyncGenerator[str]:
|
|
37
37
|
|
|
38
38
|
async for item in generate_streaming_response(payload,
|
|
39
|
-
|
|
39
|
+
session=session,
|
|
40
40
|
streaming=streaming,
|
|
41
41
|
step_adaptor=step_adaptor,
|
|
42
42
|
result_type=result_type,
|
|
@@ -51,13 +51,13 @@ async def generate_streaming_response_as_str(payload: typing.Any,
|
|
|
51
51
|
|
|
52
52
|
async def generate_streaming_response(payload: typing.Any,
|
|
53
53
|
*,
|
|
54
|
-
|
|
54
|
+
session: Session,
|
|
55
55
|
streaming: bool,
|
|
56
56
|
step_adaptor: StepAdaptor = StepAdaptor(StepAdaptorConfig()),
|
|
57
57
|
result_type: type | None = None,
|
|
58
58
|
output_type: type | None = None) -> AsyncGenerator[ResponseSerializable]:
|
|
59
59
|
|
|
60
|
-
async with
|
|
60
|
+
async with session.run(payload) as runner:
|
|
61
61
|
|
|
62
62
|
q: AsyncIOProducerConsumerQueue[ResponseSerializable] = AsyncIOProducerConsumerQueue()
|
|
63
63
|
|
|
@@ -65,7 +65,7 @@ async def generate_streaming_response(payload: typing.Any,
|
|
|
65
65
|
intermediate_complete = await pull_intermediate(q, step_adaptor)
|
|
66
66
|
|
|
67
67
|
async def pull_result():
|
|
68
|
-
if
|
|
68
|
+
if session.workflow.has_streaming_output and streaming:
|
|
69
69
|
async for chunk in runner.result_stream(to_type=output_type):
|
|
70
70
|
await q.put(chunk)
|
|
71
71
|
else:
|
|
@@ -107,19 +107,20 @@ async def generate_streaming_response(payload: typing.Any,
|
|
|
107
107
|
|
|
108
108
|
async def generate_single_response(
|
|
109
109
|
payload: typing.Any,
|
|
110
|
-
|
|
110
|
+
session: Session,
|
|
111
111
|
result_type: type | None = None,
|
|
112
112
|
) -> typing.Any:
|
|
113
|
-
|
|
113
|
+
|
|
114
|
+
if not session.workflow.has_single_output:
|
|
114
115
|
raise ValueError("Cannot get a single output value for streaming workflows")
|
|
115
116
|
|
|
116
|
-
async with
|
|
117
|
+
async with session.run(payload) as runner:
|
|
117
118
|
return await runner.result(to_type=result_type)
|
|
118
119
|
|
|
119
120
|
|
|
120
121
|
async def generate_streaming_response_full(payload: typing.Any,
|
|
121
122
|
*,
|
|
122
|
-
|
|
123
|
+
session: Session,
|
|
123
124
|
streaming: bool,
|
|
124
125
|
result_type: type | None = None,
|
|
125
126
|
output_type: type | None = None,
|
|
@@ -137,14 +138,14 @@ async def generate_streaming_response_full(payload: typing.Any,
|
|
|
137
138
|
else:
|
|
138
139
|
allowed_types = set(filter_steps.split(','))
|
|
139
140
|
|
|
140
|
-
async with
|
|
141
|
+
async with session.run(payload) as runner:
|
|
141
142
|
q: AsyncIOProducerConsumerQueue[ResponseSerializable] = AsyncIOProducerConsumerQueue()
|
|
142
143
|
|
|
143
144
|
# Start the intermediate stream without step adaptor
|
|
144
145
|
intermediate_complete = await pull_intermediate(q, None)
|
|
145
146
|
|
|
146
147
|
async def pull_result():
|
|
147
|
-
if
|
|
148
|
+
if session.workflow.has_streaming_output and streaming:
|
|
148
149
|
async for chunk in runner.result_stream(to_type=output_type):
|
|
149
150
|
await q.put(chunk)
|
|
150
151
|
else:
|
|
@@ -174,7 +175,7 @@ async def generate_streaming_response_full(payload: typing.Any,
|
|
|
174
175
|
|
|
175
176
|
async def generate_streaming_response_full_as_str(payload: typing.Any,
|
|
176
177
|
*,
|
|
177
|
-
|
|
178
|
+
session: Session,
|
|
178
179
|
streaming: bool,
|
|
179
180
|
result_type: type | None = None,
|
|
180
181
|
output_type: type | None = None,
|
|
@@ -183,7 +184,7 @@ async def generate_streaming_response_full_as_str(payload: typing.Any,
|
|
|
183
184
|
Similar to generate_streaming_response but converts the response to a string format.
|
|
184
185
|
"""
|
|
185
186
|
async for item in generate_streaming_response_full(payload,
|
|
186
|
-
|
|
187
|
+
session=session,
|
|
187
188
|
streaming=streaming,
|
|
188
189
|
result_type=result_type,
|
|
189
190
|
output_type=output_type,
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/front_ends/fastapi/utils.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/front_ends/register.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -18,4 +18,3 @@
|
|
|
18
18
|
|
|
19
19
|
from .console import register as console_register
|
|
20
20
|
from .fastapi import register as fastapi_register
|
|
21
|
-
from .mcp import register as mcp_register
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -47,9 +47,11 @@ class SimpleFrontEndPluginBase(FrontEndBase[FrontEndConfigT], ABC):
|
|
|
47
47
|
|
|
48
48
|
click.echo(stream.getvalue())
|
|
49
49
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
50
|
+
session_manager = await SessionManager.create(config=self.full_config, shared_builder=builder)
|
|
51
|
+
try:
|
|
52
|
+
await self.run_workflow(session_manager)
|
|
53
|
+
finally:
|
|
54
|
+
await session_manager.shutdown()
|
|
53
55
|
|
|
54
56
|
@abstractmethod
|
|
55
57
|
async def run_workflow(self, session_manager: SessionManager):
|
nat/llm/aws_bedrock_llm.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/llm/azure_openai_llm.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
from pydantic import AliasChoices
|
|
17
17
|
from pydantic import ConfigDict
|
|
18
18
|
from pydantic import Field
|
|
19
|
+
from pydantic import computed_field
|
|
19
20
|
|
|
20
21
|
from nat.builder.builder import Builder
|
|
21
22
|
from nat.builder.llm import LLMProviderInfo
|
|
@@ -59,6 +60,14 @@ class AzureOpenAIModelConfig(
|
|
|
59
60
|
description="Top-p for distribution sampling.",
|
|
60
61
|
space=SearchSpace(high=1.0, low=0.5, step=0.1))
|
|
61
62
|
|
|
63
|
+
@computed_field
@property
def model_name(self) -> str:
    """
    Returns the model name for compatibility with other parts of the code base which expect a model_name attribute.
    """
    # This property only forwards `azure_deployment`; it is exposed through
    # `computed_field` rather than declared as a separate model field.
    return self.azure_deployment
|
|
70
|
+
|
|
62
71
|
|
|
63
72
|
@register_llm_provider(config_type=AzureOpenAIModelConfig)
|
|
64
73
|
async def azure_openai_llm(config: AzureOpenAIModelConfig, _builder: Builder):
|
nat/llm/dynamo_llm.py
ADDED
|
@@ -0,0 +1,363 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
"""
|
|
16
|
+
Dynamo LLM provider with automatic prefix header injection for KV cache optimization.
|
|
17
|
+
|
|
18
|
+
This module provides a specialized OpenAI-compatible LLM that sends Dynamo prefix headers
|
|
19
|
+
for optimal KV cache management and request routing. The prefix parameters are optimizable
|
|
20
|
+
via the NAT optimizer.
|
|
21
|
+
|
|
22
|
+
The implementation uses httpx event hooks to inject headers at the HTTP transport level,
|
|
23
|
+
making it framework-agnostic (works with LangChain, LlamaIndex, etc.).
|
|
24
|
+
|
|
25
|
+
Dynamo Prefix Parameters
|
|
26
|
+
-------------------------
|
|
27
|
+
|
|
28
|
+
prefix_osl (Output Sequence Length)
|
|
29
|
+
Hint for expected response length:
|
|
30
|
+
|
|
31
|
+
- LOW: decode_cost=1.0, short responses
|
|
32
|
+
- MEDIUM: decode_cost=2.0, typical responses
|
|
33
|
+
- HIGH: decode_cost=3.0, long responses
|
|
34
|
+
|
|
35
|
+
prefix_iat (Inter-Arrival Time)
|
|
36
|
+
Hint for request pacing:
|
|
37
|
+
|
|
38
|
+
- LOW: iat_factor=1.5, rapid bursts -> high worker stickiness
|
|
39
|
+
- MEDIUM: iat_factor=1.0, normal pacing
|
|
40
|
+
- HIGH: iat_factor=0.6, slow requests -> more exploration
|
|
41
|
+
|
|
42
|
+
prefix_total_requests
|
|
43
|
+
Expected requests per conversation:
|
|
44
|
+
|
|
45
|
+
- Higher values increase KV cache affinity and worker stickiness
|
|
46
|
+
- Lower values allow more load balancing
|
|
47
|
+
"""
|
|
48
|
+
|
|
49
|
+
import logging
|
|
50
|
+
import uuid
|
|
51
|
+
from collections.abc import Callable
|
|
52
|
+
from collections.abc import Coroutine
|
|
53
|
+
from collections.abc import Iterator
|
|
54
|
+
from contextlib import contextmanager
|
|
55
|
+
from contextvars import ContextVar
|
|
56
|
+
from typing import TYPE_CHECKING
|
|
57
|
+
from typing import Any
|
|
58
|
+
from typing import Literal
|
|
59
|
+
|
|
60
|
+
if TYPE_CHECKING:
|
|
61
|
+
import httpx
|
|
62
|
+
|
|
63
|
+
from pydantic import Field
|
|
64
|
+
|
|
65
|
+
from nat.builder.builder import Builder
|
|
66
|
+
from nat.builder.llm import LLMProviderInfo
|
|
67
|
+
from nat.cli.register_workflow import register_llm_provider
|
|
68
|
+
from nat.data_models.optimizable import OptimizableField
|
|
69
|
+
from nat.data_models.optimizable import SearchSpace
|
|
70
|
+
from nat.llm.openai_llm import OpenAIModelConfig
|
|
71
|
+
|
|
72
|
+
logger = logging.getLogger(__name__)
|
|
73
|
+
|
|
74
|
+
# Define valid prefix hint values
|
|
75
|
+
PrefixLevel = Literal["LOW", "MEDIUM", "HIGH"]
|
|
76
|
+
|
|
77
|
+
# =============================================================================
|
|
78
|
+
# CONTEXT MANAGEMENT FOR DYNAMO PREFIX ID
|
|
79
|
+
# =============================================================================
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class DynamoPrefixContext:
|
|
83
|
+
"""
|
|
84
|
+
Singleton class for managing Dynamo prefix IDs across LLM calls.
|
|
85
|
+
|
|
86
|
+
This allows evaluation code to set a prefix ID that persists across all LLM
|
|
87
|
+
calls for a single evaluation question (multi-turn conversation).
|
|
88
|
+
|
|
89
|
+
Usage::
|
|
90
|
+
|
|
91
|
+
from nat.llm.dynamo_llm import DynamoPrefixContext
|
|
92
|
+
|
|
93
|
+
# Set prefix ID at the start of each evaluation question
|
|
94
|
+
DynamoPrefixContext.set("eval-q001-abc123")
|
|
95
|
+
|
|
96
|
+
# ... perform LLM calls ...
|
|
97
|
+
|
|
98
|
+
# Clear when done
|
|
99
|
+
DynamoPrefixContext.clear()
|
|
100
|
+
|
|
101
|
+
# Or use as a context manager
|
|
102
|
+
with DynamoPrefixContext.scope("eval-q001-abc123"):
|
|
103
|
+
# ... perform LLM calls ...
|
|
104
|
+
"""
|
|
105
|
+
|
|
106
|
+
_current_prefix_id: ContextVar[str | None] = ContextVar('dynamo_prefix_id', default=None)
|
|
107
|
+
|
|
108
|
+
@classmethod
|
|
109
|
+
def set(cls, prefix_id: str) -> None:
|
|
110
|
+
"""
|
|
111
|
+
Set the Dynamo prefix ID for the current context.
|
|
112
|
+
|
|
113
|
+
Call this at the start of each evaluation question to ensure all LLM calls
|
|
114
|
+
for that question share the same prefix ID (enabling KV cache reuse).
|
|
115
|
+
|
|
116
|
+
Args:
|
|
117
|
+
prefix_id: The unique prefix ID (e.g., "eval-q001-abc123")
|
|
118
|
+
"""
|
|
119
|
+
cls._current_prefix_id.set(prefix_id)
|
|
120
|
+
logger.debug("Set Dynamo prefix ID: %s", prefix_id)
|
|
121
|
+
|
|
122
|
+
@classmethod
|
|
123
|
+
def clear(cls) -> None:
|
|
124
|
+
"""Clear the current Dynamo prefix ID context."""
|
|
125
|
+
cls._current_prefix_id.set(None)
|
|
126
|
+
logger.debug("Cleared Dynamo prefix ID")
|
|
127
|
+
|
|
128
|
+
@classmethod
|
|
129
|
+
def get(cls) -> str | None:
|
|
130
|
+
"""Get the current Dynamo prefix ID from context, if any."""
|
|
131
|
+
return cls._current_prefix_id.get()
|
|
132
|
+
|
|
133
|
+
@classmethod
|
|
134
|
+
@contextmanager
|
|
135
|
+
def scope(cls, prefix_id: str) -> Iterator[None]:
|
|
136
|
+
"""
|
|
137
|
+
Context manager for scoped prefix ID usage.
|
|
138
|
+
|
|
139
|
+
Automatically sets the prefix ID on entry and clears it on exit,
|
|
140
|
+
ensuring proper cleanup even if exceptions occur.
|
|
141
|
+
|
|
142
|
+
Args:
|
|
143
|
+
prefix_id: The unique prefix ID for this scope
|
|
144
|
+
|
|
145
|
+
Yields:
|
|
146
|
+
None
|
|
147
|
+
|
|
148
|
+
Usage:
|
|
149
|
+
with DynamoPrefixContext.scope("eval-q001"):
|
|
150
|
+
# All LLM calls here will use "eval-q001" prefix
|
|
151
|
+
await llm.ainvoke(...)
|
|
152
|
+
"""
|
|
153
|
+
cls.set(prefix_id)
|
|
154
|
+
try:
|
|
155
|
+
yield
|
|
156
|
+
finally:
|
|
157
|
+
cls.clear()
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
# =============================================================================
|
|
161
|
+
# DYNAMO MODEL CONFIGURATION
|
|
162
|
+
# =============================================================================
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
class DynamoModelConfig(OpenAIModelConfig, name="dynamo"):
    """
    A Dynamo LLM provider with automatic prefix header injection for KV cache optimization.

    This is a specialized OpenAI-compatible LLM that sends Dynamo prefix headers
    for optimal KV cache management and request routing. Prefix headers are enabled
    by default using the template "nat-dynamo-{uuid}". The prefix routing parameters
    (prefix_total_requests, prefix_osl, prefix_iat) are optimizable via the NAT optimizer.

    To disable prefix headers, set prefix_template to null/None in your config.
    """

    # =========================================================================
    # DYNAMO PREFIX PARAMETERS
    # =========================================================================

    # Plain (non-optimizable) field; None is an accepted value.
    prefix_template: str | None = Field(
        default="nat-dynamo-{uuid}",
        description="Template for prefix ID. The {uuid} placeholder will be replaced with a unique ID. "
        "Prefix headers are sent by default for KV cache optimization. "
        "Set to null/None to disable prefix header injection.",
    )

    # Validation accepts 1..50 (ge/le), while the optimizer search space declared
    # below only spans low=1, high=20 with step=5.
    prefix_total_requests: int = OptimizableField(
        default=10,
        ge=1,
        le=50,
        description=("Expected number of requests for this conversation/prefix. "
                     "Higher values increase worker stickiness and KV cache locality. "
                     "Lower values allow more load balancing across workers."),
        space=SearchSpace(low=1, high=20, step=5))

    # Restricted to the PrefixLevel literals; the search space enumerates the same three values.
    prefix_osl: PrefixLevel = OptimizableField(default="MEDIUM",
                                               description=("Output Sequence Length hint for the Dynamo router. "
                                                            "LOW=short responses (decode_cost=1.0), "
                                                            "MEDIUM=typical (decode_cost=2.0), "
                                                            "HIGH=long responses (decode_cost=3.0)."),
                                               space=SearchSpace(values=["LOW", "MEDIUM", "HIGH"]))

    # Restricted to the PrefixLevel literals; the search space enumerates the same three values.
    prefix_iat: PrefixLevel = OptimizableField(default="MEDIUM",
                                               description=("Inter-Arrival Time hint for the Dynamo router. "
                                                            "LOW=rapid bursts (iat_factor=1.5, high stickiness), "
                                                            "MEDIUM=normal (iat_factor=1.0), "
                                                            "HIGH=slow requests (iat_factor=0.6, more exploration)."),
                                               space=SearchSpace(values=["LOW", "MEDIUM", "HIGH"]))

    # Must be strictly positive (gt=0.0); defaults to 600 seconds.
    request_timeout: float = Field(
        default=600.0,
        gt=0.0,
        description="HTTP request timeout in seconds for LLM requests.",
    )

    # =========================================================================
    # UTILITY METHODS
    # =========================================================================

    @staticmethod
    def get_dynamo_field_names() -> frozenset[str]:
        """
        Get the set of Dynamo-specific field names for model_dump exclusion.

        Use this when building config dicts for framework clients to exclude
        Dynamo-specific parameters that should not be passed to the underlying client.

        Returns:
            A frozenset of Dynamo-specific field names.

        Example::

            config_dict = config.model_dump(
                exclude={"type", "thinking", *DynamoModelConfig.get_dynamo_field_names()},
                ...
            )
        """
        # NOTE: this list is a hard-coded literal, not derived from the model's
        # fields, so it has to be updated by hand whenever a field is added to
        # or removed from the declarations above.
        return frozenset({
            "prefix_template",
            "prefix_total_requests",
            "prefix_osl",
            "prefix_iat",
            "request_timeout",
        })
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
# =============================================================================
|
|
249
|
+
# HTTPX EVENT HOOK FOR HEADER INJECTION
|
|
250
|
+
# =============================================================================
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def _create_dynamo_request_hook(
    prefix_template: str | None,
    total_requests: int,
    osl: str,
    iat: str,
) -> Callable[["httpx.Request"], Coroutine[Any, Any, None]]:
    """
    Build the httpx ``request`` event hook that stamps Dynamo prefix headers.

    A default prefix ID is computed exactly once, when this factory runs, and is
    captured by the returned coroutine function. Every request passing through
    the same hook therefore carries the same default prefix ID, unless
    ``DynamoPrefixContext`` currently holds a truthy prefix ID, in which case
    that value is used for the request instead.

    Args:
        prefix_template: Template string with {uuid} placeholder. When falsy,
            the built-in "nat-dynamo-<id>" form is used for the default ID.
        total_requests: Expected number of requests for this prefix
        osl: Output sequence length hint (LOW/MEDIUM/HIGH); upper-cased before sending
        iat: Inter-arrival time hint (LOW/MEDIUM/HIGH); upper-cased before sending

    Returns:
        An async function suitable for use as an httpx event hook.
    """
    # First 16 hex characters of a random UUID, generated once per hook.
    token = uuid.uuid4().hex[:16]
    default_prefix_id = prefix_template.format(uuid=token) if prefix_template else f"nat-dynamo-{token}"

    logger.debug("Created Dynamo request hook with default prefix ID: %s", default_prefix_id)

    async def on_request(request):
        """Inject Dynamo prefix headers before each request."""
        # A prefix ID set through the context takes priority over the per-hook default.
        override = DynamoPrefixContext.get()

        if not override:
            prefix_id = default_prefix_id
            logger.debug("Using default prefix ID: %s", prefix_id)
        else:
            prefix_id = override
            logger.debug("Using context prefix ID: %s", prefix_id)

        request.headers["x-prefix-id"] = prefix_id
        request.headers["x-prefix-total-requests"] = str(total_requests)
        osl_hint = osl.upper()
        request.headers["x-prefix-osl"] = osl_hint
        iat_hint = iat.upper()
        request.headers["x-prefix-iat"] = iat_hint

        logger.debug("Injected Dynamo headers: prefix_id=%s, total_requests=%d, osl=%s, iat=%s",
                     prefix_id,
                     total_requests,
                     osl_hint,
                     iat_hint)

    return on_request
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def create_httpx_client_with_dynamo_hooks(
    prefix_template: str | None,
    total_requests: int,
    osl: str,
    iat: str,
    timeout: float = 600.0,
) -> "httpx.AsyncClient":
    """
    Build an ``httpx.AsyncClient`` whose requests carry Dynamo prefix headers.

    The returned client has a single ``request`` event hook, produced by
    ``_create_dynamo_request_hook``, and applies ``timeout`` through
    ``httpx.Timeout``. It can be handed to the OpenAI SDK so the headers are
    injected at the HTTP level, independent of the calling framework.

    Args:
        prefix_template: Template string with {uuid} placeholder
        total_requests: Expected number of requests for this prefix
        osl: Output sequence length hint (LOW/MEDIUM/HIGH)
        iat: Inter-arrival time hint (LOW/MEDIUM/HIGH)
        timeout: HTTP request timeout in seconds

    Returns:
        An httpx.AsyncClient configured with Dynamo header injection.
    """
    # Imported here because the module only imports httpx under TYPE_CHECKING.
    import httpx

    hooks = {"request": [_create_dynamo_request_hook(prefix_template, total_requests, osl, iat)]}
    return httpx.AsyncClient(event_hooks=hooks, timeout=httpx.Timeout(timeout))
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
# =============================================================================
|
|
351
|
+
# PROVIDER REGISTRATION
|
|
352
|
+
# =============================================================================
|
|
353
|
+
# Note: Client registrations for each framework (LangChain, LlamaIndex, etc.)
|
|
354
|
+
# are in the respective plugin packages under packages/nvidia_nat_<framework>/
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
@register_llm_provider(config_type=DynamoModelConfig)
async def dynamo_llm(config: DynamoModelConfig, _builder: Builder):
    """
    Register the Dynamo LLM provider.

    Yields a single ``LLMProviderInfo`` wrapping ``config``; the ``_builder``
    argument is not used here.
    """
    provider_info = LLMProviderInfo(
        config=config,
        description="A Dynamo-optimized model with automatic prefix headers for KV cache management.",
    )
    yield provider_info
|