nvidia-nat 1.4.0a20251120__py3-none-any.whl → 1.4.0a20260113__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiq/__init__.py +1 -1
- nat/{front_ends/mcp → agent/auto_memory_wrapper}/__init__.py +1 -1
- nat/agent/auto_memory_wrapper/agent.py +278 -0
- nat/agent/auto_memory_wrapper/register.py +227 -0
- nat/agent/auto_memory_wrapper/state.py +30 -0
- nat/agent/base.py +1 -1
- nat/agent/dual_node.py +1 -1
- nat/agent/prompt_optimizer/prompt.py +1 -1
- nat/agent/prompt_optimizer/register.py +1 -1
- nat/agent/react_agent/agent.py +16 -9
- nat/agent/react_agent/output_parser.py +2 -2
- nat/agent/react_agent/prompt.py +3 -2
- nat/agent/react_agent/register.py +2 -2
- nat/agent/react_agent/register_per_user_agent.py +104 -0
- nat/agent/reasoning_agent/reasoning_agent.py +1 -1
- nat/agent/register.py +3 -1
- nat/agent/responses_api_agent/__init__.py +1 -1
- nat/agent/responses_api_agent/register.py +1 -1
- nat/agent/rewoo_agent/agent.py +9 -4
- nat/agent/rewoo_agent/prompt.py +1 -1
- nat/agent/rewoo_agent/register.py +1 -1
- nat/agent/tool_calling_agent/agent.py +5 -4
- nat/agent/tool_calling_agent/register.py +1 -1
- nat/authentication/__init__.py +1 -1
- nat/authentication/api_key/__init__.py +1 -1
- nat/authentication/api_key/api_key_auth_provider.py +1 -1
- nat/authentication/api_key/api_key_auth_provider_config.py +22 -7
- nat/authentication/api_key/register.py +1 -1
- nat/authentication/credential_validator/__init__.py +1 -1
- nat/authentication/credential_validator/bearer_token_validator.py +1 -1
- nat/authentication/exceptions/__init__.py +1 -1
- nat/authentication/exceptions/api_key_exceptions.py +1 -1
- nat/authentication/http_basic_auth/http_basic_auth_provider.py +1 -1
- nat/authentication/http_basic_auth/register.py +1 -1
- nat/authentication/interfaces.py +1 -1
- nat/authentication/oauth2/__init__.py +1 -1
- nat/authentication/oauth2/oauth2_auth_code_flow_provider.py +1 -1
- nat/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +1 -1
- nat/authentication/oauth2/oauth2_resource_server_config.py +1 -1
- nat/authentication/oauth2/register.py +1 -1
- nat/authentication/register.py +1 -1
- nat/builder/builder.py +511 -1
- nat/builder/child_builder.py +385 -0
- nat/builder/component_utils.py +28 -4
- nat/builder/context.py +17 -1
- nat/builder/embedder.py +1 -1
- nat/builder/eval_builder.py +19 -7
- nat/builder/evaluator.py +1 -1
- nat/builder/framework_enum.py +2 -1
- nat/builder/front_end.py +1 -1
- nat/builder/function.py +40 -3
- nat/builder/function_base.py +1 -1
- nat/builder/function_info.py +1 -1
- nat/builder/intermediate_step_manager.py +1 -1
- nat/builder/llm.py +1 -1
- nat/builder/per_user_workflow_builder.py +843 -0
- nat/builder/retriever.py +1 -1
- nat/builder/sync_builder.py +571 -0
- nat/builder/user_interaction_manager.py +1 -1
- nat/builder/workflow.py +1 -1
- nat/builder/workflow_builder.py +536 -424
- nat/cli/__init__.py +1 -1
- nat/cli/cli_utils/config_override.py +1 -1
- nat/cli/cli_utils/validation.py +32 -1
- nat/cli/commands/configure/channel/add.py +1 -1
- nat/cli/commands/configure/channel/channel.py +1 -1
- nat/cli/commands/configure/channel/remove.py +1 -1
- nat/cli/commands/configure/channel/update.py +1 -1
- nat/cli/commands/configure/configure.py +1 -1
- nat/cli/commands/evaluate.py +87 -13
- nat/cli/commands/finetune.py +132 -0
- nat/cli/commands/info/__init__.py +1 -1
- nat/cli/commands/info/info.py +1 -1
- nat/cli/commands/info/list_channels.py +1 -1
- nat/cli/commands/info/list_components.py +1 -1
- nat/cli/commands/object_store/__init__.py +1 -1
- nat/cli/commands/object_store/object_store.py +1 -1
- nat/cli/commands/optimize.py +1 -1
- nat/cli/commands/{mcp → red_teaming}/__init__.py +1 -1
- nat/cli/commands/red_teaming/red_teaming.py +138 -0
- nat/cli/commands/red_teaming/red_teaming_utils.py +73 -0
- nat/cli/commands/registry/__init__.py +1 -1
- nat/cli/commands/registry/publish.py +1 -1
- nat/cli/commands/registry/pull.py +1 -1
- nat/cli/commands/registry/registry.py +1 -1
- nat/cli/commands/registry/remove.py +1 -1
- nat/cli/commands/registry/search.py +1 -1
- nat/cli/commands/sizing/__init__.py +1 -1
- nat/cli/commands/sizing/calc.py +1 -1
- nat/cli/commands/sizing/sizing.py +1 -1
- nat/cli/commands/start.py +1 -1
- nat/cli/commands/uninstall.py +1 -1
- nat/cli/commands/validate.py +1 -1
- nat/cli/commands/workflow/__init__.py +1 -1
- nat/cli/commands/workflow/workflow.py +1 -1
- nat/cli/commands/workflow/workflow_commands.py +3 -2
- nat/cli/entrypoint.py +15 -37
- nat/cli/main.py +2 -2
- nat/cli/plugin_loader.py +69 -0
- nat/cli/register_workflow.py +183 -5
- nat/cli/type_registry.py +169 -3
- nat/control_flow/register.py +1 -1
- nat/control_flow/router_agent/agent.py +1 -1
- nat/control_flow/router_agent/prompt.py +1 -1
- nat/control_flow/router_agent/register.py +1 -1
- nat/control_flow/sequential_executor.py +28 -7
- nat/data_models/__init__.py +1 -1
- nat/data_models/agent.py +1 -1
- nat/data_models/api_server.py +38 -3
- nat/data_models/authentication.py +1 -1
- nat/data_models/common.py +1 -1
- nat/data_models/component.py +7 -1
- nat/data_models/component_ref.py +34 -1
- nat/data_models/config.py +62 -1
- nat/data_models/dataset_handler.py +15 -2
- nat/data_models/discovery_metadata.py +1 -1
- nat/data_models/embedder.py +1 -1
- nat/data_models/evaluate.py +6 -1
- nat/data_models/evaluator.py +1 -1
- nat/data_models/finetuning.py +260 -0
- nat/data_models/front_end.py +1 -1
- nat/data_models/function.py +1 -1
- nat/data_models/function_dependencies.py +1 -1
- nat/data_models/gated_field_mixin.py +1 -1
- nat/data_models/interactive.py +1 -1
- nat/data_models/intermediate_step.py +29 -2
- nat/data_models/invocation_node.py +1 -1
- nat/data_models/llm.py +1 -1
- nat/data_models/logging.py +1 -1
- nat/data_models/memory.py +1 -1
- nat/data_models/middleware.py +3 -1
- nat/data_models/object_store.py +1 -1
- nat/data_models/openai_mcp.py +1 -1
- nat/data_models/optimizable.py +1 -1
- nat/data_models/optimizer.py +1 -1
- nat/data_models/profiler.py +1 -1
- nat/data_models/registry_handler.py +1 -1
- nat/data_models/retriever.py +1 -1
- nat/data_models/retry_mixin.py +1 -1
- nat/data_models/runtime_enum.py +1 -1
- nat/data_models/span.py +1 -1
- nat/data_models/step_adaptor.py +1 -1
- nat/data_models/streaming.py +1 -1
- nat/data_models/swe_bench_model.py +1 -1
- nat/data_models/telemetry_exporter.py +1 -1
- nat/data_models/thinking_mixin.py +1 -1
- nat/data_models/ttc_strategy.py +1 -1
- nat/embedder/azure_openai_embedder.py +1 -1
- nat/embedder/nim_embedder.py +1 -1
- nat/embedder/openai_embedder.py +1 -1
- nat/embedder/register.py +1 -1
- nat/eval/__init__.py +1 -1
- nat/eval/config.py +8 -1
- nat/eval/dataset_handler/dataset_downloader.py +1 -1
- nat/eval/dataset_handler/dataset_filter.py +1 -1
- nat/eval/dataset_handler/dataset_handler.py +4 -2
- nat/eval/evaluate.py +217 -80
- nat/eval/evaluator/__init__.py +1 -1
- nat/eval/evaluator/base_evaluator.py +2 -2
- nat/eval/evaluator/evaluator_model.py +3 -2
- nat/eval/intermediate_step_adapter.py +1 -1
- nat/eval/llm_validator.py +336 -0
- nat/eval/rag_evaluator/evaluate.py +17 -10
- nat/eval/rag_evaluator/register.py +1 -1
- nat/eval/red_teaming_evaluator/__init__.py +14 -0
- nat/eval/red_teaming_evaluator/data_models.py +66 -0
- nat/eval/red_teaming_evaluator/evaluate.py +327 -0
- nat/eval/red_teaming_evaluator/filter_conditions.py +75 -0
- nat/eval/red_teaming_evaluator/register.py +55 -0
- nat/eval/register.py +2 -1
- nat/eval/remote_workflow.py +1 -1
- nat/eval/runners/__init__.py +1 -1
- nat/eval/runners/config.py +1 -1
- nat/eval/runners/multi_eval_runner.py +1 -1
- nat/eval/runners/red_teaming_runner/__init__.py +24 -0
- nat/eval/runners/red_teaming_runner/config.py +282 -0
- nat/eval/runners/red_teaming_runner/report_utils.py +707 -0
- nat/eval/runners/red_teaming_runner/runner.py +867 -0
- nat/eval/runtime_evaluator/__init__.py +1 -1
- nat/eval/runtime_evaluator/evaluate.py +1 -1
- nat/eval/runtime_evaluator/register.py +1 -1
- nat/eval/runtime_event_subscriber.py +1 -1
- nat/eval/swe_bench_evaluator/evaluate.py +1 -1
- nat/eval/swe_bench_evaluator/register.py +1 -1
- nat/eval/trajectory_evaluator/evaluate.py +2 -2
- nat/eval/trajectory_evaluator/register.py +1 -1
- nat/eval/tunable_rag_evaluator/evaluate.py +5 -5
- nat/eval/tunable_rag_evaluator/register.py +1 -1
- nat/eval/usage_stats.py +1 -1
- nat/eval/utils/eval_trace_ctx.py +1 -1
- nat/eval/utils/output_uploader.py +1 -1
- nat/eval/utils/tqdm_position_registry.py +1 -1
- nat/eval/utils/weave_eval.py +1 -1
- nat/experimental/decorators/experimental_warning_decorator.py +1 -1
- nat/experimental/test_time_compute/editing/iterative_plan_refinement_editor.py +1 -1
- nat/experimental/test_time_compute/editing/llm_as_a_judge_editor.py +1 -1
- nat/experimental/test_time_compute/editing/motivation_aware_summarization.py +1 -1
- nat/experimental/test_time_compute/functions/execute_score_select_function.py +1 -1
- nat/experimental/test_time_compute/functions/multi_llm_judge_function.py +88 -0
- nat/experimental/test_time_compute/functions/plan_select_execute_function.py +1 -1
- nat/experimental/test_time_compute/functions/ttc_tool_orchestration_function.py +1 -1
- nat/experimental/test_time_compute/functions/ttc_tool_wrapper_function.py +1 -1
- nat/experimental/test_time_compute/models/editor_config.py +1 -1
- nat/experimental/test_time_compute/models/scoring_config.py +1 -1
- nat/experimental/test_time_compute/models/search_config.py +20 -2
- nat/experimental/test_time_compute/models/selection_config.py +33 -2
- nat/experimental/test_time_compute/models/stage_enums.py +1 -1
- nat/experimental/test_time_compute/models/strategy_base.py +1 -1
- nat/experimental/test_time_compute/models/tool_use_config.py +1 -1
- nat/experimental/test_time_compute/models/ttc_item.py +1 -1
- nat/experimental/test_time_compute/register.py +4 -1
- nat/experimental/test_time_compute/scoring/llm_based_agent_scorer.py +1 -1
- nat/experimental/test_time_compute/scoring/llm_based_plan_scorer.py +1 -1
- nat/experimental/test_time_compute/scoring/motivation_aware_scorer.py +1 -1
- nat/experimental/test_time_compute/search/multi_llm_generation.py +115 -0
- nat/experimental/test_time_compute/search/multi_llm_planner.py +1 -1
- nat/experimental/test_time_compute/search/multi_query_retrieval_search.py +1 -1
- nat/experimental/test_time_compute/search/single_shot_multi_plan_planner.py +1 -1
- nat/experimental/test_time_compute/selection/best_of_n_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_based_agent_output_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_based_output_merging_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_based_plan_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_judge_selection.py +127 -0
- nat/experimental/test_time_compute/selection/threshold_selector.py +1 -1
- nat/finetuning/__init__.py +24 -0
- nat/finetuning/finetuning_runtime.py +143 -0
- nat/finetuning/interfaces/__init__.py +24 -0
- nat/finetuning/interfaces/finetuning_runner.py +261 -0
- nat/finetuning/interfaces/trainer_adapter.py +103 -0
- nat/finetuning/interfaces/trajectory_builder.py +115 -0
- nat/finetuning/utils/__init__.py +15 -0
- nat/finetuning/utils/parsers/__init__.py +15 -0
- nat/finetuning/utils/parsers/adk_parser.py +141 -0
- nat/finetuning/utils/parsers/base_parser.py +238 -0
- nat/finetuning/utils/parsers/common.py +91 -0
- nat/finetuning/utils/parsers/langchain_parser.py +267 -0
- nat/finetuning/utils/parsers/llama_index_parser.py +218 -0
- nat/front_ends/__init__.py +1 -1
- nat/front_ends/console/__init__.py +1 -1
- nat/front_ends/console/authentication_flow_handler.py +1 -1
- nat/front_ends/console/console_front_end_config.py +4 -1
- nat/front_ends/console/console_front_end_plugin.py +5 -4
- nat/front_ends/console/register.py +1 -1
- nat/front_ends/cron/__init__.py +1 -1
- nat/front_ends/fastapi/__init__.py +1 -1
- nat/front_ends/fastapi/async_job.py +128 -0
- nat/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +1 -1
- nat/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +13 -9
- nat/front_ends/fastapi/dask_client_mixin.py +1 -1
- nat/front_ends/fastapi/fastapi_front_end_config.py +1 -1
- nat/front_ends/fastapi/fastapi_front_end_controller.py +1 -1
- nat/front_ends/fastapi/fastapi_front_end_plugin.py +25 -30
- nat/front_ends/fastapi/fastapi_front_end_plugin_worker.py +195 -60
- nat/front_ends/fastapi/html_snippets/__init__.py +1 -1
- nat/front_ends/fastapi/html_snippets/auth_code_grant_success.py +1 -1
- nat/front_ends/fastapi/intermediate_steps_subscriber.py +12 -1
- nat/front_ends/fastapi/job_store.py +23 -11
- nat/front_ends/fastapi/main.py +1 -1
- nat/front_ends/fastapi/message_handler.py +27 -4
- nat/front_ends/fastapi/message_validator.py +54 -2
- nat/front_ends/fastapi/register.py +1 -1
- nat/front_ends/fastapi/response_helpers.py +16 -15
- nat/front_ends/fastapi/step_adaptor.py +1 -1
- nat/front_ends/fastapi/utils.py +1 -1
- nat/front_ends/register.py +1 -2
- nat/front_ends/simple_base/__init__.py +1 -1
- nat/front_ends/simple_base/simple_front_end_plugin_base.py +6 -4
- nat/llm/aws_bedrock_llm.py +1 -1
- nat/llm/azure_openai_llm.py +10 -1
- nat/llm/dynamo_llm.py +363 -0
- nat/llm/huggingface_llm.py +177 -0
- nat/llm/litellm_llm.py +1 -1
- nat/llm/nim_llm.py +1 -1
- nat/llm/openai_llm.py +1 -1
- nat/llm/register.py +3 -1
- nat/llm/utils/__init__.py +1 -1
- nat/llm/utils/env_config_value.py +1 -1
- nat/llm/utils/error.py +1 -1
- nat/llm/utils/thinking.py +1 -1
- nat/memory/__init__.py +1 -1
- nat/memory/interfaces.py +1 -1
- nat/memory/models.py +1 -1
- nat/meta/pypi.md +1 -1
- nat/middleware/__init__.py +5 -5
- nat/middleware/cache/__init__.py +14 -0
- nat/middleware/{cache_middleware.py → cache/cache_middleware.py} +39 -42
- nat/middleware/cache/cache_middleware_config.py +44 -0
- nat/middleware/cache/register.py +33 -0
- nat/middleware/defense/__init__.py +14 -0
- nat/middleware/defense/defense_middleware.py +362 -0
- nat/middleware/defense/defense_middleware_content_guard.py +455 -0
- nat/middleware/defense/defense_middleware_data_models.py +91 -0
- nat/middleware/defense/defense_middleware_output_verifier.py +440 -0
- nat/middleware/defense/defense_middleware_pii.py +356 -0
- nat/middleware/defense/register.py +82 -0
- nat/middleware/dynamic/__init__.py +14 -0
- nat/middleware/dynamic/dynamic_function_middleware.py +962 -0
- nat/middleware/dynamic/dynamic_middleware_config.py +132 -0
- nat/middleware/dynamic/register.py +34 -0
- nat/middleware/function_middleware.py +236 -52
- nat/middleware/logging/__init__.py +14 -0
- nat/middleware/logging/logging_middleware.py +67 -0
- nat/middleware/logging/logging_middleware_config.py +28 -0
- nat/middleware/logging/register.py +33 -0
- nat/middleware/middleware.py +142 -28
- nat/middleware/red_teaming/__init__.py +14 -0
- nat/middleware/red_teaming/red_teaming_middleware.py +344 -0
- nat/middleware/red_teaming/red_teaming_middleware_config.py +112 -0
- nat/middleware/red_teaming/register.py +47 -0
- nat/middleware/register.py +7 -20
- nat/middleware/utils/__init__.py +14 -0
- nat/middleware/utils/workflow_inventory.py +155 -0
- nat/object_store/__init__.py +1 -1
- nat/object_store/in_memory_object_store.py +1 -1
- nat/object_store/interfaces.py +1 -1
- nat/object_store/models.py +1 -1
- nat/object_store/register.py +1 -1
- nat/observability/__init__.py +1 -1
- nat/observability/exporter/__init__.py +1 -1
- nat/observability/exporter/base_exporter.py +1 -1
- nat/observability/exporter/exporter.py +1 -1
- nat/observability/exporter/file_exporter.py +1 -1
- nat/observability/exporter/processing_exporter.py +1 -1
- nat/observability/exporter/raw_exporter.py +1 -1
- nat/observability/exporter/span_exporter.py +7 -1
- nat/observability/exporter_manager.py +1 -1
- nat/observability/mixin/__init__.py +1 -1
- nat/observability/mixin/batch_config_mixin.py +1 -1
- nat/observability/mixin/collector_config_mixin.py +1 -1
- nat/observability/mixin/file_mixin.py +1 -1
- nat/observability/mixin/file_mode.py +1 -1
- nat/observability/mixin/redaction_config_mixin.py +1 -1
- nat/observability/mixin/resource_conflict_mixin.py +1 -1
- nat/observability/mixin/serialize_mixin.py +1 -1
- nat/observability/mixin/tagging_config_mixin.py +1 -1
- nat/observability/mixin/type_introspection_mixin.py +1 -1
- nat/observability/processor/__init__.py +1 -1
- nat/observability/processor/batching_processor.py +1 -1
- nat/observability/processor/callback_processor.py +1 -1
- nat/observability/processor/falsy_batch_filter_processor.py +1 -1
- nat/observability/processor/intermediate_step_serializer.py +1 -1
- nat/observability/processor/processor.py +1 -1
- nat/observability/processor/processor_factory.py +1 -1
- nat/observability/processor/redaction/__init__.py +1 -1
- nat/observability/processor/redaction/contextual_redaction_processor.py +1 -1
- nat/observability/processor/redaction/contextual_span_redaction_processor.py +1 -1
- nat/observability/processor/redaction/redaction_processor.py +1 -1
- nat/observability/processor/redaction/span_header_redaction_processor.py +1 -1
- nat/observability/processor/span_tagging_processor.py +1 -1
- nat/observability/register.py +1 -1
- nat/observability/utils/__init__.py +1 -1
- nat/observability/utils/dict_utils.py +1 -1
- nat/observability/utils/time_utils.py +1 -1
- nat/profiler/calc/__init__.py +1 -1
- nat/profiler/calc/calc_runner.py +3 -3
- nat/profiler/calc/calculations.py +1 -1
- nat/profiler/calc/data_models.py +1 -1
- nat/profiler/calc/plot.py +30 -3
- nat/profiler/callbacks/agno_callback_handler.py +1 -1
- nat/profiler/callbacks/base_callback_class.py +1 -1
- nat/profiler/callbacks/langchain_callback_handler.py +33 -3
- nat/profiler/callbacks/llama_index_callback_handler.py +13 -10
- nat/profiler/callbacks/semantic_kernel_callback_handler.py +1 -1
- nat/profiler/callbacks/token_usage_base_model.py +1 -1
- nat/profiler/data_frame_row.py +1 -1
- nat/profiler/data_models.py +1 -1
- nat/profiler/decorators/framework_wrapper.py +16 -1
- nat/profiler/decorators/function_tracking.py +1 -1
- nat/profiler/forecasting/config.py +1 -1
- nat/profiler/forecasting/model_trainer.py +1 -1
- nat/profiler/forecasting/models/__init__.py +1 -1
- nat/profiler/forecasting/models/forecasting_base_model.py +1 -1
- nat/profiler/forecasting/models/linear_model.py +1 -1
- nat/profiler/forecasting/models/random_forest_regressor.py +1 -1
- nat/profiler/inference_metrics_model.py +1 -1
- nat/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +1 -1
- nat/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +1 -1
- nat/profiler/inference_optimization/data_models.py +1 -1
- nat/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +1 -1
- nat/profiler/inference_optimization/experimental/prefix_span_analysis.py +1 -1
- nat/profiler/inference_optimization/llm_metrics.py +1 -1
- nat/profiler/inference_optimization/prompt_caching.py +1 -1
- nat/profiler/inference_optimization/token_uniqueness.py +1 -1
- nat/profiler/inference_optimization/workflow_runtimes.py +1 -1
- nat/profiler/intermediate_property_adapter.py +1 -1
- nat/profiler/parameter_optimization/optimizable_utils.py +1 -1
- nat/profiler/parameter_optimization/optimizer_runtime.py +1 -1
- nat/profiler/parameter_optimization/parameter_optimizer.py +1 -1
- nat/profiler/parameter_optimization/parameter_selection.py +1 -1
- nat/profiler/parameter_optimization/pareto_visualizer.py +1 -1
- nat/profiler/parameter_optimization/prompt_optimizer.py +1 -1
- nat/profiler/parameter_optimization/update_helpers.py +1 -1
- nat/profiler/profile_runner.py +1 -1
- nat/profiler/utils.py +1 -1
- nat/registry_handlers/local/local_handler.py +1 -1
- nat/registry_handlers/local/register_local.py +1 -1
- nat/registry_handlers/metadata_factory.py +1 -1
- nat/registry_handlers/package_utils.py +1 -1
- nat/registry_handlers/pypi/pypi_handler.py +1 -1
- nat/registry_handlers/pypi/register_pypi.py +1 -1
- nat/registry_handlers/register.py +1 -1
- nat/registry_handlers/registry_handler_base.py +1 -1
- nat/registry_handlers/rest/register_rest.py +1 -1
- nat/registry_handlers/rest/rest_handler.py +1 -1
- nat/registry_handlers/schemas/headers.py +1 -1
- nat/registry_handlers/schemas/package.py +1 -1
- nat/registry_handlers/schemas/publish.py +1 -1
- nat/registry_handlers/schemas/pull.py +1 -1
- nat/registry_handlers/schemas/remove.py +1 -1
- nat/registry_handlers/schemas/search.py +1 -1
- nat/registry_handlers/schemas/status.py +1 -1
- nat/retriever/interface.py +1 -1
- nat/retriever/milvus/__init__.py +1 -1
- nat/retriever/milvus/register.py +1 -1
- nat/retriever/milvus/retriever.py +1 -1
- nat/retriever/models.py +1 -1
- nat/retriever/nemo_retriever/__init__.py +1 -1
- nat/retriever/nemo_retriever/register.py +1 -1
- nat/retriever/nemo_retriever/retriever.py +5 -5
- nat/retriever/register.py +1 -1
- nat/runtime/__init__.py +1 -1
- nat/runtime/loader.py +10 -3
- nat/runtime/metrics.py +180 -0
- nat/runtime/runner.py +1 -5
- nat/runtime/session.py +451 -32
- nat/runtime/user_metadata.py +1 -1
- nat/settings/global_settings.py +1 -1
- nat/tool/chat_completion.py +1 -1
- nat/tool/code_execution/README.md +1 -1
- nat/tool/code_execution/code_sandbox.py +1 -1
- nat/tool/code_execution/local_sandbox/Dockerfile.sandbox +1 -1
- nat/tool/code_execution/local_sandbox/__init__.py +1 -1
- nat/tool/code_execution/local_sandbox/local_sandbox_server.py +1 -1
- nat/tool/code_execution/local_sandbox/start_local_sandbox.sh +1 -1
- nat/tool/code_execution/register.py +1 -1
- nat/tool/code_execution/utils.py +1 -1
- nat/tool/datetime_tools.py +1 -1
- nat/tool/document_search.py +1 -1
- nat/tool/github_tools.py +1 -1
- nat/tool/memory_tools/add_memory_tool.py +1 -1
- nat/tool/memory_tools/delete_memory_tool.py +1 -1
- nat/tool/memory_tools/get_memory_tool.py +1 -1
- nat/tool/nvidia_rag.py +2 -2
- nat/tool/register.py +1 -1
- nat/tool/retriever.py +1 -1
- nat/tool/server_tools.py +1 -1
- nat/utils/__init__.py +8 -5
- nat/utils/callable_utils.py +1 -1
- nat/utils/data_models/schema_validator.py +1 -1
- nat/utils/debugging_utils.py +1 -1
- nat/utils/decorators.py +1 -1
- nat/utils/dump_distro_mapping.py +1 -1
- nat/utils/exception_handlers/automatic_retries.py +3 -3
- nat/utils/exception_handlers/schemas.py +1 -1
- nat/utils/io/model_processing.py +1 -1
- nat/utils/io/supress_logs.py +33 -0
- nat/utils/io/yaml_tools.py +1 -1
- nat/utils/log_levels.py +1 -1
- nat/utils/log_utils.py +13 -1
- nat/utils/metadata_utils.py +1 -1
- nat/utils/optional_imports.py +1 -1
- nat/utils/producer_consumer_queue.py +1 -1
- nat/utils/reactive/base/observable_base.py +1 -1
- nat/utils/reactive/base/observer_base.py +1 -1
- nat/utils/reactive/base/subject_base.py +1 -1
- nat/utils/reactive/observable.py +1 -1
- nat/utils/reactive/observer.py +1 -1
- nat/utils/reactive/subject.py +1 -1
- nat/utils/reactive/subscription.py +1 -1
- nat/utils/responses_api.py +1 -1
- nat/utils/settings/global_settings.py +1 -1
- nat/utils/string_utils.py +1 -1
- nat/utils/type_converter.py +18 -5
- nat/utils/type_utils.py +1 -1
- nat/utils/url_utils.py +1 -1
- {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/METADATA +39 -14
- nvidia_nat-1.4.0a20260113.dist-info/RECORD +547 -0
- nvidia_nat-1.4.0a20260113.dist-info/entry_points.txt +38 -0
- nat/cli/commands/mcp/mcp.py +0 -986
- nat/front_ends/mcp/introspection_token_verifier.py +0 -73
- nat/front_ends/mcp/mcp_front_end_config.py +0 -109
- nat/front_ends/mcp/mcp_front_end_plugin.py +0 -155
- nat/front_ends/mcp/mcp_front_end_plugin_worker.py +0 -388
- nat/front_ends/mcp/memory_profiler.py +0 -320
- nat/front_ends/mcp/register.py +0 -27
- nat/front_ends/mcp/tool_converter.py +0 -321
- nvidia_nat-1.4.0a20251120.dist-info/RECORD +0 -488
- nvidia_nat-1.4.0a20251120.dist-info/entry_points.txt +0 -23
- {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/WHEEL +0 -0
- {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
- {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE.md +0 -0
- {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -39,6 +39,7 @@ from pydantic import BaseModel
|
|
|
39
39
|
from pydantic import Field
|
|
40
40
|
from starlette.websockets import WebSocket
|
|
41
41
|
|
|
42
|
+
from nat.builder.context import Context
|
|
42
43
|
from nat.builder.eval_builder import WorkflowEvalBuilder
|
|
43
44
|
from nat.builder.evaluator import EvaluatorInfo
|
|
44
45
|
from nat.builder.function import Function
|
|
@@ -54,6 +55,7 @@ from nat.eval.config import EvaluationRunOutput
|
|
|
54
55
|
from nat.eval.evaluate import EvaluationRun
|
|
55
56
|
from nat.eval.evaluate import EvaluationRunConfig
|
|
56
57
|
from nat.eval.evaluator.evaluator_model import EvalInput
|
|
58
|
+
from nat.front_ends.fastapi.async_job import run_generation
|
|
57
59
|
from nat.front_ends.fastapi.auth_flow_handlers.http_flow_handler import HTTPAuthenticationFlowHandler
|
|
58
60
|
from nat.front_ends.fastapi.auth_flow_handlers.websocket_flow_handler import FlowState
|
|
59
61
|
from nat.front_ends.fastapi.auth_flow_handlers.websocket_flow_handler import WebSocketAuthenticationFlowHandler
|
|
@@ -74,6 +76,7 @@ from nat.front_ends.fastapi.utils import get_config_file_path
|
|
|
74
76
|
from nat.object_store.models import ObjectStoreItem
|
|
75
77
|
from nat.runtime.loader import load_workflow
|
|
76
78
|
from nat.runtime.session import SessionManager
|
|
79
|
+
from nat.utils.log_utils import setup_logging
|
|
77
80
|
|
|
78
81
|
logger = logging.getLogger(__name__)
|
|
79
82
|
|
|
@@ -105,6 +108,9 @@ class FastApiFrontEndPluginWorkerBase(ABC):
|
|
|
105
108
|
self._scheduler_address = os.environ.get("NAT_DASK_SCHEDULER_ADDRESS")
|
|
106
109
|
self._db_url = os.environ.get("NAT_JOB_STORE_DB_URL")
|
|
107
110
|
self._config_file_path = get_config_file_path()
|
|
111
|
+
self._use_dask_threads = os.environ.get("NAT_USE_DASK_THREADS", "0") == "1"
|
|
112
|
+
self._log_level = int(os.environ.get("NAT_FASTAPI_LOG_LEVEL", logging.INFO))
|
|
113
|
+
setup_logging(self._log_level)
|
|
108
114
|
|
|
109
115
|
if self._scheduler_address is not None:
|
|
110
116
|
if not _DASK_AVAILABLE:
|
|
@@ -232,6 +238,9 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
|
|
|
232
238
|
self._outstanding_flows: dict[str, FlowState] = {}
|
|
233
239
|
self._outstanding_flows_lock = asyncio.Lock()
|
|
234
240
|
|
|
241
|
+
# Track session managers for each route
|
|
242
|
+
self._session_managers: list[SessionManager] = []
|
|
243
|
+
|
|
235
244
|
# Evaluator storage for single-item evaluation
|
|
236
245
|
self._evaluators: dict[str, EvaluatorInfo] = {}
|
|
237
246
|
self._eval_builder: WorkflowEvalBuilder | None = None
|
|
@@ -268,6 +277,27 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
|
|
|
268
277
|
# Don't fail startup, just log the error
|
|
269
278
|
self._evaluators = {}
|
|
270
279
|
|
|
280
|
+
async def _create_session_manager(self,
|
|
281
|
+
builder: WorkflowBuilder,
|
|
282
|
+
entry_function: str | None = None) -> SessionManager:
|
|
283
|
+
"""Create and register a SessionManager."""
|
|
284
|
+
|
|
285
|
+
sm = await SessionManager.create(config=self._config, shared_builder=builder, entry_function=entry_function)
|
|
286
|
+
self._session_managers.append(sm)
|
|
287
|
+
|
|
288
|
+
return sm
|
|
289
|
+
|
|
290
|
+
async def cleanup_session_managers(self):
|
|
291
|
+
"""Clean up all SessionManager resources on shutdown."""
|
|
292
|
+
for sm in self._session_managers:
|
|
293
|
+
try:
|
|
294
|
+
await sm.shutdown()
|
|
295
|
+
except Exception as e:
|
|
296
|
+
logger.error(f"Error cleaning up SessionManager: {e}")
|
|
297
|
+
|
|
298
|
+
self._session_managers.clear()
|
|
299
|
+
logger.info("All SessionManagers cleaned up")
|
|
300
|
+
|
|
271
301
|
async def cleanup_evaluators(self):
|
|
272
302
|
"""Clean up evaluator resources on shutdown."""
|
|
273
303
|
if self._eval_builder:
|
|
@@ -293,6 +323,9 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
|
|
|
293
323
|
# TODO: we need config control over this as it's not always needed
|
|
294
324
|
await self.initialize_evaluators(self._config)
|
|
295
325
|
|
|
326
|
+
# Ensure session manager resources are cleaned up when the app shuts down
|
|
327
|
+
app.add_event_handler("shutdown", self.cleanup_session_managers)
|
|
328
|
+
|
|
296
329
|
# Ensure evaluator resources are cleaned up when the app shuts down
|
|
297
330
|
app.add_event_handler("shutdown", self.cleanup_evaluators)
|
|
298
331
|
|
|
@@ -300,18 +333,20 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
|
|
|
300
333
|
|
|
301
334
|
async def add_routes(self, app: FastAPI, builder: WorkflowBuilder):
|
|
302
335
|
|
|
303
|
-
await self.add_default_route(app,
|
|
304
|
-
await self.add_evaluate_route(app,
|
|
305
|
-
await self.add_evaluate_item_route(app,
|
|
336
|
+
await self.add_default_route(app, await self._create_session_manager(builder))
|
|
337
|
+
await self.add_evaluate_route(app, await self._create_session_manager(builder))
|
|
338
|
+
await self.add_evaluate_item_route(app, await self._create_session_manager(builder))
|
|
339
|
+
|
|
306
340
|
await self.add_static_files_route(app, builder)
|
|
307
341
|
await self.add_authorization_route(app)
|
|
308
342
|
await self.add_mcp_client_tool_list_route(app, builder)
|
|
343
|
+
await self.add_monitor_route(app)
|
|
309
344
|
|
|
310
345
|
for ep in self.front_end_config.endpoints:
|
|
311
346
|
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
347
|
+
await self.add_route(app,
|
|
348
|
+
endpoint=ep,
|
|
349
|
+
session_manager=await self._create_session_manager(builder, ep.function_name))
|
|
315
350
|
|
|
316
351
|
async def add_default_route(self, app: FastAPI, session_manager: SessionManager):
|
|
317
352
|
|
|
@@ -662,11 +697,15 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
|
|
|
662
697
|
endpoint: FastApiFrontEndConfig.EndpointBase,
|
|
663
698
|
session_manager: SessionManager):
|
|
664
699
|
|
|
665
|
-
|
|
700
|
+
GenerateBodyType = session_manager.get_workflow_input_schema()
|
|
701
|
+
GenerateStreamResponseType = session_manager.get_workflow_streaming_output_schema()
|
|
702
|
+
GenerateSingleResponseType = session_manager.get_workflow_single_output_schema()
|
|
666
703
|
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
704
|
+
def add_context_headers_to_response(response: Response) -> None:
|
|
705
|
+
"""Add context-based headers to response if available."""
|
|
706
|
+
observability_trace_id = Context.get().observability_trace_id
|
|
707
|
+
if observability_trace_id:
|
|
708
|
+
response.headers["Observability-Trace-Id"] = observability_trace_id
|
|
670
709
|
|
|
671
710
|
# Skip async generation for custom routes (those with function_name)
|
|
672
711
|
if self._dask_available and not hasattr(endpoint, 'function_name'):
|
|
@@ -715,10 +754,13 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
|
|
|
715
754
|
|
|
716
755
|
response.headers["Content-Type"] = "application/json"
|
|
717
756
|
|
|
718
|
-
async with session_manager.session(
|
|
719
|
-
|
|
757
|
+
async with session_manager.session(
|
|
758
|
+
http_connection=request,
|
|
759
|
+
user_authentication_callback=self._http_flow_handler.authenticate) as session:
|
|
720
760
|
|
|
721
|
-
|
|
761
|
+
result = await generate_single_response(None, session, result_type=result_type)
|
|
762
|
+
add_context_headers_to_response(response)
|
|
763
|
+
return result
|
|
722
764
|
|
|
723
765
|
return get_single
|
|
724
766
|
|
|
@@ -726,13 +768,14 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
|
|
|
726
768
|
|
|
727
769
|
async def get_stream(request: Request):
|
|
728
770
|
|
|
729
|
-
async with session_manager.session(
|
|
730
|
-
|
|
771
|
+
async with session_manager.session(
|
|
772
|
+
http_connection=request,
|
|
773
|
+
user_authentication_callback=self._http_flow_handler.authenticate) as session:
|
|
731
774
|
|
|
732
775
|
return StreamingResponse(headers={"Content-Type": "text/event-stream; charset=utf-8"},
|
|
733
776
|
content=generate_streaming_response_as_str(
|
|
734
777
|
None,
|
|
735
|
-
|
|
778
|
+
session=session,
|
|
736
779
|
streaming=streaming,
|
|
737
780
|
step_adaptor=self.get_step_adaptor(),
|
|
738
781
|
result_type=result_type,
|
|
@@ -744,14 +787,14 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
|
|
|
744
787
|
|
|
745
788
|
async def get_stream(filter_steps: str | None = None):
|
|
746
789
|
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
None,
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
790
|
+
async with session_manager.session(http_connection=None) as session:
|
|
791
|
+
return StreamingResponse(headers={"Content-Type": "text/event-stream; charset=utf-8"},
|
|
792
|
+
content=generate_streaming_response_full_as_str(None,
|
|
793
|
+
session=session,
|
|
794
|
+
streaming=streaming,
|
|
795
|
+
result_type=result_type,
|
|
796
|
+
output_type=output_type,
|
|
797
|
+
filter_steps=filter_steps))
|
|
755
798
|
|
|
756
799
|
return get_stream
|
|
757
800
|
|
|
@@ -761,10 +804,13 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
|
|
|
761
804
|
|
|
762
805
|
response.headers["Content-Type"] = "application/json"
|
|
763
806
|
|
|
764
|
-
async with session_manager.session(
|
|
765
|
-
|
|
807
|
+
async with session_manager.session(
|
|
808
|
+
http_connection=request,
|
|
809
|
+
user_authentication_callback=self._http_flow_handler.authenticate) as session:
|
|
766
810
|
|
|
767
|
-
|
|
811
|
+
result = await generate_single_response(payload, session, result_type=result_type)
|
|
812
|
+
add_context_headers_to_response(response)
|
|
813
|
+
return result
|
|
768
814
|
|
|
769
815
|
return post_single
|
|
770
816
|
|
|
@@ -775,13 +821,14 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
|
|
|
775
821
|
|
|
776
822
|
async def post_stream(request: Request, payload: request_type):
|
|
777
823
|
|
|
778
|
-
async with session_manager.session(
|
|
779
|
-
|
|
824
|
+
async with session_manager.session(
|
|
825
|
+
http_connection=request,
|
|
826
|
+
user_authentication_callback=self._http_flow_handler.authenticate) as session:
|
|
780
827
|
|
|
781
828
|
return StreamingResponse(headers={"Content-Type": "text/event-stream; charset=utf-8"},
|
|
782
829
|
content=generate_streaming_response_as_str(
|
|
783
830
|
payload,
|
|
784
|
-
|
|
831
|
+
session=session,
|
|
785
832
|
streaming=streaming,
|
|
786
833
|
step_adaptor=self.get_step_adaptor(),
|
|
787
834
|
result_type=result_type,
|
|
@@ -799,14 +846,14 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
|
|
|
799
846
|
|
|
800
847
|
async def post_stream(payload: request_type, filter_steps: str | None = None):
|
|
801
848
|
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
payload,
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
849
|
+
async with session_manager.session(http_connection=None) as session:
|
|
850
|
+
return StreamingResponse(headers={"Content-Type": "text/event-stream; charset=utf-8"},
|
|
851
|
+
content=generate_streaming_response_full_as_str(payload,
|
|
852
|
+
session=session,
|
|
853
|
+
streaming=streaming,
|
|
854
|
+
result_type=result_type,
|
|
855
|
+
output_type=output_type,
|
|
856
|
+
filter_steps=filter_steps))
|
|
810
857
|
|
|
811
858
|
return post_stream
|
|
812
859
|
|
|
@@ -822,20 +869,22 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
|
|
|
822
869
|
response.headers["Content-Type"] = "application/json"
|
|
823
870
|
stream_requested = getattr(payload, 'stream', False)
|
|
824
871
|
|
|
825
|
-
async with session_manager.session(http_connection=request):
|
|
872
|
+
async with session_manager.session(http_connection=request) as session:
|
|
826
873
|
if stream_requested:
|
|
827
874
|
|
|
828
875
|
# Return streaming response
|
|
829
876
|
return StreamingResponse(headers={"Content-Type": "text/event-stream; charset=utf-8"},
|
|
830
877
|
content=generate_streaming_response_as_str(
|
|
831
878
|
payload,
|
|
832
|
-
|
|
879
|
+
session=session,
|
|
833
880
|
streaming=True,
|
|
834
881
|
step_adaptor=self.get_step_adaptor(),
|
|
835
882
|
result_type=ChatResponseChunk,
|
|
836
883
|
output_type=ChatResponseChunk))
|
|
837
884
|
|
|
838
|
-
|
|
885
|
+
result = await generate_single_response(payload, session, result_type=ChatResponse)
|
|
886
|
+
add_context_headers_to_response(response)
|
|
887
|
+
return result
|
|
839
888
|
|
|
840
889
|
return post_openai_api_compatible
|
|
841
890
|
|
|
@@ -856,23 +905,6 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
|
|
|
856
905
|
updated_at=job.updated_at,
|
|
857
906
|
expires_at=self._job_store.get_expires_at(job))
|
|
858
907
|
|
|
859
|
-
async def run_generation(scheduler_address: str,
|
|
860
|
-
db_url: str,
|
|
861
|
-
config_file_path: str,
|
|
862
|
-
job_id: str,
|
|
863
|
-
payload: typing.Any):
|
|
864
|
-
"""Background task to run the workflow."""
|
|
865
|
-
job_store = JobStore(scheduler_address=scheduler_address, db_url=db_url)
|
|
866
|
-
try:
|
|
867
|
-
async with load_workflow(config_file_path) as local_session_manager:
|
|
868
|
-
result = await generate_single_response(
|
|
869
|
-
payload, local_session_manager, result_type=local_session_manager.workflow.single_output_schema)
|
|
870
|
-
|
|
871
|
-
await job_store.update_status(job_id, JobStatus.SUCCESS, output=result)
|
|
872
|
-
except Exception as e:
|
|
873
|
-
logger.exception("Error in async job %s", job_id)
|
|
874
|
-
await job_store.update_status(job_id, JobStatus.FAILURE, error=str(e))
|
|
875
|
-
|
|
876
908
|
def post_async_generation(request_type: type):
|
|
877
909
|
|
|
878
910
|
async def start_async_generation(
|
|
@@ -895,6 +927,8 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
|
|
|
895
927
|
job_fn=run_generation,
|
|
896
928
|
sync_timeout=request.sync_timeout,
|
|
897
929
|
job_args=[
|
|
930
|
+
not self._use_dask_threads,
|
|
931
|
+
self._log_level,
|
|
898
932
|
self._scheduler_address,
|
|
899
933
|
self._db_url,
|
|
900
934
|
self._config_file_path,
|
|
@@ -1228,7 +1262,7 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
|
|
|
1228
1262
|
if configured_group.config.type != "mcp_client":
|
|
1229
1263
|
continue
|
|
1230
1264
|
|
|
1231
|
-
from nat.plugins.mcp.client_config import MCPClientConfig
|
|
1265
|
+
from nat.plugins.mcp.client.client_config import MCPClientConfig
|
|
1232
1266
|
|
|
1233
1267
|
config = configured_group.config
|
|
1234
1268
|
assert isinstance(config, MCPClientConfig)
|
|
@@ -1377,6 +1411,107 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
|
|
|
1377
1411
|
}
|
|
1378
1412
|
})
|
|
1379
1413
|
|
|
1414
|
+
async def add_monitor_route(self, app: FastAPI):
|
|
1415
|
+
"""Add the per-user monitoring endpoint to the FastAPI app.
|
|
1416
|
+
|
|
1417
|
+
Security Warning:
|
|
1418
|
+
This endpoint exposes per-user identifiers and usage metrics. It should be
|
|
1419
|
+
protected by deploying behind an internal network, a reverse proxy with
|
|
1420
|
+
authentication, or similar access controls to prevent exposure to untrusted callers.
|
|
1421
|
+
"""
|
|
1422
|
+
# Check if monitoring is enabled in config
|
|
1423
|
+
if not self._config.general.enable_per_user_monitoring:
|
|
1424
|
+
logger.debug("Per-user monitoring disabled, skipping /monitor/users endpoint")
|
|
1425
|
+
return
|
|
1426
|
+
|
|
1427
|
+
from nat.runtime.metrics import PerUserMetricsCollector
|
|
1428
|
+
from nat.runtime.metrics import PerUserMonitorResponse
|
|
1429
|
+
from nat.runtime.metrics import PerUserResourceUsage
|
|
1430
|
+
|
|
1431
|
+
async def get_per_user_metrics(user_id: str | None = None) -> PerUserMonitorResponse:
|
|
1432
|
+
"""
|
|
1433
|
+
Get resource usage metrics for per-user workflows.
|
|
1434
|
+
|
|
1435
|
+
Args:
|
|
1436
|
+
user_id: Optional user ID to filter metrics for a specific user
|
|
1437
|
+
|
|
1438
|
+
Returns:
|
|
1439
|
+
PerUserMonitorResponse with metrics for all or specified users
|
|
1440
|
+
"""
|
|
1441
|
+
# Collect metrics from all session managers that have per-user workflows
|
|
1442
|
+
all_users: list[PerUserResourceUsage] = []
|
|
1443
|
+
|
|
1444
|
+
for session_manager in self._session_managers:
|
|
1445
|
+
if not session_manager.is_workflow_per_user:
|
|
1446
|
+
continue
|
|
1447
|
+
|
|
1448
|
+
collector = PerUserMetricsCollector(session_manager)
|
|
1449
|
+
|
|
1450
|
+
if user_id is not None:
|
|
1451
|
+
# Filter for specific user
|
|
1452
|
+
user_metrics = await collector.collect_user_metrics(user_id)
|
|
1453
|
+
if user_metrics:
|
|
1454
|
+
all_users.append(user_metrics)
|
|
1455
|
+
else:
|
|
1456
|
+
# Get all users
|
|
1457
|
+
response = await collector.collect_all_metrics()
|
|
1458
|
+
all_users.extend(response.users)
|
|
1459
|
+
|
|
1460
|
+
from datetime import datetime
|
|
1461
|
+
return PerUserMonitorResponse(
|
|
1462
|
+
timestamp=datetime.now(),
|
|
1463
|
+
total_active_users=len(all_users),
|
|
1464
|
+
users=all_users,
|
|
1465
|
+
)
|
|
1466
|
+
|
|
1467
|
+
# Register the monitoring endpoint
|
|
1468
|
+
app.add_api_route(path="/monitor/users",
|
|
1469
|
+
endpoint=get_per_user_metrics,
|
|
1470
|
+
methods=["GET"],
|
|
1471
|
+
response_model=PerUserMonitorResponse,
|
|
1472
|
+
description="Get resource usage metrics for per-user workflows",
|
|
1473
|
+
tags=["Monitoring"],
|
|
1474
|
+
responses={
|
|
1475
|
+
200: {
|
|
1476
|
+
"description": "Successfully retrieved per-user metrics",
|
|
1477
|
+
"content": {
|
|
1478
|
+
"application/json": {
|
|
1479
|
+
"example": {
|
|
1480
|
+
"timestamp":
|
|
1481
|
+
"2025-12-16T10:30:00Z",
|
|
1482
|
+
"total_active_users":
|
|
1483
|
+
2,
|
|
1484
|
+
"users": [{
|
|
1485
|
+
"user_id": "alice",
|
|
1486
|
+
"session": {
|
|
1487
|
+
"created_at": "2025-12-16T09:00:00Z",
|
|
1488
|
+
"last_activity": "2025-12-16T10:29:55Z",
|
|
1489
|
+
"ref_count": 1,
|
|
1490
|
+
"is_active": True
|
|
1491
|
+
},
|
|
1492
|
+
"requests": {
|
|
1493
|
+
"total_requests": 42,
|
|
1494
|
+
"active_requests": 1,
|
|
1495
|
+
"avg_latency_ms": 1250.5,
|
|
1496
|
+
"error_count": 2
|
|
1497
|
+
},
|
|
1498
|
+
"memory": {
|
|
1499
|
+
"per_user_functions_count": 2,
|
|
1500
|
+
"per_user_function_groups_count": 1,
|
|
1501
|
+
"exit_stack_size": 3
|
|
1502
|
+
}
|
|
1503
|
+
}]
|
|
1504
|
+
}
|
|
1505
|
+
}
|
|
1506
|
+
}
|
|
1507
|
+
},
|
|
1508
|
+
500: {
|
|
1509
|
+
"description": "Internal Server Error"
|
|
1510
|
+
}
|
|
1511
|
+
})
|
|
1512
|
+
|
|
1513
|
+
logger.info("Added per-user monitoring endpoint at /monitor/users")
|
|
1514
|
+
|
|
1380
1515
|
async def _add_flow(self, state: str, flow_state: FlowState):
|
|
1381
1516
|
async with self._outstanding_flows_lock:
|
|
1382
1517
|
self._outstanding_flows[state] = flow_state
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -32,6 +32,7 @@ async def pull_intermediate(_q, adapter):
|
|
|
32
32
|
intermediate_done = asyncio.Event()
|
|
33
33
|
context = Context.get()
|
|
34
34
|
loop = asyncio.get_running_loop()
|
|
35
|
+
trace_id_emitted = False
|
|
35
36
|
|
|
36
37
|
async def set_intermediate_done():
|
|
37
38
|
intermediate_done.set()
|
|
@@ -43,6 +44,16 @@ async def pull_intermediate(_q, adapter):
|
|
|
43
44
|
If adapter is None, convert the raw IntermediateStep into the complete
|
|
44
45
|
ResponseIntermediateStep and place it into the queue.
|
|
45
46
|
"""
|
|
47
|
+
nonlocal trace_id_emitted
|
|
48
|
+
|
|
49
|
+
# Check if trace ID is now available and emit it once
|
|
50
|
+
if not trace_id_emitted:
|
|
51
|
+
observability_trace_id = context.observability_trace_id
|
|
52
|
+
if observability_trace_id:
|
|
53
|
+
from nat.data_models.api_server import ResponseObservabilityTrace
|
|
54
|
+
loop.create_task(_q.put(ResponseObservabilityTrace(observability_trace_id=observability_trace_id)))
|
|
55
|
+
trace_id_emitted = True
|
|
56
|
+
|
|
46
57
|
if adapter is None:
|
|
47
58
|
adapted = ResponseIntermediateStep(id=item.UUID,
|
|
48
59
|
type=item.event_type,
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -208,13 +208,14 @@ class JobStore(DaskClientMixin):
|
|
|
208
208
|
AsyncSession
|
|
209
209
|
An active SQLAlchemy async session with an open transaction.
|
|
210
210
|
"""
|
|
211
|
-
|
|
212
|
-
async with
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
211
|
+
try:
|
|
212
|
+
async with self._session() as session:
|
|
213
|
+
async with session.begin():
|
|
214
|
+
yield session
|
|
215
|
+
finally:
|
|
216
|
+
# Removes the current task key from the session registry, preventing
|
|
217
|
+
# potential memory leaks
|
|
218
|
+
await self._session.remove()
|
|
218
219
|
|
|
219
220
|
def ensure_job_id(self, job_id: str | None) -> str:
|
|
220
221
|
"""
|
|
@@ -502,13 +503,14 @@ class JobStore(DaskClientMixin):
|
|
|
502
503
|
|
|
503
504
|
return updated_at + timedelta(seconds=job.expiry_seconds)
|
|
504
505
|
|
|
505
|
-
async def cleanup_expired_jobs(self):
|
|
506
|
+
async def cleanup_expired_jobs(self) -> int:
|
|
506
507
|
"""
|
|
507
508
|
Cleanup expired jobs, keeping the most recent one.
|
|
508
509
|
|
|
509
510
|
Updated_at is used instead of created_at to determine the most recent job. This is because jobs may not be
|
|
510
511
|
processed in the order they are created.
|
|
511
512
|
"""
|
|
513
|
+
logger.info("Starting cleanup of expired jobs")
|
|
512
514
|
now = datetime.now(UTC)
|
|
513
515
|
|
|
514
516
|
stmt = select(JobInfo).where(
|
|
@@ -536,9 +538,11 @@ class JobStore(DaskClientMixin):
|
|
|
536
538
|
elif os.path.isdir(job.output_path):
|
|
537
539
|
shutil.rmtree(job.output_path)
|
|
538
540
|
|
|
539
|
-
|
|
541
|
+
num_expired = len(expired_ids)
|
|
542
|
+
if num_expired > 0:
|
|
540
543
|
successfully_expired = []
|
|
541
544
|
for job_id in expired_ids:
|
|
545
|
+
var = None
|
|
542
546
|
try:
|
|
543
547
|
var = Variable(name=job_id, client=client)
|
|
544
548
|
try:
|
|
@@ -549,14 +553,22 @@ class JobStore(DaskClientMixin):
|
|
|
549
553
|
except TimeoutError:
|
|
550
554
|
pass
|
|
551
555
|
|
|
552
|
-
var.delete()
|
|
553
556
|
successfully_expired.append(job_id)
|
|
554
557
|
except Exception:
|
|
555
558
|
logger.exception("Failed to expire %s", job_id)
|
|
556
559
|
|
|
560
|
+
finally:
|
|
561
|
+
if var is not None:
|
|
562
|
+
try:
|
|
563
|
+
var.delete()
|
|
564
|
+
except Exception:
|
|
565
|
+
logger.exception("Failed to delete variable %s", job_id)
|
|
566
|
+
|
|
557
567
|
await session.execute(
|
|
558
568
|
update(JobInfo).where(JobInfo.job_id.in_(successfully_expired)).values(is_expired=True))
|
|
559
569
|
|
|
570
|
+
return num_expired
|
|
571
|
+
|
|
560
572
|
|
|
561
573
|
def get_db_engine(db_url: str | None = None, echo: bool = False, use_async: bool = True) -> "Engine | AsyncEngine":
|
|
562
574
|
"""
|
nat/front_ends/fastapi/main.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -30,6 +30,7 @@ from nat.data_models.api_server import ChatResponse
|
|
|
30
30
|
from nat.data_models.api_server import ChatResponseChunk
|
|
31
31
|
from nat.data_models.api_server import Error
|
|
32
32
|
from nat.data_models.api_server import ErrorTypes
|
|
33
|
+
from nat.data_models.api_server import ResponseObservabilityTrace
|
|
33
34
|
from nat.data_models.api_server import ResponsePayloadOutput
|
|
34
35
|
from nat.data_models.api_server import ResponseSerializable
|
|
35
36
|
from nat.data_models.api_server import SystemResponseContent
|
|
@@ -38,6 +39,7 @@ from nat.data_models.api_server import UserMessageContentRoleType
|
|
|
38
39
|
from nat.data_models.api_server import UserMessages
|
|
39
40
|
from nat.data_models.api_server import WebSocketMessageStatus
|
|
40
41
|
from nat.data_models.api_server import WebSocketMessageType
|
|
42
|
+
from nat.data_models.api_server import WebSocketObservabilityTraceMessage
|
|
41
43
|
from nat.data_models.api_server import WebSocketSystemInteractionMessage
|
|
42
44
|
from nat.data_models.api_server import WebSocketSystemIntermediateStepMessage
|
|
43
45
|
from nat.data_models.api_server import WebSocketSystemResponseTokenMessage
|
|
@@ -69,14 +71,15 @@ class WebSocketMessageHandler:
|
|
|
69
71
|
self._conversation_id: str | None = None
|
|
70
72
|
self._workflow_schema_type: str | None = None
|
|
71
73
|
self._user_interaction_response: asyncio.Future[TextContent] | None = None
|
|
74
|
+
self._pending_observability_trace: ResponseObservabilityTrace | None = None
|
|
72
75
|
|
|
73
76
|
self._flow_handler: FlowHandlerBase | None = None
|
|
74
77
|
|
|
75
78
|
self._schema_output_mapping: dict[str, type[BaseModel] | type[None]] = {
|
|
76
|
-
WorkflowSchemaType.GENERATE: self._session_manager.
|
|
79
|
+
WorkflowSchemaType.GENERATE: self._session_manager.get_workflow_single_output_schema(),
|
|
77
80
|
WorkflowSchemaType.CHAT: ChatResponse,
|
|
78
81
|
WorkflowSchemaType.CHAT_STREAM: ChatResponseChunk,
|
|
79
|
-
WorkflowSchemaType.GENERATE_STREAM: self._session_manager.
|
|
82
|
+
WorkflowSchemaType.GENERATE_STREAM: self._session_manager.get_workflow_streaming_output_schema(),
|
|
80
83
|
}
|
|
81
84
|
|
|
82
85
|
def set_flow_handler(self, flow_handler: FlowHandlerBase) -> None:
|
|
@@ -175,6 +178,7 @@ class WebSocketMessageHandler:
|
|
|
175
178
|
self._message_parent_id = user_message_as_validated_type.id
|
|
176
179
|
self._workflow_schema_type = user_message_as_validated_type.schema_type
|
|
177
180
|
self._conversation_id = user_message_as_validated_type.conversation_id
|
|
181
|
+
self._pending_observability_trace = None
|
|
178
182
|
|
|
179
183
|
message_content: typing.Any = await self._process_websocket_user_message(user_message_as_validated_type)
|
|
180
184
|
|
|
@@ -250,6 +254,13 @@ class WebSocketMessageHandler:
|
|
|
250
254
|
content=content,
|
|
251
255
|
status=status)
|
|
252
256
|
|
|
257
|
+
elif issubclass(message_schema, WebSocketObservabilityTraceMessage):
|
|
258
|
+
message = await self._message_validator.create_observability_trace_message(
|
|
259
|
+
message_id=message_id,
|
|
260
|
+
parent_id=self._message_parent_id,
|
|
261
|
+
conversation_id=self._conversation_id,
|
|
262
|
+
content=content)
|
|
263
|
+
|
|
253
264
|
elif isinstance(content, Error):
|
|
254
265
|
raise ValidationError(f"Invalid input data creating websocket message. {data_model.model_dump_json()}")
|
|
255
266
|
|
|
@@ -327,12 +338,18 @@ class WebSocketMessageHandler:
|
|
|
327
338
|
user_authentication_callback=auth_callback) as session:
|
|
328
339
|
|
|
329
340
|
async for value in generate_streaming_response(payload,
|
|
330
|
-
|
|
341
|
+
session=session,
|
|
331
342
|
streaming=True,
|
|
332
343
|
step_adaptor=self._step_adaptor,
|
|
333
344
|
result_type=result_type,
|
|
334
345
|
output_type=output_type):
|
|
335
346
|
|
|
347
|
+
# Store observability trace to send after completion message
|
|
348
|
+
if isinstance(value, ResponseObservabilityTrace):
|
|
349
|
+
if self._pending_observability_trace is None:
|
|
350
|
+
self._pending_observability_trace = value
|
|
351
|
+
continue
|
|
352
|
+
|
|
336
353
|
if not isinstance(value, ResponseSerializable):
|
|
337
354
|
value = ResponsePayloadOutput(payload=value)
|
|
338
355
|
|
|
@@ -342,3 +359,9 @@ class WebSocketMessageHandler:
|
|
|
342
359
|
await self.create_websocket_message(data_model=SystemResponseContent(),
|
|
343
360
|
message_type=WebSocketMessageType.RESPONSE_MESSAGE,
|
|
344
361
|
status=WebSocketMessageStatus.COMPLETE)
|
|
362
|
+
|
|
363
|
+
# Send observability trace after completion message
|
|
364
|
+
if self._pending_observability_trace is not None:
|
|
365
|
+
await self.create_websocket_message(data_model=self._pending_observability_trace,
|
|
366
|
+
message_type=WebSocketMessageType.OBSERVABILITY_TRACE_MESSAGE)
|
|
367
|
+
self._pending_observability_trace = None
|