nvidia-nat 1.4.0a20251120__py3-none-any.whl → 1.4.0a20260113__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiq/__init__.py +1 -1
- nat/{front_ends/mcp → agent/auto_memory_wrapper}/__init__.py +1 -1
- nat/agent/auto_memory_wrapper/agent.py +278 -0
- nat/agent/auto_memory_wrapper/register.py +227 -0
- nat/agent/auto_memory_wrapper/state.py +30 -0
- nat/agent/base.py +1 -1
- nat/agent/dual_node.py +1 -1
- nat/agent/prompt_optimizer/prompt.py +1 -1
- nat/agent/prompt_optimizer/register.py +1 -1
- nat/agent/react_agent/agent.py +16 -9
- nat/agent/react_agent/output_parser.py +2 -2
- nat/agent/react_agent/prompt.py +3 -2
- nat/agent/react_agent/register.py +2 -2
- nat/agent/react_agent/register_per_user_agent.py +104 -0
- nat/agent/reasoning_agent/reasoning_agent.py +1 -1
- nat/agent/register.py +3 -1
- nat/agent/responses_api_agent/__init__.py +1 -1
- nat/agent/responses_api_agent/register.py +1 -1
- nat/agent/rewoo_agent/agent.py +9 -4
- nat/agent/rewoo_agent/prompt.py +1 -1
- nat/agent/rewoo_agent/register.py +1 -1
- nat/agent/tool_calling_agent/agent.py +5 -4
- nat/agent/tool_calling_agent/register.py +1 -1
- nat/authentication/__init__.py +1 -1
- nat/authentication/api_key/__init__.py +1 -1
- nat/authentication/api_key/api_key_auth_provider.py +1 -1
- nat/authentication/api_key/api_key_auth_provider_config.py +22 -7
- nat/authentication/api_key/register.py +1 -1
- nat/authentication/credential_validator/__init__.py +1 -1
- nat/authentication/credential_validator/bearer_token_validator.py +1 -1
- nat/authentication/exceptions/__init__.py +1 -1
- nat/authentication/exceptions/api_key_exceptions.py +1 -1
- nat/authentication/http_basic_auth/http_basic_auth_provider.py +1 -1
- nat/authentication/http_basic_auth/register.py +1 -1
- nat/authentication/interfaces.py +1 -1
- nat/authentication/oauth2/__init__.py +1 -1
- nat/authentication/oauth2/oauth2_auth_code_flow_provider.py +1 -1
- nat/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +1 -1
- nat/authentication/oauth2/oauth2_resource_server_config.py +1 -1
- nat/authentication/oauth2/register.py +1 -1
- nat/authentication/register.py +1 -1
- nat/builder/builder.py +511 -1
- nat/builder/child_builder.py +385 -0
- nat/builder/component_utils.py +28 -4
- nat/builder/context.py +17 -1
- nat/builder/embedder.py +1 -1
- nat/builder/eval_builder.py +19 -7
- nat/builder/evaluator.py +1 -1
- nat/builder/framework_enum.py +2 -1
- nat/builder/front_end.py +1 -1
- nat/builder/function.py +40 -3
- nat/builder/function_base.py +1 -1
- nat/builder/function_info.py +1 -1
- nat/builder/intermediate_step_manager.py +1 -1
- nat/builder/llm.py +1 -1
- nat/builder/per_user_workflow_builder.py +843 -0
- nat/builder/retriever.py +1 -1
- nat/builder/sync_builder.py +571 -0
- nat/builder/user_interaction_manager.py +1 -1
- nat/builder/workflow.py +1 -1
- nat/builder/workflow_builder.py +536 -424
- nat/cli/__init__.py +1 -1
- nat/cli/cli_utils/config_override.py +1 -1
- nat/cli/cli_utils/validation.py +32 -1
- nat/cli/commands/configure/channel/add.py +1 -1
- nat/cli/commands/configure/channel/channel.py +1 -1
- nat/cli/commands/configure/channel/remove.py +1 -1
- nat/cli/commands/configure/channel/update.py +1 -1
- nat/cli/commands/configure/configure.py +1 -1
- nat/cli/commands/evaluate.py +87 -13
- nat/cli/commands/finetune.py +132 -0
- nat/cli/commands/info/__init__.py +1 -1
- nat/cli/commands/info/info.py +1 -1
- nat/cli/commands/info/list_channels.py +1 -1
- nat/cli/commands/info/list_components.py +1 -1
- nat/cli/commands/object_store/__init__.py +1 -1
- nat/cli/commands/object_store/object_store.py +1 -1
- nat/cli/commands/optimize.py +1 -1
- nat/cli/commands/{mcp → red_teaming}/__init__.py +1 -1
- nat/cli/commands/red_teaming/red_teaming.py +138 -0
- nat/cli/commands/red_teaming/red_teaming_utils.py +73 -0
- nat/cli/commands/registry/__init__.py +1 -1
- nat/cli/commands/registry/publish.py +1 -1
- nat/cli/commands/registry/pull.py +1 -1
- nat/cli/commands/registry/registry.py +1 -1
- nat/cli/commands/registry/remove.py +1 -1
- nat/cli/commands/registry/search.py +1 -1
- nat/cli/commands/sizing/__init__.py +1 -1
- nat/cli/commands/sizing/calc.py +1 -1
- nat/cli/commands/sizing/sizing.py +1 -1
- nat/cli/commands/start.py +1 -1
- nat/cli/commands/uninstall.py +1 -1
- nat/cli/commands/validate.py +1 -1
- nat/cli/commands/workflow/__init__.py +1 -1
- nat/cli/commands/workflow/workflow.py +1 -1
- nat/cli/commands/workflow/workflow_commands.py +3 -2
- nat/cli/entrypoint.py +15 -37
- nat/cli/main.py +2 -2
- nat/cli/plugin_loader.py +69 -0
- nat/cli/register_workflow.py +183 -5
- nat/cli/type_registry.py +169 -3
- nat/control_flow/register.py +1 -1
- nat/control_flow/router_agent/agent.py +1 -1
- nat/control_flow/router_agent/prompt.py +1 -1
- nat/control_flow/router_agent/register.py +1 -1
- nat/control_flow/sequential_executor.py +28 -7
- nat/data_models/__init__.py +1 -1
- nat/data_models/agent.py +1 -1
- nat/data_models/api_server.py +38 -3
- nat/data_models/authentication.py +1 -1
- nat/data_models/common.py +1 -1
- nat/data_models/component.py +7 -1
- nat/data_models/component_ref.py +34 -1
- nat/data_models/config.py +62 -1
- nat/data_models/dataset_handler.py +15 -2
- nat/data_models/discovery_metadata.py +1 -1
- nat/data_models/embedder.py +1 -1
- nat/data_models/evaluate.py +6 -1
- nat/data_models/evaluator.py +1 -1
- nat/data_models/finetuning.py +260 -0
- nat/data_models/front_end.py +1 -1
- nat/data_models/function.py +1 -1
- nat/data_models/function_dependencies.py +1 -1
- nat/data_models/gated_field_mixin.py +1 -1
- nat/data_models/interactive.py +1 -1
- nat/data_models/intermediate_step.py +29 -2
- nat/data_models/invocation_node.py +1 -1
- nat/data_models/llm.py +1 -1
- nat/data_models/logging.py +1 -1
- nat/data_models/memory.py +1 -1
- nat/data_models/middleware.py +3 -1
- nat/data_models/object_store.py +1 -1
- nat/data_models/openai_mcp.py +1 -1
- nat/data_models/optimizable.py +1 -1
- nat/data_models/optimizer.py +1 -1
- nat/data_models/profiler.py +1 -1
- nat/data_models/registry_handler.py +1 -1
- nat/data_models/retriever.py +1 -1
- nat/data_models/retry_mixin.py +1 -1
- nat/data_models/runtime_enum.py +1 -1
- nat/data_models/span.py +1 -1
- nat/data_models/step_adaptor.py +1 -1
- nat/data_models/streaming.py +1 -1
- nat/data_models/swe_bench_model.py +1 -1
- nat/data_models/telemetry_exporter.py +1 -1
- nat/data_models/thinking_mixin.py +1 -1
- nat/data_models/ttc_strategy.py +1 -1
- nat/embedder/azure_openai_embedder.py +1 -1
- nat/embedder/nim_embedder.py +1 -1
- nat/embedder/openai_embedder.py +1 -1
- nat/embedder/register.py +1 -1
- nat/eval/__init__.py +1 -1
- nat/eval/config.py +8 -1
- nat/eval/dataset_handler/dataset_downloader.py +1 -1
- nat/eval/dataset_handler/dataset_filter.py +1 -1
- nat/eval/dataset_handler/dataset_handler.py +4 -2
- nat/eval/evaluate.py +217 -80
- nat/eval/evaluator/__init__.py +1 -1
- nat/eval/evaluator/base_evaluator.py +2 -2
- nat/eval/evaluator/evaluator_model.py +3 -2
- nat/eval/intermediate_step_adapter.py +1 -1
- nat/eval/llm_validator.py +336 -0
- nat/eval/rag_evaluator/evaluate.py +17 -10
- nat/eval/rag_evaluator/register.py +1 -1
- nat/eval/red_teaming_evaluator/__init__.py +14 -0
- nat/eval/red_teaming_evaluator/data_models.py +66 -0
- nat/eval/red_teaming_evaluator/evaluate.py +327 -0
- nat/eval/red_teaming_evaluator/filter_conditions.py +75 -0
- nat/eval/red_teaming_evaluator/register.py +55 -0
- nat/eval/register.py +2 -1
- nat/eval/remote_workflow.py +1 -1
- nat/eval/runners/__init__.py +1 -1
- nat/eval/runners/config.py +1 -1
- nat/eval/runners/multi_eval_runner.py +1 -1
- nat/eval/runners/red_teaming_runner/__init__.py +24 -0
- nat/eval/runners/red_teaming_runner/config.py +282 -0
- nat/eval/runners/red_teaming_runner/report_utils.py +707 -0
- nat/eval/runners/red_teaming_runner/runner.py +867 -0
- nat/eval/runtime_evaluator/__init__.py +1 -1
- nat/eval/runtime_evaluator/evaluate.py +1 -1
- nat/eval/runtime_evaluator/register.py +1 -1
- nat/eval/runtime_event_subscriber.py +1 -1
- nat/eval/swe_bench_evaluator/evaluate.py +1 -1
- nat/eval/swe_bench_evaluator/register.py +1 -1
- nat/eval/trajectory_evaluator/evaluate.py +2 -2
- nat/eval/trajectory_evaluator/register.py +1 -1
- nat/eval/tunable_rag_evaluator/evaluate.py +5 -5
- nat/eval/tunable_rag_evaluator/register.py +1 -1
- nat/eval/usage_stats.py +1 -1
- nat/eval/utils/eval_trace_ctx.py +1 -1
- nat/eval/utils/output_uploader.py +1 -1
- nat/eval/utils/tqdm_position_registry.py +1 -1
- nat/eval/utils/weave_eval.py +1 -1
- nat/experimental/decorators/experimental_warning_decorator.py +1 -1
- nat/experimental/test_time_compute/editing/iterative_plan_refinement_editor.py +1 -1
- nat/experimental/test_time_compute/editing/llm_as_a_judge_editor.py +1 -1
- nat/experimental/test_time_compute/editing/motivation_aware_summarization.py +1 -1
- nat/experimental/test_time_compute/functions/execute_score_select_function.py +1 -1
- nat/experimental/test_time_compute/functions/multi_llm_judge_function.py +88 -0
- nat/experimental/test_time_compute/functions/plan_select_execute_function.py +1 -1
- nat/experimental/test_time_compute/functions/ttc_tool_orchestration_function.py +1 -1
- nat/experimental/test_time_compute/functions/ttc_tool_wrapper_function.py +1 -1
- nat/experimental/test_time_compute/models/editor_config.py +1 -1
- nat/experimental/test_time_compute/models/scoring_config.py +1 -1
- nat/experimental/test_time_compute/models/search_config.py +20 -2
- nat/experimental/test_time_compute/models/selection_config.py +33 -2
- nat/experimental/test_time_compute/models/stage_enums.py +1 -1
- nat/experimental/test_time_compute/models/strategy_base.py +1 -1
- nat/experimental/test_time_compute/models/tool_use_config.py +1 -1
- nat/experimental/test_time_compute/models/ttc_item.py +1 -1
- nat/experimental/test_time_compute/register.py +4 -1
- nat/experimental/test_time_compute/scoring/llm_based_agent_scorer.py +1 -1
- nat/experimental/test_time_compute/scoring/llm_based_plan_scorer.py +1 -1
- nat/experimental/test_time_compute/scoring/motivation_aware_scorer.py +1 -1
- nat/experimental/test_time_compute/search/multi_llm_generation.py +115 -0
- nat/experimental/test_time_compute/search/multi_llm_planner.py +1 -1
- nat/experimental/test_time_compute/search/multi_query_retrieval_search.py +1 -1
- nat/experimental/test_time_compute/search/single_shot_multi_plan_planner.py +1 -1
- nat/experimental/test_time_compute/selection/best_of_n_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_based_agent_output_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_based_output_merging_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_based_plan_selector.py +1 -1
- nat/experimental/test_time_compute/selection/llm_judge_selection.py +127 -0
- nat/experimental/test_time_compute/selection/threshold_selector.py +1 -1
- nat/finetuning/__init__.py +24 -0
- nat/finetuning/finetuning_runtime.py +143 -0
- nat/finetuning/interfaces/__init__.py +24 -0
- nat/finetuning/interfaces/finetuning_runner.py +261 -0
- nat/finetuning/interfaces/trainer_adapter.py +103 -0
- nat/finetuning/interfaces/trajectory_builder.py +115 -0
- nat/finetuning/utils/__init__.py +15 -0
- nat/finetuning/utils/parsers/__init__.py +15 -0
- nat/finetuning/utils/parsers/adk_parser.py +141 -0
- nat/finetuning/utils/parsers/base_parser.py +238 -0
- nat/finetuning/utils/parsers/common.py +91 -0
- nat/finetuning/utils/parsers/langchain_parser.py +267 -0
- nat/finetuning/utils/parsers/llama_index_parser.py +218 -0
- nat/front_ends/__init__.py +1 -1
- nat/front_ends/console/__init__.py +1 -1
- nat/front_ends/console/authentication_flow_handler.py +1 -1
- nat/front_ends/console/console_front_end_config.py +4 -1
- nat/front_ends/console/console_front_end_plugin.py +5 -4
- nat/front_ends/console/register.py +1 -1
- nat/front_ends/cron/__init__.py +1 -1
- nat/front_ends/fastapi/__init__.py +1 -1
- nat/front_ends/fastapi/async_job.py +128 -0
- nat/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +1 -1
- nat/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +13 -9
- nat/front_ends/fastapi/dask_client_mixin.py +1 -1
- nat/front_ends/fastapi/fastapi_front_end_config.py +1 -1
- nat/front_ends/fastapi/fastapi_front_end_controller.py +1 -1
- nat/front_ends/fastapi/fastapi_front_end_plugin.py +25 -30
- nat/front_ends/fastapi/fastapi_front_end_plugin_worker.py +195 -60
- nat/front_ends/fastapi/html_snippets/__init__.py +1 -1
- nat/front_ends/fastapi/html_snippets/auth_code_grant_success.py +1 -1
- nat/front_ends/fastapi/intermediate_steps_subscriber.py +12 -1
- nat/front_ends/fastapi/job_store.py +23 -11
- nat/front_ends/fastapi/main.py +1 -1
- nat/front_ends/fastapi/message_handler.py +27 -4
- nat/front_ends/fastapi/message_validator.py +54 -2
- nat/front_ends/fastapi/register.py +1 -1
- nat/front_ends/fastapi/response_helpers.py +16 -15
- nat/front_ends/fastapi/step_adaptor.py +1 -1
- nat/front_ends/fastapi/utils.py +1 -1
- nat/front_ends/register.py +1 -2
- nat/front_ends/simple_base/__init__.py +1 -1
- nat/front_ends/simple_base/simple_front_end_plugin_base.py +6 -4
- nat/llm/aws_bedrock_llm.py +1 -1
- nat/llm/azure_openai_llm.py +10 -1
- nat/llm/dynamo_llm.py +363 -0
- nat/llm/huggingface_llm.py +177 -0
- nat/llm/litellm_llm.py +1 -1
- nat/llm/nim_llm.py +1 -1
- nat/llm/openai_llm.py +1 -1
- nat/llm/register.py +3 -1
- nat/llm/utils/__init__.py +1 -1
- nat/llm/utils/env_config_value.py +1 -1
- nat/llm/utils/error.py +1 -1
- nat/llm/utils/thinking.py +1 -1
- nat/memory/__init__.py +1 -1
- nat/memory/interfaces.py +1 -1
- nat/memory/models.py +1 -1
- nat/meta/pypi.md +1 -1
- nat/middleware/__init__.py +5 -5
- nat/middleware/cache/__init__.py +14 -0
- nat/middleware/{cache_middleware.py → cache/cache_middleware.py} +39 -42
- nat/middleware/cache/cache_middleware_config.py +44 -0
- nat/middleware/cache/register.py +33 -0
- nat/middleware/defense/__init__.py +14 -0
- nat/middleware/defense/defense_middleware.py +362 -0
- nat/middleware/defense/defense_middleware_content_guard.py +455 -0
- nat/middleware/defense/defense_middleware_data_models.py +91 -0
- nat/middleware/defense/defense_middleware_output_verifier.py +440 -0
- nat/middleware/defense/defense_middleware_pii.py +356 -0
- nat/middleware/defense/register.py +82 -0
- nat/middleware/dynamic/__init__.py +14 -0
- nat/middleware/dynamic/dynamic_function_middleware.py +962 -0
- nat/middleware/dynamic/dynamic_middleware_config.py +132 -0
- nat/middleware/dynamic/register.py +34 -0
- nat/middleware/function_middleware.py +236 -52
- nat/middleware/logging/__init__.py +14 -0
- nat/middleware/logging/logging_middleware.py +67 -0
- nat/middleware/logging/logging_middleware_config.py +28 -0
- nat/middleware/logging/register.py +33 -0
- nat/middleware/middleware.py +142 -28
- nat/middleware/red_teaming/__init__.py +14 -0
- nat/middleware/red_teaming/red_teaming_middleware.py +344 -0
- nat/middleware/red_teaming/red_teaming_middleware_config.py +112 -0
- nat/middleware/red_teaming/register.py +47 -0
- nat/middleware/register.py +7 -20
- nat/middleware/utils/__init__.py +14 -0
- nat/middleware/utils/workflow_inventory.py +155 -0
- nat/object_store/__init__.py +1 -1
- nat/object_store/in_memory_object_store.py +1 -1
- nat/object_store/interfaces.py +1 -1
- nat/object_store/models.py +1 -1
- nat/object_store/register.py +1 -1
- nat/observability/__init__.py +1 -1
- nat/observability/exporter/__init__.py +1 -1
- nat/observability/exporter/base_exporter.py +1 -1
- nat/observability/exporter/exporter.py +1 -1
- nat/observability/exporter/file_exporter.py +1 -1
- nat/observability/exporter/processing_exporter.py +1 -1
- nat/observability/exporter/raw_exporter.py +1 -1
- nat/observability/exporter/span_exporter.py +7 -1
- nat/observability/exporter_manager.py +1 -1
- nat/observability/mixin/__init__.py +1 -1
- nat/observability/mixin/batch_config_mixin.py +1 -1
- nat/observability/mixin/collector_config_mixin.py +1 -1
- nat/observability/mixin/file_mixin.py +1 -1
- nat/observability/mixin/file_mode.py +1 -1
- nat/observability/mixin/redaction_config_mixin.py +1 -1
- nat/observability/mixin/resource_conflict_mixin.py +1 -1
- nat/observability/mixin/serialize_mixin.py +1 -1
- nat/observability/mixin/tagging_config_mixin.py +1 -1
- nat/observability/mixin/type_introspection_mixin.py +1 -1
- nat/observability/processor/__init__.py +1 -1
- nat/observability/processor/batching_processor.py +1 -1
- nat/observability/processor/callback_processor.py +1 -1
- nat/observability/processor/falsy_batch_filter_processor.py +1 -1
- nat/observability/processor/intermediate_step_serializer.py +1 -1
- nat/observability/processor/processor.py +1 -1
- nat/observability/processor/processor_factory.py +1 -1
- nat/observability/processor/redaction/__init__.py +1 -1
- nat/observability/processor/redaction/contextual_redaction_processor.py +1 -1
- nat/observability/processor/redaction/contextual_span_redaction_processor.py +1 -1
- nat/observability/processor/redaction/redaction_processor.py +1 -1
- nat/observability/processor/redaction/span_header_redaction_processor.py +1 -1
- nat/observability/processor/span_tagging_processor.py +1 -1
- nat/observability/register.py +1 -1
- nat/observability/utils/__init__.py +1 -1
- nat/observability/utils/dict_utils.py +1 -1
- nat/observability/utils/time_utils.py +1 -1
- nat/profiler/calc/__init__.py +1 -1
- nat/profiler/calc/calc_runner.py +3 -3
- nat/profiler/calc/calculations.py +1 -1
- nat/profiler/calc/data_models.py +1 -1
- nat/profiler/calc/plot.py +30 -3
- nat/profiler/callbacks/agno_callback_handler.py +1 -1
- nat/profiler/callbacks/base_callback_class.py +1 -1
- nat/profiler/callbacks/langchain_callback_handler.py +33 -3
- nat/profiler/callbacks/llama_index_callback_handler.py +13 -10
- nat/profiler/callbacks/semantic_kernel_callback_handler.py +1 -1
- nat/profiler/callbacks/token_usage_base_model.py +1 -1
- nat/profiler/data_frame_row.py +1 -1
- nat/profiler/data_models.py +1 -1
- nat/profiler/decorators/framework_wrapper.py +16 -1
- nat/profiler/decorators/function_tracking.py +1 -1
- nat/profiler/forecasting/config.py +1 -1
- nat/profiler/forecasting/model_trainer.py +1 -1
- nat/profiler/forecasting/models/__init__.py +1 -1
- nat/profiler/forecasting/models/forecasting_base_model.py +1 -1
- nat/profiler/forecasting/models/linear_model.py +1 -1
- nat/profiler/forecasting/models/random_forest_regressor.py +1 -1
- nat/profiler/inference_metrics_model.py +1 -1
- nat/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +1 -1
- nat/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +1 -1
- nat/profiler/inference_optimization/data_models.py +1 -1
- nat/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +1 -1
- nat/profiler/inference_optimization/experimental/prefix_span_analysis.py +1 -1
- nat/profiler/inference_optimization/llm_metrics.py +1 -1
- nat/profiler/inference_optimization/prompt_caching.py +1 -1
- nat/profiler/inference_optimization/token_uniqueness.py +1 -1
- nat/profiler/inference_optimization/workflow_runtimes.py +1 -1
- nat/profiler/intermediate_property_adapter.py +1 -1
- nat/profiler/parameter_optimization/optimizable_utils.py +1 -1
- nat/profiler/parameter_optimization/optimizer_runtime.py +1 -1
- nat/profiler/parameter_optimization/parameter_optimizer.py +1 -1
- nat/profiler/parameter_optimization/parameter_selection.py +1 -1
- nat/profiler/parameter_optimization/pareto_visualizer.py +1 -1
- nat/profiler/parameter_optimization/prompt_optimizer.py +1 -1
- nat/profiler/parameter_optimization/update_helpers.py +1 -1
- nat/profiler/profile_runner.py +1 -1
- nat/profiler/utils.py +1 -1
- nat/registry_handlers/local/local_handler.py +1 -1
- nat/registry_handlers/local/register_local.py +1 -1
- nat/registry_handlers/metadata_factory.py +1 -1
- nat/registry_handlers/package_utils.py +1 -1
- nat/registry_handlers/pypi/pypi_handler.py +1 -1
- nat/registry_handlers/pypi/register_pypi.py +1 -1
- nat/registry_handlers/register.py +1 -1
- nat/registry_handlers/registry_handler_base.py +1 -1
- nat/registry_handlers/rest/register_rest.py +1 -1
- nat/registry_handlers/rest/rest_handler.py +1 -1
- nat/registry_handlers/schemas/headers.py +1 -1
- nat/registry_handlers/schemas/package.py +1 -1
- nat/registry_handlers/schemas/publish.py +1 -1
- nat/registry_handlers/schemas/pull.py +1 -1
- nat/registry_handlers/schemas/remove.py +1 -1
- nat/registry_handlers/schemas/search.py +1 -1
- nat/registry_handlers/schemas/status.py +1 -1
- nat/retriever/interface.py +1 -1
- nat/retriever/milvus/__init__.py +1 -1
- nat/retriever/milvus/register.py +1 -1
- nat/retriever/milvus/retriever.py +1 -1
- nat/retriever/models.py +1 -1
- nat/retriever/nemo_retriever/__init__.py +1 -1
- nat/retriever/nemo_retriever/register.py +1 -1
- nat/retriever/nemo_retriever/retriever.py +5 -5
- nat/retriever/register.py +1 -1
- nat/runtime/__init__.py +1 -1
- nat/runtime/loader.py +10 -3
- nat/runtime/metrics.py +180 -0
- nat/runtime/runner.py +1 -5
- nat/runtime/session.py +451 -32
- nat/runtime/user_metadata.py +1 -1
- nat/settings/global_settings.py +1 -1
- nat/tool/chat_completion.py +1 -1
- nat/tool/code_execution/README.md +1 -1
- nat/tool/code_execution/code_sandbox.py +1 -1
- nat/tool/code_execution/local_sandbox/Dockerfile.sandbox +1 -1
- nat/tool/code_execution/local_sandbox/__init__.py +1 -1
- nat/tool/code_execution/local_sandbox/local_sandbox_server.py +1 -1
- nat/tool/code_execution/local_sandbox/start_local_sandbox.sh +1 -1
- nat/tool/code_execution/register.py +1 -1
- nat/tool/code_execution/utils.py +1 -1
- nat/tool/datetime_tools.py +1 -1
- nat/tool/document_search.py +1 -1
- nat/tool/github_tools.py +1 -1
- nat/tool/memory_tools/add_memory_tool.py +1 -1
- nat/tool/memory_tools/delete_memory_tool.py +1 -1
- nat/tool/memory_tools/get_memory_tool.py +1 -1
- nat/tool/nvidia_rag.py +2 -2
- nat/tool/register.py +1 -1
- nat/tool/retriever.py +1 -1
- nat/tool/server_tools.py +1 -1
- nat/utils/__init__.py +8 -5
- nat/utils/callable_utils.py +1 -1
- nat/utils/data_models/schema_validator.py +1 -1
- nat/utils/debugging_utils.py +1 -1
- nat/utils/decorators.py +1 -1
- nat/utils/dump_distro_mapping.py +1 -1
- nat/utils/exception_handlers/automatic_retries.py +3 -3
- nat/utils/exception_handlers/schemas.py +1 -1
- nat/utils/io/model_processing.py +1 -1
- nat/utils/io/supress_logs.py +33 -0
- nat/utils/io/yaml_tools.py +1 -1
- nat/utils/log_levels.py +1 -1
- nat/utils/log_utils.py +13 -1
- nat/utils/metadata_utils.py +1 -1
- nat/utils/optional_imports.py +1 -1
- nat/utils/producer_consumer_queue.py +1 -1
- nat/utils/reactive/base/observable_base.py +1 -1
- nat/utils/reactive/base/observer_base.py +1 -1
- nat/utils/reactive/base/subject_base.py +1 -1
- nat/utils/reactive/observable.py +1 -1
- nat/utils/reactive/observer.py +1 -1
- nat/utils/reactive/subject.py +1 -1
- nat/utils/reactive/subscription.py +1 -1
- nat/utils/responses_api.py +1 -1
- nat/utils/settings/global_settings.py +1 -1
- nat/utils/string_utils.py +1 -1
- nat/utils/type_converter.py +18 -5
- nat/utils/type_utils.py +1 -1
- nat/utils/url_utils.py +1 -1
- {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/METADATA +39 -14
- nvidia_nat-1.4.0a20260113.dist-info/RECORD +547 -0
- nvidia_nat-1.4.0a20260113.dist-info/entry_points.txt +38 -0
- nat/cli/commands/mcp/mcp.py +0 -986
- nat/front_ends/mcp/introspection_token_verifier.py +0 -73
- nat/front_ends/mcp/mcp_front_end_config.py +0 -109
- nat/front_ends/mcp/mcp_front_end_plugin.py +0 -155
- nat/front_ends/mcp/mcp_front_end_plugin_worker.py +0 -388
- nat/front_ends/mcp/memory_profiler.py +0 -320
- nat/front_ends/mcp/register.py +0 -27
- nat/front_ends/mcp/tool_converter.py +0 -321
- nvidia_nat-1.4.0a20251120.dist-info/RECORD +0 -488
- nvidia_nat-1.4.0a20251120.dist-info/entry_points.txt +0 -23
- {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/WHEEL +0 -0
- {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
- {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/licenses/LICENSE.md +0 -0
- {nvidia_nat-1.4.0a20251120.dist-info → nvidia_nat-1.4.0a20260113.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
"""HuggingFace Transformers LLM Provider - Local in-process model execution."""
|
|
16
|
+
|
|
17
|
+
import logging
|
|
18
|
+
from collections.abc import AsyncIterator
|
|
19
|
+
from dataclasses import dataclass
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
from pydantic import Field
|
|
23
|
+
|
|
24
|
+
from nat.builder.builder import Builder
|
|
25
|
+
from nat.builder.llm import LLMProviderInfo
|
|
26
|
+
from nat.cli.register_workflow import register_llm_provider
|
|
27
|
+
from nat.data_models.llm import LLMBaseConfig
|
|
28
|
+
|
|
29
|
+
logger = logging.getLogger(__name__)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class ModelCacheEntry:
    """Bundle of objects kept together for one loaded HuggingFace model.

    The provider in this module stores the loaded model, its tokenizer and the
    imported ``torch`` module in a single entry so that cleanup can reach all
    three from one cache lookup.
    """

    # Object returned by ``AutoModelForCausalLM.from_pretrained``.
    model: Any
    # Object returned by ``AutoTokenizer.from_pretrained``.
    tokenizer: Any
    # The lazily imported ``torch`` module; cleanup uses it to clear the CUDA cache.
    torch: Any
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class ModelCache:
    """Process-wide singleton mapping model names to loaded HuggingFace models.

    Every call to ``ModelCache()`` returns the same object, so all callers share
    one underlying dictionary. Entries stay cached until they are explicitly
    removed; nothing in this class evicts them on a per-query basis.
    """

    _instance: "ModelCache | None" = None
    _cache: dict[str, ModelCacheEntry]

    def __new__(cls) -> "ModelCache":
        # Build the shared instance (and its empty store) on first use only.
        shared = cls._instance
        if shared is None:
            shared = super().__new__(cls)
            shared._cache = {}
            cls._instance = shared
        return shared

    def get(self, model_name: str) -> ModelCacheEntry | None:
        """Return the entry stored under ``model_name``, or ``None`` if absent."""
        try:
            return self._cache[model_name]
        except KeyError:
            return None

    def set(self, model_name: str, data: ModelCacheEntry) -> None:
        """Store ``data`` under ``model_name``, replacing any previous entry."""
        self._cache[model_name] = data

    def remove(self, model_name: str) -> None:
        """Drop the entry for ``model_name``; a missing name is silently ignored."""
        if model_name in self._cache:
            del self._cache[model_name]

    def __contains__(self, model_name: str) -> bool:
        """Report whether an entry exists for ``model_name``."""
        return model_name in self._cache.keys()
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class HuggingFaceConfig(LLMBaseConfig, name="huggingface"):
    """Settings for a HuggingFace LLM that is loaded directly and executed locally."""

    # Model identifier; also used as the key of the in-process model cache.
    model_name: str = Field(
        description="HuggingFace model name (e.g. 'Qwen/Qwen3Guard-Gen-0.6B')",
    )

    # Forwarded as ``device_map`` when the model is loaded.
    device: str = Field(
        default="auto",
        description="Device: 'cpu', 'cuda', 'cuda:0', or 'auto'",
    )

    # Forwarded as ``dtype`` when the model is loaded.
    dtype: str | None = Field(
        default="auto",
        description="Torch dtype: 'float16', 'bfloat16', 'float32', or 'auto'",
    )

    max_new_tokens: int = Field(
        default=128,
        description="Maximum number of new tokens to generate",
    )

    temperature: float = Field(
        default=0.0,
        description="Sampling temperature (0 = deterministic greedy, > 0 = sampling enabled)",
    )

    # Forwarded to both the tokenizer and the model loader.
    trust_remote_code: bool = Field(
        default=False,
        description="Trust remote code when loading model",
    )
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def get_cached_model(model_name: str) -> ModelCacheEntry | None:
    """Look up a loaded model in the shared model cache.

    Args:
        model_name: Name the model was cached under.

    Returns:
        The cached entry (model, tokenizer, torch), or ``None`` when nothing
        is cached for ``model_name``.
    """
    shared_cache = ModelCache()
    return shared_cache.get(model_name)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
async def _cleanup_model(model_name: str) -> None:
    """Clean up a loaded model and free GPU memory.

    Cleanup is best-effort: any error is logged and never propagated. The cache
    entry is always dropped once cleanup has started, even if moving the model
    or clearing the CUDA cache fails, so a partially torn-down entry can never
    be served later as a cache hit.

    Args:
        model_name: Name of the model to clean up.
    """
    try:
        cache = ModelCache()
        cached = cache.get(model_name)

        if cached is not None:
            try:
                # Move model to CPU to free GPU memory, then drop our reference
                # before asking torch to release its cached CUDA blocks.
                if cached.model is not None:
                    cached.model.to("cpu")
                    cached.model = None

                # Clear CUDA cache if available
                if cached.torch is not None and hasattr(cached.torch.cuda, "empty_cache"):
                    cached.torch.cuda.empty_cache()
            finally:
                # Runs on success and on failure: release every reference held
                # by the entry and make sure it is no longer reachable via the cache.
                cached.model = None
                cached.tokenizer = None
                cached.torch = None
                cache.remove(model_name)

        logger.debug("Model cleaned up: %s", model_name)
    except Exception:
        logger.exception("Error cleaning up HuggingFace model '%s'", model_name)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
@register_llm_provider(config_type=HuggingFaceConfig)
async def huggingface_provider(
    config: HuggingFaceConfig,
    builder: Builder,  # noqa: ARG001 - kept for provider interface, currently unused
) -> AsyncIterator[LLMProviderInfo]:
    """HuggingFace model provider - loads models locally for in-process execution.

    On entry the model and tokenizer named by ``config.model_name`` are loaded
    (unless already present in the shared ``ModelCache``) and cached. When the
    generator is closed, the cached model is cleaned up.

    Args:
        config: Configuration for the HuggingFace model.
        builder: The NAT builder instance (unused).

    Yields:
        LLMProviderInfo: Provider information for the loaded model.

    Raises:
        ValueError: If ``torch`` or ``transformers`` cannot be imported. The
            original ``ImportError`` is attached as the cause.
    """
    # Heavy optional dependencies are imported lazily so this module can be
    # imported without them installed.
    try:
        import torch
        from transformers import AutoModelForCausalLM
        from transformers import AutoTokenizer
    except ImportError as err:
        # Chain the cause so the missing module is visible in the traceback.
        raise ValueError("HuggingFace dependencies not installed. \n"
                         "Install with:\n"
                         " `pip install nvidia-nat[huggingface]` (package) or\n"
                         " `uv pip install -e '.[huggingface]'` (source)\n") from err

    cache = ModelCache()

    # Load model if not cached
    if config.model_name not in cache:
        logger.debug("Loading model from HuggingFace: %s", config.model_name)

        # Load tokenizer
        tokenizer = AutoTokenizer.from_pretrained(config.model_name, trust_remote_code=config.trust_remote_code)

        # Load model
        model = AutoModelForCausalLM.from_pretrained(config.model_name,
                                                     dtype=config.dtype,
                                                     device_map=config.device,
                                                     trust_remote_code=config.trust_remote_code)

        # Cache it (the torch module is stored too so cleanup can clear the CUDA cache)
        cache.set(config.model_name, ModelCacheEntry(model=model, tokenizer=tokenizer, torch=torch))

        logger.debug("Model loaded: %s on device: %s", config.model_name, config.device)
    else:
        logger.debug("Using cached model: %s", config.model_name)

    try:
        yield LLMProviderInfo(config=config, description=f"HuggingFace model: {config.model_name}")
    finally:
        # Cleanup when workflow/application shuts down
        # NOTE(review): the cache is keyed by model name only, so if two provider
        # contexts share a model name the first to exit removes the entry for
        # both - confirm this is intended.
        await _cleanup_model(config.model_name)
|
nat/llm/litellm_llm.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/llm/nim_llm.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/llm/openai_llm.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/llm/register.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -22,6 +22,8 @@ This module is imported by the NeMo Agent Toolkit runtime to ensure providers ar
|
|
|
22
22
|
# Import any providers which need to be automatically registered here
|
|
23
23
|
from . import aws_bedrock_llm
|
|
24
24
|
from . import azure_openai_llm
|
|
25
|
+
from . import dynamo_llm
|
|
26
|
+
from . import huggingface_llm
|
|
25
27
|
from . import litellm_llm
|
|
26
28
|
from . import nim_llm
|
|
27
29
|
from . import openai_llm
|
nat/llm/utils/__init__.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/llm/utils/error.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/llm/utils/thinking.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/memory/__init__.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/memory/interfaces.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/memory/models.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
nat/meta/pypi.md
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
<!--
|
|
2
|
-
SPDX-FileCopyrightText: Copyright (c) 2024-
|
|
2
|
+
SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
3
|
SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
5
|
Licensed under the Apache License, Version 2.0 (the "License");
|
nat/middleware/__init__.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -14,7 +14,6 @@
|
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
"""Middleware implementations for NeMo Agent Toolkit."""
|
|
16
16
|
|
|
17
|
-
from nat.middleware.cache_middleware import CacheMiddleware
|
|
18
17
|
from nat.middleware.function_middleware import FunctionMiddleware
|
|
19
18
|
from nat.middleware.function_middleware import FunctionMiddlewareChain
|
|
20
19
|
from nat.middleware.function_middleware import validate_middleware
|
|
@@ -22,14 +21,15 @@ from nat.middleware.middleware import CallNext
|
|
|
22
21
|
from nat.middleware.middleware import CallNextStream
|
|
23
22
|
from nat.middleware.middleware import FunctionMiddlewareContext
|
|
24
23
|
from nat.middleware.middleware import Middleware
|
|
24
|
+
from nat.middleware.red_teaming.red_teaming_middleware import RedTeamingMiddleware
|
|
25
25
|
|
|
26
26
|
__all__ = [
|
|
27
|
-
"CacheMiddleware",
|
|
28
27
|
"CallNext",
|
|
29
28
|
"CallNextStream",
|
|
30
|
-
"FunctionMiddlewareContext",
|
|
31
|
-
"Middleware",
|
|
32
29
|
"FunctionMiddleware",
|
|
33
30
|
"FunctionMiddlewareChain",
|
|
31
|
+
"FunctionMiddlewareContext",
|
|
32
|
+
"Middleware",
|
|
33
|
+
"RedTeamingMiddleware",
|
|
34
34
|
"validate_middleware",
|
|
35
35
|
]
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -32,17 +32,14 @@ import json
|
|
|
32
32
|
import logging
|
|
33
33
|
from collections.abc import AsyncIterator
|
|
34
34
|
from typing import Any
|
|
35
|
-
from typing import Literal
|
|
36
|
-
|
|
37
|
-
from pydantic import Field
|
|
38
35
|
|
|
39
36
|
from nat.builder.context import Context
|
|
40
37
|
from nat.builder.context import ContextState
|
|
41
|
-
from nat.data_models.middleware import FunctionMiddlewareBaseConfig
|
|
42
38
|
from nat.middleware.function_middleware import CallNext
|
|
43
39
|
from nat.middleware.function_middleware import CallNextStream
|
|
44
40
|
from nat.middleware.function_middleware import FunctionMiddleware
|
|
45
41
|
from nat.middleware.function_middleware import FunctionMiddlewareContext
|
|
42
|
+
from nat.middleware.middleware import InvocationContext
|
|
46
43
|
|
|
47
44
|
logger = logging.getLogger(__name__)
|
|
48
45
|
|
|
@@ -84,6 +81,23 @@ class CacheMiddleware(FunctionMiddleware):
|
|
|
84
81
|
self._similarity_threshold = similarity_threshold
|
|
85
82
|
self._cache: dict[str, Any] = {}
|
|
86
83
|
|
|
84
|
+
# ==================== Abstract Method Implementations ====================
|
|
85
|
+
|
|
86
|
+
@property
|
|
87
|
+
def enabled(self) -> bool:
|
|
88
|
+
"""Middleware always enabled."""
|
|
89
|
+
return True
|
|
90
|
+
|
|
91
|
+
async def pre_invoke(self, context: InvocationContext) -> InvocationContext | None: # noqa: ARG002
|
|
92
|
+
"""Not used - CacheMiddleware overrides function_middleware_invoke."""
|
|
93
|
+
return None
|
|
94
|
+
|
|
95
|
+
async def post_invoke(self, context: InvocationContext) -> InvocationContext | None: # noqa: ARG002
|
|
96
|
+
"""Not used - CacheMiddleware overrides function_middleware_invoke."""
|
|
97
|
+
return None
|
|
98
|
+
|
|
99
|
+
# ==================== Cache Logic ====================
|
|
100
|
+
|
|
87
101
|
def _should_cache(self) -> bool:
|
|
88
102
|
"""Check if caching should be enabled based on the current context."""
|
|
89
103
|
if self._enabled_mode == "always":
|
|
@@ -145,8 +159,11 @@ class CacheMiddleware(FunctionMiddleware):
|
|
|
145
159
|
|
|
146
160
|
return best_match
|
|
147
161
|
|
|
148
|
-
async def function_middleware_invoke(self,
|
|
149
|
-
|
|
162
|
+
async def function_middleware_invoke(self,
|
|
163
|
+
*args: Any,
|
|
164
|
+
call_next: CallNext,
|
|
165
|
+
context: FunctionMiddlewareContext,
|
|
166
|
+
**kwargs: Any) -> Any:
|
|
150
167
|
"""Cache middleware for single-output invocations.
|
|
151
168
|
|
|
152
169
|
Implements the four-phase middleware pattern:
|
|
@@ -157,23 +174,27 @@ class CacheMiddleware(FunctionMiddleware):
|
|
|
157
174
|
4. **Continue**: Return the result (cached or fresh)
|
|
158
175
|
|
|
159
176
|
Args:
|
|
160
|
-
|
|
177
|
+
args: The positional arguments to process
|
|
161
178
|
call_next: Callable to invoke the next middleware or function
|
|
162
179
|
context: Metadata about the function being wrapped
|
|
180
|
+
kwargs: Additional function arguments
|
|
163
181
|
|
|
164
182
|
Returns:
|
|
165
183
|
The cached output if found, otherwise the fresh output
|
|
166
184
|
"""
|
|
167
|
-
#
|
|
185
|
+
# Check if caching should be enabled for this invocation
|
|
168
186
|
if not self._should_cache():
|
|
169
|
-
return await call_next(
|
|
187
|
+
return await call_next(*args, **kwargs)
|
|
188
|
+
|
|
189
|
+
# Use first arg as cache key (primary input)
|
|
190
|
+
value = args[0] if args else None
|
|
170
191
|
|
|
171
192
|
# Phase 1: Preprocess - serialize the input
|
|
172
193
|
input_str = self._serialize_input(value)
|
|
173
194
|
if input_str is None:
|
|
174
195
|
# Can't serialize, pass through to next middleware/function
|
|
175
196
|
logger.debug("Could not serialize input for function %s, bypassing cache", context.name)
|
|
176
|
-
return await call_next(
|
|
197
|
+
return await call_next(*args, **kwargs)
|
|
177
198
|
|
|
178
199
|
# Phase 1: Preprocess - look for a similar cached input
|
|
179
200
|
similar_key = self._find_similar_key(input_str)
|
|
@@ -187,7 +208,7 @@ class CacheMiddleware(FunctionMiddleware):
|
|
|
187
208
|
|
|
188
209
|
# Phase 2: Call next - no cache hit, call next middleware/function
|
|
189
210
|
logger.debug("Cache miss for function %s", context.name)
|
|
190
|
-
result = await call_next(
|
|
211
|
+
result = await call_next(*args, **kwargs)
|
|
191
212
|
|
|
192
213
|
# Phase 3: Postprocess - cache the result for future use
|
|
193
214
|
self._cache[input_str] = result
|
|
@@ -197,9 +218,10 @@ class CacheMiddleware(FunctionMiddleware):
|
|
|
197
218
|
return result
|
|
198
219
|
|
|
199
220
|
async def function_middleware_stream(self,
|
|
200
|
-
|
|
221
|
+
*args: Any,
|
|
201
222
|
call_next: CallNextStream,
|
|
202
|
-
context: FunctionMiddlewareContext
|
|
223
|
+
context: FunctionMiddlewareContext,
|
|
224
|
+
**kwargs: Any) -> AsyncIterator[Any]:
|
|
203
225
|
"""Cache middleware for streaming invocations - bypasses caching.
|
|
204
226
|
|
|
205
227
|
Streaming results are not cached as they would need to be buffered
|
|
@@ -213,9 +235,10 @@ class CacheMiddleware(FunctionMiddleware):
|
|
|
213
235
|
4. **Continue**: Complete the stream
|
|
214
236
|
|
|
215
237
|
Args:
|
|
216
|
-
|
|
238
|
+
args: The positional arguments to process
|
|
217
239
|
call_next: Callable to invoke the next middleware or function stream
|
|
218
240
|
context: Metadata about the function being wrapped
|
|
241
|
+
kwargs: Additional function arguments
|
|
219
242
|
|
|
220
243
|
Yields:
|
|
221
244
|
Chunks from the stream (unmodified)
|
|
@@ -224,33 +247,7 @@ class CacheMiddleware(FunctionMiddleware):
|
|
|
224
247
|
logger.debug("Streaming call for function %s, bypassing cache", context.name)
|
|
225
248
|
|
|
226
249
|
# Phase 2-3: Call next and process chunks - yield chunks as they arrive
|
|
227
|
-
async for chunk in call_next(
|
|
250
|
+
async for chunk in call_next(*args, **kwargs):
|
|
228
251
|
yield chunk
|
|
229
252
|
|
|
230
253
|
# Phase 4: Continue - stream is complete (implicit)
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
class CacheMiddlewareConfig(FunctionMiddlewareBaseConfig, name="cache"):
|
|
234
|
-
"""Configuration for cache middleware.
|
|
235
|
-
|
|
236
|
-
The cache middleware memoizes function outputs based on input similarity,
|
|
237
|
-
with support for both exact and fuzzy matching.
|
|
238
|
-
|
|
239
|
-
Args:
|
|
240
|
-
enabled_mode: Controls when caching is active:
|
|
241
|
-
- "always": Cache is always enabled
|
|
242
|
-
- "eval": Cache only active when Context.is_evaluating is True
|
|
243
|
-
similarity_threshold: Float between 0 and 1 for input matching:
|
|
244
|
-
- 1.0: Exact string matching (fastest)
|
|
245
|
-
- < 1.0: Fuzzy matching using difflib similarity
|
|
246
|
-
"""
|
|
247
|
-
|
|
248
|
-
enabled_mode: Literal["always", "eval"] = Field(
|
|
249
|
-
default="eval", description="When caching is enabled: 'always' or 'eval' (only during evaluation)")
|
|
250
|
-
similarity_threshold: float = Field(default=1.0,
|
|
251
|
-
ge=0.0,
|
|
252
|
-
le=1.0,
|
|
253
|
-
description="Similarity threshold between 0 and 1. Use 1.0 for exact matching")
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
__all__ = ["CacheMiddleware", "CacheMiddlewareConfig"]
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
"""Configuration for cache middleware."""
|
|
16
|
+
|
|
17
|
+
from typing import Literal
|
|
18
|
+
|
|
19
|
+
from pydantic import Field
|
|
20
|
+
|
|
21
|
+
from nat.data_models.middleware import FunctionMiddlewareBaseConfig
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class CacheMiddlewareConfig(FunctionMiddlewareBaseConfig, name="cache"):
|
|
25
|
+
"""Configuration for cache middleware.
|
|
26
|
+
|
|
27
|
+
The cache middleware memoizes function outputs based on input similarity,
|
|
28
|
+
with support for both exact and fuzzy matching.
|
|
29
|
+
|
|
30
|
+
Args:
|
|
31
|
+
enabled_mode: Controls when caching is active:
|
|
32
|
+
- "always": Cache is always enabled
|
|
33
|
+
- "eval": Cache only active when Context.is_evaluating is True
|
|
34
|
+
similarity_threshold: Float between 0 and 1 for input matching:
|
|
35
|
+
- 1.0: Exact string matching (fastest)
|
|
36
|
+
- < 1.0: Fuzzy matching using difflib similarity
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
enabled_mode: Literal["always", "eval"] = Field(
|
|
40
|
+
default="eval", description="When caching is enabled: 'always' or 'eval' (only during evaluation)")
|
|
41
|
+
similarity_threshold: float = Field(default=1.0,
|
|
42
|
+
ge=0.0,
|
|
43
|
+
le=1.0,
|
|
44
|
+
description="Similarity threshold between 0 and 1. Use 1.0 for exact matching")
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
from nat.builder.builder import Builder
|
|
17
|
+
from nat.cli.register_workflow import register_middleware
|
|
18
|
+
from nat.middleware.cache.cache_middleware import CacheMiddleware
|
|
19
|
+
from nat.middleware.cache.cache_middleware_config import CacheMiddlewareConfig
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@register_middleware(config_type=CacheMiddlewareConfig)
|
|
23
|
+
async def cache_middleware(config: CacheMiddlewareConfig, builder: Builder):
|
|
24
|
+
"""Build a cache middleware from configuration.
|
|
25
|
+
|
|
26
|
+
Args:
|
|
27
|
+
config: The cache middleware configuration
|
|
28
|
+
builder: The workflow builder (unused but required by component pattern)
|
|
29
|
+
|
|
30
|
+
Yields:
|
|
31
|
+
A configured cache middleware instance
|
|
32
|
+
"""
|
|
33
|
+
yield CacheMiddleware(enabled_mode=config.enabled_mode, similarity_threshold=config.similarity_threshold)
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|