nvidia-nat 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiq/__init__.py +66 -0
- nat/agent/__init__.py +0 -0
- nat/agent/base.py +256 -0
- nat/agent/dual_node.py +67 -0
- nat/agent/react_agent/__init__.py +0 -0
- nat/agent/react_agent/agent.py +363 -0
- nat/agent/react_agent/output_parser.py +104 -0
- nat/agent/react_agent/prompt.py +44 -0
- nat/agent/react_agent/register.py +149 -0
- nat/agent/reasoning_agent/__init__.py +0 -0
- nat/agent/reasoning_agent/reasoning_agent.py +225 -0
- nat/agent/register.py +23 -0
- nat/agent/rewoo_agent/__init__.py +0 -0
- nat/agent/rewoo_agent/agent.py +415 -0
- nat/agent/rewoo_agent/prompt.py +110 -0
- nat/agent/rewoo_agent/register.py +157 -0
- nat/agent/tool_calling_agent/__init__.py +0 -0
- nat/agent/tool_calling_agent/agent.py +119 -0
- nat/agent/tool_calling_agent/register.py +106 -0
- nat/authentication/__init__.py +14 -0
- nat/authentication/api_key/__init__.py +14 -0
- nat/authentication/api_key/api_key_auth_provider.py +96 -0
- nat/authentication/api_key/api_key_auth_provider_config.py +124 -0
- nat/authentication/api_key/register.py +26 -0
- nat/authentication/exceptions/__init__.py +14 -0
- nat/authentication/exceptions/api_key_exceptions.py +38 -0
- nat/authentication/http_basic_auth/__init__.py +0 -0
- nat/authentication/http_basic_auth/http_basic_auth_provider.py +81 -0
- nat/authentication/http_basic_auth/register.py +30 -0
- nat/authentication/interfaces.py +93 -0
- nat/authentication/oauth2/__init__.py +14 -0
- nat/authentication/oauth2/oauth2_auth_code_flow_provider.py +107 -0
- nat/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +39 -0
- nat/authentication/oauth2/register.py +25 -0
- nat/authentication/register.py +21 -0
- nat/builder/__init__.py +0 -0
- nat/builder/builder.py +285 -0
- nat/builder/component_utils.py +316 -0
- nat/builder/context.py +270 -0
- nat/builder/embedder.py +24 -0
- nat/builder/eval_builder.py +161 -0
- nat/builder/evaluator.py +29 -0
- nat/builder/framework_enum.py +24 -0
- nat/builder/front_end.py +73 -0
- nat/builder/function.py +344 -0
- nat/builder/function_base.py +380 -0
- nat/builder/function_info.py +627 -0
- nat/builder/intermediate_step_manager.py +174 -0
- nat/builder/llm.py +25 -0
- nat/builder/retriever.py +25 -0
- nat/builder/user_interaction_manager.py +78 -0
- nat/builder/workflow.py +148 -0
- nat/builder/workflow_builder.py +1117 -0
- nat/cli/__init__.py +14 -0
- nat/cli/cli_utils/__init__.py +0 -0
- nat/cli/cli_utils/config_override.py +231 -0
- nat/cli/cli_utils/validation.py +37 -0
- nat/cli/commands/__init__.py +0 -0
- nat/cli/commands/configure/__init__.py +0 -0
- nat/cli/commands/configure/channel/__init__.py +0 -0
- nat/cli/commands/configure/channel/add.py +28 -0
- nat/cli/commands/configure/channel/channel.py +34 -0
- nat/cli/commands/configure/channel/remove.py +30 -0
- nat/cli/commands/configure/channel/update.py +30 -0
- nat/cli/commands/configure/configure.py +33 -0
- nat/cli/commands/evaluate.py +139 -0
- nat/cli/commands/info/__init__.py +14 -0
- nat/cli/commands/info/info.py +37 -0
- nat/cli/commands/info/list_channels.py +32 -0
- nat/cli/commands/info/list_components.py +129 -0
- nat/cli/commands/info/list_mcp.py +304 -0
- nat/cli/commands/registry/__init__.py +14 -0
- nat/cli/commands/registry/publish.py +88 -0
- nat/cli/commands/registry/pull.py +118 -0
- nat/cli/commands/registry/registry.py +36 -0
- nat/cli/commands/registry/remove.py +108 -0
- nat/cli/commands/registry/search.py +155 -0
- nat/cli/commands/sizing/__init__.py +14 -0
- nat/cli/commands/sizing/calc.py +297 -0
- nat/cli/commands/sizing/sizing.py +27 -0
- nat/cli/commands/start.py +246 -0
- nat/cli/commands/uninstall.py +81 -0
- nat/cli/commands/validate.py +47 -0
- nat/cli/commands/workflow/__init__.py +14 -0
- nat/cli/commands/workflow/templates/__init__.py.j2 +0 -0
- nat/cli/commands/workflow/templates/config.yml.j2 +16 -0
- nat/cli/commands/workflow/templates/pyproject.toml.j2 +22 -0
- nat/cli/commands/workflow/templates/register.py.j2 +5 -0
- nat/cli/commands/workflow/templates/workflow.py.j2 +36 -0
- nat/cli/commands/workflow/workflow.py +37 -0
- nat/cli/commands/workflow/workflow_commands.py +317 -0
- nat/cli/entrypoint.py +135 -0
- nat/cli/main.py +57 -0
- nat/cli/register_workflow.py +488 -0
- nat/cli/type_registry.py +1000 -0
- nat/data_models/__init__.py +14 -0
- nat/data_models/api_server.py +716 -0
- nat/data_models/authentication.py +231 -0
- nat/data_models/common.py +171 -0
- nat/data_models/component.py +58 -0
- nat/data_models/component_ref.py +168 -0
- nat/data_models/config.py +410 -0
- nat/data_models/dataset_handler.py +169 -0
- nat/data_models/discovery_metadata.py +305 -0
- nat/data_models/embedder.py +27 -0
- nat/data_models/evaluate.py +127 -0
- nat/data_models/evaluator.py +26 -0
- nat/data_models/front_end.py +26 -0
- nat/data_models/function.py +30 -0
- nat/data_models/function_dependencies.py +72 -0
- nat/data_models/interactive.py +246 -0
- nat/data_models/intermediate_step.py +302 -0
- nat/data_models/invocation_node.py +38 -0
- nat/data_models/llm.py +27 -0
- nat/data_models/logging.py +26 -0
- nat/data_models/memory.py +27 -0
- nat/data_models/object_store.py +44 -0
- nat/data_models/profiler.py +54 -0
- nat/data_models/registry_handler.py +26 -0
- nat/data_models/retriever.py +30 -0
- nat/data_models/retry_mixin.py +35 -0
- nat/data_models/span.py +190 -0
- nat/data_models/step_adaptor.py +64 -0
- nat/data_models/streaming.py +33 -0
- nat/data_models/swe_bench_model.py +54 -0
- nat/data_models/telemetry_exporter.py +26 -0
- nat/data_models/ttc_strategy.py +30 -0
- nat/embedder/__init__.py +0 -0
- nat/embedder/nim_embedder.py +59 -0
- nat/embedder/openai_embedder.py +43 -0
- nat/embedder/register.py +22 -0
- nat/eval/__init__.py +14 -0
- nat/eval/config.py +60 -0
- nat/eval/dataset_handler/__init__.py +0 -0
- nat/eval/dataset_handler/dataset_downloader.py +106 -0
- nat/eval/dataset_handler/dataset_filter.py +52 -0
- nat/eval/dataset_handler/dataset_handler.py +367 -0
- nat/eval/evaluate.py +510 -0
- nat/eval/evaluator/__init__.py +14 -0
- nat/eval/evaluator/base_evaluator.py +77 -0
- nat/eval/evaluator/evaluator_model.py +45 -0
- nat/eval/intermediate_step_adapter.py +99 -0
- nat/eval/rag_evaluator/__init__.py +0 -0
- nat/eval/rag_evaluator/evaluate.py +178 -0
- nat/eval/rag_evaluator/register.py +143 -0
- nat/eval/register.py +23 -0
- nat/eval/remote_workflow.py +133 -0
- nat/eval/runners/__init__.py +14 -0
- nat/eval/runners/config.py +39 -0
- nat/eval/runners/multi_eval_runner.py +54 -0
- nat/eval/runtime_event_subscriber.py +52 -0
- nat/eval/swe_bench_evaluator/__init__.py +0 -0
- nat/eval/swe_bench_evaluator/evaluate.py +215 -0
- nat/eval/swe_bench_evaluator/register.py +36 -0
- nat/eval/trajectory_evaluator/__init__.py +0 -0
- nat/eval/trajectory_evaluator/evaluate.py +75 -0
- nat/eval/trajectory_evaluator/register.py +40 -0
- nat/eval/tunable_rag_evaluator/__init__.py +0 -0
- nat/eval/tunable_rag_evaluator/evaluate.py +245 -0
- nat/eval/tunable_rag_evaluator/register.py +52 -0
- nat/eval/usage_stats.py +41 -0
- nat/eval/utils/__init__.py +0 -0
- nat/eval/utils/output_uploader.py +140 -0
- nat/eval/utils/tqdm_position_registry.py +40 -0
- nat/eval/utils/weave_eval.py +184 -0
- nat/experimental/__init__.py +0 -0
- nat/experimental/decorators/__init__.py +0 -0
- nat/experimental/decorators/experimental_warning_decorator.py +134 -0
- nat/experimental/test_time_compute/__init__.py +0 -0
- nat/experimental/test_time_compute/editing/__init__.py +0 -0
- nat/experimental/test_time_compute/editing/iterative_plan_refinement_editor.py +147 -0
- nat/experimental/test_time_compute/editing/llm_as_a_judge_editor.py +204 -0
- nat/experimental/test_time_compute/editing/motivation_aware_summarization.py +107 -0
- nat/experimental/test_time_compute/functions/__init__.py +0 -0
- nat/experimental/test_time_compute/functions/execute_score_select_function.py +105 -0
- nat/experimental/test_time_compute/functions/plan_select_execute_function.py +224 -0
- nat/experimental/test_time_compute/functions/ttc_tool_orchestration_function.py +205 -0
- nat/experimental/test_time_compute/functions/ttc_tool_wrapper_function.py +146 -0
- nat/experimental/test_time_compute/models/__init__.py +0 -0
- nat/experimental/test_time_compute/models/editor_config.py +132 -0
- nat/experimental/test_time_compute/models/scoring_config.py +112 -0
- nat/experimental/test_time_compute/models/search_config.py +120 -0
- nat/experimental/test_time_compute/models/selection_config.py +154 -0
- nat/experimental/test_time_compute/models/stage_enums.py +43 -0
- nat/experimental/test_time_compute/models/strategy_base.py +66 -0
- nat/experimental/test_time_compute/models/tool_use_config.py +41 -0
- nat/experimental/test_time_compute/models/ttc_item.py +48 -0
- nat/experimental/test_time_compute/register.py +36 -0
- nat/experimental/test_time_compute/scoring/__init__.py +0 -0
- nat/experimental/test_time_compute/scoring/llm_based_agent_scorer.py +168 -0
- nat/experimental/test_time_compute/scoring/llm_based_plan_scorer.py +168 -0
- nat/experimental/test_time_compute/scoring/motivation_aware_scorer.py +111 -0
- nat/experimental/test_time_compute/search/__init__.py +0 -0
- nat/experimental/test_time_compute/search/multi_llm_planner.py +128 -0
- nat/experimental/test_time_compute/search/multi_query_retrieval_search.py +122 -0
- nat/experimental/test_time_compute/search/single_shot_multi_plan_planner.py +128 -0
- nat/experimental/test_time_compute/selection/__init__.py +0 -0
- nat/experimental/test_time_compute/selection/best_of_n_selector.py +63 -0
- nat/experimental/test_time_compute/selection/llm_based_agent_output_selector.py +131 -0
- nat/experimental/test_time_compute/selection/llm_based_output_merging_selector.py +159 -0
- nat/experimental/test_time_compute/selection/llm_based_plan_selector.py +128 -0
- nat/experimental/test_time_compute/selection/threshold_selector.py +58 -0
- nat/front_ends/__init__.py +14 -0
- nat/front_ends/console/__init__.py +14 -0
- nat/front_ends/console/authentication_flow_handler.py +233 -0
- nat/front_ends/console/console_front_end_config.py +32 -0
- nat/front_ends/console/console_front_end_plugin.py +96 -0
- nat/front_ends/console/register.py +25 -0
- nat/front_ends/cron/__init__.py +14 -0
- nat/front_ends/fastapi/__init__.py +14 -0
- nat/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
- nat/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +27 -0
- nat/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +107 -0
- nat/front_ends/fastapi/fastapi_front_end_config.py +241 -0
- nat/front_ends/fastapi/fastapi_front_end_controller.py +68 -0
- nat/front_ends/fastapi/fastapi_front_end_plugin.py +116 -0
- nat/front_ends/fastapi/fastapi_front_end_plugin_worker.py +1087 -0
- nat/front_ends/fastapi/html_snippets/__init__.py +14 -0
- nat/front_ends/fastapi/html_snippets/auth_code_grant_success.py +35 -0
- nat/front_ends/fastapi/intermediate_steps_subscriber.py +80 -0
- nat/front_ends/fastapi/job_store.py +183 -0
- nat/front_ends/fastapi/main.py +72 -0
- nat/front_ends/fastapi/message_handler.py +320 -0
- nat/front_ends/fastapi/message_validator.py +352 -0
- nat/front_ends/fastapi/register.py +25 -0
- nat/front_ends/fastapi/response_helpers.py +195 -0
- nat/front_ends/fastapi/step_adaptor.py +319 -0
- nat/front_ends/mcp/__init__.py +14 -0
- nat/front_ends/mcp/mcp_front_end_config.py +36 -0
- nat/front_ends/mcp/mcp_front_end_plugin.py +81 -0
- nat/front_ends/mcp/mcp_front_end_plugin_worker.py +143 -0
- nat/front_ends/mcp/register.py +27 -0
- nat/front_ends/mcp/tool_converter.py +241 -0
- nat/front_ends/register.py +22 -0
- nat/front_ends/simple_base/__init__.py +14 -0
- nat/front_ends/simple_base/simple_front_end_plugin_base.py +54 -0
- nat/llm/__init__.py +0 -0
- nat/llm/aws_bedrock_llm.py +57 -0
- nat/llm/nim_llm.py +46 -0
- nat/llm/openai_llm.py +46 -0
- nat/llm/register.py +23 -0
- nat/llm/utils/__init__.py +14 -0
- nat/llm/utils/env_config_value.py +94 -0
- nat/llm/utils/error.py +17 -0
- nat/memory/__init__.py +20 -0
- nat/memory/interfaces.py +183 -0
- nat/memory/models.py +112 -0
- nat/meta/pypi.md +58 -0
- nat/object_store/__init__.py +20 -0
- nat/object_store/in_memory_object_store.py +76 -0
- nat/object_store/interfaces.py +84 -0
- nat/object_store/models.py +38 -0
- nat/object_store/register.py +20 -0
- nat/observability/__init__.py +14 -0
- nat/observability/exporter/__init__.py +14 -0
- nat/observability/exporter/base_exporter.py +449 -0
- nat/observability/exporter/exporter.py +78 -0
- nat/observability/exporter/file_exporter.py +33 -0
- nat/observability/exporter/processing_exporter.py +322 -0
- nat/observability/exporter/raw_exporter.py +52 -0
- nat/observability/exporter/span_exporter.py +288 -0
- nat/observability/exporter_manager.py +335 -0
- nat/observability/mixin/__init__.py +14 -0
- nat/observability/mixin/batch_config_mixin.py +26 -0
- nat/observability/mixin/collector_config_mixin.py +23 -0
- nat/observability/mixin/file_mixin.py +288 -0
- nat/observability/mixin/file_mode.py +23 -0
- nat/observability/mixin/resource_conflict_mixin.py +134 -0
- nat/observability/mixin/serialize_mixin.py +61 -0
- nat/observability/mixin/type_introspection_mixin.py +183 -0
- nat/observability/processor/__init__.py +14 -0
- nat/observability/processor/batching_processor.py +310 -0
- nat/observability/processor/callback_processor.py +42 -0
- nat/observability/processor/intermediate_step_serializer.py +28 -0
- nat/observability/processor/processor.py +71 -0
- nat/observability/register.py +96 -0
- nat/observability/utils/__init__.py +14 -0
- nat/observability/utils/dict_utils.py +236 -0
- nat/observability/utils/time_utils.py +31 -0
- nat/plugins/.namespace +1 -0
- nat/profiler/__init__.py +0 -0
- nat/profiler/calc/__init__.py +14 -0
- nat/profiler/calc/calc_runner.py +627 -0
- nat/profiler/calc/calculations.py +288 -0
- nat/profiler/calc/data_models.py +188 -0
- nat/profiler/calc/plot.py +345 -0
- nat/profiler/callbacks/__init__.py +0 -0
- nat/profiler/callbacks/agno_callback_handler.py +295 -0
- nat/profiler/callbacks/base_callback_class.py +20 -0
- nat/profiler/callbacks/langchain_callback_handler.py +290 -0
- nat/profiler/callbacks/llama_index_callback_handler.py +205 -0
- nat/profiler/callbacks/semantic_kernel_callback_handler.py +238 -0
- nat/profiler/callbacks/token_usage_base_model.py +27 -0
- nat/profiler/data_frame_row.py +51 -0
- nat/profiler/data_models.py +24 -0
- nat/profiler/decorators/__init__.py +0 -0
- nat/profiler/decorators/framework_wrapper.py +131 -0
- nat/profiler/decorators/function_tracking.py +254 -0
- nat/profiler/forecasting/__init__.py +0 -0
- nat/profiler/forecasting/config.py +18 -0
- nat/profiler/forecasting/model_trainer.py +75 -0
- nat/profiler/forecasting/models/__init__.py +22 -0
- nat/profiler/forecasting/models/forecasting_base_model.py +40 -0
- nat/profiler/forecasting/models/linear_model.py +197 -0
- nat/profiler/forecasting/models/random_forest_regressor.py +269 -0
- nat/profiler/inference_metrics_model.py +28 -0
- nat/profiler/inference_optimization/__init__.py +0 -0
- nat/profiler/inference_optimization/bottleneck_analysis/__init__.py +0 -0
- nat/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +460 -0
- nat/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +258 -0
- nat/profiler/inference_optimization/data_models.py +386 -0
- nat/profiler/inference_optimization/experimental/__init__.py +0 -0
- nat/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +468 -0
- nat/profiler/inference_optimization/experimental/prefix_span_analysis.py +405 -0
- nat/profiler/inference_optimization/llm_metrics.py +212 -0
- nat/profiler/inference_optimization/prompt_caching.py +163 -0
- nat/profiler/inference_optimization/token_uniqueness.py +107 -0
- nat/profiler/inference_optimization/workflow_runtimes.py +72 -0
- nat/profiler/intermediate_property_adapter.py +102 -0
- nat/profiler/profile_runner.py +473 -0
- nat/profiler/utils.py +184 -0
- nat/registry_handlers/__init__.py +0 -0
- nat/registry_handlers/local/__init__.py +0 -0
- nat/registry_handlers/local/local_handler.py +176 -0
- nat/registry_handlers/local/register_local.py +37 -0
- nat/registry_handlers/metadata_factory.py +60 -0
- nat/registry_handlers/package_utils.py +571 -0
- nat/registry_handlers/pypi/__init__.py +0 -0
- nat/registry_handlers/pypi/pypi_handler.py +251 -0
- nat/registry_handlers/pypi/register_pypi.py +40 -0
- nat/registry_handlers/register.py +21 -0
- nat/registry_handlers/registry_handler_base.py +157 -0
- nat/registry_handlers/rest/__init__.py +0 -0
- nat/registry_handlers/rest/register_rest.py +56 -0
- nat/registry_handlers/rest/rest_handler.py +237 -0
- nat/registry_handlers/schemas/__init__.py +0 -0
- nat/registry_handlers/schemas/headers.py +42 -0
- nat/registry_handlers/schemas/package.py +68 -0
- nat/registry_handlers/schemas/publish.py +68 -0
- nat/registry_handlers/schemas/pull.py +82 -0
- nat/registry_handlers/schemas/remove.py +36 -0
- nat/registry_handlers/schemas/search.py +91 -0
- nat/registry_handlers/schemas/status.py +47 -0
- nat/retriever/__init__.py +0 -0
- nat/retriever/interface.py +41 -0
- nat/retriever/milvus/__init__.py +14 -0
- nat/retriever/milvus/register.py +81 -0
- nat/retriever/milvus/retriever.py +228 -0
- nat/retriever/models.py +77 -0
- nat/retriever/nemo_retriever/__init__.py +14 -0
- nat/retriever/nemo_retriever/register.py +60 -0
- nat/retriever/nemo_retriever/retriever.py +190 -0
- nat/retriever/register.py +22 -0
- nat/runtime/__init__.py +14 -0
- nat/runtime/loader.py +220 -0
- nat/runtime/runner.py +195 -0
- nat/runtime/session.py +162 -0
- nat/runtime/user_metadata.py +130 -0
- nat/settings/__init__.py +0 -0
- nat/settings/global_settings.py +318 -0
- nat/test/.namespace +1 -0
- nat/tool/__init__.py +0 -0
- nat/tool/chat_completion.py +74 -0
- nat/tool/code_execution/README.md +151 -0
- nat/tool/code_execution/__init__.py +0 -0
- nat/tool/code_execution/code_sandbox.py +267 -0
- nat/tool/code_execution/local_sandbox/.gitignore +1 -0
- nat/tool/code_execution/local_sandbox/Dockerfile.sandbox +60 -0
- nat/tool/code_execution/local_sandbox/__init__.py +13 -0
- nat/tool/code_execution/local_sandbox/local_sandbox_server.py +198 -0
- nat/tool/code_execution/local_sandbox/sandbox.requirements.txt +6 -0
- nat/tool/code_execution/local_sandbox/start_local_sandbox.sh +50 -0
- nat/tool/code_execution/register.py +74 -0
- nat/tool/code_execution/test_code_execution_sandbox.py +414 -0
- nat/tool/code_execution/utils.py +100 -0
- nat/tool/datetime_tools.py +42 -0
- nat/tool/document_search.py +141 -0
- nat/tool/github_tools/__init__.py +0 -0
- nat/tool/github_tools/create_github_commit.py +133 -0
- nat/tool/github_tools/create_github_issue.py +87 -0
- nat/tool/github_tools/create_github_pr.py +106 -0
- nat/tool/github_tools/get_github_file.py +106 -0
- nat/tool/github_tools/get_github_issue.py +166 -0
- nat/tool/github_tools/get_github_pr.py +256 -0
- nat/tool/github_tools/update_github_issue.py +100 -0
- nat/tool/mcp/__init__.py +14 -0
- nat/tool/mcp/exceptions.py +142 -0
- nat/tool/mcp/mcp_client.py +255 -0
- nat/tool/mcp/mcp_tool.py +96 -0
- nat/tool/memory_tools/__init__.py +0 -0
- nat/tool/memory_tools/add_memory_tool.py +79 -0
- nat/tool/memory_tools/delete_memory_tool.py +67 -0
- nat/tool/memory_tools/get_memory_tool.py +72 -0
- nat/tool/nvidia_rag.py +95 -0
- nat/tool/register.py +38 -0
- nat/tool/retriever.py +94 -0
- nat/tool/server_tools.py +66 -0
- nat/utils/__init__.py +0 -0
- nat/utils/data_models/__init__.py +0 -0
- nat/utils/data_models/schema_validator.py +58 -0
- nat/utils/debugging_utils.py +43 -0
- nat/utils/dump_distro_mapping.py +32 -0
- nat/utils/exception_handlers/__init__.py +0 -0
- nat/utils/exception_handlers/automatic_retries.py +289 -0
- nat/utils/exception_handlers/mcp.py +211 -0
- nat/utils/exception_handlers/schemas.py +114 -0
- nat/utils/io/__init__.py +0 -0
- nat/utils/io/model_processing.py +28 -0
- nat/utils/io/yaml_tools.py +119 -0
- nat/utils/log_utils.py +37 -0
- nat/utils/metadata_utils.py +74 -0
- nat/utils/optional_imports.py +142 -0
- nat/utils/producer_consumer_queue.py +178 -0
- nat/utils/reactive/__init__.py +0 -0
- nat/utils/reactive/base/__init__.py +0 -0
- nat/utils/reactive/base/observable_base.py +65 -0
- nat/utils/reactive/base/observer_base.py +55 -0
- nat/utils/reactive/base/subject_base.py +79 -0
- nat/utils/reactive/observable.py +59 -0
- nat/utils/reactive/observer.py +76 -0
- nat/utils/reactive/subject.py +131 -0
- nat/utils/reactive/subscription.py +49 -0
- nat/utils/settings/__init__.py +0 -0
- nat/utils/settings/global_settings.py +197 -0
- nat/utils/string_utils.py +38 -0
- nat/utils/type_converter.py +290 -0
- nat/utils/type_utils.py +484 -0
- nat/utils/url_utils.py +27 -0
- nvidia_nat-1.2.0.dist-info/METADATA +365 -0
- nvidia_nat-1.2.0.dist-info/RECORD +435 -0
- nvidia_nat-1.2.0.dist-info/WHEEL +5 -0
- nvidia_nat-1.2.0.dist-info/entry_points.txt +21 -0
- nvidia_nat-1.2.0.dist-info/licenses/LICENSE-3rd-party.txt +5478 -0
- nvidia_nat-1.2.0.dist-info/licenses/LICENSE.md +201 -0
- nvidia_nat-1.2.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
import asyncio
|
|
17
|
+
import logging
|
|
18
|
+
from contextlib import AsyncExitStack
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
import click
|
|
22
|
+
|
|
23
|
+
from nat.data_models.registry_handler import RegistryHandlerBaseConfig
|
|
24
|
+
from nat.utils.data_models.schema_validator import validate_yaml
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
async def remove_artifact(registry_handler_config: RegistryHandlerBaseConfig, packages: list[dict[str, str]]) -> None:
    """
    Remove the given packages through the registry handler selected by ``registry_handler_config``.

    The handler is looked up in the global type registry by the type of ``registry_handler_config``, built as an
    async context manager, and asked to remove the validated package list.

    Args:
        registry_handler_config: Channel configuration; its type selects the registry handler to build.
        packages: Package descriptors, each a dictionary with a ``name`` key and an optional ``version`` key
            (the shape the ``remove`` command in this module produces).

    Returns:
        None. If ``packages`` cannot be validated into a ``PackageNameVersionList``, the error is logged and
        no removal is attempted.
    """

    from nat.cli.type_registry import GlobalTypeRegistry
    from nat.registry_handlers.schemas.package import PackageNameVersionList

    registry = GlobalTypeRegistry.get()

    async with AsyncExitStack() as stack:

        registry_handler_info = registry.get_registry_handler(type(registry_handler_config))
        registry_handler = await stack.enter_async_context(registry_handler_info.build_fn(registry_handler_config))

        try:
            package_name_list = PackageNameVersionList(**{"packages": packages})
        except Exception as e:
            logger.exception("Invalid package format: '%s'", e, exc_info=True)
            # Stop here: ``package_name_list`` is unbound when validation fails, so continuing would only
            # raise an UnboundLocalError instead of reporting the real problem.
            return

        await stack.enter_async_context(registry_handler.remove(packages=package_name_list))
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@click.group(
    name=__name__,
    invoke_without_command=True,
    help="Remove NAT artifact from a remote registry by name and version.",
)
@click.argument("packages", type=str)
@click.option(
    "--config_file",
    type=click.Path(exists=True, file_okay=True, dir_okay=False, path_type=Path),
    callback=validate_yaml,
    required=False,
    help="A YAML file to override the channel settings.",
)
@click.option(
    "-c",
    "--channel",
    type=str,
    required=True,
    help="The remote registry channel that will remove the NAT artifact.",
)
def remove(channel: str, config_file: str, packages: str) -> None:
    """
    Remove NAT artifacts from a remote registry.

    Args:
        channel: Name of the configured channel whose settings are used for the removal.
        config_file: Optional settings file; when given, it overrides the global settings.
        packages: Whitespace-separated package specs, each either ``name`` or ``name==version``.
    """

    from nat.settings.global_settings import GlobalSettings

    # Turn every "name" / "name==version" token into a descriptor dictionary.
    packages_versions = []
    for spec in packages.split():
        pieces = spec.split("==")

        # More than one "==" separator cannot be interpreted; report it and skip the token.
        if len(pieces) > 2:
            msg = f"Invalid input: '{spec}'"
            logger.error(msg)
            continue

        descriptor = {"name": pieces[0]}
        if len(pieces) == 2:
            descriptor["version"] = pieces[1]
        else:
            msg = f"No package version provided for '{pieces[0]}'."
            logger.warning(msg)

        packages_versions.append(descriptor)

    settings = GlobalSettings().get()
    if config_file is not None:
        settings = settings.override_settings(config_file)

    try:
        remove_channel_config = settings.channels.get(channel)

        # An unknown channel is reported and the command ends without contacting any registry.
        if remove_channel_config is None:
            logger.error("Remove channel '%s' has not been configured.", channel)
            return
    except Exception as e:
        logger.exception("Error loading user settings: %s", e, exc_info=True)
        return

    removal = remove_artifact(registry_handler_config=remove_channel_config, packages=packages_versions)
    asyncio.run(removal)
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
import asyncio
|
|
17
|
+
import logging
|
|
18
|
+
from contextlib import AsyncExitStack
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
import click
|
|
22
|
+
|
|
23
|
+
from nat.data_models.component import ComponentEnum
|
|
24
|
+
from nat.data_models.registry_handler import RegistryHandlerBaseConfig
|
|
25
|
+
from nat.registry_handlers.schemas.search import SearchFields
|
|
26
|
+
from nat.registry_handlers.schemas.status import StatusEnum
|
|
27
|
+
from nat.utils.data_models.schema_validator import validate_yaml
|
|
28
|
+
|
|
29
|
+
logger = logging.getLogger(__name__)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
async def search_artifacts(  # pylint: disable=R0917
        registry_handler_config: RegistryHandlerBaseConfig,
        query: str,
        search_fields: list[SearchFields],
        visualize: bool,
        component_types: list[ComponentEnum],
        save_path: str | None = None,
        n_results: int = 10) -> None:
    """
    Run a search through the registry handler selected by ``registry_handler_config``.

    Args:
        registry_handler_config: Channel configuration; its type selects the registry handler to build.
        query: The query string to search for.
        search_fields: Fields passed to the search query as ``fields``.
        visualize: When true, a successful response is handed to the handler's ``visualize_search_results``.
        component_types: Component types to search; an empty collection selects every ``ComponentEnum`` value.
        save_path: When not ``None``, a successful response is handed to the handler's ``save_search_results``.
        n_results: Passed to the search query as ``top_k``.
    """

    from nat.cli.type_registry import GlobalTypeRegistry
    from nat.registry_handlers.schemas.search import SearchQuery

    handler_info = GlobalTypeRegistry.get().get_registry_handler(type(registry_handler_config))

    async with AsyncExitStack() as stack:

        handler = await stack.enter_async_context(handler_info.build_fn(registry_handler_config))

        # No explicit selection means "search across all component types".
        if len(component_types) == 0:
            component_types = [member.value for member in ComponentEnum]

        search_query = SearchQuery(query=query,
                                   fields=search_fields,
                                   top_k=n_results,
                                   component_types=component_types)

        response = await stack.enter_async_context(handler.search(query=search_query))

        # Results are only shown or saved when the handler reports success.
        succeeded = response.status.status == StatusEnum.SUCCESS

        if succeeded and visualize:
            handler.visualize_search_results(search_response=response)

        if succeeded and save_path is not None:
            handler.save_search_results(search_response=response, save_path=save_path)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@click.group(name=__name__, invoke_without_command=True, help="Search for NAT artifacts from remote registry.")
@click.option(
    "--config_file",
    type=click.Path(exists=True, file_okay=True, dir_okay=False, path_type=Path),
    callback=validate_yaml,
    required=False,
    # The file is applied with ``settings.override_settings`` below, so describe it as a settings override
    # (same wording as the sibling ``remove`` command) rather than as workflow parameters.
    help=("A YAML file to override the channel settings."),
)
@click.option(
    "-c",
    "--channel",
    type=str,
    required=True,
    help=("The remote registry channel to use when searching for NAT artifacts."),
)
@click.option(
    "-o",
    "--output_path",
    type=str,
    required=False,
    help=("Path to save search results."),
)
@click.option(
    "-f",
    "--fields",
    multiple=True,
    type=click.Choice([e.value for e in SearchFields], case_sensitive=False),
    required=False,
    help=("The fields to include in the search."),
)
@click.option(
    "-q",
    "--query",
    type=str,
    required=True,
    help=("The query string."),
)
@click.option(
    "-n",
    "--n_results",
    type=int,
    required=False,
    default=10,
    help=("Number of search results to return."),
)
@click.option(
    "-t",
    "--types",
    "component_types",
    multiple=True,
    type=click.Choice([e.value for e in ComponentEnum], case_sensitive=False),
    required=False,
    help=("The component types to include in search."),
)
def search(  # pylint: disable=R0917
        config_file: str,
        channel: str,
        fields: list[str],
        query: str,
        component_types: list[ComponentEnum],
        n_results: int,
        output_path: str) -> None:
    """
    Search for NAT artifacts with the specified configuration.

    Args:
        config_file: Optional settings file; when given, it overrides the global settings.
        channel: Name of the configured channel to search.
        fields: Search fields selected with ``-f/--fields`` (may be empty).
        query: The query string.
        component_types: Component types selected with ``-t/--types`` (may be empty).
        n_results: Number of search results to return.
        output_path: Optional path the search results are saved to.
    """

    from nat.settings.global_settings import GlobalSettings

    settings = GlobalSettings().get()

    if config_file is not None:
        settings = settings.override_settings(config_file)

    try:
        search_channel_config = settings.channels.get(channel)

        # An unknown channel is reported and the command ends without contacting any registry.
        if search_channel_config is None:
            logger.error("Search channel '%s' has not been configured.", channel)
            return
    except Exception as e:
        logger.exception("Error loading user settings: %s", e, exc_info=True)
        return

    # Results are always visualized from the CLI; saving happens only when an output path was given.
    asyncio.run(
        search_artifacts(registry_handler_config=search_channel_config,
                         query=query,
                         component_types=component_types,
                         search_fields=fields,
                         visualize=True,
                         save_path=output_path,
                         n_results=n_results))
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
import asyncio
|
|
17
|
+
import logging
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
|
|
20
|
+
import click
|
|
21
|
+
from tabulate import tabulate
|
|
22
|
+
|
|
23
|
+
from nat.profiler.calc.calc_runner import CalcRunner
|
|
24
|
+
from nat.profiler.calc.data_models import CalcRunnerConfig
|
|
25
|
+
from nat.profiler.calc.data_models import CalcRunnerOutput
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@click.command("calc", help="Estimate GPU count and plot metrics for a workflow")
@click.option(
    "--config_file",
    type=click.Path(exists=True, file_okay=True, dir_okay=False, path_type=Path),
    required=False,
    default=None,
    help="A YAML config file for the workflow and evaluation. This is not needed in offline mode.",
)
@click.option(
    "--offline_mode",
    is_flag=True,
    required=False,
    default=False,
    help="Run in offline mode. This is used to estimate the GPU count for a workflow without running the workflow. ")
@click.option(
    "--target_llm_latency",
    type=float,
    required=False,
    default=0,
    help="Target p95 LLM latency (seconds). Can be set to 0 to ignore.",
)
@click.option(
    "--target_workflow_runtime",
    type=float,
    required=False,
    default=0,
    help="Target p95 workflow runtime (seconds). Can be set to 0 to ignore.",
)
@click.option(
    "--target_users",
    type=int,
    required=False,
    default=0,
    help="Target number of users to support.",
)
@click.option(
    "--test_gpu_count",
    type=int,
    required=False,
    default=0,
    help="Number of GPUs used in the test.",
)
@click.option(
    "--calc_output_dir",
    type=click.Path(file_okay=False, dir_okay=True, path_type=Path),
    required=False,
    default=None,
    help="Directory to save plots and results (optional).",
)
@click.option(
    "--concurrencies",
    type=str,
    required=False,
    default="1,2,3,4,5,6,7,8,9,10",
    help="Comma-separated list of concurrency values to test (e.g., 1,2,4,8). Default: 1,2,3,4,5,6,7,8,9,10",
)
@click.option(
    "--num_passes",
    type=int,
    required=False,
    default=0,
    help="Number of passes at each concurrency for the evaluation."
    " If set to 0 the dataset is adjusted to a multiple of the concurrency. Default: 0",
)
@click.option(
    "--append_calc_outputs",
    is_flag=True,
    required=False,
    default=False,
    help="Append calc outputs to the output directory. "
    "By default append is set to False and the content of the online directory is overwritten.",
)
@click.option(
    "--endpoint",
    type=str,
    required=False,
    default=None,
    help="Endpoint to use for the workflow if it is remote(optional).",
)
@click.option(
    "--endpoint_timeout",
    type=int,
    required=False,
    default=300,
    help="Timeout for the remote workflow endpoint in seconds (default: 300).",
)
@click.pass_context
def calc_command(ctx,
                 config_file,
                 offline_mode,
                 target_llm_latency,
                 target_workflow_runtime,
                 target_users,
                 test_gpu_count,
                 calc_output_dir,
                 concurrencies,
                 num_passes,
                 append_calc_outputs,
                 endpoint,
                 endpoint_timeout):
    """
    Estimate GPU count and plot metrics for a workflow profile.

    Validates the CLI options, builds a ``CalcRunnerConfig`` from them, runs ``CalcRunner`` and prints the
    per-concurrency results as a table followed by the overall GPU estimates when they are available.

    Invalid option combinations are reported with ``click.echo`` and the command returns without running:

    - ``--concurrencies`` must be a comma-separated list of integers, none of them zero or negative.
    - Offline mode requires at least one non-zero target (LLM latency or workflow runtime), a positive
      ``--test_gpu_count``, a positive ``--target_users``, an output directory, and does not support
      ``--append_calc_outputs``.
    - Online mode requires ``--config_file``; missing targets only produce a notice and the run continues.
    """
    # Only use CLI concurrencies, with default. Empty items (e.g. a trailing comma) are ignored.
    try:
        concurrencies_list = [int(x) for x in concurrencies.split(",") if x.strip()]
    except ValueError:
        # Report malformed input the same way as every other validation failure instead of a traceback.
        click.echo(f"Invalid --concurrencies value '{concurrencies}'. "
                   "Expected a comma-separated list of integers.")
        return

    # Don't allow a concurrency of 0
    if 0 in concurrencies_list:
        click.echo("Concurrency of 0 is not allowed.")
        return

    # Negative concurrencies are as meaningless as 0 and must be rejected as well
    if any(value < 0 for value in concurrencies_list):
        click.echo("Negative concurrency values are not allowed.")
        return

    # Check if the parameters are valid in online and offline mode
    if offline_mode:
        # In offline mode target test parameters are needed to estimate the GPU count
        if target_llm_latency == 0 and target_workflow_runtime == 0:
            click.echo("Both --target_llm_latency and --target_workflow_runtime are 0. "
                       "Cannot estimate the GPU count.")
            return
        if test_gpu_count <= 0:
            click.echo("Test GPU count is 0. Cannot estimate the GPU count.")
            return
        if target_users <= 0:
            click.echo("Target users is 0. Cannot estimate the GPU count.")
            return
        if append_calc_outputs:
            click.echo("Appending calc outputs is not supported in offline mode.")
            return
        if not calc_output_dir:
            click.echo("Output directory is required in offline mode.")
            return
    else:
        if not config_file:
            click.echo("Config file is required in online mode.")
            return
        # In online mode missing targets are not fatal: the tests still run, only the estimate is skipped
        if target_llm_latency == 0 and target_workflow_runtime == 0:
            click.echo("Both --target_llm_latency and --target_workflow_runtime are 0. "
                       "GPU count will not be estimated.")
        if test_gpu_count <= 0:
            click.echo("Test GPU count is 0. Tests will be run but the GPU count will not be estimated.")
        if target_users <= 0:
            click.echo("Target users is 0. Tests will be run but the GPU count will not be estimated.")

    # Build CalcRunnerConfig
    runner_config = CalcRunnerConfig(
        config_file=config_file,
        concurrencies=concurrencies_list,
        target_llm_latency_p95=target_llm_latency,
        target_workflow_runtime_p95=target_workflow_runtime,
        target_users=target_users,
        test_gpu_count=test_gpu_count,
        output_dir=calc_output_dir,
        num_passes=num_passes,
        offline_mode=offline_mode,
        append_job=append_calc_outputs,
        endpoint=endpoint,
        endpoint_timeout=endpoint_timeout,
    )

    async def run_calc() -> CalcRunnerOutput:
        """Run the calc runner with the config built above and return its output."""
        runner = CalcRunner(runner_config)
        result = await runner.run()
        return result

    def print_results(results: CalcRunnerOutput):
        """Echo the targets, the per-concurrency table and the overall GPU estimates."""

        # Print header with target numbers
        click.echo(f"Targets: LLM Latency ≤ {runner_config.target_llm_latency_p95}s, "
                   f"Workflow Runtime ≤ {runner_config.target_workflow_runtime_p95}s, "
                   f"Users = {runner_config.target_users}")
        click.echo(f"Test parameters: GPUs = {runner_config.test_gpu_count}")

        # Check if there are any GPU estimates to determine if we should show GPU estimate columns
        has_llm_latency_gpu_estimates = any(data.gpu_estimates.gpu_estimate_by_llm_latency is not None
                                            for data in results.calc_data.values())
        has_wf_runtime_gpu_estimates = any(data.gpu_estimates.gpu_estimate_by_wf_runtime is not None
                                           for data in results.calc_data.values())

        # Check if there are any interrupted workflows or outliers to determine if we should show the alerts column
        has_alerts = any(data.sizing_metrics.alerts.workflow_interrupted or data.alerts.outlier_llm_latency
                         or data.alerts.outlier_workflow_runtime for data in results.calc_data.values())

        # Print per concurrency results as a table
        click.echo("Per concurrency results:")

        # Show alerts legend if there are any alerts
        if has_alerts:
            click.echo("Alerts!: W = Workflow interrupted, L = LLM latency outlier, R = Workflow runtime outlier")

        table = []
        for concurrency, data in results.calc_data.items():
            metrics = data.sizing_metrics
            gpu_estimates_per_concurrency = data.gpu_estimates
            sizing_metrics_alerts = data.sizing_metrics.alerts
            calc_alerts = data.alerts

            row = []

            # Only include the alerts column (first column) if any row has an alert of any kind
            if has_alerts:
                alerts = []
                if sizing_metrics_alerts.workflow_interrupted:
                    alerts.append("W")
                if calc_alerts.outlier_llm_latency:
                    alerts.append("L")
                if calc_alerts.outlier_workflow_runtime:
                    alerts.append("R")

                # Show ! followed by all alert characters
                if alerts:
                    row.append(f"!{''.join(alerts)}")
                else:
                    row.append("")

            row.extend([
                concurrency,
                metrics.llm_latency_p95,
                metrics.workflow_runtime_p95,
                metrics.total_runtime,
            ])

            # Only include GPU estimate columns if there are actual estimates of that type
            if has_llm_latency_gpu_estimates:
                row.append(gpu_estimates_per_concurrency.gpu_estimate_by_llm_latency)
            if has_wf_runtime_gpu_estimates:
                row.append(gpu_estimates_per_concurrency.gpu_estimate_by_wf_runtime)

            table.append(row)

        # The header order must mirror the order in which the row cells were appended above
        headers = []

        # Only include alerts header if there are any alerts (first column)
        if has_alerts:
            headers.append("Alerts")

        headers.extend([
            "Concurrency",
            "p95 LLM Latency",
            "p95 WF Runtime",
            "Total Runtime",
        ])

        # Only include GPU estimate headers if there are actual estimates of that type
        if has_llm_latency_gpu_estimates:
            headers.append("GPUs (LLM Latency, Rough)")
        if has_wf_runtime_gpu_estimates:
            headers.append("GPUs (WF Runtime, Rough)")

        click.echo(tabulate(table, headers=headers, tablefmt="github"))

        # Display slope-based GPU estimates if they are available
        if results.gpu_estimates.gpu_estimate_by_llm_latency is not None or \
                results.gpu_estimates.gpu_estimate_by_wf_runtime is not None:
            click.echo("")
            click.echo(click.style("=== GPU ESTIMATES ===", fg="bright_blue", bold=True))

            if results.gpu_estimates.gpu_estimate_by_wf_runtime is not None:
                click.echo(
                    click.style(
                        f"Estimated GPU count (Workflow Runtime): {results.gpu_estimates.gpu_estimate_by_wf_runtime:.1f}",
                        fg="green",
                        bold=True))
            if results.gpu_estimates.gpu_estimate_by_llm_latency is not None:
                click.echo(
                    click.style(
                        f"Estimated GPU count (LLM Latency): {results.gpu_estimates.gpu_estimate_by_llm_latency:.1f}",
                        fg="green",
                        bold=True))

    results = asyncio.run(run_calc())
    print_results(results)
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
import click
|
|
17
|
+
|
|
18
|
+
from .calc import calc_command
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@click.group(help="Size GPU clusters for workflows with the specified options.")
def sizing():
    """Command group that collects the sizing subcommands."""


# Attach the imported calc command to the group so it is reachable as a subcommand.
sizing.add_command(calc_command)
|