nvidia-nat 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiq/__init__.py +66 -0
- nat/agent/__init__.py +0 -0
- nat/agent/base.py +256 -0
- nat/agent/dual_node.py +67 -0
- nat/agent/react_agent/__init__.py +0 -0
- nat/agent/react_agent/agent.py +363 -0
- nat/agent/react_agent/output_parser.py +104 -0
- nat/agent/react_agent/prompt.py +44 -0
- nat/agent/react_agent/register.py +149 -0
- nat/agent/reasoning_agent/__init__.py +0 -0
- nat/agent/reasoning_agent/reasoning_agent.py +225 -0
- nat/agent/register.py +23 -0
- nat/agent/rewoo_agent/__init__.py +0 -0
- nat/agent/rewoo_agent/agent.py +415 -0
- nat/agent/rewoo_agent/prompt.py +110 -0
- nat/agent/rewoo_agent/register.py +157 -0
- nat/agent/tool_calling_agent/__init__.py +0 -0
- nat/agent/tool_calling_agent/agent.py +119 -0
- nat/agent/tool_calling_agent/register.py +106 -0
- nat/authentication/__init__.py +14 -0
- nat/authentication/api_key/__init__.py +14 -0
- nat/authentication/api_key/api_key_auth_provider.py +96 -0
- nat/authentication/api_key/api_key_auth_provider_config.py +124 -0
- nat/authentication/api_key/register.py +26 -0
- nat/authentication/exceptions/__init__.py +14 -0
- nat/authentication/exceptions/api_key_exceptions.py +38 -0
- nat/authentication/http_basic_auth/__init__.py +0 -0
- nat/authentication/http_basic_auth/http_basic_auth_provider.py +81 -0
- nat/authentication/http_basic_auth/register.py +30 -0
- nat/authentication/interfaces.py +93 -0
- nat/authentication/oauth2/__init__.py +14 -0
- nat/authentication/oauth2/oauth2_auth_code_flow_provider.py +107 -0
- nat/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +39 -0
- nat/authentication/oauth2/register.py +25 -0
- nat/authentication/register.py +21 -0
- nat/builder/__init__.py +0 -0
- nat/builder/builder.py +285 -0
- nat/builder/component_utils.py +316 -0
- nat/builder/context.py +270 -0
- nat/builder/embedder.py +24 -0
- nat/builder/eval_builder.py +161 -0
- nat/builder/evaluator.py +29 -0
- nat/builder/framework_enum.py +24 -0
- nat/builder/front_end.py +73 -0
- nat/builder/function.py +344 -0
- nat/builder/function_base.py +380 -0
- nat/builder/function_info.py +627 -0
- nat/builder/intermediate_step_manager.py +174 -0
- nat/builder/llm.py +25 -0
- nat/builder/retriever.py +25 -0
- nat/builder/user_interaction_manager.py +78 -0
- nat/builder/workflow.py +148 -0
- nat/builder/workflow_builder.py +1117 -0
- nat/cli/__init__.py +14 -0
- nat/cli/cli_utils/__init__.py +0 -0
- nat/cli/cli_utils/config_override.py +231 -0
- nat/cli/cli_utils/validation.py +37 -0
- nat/cli/commands/__init__.py +0 -0
- nat/cli/commands/configure/__init__.py +0 -0
- nat/cli/commands/configure/channel/__init__.py +0 -0
- nat/cli/commands/configure/channel/add.py +28 -0
- nat/cli/commands/configure/channel/channel.py +34 -0
- nat/cli/commands/configure/channel/remove.py +30 -0
- nat/cli/commands/configure/channel/update.py +30 -0
- nat/cli/commands/configure/configure.py +33 -0
- nat/cli/commands/evaluate.py +139 -0
- nat/cli/commands/info/__init__.py +14 -0
- nat/cli/commands/info/info.py +37 -0
- nat/cli/commands/info/list_channels.py +32 -0
- nat/cli/commands/info/list_components.py +129 -0
- nat/cli/commands/info/list_mcp.py +304 -0
- nat/cli/commands/registry/__init__.py +14 -0
- nat/cli/commands/registry/publish.py +88 -0
- nat/cli/commands/registry/pull.py +118 -0
- nat/cli/commands/registry/registry.py +36 -0
- nat/cli/commands/registry/remove.py +108 -0
- nat/cli/commands/registry/search.py +155 -0
- nat/cli/commands/sizing/__init__.py +14 -0
- nat/cli/commands/sizing/calc.py +297 -0
- nat/cli/commands/sizing/sizing.py +27 -0
- nat/cli/commands/start.py +246 -0
- nat/cli/commands/uninstall.py +81 -0
- nat/cli/commands/validate.py +47 -0
- nat/cli/commands/workflow/__init__.py +14 -0
- nat/cli/commands/workflow/templates/__init__.py.j2 +0 -0
- nat/cli/commands/workflow/templates/config.yml.j2 +16 -0
- nat/cli/commands/workflow/templates/pyproject.toml.j2 +22 -0
- nat/cli/commands/workflow/templates/register.py.j2 +5 -0
- nat/cli/commands/workflow/templates/workflow.py.j2 +36 -0
- nat/cli/commands/workflow/workflow.py +37 -0
- nat/cli/commands/workflow/workflow_commands.py +317 -0
- nat/cli/entrypoint.py +135 -0
- nat/cli/main.py +57 -0
- nat/cli/register_workflow.py +488 -0
- nat/cli/type_registry.py +1000 -0
- nat/data_models/__init__.py +14 -0
- nat/data_models/api_server.py +716 -0
- nat/data_models/authentication.py +231 -0
- nat/data_models/common.py +171 -0
- nat/data_models/component.py +58 -0
- nat/data_models/component_ref.py +168 -0
- nat/data_models/config.py +410 -0
- nat/data_models/dataset_handler.py +169 -0
- nat/data_models/discovery_metadata.py +305 -0
- nat/data_models/embedder.py +27 -0
- nat/data_models/evaluate.py +127 -0
- nat/data_models/evaluator.py +26 -0
- nat/data_models/front_end.py +26 -0
- nat/data_models/function.py +30 -0
- nat/data_models/function_dependencies.py +72 -0
- nat/data_models/interactive.py +246 -0
- nat/data_models/intermediate_step.py +302 -0
- nat/data_models/invocation_node.py +38 -0
- nat/data_models/llm.py +27 -0
- nat/data_models/logging.py +26 -0
- nat/data_models/memory.py +27 -0
- nat/data_models/object_store.py +44 -0
- nat/data_models/profiler.py +54 -0
- nat/data_models/registry_handler.py +26 -0
- nat/data_models/retriever.py +30 -0
- nat/data_models/retry_mixin.py +35 -0
- nat/data_models/span.py +190 -0
- nat/data_models/step_adaptor.py +64 -0
- nat/data_models/streaming.py +33 -0
- nat/data_models/swe_bench_model.py +54 -0
- nat/data_models/telemetry_exporter.py +26 -0
- nat/data_models/ttc_strategy.py +30 -0
- nat/embedder/__init__.py +0 -0
- nat/embedder/nim_embedder.py +59 -0
- nat/embedder/openai_embedder.py +43 -0
- nat/embedder/register.py +22 -0
- nat/eval/__init__.py +14 -0
- nat/eval/config.py +60 -0
- nat/eval/dataset_handler/__init__.py +0 -0
- nat/eval/dataset_handler/dataset_downloader.py +106 -0
- nat/eval/dataset_handler/dataset_filter.py +52 -0
- nat/eval/dataset_handler/dataset_handler.py +367 -0
- nat/eval/evaluate.py +510 -0
- nat/eval/evaluator/__init__.py +14 -0
- nat/eval/evaluator/base_evaluator.py +77 -0
- nat/eval/evaluator/evaluator_model.py +45 -0
- nat/eval/intermediate_step_adapter.py +99 -0
- nat/eval/rag_evaluator/__init__.py +0 -0
- nat/eval/rag_evaluator/evaluate.py +178 -0
- nat/eval/rag_evaluator/register.py +143 -0
- nat/eval/register.py +23 -0
- nat/eval/remote_workflow.py +133 -0
- nat/eval/runners/__init__.py +14 -0
- nat/eval/runners/config.py +39 -0
- nat/eval/runners/multi_eval_runner.py +54 -0
- nat/eval/runtime_event_subscriber.py +52 -0
- nat/eval/swe_bench_evaluator/__init__.py +0 -0
- nat/eval/swe_bench_evaluator/evaluate.py +215 -0
- nat/eval/swe_bench_evaluator/register.py +36 -0
- nat/eval/trajectory_evaluator/__init__.py +0 -0
- nat/eval/trajectory_evaluator/evaluate.py +75 -0
- nat/eval/trajectory_evaluator/register.py +40 -0
- nat/eval/tunable_rag_evaluator/__init__.py +0 -0
- nat/eval/tunable_rag_evaluator/evaluate.py +245 -0
- nat/eval/tunable_rag_evaluator/register.py +52 -0
- nat/eval/usage_stats.py +41 -0
- nat/eval/utils/__init__.py +0 -0
- nat/eval/utils/output_uploader.py +140 -0
- nat/eval/utils/tqdm_position_registry.py +40 -0
- nat/eval/utils/weave_eval.py +184 -0
- nat/experimental/__init__.py +0 -0
- nat/experimental/decorators/__init__.py +0 -0
- nat/experimental/decorators/experimental_warning_decorator.py +134 -0
- nat/experimental/test_time_compute/__init__.py +0 -0
- nat/experimental/test_time_compute/editing/__init__.py +0 -0
- nat/experimental/test_time_compute/editing/iterative_plan_refinement_editor.py +147 -0
- nat/experimental/test_time_compute/editing/llm_as_a_judge_editor.py +204 -0
- nat/experimental/test_time_compute/editing/motivation_aware_summarization.py +107 -0
- nat/experimental/test_time_compute/functions/__init__.py +0 -0
- nat/experimental/test_time_compute/functions/execute_score_select_function.py +105 -0
- nat/experimental/test_time_compute/functions/plan_select_execute_function.py +224 -0
- nat/experimental/test_time_compute/functions/ttc_tool_orchestration_function.py +205 -0
- nat/experimental/test_time_compute/functions/ttc_tool_wrapper_function.py +146 -0
- nat/experimental/test_time_compute/models/__init__.py +0 -0
- nat/experimental/test_time_compute/models/editor_config.py +132 -0
- nat/experimental/test_time_compute/models/scoring_config.py +112 -0
- nat/experimental/test_time_compute/models/search_config.py +120 -0
- nat/experimental/test_time_compute/models/selection_config.py +154 -0
- nat/experimental/test_time_compute/models/stage_enums.py +43 -0
- nat/experimental/test_time_compute/models/strategy_base.py +66 -0
- nat/experimental/test_time_compute/models/tool_use_config.py +41 -0
- nat/experimental/test_time_compute/models/ttc_item.py +48 -0
- nat/experimental/test_time_compute/register.py +36 -0
- nat/experimental/test_time_compute/scoring/__init__.py +0 -0
- nat/experimental/test_time_compute/scoring/llm_based_agent_scorer.py +168 -0
- nat/experimental/test_time_compute/scoring/llm_based_plan_scorer.py +168 -0
- nat/experimental/test_time_compute/scoring/motivation_aware_scorer.py +111 -0
- nat/experimental/test_time_compute/search/__init__.py +0 -0
- nat/experimental/test_time_compute/search/multi_llm_planner.py +128 -0
- nat/experimental/test_time_compute/search/multi_query_retrieval_search.py +122 -0
- nat/experimental/test_time_compute/search/single_shot_multi_plan_planner.py +128 -0
- nat/experimental/test_time_compute/selection/__init__.py +0 -0
- nat/experimental/test_time_compute/selection/best_of_n_selector.py +63 -0
- nat/experimental/test_time_compute/selection/llm_based_agent_output_selector.py +131 -0
- nat/experimental/test_time_compute/selection/llm_based_output_merging_selector.py +159 -0
- nat/experimental/test_time_compute/selection/llm_based_plan_selector.py +128 -0
- nat/experimental/test_time_compute/selection/threshold_selector.py +58 -0
- nat/front_ends/__init__.py +14 -0
- nat/front_ends/console/__init__.py +14 -0
- nat/front_ends/console/authentication_flow_handler.py +233 -0
- nat/front_ends/console/console_front_end_config.py +32 -0
- nat/front_ends/console/console_front_end_plugin.py +96 -0
- nat/front_ends/console/register.py +25 -0
- nat/front_ends/cron/__init__.py +14 -0
- nat/front_ends/fastapi/__init__.py +14 -0
- nat/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
- nat/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +27 -0
- nat/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +107 -0
- nat/front_ends/fastapi/fastapi_front_end_config.py +241 -0
- nat/front_ends/fastapi/fastapi_front_end_controller.py +68 -0
- nat/front_ends/fastapi/fastapi_front_end_plugin.py +116 -0
- nat/front_ends/fastapi/fastapi_front_end_plugin_worker.py +1087 -0
- nat/front_ends/fastapi/html_snippets/__init__.py +14 -0
- nat/front_ends/fastapi/html_snippets/auth_code_grant_success.py +35 -0
- nat/front_ends/fastapi/intermediate_steps_subscriber.py +80 -0
- nat/front_ends/fastapi/job_store.py +183 -0
- nat/front_ends/fastapi/main.py +72 -0
- nat/front_ends/fastapi/message_handler.py +320 -0
- nat/front_ends/fastapi/message_validator.py +352 -0
- nat/front_ends/fastapi/register.py +25 -0
- nat/front_ends/fastapi/response_helpers.py +195 -0
- nat/front_ends/fastapi/step_adaptor.py +319 -0
- nat/front_ends/mcp/__init__.py +14 -0
- nat/front_ends/mcp/mcp_front_end_config.py +36 -0
- nat/front_ends/mcp/mcp_front_end_plugin.py +81 -0
- nat/front_ends/mcp/mcp_front_end_plugin_worker.py +143 -0
- nat/front_ends/mcp/register.py +27 -0
- nat/front_ends/mcp/tool_converter.py +241 -0
- nat/front_ends/register.py +22 -0
- nat/front_ends/simple_base/__init__.py +14 -0
- nat/front_ends/simple_base/simple_front_end_plugin_base.py +54 -0
- nat/llm/__init__.py +0 -0
- nat/llm/aws_bedrock_llm.py +57 -0
- nat/llm/nim_llm.py +46 -0
- nat/llm/openai_llm.py +46 -0
- nat/llm/register.py +23 -0
- nat/llm/utils/__init__.py +14 -0
- nat/llm/utils/env_config_value.py +94 -0
- nat/llm/utils/error.py +17 -0
- nat/memory/__init__.py +20 -0
- nat/memory/interfaces.py +183 -0
- nat/memory/models.py +112 -0
- nat/meta/pypi.md +58 -0
- nat/object_store/__init__.py +20 -0
- nat/object_store/in_memory_object_store.py +76 -0
- nat/object_store/interfaces.py +84 -0
- nat/object_store/models.py +38 -0
- nat/object_store/register.py +20 -0
- nat/observability/__init__.py +14 -0
- nat/observability/exporter/__init__.py +14 -0
- nat/observability/exporter/base_exporter.py +449 -0
- nat/observability/exporter/exporter.py +78 -0
- nat/observability/exporter/file_exporter.py +33 -0
- nat/observability/exporter/processing_exporter.py +322 -0
- nat/observability/exporter/raw_exporter.py +52 -0
- nat/observability/exporter/span_exporter.py +288 -0
- nat/observability/exporter_manager.py +335 -0
- nat/observability/mixin/__init__.py +14 -0
- nat/observability/mixin/batch_config_mixin.py +26 -0
- nat/observability/mixin/collector_config_mixin.py +23 -0
- nat/observability/mixin/file_mixin.py +288 -0
- nat/observability/mixin/file_mode.py +23 -0
- nat/observability/mixin/resource_conflict_mixin.py +134 -0
- nat/observability/mixin/serialize_mixin.py +61 -0
- nat/observability/mixin/type_introspection_mixin.py +183 -0
- nat/observability/processor/__init__.py +14 -0
- nat/observability/processor/batching_processor.py +310 -0
- nat/observability/processor/callback_processor.py +42 -0
- nat/observability/processor/intermediate_step_serializer.py +28 -0
- nat/observability/processor/processor.py +71 -0
- nat/observability/register.py +96 -0
- nat/observability/utils/__init__.py +14 -0
- nat/observability/utils/dict_utils.py +236 -0
- nat/observability/utils/time_utils.py +31 -0
- nat/plugins/.namespace +1 -0
- nat/profiler/__init__.py +0 -0
- nat/profiler/calc/__init__.py +14 -0
- nat/profiler/calc/calc_runner.py +627 -0
- nat/profiler/calc/calculations.py +288 -0
- nat/profiler/calc/data_models.py +188 -0
- nat/profiler/calc/plot.py +345 -0
- nat/profiler/callbacks/__init__.py +0 -0
- nat/profiler/callbacks/agno_callback_handler.py +295 -0
- nat/profiler/callbacks/base_callback_class.py +20 -0
- nat/profiler/callbacks/langchain_callback_handler.py +290 -0
- nat/profiler/callbacks/llama_index_callback_handler.py +205 -0
- nat/profiler/callbacks/semantic_kernel_callback_handler.py +238 -0
- nat/profiler/callbacks/token_usage_base_model.py +27 -0
- nat/profiler/data_frame_row.py +51 -0
- nat/profiler/data_models.py +24 -0
- nat/profiler/decorators/__init__.py +0 -0
- nat/profiler/decorators/framework_wrapper.py +131 -0
- nat/profiler/decorators/function_tracking.py +254 -0
- nat/profiler/forecasting/__init__.py +0 -0
- nat/profiler/forecasting/config.py +18 -0
- nat/profiler/forecasting/model_trainer.py +75 -0
- nat/profiler/forecasting/models/__init__.py +22 -0
- nat/profiler/forecasting/models/forecasting_base_model.py +40 -0
- nat/profiler/forecasting/models/linear_model.py +197 -0
- nat/profiler/forecasting/models/random_forest_regressor.py +269 -0
- nat/profiler/inference_metrics_model.py +28 -0
- nat/profiler/inference_optimization/__init__.py +0 -0
- nat/profiler/inference_optimization/bottleneck_analysis/__init__.py +0 -0
- nat/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +460 -0
- nat/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +258 -0
- nat/profiler/inference_optimization/data_models.py +386 -0
- nat/profiler/inference_optimization/experimental/__init__.py +0 -0
- nat/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +468 -0
- nat/profiler/inference_optimization/experimental/prefix_span_analysis.py +405 -0
- nat/profiler/inference_optimization/llm_metrics.py +212 -0
- nat/profiler/inference_optimization/prompt_caching.py +163 -0
- nat/profiler/inference_optimization/token_uniqueness.py +107 -0
- nat/profiler/inference_optimization/workflow_runtimes.py +72 -0
- nat/profiler/intermediate_property_adapter.py +102 -0
- nat/profiler/profile_runner.py +473 -0
- nat/profiler/utils.py +184 -0
- nat/registry_handlers/__init__.py +0 -0
- nat/registry_handlers/local/__init__.py +0 -0
- nat/registry_handlers/local/local_handler.py +176 -0
- nat/registry_handlers/local/register_local.py +37 -0
- nat/registry_handlers/metadata_factory.py +60 -0
- nat/registry_handlers/package_utils.py +571 -0
- nat/registry_handlers/pypi/__init__.py +0 -0
- nat/registry_handlers/pypi/pypi_handler.py +251 -0
- nat/registry_handlers/pypi/register_pypi.py +40 -0
- nat/registry_handlers/register.py +21 -0
- nat/registry_handlers/registry_handler_base.py +157 -0
- nat/registry_handlers/rest/__init__.py +0 -0
- nat/registry_handlers/rest/register_rest.py +56 -0
- nat/registry_handlers/rest/rest_handler.py +237 -0
- nat/registry_handlers/schemas/__init__.py +0 -0
- nat/registry_handlers/schemas/headers.py +42 -0
- nat/registry_handlers/schemas/package.py +68 -0
- nat/registry_handlers/schemas/publish.py +68 -0
- nat/registry_handlers/schemas/pull.py +82 -0
- nat/registry_handlers/schemas/remove.py +36 -0
- nat/registry_handlers/schemas/search.py +91 -0
- nat/registry_handlers/schemas/status.py +47 -0
- nat/retriever/__init__.py +0 -0
- nat/retriever/interface.py +41 -0
- nat/retriever/milvus/__init__.py +14 -0
- nat/retriever/milvus/register.py +81 -0
- nat/retriever/milvus/retriever.py +228 -0
- nat/retriever/models.py +77 -0
- nat/retriever/nemo_retriever/__init__.py +14 -0
- nat/retriever/nemo_retriever/register.py +60 -0
- nat/retriever/nemo_retriever/retriever.py +190 -0
- nat/retriever/register.py +22 -0
- nat/runtime/__init__.py +14 -0
- nat/runtime/loader.py +220 -0
- nat/runtime/runner.py +195 -0
- nat/runtime/session.py +162 -0
- nat/runtime/user_metadata.py +130 -0
- nat/settings/__init__.py +0 -0
- nat/settings/global_settings.py +318 -0
- nat/test/.namespace +1 -0
- nat/tool/__init__.py +0 -0
- nat/tool/chat_completion.py +74 -0
- nat/tool/code_execution/README.md +151 -0
- nat/tool/code_execution/__init__.py +0 -0
- nat/tool/code_execution/code_sandbox.py +267 -0
- nat/tool/code_execution/local_sandbox/.gitignore +1 -0
- nat/tool/code_execution/local_sandbox/Dockerfile.sandbox +60 -0
- nat/tool/code_execution/local_sandbox/__init__.py +13 -0
- nat/tool/code_execution/local_sandbox/local_sandbox_server.py +198 -0
- nat/tool/code_execution/local_sandbox/sandbox.requirements.txt +6 -0
- nat/tool/code_execution/local_sandbox/start_local_sandbox.sh +50 -0
- nat/tool/code_execution/register.py +74 -0
- nat/tool/code_execution/test_code_execution_sandbox.py +414 -0
- nat/tool/code_execution/utils.py +100 -0
- nat/tool/datetime_tools.py +42 -0
- nat/tool/document_search.py +141 -0
- nat/tool/github_tools/__init__.py +0 -0
- nat/tool/github_tools/create_github_commit.py +133 -0
- nat/tool/github_tools/create_github_issue.py +87 -0
- nat/tool/github_tools/create_github_pr.py +106 -0
- nat/tool/github_tools/get_github_file.py +106 -0
- nat/tool/github_tools/get_github_issue.py +166 -0
- nat/tool/github_tools/get_github_pr.py +256 -0
- nat/tool/github_tools/update_github_issue.py +100 -0
- nat/tool/mcp/__init__.py +14 -0
- nat/tool/mcp/exceptions.py +142 -0
- nat/tool/mcp/mcp_client.py +255 -0
- nat/tool/mcp/mcp_tool.py +96 -0
- nat/tool/memory_tools/__init__.py +0 -0
- nat/tool/memory_tools/add_memory_tool.py +79 -0
- nat/tool/memory_tools/delete_memory_tool.py +67 -0
- nat/tool/memory_tools/get_memory_tool.py +72 -0
- nat/tool/nvidia_rag.py +95 -0
- nat/tool/register.py +38 -0
- nat/tool/retriever.py +94 -0
- nat/tool/server_tools.py +66 -0
- nat/utils/__init__.py +0 -0
- nat/utils/data_models/__init__.py +0 -0
- nat/utils/data_models/schema_validator.py +58 -0
- nat/utils/debugging_utils.py +43 -0
- nat/utils/dump_distro_mapping.py +32 -0
- nat/utils/exception_handlers/__init__.py +0 -0
- nat/utils/exception_handlers/automatic_retries.py +289 -0
- nat/utils/exception_handlers/mcp.py +211 -0
- nat/utils/exception_handlers/schemas.py +114 -0
- nat/utils/io/__init__.py +0 -0
- nat/utils/io/model_processing.py +28 -0
- nat/utils/io/yaml_tools.py +119 -0
- nat/utils/log_utils.py +37 -0
- nat/utils/metadata_utils.py +74 -0
- nat/utils/optional_imports.py +142 -0
- nat/utils/producer_consumer_queue.py +178 -0
- nat/utils/reactive/__init__.py +0 -0
- nat/utils/reactive/base/__init__.py +0 -0
- nat/utils/reactive/base/observable_base.py +65 -0
- nat/utils/reactive/base/observer_base.py +55 -0
- nat/utils/reactive/base/subject_base.py +79 -0
- nat/utils/reactive/observable.py +59 -0
- nat/utils/reactive/observer.py +76 -0
- nat/utils/reactive/subject.py +131 -0
- nat/utils/reactive/subscription.py +49 -0
- nat/utils/settings/__init__.py +0 -0
- nat/utils/settings/global_settings.py +197 -0
- nat/utils/string_utils.py +38 -0
- nat/utils/type_converter.py +290 -0
- nat/utils/type_utils.py +484 -0
- nat/utils/url_utils.py +27 -0
- nvidia_nat-1.2.0.dist-info/METADATA +365 -0
- nvidia_nat-1.2.0.dist-info/RECORD +435 -0
- nvidia_nat-1.2.0.dist-info/WHEEL +5 -0
- nvidia_nat-1.2.0.dist-info/entry_points.txt +21 -0
- nvidia_nat-1.2.0.dist-info/licenses/LICENSE-3rd-party.txt +5478 -0
- nvidia_nat-1.2.0.dist-info/licenses/LICENSE.md +201 -0
- nvidia_nat-1.2.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,468 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
"""
|
|
16
|
+
An enhanced script that:
|
|
17
|
+
|
|
18
|
+
1. Groups workflow events by example_number to build a per-example call tree (no cross-example nesting).
|
|
19
|
+
2. Tracks concurrency globally across *all* examples.
|
|
20
|
+
3. Identifies concurrency "spikes" (concurrency >= a threshold).
|
|
21
|
+
4. Correlates concurrency spikes with token usage and call metadata.
|
|
22
|
+
5. Computes average call latency by concurrency level, using midpoint concurrency as an approximation.
|
|
23
|
+
6. Returns a Pydantic result containing concurrency distribution, spike intervals, correlation stats, etc.,
|
|
24
|
+
along with a textual report containing the real call count, active calls in spikes, etc.
|
|
25
|
+
|
|
26
|
+
Changes from previous version:
|
|
27
|
+
|
|
28
|
+
- Now shows the actual total calls in the dataset.
|
|
29
|
+
- Displays the real number of active calls for each spike interval.
|
|
30
|
+
- Computes and reports average latency by concurrency (no visualization).
|
|
31
|
+
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
import numpy as np
|
|
35
|
+
import pandas as pd
|
|
36
|
+
|
|
37
|
+
from nat.data_models.intermediate_step import IntermediateStep
|
|
38
|
+
from nat.profiler.inference_optimization.data_models import ConcurrencyAnalysisResult
|
|
39
|
+
from nat.profiler.inference_optimization.data_models import ConcurrencyCallNode
|
|
40
|
+
from nat.profiler.inference_optimization.data_models import ConcurrencyCorrelationStats
|
|
41
|
+
from nat.profiler.inference_optimization.data_models import ConcurrencySpikeInfo
|
|
42
|
+
from nat.profiler.utils import create_standardized_dataframe
|
|
43
|
+
|
|
44
|
+
# --------------------------------------------------------------------------------
|
|
45
|
+
# 1) Building the Per-Example Call Trees
|
|
46
|
+
# --------------------------------------------------------------------------------
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def build_call_tree_for_example(example_df: pd.DataFrame) -> list[ConcurrencyCallNode]:
    """
    Build a stack-based call tree from the workflow events of a single example.

    Events are expected in time order (the caller sorts them). Each ``*_START``
    event pushes a new ``ConcurrencyCallNode`` onto a stack (nesting it under
    the current stack top, if any); the matching ``*_END`` event finalizes the
    node's end time, duration and token/output metadata, then pops it.

    Args:
        example_df: Events for exactly one example; must contain the columns
            ``example_number``, ``event_type``, ``UUID`` and ``event_timestamp``.

    Returns:
        The top-level (parentless) call nodes for this example.
    """
    stack: list[ConcurrencyCallNode] = []
    # uuid -> node for calls that started with an empty stack (tree roots)
    top_level: dict[str, ConcurrencyCallNode] = {}
    # uuid -> node for calls that have started but not yet ended
    partial_map: dict[str, ConcurrencyCallNode] = {}

    def parse_op_type(et: str) -> str | None:
        # Only LLM_* and TOOL_* events participate in the tree; everything
        # else (e.g. workflow/function events) is ignored.
        et = et.upper()
        if et.startswith("LLM_"):
            return "LLM"
        if et.startswith("TOOL_"):
            return "TOOL"
        return None

    def get_op_name(row: pd.Series, op_type: str) -> str:
        # Fall back to a placeholder when the name column is missing/empty.
        if op_type == "LLM":
            return row.get("llm_name") or "unknown_llm"
        if op_type == "TOOL":
            return row.get("tool_name") or "unknown_tool"
        return "unknown_op"

    example_num = int(example_df["example_number"].iloc[0])

    for _, row in example_df.iterrows():
        # event_type is an enum-like object; .value yields its string form.
        et = row["event_type"].value.upper()
        uuid = str(row["UUID"])
        ts = float(row["event_timestamp"])
        op_type = parse_op_type(et)
        if not op_type:
            continue

        if et.endswith("_START"):
            op_name = get_op_name(row, op_type)
            node = ConcurrencyCallNode(
                uuid=uuid,
                example_number=example_num,
                operation_type=op_type,
                operation_name=op_name,
                start_time=ts,
                end_time=ts,  # updated on END
                duration=0.0)
            if stack:
                # Nest under the call currently in progress.
                parent = stack[-1]
                node.parent = parent
                parent.children.append(node)
            else:
                top_level[uuid] = node

            stack.append(node)
            partial_map[uuid] = node

        elif et.endswith("_END"):
            # An END without a matching START (e.g. filtered or out-of-order
            # events) is silently dropped.
            if uuid not in partial_map:
                continue
            node = partial_map[uuid]
            node.end_time = ts
            # Clamp to zero in case the END timestamp precedes the START.
            node.duration = max(0.0, node.end_time - node.start_time)
            node.prompt_tokens = row.get("prompt_tokens")
            node.completion_tokens = row.get("completion_tokens")
            node.total_tokens = row.get("total_tokens")
            node.tool_outputs = row.get("metadata").get("tool_outputs") if (
                row.get("metadata") and row.get("metadata").get("tool_outputs")) else None
            node.llm_text_output = row.get("llm_text_output")

            # Only pop when this END matches the stack top; an interleaved END
            # leaves the stack untouched (its node stays until its own END).
            if stack and stack[-1].uuid == uuid:
                stack.pop()
            del partial_map[uuid]

    # gather top-level
    roots: list[ConcurrencyCallNode] = []
    for _, nd in top_level.items():
        if nd.parent is None:
            roots.append(nd)
    return roots
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def build_call_tree_per_example(df: pd.DataFrame) -> list[ConcurrencyCallNode]:
    """
    Build one call tree per example and return the combined top-level calls.

    The frame is validated, copied, sorted by ``(example_number,
    event_timestamp)`` and then grouped so that events never nest across
    example boundaries.

    Args:
        df: Standardized event DataFrame covering one or more examples.

    Returns:
        Top-level ``ConcurrencyCallNode`` objects from every example.

    Raises:
        ValueError: If any required column is absent from ``df``.
    """
    req_cols = {"example_number", "event_type", "UUID", "event_timestamp"}
    missing = req_cols - set(df.columns)
    if missing:
        raise ValueError(f"DataFrame missing required columns: {missing}")

    # Work on a copy so the caller's frame is never reordered in place.
    ordered = df.copy()
    ordered.sort_values(["example_number", "event_timestamp"], inplace=True)

    combined_roots: list[ConcurrencyCallNode] = []
    for _, example_rows in ordered.groupby("example_number"):
        combined_roots.extend(build_call_tree_for_example(example_rows))
    return combined_roots
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def flatten_calls(roots: list[ConcurrencyCallNode]) -> list[ConcurrencyCallNode]:
    """
    Return every call reachable from *roots* (including nested children)
    as a flat list in depth-first preorder.
    """
    flat: list[ConcurrencyCallNode] = []
    # Explicit stack instead of recursion; reversing keeps preorder intact.
    pending = list(reversed(roots))
    while pending:
        node = pending.pop()
        flat.append(node)
        pending.extend(reversed(node.children))
    return flat
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
# --------------------------------------------------------------------------------
|
|
163
|
+
# 2) Global Concurrency Distribution & Segments
|
|
164
|
+
# --------------------------------------------------------------------------------
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def compute_concurrency_distribution(roots: list[ConcurrencyCallNode]) -> dict[int, float]:
    """
    Compute how much wall-clock time was spent at each concurrency level.

    Flattens all calls, turns every call into a ``(start, +1)`` / ``(end, -1)``
    event pair, then sweeps the time-sorted events, accumulating the elapsed
    time at each concurrency level.

    Args:
        roots: Top-level call nodes; nested children are included via
            ``flatten_calls``.

    Returns:
        Mapping of concurrency level -> total time spent at that level.
        Empty dict when there are no calls, or no calls with a valid interval.
    """
    all_nodes = flatten_calls(roots)
    if not all_nodes:
        return {}

    events = []
    for n in all_nodes:
        # Skip malformed intervals (end before start); they would corrupt the sweep.
        if n.start_time <= n.end_time:
            events.append((n.start_time, +1))
            events.append((n.end_time, -1))

    # Fix: every node may have been filtered out above even though all_nodes
    # is non-empty; without this guard events[0][0] raises IndexError.
    if not events:
        return {}

    events.sort(key=lambda x: x[0])
    dist_map: dict[int, float] = {}
    curr_conc = 0
    prev_time = events[0][0]

    for (time_val, delta) in events:
        if time_val > prev_time:
            # Attribute the elapsed span to the concurrency level that held
            # during it, BEFORE applying this event's delta.
            length = time_val - prev_time
            dist_map[curr_conc] = dist_map.get(curr_conc, 0.0) + length
        curr_conc += delta
        prev_time = time_val

    return dist_map
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def build_concurrency_segments(roots: list[ConcurrencyCallNode]) -> list[tuple[float, float, int]]:
    """
    Return piecewise segments of ``(start, end, concurrency)`` across all calls.

    Each segment covers a maximal span of time during which the global
    concurrency level was constant.

    Args:
        roots: Top-level call nodes; nested children are included via
            ``flatten_calls``.

    Returns:
        Time-ordered ``(segment_start, segment_end, concurrency)`` tuples.
        Empty list when there are no calls, or no calls with a valid interval.
    """
    all_nodes = flatten_calls(roots)
    if not all_nodes:
        return []

    events = []
    for n in all_nodes:
        # Skip malformed intervals (end before start).
        if n.start_time <= n.end_time:
            events.append((n.start_time, +1))
            events.append((n.end_time, -1))

    # Fix: every node may have been filtered out above even though all_nodes
    # is non-empty; without this guard events[0][0] raises IndexError.
    if not events:
        return []

    events.sort(key=lambda x: x[0])
    segments: list[tuple[float, float, int]] = []
    curr_conc = 0
    prev_time = events[0][0]

    for (t, delta) in events:
        if t > prev_time:
            # Close the span that ran at the pre-delta concurrency level.
            segments.append((prev_time, t, curr_conc))
        curr_conc += delta
        prev_time = t

    return segments
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def find_percentile_concurrency(dist_map: dict[int, float], percentile: float) -> float:
    """
    Find the concurrency level at the given percentile of total observed time.

    Walks concurrency levels in ascending order, accumulating their durations,
    and returns the first level whose cumulative time reaches
    ``percentile`` percent of the total.

    Args:
        dist_map: Mapping of concurrency level -> time spent at that level.
        percentile: Target percentile in the 0-100 range.

    Returns:
        The concurrency level as a float; 0.0 when the map holds no time.
    """
    overall = sum(dist_map.values())
    if overall <= 0:
        return 0.0

    target = percentile * 0.01 * overall
    running = 0.0
    fallback = 0

    # Ascending concurrency order; return as soon as the cumulative time
    # crosses the target.
    for level in sorted(dist_map):
        running += dist_map[level]
        if running >= target:
            return float(level)
        fallback = level
    return float(fallback)
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
# --------------------------------------------------------------------------------
|
|
246
|
+
# 3) Spike Detection & Active Calls
|
|
247
|
+
# --------------------------------------------------------------------------------
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def detect_concurrency_spikes(segments: list[tuple[float, float, int]], threshold: int) -> list[ConcurrencySpikeInfo]:
    """
    Flag every concurrency segment at or above ``threshold`` as a spike.

    Args:
        segments: (start, end, concurrency) tuples, e.g. from
            build_concurrency_segments.
        threshold: Minimum concurrency level that qualifies as a spike.

    Returns:
        One ConcurrencySpikeInfo per qualifying segment; zero-length segments
        are skipped.
    """
    return [
        ConcurrencySpikeInfo(start_time=seg_start, end_time=seg_end, concurrency=level)
        for (seg_start, seg_end, level) in segments
        if level >= threshold and seg_end > seg_start
    ]
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def find_calls_active_in_interval(roots: list[ConcurrencyCallNode], start_t: float,
                                  end_t: float) -> list[ConcurrencyCallNode]:
    """
    Return all calls whose lifetime overlaps the window [start_t, end_t).

    A call overlaps unless it ended at/before the window opened or began
    at/after the window closed (De Morgan of the exclusion test).
    """
    return [
        node
        for node in flatten_calls(roots)
        if node.start_time < end_t and node.end_time > start_t
    ]
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
# --------------------------------------------------------------------------------
|
|
277
|
+
# 4) Correlations & Average Latency by Concurrency
|
|
278
|
+
# --------------------------------------------------------------------------------
|
|
279
|
+
|
|
280
|
+
def correlate_spike_calls(spikes: list[ConcurrencySpikeInfo], roots: list[ConcurrencyCallNode]) \
        -> ConcurrencyCorrelationStats:
    """
    Compute token statistics for the calls active during spike intervals.

    Side effect: each spike's ``active_uuids`` is populated with the unique
    UUIDs of the calls overlapping that spike.

    Returns:
        ConcurrencyCorrelationStats with the average prompt and total token
        counts across all spike-overlapping calls (0.0 when no token data).
    """
    prompt_counts = []
    total_counts = []

    for spike in spikes:
        overlapping = find_calls_active_in_interval(roots, spike.start_time, spike.end_time)
        # Record which calls were active during this spike (deduplicated).
        spike.active_uuids = list({call.uuid for call in overlapping})

        # Only positive, non-null token counts contribute to the averages.
        for call in overlapping:
            if call.prompt_tokens and call.prompt_tokens > 0:
                prompt_counts.append(call.prompt_tokens)
            if call.total_tokens and call.total_tokens > 0:
                total_counts.append(call.total_tokens)

    def _mean_or_zero(values):
        # np.mean on an empty sequence warns and yields NaN; guard explicitly.
        return float(np.mean(values)) if values else 0.0

    return ConcurrencyCorrelationStats(
        avg_prompt_tokens=_mean_or_zero(prompt_counts),
        avg_total_tokens=_mean_or_zero(total_counts),
    )
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def compute_midpoint_concurrency(n: ConcurrencyCallNode, segments: list[tuple[float, float, int]]) -> float:
    """
    Approximate the concurrency level at the midpoint of a call.

    Binary-searches the time-ordered segments for the one containing the
    call's temporal midpoint.

    Returns:
        Concurrency of the containing segment as a float, or 0.0 when the call
        has a non-positive duration or its midpoint falls outside all segments.
    """
    if n.start_time >= n.end_time:
        return 0.0

    midpoint = 0.5 * (n.start_time + n.end_time)

    # Classic binary search over half-open [seg_start, seg_end) intervals.
    lo = 0
    hi = len(segments) - 1
    while lo <= hi:
        probe = (lo + hi) // 2
        seg_start, seg_end, seg_conc = segments[probe]
        if seg_start <= midpoint < seg_end:
            return float(seg_conc)
        if midpoint < seg_start:
            hi = probe - 1
        else:
            lo = probe + 1

    # Midpoint is not covered by any segment.
    return 0.0
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def average_latency_by_midpoint_concurrency(roots: list[ConcurrencyCallNode]) -> dict[int, float]:
    """
    Bucket call durations by the concurrency level at each call's midpoint,
    then average each bucket.

    Args:
        roots: Root nodes of the per-example call trees.

    Returns:
        Mapping of concurrency level (int) -> average call duration.
        Empty when there are no calls.
    """
    from collections import defaultdict

    segs = build_concurrency_segments(roots)
    all_nodes = flatten_calls(roots)

    # concurrency level => list of call durations observed at that level
    calls_by_conc: dict[int, list] = defaultdict(list)

    for c in all_nodes:
        # Truncate the midpoint concurrency into an integer bucket.
        c_level = int(compute_midpoint_concurrency(c, segs))
        calls_by_conc[c_level].append(c.duration)

    # Every bucket is non-empty by construction (entries are created only via
    # append above), so the original empty-bucket fallback was dead code.
    return {level: float(np.mean(durations)) for level, durations in calls_by_conc.items()}
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
# --------------------------------------------------------------------------------
|
|
357
|
+
# 5) Main Analysis Function
|
|
358
|
+
# --------------------------------------------------------------------------------
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def concurrency_spike_analysis(
    all_steps: list[list[IntermediateStep]],
    concurrency_spike_threshold: int | None = None,
) -> ConcurrencyAnalysisResult:
    """
    Run the end-to-end concurrency spike analysis over evaluation traces.

    1) Build per-example call trees (no cross-example nesting).
    2) Compute concurrency distribution & concurrency segments across *all* calls.
    3) Derive concurrency percentiles (p50, p90, p95, p99).
    4) If threshold not provided, pick e.g. ceil of p90 concurrency.
    5) Detect spikes, gather calls in those intervals => correlation stats.
    6) Also compute average latency by concurrency and add to report.
    7) Return a Pydantic object with everything, plus a textual report.

    Args:
        all_steps: Intermediate steps grouped per evaluation example.
        concurrency_spike_threshold: Concurrency level that counts as a spike;
            when None, defaults to max(1, ceil(p90 concurrency)).

    Returns:
        ConcurrencyAnalysisResult carrying the distribution, percentiles,
        chosen threshold, spike intervals, correlation stats, the
        latency-by-concurrency map, and a human-readable textual report.

    Raises:
        ValueError: If the standardized DataFrame lacks required columns.
    """
    df = create_standardized_dataframe(all_steps)
    # Columns the tree building and token statistics below depend on.
    required_cols = {
        "framework",
        "llm_name",
        "llm_text_input",
        "llm_text_output",
        "event_timestamp",
        "event_type",
        "UUID",
        "example_number",
        "prompt_tokens",
        "completion_tokens",
        "total_tokens"
    }
    missing = required_cols - set(df.columns)
    if missing:
        raise ValueError(f"DataFrame missing required columns: {missing}")

    # Build global forest
    roots = build_call_tree_per_example(df)
    all_calls = flatten_calls(roots)
    num_calls = len(all_calls)

    # Concurrency distribution: concurrency level -> total time at that level
    dist_map = compute_concurrency_distribution(roots)
    total_time = sum(dist_map.values())

    p50_c = find_percentile_concurrency(dist_map, 50)
    p90_c = find_percentile_concurrency(dist_map, 90)
    p95_c = find_percentile_concurrency(dist_map, 95)
    p99_c = find_percentile_concurrency(dist_map, 99)

    # Threshold: default to ceil(p90), floored at 1, when caller gave none.
    if concurrency_spike_threshold is None:
        concurrency_spike_threshold = max(1, int(np.ceil(p90_c)))

    # Build concurrency segments, detect spikes
    segments = build_concurrency_segments(roots)
    spike_intervals = detect_concurrency_spikes(segments, concurrency_spike_threshold)

    # Correlate: also populates each spike's active_uuids (read by the report).
    corr_stats = correlate_spike_calls(spike_intervals, roots)

    # Average latency by concurrency
    avg_lat_by_conc = average_latency_by_midpoint_concurrency(roots)

    # Build textual report
    lines = []
    lines.append("=== Concurrency Spike Analysis ===")
    lines.append(f"Total calls in dataset: {num_calls}")
    lines.append(f"Total time observed: {total_time:.2f} units (sum of concurrency timeline)")

    lines.append("\n-- Concurrency Distribution --")
    for c_val in sorted(dist_map.keys()):
        dur = dist_map[c_val]
        lines.append(f" concurrency={c_val}: {dur:.2f} time")

    lines.append(f"\nPercentiles => p50={p50_c:.1f}, p90={p90_c:.1f}, p95={p95_c:.1f}, p99={p99_c:.1f}")
    lines.append(f"Spike threshold chosen: {concurrency_spike_threshold}")

    lines.append("\n-- Detected Spike Intervals --")
    if not spike_intervals:
        lines.append("No intervals exceed concurrency spike threshold.")
    else:
        for i, sp in enumerate(spike_intervals, start=1):
            length = sp.end_time - sp.start_time
            active_count = len(sp.active_uuids)
            lines.append(f"{i}) {sp.start_time:.2f}-{sp.end_time:.2f}, concurrency={sp.concurrency}, "
                         f"length={length:.2f}, #active_calls={active_count}")

    lines.append("\n-- Correlation Stats for Spiked Calls --")
    lines.append(f"Avg prompt_tokens in spike calls: {corr_stats.avg_prompt_tokens:.1f}")
    lines.append(f"Avg total_tokens in spike calls : {corr_stats.avg_total_tokens:.1f}")

    lines.append("\n-- Average Latency by Midpoint Concurrency --")
    if not avg_lat_by_conc:
        lines.append("No calls or no concurrency data.")
    else:
        for c_level in sorted(avg_lat_by_conc.keys()):
            lat = avg_lat_by_conc[c_level]
            lines.append(f" concurrency={c_level} => avg_latency={lat:.2f}")

    final_report = "\n".join(lines)

    # Build result object
    return ConcurrencyAnalysisResult(concurrency_distribution=dist_map,
                                     p50_concurrency=p50_c,
                                     p90_concurrency=p90_c,
                                     p95_concurrency=p95_c,
                                     p99_concurrency=p99_c,
                                     spike_threshold=concurrency_spike_threshold,
                                     spike_intervals=spike_intervals,
                                     correlation_stats=corr_stats,
                                     textual_report=final_report,
                                     average_latency_by_concurrency=avg_lat_by_conc)
|