nvidia_nat-1.2.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiq/__init__.py +66 -0
- nat/agent/__init__.py +0 -0
- nat/agent/base.py +256 -0
- nat/agent/dual_node.py +67 -0
- nat/agent/react_agent/__init__.py +0 -0
- nat/agent/react_agent/agent.py +363 -0
- nat/agent/react_agent/output_parser.py +104 -0
- nat/agent/react_agent/prompt.py +44 -0
- nat/agent/react_agent/register.py +149 -0
- nat/agent/reasoning_agent/__init__.py +0 -0
- nat/agent/reasoning_agent/reasoning_agent.py +225 -0
- nat/agent/register.py +23 -0
- nat/agent/rewoo_agent/__init__.py +0 -0
- nat/agent/rewoo_agent/agent.py +415 -0
- nat/agent/rewoo_agent/prompt.py +110 -0
- nat/agent/rewoo_agent/register.py +157 -0
- nat/agent/tool_calling_agent/__init__.py +0 -0
- nat/agent/tool_calling_agent/agent.py +119 -0
- nat/agent/tool_calling_agent/register.py +106 -0
- nat/authentication/__init__.py +14 -0
- nat/authentication/api_key/__init__.py +14 -0
- nat/authentication/api_key/api_key_auth_provider.py +96 -0
- nat/authentication/api_key/api_key_auth_provider_config.py +124 -0
- nat/authentication/api_key/register.py +26 -0
- nat/authentication/exceptions/__init__.py +14 -0
- nat/authentication/exceptions/api_key_exceptions.py +38 -0
- nat/authentication/http_basic_auth/__init__.py +0 -0
- nat/authentication/http_basic_auth/http_basic_auth_provider.py +81 -0
- nat/authentication/http_basic_auth/register.py +30 -0
- nat/authentication/interfaces.py +93 -0
- nat/authentication/oauth2/__init__.py +14 -0
- nat/authentication/oauth2/oauth2_auth_code_flow_provider.py +107 -0
- nat/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +39 -0
- nat/authentication/oauth2/register.py +25 -0
- nat/authentication/register.py +21 -0
- nat/builder/__init__.py +0 -0
- nat/builder/builder.py +285 -0
- nat/builder/component_utils.py +316 -0
- nat/builder/context.py +270 -0
- nat/builder/embedder.py +24 -0
- nat/builder/eval_builder.py +161 -0
- nat/builder/evaluator.py +29 -0
- nat/builder/framework_enum.py +24 -0
- nat/builder/front_end.py +73 -0
- nat/builder/function.py +344 -0
- nat/builder/function_base.py +380 -0
- nat/builder/function_info.py +627 -0
- nat/builder/intermediate_step_manager.py +174 -0
- nat/builder/llm.py +25 -0
- nat/builder/retriever.py +25 -0
- nat/builder/user_interaction_manager.py +78 -0
- nat/builder/workflow.py +148 -0
- nat/builder/workflow_builder.py +1117 -0
- nat/cli/__init__.py +14 -0
- nat/cli/cli_utils/__init__.py +0 -0
- nat/cli/cli_utils/config_override.py +231 -0
- nat/cli/cli_utils/validation.py +37 -0
- nat/cli/commands/__init__.py +0 -0
- nat/cli/commands/configure/__init__.py +0 -0
- nat/cli/commands/configure/channel/__init__.py +0 -0
- nat/cli/commands/configure/channel/add.py +28 -0
- nat/cli/commands/configure/channel/channel.py +34 -0
- nat/cli/commands/configure/channel/remove.py +30 -0
- nat/cli/commands/configure/channel/update.py +30 -0
- nat/cli/commands/configure/configure.py +33 -0
- nat/cli/commands/evaluate.py +139 -0
- nat/cli/commands/info/__init__.py +14 -0
- nat/cli/commands/info/info.py +37 -0
- nat/cli/commands/info/list_channels.py +32 -0
- nat/cli/commands/info/list_components.py +129 -0
- nat/cli/commands/info/list_mcp.py +304 -0
- nat/cli/commands/registry/__init__.py +14 -0
- nat/cli/commands/registry/publish.py +88 -0
- nat/cli/commands/registry/pull.py +118 -0
- nat/cli/commands/registry/registry.py +36 -0
- nat/cli/commands/registry/remove.py +108 -0
- nat/cli/commands/registry/search.py +155 -0
- nat/cli/commands/sizing/__init__.py +14 -0
- nat/cli/commands/sizing/calc.py +297 -0
- nat/cli/commands/sizing/sizing.py +27 -0
- nat/cli/commands/start.py +246 -0
- nat/cli/commands/uninstall.py +81 -0
- nat/cli/commands/validate.py +47 -0
- nat/cli/commands/workflow/__init__.py +14 -0
- nat/cli/commands/workflow/templates/__init__.py.j2 +0 -0
- nat/cli/commands/workflow/templates/config.yml.j2 +16 -0
- nat/cli/commands/workflow/templates/pyproject.toml.j2 +22 -0
- nat/cli/commands/workflow/templates/register.py.j2 +5 -0
- nat/cli/commands/workflow/templates/workflow.py.j2 +36 -0
- nat/cli/commands/workflow/workflow.py +37 -0
- nat/cli/commands/workflow/workflow_commands.py +317 -0
- nat/cli/entrypoint.py +135 -0
- nat/cli/main.py +57 -0
- nat/cli/register_workflow.py +488 -0
- nat/cli/type_registry.py +1000 -0
- nat/data_models/__init__.py +14 -0
- nat/data_models/api_server.py +716 -0
- nat/data_models/authentication.py +231 -0
- nat/data_models/common.py +171 -0
- nat/data_models/component.py +58 -0
- nat/data_models/component_ref.py +168 -0
- nat/data_models/config.py +410 -0
- nat/data_models/dataset_handler.py +169 -0
- nat/data_models/discovery_metadata.py +305 -0
- nat/data_models/embedder.py +27 -0
- nat/data_models/evaluate.py +127 -0
- nat/data_models/evaluator.py +26 -0
- nat/data_models/front_end.py +26 -0
- nat/data_models/function.py +30 -0
- nat/data_models/function_dependencies.py +72 -0
- nat/data_models/interactive.py +246 -0
- nat/data_models/intermediate_step.py +302 -0
- nat/data_models/invocation_node.py +38 -0
- nat/data_models/llm.py +27 -0
- nat/data_models/logging.py +26 -0
- nat/data_models/memory.py +27 -0
- nat/data_models/object_store.py +44 -0
- nat/data_models/profiler.py +54 -0
- nat/data_models/registry_handler.py +26 -0
- nat/data_models/retriever.py +30 -0
- nat/data_models/retry_mixin.py +35 -0
- nat/data_models/span.py +190 -0
- nat/data_models/step_adaptor.py +64 -0
- nat/data_models/streaming.py +33 -0
- nat/data_models/swe_bench_model.py +54 -0
- nat/data_models/telemetry_exporter.py +26 -0
- nat/data_models/ttc_strategy.py +30 -0
- nat/embedder/__init__.py +0 -0
- nat/embedder/nim_embedder.py +59 -0
- nat/embedder/openai_embedder.py +43 -0
- nat/embedder/register.py +22 -0
- nat/eval/__init__.py +14 -0
- nat/eval/config.py +60 -0
- nat/eval/dataset_handler/__init__.py +0 -0
- nat/eval/dataset_handler/dataset_downloader.py +106 -0
- nat/eval/dataset_handler/dataset_filter.py +52 -0
- nat/eval/dataset_handler/dataset_handler.py +367 -0
- nat/eval/evaluate.py +510 -0
- nat/eval/evaluator/__init__.py +14 -0
- nat/eval/evaluator/base_evaluator.py +77 -0
- nat/eval/evaluator/evaluator_model.py +45 -0
- nat/eval/intermediate_step_adapter.py +99 -0
- nat/eval/rag_evaluator/__init__.py +0 -0
- nat/eval/rag_evaluator/evaluate.py +178 -0
- nat/eval/rag_evaluator/register.py +143 -0
- nat/eval/register.py +23 -0
- nat/eval/remote_workflow.py +133 -0
- nat/eval/runners/__init__.py +14 -0
- nat/eval/runners/config.py +39 -0
- nat/eval/runners/multi_eval_runner.py +54 -0
- nat/eval/runtime_event_subscriber.py +52 -0
- nat/eval/swe_bench_evaluator/__init__.py +0 -0
- nat/eval/swe_bench_evaluator/evaluate.py +215 -0
- nat/eval/swe_bench_evaluator/register.py +36 -0
- nat/eval/trajectory_evaluator/__init__.py +0 -0
- nat/eval/trajectory_evaluator/evaluate.py +75 -0
- nat/eval/trajectory_evaluator/register.py +40 -0
- nat/eval/tunable_rag_evaluator/__init__.py +0 -0
- nat/eval/tunable_rag_evaluator/evaluate.py +245 -0
- nat/eval/tunable_rag_evaluator/register.py +52 -0
- nat/eval/usage_stats.py +41 -0
- nat/eval/utils/__init__.py +0 -0
- nat/eval/utils/output_uploader.py +140 -0
- nat/eval/utils/tqdm_position_registry.py +40 -0
- nat/eval/utils/weave_eval.py +184 -0
- nat/experimental/__init__.py +0 -0
- nat/experimental/decorators/__init__.py +0 -0
- nat/experimental/decorators/experimental_warning_decorator.py +134 -0
- nat/experimental/test_time_compute/__init__.py +0 -0
- nat/experimental/test_time_compute/editing/__init__.py +0 -0
- nat/experimental/test_time_compute/editing/iterative_plan_refinement_editor.py +147 -0
- nat/experimental/test_time_compute/editing/llm_as_a_judge_editor.py +204 -0
- nat/experimental/test_time_compute/editing/motivation_aware_summarization.py +107 -0
- nat/experimental/test_time_compute/functions/__init__.py +0 -0
- nat/experimental/test_time_compute/functions/execute_score_select_function.py +105 -0
- nat/experimental/test_time_compute/functions/plan_select_execute_function.py +224 -0
- nat/experimental/test_time_compute/functions/ttc_tool_orchestration_function.py +205 -0
- nat/experimental/test_time_compute/functions/ttc_tool_wrapper_function.py +146 -0
- nat/experimental/test_time_compute/models/__init__.py +0 -0
- nat/experimental/test_time_compute/models/editor_config.py +132 -0
- nat/experimental/test_time_compute/models/scoring_config.py +112 -0
- nat/experimental/test_time_compute/models/search_config.py +120 -0
- nat/experimental/test_time_compute/models/selection_config.py +154 -0
- nat/experimental/test_time_compute/models/stage_enums.py +43 -0
- nat/experimental/test_time_compute/models/strategy_base.py +66 -0
- nat/experimental/test_time_compute/models/tool_use_config.py +41 -0
- nat/experimental/test_time_compute/models/ttc_item.py +48 -0
- nat/experimental/test_time_compute/register.py +36 -0
- nat/experimental/test_time_compute/scoring/__init__.py +0 -0
- nat/experimental/test_time_compute/scoring/llm_based_agent_scorer.py +168 -0
- nat/experimental/test_time_compute/scoring/llm_based_plan_scorer.py +168 -0
- nat/experimental/test_time_compute/scoring/motivation_aware_scorer.py +111 -0
- nat/experimental/test_time_compute/search/__init__.py +0 -0
- nat/experimental/test_time_compute/search/multi_llm_planner.py +128 -0
- nat/experimental/test_time_compute/search/multi_query_retrieval_search.py +122 -0
- nat/experimental/test_time_compute/search/single_shot_multi_plan_planner.py +128 -0
- nat/experimental/test_time_compute/selection/__init__.py +0 -0
- nat/experimental/test_time_compute/selection/best_of_n_selector.py +63 -0
- nat/experimental/test_time_compute/selection/llm_based_agent_output_selector.py +131 -0
- nat/experimental/test_time_compute/selection/llm_based_output_merging_selector.py +159 -0
- nat/experimental/test_time_compute/selection/llm_based_plan_selector.py +128 -0
- nat/experimental/test_time_compute/selection/threshold_selector.py +58 -0
- nat/front_ends/__init__.py +14 -0
- nat/front_ends/console/__init__.py +14 -0
- nat/front_ends/console/authentication_flow_handler.py +233 -0
- nat/front_ends/console/console_front_end_config.py +32 -0
- nat/front_ends/console/console_front_end_plugin.py +96 -0
- nat/front_ends/console/register.py +25 -0
- nat/front_ends/cron/__init__.py +14 -0
- nat/front_ends/fastapi/__init__.py +14 -0
- nat/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
- nat/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +27 -0
- nat/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +107 -0
- nat/front_ends/fastapi/fastapi_front_end_config.py +241 -0
- nat/front_ends/fastapi/fastapi_front_end_controller.py +68 -0
- nat/front_ends/fastapi/fastapi_front_end_plugin.py +116 -0
- nat/front_ends/fastapi/fastapi_front_end_plugin_worker.py +1087 -0
- nat/front_ends/fastapi/html_snippets/__init__.py +14 -0
- nat/front_ends/fastapi/html_snippets/auth_code_grant_success.py +35 -0
- nat/front_ends/fastapi/intermediate_steps_subscriber.py +80 -0
- nat/front_ends/fastapi/job_store.py +183 -0
- nat/front_ends/fastapi/main.py +72 -0
- nat/front_ends/fastapi/message_handler.py +320 -0
- nat/front_ends/fastapi/message_validator.py +352 -0
- nat/front_ends/fastapi/register.py +25 -0
- nat/front_ends/fastapi/response_helpers.py +195 -0
- nat/front_ends/fastapi/step_adaptor.py +319 -0
- nat/front_ends/mcp/__init__.py +14 -0
- nat/front_ends/mcp/mcp_front_end_config.py +36 -0
- nat/front_ends/mcp/mcp_front_end_plugin.py +81 -0
- nat/front_ends/mcp/mcp_front_end_plugin_worker.py +143 -0
- nat/front_ends/mcp/register.py +27 -0
- nat/front_ends/mcp/tool_converter.py +241 -0
- nat/front_ends/register.py +22 -0
- nat/front_ends/simple_base/__init__.py +14 -0
- nat/front_ends/simple_base/simple_front_end_plugin_base.py +54 -0
- nat/llm/__init__.py +0 -0
- nat/llm/aws_bedrock_llm.py +57 -0
- nat/llm/nim_llm.py +46 -0
- nat/llm/openai_llm.py +46 -0
- nat/llm/register.py +23 -0
- nat/llm/utils/__init__.py +14 -0
- nat/llm/utils/env_config_value.py +94 -0
- nat/llm/utils/error.py +17 -0
- nat/memory/__init__.py +20 -0
- nat/memory/interfaces.py +183 -0
- nat/memory/models.py +112 -0
- nat/meta/pypi.md +58 -0
- nat/object_store/__init__.py +20 -0
- nat/object_store/in_memory_object_store.py +76 -0
- nat/object_store/interfaces.py +84 -0
- nat/object_store/models.py +38 -0
- nat/object_store/register.py +20 -0
- nat/observability/__init__.py +14 -0
- nat/observability/exporter/__init__.py +14 -0
- nat/observability/exporter/base_exporter.py +449 -0
- nat/observability/exporter/exporter.py +78 -0
- nat/observability/exporter/file_exporter.py +33 -0
- nat/observability/exporter/processing_exporter.py +322 -0
- nat/observability/exporter/raw_exporter.py +52 -0
- nat/observability/exporter/span_exporter.py +288 -0
- nat/observability/exporter_manager.py +335 -0
- nat/observability/mixin/__init__.py +14 -0
- nat/observability/mixin/batch_config_mixin.py +26 -0
- nat/observability/mixin/collector_config_mixin.py +23 -0
- nat/observability/mixin/file_mixin.py +288 -0
- nat/observability/mixin/file_mode.py +23 -0
- nat/observability/mixin/resource_conflict_mixin.py +134 -0
- nat/observability/mixin/serialize_mixin.py +61 -0
- nat/observability/mixin/type_introspection_mixin.py +183 -0
- nat/observability/processor/__init__.py +14 -0
- nat/observability/processor/batching_processor.py +310 -0
- nat/observability/processor/callback_processor.py +42 -0
- nat/observability/processor/intermediate_step_serializer.py +28 -0
- nat/observability/processor/processor.py +71 -0
- nat/observability/register.py +96 -0
- nat/observability/utils/__init__.py +14 -0
- nat/observability/utils/dict_utils.py +236 -0
- nat/observability/utils/time_utils.py +31 -0
- nat/plugins/.namespace +1 -0
- nat/profiler/__init__.py +0 -0
- nat/profiler/calc/__init__.py +14 -0
- nat/profiler/calc/calc_runner.py +627 -0
- nat/profiler/calc/calculations.py +288 -0
- nat/profiler/calc/data_models.py +188 -0
- nat/profiler/calc/plot.py +345 -0
- nat/profiler/callbacks/__init__.py +0 -0
- nat/profiler/callbacks/agno_callback_handler.py +295 -0
- nat/profiler/callbacks/base_callback_class.py +20 -0
- nat/profiler/callbacks/langchain_callback_handler.py +290 -0
- nat/profiler/callbacks/llama_index_callback_handler.py +205 -0
- nat/profiler/callbacks/semantic_kernel_callback_handler.py +238 -0
- nat/profiler/callbacks/token_usage_base_model.py +27 -0
- nat/profiler/data_frame_row.py +51 -0
- nat/profiler/data_models.py +24 -0
- nat/profiler/decorators/__init__.py +0 -0
- nat/profiler/decorators/framework_wrapper.py +131 -0
- nat/profiler/decorators/function_tracking.py +254 -0
- nat/profiler/forecasting/__init__.py +0 -0
- nat/profiler/forecasting/config.py +18 -0
- nat/profiler/forecasting/model_trainer.py +75 -0
- nat/profiler/forecasting/models/__init__.py +22 -0
- nat/profiler/forecasting/models/forecasting_base_model.py +40 -0
- nat/profiler/forecasting/models/linear_model.py +197 -0
- nat/profiler/forecasting/models/random_forest_regressor.py +269 -0
- nat/profiler/inference_metrics_model.py +28 -0
- nat/profiler/inference_optimization/__init__.py +0 -0
- nat/profiler/inference_optimization/bottleneck_analysis/__init__.py +0 -0
- nat/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +460 -0
- nat/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +258 -0
- nat/profiler/inference_optimization/data_models.py +386 -0
- nat/profiler/inference_optimization/experimental/__init__.py +0 -0
- nat/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +468 -0
- nat/profiler/inference_optimization/experimental/prefix_span_analysis.py +405 -0
- nat/profiler/inference_optimization/llm_metrics.py +212 -0
- nat/profiler/inference_optimization/prompt_caching.py +163 -0
- nat/profiler/inference_optimization/token_uniqueness.py +107 -0
- nat/profiler/inference_optimization/workflow_runtimes.py +72 -0
- nat/profiler/intermediate_property_adapter.py +102 -0
- nat/profiler/profile_runner.py +473 -0
- nat/profiler/utils.py +184 -0
- nat/registry_handlers/__init__.py +0 -0
- nat/registry_handlers/local/__init__.py +0 -0
- nat/registry_handlers/local/local_handler.py +176 -0
- nat/registry_handlers/local/register_local.py +37 -0
- nat/registry_handlers/metadata_factory.py +60 -0
- nat/registry_handlers/package_utils.py +571 -0
- nat/registry_handlers/pypi/__init__.py +0 -0
- nat/registry_handlers/pypi/pypi_handler.py +251 -0
- nat/registry_handlers/pypi/register_pypi.py +40 -0
- nat/registry_handlers/register.py +21 -0
- nat/registry_handlers/registry_handler_base.py +157 -0
- nat/registry_handlers/rest/__init__.py +0 -0
- nat/registry_handlers/rest/register_rest.py +56 -0
- nat/registry_handlers/rest/rest_handler.py +237 -0
- nat/registry_handlers/schemas/__init__.py +0 -0
- nat/registry_handlers/schemas/headers.py +42 -0
- nat/registry_handlers/schemas/package.py +68 -0
- nat/registry_handlers/schemas/publish.py +68 -0
- nat/registry_handlers/schemas/pull.py +82 -0
- nat/registry_handlers/schemas/remove.py +36 -0
- nat/registry_handlers/schemas/search.py +91 -0
- nat/registry_handlers/schemas/status.py +47 -0
- nat/retriever/__init__.py +0 -0
- nat/retriever/interface.py +41 -0
- nat/retriever/milvus/__init__.py +14 -0
- nat/retriever/milvus/register.py +81 -0
- nat/retriever/milvus/retriever.py +228 -0
- nat/retriever/models.py +77 -0
- nat/retriever/nemo_retriever/__init__.py +14 -0
- nat/retriever/nemo_retriever/register.py +60 -0
- nat/retriever/nemo_retriever/retriever.py +190 -0
- nat/retriever/register.py +22 -0
- nat/runtime/__init__.py +14 -0
- nat/runtime/loader.py +220 -0
- nat/runtime/runner.py +195 -0
- nat/runtime/session.py +162 -0
- nat/runtime/user_metadata.py +130 -0
- nat/settings/__init__.py +0 -0
- nat/settings/global_settings.py +318 -0
- nat/test/.namespace +1 -0
- nat/tool/__init__.py +0 -0
- nat/tool/chat_completion.py +74 -0
- nat/tool/code_execution/README.md +151 -0
- nat/tool/code_execution/__init__.py +0 -0
- nat/tool/code_execution/code_sandbox.py +267 -0
- nat/tool/code_execution/local_sandbox/.gitignore +1 -0
- nat/tool/code_execution/local_sandbox/Dockerfile.sandbox +60 -0
- nat/tool/code_execution/local_sandbox/__init__.py +13 -0
- nat/tool/code_execution/local_sandbox/local_sandbox_server.py +198 -0
- nat/tool/code_execution/local_sandbox/sandbox.requirements.txt +6 -0
- nat/tool/code_execution/local_sandbox/start_local_sandbox.sh +50 -0
- nat/tool/code_execution/register.py +74 -0
- nat/tool/code_execution/test_code_execution_sandbox.py +414 -0
- nat/tool/code_execution/utils.py +100 -0
- nat/tool/datetime_tools.py +42 -0
- nat/tool/document_search.py +141 -0
- nat/tool/github_tools/__init__.py +0 -0
- nat/tool/github_tools/create_github_commit.py +133 -0
- nat/tool/github_tools/create_github_issue.py +87 -0
- nat/tool/github_tools/create_github_pr.py +106 -0
- nat/tool/github_tools/get_github_file.py +106 -0
- nat/tool/github_tools/get_github_issue.py +166 -0
- nat/tool/github_tools/get_github_pr.py +256 -0
- nat/tool/github_tools/update_github_issue.py +100 -0
- nat/tool/mcp/__init__.py +14 -0
- nat/tool/mcp/exceptions.py +142 -0
- nat/tool/mcp/mcp_client.py +255 -0
- nat/tool/mcp/mcp_tool.py +96 -0
- nat/tool/memory_tools/__init__.py +0 -0
- nat/tool/memory_tools/add_memory_tool.py +79 -0
- nat/tool/memory_tools/delete_memory_tool.py +67 -0
- nat/tool/memory_tools/get_memory_tool.py +72 -0
- nat/tool/nvidia_rag.py +95 -0
- nat/tool/register.py +38 -0
- nat/tool/retriever.py +94 -0
- nat/tool/server_tools.py +66 -0
- nat/utils/__init__.py +0 -0
- nat/utils/data_models/__init__.py +0 -0
- nat/utils/data_models/schema_validator.py +58 -0
- nat/utils/debugging_utils.py +43 -0
- nat/utils/dump_distro_mapping.py +32 -0
- nat/utils/exception_handlers/__init__.py +0 -0
- nat/utils/exception_handlers/automatic_retries.py +289 -0
- nat/utils/exception_handlers/mcp.py +211 -0
- nat/utils/exception_handlers/schemas.py +114 -0
- nat/utils/io/__init__.py +0 -0
- nat/utils/io/model_processing.py +28 -0
- nat/utils/io/yaml_tools.py +119 -0
- nat/utils/log_utils.py +37 -0
- nat/utils/metadata_utils.py +74 -0
- nat/utils/optional_imports.py +142 -0
- nat/utils/producer_consumer_queue.py +178 -0
- nat/utils/reactive/__init__.py +0 -0
- nat/utils/reactive/base/__init__.py +0 -0
- nat/utils/reactive/base/observable_base.py +65 -0
- nat/utils/reactive/base/observer_base.py +55 -0
- nat/utils/reactive/base/subject_base.py +79 -0
- nat/utils/reactive/observable.py +59 -0
- nat/utils/reactive/observer.py +76 -0
- nat/utils/reactive/subject.py +131 -0
- nat/utils/reactive/subscription.py +49 -0
- nat/utils/settings/__init__.py +0 -0
- nat/utils/settings/global_settings.py +197 -0
- nat/utils/string_utils.py +38 -0
- nat/utils/type_converter.py +290 -0
- nat/utils/type_utils.py +484 -0
- nat/utils/url_utils.py +27 -0
- nvidia_nat-1.2.0.dist-info/METADATA +365 -0
- nvidia_nat-1.2.0.dist-info/RECORD +435 -0
- nvidia_nat-1.2.0.dist-info/WHEEL +5 -0
- nvidia_nat-1.2.0.dist-info/entry_points.txt +21 -0
- nvidia_nat-1.2.0.dist-info/licenses/LICENSE-3rd-party.txt +5478 -0
- nvidia_nat-1.2.0.dist-info/licenses/LICENSE.md +201 -0
- nvidia_nat-1.2.0.dist-info/top_level.txt +2 -0

nat/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py
@@ -0,0 +1,258 @@
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Advanced bottleneck analysis for a dataframe that contains:
- event_type in {LLM_START, LLM_END, TOOL_START, TOOL_END, ...}
- llm_name
- tool_name
- UUID
- event_timestamp (float or datetime)
- other metadata...

We pair start/end events by UUID, compute operation durations,
then analyze concurrency and produce a summary report.
"""

import numpy as np
import pandas as pd

from nat.data_models.intermediate_step import IntermediateStep
from nat.profiler.inference_optimization.data_models import SimpleBottleneckReport
from nat.profiler.inference_optimization.data_models import SimpleOperationStats
from nat.profiler.utils import create_standardized_dataframe


# ----------------------------------------------------------------------
# Main Function
# ----------------------------------------------------------------------
def profile_workflow_bottlenecks(all_steps: list[list[IntermediateStep]]) -> SimpleBottleneckReport:
    """
    Perform advanced bottleneck profiling on a workflow dataframe.

    1) Pair LLM_START/LLM_END and TOOL_START/TOOL_END by UUID.
    2) Compute operation durations.
    3) Analyze concurrency (max concurrent usage).
    4) Summarize as SimpleOperationStats and produce a final SimpleBottleneckReport.

    Parameters
    ----------
    all_steps : Intermediate Steps

    Returns
    -------
    SimpleBottleneckReport
        Contains detailed stats per operation and a textual summary of top bottlenecks.
    """
    df = create_standardized_dataframe(all_steps)
    # -------------------------------------------------------------
    # 1) Separate events by operation type and match start/end
    # -------------------------------------------------------------
    required_cols = {"event_type", "UUID", "event_timestamp"}
    missing_cols = required_cols - set(df.columns)
    if missing_cols:
        raise ValueError(f"DataFrame missing required columns: {missing_cols}")

    # We'll unify LLM and TOOL operations into a single set, with:
    #   operation_type = 'LLM' or 'TOOL'
    #   operation_name = llm_name/tool_name
    #   start_time
    #   end_time
    #   duration = end_time - start_time
    # We'll store them in a list of dicts, then convert to DataFrame.
    operations_records = []

    # We'll create a copy to avoid mutating user data
    dfc = df.copy()

    # We define a small helper to map event_type -> (operation_type, which_name_field)
    def get_operation_info(event_type: str) -> str | None:
        """
        Return 'LLM' if event_type starts with 'LLM_', 'TOOL' if event_type starts with 'TOOL_',
        else None (unknown).
        """
        if event_type.startswith("LLM_"):
            return "LLM"
        if event_type.startswith("TOOL_"):
            return "TOOL"
        return None

    # Group by UUID so we can pair each START with the corresponding END
    grouped = dfc.groupby("UUID", as_index=False, group_keys=True)

    for uuid_val, group_df in grouped:
        if len(group_df) < 2:
            # Possibly incomplete or single event, skip
            continue

        # We might have multiple events with the same UUID, but typically we expect:
        #   LLM_START, LLM_END (or TOOL_START, TOOL_END).
        # Sort by timestamp
        group_df = group_df.sort_values("event_timestamp")

        # Identify operation_type from the first row's event_type
        first_event_type = group_df["event_type"].iloc[0]
        operation_type = get_operation_info(first_event_type)
        if not operation_type:
            # unknown or not LLM_/TOOL_
            continue

        # We'll attempt to find the start row and the end row
        # Usually there's exactly 1 start, 1 end
        start_rows = group_df[group_df["event_type"] == f"{operation_type}_START"]
        end_rows = group_df[group_df["event_type"] == f"{operation_type}_END"]

        if len(start_rows) == 0 or len(end_rows) == 0:
            # No matching start/end
            continue

        # We'll just take the earliest start and the latest end for the entire group.
        start_time = start_rows["event_timestamp"].min()
        end_time = end_rows["event_timestamp"].max()
        duration = end_time - start_time

        # For the name, we pick 'llm_name' or 'tool_name' depending on operation_type
        if operation_type == "LLM":
            # Among the rows, pick a non-null llm_name if present
            op_names = group_df["llm_name"].dropna().unique()
            # fallback to a default if none
            operation_name = op_names[0] if len(op_names) else "unknown_llm"
        else:
            op_names = group_df["tool_name"].dropna().unique()
            operation_name = op_names[0] if len(op_names) else "unknown_tool"

        operations_records.append({
            "operation_type": operation_type,
            "operation_name": operation_name,
            "start_time": start_time,
            "end_time": end_time,
            "duration": duration,
            "UUID": uuid_val
        })

    if not operations_records:
        # No valid operations found
        return SimpleBottleneckReport(stats={}, summary="No operations found to profile.")

    operations_df = pd.DataFrame(operations_records)

    # -------------------------------------------------------------
    # 2) Concurrency Analysis
    # -------------------------------------------------------------
    # We want to find the maximum concurrency for each operation_name.
    # We'll do a timeline-based approach: for each operation we have a start_time, end_time
    # We'll create +1 event at start_time, -1 event at end_time, then do a running sum.
    # Then we can measure concurrency across the entire timeline. However, we want concurrency
    # specifically *by operation_name* as well as overall.
    #
    # We'll do it in two passes:
    #   A) Overall concurrency ignoring operation_name
    #   B) concurrency per (operation_type, operation_name)
    # Then we can combine them for a "peak concurrency" measure.

    # A) Overall concurrency (not always essential, but might be interesting)
    timeline_events = []
    for row in operations_df.itertuples(index=False):
        timeline_events.append((row.start_time, +1))
        timeline_events.append((row.end_time, -1))

    timeline_events.sort(key=lambda x: x[0])  # sort by time
    current_concurrency = 0
    concurrency_trace = []
    for ts, delta in timeline_events:
        current_concurrency += delta
        concurrency_trace.append((ts, current_concurrency))
    overall_max_concurrency = max(c[1] for c in concurrency_trace) if concurrency_trace else 0

    # B) concurrency by operation_name
    # We'll generate timeline events per operation_name
    # Then compute the max concurrency for that subset
    operation_names = operations_df["operation_name"].unique()
    max_concurrency_by_name = {}

    for op_name in operation_names:
        sub = operations_df[operations_df["operation_name"] == op_name]
        events_sub = []
        for row in sub.itertuples(index=False):
            events_sub.append((row.start_time, +1))
            events_sub.append((row.end_time, -1))
        if not events_sub:
            max_concurrency_by_name[op_name] = 0
            continue
        events_sub.sort(key=lambda x: x[0])
        c_curr = 0
        c_max = 0
        for ts, delta in events_sub:
            c_curr += delta
            if c_curr > c_max:  # pylint: disable=consider-using-max-builtin
                c_max = c_curr
        max_concurrency_by_name[op_name] = c_max

    # -------------------------------------------------------------
    # 3) Compute summary stats per (operation_type, operation_name)
    # -------------------------------------------------------------
    # We'll gather durations in a list, compute average, p95, p99, etc.

    stats_dict = {}
    grouped_ops = operations_df.groupby(["operation_type", "operation_name"])
    for (op_type, op_name), grp in grouped_ops:
        durations = grp["duration"].values
        usage_count = len(durations)
        avg_duration = durations.mean()
        p95_duration = np.percentile(durations, 95)
        p99_duration = np.percentile(durations, 99)

        # concurrency
        max_concur = max_concurrency_by_name.get(op_name, 0)

        # define a custom "bottleneck_score":
        #   We say score = avg_duration * max_concurrency,
        bottleneck_score = float(avg_duration * max_concur)

        # store in dictionary
        key = f"{op_type}:{op_name}"
        stats_dict[key] = SimpleOperationStats(operation_type=op_type,
                                               operation_name=op_name,
                                               usage_count=usage_count,
                                               avg_duration=float(avg_duration),
                                               p95_duration=float(p95_duration),
                                               p99_duration=float(p99_duration),
                                               max_concurrency=int(max_concur),
                                               bottleneck_score=bottleneck_score)

    # -------------------------------------------------------------
    # 4) Produce a textual summary highlighting top bottlenecks
    # -------------------------------------------------------------
    # We'll rank by bottleneck_score descending and show top 3.
    if not stats_dict:
        return SimpleBottleneckReport(stats={}, summary="No stats to report.")

    top_items = sorted(stats_dict.values(), key=lambda x: x.bottleneck_score, reverse=True)
    top_3 = top_items[:3]

    # Build a simple textual summary
    lines = []
    lines.append("---- BOTTLENECK REPORT ----")
    lines.append(f"Total distinct operations found: {len(stats_dict)}")
    lines.append(f"Overall max concurrency (all ops): {overall_max_concurrency}")
    lines.append("Top 3 Bottlenecks by bottleneck_score (avg_duration * max_concurrency):")
    for i, item in enumerate(top_3, start=1):
        lines.append(f"{i}) {item.operation_type} '{item.operation_name}': "
                     f"score={item.bottleneck_score:.2f}, "
                     f"avg_dur={item.avg_duration:.2f}, "
                     f"max_concurrency={item.max_concurrency}")
    summary_report = "\n".join(lines)

    # Construct a final Pydantic model
    return SimpleBottleneckReport(stats=stats_dict, summary=summary_report)
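
The concurrency analysis above reduces to a sweep: emit a +1 event at each start_time, a -1 event at each end_time, sort by time, and track the running sum; the peak of that sum is the maximum concurrency. The snippet below is a standalone sketch of that sweep (it is not part of the package, and the interval values are made up for illustration).

intervals = [(0.0, 4.0), (1.0, 3.0), (2.5, 6.0), (7.0, 8.0)]  # hypothetical (start_time, end_time) pairs

events = []
for start, end in intervals:
    events.append((start, +1))   # an operation becomes active
    events.append((end, -1))     # an operation finishes
events.sort(key=lambda e: e[0])

current = peak = 0
for _, delta in events:
    current += delta
    peak = max(peak, current)

print(peak)  # 3 -- the first three intervals all overlap at t=2.5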

nat/profiler/inference_optimization/data_models.py
@@ -0,0 +1,386 @@
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Any

from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import Field
from pydantic import RootModel

# -----------------------------------------------------------
# Prompt Caching Data Models
# -----------------------------------------------------------


class PrefixInfo(BaseModel):
    """
    Stores metadata about a particular prefix observed in the LLM text input.
    """
    prefix: str
    prefix_length: int
    calls_count: int
    calls_percentage: float = Field(..., ge=0.0, le=1.0)


class FrameworkLLMPrefixData(BaseModel):
    """
    Metadata for a single (framework, llm_name) group,
    including total calls and all prefix statistics.
    """
    total_calls: int
    prefix_info: list[PrefixInfo]


class CommonPrefixesOutput(RootModel[dict[str, FrameworkLLMPrefixData]]):
    """
    A root model storing a dictionary keyed by '<framework>-<llm>',
    each value is a FrameworkLLMPrefixData instance.
    """

    def to_dict(self) -> dict[str, FrameworkLLMPrefixData]:
        """
        Return the raw dictionary of data, discarding the 'root' wrapper.
        """
        return self.root


# ----------------------------------------------------------------
# Token Uniqueness Models
# ----------------------------------------------------------------


class LLMUniquenessMetrics(BaseModel):
    """
    Stores p90, p95, and p99 for the 'new words' metric.
    """
    p90: float
    p95: float
    p99: float


class LLMUniquenessMetricsByLLM(RootModel[dict[str, LLMUniquenessMetrics]]):
    """
    A RootModel containing a dictionary where each key is an LLM name
    and each value is the LLMUniquenessMetrics for that LLM.
    """

    def to_dict(self) -> dict[str, Any]:
        # Return the raw dictionary for convenience
        return self.root


# ----------------------------------------------------------------
# Workflow Runtime Models
# ----------------------------------------------------------------


class WorkflowRuntimeMetrics(BaseModel):
    """
    Stores p90, p95, and p99 for workflow runtimes across all examples.
    """
    p90: float
    p95: float
    p99: float


# ----------------------------------------------------------------------
# Simple Bottleneck Detection Models
# ----------------------------------------------------------------------


class SimpleOperationStats(BaseModel):
    """
    Statistics for a particular operation name (LLM or tool),
    capturing concurrency, duration, usage, etc.
    """
    operation_type: str  # 'LLM' or 'TOOL'
    operation_name: str  # e.g., "llama-3" or "serpapi"
    usage_count: int  # how many times it appears
    avg_duration: float  # average duration
    p95_duration: float
    p99_duration: float
    max_concurrency: int  # maximum number of concurrent operations
    bottleneck_score: float = Field(..., description="Custom metric to rank bottlenecks.")


class SimpleBottleneckReport(BaseModel):
    """
    A container for all operation stats keyed by 'operation_type:operation_name',
    plus a textual summary that highlights top bottlenecks.
    """
    stats: dict[str, SimpleOperationStats]
    summary: str


# ----------------------------------------------------------------------
# Nested Bottleneck Models
# ----------------------------------------------------------------------


class CallNode(BaseModel):
    """
    A single call (LLM or TOOL) in a nested call tree.

    Attributes
    ----------
    uuid: str
        Unique ID tying together START/END events.
    operation_type: str
        e.g. 'LLM' or 'TOOL'.
    operation_name: str
        e.g. 'llama-3', 'bing-search', ...
    start_time: float
        Time when the call started.
    end_time: float
        Time when the call ended.
    duration: float
        end_time - start_time
    children: list["CallNode"]
        List of nested calls inside this call's time window.
    parent: "CallNode" | None
        Reference to the parent call in the tree (None if top-level).
    """
    model_config = ConfigDict(arbitrary_types_allowed=True)

    uuid: str
    operation_type: str
    operation_name: str
    start_time: float
    end_time: float
    duration: float = Field(..., description="end_time - start_time")
    children: list["CallNode"] = Field(default_factory=list)
    parent: "CallNode | None" = None

    def compute_self_time(self) -> float:
        """
        'Self time' = duration minus the union of child intervals.
        Overlapping child intervals are merged so we don't double-count them.
        """
        if not self.children:
            return self.duration

        intervals = [(c.start_time, c.end_time) for c in self.children]  # pylint: disable=not-an-iterable
        # Sort by start time
        intervals.sort(key=lambda x: x[0])

        merged = []
        cur_start, cur_end = intervals[0]
        for i in range(1, len(intervals)):
            s, e = intervals[i]
            if s <= cur_end:
                # Overlap
                cur_end = max(cur_end, e)
            else:
                merged.append((cur_start, cur_end))
                cur_start, cur_end = s, e
        merged.append((cur_start, cur_end))

        # Sum coverage, clamped to [start_time, end_time]
        covered = 0.0
        for (s, e) in merged:
            s_clamped = max(s, self.start_time)
            e_clamped = min(e, self.end_time)
            if e_clamped > s_clamped:
                covered += (e_clamped - s_clamped)

        return max(0.0, self.duration - covered)

    def compute_subtree_time(self) -> float:
        """
        Recursively compute the sum of self_time + children's subtree_time.
        This ensures no overlap double-counting among children.
        """
        total = self.compute_self_time()
        for c in self.children:  # pylint: disable=not-an-iterable
            total += c.compute_subtree_time()
        return total

    def __str__(self) -> str:
        return self._repr(0)

    def _repr(self, level: int) -> str:
        indent = " " * level
        info = (f"{indent}- {self.operation_type} '{self.operation_name}' "
                f"(uuid={self.uuid}, start={self.start_time:.2f}, "
                f"end={self.end_time:.2f}, dur={self.duration:.2f})")
        child_strs = [child._repr(level + 1) for child in self.children]  # pylint: disable=not-an-iterable
        return "\n".join([info] + child_strs)


CallNode.model_rebuild()


class NodeMetrics(BaseModel):
    """
    Metrics for a single node:
      - self_time
      - subtree_time
      - concurrency_midpoint (optional)
      - bottleneck_score (example: subtree_time)
    """
    uuid: str
    operation_type: str
    operation_name: str
    start_time: float
    end_time: float
    duration: float
    self_time: float
    subtree_time: float
    concurrency_midpoint: float | None = None
    bottleneck_score: float


class ConcurrencyDistribution(BaseModel):
    """
    Overall concurrency distribution info:
      - timeline_segments: List of (start, end, concurrency)
      - p50, p90, p95, p99 concurrency
    """
    timeline_segments: list[tuple[float, float, int]]
    p50: float
    p90: float
    p95: float
    p99: float


class NestedCallProfilingResult(BaseModel):
    """
    The final Pydantic model returned by 'multi_example_call_profiling'.

    Contains:
      - concurrency: ConcurrencyDistribution
      - node_metrics: dict[uuid, NodeMetrics]
      - top_bottlenecks: The top calls by bottleneck_score
      - textual_report: A multiline string summarizing everything
    """
    concurrency: ConcurrencyDistribution
    node_metrics: dict[str, NodeMetrics]
    top_bottlenecks: list[NodeMetrics]
    textual_report: str


# ----------------------------------------------------------------------
# Concurrency Spike Analysis Models
# ----------------------------------------------------------------------


class ConcurrencyCallNode(CallNode):
    """
    A single call in the nested call tree for one example.
    Each call is matched by a UUID with a `*_START` and `*_END` event.

    Because fields like prompt_tokens, completion_tokens, total_tokens
    may only exist at the END event, we store them only after seeing `*_END`.
    """

    example_number: int

    # Additional fields from END events
    prompt_tokens: int | None = None
    completion_tokens: int | None = None
    total_tokens: int | None = None
    tool_outputs: str | None = None
    llm_text_output: str | None = None


ConcurrencyCallNode.model_rebuild()


class ConcurrencySpikeInfo(BaseModel):
    """
    Info about one concurrency spike interval:
      - start, end of the spike
      - concurrency level
      - list of calls that overlap
    """
    start_time: float
    end_time: float
    concurrency: int
    active_uuids: list[str] = Field(default_factory=list)


class ConcurrencyCorrelationStats(BaseModel):
    """
    Simple container for correlation / summarized stats of calls overlapping concurrency spikes.
    """
    avg_prompt_tokens: float
    avg_total_tokens: float


class ConcurrencyAnalysisResult(BaseModel):
    """
    The final Pydantic model returned by concurrency_spike_analysis(...).
    Contains:
      - concurrency_distribution: concurrency_level => total_time
      - p50_concurrency, p90_concurrency, p95_concurrency, p99_concurrency
      - spike_threshold, spike_intervals
      - correlation_stats
      - textual_report
    """
    concurrency_distribution: dict[int, float]
    p50_concurrency: float
    p90_concurrency: float
    p95_concurrency: float
    p99_concurrency: float

    spike_threshold: int
    spike_intervals: list[ConcurrencySpikeInfo]
    correlation_stats: ConcurrencyCorrelationStats

    average_latency_by_concurrency: dict[int, float]

    textual_report: str


# ----------------------------------------------------------------------
# PrefixSpan Analysis Models
# ----------------------------------------------------------------------


class PrefixCallNode(BaseModel):
    """
    Represents a single call in an example's workflow.
    - For LLM calls, we also store llm_text_input if available so we can incorporate it into the token.
    """
    uuid: str
    example_number: int
    operation_type: str  # "LLM" or "TOOL"
    operation_name: str  # e.g. "llama-3", "internet-search"
    start_time: float
    end_time: float
    duration: float
    llm_text_input: str | None = None


class FrequentPattern(BaseModel):
    """
    Frequent sub-sequence discovered by PrefixSpan, with coverage and average duration data.
    """
    pattern: list[str]  # e.g. ["LLM:llama-3|Hello world", "TOOL:internet-search"]
    frequency: int  # total occurrences across all examples
    coverage: float  # fraction of distinct examples that contain this pattern
    average_duration: float  # average sum of call durations for calls in that sub-sequence
    examples_containing: list[int]  # which examples have at least one occurrence


class PrefixSpanSubworkflowResult(BaseModel):
    """
    Pydantic model for the final outcome:
      - A list of frequent patterns
      - A textual summary
    """
    patterns: list[FrequentPattern]
    textual_report: str
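
As a quick usage sketch (assuming this wheel is installed so the module path above resolves), the CallNode model can be built by hand to see how compute_self_time merges overlapping child intervals; all names and numeric values below are invented for illustration and are not taken from the package's own examples.

from nat.profiler.inference_optimization.data_models import CallNode

# Two hypothetical child calls whose time windows overlap
child_a = CallNode(uuid="a", operation_type="TOOL", operation_name="search",
                   start_time=1.0, end_time=3.0, duration=2.0)
child_b = CallNode(uuid="b", operation_type="TOOL", operation_name="search",
                   start_time=2.0, end_time=4.0, duration=2.0)  # overlaps child_a
root = CallNode(uuid="root", operation_type="LLM", operation_name="example-llm",
                start_time=0.0, end_time=5.0, duration=5.0,
                children=[child_a, child_b])

# The children cover the merged interval [1.0, 4.0], so root self time is 5.0 - 3.0 = 2.0
print(root.compute_self_time())     # 2.0
# Subtree time = root self time + each child's subtree time = 2.0 + 2.0 + 2.0
print(root.compute_subtree_time())  # 6.0
print(root)                         # indented call tree rendered by __str__/_repr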