nvidia-nat 1.2.0rc5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiq/agent/__init__.py +0 -0
- aiq/agent/base.py +239 -0
- aiq/agent/dual_node.py +67 -0
- aiq/agent/react_agent/__init__.py +0 -0
- aiq/agent/react_agent/agent.py +355 -0
- aiq/agent/react_agent/output_parser.py +104 -0
- aiq/agent/react_agent/prompt.py +41 -0
- aiq/agent/react_agent/register.py +149 -0
- aiq/agent/reasoning_agent/__init__.py +0 -0
- aiq/agent/reasoning_agent/reasoning_agent.py +225 -0
- aiq/agent/register.py +23 -0
- aiq/agent/rewoo_agent/__init__.py +0 -0
- aiq/agent/rewoo_agent/agent.py +411 -0
- aiq/agent/rewoo_agent/prompt.py +108 -0
- aiq/agent/rewoo_agent/register.py +158 -0
- aiq/agent/tool_calling_agent/__init__.py +0 -0
- aiq/agent/tool_calling_agent/agent.py +119 -0
- aiq/agent/tool_calling_agent/register.py +106 -0
- aiq/authentication/__init__.py +14 -0
- aiq/authentication/api_key/__init__.py +14 -0
- aiq/authentication/api_key/api_key_auth_provider.py +96 -0
- aiq/authentication/api_key/api_key_auth_provider_config.py +124 -0
- aiq/authentication/api_key/register.py +26 -0
- aiq/authentication/exceptions/__init__.py +14 -0
- aiq/authentication/exceptions/api_key_exceptions.py +38 -0
- aiq/authentication/http_basic_auth/__init__.py +0 -0
- aiq/authentication/http_basic_auth/http_basic_auth_provider.py +81 -0
- aiq/authentication/http_basic_auth/register.py +30 -0
- aiq/authentication/interfaces.py +93 -0
- aiq/authentication/oauth2/__init__.py +14 -0
- aiq/authentication/oauth2/oauth2_auth_code_flow_provider.py +107 -0
- aiq/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +39 -0
- aiq/authentication/oauth2/register.py +25 -0
- aiq/authentication/register.py +21 -0
- aiq/builder/__init__.py +0 -0
- aiq/builder/builder.py +285 -0
- aiq/builder/component_utils.py +316 -0
- aiq/builder/context.py +264 -0
- aiq/builder/embedder.py +24 -0
- aiq/builder/eval_builder.py +161 -0
- aiq/builder/evaluator.py +29 -0
- aiq/builder/framework_enum.py +24 -0
- aiq/builder/front_end.py +73 -0
- aiq/builder/function.py +344 -0
- aiq/builder/function_base.py +380 -0
- aiq/builder/function_info.py +627 -0
- aiq/builder/intermediate_step_manager.py +174 -0
- aiq/builder/llm.py +25 -0
- aiq/builder/retriever.py +25 -0
- aiq/builder/user_interaction_manager.py +74 -0
- aiq/builder/workflow.py +148 -0
- aiq/builder/workflow_builder.py +1117 -0
- aiq/cli/__init__.py +14 -0
- aiq/cli/cli_utils/__init__.py +0 -0
- aiq/cli/cli_utils/config_override.py +231 -0
- aiq/cli/cli_utils/validation.py +37 -0
- aiq/cli/commands/__init__.py +0 -0
- aiq/cli/commands/configure/__init__.py +0 -0
- aiq/cli/commands/configure/channel/__init__.py +0 -0
- aiq/cli/commands/configure/channel/add.py +28 -0
- aiq/cli/commands/configure/channel/channel.py +36 -0
- aiq/cli/commands/configure/channel/remove.py +30 -0
- aiq/cli/commands/configure/channel/update.py +30 -0
- aiq/cli/commands/configure/configure.py +33 -0
- aiq/cli/commands/evaluate.py +139 -0
- aiq/cli/commands/info/__init__.py +14 -0
- aiq/cli/commands/info/info.py +39 -0
- aiq/cli/commands/info/list_channels.py +32 -0
- aiq/cli/commands/info/list_components.py +129 -0
- aiq/cli/commands/info/list_mcp.py +213 -0
- aiq/cli/commands/registry/__init__.py +14 -0
- aiq/cli/commands/registry/publish.py +88 -0
- aiq/cli/commands/registry/pull.py +118 -0
- aiq/cli/commands/registry/registry.py +38 -0
- aiq/cli/commands/registry/remove.py +108 -0
- aiq/cli/commands/registry/search.py +155 -0
- aiq/cli/commands/sizing/__init__.py +14 -0
- aiq/cli/commands/sizing/calc.py +297 -0
- aiq/cli/commands/sizing/sizing.py +27 -0
- aiq/cli/commands/start.py +246 -0
- aiq/cli/commands/uninstall.py +81 -0
- aiq/cli/commands/validate.py +47 -0
- aiq/cli/commands/workflow/__init__.py +14 -0
- aiq/cli/commands/workflow/templates/__init__.py.j2 +0 -0
- aiq/cli/commands/workflow/templates/config.yml.j2 +16 -0
- aiq/cli/commands/workflow/templates/pyproject.toml.j2 +22 -0
- aiq/cli/commands/workflow/templates/register.py.j2 +5 -0
- aiq/cli/commands/workflow/templates/workflow.py.j2 +36 -0
- aiq/cli/commands/workflow/workflow.py +37 -0
- aiq/cli/commands/workflow/workflow_commands.py +313 -0
- aiq/cli/entrypoint.py +135 -0
- aiq/cli/main.py +44 -0
- aiq/cli/register_workflow.py +488 -0
- aiq/cli/type_registry.py +1000 -0
- aiq/data_models/__init__.py +14 -0
- aiq/data_models/api_server.py +694 -0
- aiq/data_models/authentication.py +231 -0
- aiq/data_models/common.py +171 -0
- aiq/data_models/component.py +54 -0
- aiq/data_models/component_ref.py +168 -0
- aiq/data_models/config.py +406 -0
- aiq/data_models/dataset_handler.py +123 -0
- aiq/data_models/discovery_metadata.py +335 -0
- aiq/data_models/embedder.py +27 -0
- aiq/data_models/evaluate.py +127 -0
- aiq/data_models/evaluator.py +26 -0
- aiq/data_models/front_end.py +26 -0
- aiq/data_models/function.py +30 -0
- aiq/data_models/function_dependencies.py +72 -0
- aiq/data_models/interactive.py +246 -0
- aiq/data_models/intermediate_step.py +302 -0
- aiq/data_models/invocation_node.py +38 -0
- aiq/data_models/llm.py +27 -0
- aiq/data_models/logging.py +26 -0
- aiq/data_models/memory.py +27 -0
- aiq/data_models/object_store.py +44 -0
- aiq/data_models/profiler.py +54 -0
- aiq/data_models/registry_handler.py +26 -0
- aiq/data_models/retriever.py +30 -0
- aiq/data_models/retry_mixin.py +35 -0
- aiq/data_models/span.py +187 -0
- aiq/data_models/step_adaptor.py +64 -0
- aiq/data_models/streaming.py +33 -0
- aiq/data_models/swe_bench_model.py +54 -0
- aiq/data_models/telemetry_exporter.py +26 -0
- aiq/data_models/ttc_strategy.py +30 -0
- aiq/embedder/__init__.py +0 -0
- aiq/embedder/langchain_client.py +41 -0
- aiq/embedder/nim_embedder.py +59 -0
- aiq/embedder/openai_embedder.py +43 -0
- aiq/embedder/register.py +24 -0
- aiq/eval/__init__.py +14 -0
- aiq/eval/config.py +60 -0
- aiq/eval/dataset_handler/__init__.py +0 -0
- aiq/eval/dataset_handler/dataset_downloader.py +106 -0
- aiq/eval/dataset_handler/dataset_filter.py +52 -0
- aiq/eval/dataset_handler/dataset_handler.py +254 -0
- aiq/eval/evaluate.py +506 -0
- aiq/eval/evaluator/__init__.py +14 -0
- aiq/eval/evaluator/base_evaluator.py +73 -0
- aiq/eval/evaluator/evaluator_model.py +45 -0
- aiq/eval/intermediate_step_adapter.py +99 -0
- aiq/eval/rag_evaluator/__init__.py +0 -0
- aiq/eval/rag_evaluator/evaluate.py +178 -0
- aiq/eval/rag_evaluator/register.py +143 -0
- aiq/eval/register.py +23 -0
- aiq/eval/remote_workflow.py +133 -0
- aiq/eval/runners/__init__.py +14 -0
- aiq/eval/runners/config.py +39 -0
- aiq/eval/runners/multi_eval_runner.py +54 -0
- aiq/eval/runtime_event_subscriber.py +52 -0
- aiq/eval/swe_bench_evaluator/__init__.py +0 -0
- aiq/eval/swe_bench_evaluator/evaluate.py +215 -0
- aiq/eval/swe_bench_evaluator/register.py +36 -0
- aiq/eval/trajectory_evaluator/__init__.py +0 -0
- aiq/eval/trajectory_evaluator/evaluate.py +75 -0
- aiq/eval/trajectory_evaluator/register.py +40 -0
- aiq/eval/tunable_rag_evaluator/__init__.py +0 -0
- aiq/eval/tunable_rag_evaluator/evaluate.py +245 -0
- aiq/eval/tunable_rag_evaluator/register.py +52 -0
- aiq/eval/usage_stats.py +41 -0
- aiq/eval/utils/__init__.py +0 -0
- aiq/eval/utils/output_uploader.py +140 -0
- aiq/eval/utils/tqdm_position_registry.py +40 -0
- aiq/eval/utils/weave_eval.py +184 -0
- aiq/experimental/__init__.py +0 -0
- aiq/experimental/decorators/__init__.py +0 -0
- aiq/experimental/decorators/experimental_warning_decorator.py +130 -0
- aiq/experimental/test_time_compute/__init__.py +0 -0
- aiq/experimental/test_time_compute/editing/__init__.py +0 -0
- aiq/experimental/test_time_compute/editing/iterative_plan_refinement_editor.py +147 -0
- aiq/experimental/test_time_compute/editing/llm_as_a_judge_editor.py +204 -0
- aiq/experimental/test_time_compute/editing/motivation_aware_summarization.py +107 -0
- aiq/experimental/test_time_compute/functions/__init__.py +0 -0
- aiq/experimental/test_time_compute/functions/execute_score_select_function.py +105 -0
- aiq/experimental/test_time_compute/functions/its_tool_orchestration_function.py +205 -0
- aiq/experimental/test_time_compute/functions/its_tool_wrapper_function.py +146 -0
- aiq/experimental/test_time_compute/functions/plan_select_execute_function.py +224 -0
- aiq/experimental/test_time_compute/models/__init__.py +0 -0
- aiq/experimental/test_time_compute/models/editor_config.py +132 -0
- aiq/experimental/test_time_compute/models/scoring_config.py +112 -0
- aiq/experimental/test_time_compute/models/search_config.py +120 -0
- aiq/experimental/test_time_compute/models/selection_config.py +154 -0
- aiq/experimental/test_time_compute/models/stage_enums.py +43 -0
- aiq/experimental/test_time_compute/models/strategy_base.py +66 -0
- aiq/experimental/test_time_compute/models/tool_use_config.py +41 -0
- aiq/experimental/test_time_compute/models/ttc_item.py +48 -0
- aiq/experimental/test_time_compute/register.py +36 -0
- aiq/experimental/test_time_compute/scoring/__init__.py +0 -0
- aiq/experimental/test_time_compute/scoring/llm_based_agent_scorer.py +168 -0
- aiq/experimental/test_time_compute/scoring/llm_based_plan_scorer.py +168 -0
- aiq/experimental/test_time_compute/scoring/motivation_aware_scorer.py +111 -0
- aiq/experimental/test_time_compute/search/__init__.py +0 -0
- aiq/experimental/test_time_compute/search/multi_llm_planner.py +128 -0
- aiq/experimental/test_time_compute/search/multi_query_retrieval_search.py +122 -0
- aiq/experimental/test_time_compute/search/single_shot_multi_plan_planner.py +128 -0
- aiq/experimental/test_time_compute/selection/__init__.py +0 -0
- aiq/experimental/test_time_compute/selection/best_of_n_selector.py +63 -0
- aiq/experimental/test_time_compute/selection/llm_based_agent_output_selector.py +131 -0
- aiq/experimental/test_time_compute/selection/llm_based_output_merging_selector.py +159 -0
- aiq/experimental/test_time_compute/selection/llm_based_plan_selector.py +128 -0
- aiq/experimental/test_time_compute/selection/threshold_selector.py +58 -0
- aiq/front_ends/__init__.py +14 -0
- aiq/front_ends/console/__init__.py +14 -0
- aiq/front_ends/console/authentication_flow_handler.py +233 -0
- aiq/front_ends/console/console_front_end_config.py +32 -0
- aiq/front_ends/console/console_front_end_plugin.py +96 -0
- aiq/front_ends/console/register.py +25 -0
- aiq/front_ends/cron/__init__.py +14 -0
- aiq/front_ends/fastapi/__init__.py +14 -0
- aiq/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
- aiq/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +27 -0
- aiq/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +107 -0
- aiq/front_ends/fastapi/fastapi_front_end_config.py +234 -0
- aiq/front_ends/fastapi/fastapi_front_end_controller.py +68 -0
- aiq/front_ends/fastapi/fastapi_front_end_plugin.py +116 -0
- aiq/front_ends/fastapi/fastapi_front_end_plugin_worker.py +1092 -0
- aiq/front_ends/fastapi/html_snippets/__init__.py +14 -0
- aiq/front_ends/fastapi/html_snippets/auth_code_grant_success.py +35 -0
- aiq/front_ends/fastapi/intermediate_steps_subscriber.py +80 -0
- aiq/front_ends/fastapi/job_store.py +183 -0
- aiq/front_ends/fastapi/main.py +72 -0
- aiq/front_ends/fastapi/message_handler.py +298 -0
- aiq/front_ends/fastapi/message_validator.py +345 -0
- aiq/front_ends/fastapi/register.py +25 -0
- aiq/front_ends/fastapi/response_helpers.py +195 -0
- aiq/front_ends/fastapi/step_adaptor.py +321 -0
- aiq/front_ends/mcp/__init__.py +14 -0
- aiq/front_ends/mcp/mcp_front_end_config.py +32 -0
- aiq/front_ends/mcp/mcp_front_end_plugin.py +93 -0
- aiq/front_ends/mcp/register.py +27 -0
- aiq/front_ends/mcp/tool_converter.py +242 -0
- aiq/front_ends/register.py +22 -0
- aiq/front_ends/simple_base/__init__.py +14 -0
- aiq/front_ends/simple_base/simple_front_end_plugin_base.py +54 -0
- aiq/llm/__init__.py +0 -0
- aiq/llm/aws_bedrock_llm.py +57 -0
- aiq/llm/nim_llm.py +46 -0
- aiq/llm/openai_llm.py +46 -0
- aiq/llm/register.py +23 -0
- aiq/llm/utils/__init__.py +14 -0
- aiq/llm/utils/env_config_value.py +94 -0
- aiq/llm/utils/error.py +17 -0
- aiq/memory/__init__.py +20 -0
- aiq/memory/interfaces.py +183 -0
- aiq/memory/models.py +112 -0
- aiq/meta/module_to_distro.json +3 -0
- aiq/meta/pypi.md +58 -0
- aiq/object_store/__init__.py +20 -0
- aiq/object_store/in_memory_object_store.py +76 -0
- aiq/object_store/interfaces.py +84 -0
- aiq/object_store/models.py +36 -0
- aiq/object_store/register.py +20 -0
- aiq/observability/__init__.py +14 -0
- aiq/observability/exporter/__init__.py +14 -0
- aiq/observability/exporter/base_exporter.py +449 -0
- aiq/observability/exporter/exporter.py +78 -0
- aiq/observability/exporter/file_exporter.py +33 -0
- aiq/observability/exporter/processing_exporter.py +322 -0
- aiq/observability/exporter/raw_exporter.py +52 -0
- aiq/observability/exporter/span_exporter.py +265 -0
- aiq/observability/exporter_manager.py +335 -0
- aiq/observability/mixin/__init__.py +14 -0
- aiq/observability/mixin/batch_config_mixin.py +26 -0
- aiq/observability/mixin/collector_config_mixin.py +23 -0
- aiq/observability/mixin/file_mixin.py +288 -0
- aiq/observability/mixin/file_mode.py +23 -0
- aiq/observability/mixin/resource_conflict_mixin.py +134 -0
- aiq/observability/mixin/serialize_mixin.py +61 -0
- aiq/observability/mixin/type_introspection_mixin.py +183 -0
- aiq/observability/processor/__init__.py +14 -0
- aiq/observability/processor/batching_processor.py +310 -0
- aiq/observability/processor/callback_processor.py +42 -0
- aiq/observability/processor/intermediate_step_serializer.py +28 -0
- aiq/observability/processor/processor.py +71 -0
- aiq/observability/register.py +96 -0
- aiq/observability/utils/__init__.py +14 -0
- aiq/observability/utils/dict_utils.py +236 -0
- aiq/observability/utils/time_utils.py +31 -0
- aiq/plugins/.namespace +1 -0
- aiq/profiler/__init__.py +0 -0
- aiq/profiler/calc/__init__.py +14 -0
- aiq/profiler/calc/calc_runner.py +627 -0
- aiq/profiler/calc/calculations.py +288 -0
- aiq/profiler/calc/data_models.py +188 -0
- aiq/profiler/calc/plot.py +345 -0
- aiq/profiler/callbacks/__init__.py +0 -0
- aiq/profiler/callbacks/agno_callback_handler.py +295 -0
- aiq/profiler/callbacks/base_callback_class.py +20 -0
- aiq/profiler/callbacks/langchain_callback_handler.py +290 -0
- aiq/profiler/callbacks/llama_index_callback_handler.py +205 -0
- aiq/profiler/callbacks/semantic_kernel_callback_handler.py +238 -0
- aiq/profiler/callbacks/token_usage_base_model.py +27 -0
- aiq/profiler/data_frame_row.py +51 -0
- aiq/profiler/data_models.py +24 -0
- aiq/profiler/decorators/__init__.py +0 -0
- aiq/profiler/decorators/framework_wrapper.py +131 -0
- aiq/profiler/decorators/function_tracking.py +254 -0
- aiq/profiler/forecasting/__init__.py +0 -0
- aiq/profiler/forecasting/config.py +18 -0
- aiq/profiler/forecasting/model_trainer.py +75 -0
- aiq/profiler/forecasting/models/__init__.py +22 -0
- aiq/profiler/forecasting/models/forecasting_base_model.py +40 -0
- aiq/profiler/forecasting/models/linear_model.py +196 -0
- aiq/profiler/forecasting/models/random_forest_regressor.py +268 -0
- aiq/profiler/inference_metrics_model.py +28 -0
- aiq/profiler/inference_optimization/__init__.py +0 -0
- aiq/profiler/inference_optimization/bottleneck_analysis/__init__.py +0 -0
- aiq/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +460 -0
- aiq/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +258 -0
- aiq/profiler/inference_optimization/data_models.py +386 -0
- aiq/profiler/inference_optimization/experimental/__init__.py +0 -0
- aiq/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +468 -0
- aiq/profiler/inference_optimization/experimental/prefix_span_analysis.py +405 -0
- aiq/profiler/inference_optimization/llm_metrics.py +212 -0
- aiq/profiler/inference_optimization/prompt_caching.py +163 -0
- aiq/profiler/inference_optimization/token_uniqueness.py +107 -0
- aiq/profiler/inference_optimization/workflow_runtimes.py +72 -0
- aiq/profiler/intermediate_property_adapter.py +102 -0
- aiq/profiler/profile_runner.py +473 -0
- aiq/profiler/utils.py +184 -0
- aiq/registry_handlers/__init__.py +0 -0
- aiq/registry_handlers/local/__init__.py +0 -0
- aiq/registry_handlers/local/local_handler.py +176 -0
- aiq/registry_handlers/local/register_local.py +37 -0
- aiq/registry_handlers/metadata_factory.py +60 -0
- aiq/registry_handlers/package_utils.py +567 -0
- aiq/registry_handlers/pypi/__init__.py +0 -0
- aiq/registry_handlers/pypi/pypi_handler.py +251 -0
- aiq/registry_handlers/pypi/register_pypi.py +40 -0
- aiq/registry_handlers/register.py +21 -0
- aiq/registry_handlers/registry_handler_base.py +157 -0
- aiq/registry_handlers/rest/__init__.py +0 -0
- aiq/registry_handlers/rest/register_rest.py +56 -0
- aiq/registry_handlers/rest/rest_handler.py +237 -0
- aiq/registry_handlers/schemas/__init__.py +0 -0
- aiq/registry_handlers/schemas/headers.py +42 -0
- aiq/registry_handlers/schemas/package.py +68 -0
- aiq/registry_handlers/schemas/publish.py +63 -0
- aiq/registry_handlers/schemas/pull.py +82 -0
- aiq/registry_handlers/schemas/remove.py +36 -0
- aiq/registry_handlers/schemas/search.py +91 -0
- aiq/registry_handlers/schemas/status.py +47 -0
- aiq/retriever/__init__.py +0 -0
- aiq/retriever/interface.py +37 -0
- aiq/retriever/milvus/__init__.py +14 -0
- aiq/retriever/milvus/register.py +81 -0
- aiq/retriever/milvus/retriever.py +228 -0
- aiq/retriever/models.py +74 -0
- aiq/retriever/nemo_retriever/__init__.py +14 -0
- aiq/retriever/nemo_retriever/register.py +60 -0
- aiq/retriever/nemo_retriever/retriever.py +190 -0
- aiq/retriever/register.py +22 -0
- aiq/runtime/__init__.py +14 -0
- aiq/runtime/loader.py +215 -0
- aiq/runtime/runner.py +190 -0
- aiq/runtime/session.py +158 -0
- aiq/runtime/user_metadata.py +130 -0
- aiq/settings/__init__.py +0 -0
- aiq/settings/global_settings.py +318 -0
- aiq/test/.namespace +1 -0
- aiq/tool/__init__.py +0 -0
- aiq/tool/chat_completion.py +74 -0
- aiq/tool/code_execution/README.md +151 -0
- aiq/tool/code_execution/__init__.py +0 -0
- aiq/tool/code_execution/code_sandbox.py +267 -0
- aiq/tool/code_execution/local_sandbox/.gitignore +1 -0
- aiq/tool/code_execution/local_sandbox/Dockerfile.sandbox +60 -0
- aiq/tool/code_execution/local_sandbox/__init__.py +13 -0
- aiq/tool/code_execution/local_sandbox/local_sandbox_server.py +198 -0
- aiq/tool/code_execution/local_sandbox/sandbox.requirements.txt +6 -0
- aiq/tool/code_execution/local_sandbox/start_local_sandbox.sh +50 -0
- aiq/tool/code_execution/register.py +74 -0
- aiq/tool/code_execution/test_code_execution_sandbox.py +414 -0
- aiq/tool/code_execution/utils.py +100 -0
- aiq/tool/datetime_tools.py +42 -0
- aiq/tool/document_search.py +141 -0
- aiq/tool/github_tools/__init__.py +0 -0
- aiq/tool/github_tools/create_github_commit.py +133 -0
- aiq/tool/github_tools/create_github_issue.py +87 -0
- aiq/tool/github_tools/create_github_pr.py +106 -0
- aiq/tool/github_tools/get_github_file.py +106 -0
- aiq/tool/github_tools/get_github_issue.py +166 -0
- aiq/tool/github_tools/get_github_pr.py +256 -0
- aiq/tool/github_tools/update_github_issue.py +100 -0
- aiq/tool/mcp/__init__.py +14 -0
- aiq/tool/mcp/exceptions.py +142 -0
- aiq/tool/mcp/mcp_client.py +255 -0
- aiq/tool/mcp/mcp_tool.py +96 -0
- aiq/tool/memory_tools/__init__.py +0 -0
- aiq/tool/memory_tools/add_memory_tool.py +79 -0
- aiq/tool/memory_tools/delete_memory_tool.py +67 -0
- aiq/tool/memory_tools/get_memory_tool.py +72 -0
- aiq/tool/nvidia_rag.py +95 -0
- aiq/tool/register.py +38 -0
- aiq/tool/retriever.py +89 -0
- aiq/tool/server_tools.py +66 -0
- aiq/utils/__init__.py +0 -0
- aiq/utils/data_models/__init__.py +0 -0
- aiq/utils/data_models/schema_validator.py +58 -0
- aiq/utils/debugging_utils.py +43 -0
- aiq/utils/dump_distro_mapping.py +32 -0
- aiq/utils/exception_handlers/__init__.py +0 -0
- aiq/utils/exception_handlers/automatic_retries.py +289 -0
- aiq/utils/exception_handlers/mcp.py +211 -0
- aiq/utils/exception_handlers/schemas.py +114 -0
- aiq/utils/io/__init__.py +0 -0
- aiq/utils/io/model_processing.py +28 -0
- aiq/utils/io/yaml_tools.py +119 -0
- aiq/utils/log_utils.py +37 -0
- aiq/utils/metadata_utils.py +74 -0
- aiq/utils/optional_imports.py +142 -0
- aiq/utils/producer_consumer_queue.py +178 -0
- aiq/utils/reactive/__init__.py +0 -0
- aiq/utils/reactive/base/__init__.py +0 -0
- aiq/utils/reactive/base/observable_base.py +65 -0
- aiq/utils/reactive/base/observer_base.py +55 -0
- aiq/utils/reactive/base/subject_base.py +79 -0
- aiq/utils/reactive/observable.py +59 -0
- aiq/utils/reactive/observer.py +76 -0
- aiq/utils/reactive/subject.py +131 -0
- aiq/utils/reactive/subscription.py +49 -0
- aiq/utils/settings/__init__.py +0 -0
- aiq/utils/settings/global_settings.py +197 -0
- aiq/utils/string_utils.py +38 -0
- aiq/utils/type_converter.py +290 -0
- aiq/utils/type_utils.py +484 -0
- aiq/utils/url_utils.py +27 -0
- nvidia_nat-1.2.0rc5.dist-info/METADATA +363 -0
- nvidia_nat-1.2.0rc5.dist-info/RECORD +435 -0
- nvidia_nat-1.2.0rc5.dist-info/WHEEL +5 -0
- nvidia_nat-1.2.0rc5.dist-info/entry_points.txt +20 -0
- nvidia_nat-1.2.0rc5.dist-info/licenses/LICENSE-3rd-party.txt +3686 -0
- nvidia_nat-1.2.0rc5.dist-info/licenses/LICENSE.md +201 -0
- nvidia_nat-1.2.0rc5.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,473 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
import json
|
|
17
|
+
import logging
|
|
18
|
+
import math
|
|
19
|
+
import os
|
|
20
|
+
import statistics
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import Any
|
|
23
|
+
|
|
24
|
+
from pydantic import BaseModel
|
|
25
|
+
|
|
26
|
+
from aiq.data_models.evaluate import ProfilerConfig
|
|
27
|
+
from aiq.data_models.intermediate_step import IntermediateStep
|
|
28
|
+
from aiq.profiler.data_models import ProfilerResults
|
|
29
|
+
from aiq.profiler.forecasting.model_trainer import ModelTrainer
|
|
30
|
+
from aiq.profiler.inference_metrics_model import InferenceMetricsModel
|
|
31
|
+
from aiq.profiler.utils import create_standardized_dataframe
|
|
32
|
+
from aiq.utils.type_converter import TypeConverter
|
|
33
|
+
|
|
34
|
+
logger = logging.getLogger(__name__)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class SimpleMetricsHolder(BaseModel):
    """Container for the three confidence-interval metric groups computed by ``ProfilerRunner``.

    Each field holds the ``model_dump()`` of an ``InferenceMetricsModel`` produced in
    ``ProfilerRunner.run``; fields are typed ``Any`` because the dumped dict shape is
    not pinned at this layer.
    """

    # 90/95/99% confidence intervals for the mean total workflow run time.
    workflow_run_time_confidence_intervals: Any
    # 90/95/99% confidence intervals for the mean LLM latency.
    llm_latency_confidence_intervals: Any
    # 90/95/99% confidence-interval estimates of throughput.
    throughput_estimate_confidence_interval: Any
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class InferenceOptimizationHolder(BaseModel):
    """Aggregate of all inference-optimization results.

    Serialized by ``ProfilerRunner.run`` to ``inference_optimization.json``. The three
    optional analyses are ``None`` when their corresponding config flag is disabled.
    """

    # Always-computed CI metrics (see SimpleMetricsHolder).
    confidence_intervals: SimpleMetricsHolder
    # Result of get_common_prefixes() when prompt_caching_prefixes.enable; otherwise None.
    common_prefixes: Any
    # Result of compute_inter_query_token_uniqueness_by_llm() when enabled; otherwise None.
    token_uniqueness: Any
    # Result of compute_workflow_runtime_metrics() when enabled; otherwise None.
    workflow_runtimes: Any
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class ProfilerRunner:
|
|
51
|
+
"""
|
|
52
|
+
A utility to run a series of prompts through an AIQ Toolkit workflow for profiling:
|
|
53
|
+
|
|
54
|
+
- can load prompts from a file
|
|
55
|
+
- or generate them via an LLM
|
|
56
|
+
- collect usage stats for each run
|
|
57
|
+
- store them in a configured directory
|
|
58
|
+
|
|
59
|
+
Updated version with additional metrics:
|
|
60
|
+
|
|
61
|
+
- For each request, we collect a list of UsageStatistic objects, store them individually,
|
|
62
|
+
and also keep a final large JSON of all requests.
|
|
63
|
+
- We then compute:
|
|
64
|
+
1. 90, 95, 99% confidence intervals for the mean total workflow run time.
|
|
65
|
+
2. 90, 95, 99% confidence intervals for the mean LLM latency.
|
|
66
|
+
3. 90, 95, 99% estimates of throughput.
|
|
67
|
+
|
|
68
|
+
All computed metrics are saved to a metrics JSON file at the end.
|
|
69
|
+
"""
|
|
70
|
+
|
|
71
|
+
def __init__(self, profiler_config: ProfilerConfig, output_dir: Path, write_output: bool = True):
|
|
72
|
+
self.profile_config = profiler_config
|
|
73
|
+
self.output_dir = output_dir
|
|
74
|
+
self.write_output = write_output
|
|
75
|
+
self._converter = TypeConverter([])
|
|
76
|
+
|
|
77
|
+
# Holds per-request data (prompt, output, usage_stats, etc.)
|
|
78
|
+
# This will be saved at the end to a big JSON file
|
|
79
|
+
self.all_requests_data: list[dict] = []
|
|
80
|
+
self.all_steps = []
|
|
81
|
+
|
|
82
|
+
# Ensure output directory
|
|
83
|
+
os.makedirs(output_dir, exist_ok=True)
|
|
84
|
+
|
|
85
|
+
async def run(self, all_steps: list[list[IntermediateStep]]) -> ProfilerResults:
|
|
86
|
+
"""
|
|
87
|
+
Main entrypoint: Works on Input DataFrame generated from eval to fit forecasting model,
|
|
88
|
+
writes out combined requests JSON, then computes and saves additional metrics,
|
|
89
|
+
and optionally fits a forecasting model.
|
|
90
|
+
"""
|
|
91
|
+
from aiq.profiler.inference_optimization.bottleneck_analysis.nested_stack_analysis import \
|
|
92
|
+
multi_example_call_profiling
|
|
93
|
+
from aiq.profiler.inference_optimization.bottleneck_analysis.simple_stack_analysis import \
|
|
94
|
+
profile_workflow_bottlenecks
|
|
95
|
+
from aiq.profiler.inference_optimization.experimental.concurrency_spike_analysis import \
|
|
96
|
+
concurrency_spike_analysis
|
|
97
|
+
from aiq.profiler.inference_optimization.experimental.prefix_span_analysis import \
|
|
98
|
+
prefixspan_subworkflow_with_text
|
|
99
|
+
from aiq.profiler.inference_optimization.llm_metrics import LLMMetrics
|
|
100
|
+
from aiq.profiler.inference_optimization.prompt_caching import get_common_prefixes
|
|
101
|
+
from aiq.profiler.inference_optimization.token_uniqueness import compute_inter_query_token_uniqueness_by_llm
|
|
102
|
+
from aiq.profiler.inference_optimization.workflow_runtimes import compute_workflow_runtime_metrics
|
|
103
|
+
from aiq.profiler.intermediate_property_adapter import IntermediatePropertyAdaptor
|
|
104
|
+
|
|
105
|
+
# Convert the incoming DataFrame to a list of dicts and store
|
|
106
|
+
all_steps = [[IntermediatePropertyAdaptor.from_intermediate_step(step) for step in steps]
|
|
107
|
+
for steps in all_steps] # Add adapter properties to each step
|
|
108
|
+
|
|
109
|
+
self.all_steps = all_steps
|
|
110
|
+
self.all_requests_data = []
|
|
111
|
+
for i, steps in enumerate(all_steps):
|
|
112
|
+
request_data = []
|
|
113
|
+
for step in steps:
|
|
114
|
+
request_data.append(step.model_dump())
|
|
115
|
+
self.all_requests_data.append({"request_number": i, "intermediate_steps": request_data})
|
|
116
|
+
|
|
117
|
+
# Write the final big JSON (all requests)
|
|
118
|
+
if self.write_output:
|
|
119
|
+
final_path = os.path.join(self.output_dir, "all_requests_profiler_traces.json")
|
|
120
|
+
with open(final_path, 'w', encoding='utf-8') as f:
|
|
121
|
+
json.dump(self.all_requests_data, f, indent=2, default=str)
|
|
122
|
+
logger.info("Wrote combined data to: %s", final_path)
|
|
123
|
+
|
|
124
|
+
# ------------------------------------------------------------
|
|
125
|
+
# Generate one standardized dataframe for all usage stats
|
|
126
|
+
# ------------------------------------------------------------
|
|
127
|
+
merged_df = create_standardized_dataframe(all_steps)
|
|
128
|
+
|
|
129
|
+
if self.profile_config.compute_llm_metrics and not merged_df.empty:
|
|
130
|
+
merged_df = LLMMetrics.compute_profiling_metrics(all_steps)
|
|
131
|
+
|
|
132
|
+
output_df = merged_df.copy()
|
|
133
|
+
|
|
134
|
+
if self.profile_config.csv_exclude_io_text and not output_df.empty:
|
|
135
|
+
# Exclude text fields from CSV
|
|
136
|
+
output_df = output_df.drop(columns=['llm_text_input', 'llm_text_output', 'llm_new_token'])
|
|
137
|
+
|
|
138
|
+
# Write this single CSV
|
|
139
|
+
csv_path = os.path.join(self.output_dir, "standardized_data_all.csv")
|
|
140
|
+
output_df.to_csv(csv_path, index=False, encoding='utf-8')
|
|
141
|
+
logger.info("Wrote merged standardized DataFrame to %s", csv_path)
|
|
142
|
+
|
|
143
|
+
# ------------------------------------------------------------
|
|
144
|
+
# Compute and save additional performance metrics
|
|
145
|
+
# ------------------------------------------------------------
|
|
146
|
+
workflow_run_time_ci: InferenceMetricsModel = self._compute_workflow_run_time_confidence_intervals()
|
|
147
|
+
|
|
148
|
+
# 2. 90, 95, 99% confidence intervals of mean LLM latency
|
|
149
|
+
llm_latency_ci: InferenceMetricsModel = self._compute_llm_latency_confidence_intervals()
|
|
150
|
+
|
|
151
|
+
# 3. 90, 95, 99% estimates of throughput
|
|
152
|
+
throughput_ci: InferenceMetricsModel = self._compute_throughput_estimates()
|
|
153
|
+
|
|
154
|
+
# Collect all computed metrics
|
|
155
|
+
simple_metrics = SimpleMetricsHolder(workflow_run_time_confidence_intervals=workflow_run_time_ci.model_dump(),
|
|
156
|
+
llm_latency_confidence_intervals=llm_latency_ci.model_dump(),
|
|
157
|
+
throughput_estimate_confidence_interval=throughput_ci.model_dump())
|
|
158
|
+
|
|
159
|
+
common_prefix_results = token_uniqueness_results = workflow_runtimes_results = None
|
|
160
|
+
|
|
161
|
+
if self.profile_config.prompt_caching_prefixes.enable:
|
|
162
|
+
# ------------------------------------------------------------
|
|
163
|
+
# Compute and save common prefixes
|
|
164
|
+
# ------------------------------------------------------------
|
|
165
|
+
|
|
166
|
+
prefixes = get_common_prefixes(all_steps, self.profile_config.prompt_caching_prefixes.min_frequency)
|
|
167
|
+
common_prefix_results = prefixes
|
|
168
|
+
|
|
169
|
+
if self.profile_config.token_uniqueness_forecast:
|
|
170
|
+
# ------------------------------------------------------------
|
|
171
|
+
# Compute and save inter-query token uniqueness
|
|
172
|
+
# ------------------------------------------------------------
|
|
173
|
+
|
|
174
|
+
uniqueness = compute_inter_query_token_uniqueness_by_llm(all_steps)
|
|
175
|
+
token_uniqueness_results = uniqueness
|
|
176
|
+
|
|
177
|
+
if self.profile_config.workflow_runtime_forecast or self.profile_config.base_metrics:
|
|
178
|
+
# ------------------------------------------------------------
|
|
179
|
+
# Compute and save workflow runtime metrics
|
|
180
|
+
# ------------------------------------------------------------
|
|
181
|
+
|
|
182
|
+
workflow_runtimes = compute_workflow_runtime_metrics(all_steps)
|
|
183
|
+
workflow_runtimes_results = workflow_runtimes
|
|
184
|
+
|
|
185
|
+
inference_optimization_results = InferenceOptimizationHolder(confidence_intervals=simple_metrics,
|
|
186
|
+
common_prefixes=common_prefix_results,
|
|
187
|
+
token_uniqueness=token_uniqueness_results,
|
|
188
|
+
workflow_runtimes=workflow_runtimes_results)
|
|
189
|
+
|
|
190
|
+
if self.write_output and inference_optimization_results:
|
|
191
|
+
# Save to JSON
|
|
192
|
+
optimization_results_path = os.path.join(self.output_dir, "inference_optimization.json")
|
|
193
|
+
with open(optimization_results_path, 'w', encoding='utf-8') as f:
|
|
194
|
+
json.dump(inference_optimization_results.model_dump(), f, indent=2)
|
|
195
|
+
logger.info("Wrote inference optimization results to: %s", optimization_results_path)
|
|
196
|
+
|
|
197
|
+
workflow_profiling_reports = ""
|
|
198
|
+
workflow_profiling_metrics = {}
|
|
199
|
+
|
|
200
|
+
if self.profile_config.bottleneck_analysis.enable_simple_stack:
|
|
201
|
+
# ------------------------------------------------------------
|
|
202
|
+
# Profile workflow bottlenecks
|
|
203
|
+
# ------------------------------------------------------------
|
|
204
|
+
|
|
205
|
+
workflow_bottlenecks = profile_workflow_bottlenecks(all_steps)
|
|
206
|
+
workflow_bottlenecks = workflow_bottlenecks.model_dump()
|
|
207
|
+
workflow_profiling_reports += "\n\n\n" + workflow_bottlenecks["summary"]
|
|
208
|
+
workflow_profiling_metrics["simple_stack_analysis"] = workflow_bottlenecks["stats"]
|
|
209
|
+
logger.info("Simple stack analysis complete")
|
|
210
|
+
|
|
211
|
+
if self.profile_config.bottleneck_analysis.enable_nested_stack:
|
|
212
|
+
# ------------------------------------------------------------
|
|
213
|
+
# Profile workflow bottlenecks with nested stack analysis
|
|
214
|
+
# ------------------------------------------------------------
|
|
215
|
+
nested_bottlenecks = multi_example_call_profiling(all_steps, output_dir=str(self.output_dir))
|
|
216
|
+
workflow_profiling_reports += "\n\n\n" + nested_bottlenecks.textual_report
|
|
217
|
+
workflow_profiling_metrics["nested_stack_analysis"] = nested_bottlenecks.model_dump(
|
|
218
|
+
exclude=["textual_report"])
|
|
219
|
+
logger.info("Nested stack analysis complete")
|
|
220
|
+
|
|
221
|
+
if self.profile_config.concurrency_spike_analysis.enable:
|
|
222
|
+
# ------------------------------------------------------------
|
|
223
|
+
# Profile concurrency spikes
|
|
224
|
+
# ------------------------------------------------------------
|
|
225
|
+
concurrency_metrics = concurrency_spike_analysis(
|
|
226
|
+
all_steps, self.profile_config.concurrency_spike_analysis.spike_threshold)
|
|
227
|
+
workflow_profiling_reports += "\n\n\n" + concurrency_metrics.textual_report
|
|
228
|
+
workflow_profiling_metrics["concurrency_spike_analysis"] = concurrency_metrics.model_dump(
|
|
229
|
+
exclude=["textual_report"])
|
|
230
|
+
logger.info("Concurrency spike analysis complete")
|
|
231
|
+
|
|
232
|
+
if self.profile_config.prefix_span_analysis.enable:
|
|
233
|
+
# ------------------------------------------------------------
|
|
234
|
+
# Profile prefix span analysis
|
|
235
|
+
# ------------------------------------------------------------
|
|
236
|
+
prefix_list = []
|
|
237
|
+
if (self.profile_config.prefix_span_analysis.chain_with_common_prefixes
|
|
238
|
+
and "common_prefixes" in inference_optimization_results):
|
|
239
|
+
logger.info("Using common prefixes for prefix span analysis")
|
|
240
|
+
for _, llm_data in inference_optimization_results["common_prefixes"].items():
|
|
241
|
+
for prefix_data in llm_data["prefix_info"]:
|
|
242
|
+
prefix_list.append(prefix_data["prefix"])
|
|
243
|
+
|
|
244
|
+
prefix_span_analysis = prefixspan_subworkflow_with_text(
|
|
245
|
+
all_steps,
|
|
246
|
+
**self.profile_config.prefix_span_analysis.model_dump(exclude=["enable", "chain_with_common_prefixes"]),
|
|
247
|
+
prefix_list=prefix_list)
|
|
248
|
+
|
|
249
|
+
workflow_profiling_reports += "\n\n\n" + prefix_span_analysis.textual_report
|
|
250
|
+
workflow_profiling_metrics["prefix_span_analysis"] = prefix_span_analysis.model_dump(
|
|
251
|
+
exclude=["textual_report"])
|
|
252
|
+
logger.info("Prefix span analysis complete")
|
|
253
|
+
|
|
254
|
+
if self.write_output and workflow_profiling_reports:
|
|
255
|
+
# Save to text file
|
|
256
|
+
profiling_report_path = os.path.join(self.output_dir, "workflow_profiling_report.txt")
|
|
257
|
+
with open(profiling_report_path, 'w', encoding='utf-8') as f:
|
|
258
|
+
f.write(workflow_profiling_reports)
|
|
259
|
+
logger.info("Wrote workflow profiling report to: %s", profiling_report_path)
|
|
260
|
+
|
|
261
|
+
if self.write_output and workflow_profiling_metrics:
|
|
262
|
+
# Save to JSON
|
|
263
|
+
profiling_metrics_path = os.path.join(self.output_dir, "workflow_profiling_metrics.json")
|
|
264
|
+
with open(profiling_metrics_path, 'w', encoding='utf-8') as f:
|
|
265
|
+
json.dump(workflow_profiling_metrics, f, indent=2)
|
|
266
|
+
logger.info("Wrote workflow profiling metrics to: %s", profiling_metrics_path)
|
|
267
|
+
|
|
268
|
+
if self.profile_config.token_usage_forecast:
|
|
269
|
+
# ------------------------------------------------------------
|
|
270
|
+
# Fit forecasting model and save
|
|
271
|
+
# ------------------------------------------------------------
|
|
272
|
+
|
|
273
|
+
logger.info("Fitting model for forecasting.")
|
|
274
|
+
model_trainer = ModelTrainer()
|
|
275
|
+
|
|
276
|
+
try:
|
|
277
|
+
fitted_model = model_trainer.train(all_steps)
|
|
278
|
+
logger.info("Fitted model for forecasting.")
|
|
279
|
+
except Exception as e:
|
|
280
|
+
logger.exception("Fitting model failed. %s", e, exc_info=True)
|
|
281
|
+
return ProfilerResults()
|
|
282
|
+
|
|
283
|
+
if self.write_output:
|
|
284
|
+
os.makedirs(self.output_dir, exist_ok=True)
|
|
285
|
+
|
|
286
|
+
import pickle
|
|
287
|
+
with open(os.path.join(self.output_dir, "fitted_model.pkl"), 'wb') as f:
|
|
288
|
+
pickle.dump(fitted_model, f)
|
|
289
|
+
|
|
290
|
+
logger.info("Saved fitted model to disk.")
|
|
291
|
+
|
|
292
|
+
return ProfilerResults(workflow_runtime_metrics=workflow_runtimes_results, llm_latency_ci=llm_latency_ci)
|
|
293
|
+
|
|
294
|
+
# -------------------------------------------------------------------
|
|
295
|
+
# Confidence Intervals / Metrics
|
|
296
|
+
# -------------------------------------------------------------------
|
|
297
|
+
def _compute_workflow_run_time_confidence_intervals(self) -> InferenceMetricsModel:
    """
    Compute 90/95/99% confidence intervals for the mean total workflow run time (seconds).

    Each request's run time is the span between its earliest and latest event
    timestamps in usage_stats; requests with no recorded events are skipped.
    """
    durations: list[float] = []
    for steps in self.all_steps:
        event_times = [step.event_timestamp for step in steps]
        if event_times:
            durations.append(max(event_times) - min(event_times))

    return self._compute_confidence_intervals(durations, "Workflow Run Time")
|
|
315
|
+
|
|
316
|
+
def _compute_llm_latency_confidence_intervals(self) -> InferenceMetricsModel:
    """
    Compute 90/95/99% confidence intervals for the mean LLM latency.

    Latency is the gap between an LLM_END event timestamp and the most recent
    unmatched LLM_START timestamp, scanned in chronological order within each
    request's usage_stats. An LLM_END with no pending LLM_START is ignored.
    """
    latencies: list[float] = []
    for steps in self.all_steps:

        pending_start = None
        for step in sorted(steps, key=lambda s: s.event_timestamp):
            kind = step.event_type.value
            stamp = step.event_timestamp
            if kind == "LLM_START":
                pending_start = stamp
            elif kind == "LLM_END" and pending_start is not None:
                latencies.append(stamp - pending_start)
                # Each START is consumed by at most one END.
                pending_start = None

    return self._compute_confidence_intervals(latencies, "LLM Latency")
|
|
338
|
+
|
|
339
|
+
def _compute_throughput_estimates(self) -> InferenceMetricsModel:
    """
    Compute 90/95/99% confidence intervals for overall throughput, defined as:

    | throughput = (total number of requests) / (total time window),

    where the time window spans the earliest to the latest usage_stats event
    across all requests. This is a simple approximate measure for the whole run.

    The interval widths use a naive normal approximation with
    SE = throughput / sqrt(n); this is purely heuristic (a Poisson-based
    approach would be more accurate). Returns an empty model when there are
    no events, a non-positive window, or fewer than two requests.
    """
    # Flatten every event timestamp across ALL requests.
    timestamps = [step.event_timestamp for steps in self.all_steps for step in steps]

    if not timestamps:
        return InferenceMetricsModel()

    window = max(timestamps) - min(timestamps)
    if window <= 0:
        # A non-positive window admits no meaningful rate estimate.
        return InferenceMetricsModel()

    request_count = len(self.all_requests_data)
    # Single point estimate of throughput for the whole run.
    rate = request_count / window

    if request_count <= 1:
        return InferenceMetricsModel()

    # Heuristic standard error: treat the rate as a sample mean with
    # n = request_count, giving SE = rate / sqrt(n).
    std_err = rate / math.sqrt(request_count)

    # Assemble the model fields; lower bounds are clamped at zero since a
    # negative throughput is meaningless.
    fields = {'n': request_count, 'mean': rate}
    z_table = (("ninetieth_interval", 1.645), ("ninety_fifth_interval", 1.96), ("ninety_ninth_interval", 2.576))
    for field_name, z_score in z_table:
        margin = z_score * std_err
        fields[field_name] = (max(rate - margin, 0.0), rate + margin)

    return InferenceMetricsModel(**fields)
|
|
397
|
+
|
|
398
|
+
def _compute_confidence_intervals(self, data: list[float], metric_name: str) -> InferenceMetricsModel:
    """
    Compute 90/95/99% confidence intervals for the mean of *data*, plus the
    empirical p90/p95/p99 percentiles.

    Intervals use a z-score normal approximation with the population standard
    deviation. With no data an empty model is returned (after a warning); with
    a single point every interval and percentile collapses to the mean.

    The resulting model carries fields like::

        {
            'ninetieth_interval': (lower, upper),
            'ninety_fifth_interval': (lower, upper),
            'ninety_ninth_interval': (lower, upper),
        }
    """
    if not data:
        logger.warning("No data points for %s, cannot compute intervals.", metric_name)
        return InferenceMetricsModel()

    sample_size = len(data)
    mean_val = statistics.mean(data)
    if sample_size <= 1:
        # Degenerate sample: no spread to estimate.
        point = (mean_val, mean_val)
        return InferenceMetricsModel(
            n=sample_size,
            mean=mean_val,
            ninetieth_interval=point,
            ninety_fifth_interval=point,
            ninety_ninth_interval=point,
            p90=mean_val,
            p95=mean_val,
            p99=mean_val,
        )

    # Standard error from the population stdev (normal approximation).
    std_err = statistics.pstdev(data) / math.sqrt(sample_size)

    fields = {"n": sample_size, "mean": mean_val}
    z_table = (("ninetieth_interval", 1.645), ("ninety_fifth_interval", 1.96), ("ninety_ninth_interval", 2.576))
    for field_name, z_score in z_table:
        margin = z_score * std_err
        fields[field_name] = (mean_val - margin, mean_val + margin)

    # ------------------------------------------------------------------
    # Percentiles
    # ------------------------------------------------------------------
    ordered = sorted(data)

    def _percentile(values: list[float], pct: float) -> float:
        """
        Percentile via linear interpolation between closest ranks.
        pct is given from 0-100 (e.g. 90 for p90).
        """
        if not values:
            return 0.0
        rank = (len(values) - 1) * (pct / 100.0)
        lo = math.floor(rank)
        hi = math.ceil(rank)
        if lo == hi:
            return values[int(rank)]
        return values[lo] + (values[hi] - values[lo]) * (rank - lo)

    for pct_key, pct in (("p90", 90), ("p95", 95), ("p99", 99)):
        fields[pct_key] = _percentile(ordered, pct)

    return InferenceMetricsModel(**fields)
|
aiq/profiler/utils.py
ADDED
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
import inspect
|
|
17
|
+
import logging
|
|
18
|
+
import re
|
|
19
|
+
from collections.abc import Callable
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
import pandas as pd
|
|
23
|
+
|
|
24
|
+
from aiq.builder.framework_enum import LLMFrameworkEnum
|
|
25
|
+
from aiq.cli.type_registry import RegisteredFunctionInfo
|
|
26
|
+
from aiq.data_models.intermediate_step import IntermediateStep
|
|
27
|
+
from aiq.profiler.data_frame_row import DataFrameRow
|
|
28
|
+
|
|
29
|
+
# A simple set of regex patterns to scan for direct references to LLMFrameworkEnum.
# Maps each enum member to a word-boundary pattern for its member name.
# NOTE: `t.name` is the public Enum API (the sunder `_name_` is internal).
_FRAMEWORK_REGEX_MAP = {t: fr'\b{t.name}\b' for t in LLMFrameworkEnum}
|
|
31
|
+
|
|
32
|
+
logger = logging.getLogger(__name__)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def detect_llm_frameworks_in_build_fn(registration: RegisteredFunctionInfo) -> list[LLMFrameworkEnum]:
    """
    Analyze a function's source (the build_fn) to see which LLM frameworks it uses. Also recurses
    into any additional Python functions that the build_fn calls while passing `builder`, so that
    references to LLMFrameworkEnum in those helper calls are also detected.

    1. If `registration.framework_wrappers` is non-empty, we return that first.
       (We do convert them to LLMFrameworkEnum if possible.)
    2. Otherwise, we attempt to:

       - Get the build_fn's source via `inspect.getsource(...)`
       - Parse it for references to LLMFrameworkEnum
       - Find any function calls that include the word "builder" in the arguments
       - Recursively parse those functions' source code for frameworks

    3. If we cannot parse the source at all (e.g. OSError), we return a list of all frameworks.
    """
    # ----------------------------------------------------------------
    # 1) If frameworks were explicitly declared in registration.framework_wrappers, use them:
    if registration.framework_wrappers:
        results: list[LLMFrameworkEnum] = []
        for fw_str in registration.framework_wrappers:
            try:
                results.append(LLMFrameworkEnum(fw_str))
            except ValueError:
                # If it's not recognized, log and skip it.
                logger.warning("Unrecognized framework %s in registration.framework_wrappers", fw_str)

        return list(set(results))  # unique
    # ----------------------------------------------------------------

    # Track visited function objects so mutually-recursive helpers don't loop forever.
    visited_fns: set[Callable[..., Any]] = set()
    # Frameworks discovered while walking source code.
    discovered: set[LLMFrameworkEnum] = set()

    def _parse_source_for_frameworks(src: str) -> None:
        """Check lines for any direct references to LLMFrameworkEnum.* or placeholders in the map."""
        for fw_enum_member, pattern in _FRAMEWORK_REGEX_MAP.items():
            if re.search(pattern, src):
                discovered.add(fw_enum_member)

    def _find_builder_func_calls(src: str) -> list[str]:
        """
        Look for calls of the form: some_func(..., builder, ...)
        or some_func(..., builder=..., ...)

        This returns the name of each function we found being called, e.g. 'some_func'.
        It's a naive best-effort regex approach and group(1) is the function name.
        """
        # E.g. foo(builder) or foo( param=..., builder=builder )
        pattern = r'(\w+)\s*\([^)]*\bbuilder\b[^)]*\)'
        return re.findall(pattern, src)

    def _recurse_parse(fn: Callable[..., Any], visited: set[Callable[..., Any]]) -> None:
        """Recursively parse the source code of `fn`, add discovered frameworks,
        and parse any new functions that get called with 'builder'."""
        if fn in visited:
            return
        visited.add(fn)

        try:
            src = inspect.getsource(fn)
        except OSError:
            # If we can't read the source, be conservative: assume all frameworks.
            discovered.update(_FRAMEWORK_REGEX_MAP)
            return

        # parse direct references
        _parse_source_for_frameworks(src)

        # parse any function calls that pass in "builder"
        child_func_names = _find_builder_func_calls(src)
        if not child_func_names:
            return

        # Child functions are resolved only as top-level names of fn's own module.
        mod = inspect.getmodule(fn)
        if not mod:
            return
        for child_name in child_func_names:
            child_obj = getattr(mod, child_name, None)
            if callable(child_obj):
                _recurse_parse(child_obj, visited)

    # ----------------------------------------------------------------
    # 2) Actually do the BFS/DFS parse on `registration.build_fn`
    main_fn = registration.build_fn

    try:
        _recurse_parse(main_fn, visited_fns)
    except Exception:
        # Unexpected parse failure: log it and fall back to "all frameworks"
        # rather than silently missing one.
        logger.exception("Failed to inspect %s for framework usage; assuming all frameworks.",
                         getattr(main_fn, "__name__", main_fn))
        discovered.update(_FRAMEWORK_REGEX_MAP)
    # ----------------------------------------------------------------
    if discovered:
        logger.warning(
            "Discovered frameworks: %s in function %s by inspecting "
            "source. It is recommended and more reliable to instead add the used LLMFrameworkEnum "
            "types in the framework_wrappers argument when calling @register_function.",
            discovered,
            main_fn.__name__)

    return list(discovered)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
# -------------------------------------------------------------------
# Create a single standardized DataFrame for all usage stats
# -------------------------------------------------------------------
def create_standardized_dataframe(requests_data: list[list[IntermediateStep]]) -> pd.DataFrame:
    """
    Merge usage stats for *all* requests into one DataFrame, each row representing a usage_stats entry.

    - Include a column 'example_number' to mark which request it originated from.
    - Best-effort: if any step cannot be converted, the error is logged and an
      empty DataFrame is returned instead of raising.
    """
    all_rows = []
    try:
        for example_number, steps in enumerate(requests_data):
            for step in steps:
                # Flatten each IntermediateStep into one DataFrameRow record.
                all_rows.append(
                    DataFrameRow(event_timestamp=step.event_timestamp,
                                 example_number=example_number,
                                 prompt_tokens=step.token_usage.prompt_tokens,
                                 completion_tokens=step.token_usage.completion_tokens,
                                 total_tokens=step.token_usage.total_tokens,
                                 llm_text_input=step.llm_text_input,
                                 llm_text_output=step.llm_text_output,
                                 llm_new_token=step.llm_text_chunk,
                                 llm_name=step.llm_name,
                                 tool_name=step.tool_name,
                                 function_name=step.function_name,
                                 function_id=step.function_id,
                                 parent_function_name=step.parent_function_name,
                                 parent_function_id=step.parent_function_id,
                                 UUID=step.payload.UUID,
                                 framework=step.framework,
                                 event_type=step.event_type).model_dump())

    except Exception as e:
        # logger.exception already records the traceback; no explicit exc_info needed.
        logger.exception("Error creating standardized DataFrame: %s", e)
        return pd.DataFrame()

    if not all_rows:
        return pd.DataFrame()

    return pd.DataFrame.from_records(all_rows)
|
|
File without changes
|
|
File without changes
|