aiqtoolkit 1.2.0.dev0__py3-none-any.whl → 1.2.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiqtoolkit might be problematic. Click here for more details.
- aiq/agent/base.py +170 -8
- aiq/agent/dual_node.py +1 -1
- aiq/agent/react_agent/agent.py +146 -112
- aiq/agent/react_agent/prompt.py +1 -6
- aiq/agent/react_agent/register.py +36 -35
- aiq/agent/rewoo_agent/agent.py +36 -35
- aiq/agent/rewoo_agent/register.py +2 -2
- aiq/agent/tool_calling_agent/agent.py +3 -7
- aiq/agent/tool_calling_agent/register.py +1 -1
- aiq/authentication/__init__.py +14 -0
- aiq/authentication/api_key/__init__.py +14 -0
- aiq/authentication/api_key/api_key_auth_provider.py +92 -0
- aiq/authentication/api_key/api_key_auth_provider_config.py +124 -0
- aiq/authentication/api_key/register.py +26 -0
- aiq/authentication/exceptions/__init__.py +14 -0
- aiq/authentication/exceptions/api_key_exceptions.py +38 -0
- aiq/authentication/exceptions/auth_code_grant_exceptions.py +86 -0
- aiq/authentication/exceptions/call_back_exceptions.py +38 -0
- aiq/authentication/exceptions/request_exceptions.py +54 -0
- aiq/authentication/http_basic_auth/__init__.py +0 -0
- aiq/authentication/http_basic_auth/http_basic_auth_provider.py +81 -0
- aiq/authentication/http_basic_auth/register.py +30 -0
- aiq/authentication/interfaces.py +93 -0
- aiq/authentication/oauth2/__init__.py +14 -0
- aiq/authentication/oauth2/oauth2_auth_code_flow_provider.py +107 -0
- aiq/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +39 -0
- aiq/authentication/oauth2/register.py +25 -0
- aiq/authentication/register.py +21 -0
- aiq/builder/builder.py +64 -2
- aiq/builder/component_utils.py +16 -3
- aiq/builder/context.py +37 -0
- aiq/builder/eval_builder.py +43 -2
- aiq/builder/function.py +44 -12
- aiq/builder/function_base.py +1 -1
- aiq/builder/intermediate_step_manager.py +6 -8
- aiq/builder/user_interaction_manager.py +3 -0
- aiq/builder/workflow.py +23 -18
- aiq/builder/workflow_builder.py +421 -61
- aiq/cli/commands/info/list_mcp.py +103 -16
- aiq/cli/commands/sizing/__init__.py +14 -0
- aiq/cli/commands/sizing/calc.py +294 -0
- aiq/cli/commands/sizing/sizing.py +27 -0
- aiq/cli/commands/start.py +2 -1
- aiq/cli/entrypoint.py +2 -0
- aiq/cli/register_workflow.py +80 -0
- aiq/cli/type_registry.py +151 -30
- aiq/data_models/api_server.py +124 -12
- aiq/data_models/authentication.py +231 -0
- aiq/data_models/common.py +35 -7
- aiq/data_models/component.py +17 -9
- aiq/data_models/component_ref.py +33 -0
- aiq/data_models/config.py +60 -3
- aiq/data_models/dataset_handler.py +2 -1
- aiq/data_models/embedder.py +1 -0
- aiq/data_models/evaluate.py +23 -0
- aiq/data_models/function_dependencies.py +8 -0
- aiq/data_models/interactive.py +10 -1
- aiq/data_models/intermediate_step.py +38 -5
- aiq/data_models/its_strategy.py +30 -0
- aiq/data_models/llm.py +1 -0
- aiq/data_models/memory.py +1 -0
- aiq/data_models/object_store.py +44 -0
- aiq/data_models/profiler.py +1 -0
- aiq/data_models/retry_mixin.py +35 -0
- aiq/data_models/span.py +187 -0
- aiq/data_models/telemetry_exporter.py +2 -2
- aiq/embedder/nim_embedder.py +2 -1
- aiq/embedder/openai_embedder.py +2 -1
- aiq/eval/config.py +19 -1
- aiq/eval/dataset_handler/dataset_handler.py +87 -2
- aiq/eval/evaluate.py +208 -27
- aiq/eval/evaluator/base_evaluator.py +73 -0
- aiq/eval/evaluator/evaluator_model.py +1 -0
- aiq/eval/intermediate_step_adapter.py +11 -5
- aiq/eval/rag_evaluator/evaluate.py +55 -15
- aiq/eval/rag_evaluator/register.py +6 -1
- aiq/eval/remote_workflow.py +7 -2
- aiq/eval/runners/__init__.py +14 -0
- aiq/eval/runners/config.py +39 -0
- aiq/eval/runners/multi_eval_runner.py +54 -0
- aiq/eval/trajectory_evaluator/evaluate.py +22 -65
- aiq/eval/tunable_rag_evaluator/evaluate.py +150 -168
- aiq/eval/tunable_rag_evaluator/register.py +2 -0
- aiq/eval/usage_stats.py +41 -0
- aiq/eval/utils/output_uploader.py +10 -1
- aiq/eval/utils/weave_eval.py +184 -0
- aiq/experimental/__init__.py +0 -0
- aiq/experimental/decorators/__init__.py +0 -0
- aiq/experimental/decorators/experimental_warning_decorator.py +130 -0
- aiq/experimental/inference_time_scaling/__init__.py +0 -0
- aiq/experimental/inference_time_scaling/editing/__init__.py +0 -0
- aiq/experimental/inference_time_scaling/editing/iterative_plan_refinement_editor.py +147 -0
- aiq/experimental/inference_time_scaling/editing/llm_as_a_judge_editor.py +204 -0
- aiq/experimental/inference_time_scaling/editing/motivation_aware_summarization.py +107 -0
- aiq/experimental/inference_time_scaling/functions/__init__.py +0 -0
- aiq/experimental/inference_time_scaling/functions/execute_score_select_function.py +105 -0
- aiq/experimental/inference_time_scaling/functions/its_tool_orchestration_function.py +205 -0
- aiq/experimental/inference_time_scaling/functions/its_tool_wrapper_function.py +146 -0
- aiq/experimental/inference_time_scaling/functions/plan_select_execute_function.py +224 -0
- aiq/experimental/inference_time_scaling/models/__init__.py +0 -0
- aiq/experimental/inference_time_scaling/models/editor_config.py +132 -0
- aiq/experimental/inference_time_scaling/models/its_item.py +48 -0
- aiq/experimental/inference_time_scaling/models/scoring_config.py +112 -0
- aiq/experimental/inference_time_scaling/models/search_config.py +120 -0
- aiq/experimental/inference_time_scaling/models/selection_config.py +154 -0
- aiq/experimental/inference_time_scaling/models/stage_enums.py +43 -0
- aiq/experimental/inference_time_scaling/models/strategy_base.py +66 -0
- aiq/experimental/inference_time_scaling/models/tool_use_config.py +41 -0
- aiq/experimental/inference_time_scaling/register.py +36 -0
- aiq/experimental/inference_time_scaling/scoring/__init__.py +0 -0
- aiq/experimental/inference_time_scaling/scoring/llm_based_agent_scorer.py +168 -0
- aiq/experimental/inference_time_scaling/scoring/llm_based_plan_scorer.py +168 -0
- aiq/experimental/inference_time_scaling/scoring/motivation_aware_scorer.py +111 -0
- aiq/experimental/inference_time_scaling/search/__init__.py +0 -0
- aiq/experimental/inference_time_scaling/search/multi_llm_planner.py +128 -0
- aiq/experimental/inference_time_scaling/search/multi_query_retrieval_search.py +122 -0
- aiq/experimental/inference_time_scaling/search/single_shot_multi_plan_planner.py +128 -0
- aiq/experimental/inference_time_scaling/selection/__init__.py +0 -0
- aiq/experimental/inference_time_scaling/selection/best_of_n_selector.py +63 -0
- aiq/experimental/inference_time_scaling/selection/llm_based_agent_output_selector.py +131 -0
- aiq/experimental/inference_time_scaling/selection/llm_based_output_merging_selector.py +159 -0
- aiq/experimental/inference_time_scaling/selection/llm_based_plan_selector.py +128 -0
- aiq/experimental/inference_time_scaling/selection/threshold_selector.py +58 -0
- aiq/front_ends/console/authentication_flow_handler.py +233 -0
- aiq/front_ends/console/console_front_end_plugin.py +11 -2
- aiq/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
- aiq/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +27 -0
- aiq/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +107 -0
- aiq/front_ends/fastapi/fastapi_front_end_config.py +93 -9
- aiq/front_ends/fastapi/fastapi_front_end_controller.py +68 -0
- aiq/front_ends/fastapi/fastapi_front_end_plugin.py +14 -1
- aiq/front_ends/fastapi/fastapi_front_end_plugin_worker.py +537 -52
- aiq/front_ends/fastapi/html_snippets/__init__.py +14 -0
- aiq/front_ends/fastapi/html_snippets/auth_code_grant_success.py +35 -0
- aiq/front_ends/fastapi/job_store.py +47 -25
- aiq/front_ends/fastapi/main.py +2 -0
- aiq/front_ends/fastapi/message_handler.py +108 -89
- aiq/front_ends/fastapi/step_adaptor.py +2 -1
- aiq/llm/aws_bedrock_llm.py +57 -0
- aiq/llm/nim_llm.py +2 -1
- aiq/llm/openai_llm.py +3 -2
- aiq/llm/register.py +1 -0
- aiq/meta/pypi.md +12 -12
- aiq/object_store/__init__.py +20 -0
- aiq/object_store/in_memory_object_store.py +74 -0
- aiq/object_store/interfaces.py +84 -0
- aiq/object_store/models.py +36 -0
- aiq/object_store/register.py +20 -0
- aiq/observability/__init__.py +14 -0
- aiq/observability/exporter/__init__.py +14 -0
- aiq/observability/exporter/base_exporter.py +449 -0
- aiq/observability/exporter/exporter.py +78 -0
- aiq/observability/exporter/file_exporter.py +33 -0
- aiq/observability/exporter/processing_exporter.py +269 -0
- aiq/observability/exporter/raw_exporter.py +52 -0
- aiq/observability/exporter/span_exporter.py +264 -0
- aiq/observability/exporter_manager.py +335 -0
- aiq/observability/mixin/__init__.py +14 -0
- aiq/observability/mixin/batch_config_mixin.py +26 -0
- aiq/observability/mixin/collector_config_mixin.py +23 -0
- aiq/observability/mixin/file_mixin.py +288 -0
- aiq/observability/mixin/file_mode.py +23 -0
- aiq/observability/mixin/resource_conflict_mixin.py +134 -0
- aiq/observability/mixin/serialize_mixin.py +61 -0
- aiq/observability/mixin/type_introspection_mixin.py +183 -0
- aiq/observability/processor/__init__.py +14 -0
- aiq/observability/processor/batching_processor.py +316 -0
- aiq/observability/processor/intermediate_step_serializer.py +28 -0
- aiq/observability/processor/processor.py +68 -0
- aiq/observability/register.py +36 -39
- aiq/observability/utils/__init__.py +14 -0
- aiq/observability/utils/dict_utils.py +236 -0
- aiq/observability/utils/time_utils.py +31 -0
- aiq/profiler/calc/__init__.py +14 -0
- aiq/profiler/calc/calc_runner.py +623 -0
- aiq/profiler/calc/calculations.py +288 -0
- aiq/profiler/calc/data_models.py +176 -0
- aiq/profiler/calc/plot.py +345 -0
- aiq/profiler/callbacks/langchain_callback_handler.py +22 -10
- aiq/profiler/data_models.py +24 -0
- aiq/profiler/inference_metrics_model.py +3 -0
- aiq/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +8 -0
- aiq/profiler/inference_optimization/data_models.py +2 -2
- aiq/profiler/inference_optimization/llm_metrics.py +2 -2
- aiq/profiler/profile_runner.py +61 -21
- aiq/runtime/loader.py +9 -3
- aiq/runtime/runner.py +23 -9
- aiq/runtime/session.py +25 -7
- aiq/runtime/user_metadata.py +2 -3
- aiq/tool/chat_completion.py +74 -0
- aiq/tool/code_execution/README.md +152 -0
- aiq/tool/code_execution/code_sandbox.py +151 -72
- aiq/tool/code_execution/local_sandbox/.gitignore +1 -0
- aiq/tool/code_execution/local_sandbox/local_sandbox_server.py +139 -24
- aiq/tool/code_execution/local_sandbox/sandbox.requirements.txt +3 -1
- aiq/tool/code_execution/local_sandbox/start_local_sandbox.sh +27 -2
- aiq/tool/code_execution/register.py +7 -3
- aiq/tool/code_execution/test_code_execution_sandbox.py +414 -0
- aiq/tool/mcp/exceptions.py +142 -0
- aiq/tool/mcp/mcp_client.py +41 -6
- aiq/tool/mcp/mcp_tool.py +3 -2
- aiq/tool/register.py +1 -0
- aiq/tool/server_tools.py +6 -3
- aiq/utils/exception_handlers/automatic_retries.py +289 -0
- aiq/utils/exception_handlers/mcp.py +211 -0
- aiq/utils/io/model_processing.py +28 -0
- aiq/utils/log_utils.py +37 -0
- aiq/utils/string_utils.py +38 -0
- aiq/utils/type_converter.py +18 -2
- aiq/utils/type_utils.py +87 -0
- {aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/METADATA +53 -21
- aiqtoolkit-1.2.0rc1.dist-info/RECORD +436 -0
- {aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/WHEEL +1 -1
- {aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/entry_points.txt +3 -0
- aiq/front_ends/fastapi/websocket.py +0 -148
- aiq/observability/async_otel_listener.py +0 -429
- aiqtoolkit-1.2.0.dev0.dist-info/RECORD +0 -316
- {aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
- {aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/licenses/LICENSE.md +0 -0
- {aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,345 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
import logging
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
19
|
+
import matplotlib.pyplot as plt
|
|
20
|
+
import numpy as np
|
|
21
|
+
import pandas as pd
|
|
22
|
+
|
|
23
|
+
from aiq.profiler.calc.data_models import LinearFitResult
|
|
24
|
+
from aiq.profiler.calc.data_models import SizingMetrics
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Plotting constants
|
|
30
|
+
class PlotConfig:
    """Namespace of styling constants shared by the concurrency plots in this module."""

    # --- Figure geometry and output resolution ---
    SIMPLE_FIGSIZE = (12, 6)
    ENHANCED_FIGSIZE = (16, 6)
    SIMPLE_DPI = 150
    ENHANCED_DPI = 300
    SIMPLE_LINEWIDTH = 2

    # --- Regular data points ---
    DATA_MARKER = 'o'
    DATA_ALPHA = 0.7
    DATA_POINT_SIZE = 120
    DATA_LINEWIDTH = 1

    # --- Points that were removed as outliers before fitting ---
    OUTLIER_MARKER = 'x'
    OUTLIER_COLOR = 'crimson'
    OUTLIER_ALPHA = 0.9
    OUTLIER_POINT_SIZE = 140

    # --- Linear trend line ---
    TREND_COLOR = 'r'
    TREND_LINESTYLE = '--'
    TREND_ALPHA = 0.8
    TREND_LINEWIDTH = 2.0
    TREND_LINE_POINTS = 100  # number of samples used to draw the line

    # --- Per-metric and SLA colours ---
    LLM_LATENCY_COLOR = 'steelblue'
    RUNTIME_COLOR = 'darkgreen'
    SLA_COLOR = 'red'
    SLA_ALPHA = 0.7

    # --- Grid ---
    GRID_ALPHA = 0.3

    # --- Outlier note box (placed in axes coordinates) ---
    NOTE_BOX_COLOR = 'mistyrose'
    NOTE_TEXT_COLOR = 'crimson'
    NOTE_BOX_ALPHA = 0.7
    NOTE_BOX_PAD = 0.3
    NOTE_FONTSIZE = 10
    NOTE_X_POS = 0.98
    NOTE_Y_POS = 0.02

    # --- Summary statistics box (placed in figure coordinates) ---
    STATS_BOX_COLOR = 'lightblue'
    STATS_BOX_ALPHA = 0.8
    STATS_BOX_PAD = 0.5
    STATS_FONTSIZE = 10
    STATS_X_POS = 0.02
    STATS_Y_POS = 0.02

    # --- Axis labels, titles and legend text ---
    AXIS_LABEL_FONTSIZE = 12
    AXIS_LABEL_FONTWEIGHT = 'bold'
    TITLE_FONTSIZE = 14
    TITLE_FONTWEIGHT = 'bold'
    LEGEND_FONTSIZE = 10
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def plot_concurrency_vs_time_metrics_simple(df: pd.DataFrame, output_dir: Path) -> None:
    """
    Save a simple line plot of concurrency vs. p95 LLM latency and p95 workflow runtime.

    The image is written to ``output_dir / "concurrency_vs_p95_simple.png"``.

    Args:
        df: Frame providing the ``concurrency``, ``llm_latency_p95`` and ``workflow_runtime_p95`` columns.
        output_dir: Directory receiving the PNG; it is created if it does not exist yet.
    """
    # Make sure the target directory exists so that saving cannot fail on a fresh output location.
    output_dir.mkdir(parents=True, exist_ok=True)
    simple_plot_path = output_dir / "concurrency_vs_p95_simple.png"

    # Use explicit figure/axes objects rather than pyplot's implicit "current figure" state.
    fig, ax = plt.subplots(figsize=PlotConfig.SIMPLE_FIGSIZE)
    try:
        ax.plot(df["concurrency"],
                df["llm_latency_p95"],
                label="p95 LLM Latency (s)",
                marker=PlotConfig.DATA_MARKER,
                linewidth=PlotConfig.SIMPLE_LINEWIDTH)
        ax.plot(df["concurrency"],
                df["workflow_runtime_p95"],
                label="p95 Workflow Runtime (s)",
                marker="s",
                linewidth=PlotConfig.SIMPLE_LINEWIDTH)
        ax.set_xlabel("Concurrency")
        ax.set_ylabel("Time (seconds)")
        ax.set_title("Concurrency vs. p95 LLM Latency and Workflow Runtime")
        ax.grid(True, alpha=PlotConfig.GRID_ALPHA)
        ax.legend()
        fig.tight_layout()

        fig.savefig(simple_plot_path, dpi=PlotConfig.SIMPLE_DPI, bbox_inches='tight')
    finally:
        # Always release the figure, even when plotting or saving raises.
        plt.close(fig)

    logger.info("Simple plot saved to %s", simple_plot_path)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def plot_metric_vs_concurrency_with_optional_fit(
    ax: plt.Axes,
    x: np.ndarray,
    y: np.ndarray,
    metric_name: str,
    y_label: str,
    title: str,
    color: str,
    sla_value: float = 0.0,
    sla_label: str | None = None,
    fit: LinearFitResult | None = None,
) -> bool:
    """
    Draw one metric against concurrency on ``ax`` using a pre-computed linear fit.

    Data points are scattered, points whose concurrency is listed in ``fit.outliers_removed`` are
    highlighted separately, the fitted trend line is overlaid, and an optional horizontal SLA line is added.

    Args:
        ax: Axes to draw on.
        x: Concurrency values.
        y: Metric values, aligned element-wise with ``x``.
        metric_name: Name of the metric, used in log messages only.
        y_label: Label for the y axis.
        title: Title of the axes.
        color: Colour of the regular data points.
        sla_value: SLA threshold; a horizontal line is drawn only when it is greater than zero.
        sla_label: Legend label for the SLA line; a default label is generated when omitted.
        fit: Pre-computed fit providing ``slope``, ``intercept``, ``r_squared`` and ``outliers_removed``.

    Returns:
        ``True`` when the plot was drawn, ``False`` when it was skipped (no fit or no data).
    """
    # Skip analysis plot if no fit is available
    if not fit:
        logger.warning("No linear fit available for %s, skipping analysis plot", metric_name)
        return False

    x = np.asarray(x)
    y = np.asarray(y)
    if x.size == 0:
        # The trend line needs x.min()/x.max(), which are undefined for empty input.
        logger.warning("No data points available for %s, skipping analysis plot", metric_name)
        return False

    # Styling shared by the "regular data points" scatter in both branches below.
    data_point_style = dict(alpha=PlotConfig.DATA_ALPHA,
                            s=PlotConfig.DATA_POINT_SIZE,
                            c=color,
                            edgecolors='white',
                            linewidth=PlotConfig.DATA_LINEWIDTH,
                            marker=PlotConfig.DATA_MARKER,
                            label='Data Points')

    outliers_note = ""
    if fit.outliers_removed:
        # Use the concurrencies that were removed to identify outlier points
        outlier_mask = np.isin(x, fit.outliers_removed)
        outliers_note = f"Outliers removed: concurrencies {fit.outliers_removed}"

        # Plot cleaned data (points that weren't removed as outliers), then the outliers themselves
        ax.scatter(x[~outlier_mask], y[~outlier_mask], **data_point_style)
        ax.scatter(x[outlier_mask],
                   y[outlier_mask],
                   alpha=PlotConfig.OUTLIER_ALPHA,
                   s=PlotConfig.OUTLIER_POINT_SIZE,
                   c=PlotConfig.OUTLIER_COLOR,
                   marker=PlotConfig.OUTLIER_MARKER,
                   label='Removed Outliers')
    else:
        # No outliers: plot all data points
        ax.scatter(x, y, **data_point_style)

    # Plot trend line using the fit, sampled across the observed concurrency range
    x_fit = np.linspace(x.min(), x.max(), PlotConfig.TREND_LINE_POINTS)
    y_fit = fit.slope * x_fit + fit.intercept
    ax.plot(x_fit,
            y_fit,
            PlotConfig.TREND_LINESTYLE,
            alpha=PlotConfig.TREND_ALPHA,
            linewidth=PlotConfig.TREND_LINEWIDTH,
            color=PlotConfig.TREND_COLOR,
            label=f'Trend (slope={fit.slope:.4f}, R²={fit.r_squared:.3f})')

    if sla_value > 0:
        ax.axhline(y=sla_value,
                   color=PlotConfig.SLA_COLOR,
                   linestyle=':',
                   alpha=PlotConfig.SLA_ALPHA,
                   linewidth=2,
                   label=sla_label or f'SLA Threshold ({sla_value}s)')

    ax.set_xlabel('Concurrency', fontsize=PlotConfig.AXIS_LABEL_FONTSIZE, fontweight=PlotConfig.AXIS_LABEL_FONTWEIGHT)
    ax.set_ylabel(y_label, fontsize=PlotConfig.AXIS_LABEL_FONTSIZE, fontweight=PlotConfig.AXIS_LABEL_FONTWEIGHT)
    ax.set_title(title, fontsize=PlotConfig.TITLE_FONTSIZE, fontweight=PlotConfig.TITLE_FONTWEIGHT)
    ax.grid(True, alpha=PlotConfig.GRID_ALPHA)
    ax.legend(fontsize=PlotConfig.LEGEND_FONTSIZE)

    if outliers_note:
        # Anchor the note to the bottom-right corner of the axes (axes coordinates).
        ax.text(PlotConfig.NOTE_X_POS,
                PlotConfig.NOTE_Y_POS,
                outliers_note,
                transform=ax.transAxes,
                fontsize=PlotConfig.NOTE_FONTSIZE,
                color=PlotConfig.NOTE_TEXT_COLOR,
                ha='right',
                va='bottom',
                bbox=dict(boxstyle=f'round,pad={PlotConfig.NOTE_BOX_PAD}',
                          facecolor=PlotConfig.NOTE_BOX_COLOR,
                          alpha=PlotConfig.NOTE_BOX_ALPHA))

    return True
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def plot_concurrency_vs_time_metrics(metrics_per_concurrency: dict[int, SizingMetrics],
                                     output_dir: Path,
                                     target_llm_latency: float = 0.0,
                                     target_runtime: float = 0.0,
                                     llm_latency_fit: LinearFitResult | None = None,
                                     runtime_fit: LinearFitResult | None = None) -> None:
    """
    Plot concurrency vs. p95 LLM latency and p95 workflow runtime from ``metrics_per_concurrency``.

    A simple line plot is always written. An enhanced analysis plot (trend line, outlier highlighting,
    SLA line, summary statistics) is written to ``concurrency_vs_p95_analysis.png`` when at least one
    pre-computed fit is supplied; one panel is drawn per supplied fit.

    Args:
        metrics_per_concurrency: Mapping of concurrency to metrics exposing ``llm_latency_p95`` and
            ``workflow_runtime_p95``.
        output_dir: Directory receiving the images; it is created if missing.
        target_llm_latency: LLM latency SLA; drawn on the latency panel when greater than zero.
        target_runtime: Workflow runtime SLA; drawn on the runtime panel when greater than zero.
        llm_latency_fit: Pre-computed linear fit for LLM latency, if any.
        runtime_fit: Pre-computed linear fit for workflow runtime, if any.
    """
    rows = [{
        "concurrency": concurrency,
        "llm_latency_p95": metrics.llm_latency_p95,
        "workflow_runtime_p95": metrics.workflow_runtime_p95,
    } for concurrency, metrics in metrics_per_concurrency.items()]

    if not rows:
        logger.warning("No valid metrics data available to plot.")
        return

    # Create the output directory before anything is written: the simple plot is saved first.
    output_dir.mkdir(parents=True, exist_ok=True)

    # NOTE(review): this changes matplotlib's global style for the rest of the process, as before.
    plt.style.use('seaborn-v0_8')
    df = pd.DataFrame(rows).sort_values("concurrency")

    # Always generate simple plot first
    plot_concurrency_vs_time_metrics_simple(df, output_dir)

    # One panel description per available fit, so a single supplied fit (either one) still gets plotted.
    panels = []
    if llm_latency_fit is not None:
        panels.append({
            "column": "llm_latency_p95",
            "metric_name": "llm_latency",
            "y_label": 'P95 LLM Latency (seconds)',
            "title": 'Concurrency vs P95 LLM Latency',
            "color": PlotConfig.LLM_LATENCY_COLOR,
            "sla_value": target_llm_latency,
            "fit": llm_latency_fit,
        })
    if runtime_fit is not None:
        panels.append({
            "column": "workflow_runtime_p95",
            "metric_name": "runtime",
            "y_label": 'P95 Workflow Runtime (seconds)',
            "title": 'Concurrency vs P95 Workflow Runtime',
            "color": PlotConfig.RUNTIME_COLOR,
            "sla_value": target_runtime,
            "fit": runtime_fit,
        })

    if not panels:
        logger.warning("No linear fits available for analysis plots, skipping enhanced plot")
        return

    # Two fits -> side-by-side panels; a single fit -> one smaller panel.
    figsize = PlotConfig.ENHANCED_FIGSIZE if len(panels) == 2 else (8, 6)
    # squeeze=False keeps `axes` two-dimensional regardless of the number of panels.
    fig, axes = plt.subplots(1, len(panels), figsize=figsize, squeeze=False)

    try:
        concurrencies = df["concurrency"].to_numpy()
        plots_created = False
        for ax, panel in zip(axes[0], panels):
            sla_value = panel["sla_value"]
            plotted = plot_metric_vs_concurrency_with_optional_fit(
                ax,
                concurrencies,
                df[panel["column"]].to_numpy(),
                metric_name=panel["metric_name"],
                y_label=panel["y_label"],
                title=panel["title"],
                color=panel["color"],
                sla_value=sla_value,
                sla_label=f'SLA Threshold ({sla_value}s)' if sla_value > 0 else None,
                fit=panel["fit"],
            )
            plots_created = plots_created or plotted

        if not plots_created:
            logger.warning("No analysis plots could be created, skipping enhanced plot")
            return

        # Add summary statistics
        stats_text = (f'Data Points: {len(df)}\n'
                      f'LLM Latency Range: {df["llm_latency_p95"].min():.3f}-{df["llm_latency_p95"].max():.3f}s\n'
                      f'WF Runtime Range: '
                      f'{df["workflow_runtime_p95"].min():.3f}-{df["workflow_runtime_p95"].max():.3f}s')

        fig.text(PlotConfig.STATS_X_POS,
                 PlotConfig.STATS_Y_POS,
                 stats_text,
                 fontsize=PlotConfig.STATS_FONTSIZE,
                 bbox=dict(boxstyle=f'round,pad={PlotConfig.STATS_BOX_PAD}',
                           facecolor=PlotConfig.STATS_BOX_COLOR,
                           alpha=PlotConfig.STATS_BOX_ALPHA))

        fig.tight_layout()

        enhanced_plot_path = output_dir / "concurrency_vs_p95_analysis.png"
        fig.savefig(enhanced_plot_path,
                    dpi=PlotConfig.ENHANCED_DPI,
                    bbox_inches='tight',
                    facecolor='white',
                    edgecolor='none')
    finally:
        # Release the figure on every exit path (skipped plot, save error, success).
        plt.close(fig)

    logger.info("Enhanced plot saved to %s", enhanced_plot_path)
|
|
@@ -34,6 +34,7 @@ from aiq.builder.framework_enum import LLMFrameworkEnum
|
|
|
34
34
|
from aiq.data_models.intermediate_step import IntermediateStepPayload
|
|
35
35
|
from aiq.data_models.intermediate_step import IntermediateStepType
|
|
36
36
|
from aiq.data_models.intermediate_step import StreamEventData
|
|
37
|
+
from aiq.data_models.intermediate_step import ToolSchema
|
|
37
38
|
from aiq.data_models.intermediate_step import TraceMetadata
|
|
38
39
|
from aiq.data_models.intermediate_step import UsageInfo
|
|
39
40
|
from aiq.profiler.callbacks.base_callback_class import BaseProfilerCallback
|
|
@@ -42,6 +43,16 @@ from aiq.profiler.callbacks.token_usage_base_model import TokenUsageBaseModel
|
|
|
42
43
|
logger = logging.getLogger(__name__)
|
|
43
44
|
|
|
44
45
|
|
|
46
|
+
def _extract_tools_schema(invocation_params: dict) -> list:
|
|
47
|
+
|
|
48
|
+
tools_schema = []
|
|
49
|
+
if invocation_params is not None:
|
|
50
|
+
for tool in invocation_params.get("tools", []):
|
|
51
|
+
tools_schema.append(ToolSchema(**tool))
|
|
52
|
+
|
|
53
|
+
return tools_schema
|
|
54
|
+
|
|
55
|
+
|
|
45
56
|
class LangchainProfilerHandler(AsyncCallbackHandler, BaseProfilerCallback): # pylint: disable=R0901
|
|
46
57
|
"""Callback Handler that tracks NIM info."""
|
|
47
58
|
|
|
@@ -138,16 +149,17 @@ class LangchainProfilerHandler(AsyncCallbackHandler, BaseProfilerCallback): # p
|
|
|
138
149
|
run_id = str(run_id)
|
|
139
150
|
self._run_id_to_model_name[run_id] = model_name
|
|
140
151
|
|
|
141
|
-
stats = IntermediateStepPayload(
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
152
|
+
stats = IntermediateStepPayload(
|
|
153
|
+
event_type=IntermediateStepType.LLM_START,
|
|
154
|
+
framework=LLMFrameworkEnum.LANGCHAIN,
|
|
155
|
+
name=model_name,
|
|
156
|
+
UUID=run_id,
|
|
157
|
+
data=StreamEventData(input=copy.deepcopy(messages[0])),
|
|
158
|
+
metadata=TraceMetadata(chat_inputs=copy.deepcopy(messages[0]),
|
|
159
|
+
tools_schema=_extract_tools_schema(kwargs.get("invocation_params", {}))),
|
|
160
|
+
usage_info=UsageInfo(token_usage=TokenUsageBaseModel(),
|
|
161
|
+
num_llm_calls=1,
|
|
162
|
+
seconds_between_calls=int(time.time() - self.last_call_ts)))
|
|
151
163
|
|
|
152
164
|
self.step_manager.push_intermediate_step(stats)
|
|
153
165
|
self._run_id_to_llm_input[run_id] = messages[0][-1].content
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
from pydantic import BaseModel
|
|
17
|
+
|
|
18
|
+
from aiq.profiler.inference_metrics_model import InferenceMetricsModel
|
|
19
|
+
from aiq.profiler.inference_optimization.data_models import WorkflowRuntimeMetrics
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ProfilerResults(BaseModel):
    # Container for profiler outputs; both fields are optional and default to None.

    # Workflow runtime metrics, when available.
    workflow_runtime_metrics: WorkflowRuntimeMetrics | None = None
    # Inference metrics for LLM latency (presumably confidence-interval based, per the field name — TODO confirm).
    llm_latency_ci: InferenceMetricsModel | None = None
|
|
@@ -23,3 +23,6 @@ class InferenceMetricsModel(BaseModel):
|
|
|
23
23
|
ninetieth_interval: tuple[float, float] = Field(default=(0, 0), description="90% confidence interval")
|
|
24
24
|
ninety_fifth_interval: tuple[float, float] = Field(default=(0, 0), description="95% confidence interval")
|
|
25
25
|
ninety_ninth_interval: tuple[float, float] = Field(default=(0, 0), description="99% confidence interval")
|
|
26
|
+
p90: float = Field(default=0, description="90th percentile of the samples")
|
|
27
|
+
p95: float = Field(default=0, description="95th percentile of the samples")
|
|
28
|
+
p99: float = Field(default=0, description="99th percentile of the samples")
|
|
@@ -69,13 +69,20 @@ def build_call_tree_for_example(example_df: pd.DataFrame) -> list[CallNode]:
|
|
|
69
69
|
return "LLM"
|
|
70
70
|
if evt.startswith("TOOL_"):
|
|
71
71
|
return "TOOL"
|
|
72
|
+
if evt.startswith("FUNCTION_"):
|
|
73
|
+
return "FUNCTION"
|
|
74
|
+
if evt.startswith("SPAN_"):
|
|
75
|
+
return "FUNCTION"
|
|
72
76
|
return None
|
|
73
77
|
|
|
74
78
|
def get_op_name(row: pd.Series, op_type: str) -> str:
    """Return the operation name stored in ``row`` for ``op_type``, or an ``unknown_*`` placeholder."""
    # op_type -> (field holding the name, placeholder used when the field is missing or empty)
    name_fields = {
        "LLM": ("llm_name", "unknown_llm"),
        "FUNCTION": ("function_name", "unknown_function"),
        "TOOL": ("tool_name", "unknown_tool"),
    }

    if op_type not in name_fields:
        return "unknown_op"

    field, placeholder = name_fields[op_type]
    return row.get(field) or placeholder
|
|
80
87
|
|
|
81
88
|
for _, row in example_df.iterrows():
|
|
@@ -309,6 +316,7 @@ def save_gantt_chart(all_nodes: list[CallNode], output_path: str) -> None:
|
|
|
309
316
|
color_map = {
|
|
310
317
|
"LLM": "tab:blue",
|
|
311
318
|
"TOOL": "tab:green",
|
|
319
|
+
"FUNCTION": "tab:orange",
|
|
312
320
|
}
|
|
313
321
|
default_color = "tab:gray"
|
|
314
322
|
|
|
@@ -220,7 +220,7 @@ class CallNode(BaseModel):
|
|
|
220
220
|
return "\n".join([info] + child_strs)
|
|
221
221
|
|
|
222
222
|
|
|
223
|
-
CallNode.
|
|
223
|
+
CallNode.model_rebuild()
|
|
224
224
|
|
|
225
225
|
|
|
226
226
|
class NodeMetrics(BaseModel):
|
|
@@ -296,7 +296,7 @@ class ConcurrencyCallNode(CallNode):
|
|
|
296
296
|
llm_text_output: str | None = None
|
|
297
297
|
|
|
298
298
|
|
|
299
|
-
ConcurrencyCallNode.
|
|
299
|
+
ConcurrencyCallNode.model_rebuild()
|
|
300
300
|
|
|
301
301
|
|
|
302
302
|
class ConcurrencySpikeInfo(BaseModel):
|
|
@@ -176,8 +176,8 @@ class LLMMetrics:
|
|
|
176
176
|
return subdf
|
|
177
177
|
|
|
178
178
|
# Apply the group metrics
|
|
179
|
-
|
|
180
|
-
|
|
179
|
+
df_group = df.groupby(['example_number', 'function_name'], group_keys=False)
|
|
180
|
+
df = df_group[df.columns].apply(_compute_group_metrics).sort_index()
|
|
181
181
|
|
|
182
182
|
# ---------------------------------------------------------------------
|
|
183
183
|
# 5. NOVA-Predicted-OSL
|
aiq/profiler/profile_runner.py
CHANGED
|
@@ -25,6 +25,7 @@ from pydantic import BaseModel
|
|
|
25
25
|
|
|
26
26
|
from aiq.data_models.evaluate import ProfilerConfig
|
|
27
27
|
from aiq.data_models.intermediate_step import IntermediateStep
|
|
28
|
+
from aiq.profiler.data_models import ProfilerResults
|
|
28
29
|
from aiq.profiler.forecasting.model_trainer import ModelTrainer
|
|
29
30
|
from aiq.profiler.inference_metrics_model import InferenceMetricsModel
|
|
30
31
|
from aiq.profiler.utils import create_standardized_dataframe
|
|
@@ -67,9 +68,10 @@ class ProfilerRunner:
|
|
|
67
68
|
All computed metrics are saved to a metrics JSON file at the end.
|
|
68
69
|
"""
|
|
69
70
|
|
|
70
|
-
def __init__(self, profiler_config: ProfilerConfig, output_dir: Path):
|
|
71
|
+
def __init__(self, profiler_config: ProfilerConfig, output_dir: Path, write_output: bool = True):
|
|
71
72
|
self.profile_config = profiler_config
|
|
72
73
|
self.output_dir = output_dir
|
|
74
|
+
self.write_output = write_output
|
|
73
75
|
self._converter = TypeConverter([])
|
|
74
76
|
|
|
75
77
|
# Holds per-request data (prompt, output, usage_stats, etc.)
|
|
@@ -80,7 +82,7 @@ class ProfilerRunner:
|
|
|
80
82
|
# Ensure output directory
|
|
81
83
|
os.makedirs(output_dir, exist_ok=True)
|
|
82
84
|
|
|
83
|
-
async def run(self, all_steps: list[list[IntermediateStep]]):
|
|
85
|
+
async def run(self, all_steps: list[list[IntermediateStep]]) -> ProfilerResults:
|
|
84
86
|
"""
|
|
85
87
|
Main entrypoint: Works on Input DataFrame generated from eval to fit forecasting model,
|
|
86
88
|
writes out combined requests JSON, then computes and saves additional metrics,
|
|
@@ -113,10 +115,11 @@ class ProfilerRunner:
|
|
|
113
115
|
self.all_requests_data.append({"request_number": i, "intermediate_steps": request_data})
|
|
114
116
|
|
|
115
117
|
# Write the final big JSON (all requests)
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
118
|
+
if self.write_output:
|
|
119
|
+
final_path = os.path.join(self.output_dir, "all_requests_profiler_traces.json")
|
|
120
|
+
with open(final_path, 'w', encoding='utf-8') as f:
|
|
121
|
+
json.dump(self.all_requests_data, f, indent=2, default=str)
|
|
122
|
+
logger.info("Wrote combined data to: %s", final_path)
|
|
120
123
|
|
|
121
124
|
# ------------------------------------------------------------
|
|
122
125
|
# Generate one standardized dataframe for all usage stats
|
|
@@ -171,7 +174,7 @@ class ProfilerRunner:
|
|
|
171
174
|
uniqueness = compute_inter_query_token_uniqueness_by_llm(all_steps)
|
|
172
175
|
token_uniqueness_results = uniqueness
|
|
173
176
|
|
|
174
|
-
if self.profile_config.workflow_runtime_forecast:
|
|
177
|
+
if self.profile_config.workflow_runtime_forecast or self.profile_config.base_metrics:
|
|
175
178
|
# ------------------------------------------------------------
|
|
176
179
|
# Compute and save workflow runtime metrics
|
|
177
180
|
# ------------------------------------------------------------
|
|
@@ -184,7 +187,7 @@ class ProfilerRunner:
|
|
|
184
187
|
token_uniqueness=token_uniqueness_results,
|
|
185
188
|
workflow_runtimes=workflow_runtimes_results)
|
|
186
189
|
|
|
187
|
-
if inference_optimization_results:
|
|
190
|
+
if self.write_output and inference_optimization_results:
|
|
188
191
|
# Save to JSON
|
|
189
192
|
optimization_results_path = os.path.join(self.output_dir, "inference_optimization.json")
|
|
190
193
|
with open(optimization_results_path, 'w', encoding='utf-8') as f:
|
|
@@ -248,14 +251,14 @@ class ProfilerRunner:
|
|
|
248
251
|
exclude=["textual_report"])
|
|
249
252
|
logger.info("Prefix span analysis complete")
|
|
250
253
|
|
|
251
|
-
if workflow_profiling_reports:
|
|
254
|
+
if self.write_output and workflow_profiling_reports:
|
|
252
255
|
# Save to text file
|
|
253
256
|
profiling_report_path = os.path.join(self.output_dir, "workflow_profiling_report.txt")
|
|
254
257
|
with open(profiling_report_path, 'w', encoding='utf-8') as f:
|
|
255
258
|
f.write(workflow_profiling_reports)
|
|
256
259
|
logger.info("Wrote workflow profiling report to: %s", profiling_report_path)
|
|
257
260
|
|
|
258
|
-
if workflow_profiling_metrics:
|
|
261
|
+
if self.write_output and workflow_profiling_metrics:
|
|
259
262
|
# Save to JSON
|
|
260
263
|
profiling_metrics_path = os.path.join(self.output_dir, "workflow_profiling_metrics.json")
|
|
261
264
|
with open(profiling_metrics_path, 'w', encoding='utf-8') as f:
|
|
@@ -275,16 +278,19 @@ class ProfilerRunner:
|
|
|
275
278
|
logger.info("Fitted model for forecasting.")
|
|
276
279
|
except Exception as e:
|
|
277
280
|
logger.exception("Fitting model failed. %s", e, exc_info=True)
|
|
278
|
-
return
|
|
281
|
+
return ProfilerResults()
|
|
279
282
|
|
|
280
|
-
|
|
283
|
+
if self.write_output:
|
|
284
|
+
os.makedirs(self.output_dir, exist_ok=True)
|
|
281
285
|
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
286
|
+
import pickle
|
|
287
|
+
with open(os.path.join(self.output_dir, "fitted_model.pkl"), 'wb') as f:
|
|
288
|
+
pickle.dump(fitted_model, f)
|
|
285
289
|
|
|
286
290
|
logger.info("Saved fitted model to disk.")
|
|
287
291
|
|
|
292
|
+
return ProfilerResults(workflow_runtime_metrics=workflow_runtimes_results, llm_latency_ci=llm_latency_ci)
|
|
293
|
+
|
|
288
294
|
# -------------------------------------------------------------------
|
|
289
295
|
# Confidence Intervals / Metrics
|
|
290
296
|
# -------------------------------------------------------------------
|
|
@@ -391,7 +397,8 @@ class ProfilerRunner:
|
|
|
391
397
|
|
|
392
398
|
def _compute_confidence_intervals(self, data: list[float], metric_name: str) -> InferenceMetricsModel:
|
|
393
399
|
"""
|
|
394
|
-
Helper to compute 90, 95, 99% confidence intervals
|
|
400
|
+
Helper to compute 90, 95, 99 % confidence intervals **and** the empirical
|
|
401
|
+
90th/95th/99th percentiles (p90/p95/p99) for the mean of a dataset.
|
|
395
402
|
Uses a z-score from the normal approximation for large samples.
|
|
396
403
|
|
|
397
404
|
Returns a dict like::
|
|
@@ -409,11 +416,16 @@ class ProfilerRunner:
|
|
|
409
416
|
n = len(data)
|
|
410
417
|
mean_val = statistics.mean(data)
|
|
411
418
|
if n <= 1:
|
|
412
|
-
return InferenceMetricsModel(
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
419
|
+
return InferenceMetricsModel(
|
|
420
|
+
n=n,
|
|
421
|
+
mean=mean_val,
|
|
422
|
+
ninetieth_interval=(mean_val, mean_val),
|
|
423
|
+
ninety_fifth_interval=(mean_val, mean_val),
|
|
424
|
+
ninety_ninth_interval=(mean_val, mean_val),
|
|
425
|
+
p90=mean_val,
|
|
426
|
+
p95=mean_val,
|
|
427
|
+
p99=mean_val,
|
|
428
|
+
)
|
|
417
429
|
|
|
418
430
|
stdev_val = statistics.pstdev(data) # population stdev or use stdev for sample
|
|
419
431
|
# standard error
|
|
@@ -430,4 +442,32 @@ class ProfilerRunner:
|
|
|
430
442
|
# Optionally, store more info
|
|
431
443
|
intervals["n"] = n
|
|
432
444
|
intervals["mean"] = mean_val
|
|
445
|
+
|
|
446
|
+
# ------------------------------------------------------------------
|
|
447
|
+
# Percentiles
|
|
448
|
+
# ------------------------------------------------------------------
|
|
449
|
+
sorted_data = sorted(data)
|
|
450
|
+
|
|
451
|
+
def _percentile(arr: list[float], pct: float) -> float:
|
|
452
|
+
"""
|
|
453
|
+
Linear interpolation between closest ranks.
|
|
454
|
+
pct is given from 0‑100 (e.g. 90 for p90).
|
|
455
|
+
"""
|
|
456
|
+
if not arr:
|
|
457
|
+
return 0.0
|
|
458
|
+
k = (len(arr) - 1) * (pct / 100.0)
|
|
459
|
+
f = math.floor(k)
|
|
460
|
+
c = math.ceil(k)
|
|
461
|
+
if f == c:
|
|
462
|
+
return arr[int(k)]
|
|
463
|
+
return arr[f] + (arr[c] - arr[f]) * (k - f)
|
|
464
|
+
|
|
465
|
+
p90_val = _percentile(sorted_data, 90)
|
|
466
|
+
p95_val = _percentile(sorted_data, 95)
|
|
467
|
+
p99_val = _percentile(sorted_data, 99)
|
|
468
|
+
|
|
469
|
+
intervals["p90"] = p90_val
|
|
470
|
+
intervals["p95"] = p95_val
|
|
471
|
+
intervals["p99"] = p99_val
|
|
472
|
+
|
|
433
473
|
return InferenceMetricsModel(**intervals)
|