aiqtoolkit 1.2.0.dev0__py3-none-any.whl → 1.2.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiqtoolkit might be problematic. Click here for more details.

Files changed (220) hide show
  1. aiq/agent/base.py +170 -8
  2. aiq/agent/dual_node.py +1 -1
  3. aiq/agent/react_agent/agent.py +146 -112
  4. aiq/agent/react_agent/prompt.py +1 -6
  5. aiq/agent/react_agent/register.py +36 -35
  6. aiq/agent/rewoo_agent/agent.py +36 -35
  7. aiq/agent/rewoo_agent/register.py +2 -2
  8. aiq/agent/tool_calling_agent/agent.py +3 -7
  9. aiq/agent/tool_calling_agent/register.py +1 -1
  10. aiq/authentication/__init__.py +14 -0
  11. aiq/authentication/api_key/__init__.py +14 -0
  12. aiq/authentication/api_key/api_key_auth_provider.py +92 -0
  13. aiq/authentication/api_key/api_key_auth_provider_config.py +124 -0
  14. aiq/authentication/api_key/register.py +26 -0
  15. aiq/authentication/exceptions/__init__.py +14 -0
  16. aiq/authentication/exceptions/api_key_exceptions.py +38 -0
  17. aiq/authentication/exceptions/auth_code_grant_exceptions.py +86 -0
  18. aiq/authentication/exceptions/call_back_exceptions.py +38 -0
  19. aiq/authentication/exceptions/request_exceptions.py +54 -0
  20. aiq/authentication/http_basic_auth/__init__.py +0 -0
  21. aiq/authentication/http_basic_auth/http_basic_auth_provider.py +81 -0
  22. aiq/authentication/http_basic_auth/register.py +30 -0
  23. aiq/authentication/interfaces.py +93 -0
  24. aiq/authentication/oauth2/__init__.py +14 -0
  25. aiq/authentication/oauth2/oauth2_auth_code_flow_provider.py +107 -0
  26. aiq/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +39 -0
  27. aiq/authentication/oauth2/register.py +25 -0
  28. aiq/authentication/register.py +21 -0
  29. aiq/builder/builder.py +64 -2
  30. aiq/builder/component_utils.py +16 -3
  31. aiq/builder/context.py +37 -0
  32. aiq/builder/eval_builder.py +43 -2
  33. aiq/builder/function.py +44 -12
  34. aiq/builder/function_base.py +1 -1
  35. aiq/builder/intermediate_step_manager.py +6 -8
  36. aiq/builder/user_interaction_manager.py +3 -0
  37. aiq/builder/workflow.py +23 -18
  38. aiq/builder/workflow_builder.py +421 -61
  39. aiq/cli/commands/info/list_mcp.py +103 -16
  40. aiq/cli/commands/sizing/__init__.py +14 -0
  41. aiq/cli/commands/sizing/calc.py +294 -0
  42. aiq/cli/commands/sizing/sizing.py +27 -0
  43. aiq/cli/commands/start.py +2 -1
  44. aiq/cli/entrypoint.py +2 -0
  45. aiq/cli/register_workflow.py +80 -0
  46. aiq/cli/type_registry.py +151 -30
  47. aiq/data_models/api_server.py +124 -12
  48. aiq/data_models/authentication.py +231 -0
  49. aiq/data_models/common.py +35 -7
  50. aiq/data_models/component.py +17 -9
  51. aiq/data_models/component_ref.py +33 -0
  52. aiq/data_models/config.py +60 -3
  53. aiq/data_models/dataset_handler.py +2 -1
  54. aiq/data_models/embedder.py +1 -0
  55. aiq/data_models/evaluate.py +23 -0
  56. aiq/data_models/function_dependencies.py +8 -0
  57. aiq/data_models/interactive.py +10 -1
  58. aiq/data_models/intermediate_step.py +38 -5
  59. aiq/data_models/its_strategy.py +30 -0
  60. aiq/data_models/llm.py +1 -0
  61. aiq/data_models/memory.py +1 -0
  62. aiq/data_models/object_store.py +44 -0
  63. aiq/data_models/profiler.py +1 -0
  64. aiq/data_models/retry_mixin.py +35 -0
  65. aiq/data_models/span.py +187 -0
  66. aiq/data_models/telemetry_exporter.py +2 -2
  67. aiq/embedder/nim_embedder.py +2 -1
  68. aiq/embedder/openai_embedder.py +2 -1
  69. aiq/eval/config.py +19 -1
  70. aiq/eval/dataset_handler/dataset_handler.py +87 -2
  71. aiq/eval/evaluate.py +208 -27
  72. aiq/eval/evaluator/base_evaluator.py +73 -0
  73. aiq/eval/evaluator/evaluator_model.py +1 -0
  74. aiq/eval/intermediate_step_adapter.py +11 -5
  75. aiq/eval/rag_evaluator/evaluate.py +55 -15
  76. aiq/eval/rag_evaluator/register.py +6 -1
  77. aiq/eval/remote_workflow.py +7 -2
  78. aiq/eval/runners/__init__.py +14 -0
  79. aiq/eval/runners/config.py +39 -0
  80. aiq/eval/runners/multi_eval_runner.py +54 -0
  81. aiq/eval/trajectory_evaluator/evaluate.py +22 -65
  82. aiq/eval/tunable_rag_evaluator/evaluate.py +150 -168
  83. aiq/eval/tunable_rag_evaluator/register.py +2 -0
  84. aiq/eval/usage_stats.py +41 -0
  85. aiq/eval/utils/output_uploader.py +10 -1
  86. aiq/eval/utils/weave_eval.py +184 -0
  87. aiq/experimental/__init__.py +0 -0
  88. aiq/experimental/decorators/__init__.py +0 -0
  89. aiq/experimental/decorators/experimental_warning_decorator.py +130 -0
  90. aiq/experimental/inference_time_scaling/__init__.py +0 -0
  91. aiq/experimental/inference_time_scaling/editing/__init__.py +0 -0
  92. aiq/experimental/inference_time_scaling/editing/iterative_plan_refinement_editor.py +147 -0
  93. aiq/experimental/inference_time_scaling/editing/llm_as_a_judge_editor.py +204 -0
  94. aiq/experimental/inference_time_scaling/editing/motivation_aware_summarization.py +107 -0
  95. aiq/experimental/inference_time_scaling/functions/__init__.py +0 -0
  96. aiq/experimental/inference_time_scaling/functions/execute_score_select_function.py +105 -0
  97. aiq/experimental/inference_time_scaling/functions/its_tool_orchestration_function.py +205 -0
  98. aiq/experimental/inference_time_scaling/functions/its_tool_wrapper_function.py +146 -0
  99. aiq/experimental/inference_time_scaling/functions/plan_select_execute_function.py +224 -0
  100. aiq/experimental/inference_time_scaling/models/__init__.py +0 -0
  101. aiq/experimental/inference_time_scaling/models/editor_config.py +132 -0
  102. aiq/experimental/inference_time_scaling/models/its_item.py +48 -0
  103. aiq/experimental/inference_time_scaling/models/scoring_config.py +112 -0
  104. aiq/experimental/inference_time_scaling/models/search_config.py +120 -0
  105. aiq/experimental/inference_time_scaling/models/selection_config.py +154 -0
  106. aiq/experimental/inference_time_scaling/models/stage_enums.py +43 -0
  107. aiq/experimental/inference_time_scaling/models/strategy_base.py +66 -0
  108. aiq/experimental/inference_time_scaling/models/tool_use_config.py +41 -0
  109. aiq/experimental/inference_time_scaling/register.py +36 -0
  110. aiq/experimental/inference_time_scaling/scoring/__init__.py +0 -0
  111. aiq/experimental/inference_time_scaling/scoring/llm_based_agent_scorer.py +168 -0
  112. aiq/experimental/inference_time_scaling/scoring/llm_based_plan_scorer.py +168 -0
  113. aiq/experimental/inference_time_scaling/scoring/motivation_aware_scorer.py +111 -0
  114. aiq/experimental/inference_time_scaling/search/__init__.py +0 -0
  115. aiq/experimental/inference_time_scaling/search/multi_llm_planner.py +128 -0
  116. aiq/experimental/inference_time_scaling/search/multi_query_retrieval_search.py +122 -0
  117. aiq/experimental/inference_time_scaling/search/single_shot_multi_plan_planner.py +128 -0
  118. aiq/experimental/inference_time_scaling/selection/__init__.py +0 -0
  119. aiq/experimental/inference_time_scaling/selection/best_of_n_selector.py +63 -0
  120. aiq/experimental/inference_time_scaling/selection/llm_based_agent_output_selector.py +131 -0
  121. aiq/experimental/inference_time_scaling/selection/llm_based_output_merging_selector.py +159 -0
  122. aiq/experimental/inference_time_scaling/selection/llm_based_plan_selector.py +128 -0
  123. aiq/experimental/inference_time_scaling/selection/threshold_selector.py +58 -0
  124. aiq/front_ends/console/authentication_flow_handler.py +233 -0
  125. aiq/front_ends/console/console_front_end_plugin.py +11 -2
  126. aiq/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
  127. aiq/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +27 -0
  128. aiq/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +107 -0
  129. aiq/front_ends/fastapi/fastapi_front_end_config.py +93 -9
  130. aiq/front_ends/fastapi/fastapi_front_end_controller.py +68 -0
  131. aiq/front_ends/fastapi/fastapi_front_end_plugin.py +14 -1
  132. aiq/front_ends/fastapi/fastapi_front_end_plugin_worker.py +537 -52
  133. aiq/front_ends/fastapi/html_snippets/__init__.py +14 -0
  134. aiq/front_ends/fastapi/html_snippets/auth_code_grant_success.py +35 -0
  135. aiq/front_ends/fastapi/job_store.py +47 -25
  136. aiq/front_ends/fastapi/main.py +2 -0
  137. aiq/front_ends/fastapi/message_handler.py +108 -89
  138. aiq/front_ends/fastapi/step_adaptor.py +2 -1
  139. aiq/llm/aws_bedrock_llm.py +57 -0
  140. aiq/llm/nim_llm.py +2 -1
  141. aiq/llm/openai_llm.py +3 -2
  142. aiq/llm/register.py +1 -0
  143. aiq/meta/pypi.md +12 -12
  144. aiq/object_store/__init__.py +20 -0
  145. aiq/object_store/in_memory_object_store.py +74 -0
  146. aiq/object_store/interfaces.py +84 -0
  147. aiq/object_store/models.py +36 -0
  148. aiq/object_store/register.py +20 -0
  149. aiq/observability/__init__.py +14 -0
  150. aiq/observability/exporter/__init__.py +14 -0
  151. aiq/observability/exporter/base_exporter.py +449 -0
  152. aiq/observability/exporter/exporter.py +78 -0
  153. aiq/observability/exporter/file_exporter.py +33 -0
  154. aiq/observability/exporter/processing_exporter.py +269 -0
  155. aiq/observability/exporter/raw_exporter.py +52 -0
  156. aiq/observability/exporter/span_exporter.py +264 -0
  157. aiq/observability/exporter_manager.py +335 -0
  158. aiq/observability/mixin/__init__.py +14 -0
  159. aiq/observability/mixin/batch_config_mixin.py +26 -0
  160. aiq/observability/mixin/collector_config_mixin.py +23 -0
  161. aiq/observability/mixin/file_mixin.py +288 -0
  162. aiq/observability/mixin/file_mode.py +23 -0
  163. aiq/observability/mixin/resource_conflict_mixin.py +134 -0
  164. aiq/observability/mixin/serialize_mixin.py +61 -0
  165. aiq/observability/mixin/type_introspection_mixin.py +183 -0
  166. aiq/observability/processor/__init__.py +14 -0
  167. aiq/observability/processor/batching_processor.py +316 -0
  168. aiq/observability/processor/intermediate_step_serializer.py +28 -0
  169. aiq/observability/processor/processor.py +68 -0
  170. aiq/observability/register.py +36 -39
  171. aiq/observability/utils/__init__.py +14 -0
  172. aiq/observability/utils/dict_utils.py +236 -0
  173. aiq/observability/utils/time_utils.py +31 -0
  174. aiq/profiler/calc/__init__.py +14 -0
  175. aiq/profiler/calc/calc_runner.py +623 -0
  176. aiq/profiler/calc/calculations.py +288 -0
  177. aiq/profiler/calc/data_models.py +176 -0
  178. aiq/profiler/calc/plot.py +345 -0
  179. aiq/profiler/callbacks/langchain_callback_handler.py +22 -10
  180. aiq/profiler/data_models.py +24 -0
  181. aiq/profiler/inference_metrics_model.py +3 -0
  182. aiq/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +8 -0
  183. aiq/profiler/inference_optimization/data_models.py +2 -2
  184. aiq/profiler/inference_optimization/llm_metrics.py +2 -2
  185. aiq/profiler/profile_runner.py +61 -21
  186. aiq/runtime/loader.py +9 -3
  187. aiq/runtime/runner.py +23 -9
  188. aiq/runtime/session.py +25 -7
  189. aiq/runtime/user_metadata.py +2 -3
  190. aiq/tool/chat_completion.py +74 -0
  191. aiq/tool/code_execution/README.md +152 -0
  192. aiq/tool/code_execution/code_sandbox.py +151 -72
  193. aiq/tool/code_execution/local_sandbox/.gitignore +1 -0
  194. aiq/tool/code_execution/local_sandbox/local_sandbox_server.py +139 -24
  195. aiq/tool/code_execution/local_sandbox/sandbox.requirements.txt +3 -1
  196. aiq/tool/code_execution/local_sandbox/start_local_sandbox.sh +27 -2
  197. aiq/tool/code_execution/register.py +7 -3
  198. aiq/tool/code_execution/test_code_execution_sandbox.py +414 -0
  199. aiq/tool/mcp/exceptions.py +142 -0
  200. aiq/tool/mcp/mcp_client.py +41 -6
  201. aiq/tool/mcp/mcp_tool.py +3 -2
  202. aiq/tool/register.py +1 -0
  203. aiq/tool/server_tools.py +6 -3
  204. aiq/utils/exception_handlers/automatic_retries.py +289 -0
  205. aiq/utils/exception_handlers/mcp.py +211 -0
  206. aiq/utils/io/model_processing.py +28 -0
  207. aiq/utils/log_utils.py +37 -0
  208. aiq/utils/string_utils.py +38 -0
  209. aiq/utils/type_converter.py +18 -2
  210. aiq/utils/type_utils.py +87 -0
  211. {aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc2.dist-info}/METADATA +53 -21
  212. aiqtoolkit-1.2.0rc2.dist-info/RECORD +436 -0
  213. {aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc2.dist-info}/WHEEL +1 -1
  214. {aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc2.dist-info}/entry_points.txt +3 -0
  215. aiq/front_ends/fastapi/websocket.py +0 -148
  216. aiq/observability/async_otel_listener.py +0 -429
  217. aiqtoolkit-1.2.0.dev0.dist-info/RECORD +0 -316
  218. {aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc2.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
  219. {aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc2.dist-info}/licenses/LICENSE.md +0 -0
  220. {aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,345 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import logging
17
+ from pathlib import Path
18
+
19
+ import matplotlib.pyplot as plt
20
+ import numpy as np
21
+ import pandas as pd
22
+
23
+ from aiq.profiler.calc.data_models import LinearFitResult
24
+ from aiq.profiler.calc.data_models import SizingMetrics
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
+ # Plotting constants
30
class PlotConfig:
    """Shared styling constants for the concurrency sizing plots in this module."""

    # -- Simple (line) plot ------------------------------------------------
    SIMPLE_FIGSIZE = (12, 6)
    SIMPLE_LINEWIDTH = 2
    SIMPLE_DPI = 150

    # -- Enhanced (analysis) plot ------------------------------------------
    ENHANCED_FIGSIZE = (16, 6)
    ENHANCED_DPI = 300

    # -- Markers and trend-line styling ------------------------------------
    DATA_MARKER = 'o'
    OUTLIER_MARKER = 'x'
    OUTLIER_COLOR = 'crimson'
    TREND_COLOR = 'r'
    TREND_LINESTYLE = '--'
    TREND_ALPHA = 0.8
    TREND_LINEWIDTH = 2.0

    # -- Named colors for series, SLA line and text boxes ------------------
    LLM_LATENCY_COLOR = 'steelblue'
    RUNTIME_COLOR = 'darkgreen'
    SLA_COLOR = 'red'
    NOTE_BOX_COLOR = 'mistyrose'
    NOTE_TEXT_COLOR = 'crimson'
    STATS_BOX_COLOR = 'lightblue'

    # -- Opacity (alpha) values --------------------------------------------
    DATA_ALPHA = 0.7
    OUTLIER_ALPHA = 0.9
    GRID_ALPHA = 0.3
    SLA_ALPHA = 0.7
    NOTE_BOX_ALPHA = 0.7
    STATS_BOX_ALPHA = 0.8

    # -- Scatter point sizes and edge width --------------------------------
    DATA_POINT_SIZE = 120
    OUTLIER_POINT_SIZE = 140
    DATA_LINEWIDTH = 1

    # -- Font sizes --------------------------------------------------------
    AXIS_LABEL_FONTSIZE = 12
    TITLE_FONTSIZE = 14
    LEGEND_FONTSIZE = 10
    NOTE_FONTSIZE = 10
    STATS_FONTSIZE = 10

    # -- Text anchor positions (passed as x/y to ax.text / fig.text) -------
    NOTE_X_POS = 0.98
    NOTE_Y_POS = 0.02
    STATS_X_POS = 0.02
    STATS_Y_POS = 0.02

    # -- Padding of the rounded text boxes ---------------------------------
    NOTE_BOX_PAD = 0.3
    STATS_BOX_PAD = 0.5

    # -- Number of samples used to draw the trend line ---------------------
    TREND_LINE_POINTS = 100

    # -- Font weights ------------------------------------------------------
    AXIS_LABEL_FONTWEIGHT = 'bold'
    TITLE_FONTWEIGHT = 'bold'
93
+
94
+
95
def plot_concurrency_vs_time_metrics_simple(df: pd.DataFrame, output_dir: Path) -> None:
    """
    Save a simple line plot of concurrency vs. p95 LLM latency and p95 workflow runtime.

    Args:
        df: Frame providing the ``concurrency``, ``llm_latency_p95`` and
            ``workflow_runtime_p95`` columns that are plotted.
        output_dir: Directory that receives ``concurrency_vs_p95_simple.png``.
            It is created (including parents) when it does not exist yet.
    """
    # Create the destination up front so saving cannot fail on a missing directory.
    output_dir.mkdir(parents=True, exist_ok=True)

    # Work on an explicit figure handle instead of pyplot's implicit "current figure",
    # so exactly this figure is saved and closed even if other figures are open.
    fig, ax = plt.subplots(figsize=PlotConfig.SIMPLE_FIGSIZE)
    try:
        ax.plot(df["concurrency"],
                df["llm_latency_p95"],
                label="p95 LLM Latency (s)",
                marker=PlotConfig.DATA_MARKER,
                linewidth=PlotConfig.SIMPLE_LINEWIDTH)
        ax.plot(df["concurrency"],
                df["workflow_runtime_p95"],
                label="p95 Workflow Runtime (s)",
                marker="s",
                linewidth=PlotConfig.SIMPLE_LINEWIDTH)
        ax.set_xlabel("Concurrency")
        ax.set_ylabel("Time (seconds)")
        ax.set_title("Concurrency vs. p95 LLM Latency and Workflow Runtime")
        ax.grid(True, alpha=PlotConfig.GRID_ALPHA)
        ax.legend()
        fig.tight_layout()

        simple_plot_path = output_dir / "concurrency_vs_p95_simple.png"
        fig.savefig(simple_plot_path, dpi=PlotConfig.SIMPLE_DPI, bbox_inches='tight')
    finally:
        # Release the figure even when plotting or saving raised.
        plt.close(fig)

    logger.info("Simple plot saved to %s", simple_plot_path)
121
+
122
+
123
def plot_metric_vs_concurrency_with_optional_fit(
    ax: plt.Axes,
    x: np.ndarray,
    y: np.ndarray,
    metric_name: str,
    y_label: str,
    title: str,
    color: str,
    sla_value: float = 0.0,
    sla_label: str | None = None,
    fit: LinearFitResult | None = None,
) -> bool:
    """
    Plot a metric vs concurrency with a pre-computed fit, outlier highlighting, and SLA line.

    Args:
        ax: Axes to draw on.
        x: Concurrency values.
        y: Metric values, aligned element-wise with ``x``.
        metric_name: Name used in the warning that is logged when nothing is drawn.
        y_label: Label for the y axis.
        title: Title of the axes.
        color: Color of the regular data points.
        sla_value: Height of the horizontal SLA line; the line is drawn only when > 0.
        sla_label: Legend label for the SLA line. Defaults to
            ``SLA Threshold (<sla_value>s)`` when omitted or empty.
        fit: Pre-computed linear fit. Its ``slope``, ``intercept``, ``r_squared`` and
            ``outliers_removed`` attributes are read.

    Returns:
        ``True`` when the axes were populated, ``False`` when plotting was skipped
        because no fit was supplied or there were no data points.
    """
    # Skip analysis plot if no fit is available
    if fit is None:
        logger.warning("No linear fit available for %s, skipping analysis plot", metric_name)
        return False

    # The trend line spans x.min()..x.max(), which is undefined for an empty array.
    if x.size == 0:
        logger.warning("No data points available for %s, skipping analysis plot", metric_name)
        return False

    # Styling shared by the regular data points in both branches below.
    data_style = dict(alpha=PlotConfig.DATA_ALPHA,
                      s=PlotConfig.DATA_POINT_SIZE,
                      c=color,
                      edgecolors='white',
                      linewidth=PlotConfig.DATA_LINEWIDTH,
                      marker=PlotConfig.DATA_MARKER,
                      label='Data Points')

    outliers_note = ""
    if fit.outliers_removed:
        # Points whose concurrency is listed in ``outliers_removed`` are the outliers;
        # the mask is computed once and inverted for the retained points.
        outlier_mask = np.isin(x, fit.outliers_removed)
        ax.scatter(x[~outlier_mask], y[~outlier_mask], **data_style)
        ax.scatter(x[outlier_mask],
                   y[outlier_mask],
                   alpha=PlotConfig.OUTLIER_ALPHA,
                   s=PlotConfig.OUTLIER_POINT_SIZE,
                   c=PlotConfig.OUTLIER_COLOR,
                   marker=PlotConfig.OUTLIER_MARKER,
                   label='Removed Outliers')
        outliers_note = f"Outliers removed: concurrencies {fit.outliers_removed}"
    else:
        # No outliers: plot all data points
        ax.scatter(x, y, **data_style)

    # Plot trend line using the fit, sampled across the full concurrency range
    x_fit = np.linspace(x.min(), x.max(), PlotConfig.TREND_LINE_POINTS)
    y_fit = fit.slope * x_fit + fit.intercept
    ax.plot(x_fit,
            y_fit,
            PlotConfig.TREND_LINESTYLE,
            alpha=PlotConfig.TREND_ALPHA,
            linewidth=PlotConfig.TREND_LINEWIDTH,
            color=PlotConfig.TREND_COLOR,
            label=f'Trend (slope={fit.slope:.4f}, R²={fit.r_squared:.3f})')

    if sla_value > 0:
        ax.axhline(y=sla_value,
                   color=PlotConfig.SLA_COLOR,
                   linestyle=':',
                   alpha=PlotConfig.SLA_ALPHA,
                   linewidth=2,
                   label=sla_label or f'SLA Threshold ({sla_value}s)')

    ax.set_xlabel('Concurrency', fontsize=PlotConfig.AXIS_LABEL_FONTSIZE, fontweight=PlotConfig.AXIS_LABEL_FONTWEIGHT)
    ax.set_ylabel(y_label, fontsize=PlotConfig.AXIS_LABEL_FONTSIZE, fontweight=PlotConfig.AXIS_LABEL_FONTWEIGHT)
    ax.set_title(title, fontsize=PlotConfig.TITLE_FONTSIZE, fontweight=PlotConfig.TITLE_FONTWEIGHT)
    ax.grid(True, alpha=PlotConfig.GRID_ALPHA)
    ax.legend(fontsize=PlotConfig.LEGEND_FONTSIZE)

    if outliers_note:
        # Anchor the note in axes coordinates so it stays in the corner regardless of data range.
        ax.text(PlotConfig.NOTE_X_POS,
                PlotConfig.NOTE_Y_POS,
                outliers_note,
                transform=ax.transAxes,
                fontsize=PlotConfig.NOTE_FONTSIZE,
                color=PlotConfig.NOTE_TEXT_COLOR,
                ha='right',
                va='bottom',
                bbox=dict(boxstyle=f'round,pad={PlotConfig.NOTE_BOX_PAD}',
                          facecolor=PlotConfig.NOTE_BOX_COLOR,
                          alpha=PlotConfig.NOTE_BOX_ALPHA))

    return True
233
+
234
+
235
def plot_concurrency_vs_time_metrics(metrics_per_concurrency: dict[int, SizingMetrics],
                                     output_dir: Path,
                                     target_llm_latency: float = 0.0,
                                     target_runtime: float = 0.0,
                                     llm_latency_fit: LinearFitResult | None = None,
                                     runtime_fit: LinearFitResult | None = None) -> None:
    """
    Plot concurrency vs. p95 LLM latency and p95 workflow runtime from metrics_per_concurrency.

    The simple plot is always written. The enhanced analysis plot gets one panel per
    supplied fit (LLM latency and/or workflow runtime) and is skipped when no fit is given.

    Args:
        metrics_per_concurrency: Metrics keyed by concurrency; ``llm_latency_p95`` and
            ``workflow_runtime_p95`` are read from each entry.
        output_dir: Directory that receives the PNG files; created if missing.
        target_llm_latency: SLA line for the LLM latency panel (drawn only when > 0).
        target_runtime: SLA line for the workflow runtime panel (drawn only when > 0).
        llm_latency_fit: Pre-computed fit for the LLM latency panel, if any.
        runtime_fit: Pre-computed fit for the workflow runtime panel, if any.
    """
    rows = [{
        "concurrency": concurrency,
        "llm_latency_p95": metrics.llm_latency_p95,
        "workflow_runtime_p95": metrics.workflow_runtime_p95,
    } for concurrency, metrics in metrics_per_concurrency.items()]

    if not rows:
        logger.warning("No valid metrics data available to plot.")
        return

    df = pd.DataFrame(rows).sort_values("concurrency")

    # Create the destination before the first file (the simple plot) is written.
    output_dir.mkdir(parents=True, exist_ok=True)

    # One (column, plot-kwargs) entry per available fit. Building the list first lets a
    # runtime-only fit get its own single panel instead of being dropped.
    panels: list[tuple[str, dict]] = []
    if llm_latency_fit is not None:
        panels.append(("llm_latency_p95",
                       dict(metric_name="llm_latency",
                            y_label='P95 LLM Latency (seconds)',
                            title='Concurrency vs P95 LLM Latency',
                            color=PlotConfig.LLM_LATENCY_COLOR,
                            sla_value=target_llm_latency,
                            sla_label=f'SLA Threshold ({target_llm_latency}s)' if target_llm_latency > 0 else None,
                            fit=llm_latency_fit)))
    if runtime_fit is not None:
        panels.append(("workflow_runtime_p95",
                       dict(metric_name="runtime",
                            y_label='P95 Workflow Runtime (seconds)',
                            title='Concurrency vs P95 Workflow Runtime',
                            color=PlotConfig.RUNTIME_COLOR,
                            sla_value=target_runtime,
                            sla_label=f'SLA Threshold ({target_runtime}s)' if target_runtime > 0 else None,
                            fit=runtime_fit)))

    # Apply the style only for the duration of this call so the caller's global
    # matplotlib settings are restored afterwards.
    with plt.style.context('seaborn-v0_8'):
        # Always generate simple plot first
        plot_concurrency_vs_time_metrics_simple(df, output_dir)

        if not panels:
            logger.warning("No linear fits available for analysis plots, skipping enhanced plot")
            return

        _save_enhanced_concurrency_plot(df, output_dir, panels)


def _save_enhanced_concurrency_plot(df: pd.DataFrame, output_dir: Path, panels: list[tuple[str, dict]]) -> None:
    """Draw one analysis subplot per panel and save them as ``concurrency_vs_p95_analysis.png``."""
    figsize = PlotConfig.ENHANCED_FIGSIZE if len(panels) > 1 else (8, 6)
    # squeeze=False always yields a 2-D axes array, so one and two panels share the same code path.
    fig, axes = plt.subplots(1, len(panels), figsize=figsize, squeeze=False)
    try:
        concurrencies = df["concurrency"].to_numpy()
        # Build a full list (not a lazy any()) so every panel is drawn.
        plotted = [
            plot_metric_vs_concurrency_with_optional_fit(ax, concurrencies, df[column].to_numpy(), **plot_kwargs)
            for ax, (column, plot_kwargs) in zip(axes[0], panels)
        ]

        if not any(plotted):
            logger.warning("No analysis plots could be created, skipping enhanced plot")
            return

        # Add summary statistics
        stats_text = f'Data Points: {len(df)}\n'
        stats_text += f'LLM Latency Range: {df["llm_latency_p95"].min():.3f}-{df["llm_latency_p95"].max():.3f}s\n'
        stats_text += f'WF Runtime Range: {df["workflow_runtime_p95"].min():.3f}-{df["workflow_runtime_p95"].max():.3f}s'

        fig.text(PlotConfig.STATS_X_POS,
                 PlotConfig.STATS_Y_POS,
                 stats_text,
                 fontsize=PlotConfig.STATS_FONTSIZE,
                 bbox=dict(boxstyle=f'round,pad={PlotConfig.STATS_BOX_PAD}',
                           facecolor=PlotConfig.STATS_BOX_COLOR,
                           alpha=PlotConfig.STATS_BOX_ALPHA))

        fig.tight_layout()

        enhanced_plot_path = output_dir / "concurrency_vs_p95_analysis.png"
        fig.savefig(enhanced_plot_path,
                    dpi=PlotConfig.ENHANCED_DPI,
                    bbox_inches='tight',
                    facecolor='white',
                    edgecolor='none')
    finally:
        # Close this specific figure on every exit path (success, skip, or error).
        plt.close(fig)

    logger.info("Enhanced plot saved to %s", enhanced_plot_path)
@@ -34,6 +34,7 @@ from aiq.builder.framework_enum import LLMFrameworkEnum
34
34
  from aiq.data_models.intermediate_step import IntermediateStepPayload
35
35
  from aiq.data_models.intermediate_step import IntermediateStepType
36
36
  from aiq.data_models.intermediate_step import StreamEventData
37
+ from aiq.data_models.intermediate_step import ToolSchema
37
38
  from aiq.data_models.intermediate_step import TraceMetadata
38
39
  from aiq.data_models.intermediate_step import UsageInfo
39
40
  from aiq.profiler.callbacks.base_callback_class import BaseProfilerCallback
@@ -42,6 +43,16 @@ from aiq.profiler.callbacks.token_usage_base_model import TokenUsageBaseModel
42
43
  logger = logging.getLogger(__name__)
43
44
 
44
45
 
46
+ def _extract_tools_schema(invocation_params: dict) -> list:
47
+
48
+ tools_schema = []
49
+ if invocation_params is not None:
50
+ for tool in invocation_params.get("tools", []):
51
+ tools_schema.append(ToolSchema(**tool))
52
+
53
+ return tools_schema
54
+
55
+
45
56
  class LangchainProfilerHandler(AsyncCallbackHandler, BaseProfilerCallback): # pylint: disable=R0901
46
57
  """Callback Handler that tracks NIM info."""
47
58
 
@@ -138,16 +149,17 @@ class LangchainProfilerHandler(AsyncCallbackHandler, BaseProfilerCallback): # p
138
149
  run_id = str(run_id)
139
150
  self._run_id_to_model_name[run_id] = model_name
140
151
 
141
- stats = IntermediateStepPayload(event_type=IntermediateStepType.LLM_START,
142
- framework=LLMFrameworkEnum.LANGCHAIN,
143
- name=model_name,
144
- UUID=run_id,
145
- data=StreamEventData(input=copy.deepcopy(messages[0])),
146
- metadata=TraceMetadata(chat_inputs=copy.deepcopy(messages[0])),
147
- usage_info=UsageInfo(token_usage=TokenUsageBaseModel(),
148
- num_llm_calls=1,
149
- seconds_between_calls=int(time.time() -
150
- self.last_call_ts)))
152
+ stats = IntermediateStepPayload(
153
+ event_type=IntermediateStepType.LLM_START,
154
+ framework=LLMFrameworkEnum.LANGCHAIN,
155
+ name=model_name,
156
+ UUID=run_id,
157
+ data=StreamEventData(input=copy.deepcopy(messages[0])),
158
+ metadata=TraceMetadata(chat_inputs=copy.deepcopy(messages[0]),
159
+ tools_schema=_extract_tools_schema(kwargs.get("invocation_params", {}))),
160
+ usage_info=UsageInfo(token_usage=TokenUsageBaseModel(),
161
+ num_llm_calls=1,
162
+ seconds_between_calls=int(time.time() - self.last_call_ts)))
151
163
 
152
164
  self.step_manager.push_intermediate_step(stats)
153
165
  self._run_id_to_llm_input[run_id] = messages[0][-1].content
@@ -0,0 +1,24 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ from pydantic import BaseModel
17
+
18
+ from aiq.profiler.inference_metrics_model import InferenceMetricsModel
19
+ from aiq.profiler.inference_optimization.data_models import WorkflowRuntimeMetrics
20
+
21
+
22
class ProfilerResults(BaseModel):
    """
    Result model used as the return type of ``ProfilerRunner.run``.

    Both fields are optional and default to ``None``.
    """
    # Workflow runtime metrics; None when not set.
    workflow_runtime_metrics: WorkflowRuntimeMetrics | None = None
    # LLM latency metrics as an InferenceMetricsModel; None when not set.
    # NOTE(review): "ci" presumably stands for confidence interval - confirm.
    llm_latency_ci: InferenceMetricsModel | None = None
@@ -23,3 +23,6 @@ class InferenceMetricsModel(BaseModel):
23
23
  ninetieth_interval: tuple[float, float] = Field(default=(0, 0), description="90% confidence interval")
24
24
  ninety_fifth_interval: tuple[float, float] = Field(default=(0, 0), description="95% confidence interval")
25
25
  ninety_ninth_interval: tuple[float, float] = Field(default=(0, 0), description="99% confidence interval")
26
+ p90: float = Field(default=0, description="90th percentile of the samples")
27
+ p95: float = Field(default=0, description="95th percentile of the samples")
28
+ p99: float = Field(default=0, description="99th percentile of the samples")
@@ -69,13 +69,20 @@ def build_call_tree_for_example(example_df: pd.DataFrame) -> list[CallNode]:
69
69
  return "LLM"
70
70
  if evt.startswith("TOOL_"):
71
71
  return "TOOL"
72
+ if evt.startswith("FUNCTION_"):
73
+ return "FUNCTION"
74
+ if evt.startswith("SPAN_"):
75
+ return "FUNCTION"
72
76
  return None
73
77
 
74
78
  def get_op_name(row: pd.Series, op_type: str) -> str:
75
79
  if op_type == "LLM":
76
80
  return row.get("llm_name") or "unknown_llm"
81
+ if op_type == "FUNCTION":
82
+ return row.get("function_name") or "unknown_function"
77
83
  if op_type == "TOOL":
78
84
  return row.get("tool_name") or "unknown_tool"
85
+
79
86
  return "unknown_op"
80
87
 
81
88
  for _, row in example_df.iterrows():
@@ -309,6 +316,7 @@ def save_gantt_chart(all_nodes: list[CallNode], output_path: str) -> None:
309
316
  color_map = {
310
317
  "LLM": "tab:blue",
311
318
  "TOOL": "tab:green",
319
+ "FUNCTION": "tab:orange",
312
320
  }
313
321
  default_color = "tab:gray"
314
322
 
@@ -220,7 +220,7 @@ class CallNode(BaseModel):
220
220
  return "\n".join([info] + child_strs)
221
221
 
222
222
 
223
- CallNode.update_forward_refs()
223
+ CallNode.model_rebuild()
224
224
 
225
225
 
226
226
  class NodeMetrics(BaseModel):
@@ -296,7 +296,7 @@ class ConcurrencyCallNode(CallNode):
296
296
  llm_text_output: str | None = None
297
297
 
298
298
 
299
- ConcurrencyCallNode.update_forward_refs()
299
+ ConcurrencyCallNode.model_rebuild()
300
300
 
301
301
 
302
302
  class ConcurrencySpikeInfo(BaseModel):
@@ -176,8 +176,8 @@ class LLMMetrics:
176
176
  return subdf
177
177
 
178
178
  # Apply the group metrics
179
- df = (df.groupby(['example_number', 'function_name'],
180
- group_keys=False).apply(_compute_group_metrics).sort_index())
179
+ df_group = df.groupby(['example_number', 'function_name'], group_keys=False)
180
+ df = df_group[df.columns].apply(_compute_group_metrics).sort_index()
181
181
 
182
182
  # ---------------------------------------------------------------------
183
183
  # 5. NOVA-Predicted-OSL
@@ -25,6 +25,7 @@ from pydantic import BaseModel
25
25
 
26
26
  from aiq.data_models.evaluate import ProfilerConfig
27
27
  from aiq.data_models.intermediate_step import IntermediateStep
28
+ from aiq.profiler.data_models import ProfilerResults
28
29
  from aiq.profiler.forecasting.model_trainer import ModelTrainer
29
30
  from aiq.profiler.inference_metrics_model import InferenceMetricsModel
30
31
  from aiq.profiler.utils import create_standardized_dataframe
@@ -67,9 +68,10 @@ class ProfilerRunner:
67
68
  All computed metrics are saved to a metrics JSON file at the end.
68
69
  """
69
70
 
70
- def __init__(self, profiler_config: ProfilerConfig, output_dir: Path):
71
+ def __init__(self, profiler_config: ProfilerConfig, output_dir: Path, write_output: bool = True):
71
72
  self.profile_config = profiler_config
72
73
  self.output_dir = output_dir
74
+ self.write_output = write_output
73
75
  self._converter = TypeConverter([])
74
76
 
75
77
  # Holds per-request data (prompt, output, usage_stats, etc.)
@@ -80,7 +82,7 @@ class ProfilerRunner:
80
82
  # Ensure output directory
81
83
  os.makedirs(output_dir, exist_ok=True)
82
84
 
83
- async def run(self, all_steps: list[list[IntermediateStep]]):
85
+ async def run(self, all_steps: list[list[IntermediateStep]]) -> ProfilerResults:
84
86
  """
85
87
  Main entrypoint: Works on Input DataFrame generated from eval to fit forecasting model,
86
88
  writes out combined requests JSON, then computes and saves additional metrics,
@@ -113,10 +115,11 @@ class ProfilerRunner:
113
115
  self.all_requests_data.append({"request_number": i, "intermediate_steps": request_data})
114
116
 
115
117
  # Write the final big JSON (all requests)
116
- final_path = os.path.join(self.output_dir, "all_requests_profiler_traces.json")
117
- with open(final_path, 'w', encoding='utf-8') as f:
118
- json.dump(self.all_requests_data, f, indent=2, default=str)
119
- logger.info("Wrote combined data to: %s", final_path)
118
+ if self.write_output:
119
+ final_path = os.path.join(self.output_dir, "all_requests_profiler_traces.json")
120
+ with open(final_path, 'w', encoding='utf-8') as f:
121
+ json.dump(self.all_requests_data, f, indent=2, default=str)
122
+ logger.info("Wrote combined data to: %s", final_path)
120
123
 
121
124
  # ------------------------------------------------------------
122
125
  # Generate one standardized dataframe for all usage stats
@@ -171,7 +174,7 @@ class ProfilerRunner:
171
174
  uniqueness = compute_inter_query_token_uniqueness_by_llm(all_steps)
172
175
  token_uniqueness_results = uniqueness
173
176
 
174
- if self.profile_config.workflow_runtime_forecast:
177
+ if self.profile_config.workflow_runtime_forecast or self.profile_config.base_metrics:
175
178
  # ------------------------------------------------------------
176
179
  # Compute and save workflow runtime metrics
177
180
  # ------------------------------------------------------------
@@ -184,7 +187,7 @@ class ProfilerRunner:
184
187
  token_uniqueness=token_uniqueness_results,
185
188
  workflow_runtimes=workflow_runtimes_results)
186
189
 
187
- if inference_optimization_results:
190
+ if self.write_output and inference_optimization_results:
188
191
  # Save to JSON
189
192
  optimization_results_path = os.path.join(self.output_dir, "inference_optimization.json")
190
193
  with open(optimization_results_path, 'w', encoding='utf-8') as f:
@@ -248,14 +251,14 @@ class ProfilerRunner:
248
251
  exclude=["textual_report"])
249
252
  logger.info("Prefix span analysis complete")
250
253
 
251
- if workflow_profiling_reports:
254
+ if self.write_output and workflow_profiling_reports:
252
255
  # Save to text file
253
256
  profiling_report_path = os.path.join(self.output_dir, "workflow_profiling_report.txt")
254
257
  with open(profiling_report_path, 'w', encoding='utf-8') as f:
255
258
  f.write(workflow_profiling_reports)
256
259
  logger.info("Wrote workflow profiling report to: %s", profiling_report_path)
257
260
 
258
- if workflow_profiling_metrics:
261
+ if self.write_output and workflow_profiling_metrics:
259
262
  # Save to JSON
260
263
  profiling_metrics_path = os.path.join(self.output_dir, "workflow_profiling_metrics.json")
261
264
  with open(profiling_metrics_path, 'w', encoding='utf-8') as f:
@@ -275,16 +278,19 @@ class ProfilerRunner:
275
278
  logger.info("Fitted model for forecasting.")
276
279
  except Exception as e:
277
280
  logger.exception("Fitting model failed. %s", e, exc_info=True)
278
- return
281
+ return ProfilerResults()
279
282
 
280
- os.makedirs(self.output_dir, exist_ok=True)
283
+ if self.write_output:
284
+ os.makedirs(self.output_dir, exist_ok=True)
281
285
 
282
- import pickle
283
- with open(os.path.join(self.output_dir, "fitted_model.pkl"), 'wb') as f:
284
- pickle.dump(fitted_model, f)
286
+ import pickle
287
+ with open(os.path.join(self.output_dir, "fitted_model.pkl"), 'wb') as f:
288
+ pickle.dump(fitted_model, f)
285
289
 
286
290
  logger.info("Saved fitted model to disk.")
287
291
 
292
+ return ProfilerResults(workflow_runtime_metrics=workflow_runtimes_results, llm_latency_ci=llm_latency_ci)
293
+
288
294
  # -------------------------------------------------------------------
289
295
  # Confidence Intervals / Metrics
290
296
  # -------------------------------------------------------------------
@@ -391,7 +397,8 @@ class ProfilerRunner:
391
397
 
392
398
  def _compute_confidence_intervals(self, data: list[float], metric_name: str) -> InferenceMetricsModel:
393
399
  """
394
- Helper to compute 90, 95, 99% confidence intervals for the mean of a dataset.
400
+ Helper to compute 90, 95, 99% confidence intervals **and** the empirical
401
+ 90th/95th/99th percentiles (p90/p95/p99) for the mean of a dataset.
395
402
  Uses a z-score from the normal approximation for large samples.
396
403
 
397
404
  Returns a dict like::
@@ -409,11 +416,16 @@ class ProfilerRunner:
409
416
  n = len(data)
410
417
  mean_val = statistics.mean(data)
411
418
  if n <= 1:
412
- return InferenceMetricsModel(n=n,
413
- mean=mean_val,
414
- ninetieth_interval=(mean_val, mean_val),
415
- ninety_fifth_interval=(mean_val, mean_val),
416
- ninety_ninth_interval=(mean_val, mean_val))
419
+ return InferenceMetricsModel(
420
+ n=n,
421
+ mean=mean_val,
422
+ ninetieth_interval=(mean_val, mean_val),
423
+ ninety_fifth_interval=(mean_val, mean_val),
424
+ ninety_ninth_interval=(mean_val, mean_val),
425
+ p90=mean_val,
426
+ p95=mean_val,
427
+ p99=mean_val,
428
+ )
417
429
 
418
430
  stdev_val = statistics.pstdev(data) # population stdev or use stdev for sample
419
431
  # standard error
@@ -430,4 +442,32 @@ class ProfilerRunner:
430
442
  # Optionally, store more info
431
443
  intervals["n"] = n
432
444
  intervals["mean"] = mean_val
445
+
446
+ # ------------------------------------------------------------------
447
+ # Percentiles
448
+ # ------------------------------------------------------------------
449
+ sorted_data = sorted(data)
450
+
451
+ def _percentile(arr: list[float], pct: float) -> float:
452
+ """
453
+ Linear interpolation between closest ranks.
454
+ pct is given from 0‑100 (e.g. 90 for p90).
455
+ """
456
+ if not arr:
457
+ return 0.0
458
+ k = (len(arr) - 1) * (pct / 100.0)
459
+ f = math.floor(k)
460
+ c = math.ceil(k)
461
+ if f == c:
462
+ return arr[int(k)]
463
+ return arr[f] + (arr[c] - arr[f]) * (k - f)
464
+
465
+ p90_val = _percentile(sorted_data, 90)
466
+ p95_val = _percentile(sorted_data, 95)
467
+ p99_val = _percentile(sorted_data, 99)
468
+
469
+ intervals["p90"] = p90_val
470
+ intervals["p95"] = p95_val
471
+ intervals["p99"] = p99_val
472
+
433
473
  return InferenceMetricsModel(**intervals)