aiqtoolkit 1.2.0.dev0__py3-none-any.whl → 1.2.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiqtoolkit might be problematic. Click here for more details.

Files changed (220) hide show
  1. aiq/agent/base.py +170 -8
  2. aiq/agent/dual_node.py +1 -1
  3. aiq/agent/react_agent/agent.py +146 -112
  4. aiq/agent/react_agent/prompt.py +1 -6
  5. aiq/agent/react_agent/register.py +36 -35
  6. aiq/agent/rewoo_agent/agent.py +36 -35
  7. aiq/agent/rewoo_agent/register.py +2 -2
  8. aiq/agent/tool_calling_agent/agent.py +3 -7
  9. aiq/agent/tool_calling_agent/register.py +1 -1
  10. aiq/authentication/__init__.py +14 -0
  11. aiq/authentication/api_key/__init__.py +14 -0
  12. aiq/authentication/api_key/api_key_auth_provider.py +92 -0
  13. aiq/authentication/api_key/api_key_auth_provider_config.py +124 -0
  14. aiq/authentication/api_key/register.py +26 -0
  15. aiq/authentication/exceptions/__init__.py +14 -0
  16. aiq/authentication/exceptions/api_key_exceptions.py +38 -0
  17. aiq/authentication/exceptions/auth_code_grant_exceptions.py +86 -0
  18. aiq/authentication/exceptions/call_back_exceptions.py +38 -0
  19. aiq/authentication/exceptions/request_exceptions.py +54 -0
  20. aiq/authentication/http_basic_auth/__init__.py +0 -0
  21. aiq/authentication/http_basic_auth/http_basic_auth_provider.py +81 -0
  22. aiq/authentication/http_basic_auth/register.py +30 -0
  23. aiq/authentication/interfaces.py +93 -0
  24. aiq/authentication/oauth2/__init__.py +14 -0
  25. aiq/authentication/oauth2/oauth2_auth_code_flow_provider.py +107 -0
  26. aiq/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +39 -0
  27. aiq/authentication/oauth2/register.py +25 -0
  28. aiq/authentication/register.py +21 -0
  29. aiq/builder/builder.py +64 -2
  30. aiq/builder/component_utils.py +16 -3
  31. aiq/builder/context.py +37 -0
  32. aiq/builder/eval_builder.py +43 -2
  33. aiq/builder/function.py +44 -12
  34. aiq/builder/function_base.py +1 -1
  35. aiq/builder/intermediate_step_manager.py +6 -8
  36. aiq/builder/user_interaction_manager.py +3 -0
  37. aiq/builder/workflow.py +23 -18
  38. aiq/builder/workflow_builder.py +421 -61
  39. aiq/cli/commands/info/list_mcp.py +103 -16
  40. aiq/cli/commands/sizing/__init__.py +14 -0
  41. aiq/cli/commands/sizing/calc.py +294 -0
  42. aiq/cli/commands/sizing/sizing.py +27 -0
  43. aiq/cli/commands/start.py +2 -1
  44. aiq/cli/entrypoint.py +2 -0
  45. aiq/cli/register_workflow.py +80 -0
  46. aiq/cli/type_registry.py +151 -30
  47. aiq/data_models/api_server.py +124 -12
  48. aiq/data_models/authentication.py +231 -0
  49. aiq/data_models/common.py +35 -7
  50. aiq/data_models/component.py +17 -9
  51. aiq/data_models/component_ref.py +33 -0
  52. aiq/data_models/config.py +60 -3
  53. aiq/data_models/dataset_handler.py +2 -1
  54. aiq/data_models/embedder.py +1 -0
  55. aiq/data_models/evaluate.py +23 -0
  56. aiq/data_models/function_dependencies.py +8 -0
  57. aiq/data_models/interactive.py +10 -1
  58. aiq/data_models/intermediate_step.py +38 -5
  59. aiq/data_models/its_strategy.py +30 -0
  60. aiq/data_models/llm.py +1 -0
  61. aiq/data_models/memory.py +1 -0
  62. aiq/data_models/object_store.py +44 -0
  63. aiq/data_models/profiler.py +1 -0
  64. aiq/data_models/retry_mixin.py +35 -0
  65. aiq/data_models/span.py +187 -0
  66. aiq/data_models/telemetry_exporter.py +2 -2
  67. aiq/embedder/nim_embedder.py +2 -1
  68. aiq/embedder/openai_embedder.py +2 -1
  69. aiq/eval/config.py +19 -1
  70. aiq/eval/dataset_handler/dataset_handler.py +87 -2
  71. aiq/eval/evaluate.py +208 -27
  72. aiq/eval/evaluator/base_evaluator.py +73 -0
  73. aiq/eval/evaluator/evaluator_model.py +1 -0
  74. aiq/eval/intermediate_step_adapter.py +11 -5
  75. aiq/eval/rag_evaluator/evaluate.py +55 -15
  76. aiq/eval/rag_evaluator/register.py +6 -1
  77. aiq/eval/remote_workflow.py +7 -2
  78. aiq/eval/runners/__init__.py +14 -0
  79. aiq/eval/runners/config.py +39 -0
  80. aiq/eval/runners/multi_eval_runner.py +54 -0
  81. aiq/eval/trajectory_evaluator/evaluate.py +22 -65
  82. aiq/eval/tunable_rag_evaluator/evaluate.py +150 -168
  83. aiq/eval/tunable_rag_evaluator/register.py +2 -0
  84. aiq/eval/usage_stats.py +41 -0
  85. aiq/eval/utils/output_uploader.py +10 -1
  86. aiq/eval/utils/weave_eval.py +184 -0
  87. aiq/experimental/__init__.py +0 -0
  88. aiq/experimental/decorators/__init__.py +0 -0
  89. aiq/experimental/decorators/experimental_warning_decorator.py +130 -0
  90. aiq/experimental/inference_time_scaling/__init__.py +0 -0
  91. aiq/experimental/inference_time_scaling/editing/__init__.py +0 -0
  92. aiq/experimental/inference_time_scaling/editing/iterative_plan_refinement_editor.py +147 -0
  93. aiq/experimental/inference_time_scaling/editing/llm_as_a_judge_editor.py +204 -0
  94. aiq/experimental/inference_time_scaling/editing/motivation_aware_summarization.py +107 -0
  95. aiq/experimental/inference_time_scaling/functions/__init__.py +0 -0
  96. aiq/experimental/inference_time_scaling/functions/execute_score_select_function.py +105 -0
  97. aiq/experimental/inference_time_scaling/functions/its_tool_orchestration_function.py +205 -0
  98. aiq/experimental/inference_time_scaling/functions/its_tool_wrapper_function.py +146 -0
  99. aiq/experimental/inference_time_scaling/functions/plan_select_execute_function.py +224 -0
  100. aiq/experimental/inference_time_scaling/models/__init__.py +0 -0
  101. aiq/experimental/inference_time_scaling/models/editor_config.py +132 -0
  102. aiq/experimental/inference_time_scaling/models/its_item.py +48 -0
  103. aiq/experimental/inference_time_scaling/models/scoring_config.py +112 -0
  104. aiq/experimental/inference_time_scaling/models/search_config.py +120 -0
  105. aiq/experimental/inference_time_scaling/models/selection_config.py +154 -0
  106. aiq/experimental/inference_time_scaling/models/stage_enums.py +43 -0
  107. aiq/experimental/inference_time_scaling/models/strategy_base.py +66 -0
  108. aiq/experimental/inference_time_scaling/models/tool_use_config.py +41 -0
  109. aiq/experimental/inference_time_scaling/register.py +36 -0
  110. aiq/experimental/inference_time_scaling/scoring/__init__.py +0 -0
  111. aiq/experimental/inference_time_scaling/scoring/llm_based_agent_scorer.py +168 -0
  112. aiq/experimental/inference_time_scaling/scoring/llm_based_plan_scorer.py +168 -0
  113. aiq/experimental/inference_time_scaling/scoring/motivation_aware_scorer.py +111 -0
  114. aiq/experimental/inference_time_scaling/search/__init__.py +0 -0
  115. aiq/experimental/inference_time_scaling/search/multi_llm_planner.py +128 -0
  116. aiq/experimental/inference_time_scaling/search/multi_query_retrieval_search.py +122 -0
  117. aiq/experimental/inference_time_scaling/search/single_shot_multi_plan_planner.py +128 -0
  118. aiq/experimental/inference_time_scaling/selection/__init__.py +0 -0
  119. aiq/experimental/inference_time_scaling/selection/best_of_n_selector.py +63 -0
  120. aiq/experimental/inference_time_scaling/selection/llm_based_agent_output_selector.py +131 -0
  121. aiq/experimental/inference_time_scaling/selection/llm_based_output_merging_selector.py +159 -0
  122. aiq/experimental/inference_time_scaling/selection/llm_based_plan_selector.py +128 -0
  123. aiq/experimental/inference_time_scaling/selection/threshold_selector.py +58 -0
  124. aiq/front_ends/console/authentication_flow_handler.py +233 -0
  125. aiq/front_ends/console/console_front_end_plugin.py +11 -2
  126. aiq/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
  127. aiq/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +27 -0
  128. aiq/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +107 -0
  129. aiq/front_ends/fastapi/fastapi_front_end_config.py +93 -9
  130. aiq/front_ends/fastapi/fastapi_front_end_controller.py +68 -0
  131. aiq/front_ends/fastapi/fastapi_front_end_plugin.py +14 -1
  132. aiq/front_ends/fastapi/fastapi_front_end_plugin_worker.py +537 -52
  133. aiq/front_ends/fastapi/html_snippets/__init__.py +14 -0
  134. aiq/front_ends/fastapi/html_snippets/auth_code_grant_success.py +35 -0
  135. aiq/front_ends/fastapi/job_store.py +47 -25
  136. aiq/front_ends/fastapi/main.py +2 -0
  137. aiq/front_ends/fastapi/message_handler.py +108 -89
  138. aiq/front_ends/fastapi/step_adaptor.py +2 -1
  139. aiq/llm/aws_bedrock_llm.py +57 -0
  140. aiq/llm/nim_llm.py +2 -1
  141. aiq/llm/openai_llm.py +3 -2
  142. aiq/llm/register.py +1 -0
  143. aiq/meta/pypi.md +12 -12
  144. aiq/object_store/__init__.py +20 -0
  145. aiq/object_store/in_memory_object_store.py +74 -0
  146. aiq/object_store/interfaces.py +84 -0
  147. aiq/object_store/models.py +36 -0
  148. aiq/object_store/register.py +20 -0
  149. aiq/observability/__init__.py +14 -0
  150. aiq/observability/exporter/__init__.py +14 -0
  151. aiq/observability/exporter/base_exporter.py +449 -0
  152. aiq/observability/exporter/exporter.py +78 -0
  153. aiq/observability/exporter/file_exporter.py +33 -0
  154. aiq/observability/exporter/processing_exporter.py +269 -0
  155. aiq/observability/exporter/raw_exporter.py +52 -0
  156. aiq/observability/exporter/span_exporter.py +264 -0
  157. aiq/observability/exporter_manager.py +335 -0
  158. aiq/observability/mixin/__init__.py +14 -0
  159. aiq/observability/mixin/batch_config_mixin.py +26 -0
  160. aiq/observability/mixin/collector_config_mixin.py +23 -0
  161. aiq/observability/mixin/file_mixin.py +288 -0
  162. aiq/observability/mixin/file_mode.py +23 -0
  163. aiq/observability/mixin/resource_conflict_mixin.py +134 -0
  164. aiq/observability/mixin/serialize_mixin.py +61 -0
  165. aiq/observability/mixin/type_introspection_mixin.py +183 -0
  166. aiq/observability/processor/__init__.py +14 -0
  167. aiq/observability/processor/batching_processor.py +316 -0
  168. aiq/observability/processor/intermediate_step_serializer.py +28 -0
  169. aiq/observability/processor/processor.py +68 -0
  170. aiq/observability/register.py +36 -39
  171. aiq/observability/utils/__init__.py +14 -0
  172. aiq/observability/utils/dict_utils.py +236 -0
  173. aiq/observability/utils/time_utils.py +31 -0
  174. aiq/profiler/calc/__init__.py +14 -0
  175. aiq/profiler/calc/calc_runner.py +623 -0
  176. aiq/profiler/calc/calculations.py +288 -0
  177. aiq/profiler/calc/data_models.py +176 -0
  178. aiq/profiler/calc/plot.py +345 -0
  179. aiq/profiler/callbacks/langchain_callback_handler.py +22 -10
  180. aiq/profiler/data_models.py +24 -0
  181. aiq/profiler/inference_metrics_model.py +3 -0
  182. aiq/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +8 -0
  183. aiq/profiler/inference_optimization/data_models.py +2 -2
  184. aiq/profiler/inference_optimization/llm_metrics.py +2 -2
  185. aiq/profiler/profile_runner.py +61 -21
  186. aiq/runtime/loader.py +9 -3
  187. aiq/runtime/runner.py +23 -9
  188. aiq/runtime/session.py +25 -7
  189. aiq/runtime/user_metadata.py +2 -3
  190. aiq/tool/chat_completion.py +74 -0
  191. aiq/tool/code_execution/README.md +152 -0
  192. aiq/tool/code_execution/code_sandbox.py +151 -72
  193. aiq/tool/code_execution/local_sandbox/.gitignore +1 -0
  194. aiq/tool/code_execution/local_sandbox/local_sandbox_server.py +139 -24
  195. aiq/tool/code_execution/local_sandbox/sandbox.requirements.txt +3 -1
  196. aiq/tool/code_execution/local_sandbox/start_local_sandbox.sh +27 -2
  197. aiq/tool/code_execution/register.py +7 -3
  198. aiq/tool/code_execution/test_code_execution_sandbox.py +414 -0
  199. aiq/tool/mcp/exceptions.py +142 -0
  200. aiq/tool/mcp/mcp_client.py +41 -6
  201. aiq/tool/mcp/mcp_tool.py +3 -2
  202. aiq/tool/register.py +1 -0
  203. aiq/tool/server_tools.py +6 -3
  204. aiq/utils/exception_handlers/automatic_retries.py +289 -0
  205. aiq/utils/exception_handlers/mcp.py +211 -0
  206. aiq/utils/io/model_processing.py +28 -0
  207. aiq/utils/log_utils.py +37 -0
  208. aiq/utils/string_utils.py +38 -0
  209. aiq/utils/type_converter.py +18 -2
  210. aiq/utils/type_utils.py +87 -0
  211. {aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/METADATA +53 -21
  212. aiqtoolkit-1.2.0rc1.dist-info/RECORD +436 -0
  213. {aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/WHEEL +1 -1
  214. {aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/entry_points.txt +3 -0
  215. aiq/front_ends/fastapi/websocket.py +0 -148
  216. aiq/observability/async_otel_listener.py +0 -429
  217. aiqtoolkit-1.2.0.dev0.dist-info/RECORD +0 -316
  218. {aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
  219. {aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/licenses/LICENSE.md +0 -0
  220. {aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,288 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import logging
17
+
18
+ import numpy as np
19
+
20
+ from aiq.profiler.calc.data_models import FitConfig
21
+ from aiq.profiler.calc.data_models import GPUEstimates
22
+ from aiq.profiler.calc.data_models import LinearFitResult
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
def compute_slope(concurrencies: list[float],
                  time_metrics: list[float],
                  fit_config: FitConfig | None = None) -> LinearFitResult:
    """
    Fit a least-squares line through (concurrency, time metric) points.

    Concurrency is the independent variable (x-axis) and the time metric (runtime or latency)
    is the dependent variable (y-axis).

    Args:
        concurrencies: List of concurrency values (x-axis)
        time_metrics: List of time metric values (y-axis), one per concurrency
        fit_config: Configuration for outlier detection and fit validation;
            a default ``FitConfig()`` is used when None

    Returns:
        LinearFitResult containing slope, intercept, R-squared, and the concurrencies removed as outliers

    Raises:
        ValueError: If the inputs differ in length, if fewer than 2 points are available
            (before or after outlier removal), if all concurrency values are identical,
            or if the relationship is not linear (R² < min_r_squared)
    """
    # Use default config if none provided
    if fit_config is None:
        fit_config = FitConfig()

    # Convert to numpy arrays for calculations
    x = np.array(concurrencies)
    y = np.array(time_metrics)

    # Validate input
    if len(x) != len(y):
        raise ValueError("Concurrencies and time_metrics must have the same length")
    if len(x) < 2:
        raise ValueError("Need at least 2 points for linear regression")

    outliers_removed = []

    # Outlier removal only runs when more than 4 points are available
    if fit_config.remove_outliers and len(x) > 4:
        x, y, outliers_removed = _remove_outliers(x, y, fit_config)
        # Outlier removal may leave too few points for a regression
        if len(x) < 2:
            raise ValueError("Fewer than 2 points remain after outlier removal. Cannot compute linear fit.")

    # With no spread in x the least-squares denominator below is zero; numpy would only warn and
    # produce NaN/inf, which then slips past the R² check (NaN comparisons are False).
    if x.min() == x.max():
        raise ValueError("All concurrency values are identical. Cannot compute linear fit.")

    # Calculate linear regression using least squares
    n = len(x)
    sum_x = x.sum()
    sum_y = y.sum()
    sum_xy = (x * y).sum()
    sum_x2 = (x**2).sum()

    # Calculate slope and intercept
    slope = (n * sum_xy - sum_x * sum_y) / (n * sum_x2 - sum_x**2)
    intercept = (sum_y - slope * sum_x) / n

    # Calculate R-squared; a constant y (ss_tot == 0) is reported as 0.0
    y_pred = slope * x + intercept
    ss_res = ((y - y_pred)**2).sum()
    ss_tot = ((y - y.mean())**2).sum()
    r_squared = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0.0

    # Validate linearity
    if r_squared < fit_config.min_r_squared:
        raise ValueError(f"Poor linear fit detected (R² = {r_squared:.3f} < {fit_config.min_r_squared}). "
                         f"The relationship may not be linear. Consider using non-linear regression.")

    return LinearFitResult(slope=slope, intercept=intercept, r_squared=r_squared, outliers_removed=outliers_removed)
91
+
92
+
93
def _remove_outliers(x: np.ndarray, y: np.ndarray, fit_config: FitConfig) -> tuple[np.ndarray, np.ndarray, list[int]]:
    """
    Drop outlying points using the Interquartile Range (IQR) rule.

    Two passes are made. When the number of points is at most
    ``fit_config.small_concurrency_range_threshold``, the raw y-values are screened first using
    ``fit_config.extreme_outlier_threshold`` as the IQR multiplier. Afterwards the residuals of a
    simple linear fit are screened using ``fit_config.conservative_outlier_threshold``.

    Args:
        x: Input x values (concurrencies)
        y: Input y values (time metrics)
        fit_config: Configuration for outlier detection

    Returns:
        Tuple of (cleaned_x, cleaned_y, list_of_removed_concurrencies)

    Raises:
        ValueError: If no points are left after the raw-value screen
    """
    small_range_limit = fit_config.small_concurrency_range_threshold
    removed_concurrencies = []
    num_points = len(x)

    # Pass 1 (small datasets only): IQR screen on the raw y-values, so that a single extreme
    # value does not skew the linear fit used by the residual screen below.
    if num_points <= small_range_limit:
        raw_q1 = np.percentile(y, 25)
        raw_q3 = np.percentile(y, 75)
        raw_iqr = raw_q3 - raw_q1

        raw_lower = raw_q1 - fit_config.extreme_outlier_threshold * raw_iqr
        raw_upper = raw_q3 + fit_config.extreme_outlier_threshold * raw_iqr

        # True for the points that are kept
        within_raw_bounds = (y >= raw_lower) & (y <= raw_upper)
        num_extreme = np.sum(~within_raw_bounds)

        if num_extreme > 0:
            extreme_concurrencies = x[~within_raw_bounds].tolist()
            removed_concurrencies.extend(extreme_concurrencies)
            logger.info("Removed %d extreme outliers from raw values: concurrencies %s",
                        num_extreme,
                        extreme_concurrencies)
            # The residual screen runs on the surviving points only
            x, y = x[within_raw_bounds], y[within_raw_bounds]
            num_points = len(x)

    if num_points == 0:
        raise ValueError("No data points remaining after outlier removal. Cannot compute linear fit.")

    # Pass 2: least-squares line through the remaining points, then IQR screen on its residuals
    total_x = x.sum()
    total_y = y.sum()
    total_xy = (x * y).sum()
    total_x_sq = (x**2).sum()

    fit_slope = (num_points * total_xy - total_x * total_y) / (num_points * total_x_sq - total_x**2)
    fit_intercept = (total_y - fit_slope * total_x) / num_points

    residuals = y - (fit_slope * x + fit_intercept)

    q1, q3 = (np.percentile(residuals, pct) for pct in (25, 75))
    iqr = q3 - q1

    lower_bound = q1 - fit_config.conservative_outlier_threshold * iqr
    upper_bound = q3 + fit_config.conservative_outlier_threshold * iqr

    # True for the points that are kept
    within_residual_bounds = (residuals >= lower_bound) & (residuals <= upper_bound)

    residual_concurrencies = x[~within_residual_bounds].tolist()
    num_residual_outliers = np.sum(~within_residual_bounds)
    removed_concurrencies.extend(residual_concurrencies)

    # Extra diagnostics for small datasets
    if len(x) <= small_range_limit:
        logger.debug("Outlier detection for small dataset (n=%d):", len(x))
        logger.debug(" Data points: %s", list(zip(x, y)))
        logger.debug(" Residuals: %s", residuals.tolist())
        logger.debug(" Q1=%.3f, Q3=%.3f, IQR=%.3f", q1, q3, iqr)
        logger.debug(" Bounds: [%.3f, %.3f]", lower_bound, upper_bound)
        logger.info(" Outliers removed: %d (concurrencies: %s)", num_residual_outliers, residual_concurrencies)

    return x[within_residual_bounds], y[within_residual_bounds], removed_concurrencies
181
+
182
+
183
def calc_gpu_estimate_based_on_slope(target_time_metric: float,
                                     target_users: int,
                                     test_gpu_count: int,
                                     observed_slope: float,
                                     observed_intercept: float = 0.0) -> float:
    """
    Calculate the GPU estimate based on the slope of the time metric.

    This function uses the linear relationship between concurrency and time metrics
    to estimate the required GPU count for a target user load.

    Args:
        target_time_metric: Target time metric (latency or runtime) in seconds
        target_users: Target number of concurrent users
        test_gpu_count: Number of GPUs used in the test
        observed_slope: Slope from linear regression of time vs concurrency
        observed_intercept: Y-intercept from linear regression (default: 0.0)

    Returns:
        Estimated number of GPUs required

    Raises:
        ValueError: If target_time_metric is less than or equal to the intercept, if the
            observed slope is zero, or if the calculated target concurrency is not positive
    """
    if target_time_metric <= observed_intercept:
        raise ValueError(f"Target time metric ({target_time_metric}) must be greater than "
                         f"the intercept ({observed_intercept}) for valid GPU estimation.")

    # A zero slope makes the division below undefined: a Python float raises ZeroDivisionError,
    # a numpy float yields inf and ultimately a 0-GPU estimate. Report it as invalid input instead.
    if observed_slope == 0:
        raise ValueError(f"Observed slope ({observed_slope}) must be non-zero for valid GPU estimation.")

    # Calculate the concurrency that would achieve the target time metric
    # Using the linear equation: time = slope * concurrency + intercept
    # Solving for concurrency: concurrency = (time - intercept) / slope
    calculated_concurrency = (target_time_metric - observed_intercept) / observed_slope
    logger.info("Calculated concurrency: %f for target time metric: %f, observed intercept: %f, observed slope: %f",
                calculated_concurrency,
                target_time_metric,
                observed_intercept,
                observed_slope)

    if calculated_concurrency <= 0:
        raise ValueError(f"Calculated target concurrency ({calculated_concurrency}) is not positive. "
                         f"This suggests the slope or intercept values may be invalid.")

    # Estimate GPUs using the ratio of target users to target concurrency
    # scaled by the test GPU count
    gpu_estimate = (target_users / calculated_concurrency) * test_gpu_count

    return gpu_estimate
230
+
231
+
232
def calc_gpu_estimate_for_single_concurrency(target_llm_latency: float,
                                             target_workflow_runtime: float,
                                             target_users: int,
                                             test_concurrency: int,
                                             test_gpu_count: int,
                                             observed_latency: float,
                                             observed_runtime: float) -> GPUEstimates:
    """
    ROUGH ESTIMATE: GPU count estimate derived from a single concurrency level.

    The estimate assumes linear scaling and is meant as a baseline only; slope-based
    estimation over multiple concurrency levels is the more accurate method.

    Latency-based formula:
        G_required = (U_target / C_test) * (L_obs / L_target) * G_test

    Runtime-based formula:
        G_required = (U_target / C_test) * (R_obs / R_target) * G_test

    with:
        - U_target: Target number of users
        - C_test: Test concurrency level
        - L_obs / L_target: Observed / target LLM latency
        - R_obs / R_target: Observed / target workflow runtime
        - G_test: Test GPU count

    A target that is not greater than 0 is treated as "not set" and its estimate is left as None.
    If an observed value exceeds its (set) target, empty estimates are returned.

    WARNING: This is a rough estimate that:
        - Assumes perfect linear scaling (rarely true in practice)
        - Doesn't account for GPU utilization inefficiencies
        - May underestimate GPU requirements for high concurrency
        - Should be validated against slope-based estimates
    """
    latency_targeted = target_llm_latency > 0
    runtime_targeted = target_workflow_runtime > 0

    # An observation that already misses its target yields empty estimates
    latency_missed = latency_targeted and observed_latency > target_llm_latency
    if latency_missed or (runtime_targeted and observed_runtime > target_workflow_runtime):
        return GPUEstimates()

    estimate_by_runtime = None
    if runtime_targeted:
        runtime_ratio = observed_runtime / target_workflow_runtime
        estimate_by_runtime = (target_users / test_concurrency) * runtime_ratio * test_gpu_count

    estimate_by_latency = None
    if latency_targeted:
        latency_ratio = observed_latency / target_llm_latency
        estimate_by_latency = (target_users / test_concurrency) * latency_ratio * test_gpu_count

    return GPUEstimates(gpu_estimate_by_wf_runtime=estimate_by_runtime,
                        gpu_estimate_by_llm_latency=estimate_by_latency)
@@ -0,0 +1,176 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import typing
17
+ from pathlib import Path
18
+
19
+ from pydantic import BaseModel
20
+
21
+
22
class FitConfig(BaseModel):
    """
    Configuration parameters for linear fit and outlier detection.
    """
    # Point-count cutoff: datasets with at most this many points are first screened for
    # extreme outliers on the raw y-values
    small_concurrency_range_threshold: int = 8

    # IQR multiplier used when screening raw y-values for extreme outliers
    extreme_outlier_threshold: float = 2.0

    # IQR multiplier used when screening linear-fit residuals for outliers
    conservative_outlier_threshold: float = 1.5

    # Lowest R-squared accepted as a valid linear fit
    min_r_squared: float = 0.7

    # Toggle for outlier removal during the linear fit calculation
    remove_outliers: bool = True
40
+
41
+
42
class CalcRunnerConfig(BaseModel):
    """
    Parameters used for a calc runner.
    """
    # --- Workflow source (not needed in offline mode) ---
    # Base config file
    config_file: Path | None = None
    # Endpoint used for the workflow; when not provided the workflow is run locally
    endpoint: str | None = None
    # Timeout for the workflow
    endpoint_timeout: int = 300

    # When true the workflow is not run; results from previous runs are used to
    # estimate the GPU count instead
    offline_mode: bool = False

    # --- Test sweep ---
    # Number of passes at each concurrency; 0 means the dataset is adjusted to a
    # multiple of the concurrency
    num_passes: int = 0
    # Concurrency values to test
    concurrencies: list[int] = [1, 2, 4, 8]

    # --- Targets for GPU estimation ---
    target_llm_latency_p95: float = 0
    target_workflow_runtime_p95: float = 0
    target_users: int = 0

    # --- Test setup information needed for GPU estimation ---
    test_gpu_count: int = 0

    # --- Output ---
    # Output directory for results
    output_dir: Path | None = None
    # When true the job is stored in a new subdirectory of the output directory
    append_job: bool = False
    # When true the data is plotted
    plot_data: bool = True

    # Linear fit and outlier detection settings
    fit_config: FitConfig = FitConfig()
80
+
81
+
82
+ # Sizing metrics are gathered from the evaluation runs and used as input by the calculator.
83
class SizingMetricPerItem(BaseModel):
    """
    Sizing metrics per dataset entry item.
    """
    # LLM latency recorded for the item
    llm_latency: float
    # Workflow runtime recorded for the item
    workflow_runtime: float
91
+
92
+
93
class SizingMetricsAlerts(BaseModel):
    """
    Sizing metrics alerts.
    """
    # True when the workflow was interrupted, so this concurrency cannot be used
    workflow_interrupted: bool = False
99
+
100
+
101
class SizingMetrics(BaseModel):
    """
    Sizing metrics for a single concurrency.
    """
    # Alerts raised for these sizing metrics
    alerts: SizingMetricsAlerts = SizingMetricsAlerts()

    # 95th-percentile LLM latency
    llm_latency_p95: float = 0.0
    # 95th-percentile workflow runtime
    workflow_runtime_p95: float = 0.0
    # Total workflow runtime
    total_runtime: float = 0.0
    # Per-item metrics, keyed by the dataset entry id
    per_item_metrics: dict[typing.Any, SizingMetricPerItem] = {}
116
+
117
+
118
class LinearFitResult(BaseModel):
    """
    Result of linear regression including slope, intercept, and quality metrics.
    """
    # Slope of the fitted line
    slope: float
    # Y-intercept of the fitted line
    intercept: float
    # R-squared of the fit
    r_squared: float
    # Concurrency values that were dropped as outliers before fitting
    outliers_removed: list[int]
126
+
127
+
128
+ # GPU estimates are generated by the calculator.
129
class GPUEstimates(BaseModel):
    """
    GPU estimates.
    """
    # Estimate derived from the workflow runtime; None when not available
    gpu_estimate_by_wf_runtime: float | None = None
    # Estimate derived from the LLM latency; None when not available
    gpu_estimate_by_llm_latency: float | None = None
137
+
138
+
139
+ # Calc runner alerts are generated by the calculator.
140
class CalcAlerts(BaseModel):
    """
    Calc runner alerts.
    """
    # True when the workflow runtime linear fit identified the run as an outlier
    outlier_workflow_runtime: bool = False
    # True when the LLM latency linear fit identified the run as an outlier
    outlier_llm_latency: bool = False

    # Count of items whose value is greater than the target latency
    num_items_greater_than_target_latency: int = 0
    # Count of items whose value is greater than the target runtime
    num_items_greater_than_target_runtime: int = 0
153
+
154
+
155
class CalcData(BaseModel):
    """
    Output of the calc runner per concurrency.
    """
    # ROUGH per-concurrency GPU estimates: informational only, not used for the
    # final GPU estimation
    gpu_estimates: GPUEstimates = GPUEstimates()
    # Alerts produced by the calc runner
    alerts: CalcAlerts = CalcAlerts()
    # Sizing metrics for this concurrency
    sizing_metrics: SizingMetrics = SizingMetrics()
166
+
167
+
168
class CalcRunnerOutput(BaseModel):
    """
    Output of the calc runner.
    """
    # Slope-based GPU estimates (time vs concurrency), calculated online or offline
    gpu_estimates: GPUEstimates

    # Data per concurrency (GPU estimates, out-of-range runs, and sizing metrics),
    # keyed by concurrency
    calc_data: dict[int, CalcData] = {}