PyPI - aiqtoolkit - Versions diffs - 1.2.0.dev0__py3-none-any.whl → 1.2.0rc1__py3-none-any.whl - Mend

aiqtoolkit 1.2.0.dev0__py3-none-any.whl → 1.2.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of aiqtoolkit might be problematic. Click here for more details.

Files changed (220) hide show

aiq/agent/base.py +170 -8
aiq/agent/dual_node.py +1 -1
aiq/agent/react_agent/agent.py +146 -112
aiq/agent/react_agent/prompt.py +1 -6
aiq/agent/react_agent/register.py +36 -35
aiq/agent/rewoo_agent/agent.py +36 -35
aiq/agent/rewoo_agent/register.py +2 -2
aiq/agent/tool_calling_agent/agent.py +3 -7
aiq/agent/tool_calling_agent/register.py +1 -1
aiq/authentication/__init__.py +14 -0
aiq/authentication/api_key/__init__.py +14 -0
aiq/authentication/api_key/api_key_auth_provider.py +92 -0
aiq/authentication/api_key/api_key_auth_provider_config.py +124 -0
aiq/authentication/api_key/register.py +26 -0
aiq/authentication/exceptions/__init__.py +14 -0
aiq/authentication/exceptions/api_key_exceptions.py +38 -0
aiq/authentication/exceptions/auth_code_grant_exceptions.py +86 -0
aiq/authentication/exceptions/call_back_exceptions.py +38 -0
aiq/authentication/exceptions/request_exceptions.py +54 -0
aiq/authentication/http_basic_auth/__init__.py +0 -0
aiq/authentication/http_basic_auth/http_basic_auth_provider.py +81 -0
aiq/authentication/http_basic_auth/register.py +30 -0
aiq/authentication/interfaces.py +93 -0
aiq/authentication/oauth2/__init__.py +14 -0
aiq/authentication/oauth2/oauth2_auth_code_flow_provider.py +107 -0
aiq/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +39 -0
aiq/authentication/oauth2/register.py +25 -0
aiq/authentication/register.py +21 -0
aiq/builder/builder.py +64 -2
aiq/builder/component_utils.py +16 -3
aiq/builder/context.py +37 -0
aiq/builder/eval_builder.py +43 -2
aiq/builder/function.py +44 -12
aiq/builder/function_base.py +1 -1
aiq/builder/intermediate_step_manager.py +6 -8
aiq/builder/user_interaction_manager.py +3 -0
aiq/builder/workflow.py +23 -18
aiq/builder/workflow_builder.py +421 -61
aiq/cli/commands/info/list_mcp.py +103 -16
aiq/cli/commands/sizing/__init__.py +14 -0
aiq/cli/commands/sizing/calc.py +294 -0
aiq/cli/commands/sizing/sizing.py +27 -0
aiq/cli/commands/start.py +2 -1
aiq/cli/entrypoint.py +2 -0
aiq/cli/register_workflow.py +80 -0
aiq/cli/type_registry.py +151 -30
aiq/data_models/api_server.py +124 -12
aiq/data_models/authentication.py +231 -0
aiq/data_models/common.py +35 -7
aiq/data_models/component.py +17 -9
aiq/data_models/component_ref.py +33 -0
aiq/data_models/config.py +60 -3
aiq/data_models/dataset_handler.py +2 -1
aiq/data_models/embedder.py +1 -0
aiq/data_models/evaluate.py +23 -0
aiq/data_models/function_dependencies.py +8 -0
aiq/data_models/interactive.py +10 -1
aiq/data_models/intermediate_step.py +38 -5
aiq/data_models/its_strategy.py +30 -0
aiq/data_models/llm.py +1 -0
aiq/data_models/memory.py +1 -0
aiq/data_models/object_store.py +44 -0
aiq/data_models/profiler.py +1 -0
aiq/data_models/retry_mixin.py +35 -0
aiq/data_models/span.py +187 -0
aiq/data_models/telemetry_exporter.py +2 -2
aiq/embedder/nim_embedder.py +2 -1
aiq/embedder/openai_embedder.py +2 -1
aiq/eval/config.py +19 -1
aiq/eval/dataset_handler/dataset_handler.py +87 -2
aiq/eval/evaluate.py +208 -27
aiq/eval/evaluator/base_evaluator.py +73 -0
aiq/eval/evaluator/evaluator_model.py +1 -0
aiq/eval/intermediate_step_adapter.py +11 -5
aiq/eval/rag_evaluator/evaluate.py +55 -15
aiq/eval/rag_evaluator/register.py +6 -1
aiq/eval/remote_workflow.py +7 -2
aiq/eval/runners/__init__.py +14 -0
aiq/eval/runners/config.py +39 -0
aiq/eval/runners/multi_eval_runner.py +54 -0
aiq/eval/trajectory_evaluator/evaluate.py +22 -65
aiq/eval/tunable_rag_evaluator/evaluate.py +150 -168
aiq/eval/tunable_rag_evaluator/register.py +2 -0
aiq/eval/usage_stats.py +41 -0
aiq/eval/utils/output_uploader.py +10 -1
aiq/eval/utils/weave_eval.py +184 -0
aiq/experimental/__init__.py +0 -0
aiq/experimental/decorators/__init__.py +0 -0
aiq/experimental/decorators/experimental_warning_decorator.py +130 -0
aiq/experimental/inference_time_scaling/__init__.py +0 -0
aiq/experimental/inference_time_scaling/editing/__init__.py +0 -0
aiq/experimental/inference_time_scaling/editing/iterative_plan_refinement_editor.py +147 -0
aiq/experimental/inference_time_scaling/editing/llm_as_a_judge_editor.py +204 -0
aiq/experimental/inference_time_scaling/editing/motivation_aware_summarization.py +107 -0
aiq/experimental/inference_time_scaling/functions/__init__.py +0 -0
aiq/experimental/inference_time_scaling/functions/execute_score_select_function.py +105 -0
aiq/experimental/inference_time_scaling/functions/its_tool_orchestration_function.py +205 -0
aiq/experimental/inference_time_scaling/functions/its_tool_wrapper_function.py +146 -0
aiq/experimental/inference_time_scaling/functions/plan_select_execute_function.py +224 -0
aiq/experimental/inference_time_scaling/models/__init__.py +0 -0
aiq/experimental/inference_time_scaling/models/editor_config.py +132 -0
aiq/experimental/inference_time_scaling/models/its_item.py +48 -0
aiq/experimental/inference_time_scaling/models/scoring_config.py +112 -0
aiq/experimental/inference_time_scaling/models/search_config.py +120 -0
aiq/experimental/inference_time_scaling/models/selection_config.py +154 -0
aiq/experimental/inference_time_scaling/models/stage_enums.py +43 -0
aiq/experimental/inference_time_scaling/models/strategy_base.py +66 -0
aiq/experimental/inference_time_scaling/models/tool_use_config.py +41 -0
aiq/experimental/inference_time_scaling/register.py +36 -0
aiq/experimental/inference_time_scaling/scoring/__init__.py +0 -0
aiq/experimental/inference_time_scaling/scoring/llm_based_agent_scorer.py +168 -0
aiq/experimental/inference_time_scaling/scoring/llm_based_plan_scorer.py +168 -0
aiq/experimental/inference_time_scaling/scoring/motivation_aware_scorer.py +111 -0
aiq/experimental/inference_time_scaling/search/__init__.py +0 -0
aiq/experimental/inference_time_scaling/search/multi_llm_planner.py +128 -0
aiq/experimental/inference_time_scaling/search/multi_query_retrieval_search.py +122 -0
aiq/experimental/inference_time_scaling/search/single_shot_multi_plan_planner.py +128 -0
aiq/experimental/inference_time_scaling/selection/__init__.py +0 -0
aiq/experimental/inference_time_scaling/selection/best_of_n_selector.py +63 -0
aiq/experimental/inference_time_scaling/selection/llm_based_agent_output_selector.py +131 -0
aiq/experimental/inference_time_scaling/selection/llm_based_output_merging_selector.py +159 -0
aiq/experimental/inference_time_scaling/selection/llm_based_plan_selector.py +128 -0
aiq/experimental/inference_time_scaling/selection/threshold_selector.py +58 -0
aiq/front_ends/console/authentication_flow_handler.py +233 -0
aiq/front_ends/console/console_front_end_plugin.py +11 -2
aiq/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
aiq/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +27 -0
aiq/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +107 -0
aiq/front_ends/fastapi/fastapi_front_end_config.py +93 -9
aiq/front_ends/fastapi/fastapi_front_end_controller.py +68 -0
aiq/front_ends/fastapi/fastapi_front_end_plugin.py +14 -1
aiq/front_ends/fastapi/fastapi_front_end_plugin_worker.py +537 -52
aiq/front_ends/fastapi/html_snippets/__init__.py +14 -0
aiq/front_ends/fastapi/html_snippets/auth_code_grant_success.py +35 -0
aiq/front_ends/fastapi/job_store.py +47 -25
aiq/front_ends/fastapi/main.py +2 -0
aiq/front_ends/fastapi/message_handler.py +108 -89
aiq/front_ends/fastapi/step_adaptor.py +2 -1
aiq/llm/aws_bedrock_llm.py +57 -0
aiq/llm/nim_llm.py +2 -1
aiq/llm/openai_llm.py +3 -2
aiq/llm/register.py +1 -0
aiq/meta/pypi.md +12 -12
aiq/object_store/__init__.py +20 -0
aiq/object_store/in_memory_object_store.py +74 -0
aiq/object_store/interfaces.py +84 -0
aiq/object_store/models.py +36 -0
aiq/object_store/register.py +20 -0
aiq/observability/__init__.py +14 -0
aiq/observability/exporter/__init__.py +14 -0
aiq/observability/exporter/base_exporter.py +449 -0
aiq/observability/exporter/exporter.py +78 -0
aiq/observability/exporter/file_exporter.py +33 -0
aiq/observability/exporter/processing_exporter.py +269 -0
aiq/observability/exporter/raw_exporter.py +52 -0
aiq/observability/exporter/span_exporter.py +264 -0
aiq/observability/exporter_manager.py +335 -0
aiq/observability/mixin/__init__.py +14 -0
aiq/observability/mixin/batch_config_mixin.py +26 -0
aiq/observability/mixin/collector_config_mixin.py +23 -0
aiq/observability/mixin/file_mixin.py +288 -0
aiq/observability/mixin/file_mode.py +23 -0
aiq/observability/mixin/resource_conflict_mixin.py +134 -0
aiq/observability/mixin/serialize_mixin.py +61 -0
aiq/observability/mixin/type_introspection_mixin.py +183 -0
aiq/observability/processor/__init__.py +14 -0
aiq/observability/processor/batching_processor.py +316 -0
aiq/observability/processor/intermediate_step_serializer.py +28 -0
aiq/observability/processor/processor.py +68 -0
aiq/observability/register.py +36 -39
aiq/observability/utils/__init__.py +14 -0
aiq/observability/utils/dict_utils.py +236 -0
aiq/observability/utils/time_utils.py +31 -0
aiq/profiler/calc/__init__.py +14 -0
aiq/profiler/calc/calc_runner.py +623 -0
aiq/profiler/calc/calculations.py +288 -0
aiq/profiler/calc/data_models.py +176 -0
aiq/profiler/calc/plot.py +345 -0
aiq/profiler/callbacks/langchain_callback_handler.py +22 -10
aiq/profiler/data_models.py +24 -0
aiq/profiler/inference_metrics_model.py +3 -0
aiq/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +8 -0
aiq/profiler/inference_optimization/data_models.py +2 -2
aiq/profiler/inference_optimization/llm_metrics.py +2 -2
aiq/profiler/profile_runner.py +61 -21
aiq/runtime/loader.py +9 -3
aiq/runtime/runner.py +23 -9
aiq/runtime/session.py +25 -7
aiq/runtime/user_metadata.py +2 -3
aiq/tool/chat_completion.py +74 -0
aiq/tool/code_execution/README.md +152 -0
aiq/tool/code_execution/code_sandbox.py +151 -72
aiq/tool/code_execution/local_sandbox/.gitignore +1 -0
aiq/tool/code_execution/local_sandbox/local_sandbox_server.py +139 -24
aiq/tool/code_execution/local_sandbox/sandbox.requirements.txt +3 -1
aiq/tool/code_execution/local_sandbox/start_local_sandbox.sh +27 -2
aiq/tool/code_execution/register.py +7 -3
aiq/tool/code_execution/test_code_execution_sandbox.py +414 -0
aiq/tool/mcp/exceptions.py +142 -0
aiq/tool/mcp/mcp_client.py +41 -6
aiq/tool/mcp/mcp_tool.py +3 -2
aiq/tool/register.py +1 -0
aiq/tool/server_tools.py +6 -3
aiq/utils/exception_handlers/automatic_retries.py +289 -0
aiq/utils/exception_handlers/mcp.py +211 -0
aiq/utils/io/model_processing.py +28 -0
aiq/utils/log_utils.py +37 -0
aiq/utils/string_utils.py +38 -0
aiq/utils/type_converter.py +18 -2
aiq/utils/type_utils.py +87 -0
{aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/METADATA +53 -21
aiqtoolkit-1.2.0rc1.dist-info/RECORD +436 -0
{aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/WHEEL +1 -1
{aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/entry_points.txt +3 -0
aiq/front_ends/fastapi/websocket.py +0 -148
aiq/observability/async_otel_listener.py +0 -429
aiqtoolkit-1.2.0.dev0.dist-info/RECORD +0 -316
{aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
{aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/licenses/LICENSE.md +0 -0
{aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/top_level.txt +0 -0

aiq/profiler/calc/calculations.py ADDED Viewed

@@ -0,0 +1,288 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+import numpy as np
+from aiq.profiler.calc.data_models import FitConfig
+from aiq.profiler.calc.data_models import GPUEstimates
+from aiq.profiler.calc.data_models import LinearFitResult
+logger = logging.getLogger(__name__)
+def compute_slope(concurrencies: list[float],
+                  time_metrics: list[float],
+                  fit_config: FitConfig | None = None) -> LinearFitResult:
+    """
+    Concurrency is the independent variable (x-axis) and time metric (which can be runtime or latency)
+    is the dependent variable (y-axis). This function computes the slope of the linear relationship
+    between concurrency and time metric.
+    Args:
+        concurrencies: List of concurrency values (x-axis)
+        time_metrics: List of time metric values (y-axis)
+        fit_config: Configuration for outlier detection and fit validation
+    Returns:
+        LinearFitResult containing slope, intercept, R-squared, and outliers removed
+    Raises:
+        ValueError: If the relationship is not linear (R² < min_r_squared)
+    """
+    # Use default config if none provided
+    if fit_config is None:
+        fit_config = FitConfig()
+    # Convert to numpy arrays for calculations
+    x = np.array(concurrencies)
+    y = np.array(time_metrics)
+    # Validate input
+    if len(x) != len(y):
+        raise ValueError("Concurrencies and time_metrics must have the same length")
+    if len(x) < 2:
+        raise ValueError("Need at least 2 points for linear regression")
+    outliers_removed = []
+    # Remove outliers if requested
+    if fit_config.remove_outliers and len(x) > 4:  # Need at least 4 points for outlier detection
+        x_clean, y_clean, removed_concurrencies = _remove_outliers(x, y, fit_config)
+        x, y = x_clean, y_clean
+        outliers_removed = removed_concurrencies
+    # Calculate linear regression using least squares
+    n = len(x)
+    sum_x = x.sum()
+    sum_y = y.sum()
+    sum_xy = (x * y).sum()
+    sum_x2 = (x**2).sum()
+    # Calculate slope and intercept
+    slope = (n * sum_xy - sum_x * sum_y) / (n * sum_x2 - sum_x**2)
+    intercept = (sum_y - slope * sum_x) / n
+    # Calculate R-squared
+    y_pred = slope * x + intercept
+    ss_res = ((y - y_pred)**2).sum()
+    ss_tot = ((y - y.mean())**2).sum()
+    r_squared = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0.0
+    # Validate linearity
+    if r_squared < fit_config.min_r_squared:
+        raise ValueError(f"Poor linear fit detected (R² = {r_squared:.3f} < {fit_config.min_r_squared}). "
+                         f"The relationship may not be linear. Consider using non-linear regression.")
+    return LinearFitResult(slope=slope, intercept=intercept, r_squared=r_squared, outliers_removed=outliers_removed)
+def _remove_outliers(x: np.ndarray, y: np.ndarray, fit_config: FitConfig) -> tuple[np.ndarray, np.ndarray, list[int]]:
+    """
+    Remove outliers using the Interquartile Range (IQR) method.
+    For small concurrency range (≤ threshold points), also checks raw y-values for extreme outliers.
+    Args:
+        x: Input x values (concurrencies)
+        y: Input y values (time metrics)
+        fit_config: Configuration for outlier detection
+    Returns:
+        Tuple of (cleaned_x, cleaned_y, list_of_removed_concurrencies)
+    """
+    # if the number of concurrency points is less removing outliers can be challenging
+    # as extreme outliers can skew the results.
+    # We use a threshold to check for extreme outliers in raw y-values first.
+    n = len(x)
+    all_removed_concurrencies = []
+    # For smaller concurrency ranges, check for extreme outliers in raw y-values first
+    if n <= fit_config.small_concurrency_range_threshold:
+        # Calculate IQR on raw y-values
+        y_q1 = np.percentile(y, 25)
+        y_q3 = np.percentile(y, 75)
+        y_iqr = y_q3 - y_q1
+        # Use a more aggressive threshold for small datasets
+        y_lower_bound = y_q1 - fit_config.extreme_outlier_threshold * y_iqr  # More aggressive than 1.5
+        y_upper_bound = y_q3 + fit_config.extreme_outlier_threshold * y_iqr
+        # Find extreme outliers in raw values
+        extreme_outlier_mask = (y >= y_lower_bound) & (y <= y_upper_bound)
+        extreme_outliers_removed = np.sum(~extreme_outlier_mask)
+        if extreme_outliers_removed > 0:
+            extreme_removed_concurrencies = x[~extreme_outlier_mask].tolist()
+            all_removed_concurrencies.extend(extreme_removed_concurrencies)
+            logger.info("Removed %d extreme outliers from raw values: concurrencies %s",
+                        extreme_outliers_removed,
+                        extreme_removed_concurrencies)
+            # Continue with residual-based detection on the cleaned data
+            x = x[extreme_outlier_mask]
+            y = y[extreme_outlier_mask]
+            n = len(x)
+    # Standard residual-based outlier detection
+    # Calculate residuals from a simple linear fit
+    if n == 0:
+        raise ValueError("No data points remaining after outlier removal. Cannot compute linear fit.")
+    sum_x = x.sum()
+    sum_y = y.sum()
+    sum_xy = (x * y).sum()
+    sum_x2 = (x**2).sum()
+    slope = (n * sum_xy - sum_x * sum_y) / (n * sum_x2 - sum_x**2)
+    intercept = (sum_y - slope * sum_x) / n
+    # Calculate residuals
+    y_pred = slope * x + intercept
+    residuals = y - y_pred
+    # Use IQR method to detect outliers
+    q1 = np.percentile(residuals, 25)
+    q3 = np.percentile(residuals, 75)
+    iqr = q3 - q1
+    # Define outlier bounds (1.5 * IQR rule)
+    lower_bound = q1 - fit_config.conservative_outlier_threshold * iqr
+    upper_bound = q3 + fit_config.conservative_outlier_threshold * iqr
+    # Find non-outlier indices
+    non_outlier_mask = (residuals >= lower_bound) & (residuals <= upper_bound)
+    outliers_removed = np.sum(~non_outlier_mask)
+    residual_removed_concurrencies = x[~non_outlier_mask].tolist()
+    all_removed_concurrencies.extend(residual_removed_concurrencies)
+    # Add debugging for small datasets
+    if len(x) <= fit_config.small_concurrency_range_threshold:
+        logger.debug("Outlier detection for small dataset (n=%d):", len(x))
+        logger.debug("  Data points: %s", list(zip(x, y)))
+        logger.debug("  Residuals: %s", residuals.tolist())
+        logger.debug("  Q1=%.3f, Q3=%.3f, IQR=%.3f", q1, q3, iqr)
+        logger.debug("  Bounds: [%.3f, %.3f]", lower_bound, upper_bound)
+        logger.info("  Outliers removed: %d (concurrencies: %s)", outliers_removed, residual_removed_concurrencies)
+    return x[non_outlier_mask], y[non_outlier_mask], all_removed_concurrencies
+def calc_gpu_estimate_based_on_slope(target_time_metric: float,
+                                     target_users: int,
+                                     test_gpu_count: int,
+                                     observed_slope: float,
+                                     observed_intercept: float = 0.0) -> float:
+    """
+    Calculate the GPU estimate based on the slope of the time metric.
+    This function uses the linear relationship between concurrency and time metrics
+    to estimate the required GPU count for a target user load.
+    Args:
+        target_time_metric: Target time metric (latency or runtime) in seconds
+        observed_slope: Slope from linear regression of time vs concurrency
+        target_users: Target number of concurrent users
+        test_gpu_count: Number of GPUs used in the test
+        observed_intercept: Y-intercept from linear regression (default: 0.0)
+    Returns:
+        Estimated number of GPUs required
+    Raises:
+        ValueError: If target_time_metric is less than or equal to intercept
+    """
+    if target_time_metric <= observed_intercept:
+        raise ValueError(f"Target time metric ({target_time_metric}) must be greater than "
+                         f"the intercept ({observed_intercept}) for valid GPU estimation.")
+    # Calculate the concurrency that would achieve the target time metric
+    # Using the linear equation: time = slope * concurrency + intercept
+    # Solving for concurrency: concurrency = (time - intercept) / slope
+    calculated_concurrency = (target_time_metric - observed_intercept) / observed_slope
+    logger.info("Calculated concurrency: %f for target time metric: %f, observed intercept: %f, observed slope: %f",
+                calculated_concurrency,
+                target_time_metric,
+                observed_intercept,
+                observed_slope)
+    if calculated_concurrency <= 0:
+        raise ValueError(f"Calculated target concurrency ({calculated_concurrency}) is not positive. "
+                         f"This suggests the slope or intercept values may be invalid.")
+    # Estimate GPUs using the ratio of target users to target concurrency
+    # scaled by the test GPU count
+    gpu_estimate = (target_users / calculated_concurrency) * test_gpu_count
+    return gpu_estimate
+def calc_gpu_estimate_for_single_concurrency(target_llm_latency: float,
+                                             target_workflow_runtime: float,
+                                             target_users: int,
+                                             test_concurrency: int,
+                                             test_gpu_count: int,
+                                             observed_latency: float,
+                                             observed_runtime: float) -> GPUEstimates:
+    """
+    ROUGH ESTIMATE: Calculate GPU count estimate for a single concurrency level.
+    This is a simplified estimate that assumes linear scaling and should be used
+    as a baseline only. For more accurate estimates, use slope-based estimation
+    with multiple concurrency levels.
+    Formula based on the target latency:
+        G_required = (U_target / C_test) * (L_obs / L_target) * G_test
+    Formula based on the target runtime:
+        G_required = (U_target / C_test) * (R_obs / R_target) * G_test
+    where:
+        - U_target: Target number of users
+        - C_test: Test concurrency level
+        - L_obs: Observed LLM latency
+        - L_target: Target LLM latency
+        - R_obs: Observed workflow runtime
+        - R_target: Target workflow runtime
+        - G_test: Test GPU count
+    WARNING: This is a rough estimate that:
+    - Assumes perfect linear scaling (rarely true in practice)
+    - Doesn't account for GPU utilization inefficiencies
+    - May underestimate GPU requirements for high concurrency
+    - Should be validated against slope-based estimates
+    """
+    use_latency = target_llm_latency > 0
+    use_runtime = target_workflow_runtime > 0
+    # If observed latency or runtime exceeds the target, return empty estimates
+    if use_latency and observed_latency > target_llm_latency:
+        return GPUEstimates()
+    if use_runtime and observed_runtime > target_workflow_runtime:
+        return GPUEstimates()
+    # Calculate multipliers (how much faster we need to be)
+    llm_latency_multiplier = observed_latency / target_llm_latency if use_latency else 1.0
+    wf_runtime_multiplier = observed_runtime / target_workflow_runtime if use_runtime else 1.0
+    # Calculate GPU estimates using the corrected formula
+    gpu_estimate_by_wf_runtime = (target_users /
+                                  test_concurrency) * wf_runtime_multiplier * test_gpu_count if use_runtime else None
+    gpu_estimate_by_llm_latency = (target_users /
+                                   test_concurrency) * llm_latency_multiplier * test_gpu_count if use_latency else None
+    return GPUEstimates(gpu_estimate_by_wf_runtime=gpu_estimate_by_wf_runtime,
+                        gpu_estimate_by_llm_latency=gpu_estimate_by_llm_latency)

aiq/profiler/calc/data_models.py ADDED Viewed

@@ -0,0 +1,176 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import typing
+from pathlib import Path
+from pydantic import BaseModel
+class FitConfig(BaseModel):
+    """
+    Configuration parameters for linear fit and outlier detection.
+    """
+    # Data sets with at most this many points (default 8) are first screened for extreme
+    # outliers on their raw y-values
+    small_concurrency_range_threshold: int = 8
+    # IQR multiplier for the raw-value screen (default 2.0); points outside the resulting bounds are removed
+    extreme_outlier_threshold: float = 2.0
+    # IQR multiplier for the residual-based screen (default 1.5); points outside the resulting bounds are removed
+    conservative_outlier_threshold: float = 1.5
+    # Minimum R-squared value required for a valid linear fit
+    min_r_squared: float = 0.7
+    # Whether to remove outliers during linear fit calculation
+    remove_outliers: bool = True
+class CalcRunnerConfig(BaseModel):
+    """
+    Parameters used for a calc runner.
+    """
+    # base workflow config file - not needed in offline mode
+    config_file: Path | None = None
+    # endpoint to use for the workflow; if not provided the workflow is run locally
+    endpoint: str | None = None
+    # timeout for the workflow endpoint (presumably in seconds - confirm against the runner)
+    endpoint_timeout: int = 300
+    # if true the workflow is not run; instead results from previous runs are used to estimate the
+    # GPU count
+    offline_mode: bool = False
+    # number of passes at each concurrency; if 0 the dataset is adjusted to a multiple of the
+    # concurrency
+    num_passes: int = 0
+    # concurrency values to test
+    concurrencies: list[int] = [1, 2, 4, 8]
+    # Targets for GPU estimation (all default to 0)
+    target_llm_latency_p95: float = 0
+    target_workflow_runtime_p95: float = 0
+    target_users: int = 0
+    # Test setup information needed for GPU estimation
+    test_gpu_count: int = 0
+    # output directory for results
+    output_dir: Path | None = None
+    # if true, the job is stored in a new subdirectory of the output directory
+    append_job: bool = False
+    # if true, the data is plotted
+    plot_data: bool = True
+    # Configuration for linear fit and outlier detection
+    fit_config: FitConfig = FitConfig()
+# Sizing metrics are gathered from the evaluation runs and used as input by the calculator.
+class SizingMetricPerItem(BaseModel):
+    """
+    Sizing metrics for a single dataset entry.
+    Both fields are required (no defaults).
+    """
+    # LLM latency measured for this item
+    llm_latency: float
+    # workflow runtime measured for this item
+    workflow_runtime: float
+class SizingMetricsAlerts(BaseModel):
+    """
+    Alerts attached to the sizing metrics of a concurrency.
+    """
+    # if true, the workflow was interrupted, so the results for that concurrency cannot be used
+    workflow_interrupted: bool = False
+class SizingMetrics(BaseModel):
+    """
+    Sizing metrics for a single concurrency.
+    Every field has a default, so an empty instance can be created.
+    """
+    # alerts associated with the sizing metrics
+    alerts: SizingMetricsAlerts = SizingMetricsAlerts()
+    # p95 LLM latency
+    llm_latency_p95: float = 0.0
+    # p95 workflow runtime
+    workflow_runtime_p95: float = 0.0
+    # total workflow runtime
+    total_runtime: float = 0.0
+    # per-item metrics; the key is the dataset entry id
+    per_item_metrics: dict[typing.Any, SizingMetricPerItem] = {}
+class LinearFitResult(BaseModel):
+    """
+    Result of linear regression including slope, intercept, and quality metrics.
+    All fields are required (no defaults).
+    """
+    # slope of the fitted line (change in time metric per unit of concurrency)
+    slope: float
+    # y-intercept of the fitted line
+    intercept: float
+    # coefficient of determination (R²) of the fit
+    r_squared: float
+    # concurrency values that were removed as outliers before fitting
+    # NOTE(review): declared as list[int]; non-integral concurrency values would presumably fail validation - confirm
+    outliers_removed: list[int]
+# GPU estimates are generated by the calculator.
+class GPUEstimates(BaseModel):
+    """
+    GPU estimates.
+    Each estimate defaults to None, i.e. no estimate available for that metric.
+    """
+    # GPU estimate based on the workflow runtime
+    gpu_estimate_by_wf_runtime: float | None = None
+    # GPU estimate based on the LLM latency
+    gpu_estimate_by_llm_latency: float | None = None
+# Calc runner alerts are generated by the calculator.
+class CalcAlerts(BaseModel):
+    """
+    Calc runner alerts.
+    All flags default to False and all counters to 0.
+    """
+    # if true, the run was identified as an outlier by the workflow runtime linear fit
+    outlier_workflow_runtime: bool = False
+    # if true, the run was identified as an outlier by the LLM latency linear fit
+    outlier_llm_latency: bool = False
+    # number of items whose latency is greater than the target latency
+    num_items_greater_than_target_latency: int = 0
+    # number of items whose runtime is greater than the target runtime
+    num_items_greater_than_target_runtime: int = 0
+class CalcData(BaseModel):
+    """
+    Output of the calc runner per concurrency.
+    """
+    # ROUGH GPU estimates per concurrency: these are not used for the final GPU estimation,
+    # they are only available for information purposes
+    gpu_estimates: GPUEstimates = GPUEstimates()
+    # Calc runner alerts
+    alerts: CalcAlerts = CalcAlerts()
+    # Sizing metrics
+    sizing_metrics: SizingMetrics = SizingMetrics()
+class CalcRunnerOutput(BaseModel):
+    """
+    Output of the calc runner.
+    """
+    # GPU estimates based on the slope of the time vs concurrency, calculated online or offline (required field)
+    gpu_estimates: GPUEstimates
+    # Per-concurrency data (GPU estimates, alerts, and sizing metrics), keyed by an int
+    # (presumably the concurrency value - confirm against the runner)
+    calc_data: dict[int, CalcData] = {}

aiqtoolkit 1.2.0.dev0__py3-none-any.whl → 1.2.0rc1__py3-none-any.whl

Potentially problematic release.

aiqtoolkit 1.2.0.dev0__py3-none-any.whl → 1.2.0rc1__py3-none-any.whl