PyPI - aiqtoolkit - Versions diffs - 1.2.0.dev0__py3-none-any.whl → 1.2.0rc1__py3-none-any.whl - Mend

aiqtoolkit 1.2.0.dev0-py3-none-any.whl → 1.2.0rc1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of aiqtoolkit might be problematic. Click here for more details.

Files changed (220) hide show

aiq/agent/base.py +170 -8
aiq/agent/dual_node.py +1 -1
aiq/agent/react_agent/agent.py +146 -112
aiq/agent/react_agent/prompt.py +1 -6
aiq/agent/react_agent/register.py +36 -35
aiq/agent/rewoo_agent/agent.py +36 -35
aiq/agent/rewoo_agent/register.py +2 -2
aiq/agent/tool_calling_agent/agent.py +3 -7
aiq/agent/tool_calling_agent/register.py +1 -1
aiq/authentication/__init__.py +14 -0
aiq/authentication/api_key/__init__.py +14 -0
aiq/authentication/api_key/api_key_auth_provider.py +92 -0
aiq/authentication/api_key/api_key_auth_provider_config.py +124 -0
aiq/authentication/api_key/register.py +26 -0
aiq/authentication/exceptions/__init__.py +14 -0
aiq/authentication/exceptions/api_key_exceptions.py +38 -0
aiq/authentication/exceptions/auth_code_grant_exceptions.py +86 -0
aiq/authentication/exceptions/call_back_exceptions.py +38 -0
aiq/authentication/exceptions/request_exceptions.py +54 -0
aiq/authentication/http_basic_auth/__init__.py +0 -0
aiq/authentication/http_basic_auth/http_basic_auth_provider.py +81 -0
aiq/authentication/http_basic_auth/register.py +30 -0
aiq/authentication/interfaces.py +93 -0
aiq/authentication/oauth2/__init__.py +14 -0
aiq/authentication/oauth2/oauth2_auth_code_flow_provider.py +107 -0
aiq/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +39 -0
aiq/authentication/oauth2/register.py +25 -0
aiq/authentication/register.py +21 -0
aiq/builder/builder.py +64 -2
aiq/builder/component_utils.py +16 -3
aiq/builder/context.py +37 -0
aiq/builder/eval_builder.py +43 -2
aiq/builder/function.py +44 -12
aiq/builder/function_base.py +1 -1
aiq/builder/intermediate_step_manager.py +6 -8
aiq/builder/user_interaction_manager.py +3 -0
aiq/builder/workflow.py +23 -18
aiq/builder/workflow_builder.py +421 -61
aiq/cli/commands/info/list_mcp.py +103 -16
aiq/cli/commands/sizing/__init__.py +14 -0
aiq/cli/commands/sizing/calc.py +294 -0
aiq/cli/commands/sizing/sizing.py +27 -0
aiq/cli/commands/start.py +2 -1
aiq/cli/entrypoint.py +2 -0
aiq/cli/register_workflow.py +80 -0
aiq/cli/type_registry.py +151 -30
aiq/data_models/api_server.py +124 -12
aiq/data_models/authentication.py +231 -0
aiq/data_models/common.py +35 -7
aiq/data_models/component.py +17 -9
aiq/data_models/component_ref.py +33 -0
aiq/data_models/config.py +60 -3
aiq/data_models/dataset_handler.py +2 -1
aiq/data_models/embedder.py +1 -0
aiq/data_models/evaluate.py +23 -0
aiq/data_models/function_dependencies.py +8 -0
aiq/data_models/interactive.py +10 -1
aiq/data_models/intermediate_step.py +38 -5
aiq/data_models/its_strategy.py +30 -0
aiq/data_models/llm.py +1 -0
aiq/data_models/memory.py +1 -0
aiq/data_models/object_store.py +44 -0
aiq/data_models/profiler.py +1 -0
aiq/data_models/retry_mixin.py +35 -0
aiq/data_models/span.py +187 -0
aiq/data_models/telemetry_exporter.py +2 -2
aiq/embedder/nim_embedder.py +2 -1
aiq/embedder/openai_embedder.py +2 -1
aiq/eval/config.py +19 -1
aiq/eval/dataset_handler/dataset_handler.py +87 -2
aiq/eval/evaluate.py +208 -27
aiq/eval/evaluator/base_evaluator.py +73 -0
aiq/eval/evaluator/evaluator_model.py +1 -0
aiq/eval/intermediate_step_adapter.py +11 -5
aiq/eval/rag_evaluator/evaluate.py +55 -15
aiq/eval/rag_evaluator/register.py +6 -1
aiq/eval/remote_workflow.py +7 -2
aiq/eval/runners/__init__.py +14 -0
aiq/eval/runners/config.py +39 -0
aiq/eval/runners/multi_eval_runner.py +54 -0
aiq/eval/trajectory_evaluator/evaluate.py +22 -65
aiq/eval/tunable_rag_evaluator/evaluate.py +150 -168
aiq/eval/tunable_rag_evaluator/register.py +2 -0
aiq/eval/usage_stats.py +41 -0
aiq/eval/utils/output_uploader.py +10 -1
aiq/eval/utils/weave_eval.py +184 -0
aiq/experimental/__init__.py +0 -0
aiq/experimental/decorators/__init__.py +0 -0
aiq/experimental/decorators/experimental_warning_decorator.py +130 -0
aiq/experimental/inference_time_scaling/__init__.py +0 -0
aiq/experimental/inference_time_scaling/editing/__init__.py +0 -0
aiq/experimental/inference_time_scaling/editing/iterative_plan_refinement_editor.py +147 -0
aiq/experimental/inference_time_scaling/editing/llm_as_a_judge_editor.py +204 -0
aiq/experimental/inference_time_scaling/editing/motivation_aware_summarization.py +107 -0
aiq/experimental/inference_time_scaling/functions/__init__.py +0 -0
aiq/experimental/inference_time_scaling/functions/execute_score_select_function.py +105 -0
aiq/experimental/inference_time_scaling/functions/its_tool_orchestration_function.py +205 -0
aiq/experimental/inference_time_scaling/functions/its_tool_wrapper_function.py +146 -0
aiq/experimental/inference_time_scaling/functions/plan_select_execute_function.py +224 -0
aiq/experimental/inference_time_scaling/models/__init__.py +0 -0
aiq/experimental/inference_time_scaling/models/editor_config.py +132 -0
aiq/experimental/inference_time_scaling/models/its_item.py +48 -0
aiq/experimental/inference_time_scaling/models/scoring_config.py +112 -0
aiq/experimental/inference_time_scaling/models/search_config.py +120 -0
aiq/experimental/inference_time_scaling/models/selection_config.py +154 -0
aiq/experimental/inference_time_scaling/models/stage_enums.py +43 -0
aiq/experimental/inference_time_scaling/models/strategy_base.py +66 -0
aiq/experimental/inference_time_scaling/models/tool_use_config.py +41 -0
aiq/experimental/inference_time_scaling/register.py +36 -0
aiq/experimental/inference_time_scaling/scoring/__init__.py +0 -0
aiq/experimental/inference_time_scaling/scoring/llm_based_agent_scorer.py +168 -0
aiq/experimental/inference_time_scaling/scoring/llm_based_plan_scorer.py +168 -0
aiq/experimental/inference_time_scaling/scoring/motivation_aware_scorer.py +111 -0
aiq/experimental/inference_time_scaling/search/__init__.py +0 -0
aiq/experimental/inference_time_scaling/search/multi_llm_planner.py +128 -0
aiq/experimental/inference_time_scaling/search/multi_query_retrieval_search.py +122 -0
aiq/experimental/inference_time_scaling/search/single_shot_multi_plan_planner.py +128 -0
aiq/experimental/inference_time_scaling/selection/__init__.py +0 -0
aiq/experimental/inference_time_scaling/selection/best_of_n_selector.py +63 -0
aiq/experimental/inference_time_scaling/selection/llm_based_agent_output_selector.py +131 -0
aiq/experimental/inference_time_scaling/selection/llm_based_output_merging_selector.py +159 -0
aiq/experimental/inference_time_scaling/selection/llm_based_plan_selector.py +128 -0
aiq/experimental/inference_time_scaling/selection/threshold_selector.py +58 -0
aiq/front_ends/console/authentication_flow_handler.py +233 -0
aiq/front_ends/console/console_front_end_plugin.py +11 -2
aiq/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
aiq/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +27 -0
aiq/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +107 -0
aiq/front_ends/fastapi/fastapi_front_end_config.py +93 -9
aiq/front_ends/fastapi/fastapi_front_end_controller.py +68 -0
aiq/front_ends/fastapi/fastapi_front_end_plugin.py +14 -1
aiq/front_ends/fastapi/fastapi_front_end_plugin_worker.py +537 -52
aiq/front_ends/fastapi/html_snippets/__init__.py +14 -0
aiq/front_ends/fastapi/html_snippets/auth_code_grant_success.py +35 -0
aiq/front_ends/fastapi/job_store.py +47 -25
aiq/front_ends/fastapi/main.py +2 -0
aiq/front_ends/fastapi/message_handler.py +108 -89
aiq/front_ends/fastapi/step_adaptor.py +2 -1
aiq/llm/aws_bedrock_llm.py +57 -0
aiq/llm/nim_llm.py +2 -1
aiq/llm/openai_llm.py +3 -2
aiq/llm/register.py +1 -0
aiq/meta/pypi.md +12 -12
aiq/object_store/__init__.py +20 -0
aiq/object_store/in_memory_object_store.py +74 -0
aiq/object_store/interfaces.py +84 -0
aiq/object_store/models.py +36 -0
aiq/object_store/register.py +20 -0
aiq/observability/__init__.py +14 -0
aiq/observability/exporter/__init__.py +14 -0
aiq/observability/exporter/base_exporter.py +449 -0
aiq/observability/exporter/exporter.py +78 -0
aiq/observability/exporter/file_exporter.py +33 -0
aiq/observability/exporter/processing_exporter.py +269 -0
aiq/observability/exporter/raw_exporter.py +52 -0
aiq/observability/exporter/span_exporter.py +264 -0
aiq/observability/exporter_manager.py +335 -0
aiq/observability/mixin/__init__.py +14 -0
aiq/observability/mixin/batch_config_mixin.py +26 -0
aiq/observability/mixin/collector_config_mixin.py +23 -0
aiq/observability/mixin/file_mixin.py +288 -0
aiq/observability/mixin/file_mode.py +23 -0
aiq/observability/mixin/resource_conflict_mixin.py +134 -0
aiq/observability/mixin/serialize_mixin.py +61 -0
aiq/observability/mixin/type_introspection_mixin.py +183 -0
aiq/observability/processor/__init__.py +14 -0
aiq/observability/processor/batching_processor.py +316 -0
aiq/observability/processor/intermediate_step_serializer.py +28 -0
aiq/observability/processor/processor.py +68 -0
aiq/observability/register.py +36 -39
aiq/observability/utils/__init__.py +14 -0
aiq/observability/utils/dict_utils.py +236 -0
aiq/observability/utils/time_utils.py +31 -0
aiq/profiler/calc/__init__.py +14 -0
aiq/profiler/calc/calc_runner.py +623 -0
aiq/profiler/calc/calculations.py +288 -0
aiq/profiler/calc/data_models.py +176 -0
aiq/profiler/calc/plot.py +345 -0
aiq/profiler/callbacks/langchain_callback_handler.py +22 -10
aiq/profiler/data_models.py +24 -0
aiq/profiler/inference_metrics_model.py +3 -0
aiq/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +8 -0
aiq/profiler/inference_optimization/data_models.py +2 -2
aiq/profiler/inference_optimization/llm_metrics.py +2 -2
aiq/profiler/profile_runner.py +61 -21
aiq/runtime/loader.py +9 -3
aiq/runtime/runner.py +23 -9
aiq/runtime/session.py +25 -7
aiq/runtime/user_metadata.py +2 -3
aiq/tool/chat_completion.py +74 -0
aiq/tool/code_execution/README.md +152 -0
aiq/tool/code_execution/code_sandbox.py +151 -72
aiq/tool/code_execution/local_sandbox/.gitignore +1 -0
aiq/tool/code_execution/local_sandbox/local_sandbox_server.py +139 -24
aiq/tool/code_execution/local_sandbox/sandbox.requirements.txt +3 -1
aiq/tool/code_execution/local_sandbox/start_local_sandbox.sh +27 -2
aiq/tool/code_execution/register.py +7 -3
aiq/tool/code_execution/test_code_execution_sandbox.py +414 -0
aiq/tool/mcp/exceptions.py +142 -0
aiq/tool/mcp/mcp_client.py +41 -6
aiq/tool/mcp/mcp_tool.py +3 -2
aiq/tool/register.py +1 -0
aiq/tool/server_tools.py +6 -3
aiq/utils/exception_handlers/automatic_retries.py +289 -0
aiq/utils/exception_handlers/mcp.py +211 -0
aiq/utils/io/model_processing.py +28 -0
aiq/utils/log_utils.py +37 -0
aiq/utils/string_utils.py +38 -0
aiq/utils/type_converter.py +18 -2
aiq/utils/type_utils.py +87 -0
{aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/METADATA +53 -21
aiqtoolkit-1.2.0rc1.dist-info/RECORD +436 -0
{aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/WHEEL +1 -1
{aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/entry_points.txt +3 -0
aiq/front_ends/fastapi/websocket.py +0 -148
aiq/observability/async_otel_listener.py +0 -429
aiqtoolkit-1.2.0.dev0.dist-info/RECORD +0 -316
{aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
{aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/licenses/LICENSE.md +0 -0
{aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/top_level.txt +0 -0

aiq/profiler/calc/plot.py ADDED Viewed

@@ -0,0 +1,345 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from pathlib import Path
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from aiq.profiler.calc.data_models import LinearFitResult
+from aiq.profiler.calc.data_models import SizingMetrics
+logger = logging.getLogger(__name__)
+# Plotting constants
+class PlotConfig:
+    """Styling constants shared by the plotting helpers in this module."""
+    # --- Figure geometry and output resolution ---
+    SIMPLE_FIGSIZE = (12, 6)
+    ENHANCED_FIGSIZE = (16, 6)
+    SIMPLE_DPI = 150
+    ENHANCED_DPI = 300
+    # --- Markers ---
+    DATA_MARKER = "o"
+    OUTLIER_MARKER = "x"
+    # --- Colors ---
+    LLM_LATENCY_COLOR = "steelblue"
+    RUNTIME_COLOR = "darkgreen"
+    OUTLIER_COLOR = "crimson"
+    TREND_COLOR = "r"
+    SLA_COLOR = "red"
+    NOTE_BOX_COLOR = "mistyrose"
+    NOTE_TEXT_COLOR = "crimson"
+    STATS_BOX_COLOR = "lightblue"
+    # --- Opacity (alpha) values ---
+    DATA_ALPHA = 0.7
+    OUTLIER_ALPHA = 0.9
+    TREND_ALPHA = 0.8
+    GRID_ALPHA = 0.3
+    SLA_ALPHA = 0.7
+    NOTE_BOX_ALPHA = 0.7
+    STATS_BOX_ALPHA = 0.8
+    # --- Line styles and widths ---
+    SIMPLE_LINEWIDTH = 2
+    DATA_LINEWIDTH = 1
+    TREND_LINEWIDTH = 2.0
+    TREND_LINESTYLE = "--"
+    # --- Scatter point sizes (passed as ``s=`` to ``Axes.scatter``) ---
+    DATA_POINT_SIZE = 120
+    OUTLIER_POINT_SIZE = 140
+    # --- Fonts ---
+    AXIS_LABEL_FONTSIZE = 12
+    TITLE_FONTSIZE = 14
+    LEGEND_FONTSIZE = 10
+    NOTE_FONTSIZE = 10
+    STATS_FONTSIZE = 10
+    AXIS_LABEL_FONTWEIGHT = "bold"
+    TITLE_FONTWEIGHT = "bold"
+    # --- Text box placement (fractions of the axes for notes, of the figure for stats) ---
+    NOTE_X_POS = 0.98
+    NOTE_Y_POS = 0.02
+    STATS_X_POS = 0.02
+    STATS_Y_POS = 0.02
+    # --- Text box padding ---
+    NOTE_BOX_PAD = 0.3
+    STATS_BOX_PAD = 0.5
+    # --- Number of samples used to draw the trend line ---
+    TREND_LINE_POINTS = 100
+def plot_concurrency_vs_time_metrics_simple(df: pd.DataFrame, output_dir: Path) -> None:
+    """
+    Save a simple line plot of concurrency vs. p95 LLM latency and p95 workflow runtime.
+    The image is written to ``output_dir / "concurrency_vs_p95_simple.png"``. ``output_dir`` is
+    created (including parents) if it does not exist yet.
+    Args:
+        df: Frame providing the ``concurrency``, ``llm_latency_p95`` and ``workflow_runtime_p95`` columns.
+        output_dir: Directory that receives the PNG file.
+    """
+    # savefig does not create missing parent directories, so make sure the target exists first
+    output_dir.mkdir(parents=True, exist_ok=True)
+    simple_plot_path = output_dir / "concurrency_vs_p95_simple.png"
+    fig, ax = plt.subplots(figsize=PlotConfig.SIMPLE_FIGSIZE)
+    try:
+        ax.plot(df["concurrency"],
+                df["llm_latency_p95"],
+                label="p95 LLM Latency (s)",
+                marker=PlotConfig.DATA_MARKER,
+                linewidth=PlotConfig.SIMPLE_LINEWIDTH)
+        ax.plot(df["concurrency"],
+                df["workflow_runtime_p95"],
+                label="p95 Workflow Runtime (s)",
+                marker="s",
+                linewidth=PlotConfig.SIMPLE_LINEWIDTH)
+        ax.set_xlabel("Concurrency")
+        ax.set_ylabel("Time (seconds)")
+        ax.set_title("Concurrency vs. p95 LLM Latency and Workflow Runtime")
+        ax.grid(True, alpha=PlotConfig.GRID_ALPHA)
+        ax.legend()
+        fig.tight_layout()
+        fig.savefig(simple_plot_path, dpi=PlotConfig.SIMPLE_DPI, bbox_inches='tight')
+    finally:
+        # Release the figure even when rendering or saving fails, so it does not stay registered in pyplot
+        plt.close(fig)
+    logger.info("Simple plot saved to %s", simple_plot_path)
+def plot_metric_vs_concurrency_with_optional_fit(
+    ax: plt.Axes,
+    x: np.ndarray,
+    y: np.ndarray,
+    metric_name: str,
+    y_label: str,
+    title: str,
+    color: str,
+    sla_value: float = 0.0,
+    sla_label: str | None = None,
+    fit: LinearFitResult | None = None,
+) -> bool:
+    """
+    Draw one metric against concurrency on ``ax`` using a pre-computed linear fit.
+    The panel shows the data points, the points at concurrencies listed in ``fit.outliers_removed``
+    (highlighted separately, with a note), the trend line from ``fit.slope``/``fit.intercept`` and,
+    when ``sla_value`` is positive, a horizontal SLA line.
+    Args:
+        ax: Axes to draw on.
+        x: Concurrency values.
+        y: Metric values, aligned with ``x``.
+        metric_name: Name used in the warning logged when no fit is supplied.
+        y_label: Label for the y axis.
+        title: Axes title.
+        color: Color of the regular data points.
+        sla_value: SLA threshold; the line is drawn only when this is greater than 0.
+        sla_label: Legend label for the SLA line; a default label is generated when omitted.
+        fit: Pre-computed fit. Nothing is drawn when this is missing.
+    Returns:
+        True if the panel was drawn, False if it was skipped because no fit was supplied.
+    """
+    # Skip analysis plot if no fit is available
+    if not fit:
+        logger.warning("No linear fit available for %s, skipping analysis plot", metric_name)
+        return False
+    removed = fit.outliers_removed
+    outliers_note = ""
+    data_x, data_y = x, y
+    outlier_points = None
+    if removed:
+        # Concurrencies excluded from the fit identify the outlier points; compute the mask once
+        outlier_mask = np.isin(x, removed)
+        data_x, data_y = x[~outlier_mask], y[~outlier_mask]
+        outlier_points = (x[outlier_mask], y[outlier_mask])
+        outliers_note = f"Outliers removed: concurrencies {removed}"
+    ax.scatter(data_x,
+               data_y,
+               alpha=PlotConfig.DATA_ALPHA,
+               s=PlotConfig.DATA_POINT_SIZE,
+               c=color,
+               edgecolors='white',
+               linewidth=PlotConfig.DATA_LINEWIDTH,
+               marker=PlotConfig.DATA_MARKER,
+               label='Data Points')
+    if outlier_points is not None:
+        ax.scatter(outlier_points[0],
+                   outlier_points[1],
+                   alpha=PlotConfig.OUTLIER_ALPHA,
+                   s=PlotConfig.OUTLIER_POINT_SIZE,
+                   c=PlotConfig.OUTLIER_COLOR,
+                   marker=PlotConfig.OUTLIER_MARKER,
+                   label='Removed Outliers')
+    # Trend line spans the full observed concurrency range, outliers included
+    x_fit = np.linspace(x.min(), x.max(), PlotConfig.TREND_LINE_POINTS)
+    y_fit = fit.slope * x_fit + fit.intercept
+    ax.plot(x_fit,
+            y_fit,
+            PlotConfig.TREND_LINESTYLE,
+            alpha=PlotConfig.TREND_ALPHA,
+            linewidth=PlotConfig.TREND_LINEWIDTH,
+            color=PlotConfig.TREND_COLOR,
+            label=f'Trend (slope={fit.slope:.4f}, R²={fit.r_squared:.3f})')
+    if sla_value > 0:
+        ax.axhline(y=sla_value,
+                   color=PlotConfig.SLA_COLOR,
+                   linestyle=':',
+                   alpha=PlotConfig.SLA_ALPHA,
+                   linewidth=2,
+                   label=sla_label or f'SLA Threshold ({sla_value}s)')
+    ax.set_xlabel('Concurrency', fontsize=PlotConfig.AXIS_LABEL_FONTSIZE, fontweight=PlotConfig.AXIS_LABEL_FONTWEIGHT)
+    ax.set_ylabel(y_label, fontsize=PlotConfig.AXIS_LABEL_FONTSIZE, fontweight=PlotConfig.AXIS_LABEL_FONTWEIGHT)
+    ax.set_title(title, fontsize=PlotConfig.TITLE_FONTSIZE, fontweight=PlotConfig.TITLE_FONTWEIGHT)
+    ax.grid(True, alpha=PlotConfig.GRID_ALPHA)
+    ax.legend(fontsize=PlotConfig.LEGEND_FONTSIZE)
+    if outliers_note:
+        # Anchored in axes coordinates so the note stays in the lower-right corner of the panel
+        ax.text(PlotConfig.NOTE_X_POS,
+                PlotConfig.NOTE_Y_POS,
+                outliers_note,
+                transform=ax.transAxes,
+                fontsize=PlotConfig.NOTE_FONTSIZE,
+                color=PlotConfig.NOTE_TEXT_COLOR,
+                ha='right',
+                va='bottom',
+                bbox=dict(boxstyle=f'round,pad={PlotConfig.NOTE_BOX_PAD}',
+                          facecolor=PlotConfig.NOTE_BOX_COLOR,
+                          alpha=PlotConfig.NOTE_BOX_ALPHA))
+    return True
+def plot_concurrency_vs_time_metrics(metrics_per_concurrency: dict[int, SizingMetrics],
+                                     output_dir: Path,
+                                     target_llm_latency: float = 0.0,
+                                     target_runtime: float = 0.0,
+                                     llm_latency_fit: LinearFitResult | None = None,
+                                     runtime_fit: LinearFitResult | None = None) -> None:
+    """
+    Plot concurrency vs. p95 LLM latency and p95 workflow runtime using metrics_per_concurrency.
+    A simple line plot is always written when there is data. An analysis plot (scatter, trend line,
+    outlier highlighting, optional SLA line) is written as well, with one panel for each metric that
+    has a pre-computed fit; it is skipped when neither fit is supplied.
+    Args:
+        metrics_per_concurrency: Metrics keyed by concurrency; ``llm_latency_p95`` and
+            ``workflow_runtime_p95`` are read from each entry.
+        output_dir: Directory that receives the PNG files; created if missing.
+        target_llm_latency: SLA threshold for the LLM latency panel; drawn only when > 0.
+        target_runtime: SLA threshold for the workflow runtime panel; drawn only when > 0.
+        llm_latency_fit: Pre-computed fit for the LLM latency metric, if any.
+        runtime_fit: Pre-computed fit for the workflow runtime metric, if any.
+    """
+    rows = [{
+        "concurrency": concurrency,
+        "llm_latency_p95": metrics.llm_latency_p95,
+        "workflow_runtime_p95": metrics.workflow_runtime_p95,
+    } for concurrency, metrics in metrics_per_concurrency.items()]
+    if not rows:
+        logger.warning("No valid metrics data available to plot.")
+        return
+    # Both plots are saved below; create the directory before the first save rather than only before the last
+    output_dir.mkdir(parents=True, exist_ok=True)
+    plt.style.use('seaborn-v0_8')
+    df = pd.DataFrame(rows).sort_values("concurrency")
+    # Always generate simple plot first
+    plot_concurrency_vs_time_metrics_simple(df, output_dir)
+    # Check if we have fits available for analysis plots
+    has_llm_latency_fit = llm_latency_fit is not None
+    has_runtime_fit = runtime_fit is not None
+    if not has_llm_latency_fit and not has_runtime_fit:
+        logger.warning("No linear fits available for analysis plots, skipping enhanced plot")
+        return
+    # Create subplots based on available fits
+    if has_llm_latency_fit and has_runtime_fit:
+        fig, (llm_latency_ax, runtime_ax) = plt.subplots(1, 2, figsize=PlotConfig.ENHANCED_FIGSIZE)
+    else:
+        # Exactly one fit is available: hand the single panel to whichever metric has it, so a
+        # runtime-only fit is plotted instead of being dropped
+        fig, single_ax = plt.subplots(1, 1, figsize=(8, 6))
+        llm_latency_ax = single_ax if has_llm_latency_fit else None
+        runtime_ax = single_ax if has_runtime_fit else None
+    concurrency_values = df["concurrency"].to_numpy()
+    # Plot llm_latency if fit is available
+    llm_latency_plotted = False
+    if llm_latency_ax is not None:
+        llm_latency_plotted = plot_metric_vs_concurrency_with_optional_fit(
+            llm_latency_ax,
+            concurrency_values,
+            df["llm_latency_p95"].to_numpy(),
+            metric_name="llm_latency",
+            y_label='P95 LLM Latency (seconds)',
+            title='Concurrency vs P95 LLM Latency',
+            color=PlotConfig.LLM_LATENCY_COLOR,
+            sla_value=target_llm_latency,
+            sla_label=f'SLA Threshold ({target_llm_latency}s)' if target_llm_latency > 0 else None,
+            fit=llm_latency_fit,
+        )
+    # Plot runtime if fit is available
+    runtime_plotted = False
+    if runtime_ax is not None:
+        runtime_plotted = plot_metric_vs_concurrency_with_optional_fit(
+            runtime_ax,
+            concurrency_values,
+            df["workflow_runtime_p95"].to_numpy(),
+            metric_name="runtime",
+            y_label='P95 Workflow Runtime (seconds)',
+            title='Concurrency vs P95 Workflow Runtime',
+            color=PlotConfig.RUNTIME_COLOR,
+            sla_value=target_runtime,
+            sla_label=f'SLA Threshold ({target_runtime}s)' if target_runtime > 0 else None,
+            fit=runtime_fit,
+        )
+    # Check if any plots were successfully created
+    if not (llm_latency_plotted or runtime_plotted):
+        logger.warning("No analysis plots could be created, skipping enhanced plot")
+        plt.close(fig)
+        return
+    # Add summary statistics
+    stats_text = f'Data Points: {len(df)}\n'
+    stats_text += f'LLM Latency Range: {df["llm_latency_p95"].min():.3f}-{df["llm_latency_p95"].max():.3f}s\n'
+    stats_text += f'WF Runtime Range: {df["workflow_runtime_p95"].min():.3f}-{df["workflow_runtime_p95"].max():.3f}s'
+    fig.text(PlotConfig.STATS_X_POS,
+             PlotConfig.STATS_Y_POS,
+             stats_text,
+             fontsize=PlotConfig.STATS_FONTSIZE,
+             bbox=dict(boxstyle=f'round,pad={PlotConfig.STATS_BOX_PAD}',
+                       facecolor=PlotConfig.STATS_BOX_COLOR,
+                       alpha=PlotConfig.STATS_BOX_ALPHA))
+    fig.tight_layout()
+    enhanced_plot_path = output_dir / "concurrency_vs_p95_analysis.png"
+    fig.savefig(enhanced_plot_path,
+                dpi=PlotConfig.ENHANCED_DPI,
+                bbox_inches='tight',
+                facecolor='white',
+                edgecolor='none')
+    plt.close(fig)
+    logger.info("Enhanced plot saved to %s", enhanced_plot_path)

aiq/profiler/callbacks/langchain_callback_handler.py CHANGED Viewed

@@ -34,6 +34,7 @@ from aiq.builder.framework_enum import LLMFrameworkEnum
 from aiq.data_models.intermediate_step import IntermediateStepPayload
 from aiq.data_models.intermediate_step import IntermediateStepType
 from aiq.data_models.intermediate_step import StreamEventData
+from aiq.data_models.intermediate_step import ToolSchema
 from aiq.data_models.intermediate_step import TraceMetadata
 from aiq.data_models.intermediate_step import UsageInfo
 from aiq.profiler.callbacks.base_callback_class import BaseProfilerCallback
@@ -42,6 +43,16 @@ from aiq.profiler.callbacks.token_usage_base_model import TokenUsageBaseModel
 logger = logging.getLogger(__name__)
+def _extract_tools_schema(invocation_params: dict | None) -> list:
+    """
+    Build ``ToolSchema`` objects from the ``"tools"`` entry of ``invocation_params``.
+    Each entry of ``"tools"`` is expanded as keyword arguments to ``ToolSchema``.
+    Returns an empty list when ``invocation_params`` is ``None`` or empty, or when ``"tools"`` is
+    missing, ``None`` or empty.
+    """
+    if not invocation_params:
+        return []
+    # ``or []`` also covers a "tools" key that is present but None, which a ``.get`` default would not
+    return [ToolSchema(**tool) for tool in invocation_params.get("tools") or []]
 class LangchainProfilerHandler(AsyncCallbackHandler, BaseProfilerCallback):  # pylint: disable=R0901
     """Callback Handler that tracks NIM info."""
@@ -138,16 +149,17 @@ class LangchainProfilerHandler(AsyncCallbackHandler, BaseProfilerCallback):  # p
         run_id = str(run_id)
         self._run_id_to_model_name[run_id] = model_name
-        stats = IntermediateStepPayload(event_type=IntermediateStepType.LLM_START,
-                                        framework=LLMFrameworkEnum.LANGCHAIN,
-                                        name=model_name,
-                                        UUID=run_id,
-                                        data=StreamEventData(input=copy.deepcopy(messages[0])),
-                                        metadata=TraceMetadata(chat_inputs=copy.deepcopy(messages[0])),
-                                        usage_info=UsageInfo(token_usage=TokenUsageBaseModel(),
-                                                             num_llm_calls=1,
-                                                             seconds_between_calls=int(time.time() -
-                                                                                       self.last_call_ts)))
+        stats = IntermediateStepPayload(
+            event_type=IntermediateStepType.LLM_START,
+            framework=LLMFrameworkEnum.LANGCHAIN,
+            name=model_name,
+            UUID=run_id,
+            data=StreamEventData(input=copy.deepcopy(messages[0])),
+            metadata=TraceMetadata(chat_inputs=copy.deepcopy(messages[0]),
+                                   tools_schema=_extract_tools_schema(kwargs.get("invocation_params", {}))),
+            usage_info=UsageInfo(token_usage=TokenUsageBaseModel(),
+                                 num_llm_calls=1,
+                                 seconds_between_calls=int(time.time() - self.last_call_ts)))
         self.step_manager.push_intermediate_step(stats)
         self._run_id_to_llm_input[run_id] = messages[0][-1].content

aiq/profiler/data_models.py ADDED Viewed

@@ -0,0 +1,24 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from pydantic import BaseModel
+from aiq.profiler.inference_metrics_model import InferenceMetricsModel
+from aiq.profiler.inference_optimization.data_models import WorkflowRuntimeMetrics
+class ProfilerResults(BaseModel):
+    """Container for profiler outputs; it is the annotated return type of ``ProfilerRunner.run``."""
+    # Workflow runtime metrics; stays None when not provided
+    workflow_runtime_metrics: WorkflowRuntimeMetrics | None = None
+    # LLM latency statistics as an InferenceMetricsModel; stays None when not provided
+    llm_latency_ci: InferenceMetricsModel | None = None

aiq/profiler/inference_metrics_model.py CHANGED Viewed

@@ -23,3 +23,6 @@ class InferenceMetricsModel(BaseModel):
     ninetieth_interval: tuple[float, float] = Field(default=(0, 0), description="90% confidence interval")
     ninety_fifth_interval: tuple[float, float] = Field(default=(0, 0), description="95% confidence interval")
     ninety_ninth_interval: tuple[float, float] = Field(default=(0, 0), description="99% confidence interval")
+    p90: float = Field(default=0, description="90th percentile of the samples")
+    p95: float = Field(default=0, description="95th percentile of the samples")
+    p99: float = Field(default=0, description="99th percentile of the samples")

aiq/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py CHANGED Viewed

@@ -69,13 +69,20 @@ def build_call_tree_for_example(example_df: pd.DataFrame) -> list[CallNode]:
             return "LLM"
         if evt.startswith("TOOL_"):
             return "TOOL"
+        if evt.startswith("FUNCTION_"):
+            return "FUNCTION"
+        if evt.startswith("SPAN_"):
+            return "FUNCTION"
         return None
     def get_op_name(row: pd.Series, op_type: str) -> str:
         if op_type == "LLM":
             return row.get("llm_name") or "unknown_llm"
+        if op_type == "FUNCTION":
+            return row.get("function_name") or "unknown_function"
         if op_type == "TOOL":
             return row.get("tool_name") or "unknown_tool"
         return "unknown_op"
     for _, row in example_df.iterrows():
@@ -309,6 +316,7 @@ def save_gantt_chart(all_nodes: list[CallNode], output_path: str) -> None:
     color_map = {
         "LLM": "tab:blue",
         "TOOL": "tab:green",
+        "FUNCTION": "tab:orange",
     }
     default_color = "tab:gray"

aiq/profiler/inference_optimization/data_models.py CHANGED Viewed

@@ -220,7 +220,7 @@ class CallNode(BaseModel):
         return "\n".join([info] + child_strs)
-CallNode.update_forward_refs()
+CallNode.model_rebuild()
 class NodeMetrics(BaseModel):
@@ -296,7 +296,7 @@ class ConcurrencyCallNode(CallNode):
     llm_text_output: str | None = None
-ConcurrencyCallNode.update_forward_refs()
+ConcurrencyCallNode.model_rebuild()
 class ConcurrencySpikeInfo(BaseModel):

aiq/profiler/inference_optimization/llm_metrics.py CHANGED Viewed

@@ -176,8 +176,8 @@ class LLMMetrics:
             return subdf
         # Apply the group metrics
-        df = (df.groupby(['example_number', 'function_name'],
-                         group_keys=False).apply(_compute_group_metrics).sort_index())
+        df_group = df.groupby(['example_number', 'function_name'], group_keys=False)
+        df = df_group[df.columns].apply(_compute_group_metrics).sort_index()
         # ---------------------------------------------------------------------
         # 5. NOVA-Predicted-OSL

aiq/profiler/profile_runner.py CHANGED Viewed

@@ -25,6 +25,7 @@ from pydantic import BaseModel
 from aiq.data_models.evaluate import ProfilerConfig
 from aiq.data_models.intermediate_step import IntermediateStep
+from aiq.profiler.data_models import ProfilerResults
 from aiq.profiler.forecasting.model_trainer import ModelTrainer
 from aiq.profiler.inference_metrics_model import InferenceMetricsModel
 from aiq.profiler.utils import create_standardized_dataframe
@@ -67,9 +68,10 @@ class ProfilerRunner:
       All computed metrics are saved to a metrics JSON file at the end.
     """
-    def __init__(self, profiler_config: ProfilerConfig, output_dir: Path):
+    def __init__(self, profiler_config: ProfilerConfig, output_dir: Path, write_output: bool = True):
         self.profile_config = profiler_config
         self.output_dir = output_dir
+        self.write_output = write_output
         self._converter = TypeConverter([])
         # Holds per-request data (prompt, output, usage_stats, etc.)
@@ -80,7 +82,7 @@ class ProfilerRunner:
         # Ensure output directory
         os.makedirs(output_dir, exist_ok=True)
-    async def run(self, all_steps: list[list[IntermediateStep]]):
+    async def run(self, all_steps: list[list[IntermediateStep]]) -> ProfilerResults:
         """
         Main entrypoint: Works on Input DataFrame generated from eval to fit forecasting model,
         writes out combined requests JSON, then computes and saves additional metrics,
@@ -113,10 +115,11 @@ class ProfilerRunner:
             self.all_requests_data.append({"request_number": i, "intermediate_steps": request_data})
         # Write the final big JSON (all requests)
-        final_path = os.path.join(self.output_dir, "all_requests_profiler_traces.json")
-        with open(final_path, 'w', encoding='utf-8') as f:
-            json.dump(self.all_requests_data, f, indent=2, default=str)
-        logger.info("Wrote combined data to: %s", final_path)
+        if self.write_output:
+            final_path = os.path.join(self.output_dir, "all_requests_profiler_traces.json")
+            with open(final_path, 'w', encoding='utf-8') as f:
+                json.dump(self.all_requests_data, f, indent=2, default=str)
+            logger.info("Wrote combined data to: %s", final_path)
         # ------------------------------------------------------------
         # Generate one standardized dataframe for all usage stats
@@ -171,7 +174,7 @@ class ProfilerRunner:
             uniqueness = compute_inter_query_token_uniqueness_by_llm(all_steps)
             token_uniqueness_results = uniqueness
-        if self.profile_config.workflow_runtime_forecast:
+        if self.profile_config.workflow_runtime_forecast or self.profile_config.base_metrics:
             # ------------------------------------------------------------
             # Compute and save workflow runtime metrics
             # ------------------------------------------------------------
@@ -184,7 +187,7 @@ class ProfilerRunner:
                                                                      token_uniqueness=token_uniqueness_results,
                                                                      workflow_runtimes=workflow_runtimes_results)
-        if inference_optimization_results:
+        if self.write_output and inference_optimization_results:
             # Save to JSON
             optimization_results_path = os.path.join(self.output_dir, "inference_optimization.json")
             with open(optimization_results_path, 'w', encoding='utf-8') as f:
@@ -248,14 +251,14 @@ class ProfilerRunner:
                 exclude=["textual_report"])
             logger.info("Prefix span analysis complete")
-        if workflow_profiling_reports:
+        if self.write_output and workflow_profiling_reports:
             # Save to text file
             profiling_report_path = os.path.join(self.output_dir, "workflow_profiling_report.txt")
             with open(profiling_report_path, 'w', encoding='utf-8') as f:
                 f.write(workflow_profiling_reports)
             logger.info("Wrote workflow profiling report to: %s", profiling_report_path)
-        if workflow_profiling_metrics:
+        if self.write_output and workflow_profiling_metrics:
             # Save to JSON
             profiling_metrics_path = os.path.join(self.output_dir, "workflow_profiling_metrics.json")
             with open(profiling_metrics_path, 'w', encoding='utf-8') as f:
@@ -275,16 +278,19 @@ class ProfilerRunner:
                 logger.info("Fitted model for forecasting.")
             except Exception as e:
                 logger.exception("Fitting model failed. %s", e, exc_info=True)
-                return
+                return ProfilerResults()
-            os.makedirs(self.output_dir, exist_ok=True)
+            if self.write_output:
+                os.makedirs(self.output_dir, exist_ok=True)
-            import pickle
-            with open(os.path.join(self.output_dir, "fitted_model.pkl"), 'wb') as f:
-                pickle.dump(fitted_model, f)
+                import pickle
+                with open(os.path.join(self.output_dir, "fitted_model.pkl"), 'wb') as f:
+                    pickle.dump(fitted_model, f)
             logger.info("Saved fitted model to disk.")
+        return ProfilerResults(workflow_runtime_metrics=workflow_runtimes_results, llm_latency_ci=llm_latency_ci)
     # -------------------------------------------------------------------
     # Confidence Intervals / Metrics
     # -------------------------------------------------------------------
@@ -391,7 +397,8 @@ class ProfilerRunner:
     def _compute_confidence_intervals(self, data: list[float], metric_name: str) -> InferenceMetricsModel:
         """
-        Helper to compute 90, 95, 99% confidence intervals for the mean of a dataset.
+        Helper to compute 90, 95, 99 % confidence intervals for the mean of a dataset
+        **and** the empirical 90th/95th/99th percentiles (p90/p95/p99) of the data itself.
         Uses a z-score from the normal approximation for large samples.
         Returns a dict like::
@@ -409,11 +416,16 @@ class ProfilerRunner:
         n = len(data)
         mean_val = statistics.mean(data)
         if n <= 1:
-            return InferenceMetricsModel(n=n,
-                                         mean=mean_val,
-                                         ninetieth_interval=(mean_val, mean_val),
-                                         ninety_fifth_interval=(mean_val, mean_val),
-                                         ninety_ninth_interval=(mean_val, mean_val))
+            return InferenceMetricsModel(
+                n=n,
+                mean=mean_val,
+                ninetieth_interval=(mean_val, mean_val),
+                ninety_fifth_interval=(mean_val, mean_val),
+                ninety_ninth_interval=(mean_val, mean_val),
+                p90=mean_val,
+                p95=mean_val,
+                p99=mean_val,
+            )
         stdev_val = statistics.pstdev(data)  # population stdev or use stdev for sample
         # standard error
@@ -430,4 +442,32 @@ class ProfilerRunner:
         # Optionally, store more info
         intervals["n"] = n
         intervals["mean"] = mean_val
+        # ------------------------------------------------------------------
+        # Percentiles
+        # ------------------------------------------------------------------
+        sorted_data = sorted(data)
+        def _percentile(arr: list[float], pct: float) -> float:
+            """
+            Linear interpolation between closest ranks.
+            pct is given from 0‑100 (e.g. 90 for p90).
+            """
+            if not arr:
+                return 0.0
+            k = (len(arr) - 1) * (pct / 100.0)
+            f = math.floor(k)
+            c = math.ceil(k)
+            if f == c:
+                return arr[int(k)]
+            return arr[f] + (arr[c] - arr[f]) * (k - f)
+        p90_val = _percentile(sorted_data, 90)
+        p95_val = _percentile(sorted_data, 95)
+        p99_val = _percentile(sorted_data, 99)
+        intervals["p90"] = p90_val
+        intervals["p95"] = p95_val
+        intervals["p99"] = p99_val
         return InferenceMetricsModel(**intervals)

aiqtoolkit 1.2.0.dev0__py3-none-any.whl → 1.2.0rc1__py3-none-any.whl

Potentially problematic release.

aiqtoolkit 1.2.0.dev0__py3-none-any.whl → 1.2.0rc1__py3-none-any.whl