aiqtoolkit 1.2.0a20250706__py3-none-any.whl → 1.2.0a20250730__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiqtoolkit might be problematic.

Files changed (197)
  1. aiq/agent/base.py +171 -8
  2. aiq/agent/dual_node.py +1 -1
  3. aiq/agent/react_agent/agent.py +113 -113
  4. aiq/agent/react_agent/register.py +31 -14
  5. aiq/agent/rewoo_agent/agent.py +36 -35
  6. aiq/agent/rewoo_agent/register.py +2 -2
  7. aiq/agent/tool_calling_agent/agent.py +3 -7
  8. aiq/authentication/__init__.py +14 -0
  9. aiq/authentication/api_key/__init__.py +14 -0
  10. aiq/authentication/api_key/api_key_auth_provider.py +92 -0
  11. aiq/authentication/api_key/api_key_auth_provider_config.py +124 -0
  12. aiq/authentication/api_key/register.py +26 -0
  13. aiq/authentication/exceptions/__init__.py +14 -0
  14. aiq/authentication/exceptions/api_key_exceptions.py +38 -0
  15. aiq/authentication/exceptions/auth_code_grant_exceptions.py +86 -0
  16. aiq/authentication/exceptions/call_back_exceptions.py +38 -0
  17. aiq/authentication/exceptions/request_exceptions.py +54 -0
  18. aiq/authentication/http_basic_auth/__init__.py +0 -0
  19. aiq/authentication/http_basic_auth/http_basic_auth_provider.py +81 -0
  20. aiq/authentication/http_basic_auth/register.py +30 -0
  21. aiq/authentication/interfaces.py +93 -0
  22. aiq/authentication/oauth2/__init__.py +14 -0
  23. aiq/authentication/oauth2/oauth2_auth_code_flow_provider.py +107 -0
  24. aiq/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +39 -0
  25. aiq/authentication/oauth2/register.py +25 -0
  26. aiq/authentication/register.py +21 -0
  27. aiq/builder/builder.py +64 -2
  28. aiq/builder/component_utils.py +16 -3
  29. aiq/builder/context.py +26 -0
  30. aiq/builder/eval_builder.py +43 -2
  31. aiq/builder/function.py +32 -4
  32. aiq/builder/function_base.py +1 -1
  33. aiq/builder/intermediate_step_manager.py +6 -8
  34. aiq/builder/user_interaction_manager.py +3 -0
  35. aiq/builder/workflow.py +23 -18
  36. aiq/builder/workflow_builder.py +420 -73
  37. aiq/cli/commands/info/list_mcp.py +103 -16
  38. aiq/cli/commands/sizing/__init__.py +14 -0
  39. aiq/cli/commands/sizing/calc.py +294 -0
  40. aiq/cli/commands/sizing/sizing.py +27 -0
  41. aiq/cli/commands/start.py +1 -0
  42. aiq/cli/entrypoint.py +2 -0
  43. aiq/cli/register_workflow.py +80 -0
  44. aiq/cli/type_registry.py +151 -30
  45. aiq/data_models/api_server.py +117 -11
  46. aiq/data_models/authentication.py +231 -0
  47. aiq/data_models/common.py +35 -7
  48. aiq/data_models/component.py +17 -9
  49. aiq/data_models/component_ref.py +33 -0
  50. aiq/data_models/config.py +60 -3
  51. aiq/data_models/embedder.py +1 -0
  52. aiq/data_models/function_dependencies.py +8 -0
  53. aiq/data_models/interactive.py +10 -1
  54. aiq/data_models/intermediate_step.py +15 -5
  55. aiq/data_models/its_strategy.py +30 -0
  56. aiq/data_models/llm.py +1 -0
  57. aiq/data_models/memory.py +1 -0
  58. aiq/data_models/object_store.py +44 -0
  59. aiq/data_models/retry_mixin.py +35 -0
  60. aiq/data_models/span.py +187 -0
  61. aiq/data_models/telemetry_exporter.py +2 -2
  62. aiq/embedder/nim_embedder.py +2 -1
  63. aiq/embedder/openai_embedder.py +2 -1
  64. aiq/eval/config.py +19 -1
  65. aiq/eval/dataset_handler/dataset_handler.py +75 -1
  66. aiq/eval/evaluate.py +53 -10
  67. aiq/eval/rag_evaluator/evaluate.py +23 -12
  68. aiq/eval/remote_workflow.py +7 -2
  69. aiq/eval/runners/__init__.py +14 -0
  70. aiq/eval/runners/config.py +39 -0
  71. aiq/eval/runners/multi_eval_runner.py +54 -0
  72. aiq/eval/usage_stats.py +6 -0
  73. aiq/eval/utils/weave_eval.py +5 -1
  74. aiq/experimental/__init__.py +0 -0
  75. aiq/experimental/decorators/__init__.py +0 -0
  76. aiq/experimental/decorators/experimental_warning_decorator.py +130 -0
  77. aiq/experimental/inference_time_scaling/__init__.py +0 -0
  78. aiq/experimental/inference_time_scaling/editing/__init__.py +0 -0
  79. aiq/experimental/inference_time_scaling/editing/iterative_plan_refinement_editor.py +147 -0
  80. aiq/experimental/inference_time_scaling/editing/llm_as_a_judge_editor.py +204 -0
  81. aiq/experimental/inference_time_scaling/editing/motivation_aware_summarization.py +107 -0
  82. aiq/experimental/inference_time_scaling/functions/__init__.py +0 -0
  83. aiq/experimental/inference_time_scaling/functions/execute_score_select_function.py +105 -0
  84. aiq/experimental/inference_time_scaling/functions/its_tool_orchestration_function.py +205 -0
  85. aiq/experimental/inference_time_scaling/functions/its_tool_wrapper_function.py +146 -0
  86. aiq/experimental/inference_time_scaling/functions/plan_select_execute_function.py +224 -0
  87. aiq/experimental/inference_time_scaling/models/__init__.py +0 -0
  88. aiq/experimental/inference_time_scaling/models/editor_config.py +132 -0
  89. aiq/experimental/inference_time_scaling/models/its_item.py +48 -0
  90. aiq/experimental/inference_time_scaling/models/scoring_config.py +112 -0
  91. aiq/experimental/inference_time_scaling/models/search_config.py +120 -0
  92. aiq/experimental/inference_time_scaling/models/selection_config.py +154 -0
  93. aiq/experimental/inference_time_scaling/models/stage_enums.py +43 -0
  94. aiq/experimental/inference_time_scaling/models/strategy_base.py +66 -0
  95. aiq/experimental/inference_time_scaling/models/tool_use_config.py +41 -0
  96. aiq/experimental/inference_time_scaling/register.py +36 -0
  97. aiq/experimental/inference_time_scaling/scoring/__init__.py +0 -0
  98. aiq/experimental/inference_time_scaling/scoring/llm_based_agent_scorer.py +168 -0
  99. aiq/experimental/inference_time_scaling/scoring/llm_based_plan_scorer.py +168 -0
  100. aiq/experimental/inference_time_scaling/scoring/motivation_aware_scorer.py +111 -0
  101. aiq/experimental/inference_time_scaling/search/__init__.py +0 -0
  102. aiq/experimental/inference_time_scaling/search/multi_llm_planner.py +128 -0
  103. aiq/experimental/inference_time_scaling/search/multi_query_retrieval_search.py +122 -0
  104. aiq/experimental/inference_time_scaling/search/single_shot_multi_plan_planner.py +128 -0
  105. aiq/experimental/inference_time_scaling/selection/__init__.py +0 -0
  106. aiq/experimental/inference_time_scaling/selection/best_of_n_selector.py +63 -0
  107. aiq/experimental/inference_time_scaling/selection/llm_based_agent_output_selector.py +131 -0
  108. aiq/experimental/inference_time_scaling/selection/llm_based_output_merging_selector.py +159 -0
  109. aiq/experimental/inference_time_scaling/selection/llm_based_plan_selector.py +128 -0
  110. aiq/experimental/inference_time_scaling/selection/threshold_selector.py +58 -0
  111. aiq/front_ends/console/authentication_flow_handler.py +233 -0
  112. aiq/front_ends/console/console_front_end_plugin.py +11 -2
  113. aiq/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
  114. aiq/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +27 -0
  115. aiq/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +107 -0
  116. aiq/front_ends/fastapi/fastapi_front_end_config.py +20 -0
  117. aiq/front_ends/fastapi/fastapi_front_end_controller.py +68 -0
  118. aiq/front_ends/fastapi/fastapi_front_end_plugin.py +14 -1
  119. aiq/front_ends/fastapi/fastapi_front_end_plugin_worker.py +353 -31
  120. aiq/front_ends/fastapi/html_snippets/__init__.py +14 -0
  121. aiq/front_ends/fastapi/html_snippets/auth_code_grant_success.py +35 -0
  122. aiq/front_ends/fastapi/main.py +2 -0
  123. aiq/front_ends/fastapi/message_handler.py +102 -84
  124. aiq/front_ends/fastapi/step_adaptor.py +2 -1
  125. aiq/llm/aws_bedrock_llm.py +2 -1
  126. aiq/llm/nim_llm.py +2 -1
  127. aiq/llm/openai_llm.py +2 -1
  128. aiq/object_store/__init__.py +20 -0
  129. aiq/object_store/in_memory_object_store.py +74 -0
  130. aiq/object_store/interfaces.py +84 -0
  131. aiq/object_store/models.py +36 -0
  132. aiq/object_store/register.py +20 -0
  133. aiq/observability/__init__.py +14 -0
  134. aiq/observability/exporter/__init__.py +14 -0
  135. aiq/observability/exporter/base_exporter.py +449 -0
  136. aiq/observability/exporter/exporter.py +78 -0
  137. aiq/observability/exporter/file_exporter.py +33 -0
  138. aiq/observability/exporter/processing_exporter.py +269 -0
  139. aiq/observability/exporter/raw_exporter.py +52 -0
  140. aiq/observability/exporter/span_exporter.py +264 -0
  141. aiq/observability/exporter_manager.py +335 -0
  142. aiq/observability/mixin/__init__.py +14 -0
  143. aiq/observability/mixin/batch_config_mixin.py +26 -0
  144. aiq/observability/mixin/collector_config_mixin.py +23 -0
  145. aiq/observability/mixin/file_mixin.py +288 -0
  146. aiq/observability/mixin/file_mode.py +23 -0
  147. aiq/observability/mixin/resource_conflict_mixin.py +134 -0
  148. aiq/observability/mixin/serialize_mixin.py +61 -0
  149. aiq/observability/mixin/type_introspection_mixin.py +183 -0
  150. aiq/observability/processor/__init__.py +14 -0
  151. aiq/observability/processor/batching_processor.py +316 -0
  152. aiq/observability/processor/intermediate_step_serializer.py +28 -0
  153. aiq/observability/processor/processor.py +68 -0
  154. aiq/observability/register.py +32 -116
  155. aiq/observability/utils/__init__.py +14 -0
  156. aiq/observability/utils/dict_utils.py +236 -0
  157. aiq/observability/utils/time_utils.py +31 -0
  158. aiq/profiler/calc/__init__.py +14 -0
  159. aiq/profiler/calc/calc_runner.py +623 -0
  160. aiq/profiler/calc/calculations.py +288 -0
  161. aiq/profiler/calc/data_models.py +176 -0
  162. aiq/profiler/calc/plot.py +345 -0
  163. aiq/profiler/data_models.py +2 -0
  164. aiq/profiler/profile_runner.py +16 -13
  165. aiq/runtime/loader.py +8 -2
  166. aiq/runtime/runner.py +23 -9
  167. aiq/runtime/session.py +16 -5
  168. aiq/tool/chat_completion.py +74 -0
  169. aiq/tool/code_execution/README.md +152 -0
  170. aiq/tool/code_execution/code_sandbox.py +151 -72
  171. aiq/tool/code_execution/local_sandbox/.gitignore +1 -0
  172. aiq/tool/code_execution/local_sandbox/local_sandbox_server.py +139 -24
  173. aiq/tool/code_execution/local_sandbox/sandbox.requirements.txt +3 -1
  174. aiq/tool/code_execution/local_sandbox/start_local_sandbox.sh +27 -2
  175. aiq/tool/code_execution/register.py +7 -3
  176. aiq/tool/code_execution/test_code_execution_sandbox.py +414 -0
  177. aiq/tool/mcp/exceptions.py +142 -0
  178. aiq/tool/mcp/mcp_client.py +17 -3
  179. aiq/tool/mcp/mcp_tool.py +1 -1
  180. aiq/tool/register.py +1 -0
  181. aiq/tool/server_tools.py +2 -2
  182. aiq/utils/exception_handlers/automatic_retries.py +289 -0
  183. aiq/utils/exception_handlers/mcp.py +211 -0
  184. aiq/utils/io/model_processing.py +28 -0
  185. aiq/utils/log_utils.py +37 -0
  186. aiq/utils/string_utils.py +38 -0
  187. aiq/utils/type_converter.py +18 -2
  188. aiq/utils/type_utils.py +87 -0
  189. {aiqtoolkit-1.2.0a20250706.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/METADATA +37 -9
  190. {aiqtoolkit-1.2.0a20250706.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/RECORD +195 -80
  191. {aiqtoolkit-1.2.0a20250706.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/entry_points.txt +3 -0
  192. aiq/front_ends/fastapi/websocket.py +0 -153
  193. aiq/observability/async_otel_listener.py +0 -470
  194. {aiqtoolkit-1.2.0a20250706.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/WHEEL +0 -0
  195. {aiqtoolkit-1.2.0a20250706.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
  196. {aiqtoolkit-1.2.0a20250706.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/licenses/LICENSE.md +0 -0
  197. {aiqtoolkit-1.2.0a20250706.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/top_level.txt +0 -0
aiq/profiler/calc/plot.py ADDED
@@ -0,0 +1,345 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+
+from aiq.profiler.calc.data_models import LinearFitResult
+from aiq.profiler.calc.data_models import SizingMetrics
+
+logger = logging.getLogger(__name__)
+
+
+# Plotting constants
+class PlotConfig:
+    # Simple plot settings
+    SIMPLE_FIGSIZE = (12, 6)
+    SIMPLE_LINEWIDTH = 2
+    SIMPLE_DPI = 150
+
+    # Enhanced plot settings
+    ENHANCED_FIGSIZE = (16, 6)
+    ENHANCED_DPI = 300
+
+    # Marker and styling
+    DATA_MARKER = 'o'
+    OUTLIER_MARKER = 'x'
+    OUTLIER_COLOR = 'crimson'
+    TREND_COLOR = 'r'
+    TREND_LINESTYLE = '--'
+    TREND_ALPHA = 0.8
+    TREND_LINEWIDTH = 2.0
+
+    # Colors
+    LLM_LATENCY_COLOR = 'steelblue'
+    RUNTIME_COLOR = 'darkgreen'
+    SLA_COLOR = 'red'
+    NOTE_BOX_COLOR = 'mistyrose'
+    NOTE_TEXT_COLOR = 'crimson'
+    STATS_BOX_COLOR = 'lightblue'
+
+    # Alpha values
+    DATA_ALPHA = 0.7
+    OUTLIER_ALPHA = 0.9
+    GRID_ALPHA = 0.3
+    SLA_ALPHA = 0.7
+    NOTE_BOX_ALPHA = 0.7
+    STATS_BOX_ALPHA = 0.8
+
+    # Sizes
+    DATA_POINT_SIZE = 120
+    OUTLIER_POINT_SIZE = 140
+    DATA_LINEWIDTH = 1
+
+    # Font sizes
+    AXIS_LABEL_FONTSIZE = 12
+    TITLE_FONTSIZE = 14
+    LEGEND_FONTSIZE = 10
+    NOTE_FONTSIZE = 10
+    STATS_FONTSIZE = 10
+
+    # Text positioning
+    NOTE_X_POS = 0.98
+    NOTE_Y_POS = 0.02
+    STATS_X_POS = 0.02
+    STATS_Y_POS = 0.02
+
+    # Box styling
+    NOTE_BOX_PAD = 0.3
+    STATS_BOX_PAD = 0.5
+
+    # Trend line points
+    TREND_LINE_POINTS = 100
+
+    # Font weights
+    AXIS_LABEL_FONTWEIGHT = 'bold'
+    TITLE_FONTWEIGHT = 'bold'
+
+
+def plot_concurrency_vs_time_metrics_simple(df: pd.DataFrame, output_dir: Path) -> None:
+    """
+    Save a simple plot of concurrency vs. p95 LLM latency and workflow runtime.
+    """
+    plt.figure(figsize=PlotConfig.SIMPLE_FIGSIZE)
+    plt.plot(df["concurrency"],
+             df["llm_latency_p95"],
+             label="p95 LLM Latency (s)",
+             marker=PlotConfig.DATA_MARKER,
+             linewidth=PlotConfig.SIMPLE_LINEWIDTH)
+    plt.plot(df["concurrency"],
+             df["workflow_runtime_p95"],
+             label="p95 Workflow Runtime (s)",
+             marker="s",
+             linewidth=PlotConfig.SIMPLE_LINEWIDTH)
+    plt.xlabel("Concurrency")
+    plt.ylabel("Time (seconds)")
+    plt.title("Concurrency vs. p95 LLM Latency and Workflow Runtime")
+    plt.grid(True, alpha=PlotConfig.GRID_ALPHA)
+    plt.legend()
+    plt.tight_layout()
+
+    simple_plot_path = output_dir / "concurrency_vs_p95_simple.png"
+    plt.savefig(simple_plot_path, dpi=PlotConfig.SIMPLE_DPI, bbox_inches='tight')
+    plt.close()
+    logger.info("Simple plot saved to %s", simple_plot_path)
+
+
+def plot_metric_vs_concurrency_with_optional_fit(
+    ax: plt.Axes,
+    x: np.ndarray,
+    y: np.ndarray,
+    metric_name: str,
+    y_label: str,
+    title: str,
+    color: str,
+    sla_value: float = 0.0,
+    sla_label: str = None,
+    fit: LinearFitResult | None = None,
+):
+    """
+    Helper to plot a metric vs concurrency with pre-computed fit, outlier highlighting, and SLA line.
+    Requires pre-computed fit to be provided.
+    """
+    marker = PlotConfig.DATA_MARKER
+    outlier_marker = PlotConfig.OUTLIER_MARKER
+    outlier_color = PlotConfig.OUTLIER_COLOR
+    trend_color = PlotConfig.TREND_COLOR
+    trend_linestyle = PlotConfig.TREND_LINESTYLE
+    trend_alpha = PlotConfig.TREND_ALPHA
+    trend_linewidth = PlotConfig.TREND_LINEWIDTH
+    note_box_color = PlotConfig.NOTE_BOX_COLOR
+    note_text_color = PlotConfig.NOTE_TEXT_COLOR
+    legend_fontsize = PlotConfig.LEGEND_FONTSIZE
+    outliers_x = outliers_y = np.array([])
+    outliers_note = ""
+
+    # Skip analysis plot if no fit is available
+    if not fit:
+        logger.warning(f"No linear fit available for {metric_name}, skipping analysis plot")
+        return False
+
+    if fit.outliers_removed:
+        # Use the concurrencies that were removed to identify outlier points
+        outlier_mask = np.isin(x, fit.outliers_removed)
+        outliers_x = x[outlier_mask]
+        outliers_y = y[outlier_mask]
+        outliers_note = f"Outliers removed: concurrencies {fit.outliers_removed}"
+        # Plot cleaned data (points that weren't removed as outliers)
+        non_outlier_mask = ~np.isin(x, fit.outliers_removed)
+        x_clean = x[non_outlier_mask]
+        y_clean = y[non_outlier_mask]
+        ax.scatter(x_clean,
+                   y_clean,
+                   alpha=PlotConfig.DATA_ALPHA,
+                   s=PlotConfig.DATA_POINT_SIZE,
+                   c=color,
+                   edgecolors='white',
+                   linewidth=PlotConfig.DATA_LINEWIDTH,
+                   marker=marker,
+                   label='Data Points')
+        ax.scatter(outliers_x,
+                   outliers_y,
+                   alpha=PlotConfig.OUTLIER_ALPHA,
+                   s=PlotConfig.OUTLIER_POINT_SIZE,
+                   c=outlier_color,
+                   marker=outlier_marker,
+                   label='Removed Outliers')
+    else:
+        # No outliers plot all data points
+        ax.scatter(x,
+                   y,
+                   alpha=PlotConfig.DATA_ALPHA,
+                   s=PlotConfig.DATA_POINT_SIZE,
+                   c=color,
+                   edgecolors='white',
+                   linewidth=PlotConfig.DATA_LINEWIDTH,
+                   marker=marker,
+                   label='Data Points')
+
+    # Plot trend line using the fit
+    x_fit = np.linspace(x.min(), x.max(), PlotConfig.TREND_LINE_POINTS)
+    y_fit = fit.slope * x_fit + fit.intercept
+    ax.plot(x_fit,
+            y_fit,
+            trend_linestyle,
+            alpha=trend_alpha,
+            linewidth=trend_linewidth,
+            color=trend_color,
+            label=f'Trend (slope={fit.slope:.4f}, R²={fit.r_squared:.3f})')
+
+    if sla_value > 0:
+        ax.axhline(y=sla_value,
+                   color=PlotConfig.SLA_COLOR,
+                   linestyle=':',
+                   alpha=PlotConfig.SLA_ALPHA,
+                   linewidth=2,
+                   label=sla_label or f'SLA Threshold ({sla_value}s)')
+
+    ax.set_xlabel('Concurrency', fontsize=PlotConfig.AXIS_LABEL_FONTSIZE, fontweight=PlotConfig.AXIS_LABEL_FONTWEIGHT)
+    ax.set_ylabel(y_label, fontsize=PlotConfig.AXIS_LABEL_FONTSIZE, fontweight=PlotConfig.AXIS_LABEL_FONTWEIGHT)
+    ax.set_title(title, fontsize=PlotConfig.TITLE_FONTSIZE, fontweight=PlotConfig.TITLE_FONTWEIGHT)
+    ax.grid(True, alpha=PlotConfig.GRID_ALPHA)
+    ax.legend(fontsize=legend_fontsize)
+    if outliers_note:
+        ax.text(PlotConfig.NOTE_X_POS,
+                PlotConfig.NOTE_Y_POS,
+                outliers_note,
+                transform=ax.transAxes,
+                fontsize=PlotConfig.NOTE_FONTSIZE,
+                color=note_text_color,
+                ha='right',
+                va='bottom',
+                bbox=dict(boxstyle=f'round,pad={PlotConfig.NOTE_BOX_PAD}',
+                          facecolor=note_box_color,
+                          alpha=PlotConfig.NOTE_BOX_ALPHA))
+
+    return True
+
+
+def plot_concurrency_vs_time_metrics(metrics_per_concurrency: dict[int, SizingMetrics],
+                                     output_dir: Path,
+                                     target_llm_latency: float = 0.0,
+                                     target_runtime: float = 0.0,
+                                     llm_latency_fit: LinearFitResult | None = None,
+                                     runtime_fit: LinearFitResult | None = None) -> None:
+    """
+    Plot concurrency vs. p95 latency and workflow runtime using metrics_per_concurrency.
+    Enhanced with better styling, trend analysis, and annotations.
+    Only plots valid runs and requires pre-computed fits.
+    """
+    rows = []
+
+    for concurrency, metrics in metrics_per_concurrency.items():
+        llm_latency = metrics.llm_latency_p95
+        workflow_runtime = metrics.workflow_runtime_p95
+
+        rows.append({
+            "concurrency": concurrency, "llm_latency_p95": llm_latency, "workflow_runtime_p95": workflow_runtime
+        })
+
+    if not rows:
+        logger.warning("No valid metrics data available to plot.")
+        return
+
+    plt.style.use('seaborn-v0_8')
+    df = pd.DataFrame(rows).sort_values("concurrency")
+
+    # Always generate simple plot first
+    plot_concurrency_vs_time_metrics_simple(df, output_dir)
+
+    # Check if we have fits available for analysis plots
+    has_llm_latency_fit = llm_latency_fit is not None
+    has_runtime_fit = runtime_fit is not None
+
+    if not has_llm_latency_fit and not has_runtime_fit:
+        logger.warning("No linear fits available for analysis plots, skipping enhanced plot")
+        return
+
+    # Create subplots based on available fits
+    if has_llm_latency_fit and has_runtime_fit:
+        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=PlotConfig.ENHANCED_FIGSIZE)
+    else:
+        fig, ax1 = plt.subplots(1, 1, figsize=(8, 6))
+        ax2 = None
+
+    # Plot llm_latency if fit is available
+    llm_latency_plotted = False
+    if has_llm_latency_fit:
+        llm_latency_plotted = plot_metric_vs_concurrency_with_optional_fit(
+            ax1,
+            df["concurrency"].to_numpy(),
+            df["llm_latency_p95"].to_numpy(),
+            metric_name="llm_latency",
+            y_label='P95 LLM Latency (seconds)',
+            title='Concurrency vs P95 LLM Latency',
+            color=PlotConfig.LLM_LATENCY_COLOR,
+            sla_value=target_llm_latency,
+            sla_label=f'SLA Threshold ({target_llm_latency}s)' if target_llm_latency > 0 else None,
+            fit=llm_latency_fit,
+        )
+
+    # Plot runtime if fit is available
+    runtime_plotted = False
+    if has_runtime_fit and ax2 is not None:
+        runtime_plotted = plot_metric_vs_concurrency_with_optional_fit(
+            ax2,
+            df["concurrency"].to_numpy(),
+            df["workflow_runtime_p95"].to_numpy(),
+            metric_name="runtime",
+            y_label='P95 Workflow Runtime (seconds)',
+            title='Concurrency vs P95 Workflow Runtime',
+            color=PlotConfig.RUNTIME_COLOR,
+            sla_value=target_runtime,
+            sla_label=f'SLA Threshold ({target_runtime}s)' if target_runtime > 0 else None,
+            fit=runtime_fit,
+        )
+
+    # Check if any plots were successfully created
+    plots_created = (llm_latency_plotted or runtime_plotted)
+
+    if not plots_created:
+        logger.warning("No analysis plots could be created, skipping enhanced plot")
+        plt.close(fig)
+        return
+
+    # Add summary statistics
+    stats_text = f'Data Points: {len(df)}\n'
+    stats_text += f'LLM Latency Range: {df["llm_latency_p95"].min():.3f}-{df["llm_latency_p95"].max():.3f}s\n'
+    stats_text += f'WF Runtime Range: {df["workflow_runtime_p95"].min():.3f}-{df["workflow_runtime_p95"].max():.3f}s'
+
+    fig.text(PlotConfig.STATS_X_POS,
+             PlotConfig.STATS_Y_POS,
+             stats_text,
+             fontsize=PlotConfig.STATS_FONTSIZE,
+             bbox=dict(boxstyle=f'round,pad={PlotConfig.STATS_BOX_PAD}',
+                       facecolor=PlotConfig.STATS_BOX_COLOR,
+                       alpha=PlotConfig.STATS_BOX_ALPHA))
+
+    plt.tight_layout()
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    enhanced_plot_path = output_dir / "concurrency_vs_p95_analysis.png"
+    plt.savefig(enhanced_plot_path,
+                dpi=PlotConfig.ENHANCED_DPI,
+                bbox_inches='tight',
+                facecolor='white',
+                edgecolor='none')
+    plt.close()
+
+    logger.info("Enhanced plot saved to %s", enhanced_plot_path)
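As a usage sketch (illustrative only, with made-up numbers): the simple plot needs nothing beyond a DataFrame with the three columns the new module reads, so it can be driven directly:

from pathlib import Path

import pandas as pd

from aiq.profiler.calc.plot import plot_concurrency_vs_time_metrics_simple

# Hypothetical sizing results: p95 latency / runtime measured at several concurrency levels.
df = pd.DataFrame({
    "concurrency": [1, 2, 4, 8],
    "llm_latency_p95": [0.8, 0.9, 1.3, 2.1],
    "workflow_runtime_p95": [2.0, 2.4, 3.5, 5.8],
})

out_dir = Path("./sizing_plots")
out_dir.mkdir(parents=True, exist_ok=True)

# Writes concurrency_vs_p95_simple.png into out_dir.
plot_concurrency_vs_time_metrics_simple(df, out_dir)

The enhanced plot (plot_concurrency_vs_time_metrics) additionally wants SizingMetrics per concurrency and pre-computed LinearFitResult objects, so it is normally driven by the sizing calculator rather than called by hand.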
aiq/profiler/data_models.py CHANGED
@@ -15,8 +15,10 @@
 
 from pydantic import BaseModel
 
+from aiq.profiler.inference_metrics_model import InferenceMetricsModel
 from aiq.profiler.inference_optimization.data_models import WorkflowRuntimeMetrics
 
 
 class ProfilerResults(BaseModel):
     workflow_runtime_metrics: WorkflowRuntimeMetrics | None = None
+    llm_latency_ci: InferenceMetricsModel | None = None
aiq/profiler/profile_runner.py CHANGED
@@ -68,9 +68,10 @@ class ProfilerRunner:
     All computed metrics are saved to a metrics JSON file at the end.
     """
 
-    def __init__(self, profiler_config: ProfilerConfig, output_dir: Path):
+    def __init__(self, profiler_config: ProfilerConfig, output_dir: Path, write_output: bool = True):
         self.profile_config = profiler_config
         self.output_dir = output_dir
+        self.write_output = write_output
         self._converter = TypeConverter([])
 
         # Holds per-request data (prompt, output, usage_stats, etc.)
@@ -114,10 +115,11 @@ class ProfilerRunner:
             self.all_requests_data.append({"request_number": i, "intermediate_steps": request_data})
 
         # Write the final big JSON (all requests)
-        final_path = os.path.join(self.output_dir, "all_requests_profiler_traces.json")
-        with open(final_path, 'w', encoding='utf-8') as f:
-            json.dump(self.all_requests_data, f, indent=2, default=str)
-        logger.info("Wrote combined data to: %s", final_path)
+        if self.write_output:
+            final_path = os.path.join(self.output_dir, "all_requests_profiler_traces.json")
+            with open(final_path, 'w', encoding='utf-8') as f:
+                json.dump(self.all_requests_data, f, indent=2, default=str)
+            logger.info("Wrote combined data to: %s", final_path)
 
         # ------------------------------------------------------------
         # Generate one standardized dataframe for all usage stats
@@ -185,7 +187,7 @@ class ProfilerRunner:
                                                       token_uniqueness=token_uniqueness_results,
                                                       workflow_runtimes=workflow_runtimes_results)
 
-        if inference_optimization_results:
+        if self.write_output and inference_optimization_results:
             # Save to JSON
             optimization_results_path = os.path.join(self.output_dir, "inference_optimization.json")
             with open(optimization_results_path, 'w', encoding='utf-8') as f:
@@ -249,14 +251,14 @@ class ProfilerRunner:
                                                      exclude=["textual_report"])
             logger.info("Prefix span analysis complete")
 
-        if workflow_profiling_reports:
+        if self.write_output and workflow_profiling_reports:
             # Save to text file
             profiling_report_path = os.path.join(self.output_dir, "workflow_profiling_report.txt")
             with open(profiling_report_path, 'w', encoding='utf-8') as f:
                 f.write(workflow_profiling_reports)
             logger.info("Wrote workflow profiling report to: %s", profiling_report_path)
 
-        if workflow_profiling_metrics:
+        if self.write_output and workflow_profiling_metrics:
             # Save to JSON
             profiling_metrics_path = os.path.join(self.output_dir, "workflow_profiling_metrics.json")
             with open(profiling_metrics_path, 'w', encoding='utf-8') as f:
@@ -278,15 +280,16 @@ class ProfilerRunner:
             logger.exception("Fitting model failed. %s", e, exc_info=True)
             return ProfilerResults()
 
-        os.makedirs(self.output_dir, exist_ok=True)
+        if self.write_output:
+            os.makedirs(self.output_dir, exist_ok=True)
 
-        import pickle
-        with open(os.path.join(self.output_dir, "fitted_model.pkl"), 'wb') as f:
-            pickle.dump(fitted_model, f)
+            import pickle
+            with open(os.path.join(self.output_dir, "fitted_model.pkl"), 'wb') as f:
+                pickle.dump(fitted_model, f)
 
         logger.info("Saved fitted model to disk.")
 
-        return ProfilerResults(workflow_runtime_metrics=workflow_runtimes_results)
+        return ProfilerResults(workflow_runtime_metrics=workflow_runtimes_results, llm_latency_ci=llm_latency_ci)
 
    # -------------------------------------------------------------------
    # Confidence Intervals / Metrics
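A hedged sketch of what the new write_output flag buys: with write_output=False the runner skips every file it would otherwise write (the combined traces JSON, inference_optimization.json, the workflow profiling report and metrics, the pickled fitted model) and results come back only through ProfilerResults, which now also carries llm_latency_ci. The ProfilerConfig import path is not shown in this diff, so the config object is simply passed in here:

from pathlib import Path

from aiq.profiler.profile_runner import ProfilerRunner  # module path per the hunks above


def build_in_memory_runner(profiler_config) -> ProfilerRunner:
    # `profiler_config` is assumed to be an already-constructed ProfilerConfig.
    # write_output=False keeps all traces and metrics in memory only.
    return ProfilerRunner(profiler_config, Path("/tmp/aiq_profiler"), write_output=False)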
aiq/runtime/loader.py CHANGED
@@ -48,14 +48,18 @@ class PluginTypes(IntFlag):
     """
     A plugin that is an evaluator for the workflow. This includes evaluators like RAGAS, SWE-bench, etc.
     """
+    AUTHENTICATION = auto()
+    """
+    A plugin that is an API authentication provider for the workflow. This includes Oauth2, API Key, etc.
+    """
     REGISTRY_HANDLER = auto()
 
     # Convenience flag for groups of plugin types
-    CONFIG_OBJECT = COMPONENT | FRONT_END | EVALUATOR
+    CONFIG_OBJECT = COMPONENT | FRONT_END | EVALUATOR | AUTHENTICATION
     """
     Any plugin that can be specified in the AIQ Toolkit configuration file.
     """
-    ALL = COMPONENT | FRONT_END | EVALUATOR | REGISTRY_HANDLER
+    ALL = COMPONENT | FRONT_END | EVALUATOR | REGISTRY_HANDLER | AUTHENTICATION
     """
     All plugin types
     """
@@ -130,6 +134,8 @@ def discover_entrypoints(plugin_type: PluginTypes):
         plugin_groups.append("aiq.registry_handlers")
     if (plugin_type & PluginTypes.EVALUATOR):
         plugin_groups.append("aiq.evaluators")
+    if (plugin_type & PluginTypes.AUTHENTICATION):
+        plugin_groups.append("aiq.authentication_providers")
 
     # Get the entry points for the specified groups
     aiq_plugins = reduce(lambda x, y: list(x) + list(y), [entry_points.select(group=y) for y in plugin_groups])
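A short sketch of the new plugin type in use; the entry-point group name comes straight from discover_entrypoints above:

from aiq.runtime.loader import PluginTypes
from aiq.runtime.loader import discover_entrypoints

# AUTHENTICATION is now folded into the CONFIG_OBJECT and ALL convenience flags.
assert PluginTypes.AUTHENTICATION & PluginTypes.CONFIG_OBJECT
assert PluginTypes.AUTHENTICATION & PluginTypes.ALL

# Entry points registered under the "aiq.authentication_providers" group are now
# discovered alongside components, front ends, evaluators and registry handlers.
auth_plugins = discover_entrypoints(PluginTypes.AUTHENTICATION)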
aiq/runtime/runner.py CHANGED
@@ -21,7 +21,7 @@ from aiq.builder.context import AIQContext
 from aiq.builder.context import AIQContextState
 from aiq.builder.function import Function
 from aiq.data_models.invocation_node import InvocationNode
-from aiq.observability.async_otel_listener import AsyncOtelSpanListener
+from aiq.observability.exporter_manager import ExporterManager
 from aiq.utils.reactive.subject import Subject
 
 logger = logging.getLogger(__name__)
@@ -44,7 +44,11 @@ _T = typing.TypeVar("_T")
 
 class AIQRunner:
 
-    def __init__(self, input_message: typing.Any, entry_fn: Function, context_state: AIQContextState):
+    def __init__(self,
+                 input_message: typing.Any,
+                 entry_fn: Function,
+                 context_state: AIQContextState,
+                 exporter_manager: ExporterManager):
         """
         The AIQRunner class is used to run a workflow. It handles converting input and output data types and running the
         workflow with the specified concurrency.
@@ -57,6 +61,8 @@ class AIQRunner:
             The entry function to the workflow
         context_state : AIQContextState
            The context state to use
+        exporter_manager : ExporterManager
+            The exporter manager to use
         """
 
         if (entry_fn is None):
@@ -73,7 +79,7 @@ class AIQRunner:
         # Before we start, we need to convert the input message to the workflow input type
         self._input_message = input_message
 
-        self._span_manager = AsyncOtelSpanListener(context_state=context_state)
+        self._exporter_manager = exporter_manager
 
     @property
     def context(self) -> AIQContext:
@@ -130,19 +136,23 @@ class AIQRunner:
             if (not self._entry_fn.has_single_output):
                 raise ValueError("Workflow does not support single output")
 
-            async with self._span_manager.start():
+            async with self._exporter_manager.start(context_state=self._context_state):
                 # Run the workflow
                 result = await self._entry_fn.ainvoke(self._input_message, to_type=to_type)
 
                 # Close the intermediate stream
-                self._context_state.event_stream.get().on_complete()
+                event_stream = self._context_state.event_stream.get()
+                if event_stream:
+                    event_stream.on_complete()
 
                 self._state = AIQRunnerState.COMPLETED
 
                 return result
         except Exception as e:
             logger.exception("Error running workflow: %s", e)
-            self._context_state.event_stream.get().on_complete()
+            event_stream = self._context_state.event_stream.get()
+            if event_stream:
+                event_stream.on_complete()
             self._state = AIQRunnerState.FAILED
 
             raise
@@ -159,18 +169,22 @@ class AIQRunner:
                 raise ValueError("Workflow does not support streaming output")
 
             # Run the workflow
-            async with self._span_manager.start():
+            async with self._exporter_manager.start(context_state=self._context_state):
                 async for m in self._entry_fn.astream(self._input_message, to_type=to_type):
                     yield m
 
                 self._state = AIQRunnerState.COMPLETED
 
                 # Close the intermediate stream
-                self._context_state.event_stream.get().on_complete()
+                event_stream = self._context_state.event_stream.get()
+                if event_stream:
+                    event_stream.on_complete()
 
         except Exception as e:
             logger.exception("Error running workflow: %s", e)
-            self._context_state.event_stream.get().on_complete()
+            event_stream = self._context_state.event_stream.get()
+            if event_stream:
+                event_stream.on_complete()
             self._state = AIQRunnerState.FAILED
 
             raise
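From these hunks, the only contract AIQRunner relies on is that ExporterManager.start(context_state=...) returns an async context manager bracketing the run. A minimal no-op stand-in for tests might look like this (sketch only; the real implementation is the new aiq/observability/exporter_manager.py and does far more):

from contextlib import asynccontextmanager


class NoOpExporterManager:
    """Stand-in satisfying the interface AIQRunner now expects."""

    @asynccontextmanager
    async def start(self, *, context_state):
        # A real ExporterManager would attach telemetry exporters to the
        # intermediate-step event stream here and flush them on exit.
        yield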
aiq/runtime/session.py CHANGED
@@ -21,11 +21,14 @@ from collections.abc import Callable
 from contextlib import asynccontextmanager
 from contextlib import nullcontext
 
-from fastapi import Request
+from starlette.requests import HTTPConnection
 
 from aiq.builder.context import AIQContext
 from aiq.builder.context import AIQContextState
 from aiq.builder.workflow import Workflow
+from aiq.data_models.authentication import AuthenticatedContext
+from aiq.data_models.authentication import AuthFlowType
+from aiq.data_models.authentication import AuthProviderBaseConfig
 from aiq.data_models.config import AIQConfig
 from aiq.data_models.interactive import HumanResponse
 from aiq.data_models.interactive import InteractionPrompt
@@ -86,9 +89,11 @@ class AIQSessionManager:
     @asynccontextmanager
     async def session(self,
                       user_manager=None,
-                      request: Request | None = None,
+                      request: HTTPConnection | None = None,
                       conversation_id: str | None = None,
-                      user_input_callback: Callable[[InteractionPrompt], Awaitable[HumanResponse]] = None):
+                      user_input_callback: Callable[[InteractionPrompt], Awaitable[HumanResponse]] = None,
+                      user_authentication_callback: Callable[[AuthProviderBaseConfig, AuthFlowType],
+                                                             Awaitable[AuthenticatedContext | None]] = None):
 
         token_user_input = None
         if user_input_callback is not None:
@@ -98,6 +103,10 @@ class AIQSessionManager:
         if user_manager is not None:
             token_user_manager = self._context_state.user_manager.set(user_manager)
 
+        token_user_authentication = None
+        if user_authentication_callback is not None:
+            token_user_authentication = self._context_state.user_auth_callback.set(user_authentication_callback)
+
         if conversation_id is not None and request is None:
             self._context_state.conversation_id.set(conversation_id)
 
@@ -110,6 +119,8 @@ class AIQSessionManager:
                 self._context_state.user_manager.reset(token_user_manager)
             if token_user_input is not None:
                 self._context_state.user_input_callback.reset(token_user_input)
+            if token_user_authentication is not None:
+                self._context_state.user_auth_callback.reset(token_user_authentication)
 
     @asynccontextmanager
     async def run(self, message):
@@ -124,7 +135,7 @@ class AIQSessionManager:
         async with self._workflow.run(message) as runner:
             yield runner
 
-    def set_metadata_from_http_request(self, request: Request | None) -> None:
+    def set_metadata_from_http_request(self, request: HTTPConnection | None) -> None:
         """
         Extracts and sets user metadata request attributes from a HTTP request.
         If request is None, no attributes are set.
@@ -132,7 +143,7 @@ class AIQSessionManager:
         if request is None:
             return
 
-        self._context.metadata._request.method = request.method
+        self._context.metadata._request.method = getattr(request, "method", None)
         self._context.metadata._request.url_path = request.url.path
         self._context.metadata._request.url_port = request.url.port
         self._context.metadata._request.url_scheme = request.url.scheme
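The new user_authentication_callback hook has the signature shown above. A hedged sketch of providing one (the body is left trivial because how an AuthenticatedContext gets populated depends on the configured auth provider):

from aiq.data_models.authentication import AuthenticatedContext
from aiq.data_models.authentication import AuthFlowType
from aiq.data_models.authentication import AuthProviderBaseConfig


async def authenticate(config: AuthProviderBaseConfig,
                       flow_type: AuthFlowType) -> AuthenticatedContext | None:
    # Sketch: look up or refresh credentials for `config` and return an
    # AuthenticatedContext, or None if the flow cannot be completed.
    return None


# Later, given an AIQSessionManager instance `session_manager`:
#     async with session_manager.session(user_authentication_callback=authenticate):
#         ...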