PyPI - aiqtoolkit - Versions diffs - 1.2.0a20250707__py3-none-any.whl → 1.2.0a20250730__py3-none-any.whl - Mend

aiqtoolkit 1.2.0a20250707-py3-none-any.whl → 1.2.0a20250730-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of aiqtoolkit might be problematic. Click here for more details.

Files changed (197)

aiq/agent/base.py +171 -8
aiq/agent/dual_node.py +1 -1
aiq/agent/react_agent/agent.py +113 -113
aiq/agent/react_agent/register.py +31 -14
aiq/agent/rewoo_agent/agent.py +36 -35
aiq/agent/rewoo_agent/register.py +2 -2
aiq/agent/tool_calling_agent/agent.py +3 -7
aiq/authentication/__init__.py +14 -0
aiq/authentication/api_key/__init__.py +14 -0
aiq/authentication/api_key/api_key_auth_provider.py +92 -0
aiq/authentication/api_key/api_key_auth_provider_config.py +124 -0
aiq/authentication/api_key/register.py +26 -0
aiq/authentication/exceptions/__init__.py +14 -0
aiq/authentication/exceptions/api_key_exceptions.py +38 -0
aiq/authentication/exceptions/auth_code_grant_exceptions.py +86 -0
aiq/authentication/exceptions/call_back_exceptions.py +38 -0
aiq/authentication/exceptions/request_exceptions.py +54 -0
aiq/authentication/http_basic_auth/__init__.py +0 -0
aiq/authentication/http_basic_auth/http_basic_auth_provider.py +81 -0
aiq/authentication/http_basic_auth/register.py +30 -0
aiq/authentication/interfaces.py +93 -0
aiq/authentication/oauth2/__init__.py +14 -0
aiq/authentication/oauth2/oauth2_auth_code_flow_provider.py +107 -0
aiq/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +39 -0
aiq/authentication/oauth2/register.py +25 -0
aiq/authentication/register.py +21 -0
aiq/builder/builder.py +64 -2
aiq/builder/component_utils.py +16 -3
aiq/builder/context.py +26 -0
aiq/builder/eval_builder.py +43 -2
aiq/builder/function.py +32 -4
aiq/builder/function_base.py +1 -1
aiq/builder/intermediate_step_manager.py +6 -8
aiq/builder/user_interaction_manager.py +3 -0
aiq/builder/workflow.py +23 -18
aiq/builder/workflow_builder.py +420 -73
aiq/cli/commands/info/list_mcp.py +103 -16
aiq/cli/commands/sizing/__init__.py +14 -0
aiq/cli/commands/sizing/calc.py +294 -0
aiq/cli/commands/sizing/sizing.py +27 -0
aiq/cli/commands/start.py +1 -0
aiq/cli/entrypoint.py +2 -0
aiq/cli/register_workflow.py +80 -0
aiq/cli/type_registry.py +151 -30
aiq/data_models/api_server.py +117 -11
aiq/data_models/authentication.py +231 -0
aiq/data_models/common.py +35 -7
aiq/data_models/component.py +17 -9
aiq/data_models/component_ref.py +33 -0
aiq/data_models/config.py +60 -3
aiq/data_models/embedder.py +1 -0
aiq/data_models/function_dependencies.py +8 -0
aiq/data_models/interactive.py +10 -1
aiq/data_models/intermediate_step.py +15 -5
aiq/data_models/its_strategy.py +30 -0
aiq/data_models/llm.py +1 -0
aiq/data_models/memory.py +1 -0
aiq/data_models/object_store.py +44 -0
aiq/data_models/retry_mixin.py +35 -0
aiq/data_models/span.py +187 -0
aiq/data_models/telemetry_exporter.py +2 -2
aiq/embedder/nim_embedder.py +2 -1
aiq/embedder/openai_embedder.py +2 -1
aiq/eval/config.py +19 -1
aiq/eval/dataset_handler/dataset_handler.py +75 -1
aiq/eval/evaluate.py +53 -10
aiq/eval/rag_evaluator/evaluate.py +23 -12
aiq/eval/remote_workflow.py +7 -2
aiq/eval/runners/__init__.py +14 -0
aiq/eval/runners/config.py +39 -0
aiq/eval/runners/multi_eval_runner.py +54 -0
aiq/eval/usage_stats.py +6 -0
aiq/eval/utils/weave_eval.py +5 -1
aiq/experimental/__init__.py +0 -0
aiq/experimental/decorators/__init__.py +0 -0
aiq/experimental/decorators/experimental_warning_decorator.py +130 -0
aiq/experimental/inference_time_scaling/__init__.py +0 -0
aiq/experimental/inference_time_scaling/editing/__init__.py +0 -0
aiq/experimental/inference_time_scaling/editing/iterative_plan_refinement_editor.py +147 -0
aiq/experimental/inference_time_scaling/editing/llm_as_a_judge_editor.py +204 -0
aiq/experimental/inference_time_scaling/editing/motivation_aware_summarization.py +107 -0
aiq/experimental/inference_time_scaling/functions/__init__.py +0 -0
aiq/experimental/inference_time_scaling/functions/execute_score_select_function.py +105 -0
aiq/experimental/inference_time_scaling/functions/its_tool_orchestration_function.py +205 -0
aiq/experimental/inference_time_scaling/functions/its_tool_wrapper_function.py +146 -0
aiq/experimental/inference_time_scaling/functions/plan_select_execute_function.py +224 -0
aiq/experimental/inference_time_scaling/models/__init__.py +0 -0
aiq/experimental/inference_time_scaling/models/editor_config.py +132 -0
aiq/experimental/inference_time_scaling/models/its_item.py +48 -0
aiq/experimental/inference_time_scaling/models/scoring_config.py +112 -0
aiq/experimental/inference_time_scaling/models/search_config.py +120 -0
aiq/experimental/inference_time_scaling/models/selection_config.py +154 -0
aiq/experimental/inference_time_scaling/models/stage_enums.py +43 -0
aiq/experimental/inference_time_scaling/models/strategy_base.py +66 -0
aiq/experimental/inference_time_scaling/models/tool_use_config.py +41 -0
aiq/experimental/inference_time_scaling/register.py +36 -0
aiq/experimental/inference_time_scaling/scoring/__init__.py +0 -0
aiq/experimental/inference_time_scaling/scoring/llm_based_agent_scorer.py +168 -0
aiq/experimental/inference_time_scaling/scoring/llm_based_plan_scorer.py +168 -0
aiq/experimental/inference_time_scaling/scoring/motivation_aware_scorer.py +111 -0
aiq/experimental/inference_time_scaling/search/__init__.py +0 -0
aiq/experimental/inference_time_scaling/search/multi_llm_planner.py +128 -0
aiq/experimental/inference_time_scaling/search/multi_query_retrieval_search.py +122 -0
aiq/experimental/inference_time_scaling/search/single_shot_multi_plan_planner.py +128 -0
aiq/experimental/inference_time_scaling/selection/__init__.py +0 -0
aiq/experimental/inference_time_scaling/selection/best_of_n_selector.py +63 -0
aiq/experimental/inference_time_scaling/selection/llm_based_agent_output_selector.py +131 -0
aiq/experimental/inference_time_scaling/selection/llm_based_output_merging_selector.py +159 -0
aiq/experimental/inference_time_scaling/selection/llm_based_plan_selector.py +128 -0
aiq/experimental/inference_time_scaling/selection/threshold_selector.py +58 -0
aiq/front_ends/console/authentication_flow_handler.py +233 -0
aiq/front_ends/console/console_front_end_plugin.py +11 -2
aiq/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
aiq/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +27 -0
aiq/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +107 -0
aiq/front_ends/fastapi/fastapi_front_end_config.py +20 -0
aiq/front_ends/fastapi/fastapi_front_end_controller.py +68 -0
aiq/front_ends/fastapi/fastapi_front_end_plugin.py +14 -1
aiq/front_ends/fastapi/fastapi_front_end_plugin_worker.py +353 -31
aiq/front_ends/fastapi/html_snippets/__init__.py +14 -0
aiq/front_ends/fastapi/html_snippets/auth_code_grant_success.py +35 -0
aiq/front_ends/fastapi/main.py +2 -0
aiq/front_ends/fastapi/message_handler.py +102 -84
aiq/front_ends/fastapi/step_adaptor.py +2 -1
aiq/llm/aws_bedrock_llm.py +2 -1
aiq/llm/nim_llm.py +2 -1
aiq/llm/openai_llm.py +2 -1
aiq/object_store/__init__.py +20 -0
aiq/object_store/in_memory_object_store.py +74 -0
aiq/object_store/interfaces.py +84 -0
aiq/object_store/models.py +36 -0
aiq/object_store/register.py +20 -0
aiq/observability/__init__.py +14 -0
aiq/observability/exporter/__init__.py +14 -0
aiq/observability/exporter/base_exporter.py +449 -0
aiq/observability/exporter/exporter.py +78 -0
aiq/observability/exporter/file_exporter.py +33 -0
aiq/observability/exporter/processing_exporter.py +269 -0
aiq/observability/exporter/raw_exporter.py +52 -0
aiq/observability/exporter/span_exporter.py +264 -0
aiq/observability/exporter_manager.py +335 -0
aiq/observability/mixin/__init__.py +14 -0
aiq/observability/mixin/batch_config_mixin.py +26 -0
aiq/observability/mixin/collector_config_mixin.py +23 -0
aiq/observability/mixin/file_mixin.py +288 -0
aiq/observability/mixin/file_mode.py +23 -0
aiq/observability/mixin/resource_conflict_mixin.py +134 -0
aiq/observability/mixin/serialize_mixin.py +61 -0
aiq/observability/mixin/type_introspection_mixin.py +183 -0
aiq/observability/processor/__init__.py +14 -0
aiq/observability/processor/batching_processor.py +316 -0
aiq/observability/processor/intermediate_step_serializer.py +28 -0
aiq/observability/processor/processor.py +68 -0
aiq/observability/register.py +32 -116
aiq/observability/utils/__init__.py +14 -0
aiq/observability/utils/dict_utils.py +236 -0
aiq/observability/utils/time_utils.py +31 -0
aiq/profiler/calc/__init__.py +14 -0
aiq/profiler/calc/calc_runner.py +623 -0
aiq/profiler/calc/calculations.py +288 -0
aiq/profiler/calc/data_models.py +176 -0
aiq/profiler/calc/plot.py +345 -0
aiq/profiler/data_models.py +2 -0
aiq/profiler/profile_runner.py +16 -13
aiq/runtime/loader.py +8 -2
aiq/runtime/runner.py +23 -9
aiq/runtime/session.py +16 -5
aiq/tool/chat_completion.py +74 -0
aiq/tool/code_execution/README.md +152 -0
aiq/tool/code_execution/code_sandbox.py +151 -72
aiq/tool/code_execution/local_sandbox/.gitignore +1 -0
aiq/tool/code_execution/local_sandbox/local_sandbox_server.py +139 -24
aiq/tool/code_execution/local_sandbox/sandbox.requirements.txt +3 -1
aiq/tool/code_execution/local_sandbox/start_local_sandbox.sh +27 -2
aiq/tool/code_execution/register.py +7 -3
aiq/tool/code_execution/test_code_execution_sandbox.py +414 -0
aiq/tool/mcp/exceptions.py +142 -0
aiq/tool/mcp/mcp_client.py +17 -3
aiq/tool/mcp/mcp_tool.py +1 -1
aiq/tool/register.py +1 -0
aiq/tool/server_tools.py +2 -2
aiq/utils/exception_handlers/automatic_retries.py +289 -0
aiq/utils/exception_handlers/mcp.py +211 -0
aiq/utils/io/model_processing.py +28 -0
aiq/utils/log_utils.py +37 -0
aiq/utils/string_utils.py +38 -0
aiq/utils/type_converter.py +18 -2
aiq/utils/type_utils.py +87 -0
{aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/METADATA +37 -9
{aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/RECORD +195 -80
{aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/entry_points.txt +3 -0
aiq/front_ends/fastapi/websocket.py +0 -153
aiq/observability/async_otel_listener.py +0 -470
{aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/WHEEL +0 -0
{aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
{aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/licenses/LICENSE.md +0 -0
{aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/top_level.txt +0 -0

aiq/cli/commands/info/list_mcp.py CHANGED Viewed

@@ -15,18 +15,34 @@
 import json
 import logging
+from typing import Any
 import anyio
 import click
+from aiq.tool.mcp.exceptions import MCPError
 from aiq.tool.mcp.mcp_client import MCPBuilder
+from aiq.utils.exception_handlers.mcp import format_mcp_error
 # Suppress verbose logs from mcp.client.sse and httpx
 logging.getLogger("mcp.client.sse").setLevel(logging.WARNING)
 logging.getLogger("httpx").setLevel(logging.WARNING)
+logger = logging.getLogger(__name__)
-def format_tool(tool):
+def format_tool(tool: Any) -> dict[str, str | None]:
+    """Format an MCP tool into a dictionary for display.
+    Extracts name, description, and input schema from various MCP tool object types
+    and normalizes them into a consistent dictionary format for CLI display.
+    Args:
+        tool (Any): MCPToolClient or raw MCP Tool object (uses Any due to different types)
+    Returns:
+        dict[str, str | None]: Dictionary with name, description, and input_schema as keys
+    """
     name = getattr(tool, 'name', None)
     description = getattr(tool, 'description', '')
     input_schema = getattr(tool, 'input_schema', None) or getattr(tool, 'inputSchema', None)
@@ -45,19 +61,43 @@ def format_tool(tool):
     }
-def print_tool(tool_dict, detail=False):
-    click.echo(f"Tool: {tool_dict['name']}")
+def print_tool(tool_dict: dict[str, str | None], detail: bool = False) -> None:
+    """Print a formatted tool to the console with optional detailed information.
+    Outputs tool information in a user-friendly format to stdout. When detail=True
+    or when description/schema are available, shows full information with separator.
+    Args:
+        tool_dict (dict[str, str | None]): Dictionary containing tool information with name, description, and
+        input_schema as keys
+        detail (bool, optional): Whether to force detailed output. Defaults to False.
+    """
+    click.echo(f"Tool: {tool_dict.get('name', 'Unknown')}")
     if detail or tool_dict.get('input_schema') or tool_dict.get('description'):
-        click.echo(f"Description: {tool_dict['description']}")
-        if tool_dict["input_schema"]:
+        click.echo(f"Description: {tool_dict.get('description', 'No description available')}")
+        if tool_dict.get("input_schema"):
             click.echo("Input Schema:")
-            click.echo(tool_dict["input_schema"])
+            click.echo(tool_dict.get("input_schema"))
         else:
             click.echo("Input Schema: None")
         click.echo("-" * 60)
-async def list_tools_and_schemas(url, tool_name=None):
+async def list_tools_and_schemas(url: str, tool_name: str | None = None) -> list[dict[str, str | None]]:
+    """List MCP tools using MCPBuilder with structured exception handling.
+    Args:
+        url (str): MCP server URL to connect to
+        tool_name (str | None, optional): Specific tool name to retrieve.
+        If None, retrieves all available tools. Defaults to None.
+    Returns:
+        list[dict[str, str | None]]: List of formatted tool dictionaries, each containing name, description, and
+        input_schema as keys
+    Raises:
+        MCPError: Caught internally and logged, returns empty list instead
+    """
     builder = MCPBuilder(url=url)
     try:
         if tool_name:
@@ -66,12 +106,31 @@ async def list_tools_and_schemas(url, tool_name=None):
         else:
             tools = await builder.get_tools()
             return [format_tool(tool) for tool in tools.values()]
-    except Exception as e:
-        click.echo(f"[ERROR] Failed to fetch tools via MCPBuilder: {e}", err=True)
+    except MCPError as e:
+        format_mcp_error(e, include_traceback=False)
         return []
-async def list_tools_direct(url, tool_name=None):
+async def list_tools_direct(url: str, tool_name: str | None = None) -> list[dict[str, str | None]]:
+    """List MCP tools using direct MCP protocol with exception conversion.
+    Bypasses MCPBuilder and uses raw MCP ClientSession and SSE client directly.
+    Converts raw exceptions to structured MCPErrors for consistent user experience.
+    Used when --direct flag is specified in CLI.
+    Args:
+        url (str): MCP server URL to connect to
+        tool_name (str | None, optional): Specific tool name to retrieve.
+        If None, retrieves all available tools. Defaults to None.
+    Returns:
+        list[dict[str, str | None]]: List of formatted tool dictionaries, each containing name, description, and
+        input_schema as keys
+    Note:
+        This function handles ExceptionGroup by extracting the most relevant exception
+        and converting it to MCPError for consistent error reporting.
+    """
     from mcp import ClientSession
     from mcp.client.sse import sse_client
@@ -92,7 +151,17 @@ async def list_tools_direct(url, tool_name=None):
                     click.echo(f"[INFO] Tool '{tool_name}' not found.")
                 return tools
     except Exception as e:
-        click.echo(f"[ERROR] Failed to fetch tools via direct protocol: {e}", err=True)
+        # Convert raw exceptions to structured MCPError for consistency
+        from aiq.utils.exception_handlers.mcp import convert_to_mcp_error
+        from aiq.utils.exception_handlers.mcp import extract_primary_exception
+        if isinstance(e, ExceptionGroup):  # noqa: F821
+            primary_exception = extract_primary_exception(list(e.exceptions))
+            mcp_error = convert_to_mcp_error(primary_exception, url)
+        else:
+            mcp_error = convert_to_mcp_error(e, url)
+        format_mcp_error(mcp_error, include_traceback=False)
         return []
@@ -103,10 +172,28 @@ async def list_tools_direct(url, tool_name=None):
 @click.option('--detail', is_flag=True, help='Show full details for all tools')
 @click.option('--json-output', is_flag=True, help='Output tool metadata in JSON format')
 @click.pass_context
-def list_mcp(ctx, direct, url, tool, detail, json_output):
-    """
-    List tool names (default). Use --detail for full output. If --tool is provided,
-    always show full output for that tool.
+def list_mcp(ctx: click.Context, direct: bool, url: str, tool: str | None, detail: bool, json_output: bool) -> None:
+    """List MCP tool names (default) or show detailed tool information.
+    Use --detail for full output including descriptions and input schemas.
+    If --tool is provided, always shows full output for that specific tool.
+    Use --direct to bypass MCPBuilder and use raw MCP protocol.
+    Use --json-output to get structured JSON data instead of formatted text.
+    Args:
+        ctx (click.Context): Click context object for command invocation
+        direct (bool): Whether to bypass MCPBuilder and use direct MCP protocol
+        url (str): MCP server URL to connect to (default: http://localhost:9901/sse)
+        tool (str | None): Optional specific tool name to retrieve detailed info for
+        detail (bool): Whether to show full details (description + schema) for all tools
+        json_output (bool): Whether to output tool metadata in JSON format instead of text
+    Examples:
+        aiq info mcp                           # List tool names only
+        aiq info mcp --detail                  # Show all tools with full details
+        aiq info mcp --tool my_tool            # Show details for specific tool
+        aiq info mcp --json-output             # Get JSON format output
+        aiq info mcp --direct --url http://...  # Use direct protocol with custom URL
     """
     if ctx.invoked_subcommand is not None:
         return
@@ -123,4 +210,4 @@ def list_mcp(ctx, direct, url, tool, detail, json_output):
             print_tool(tool_dict, detail=True)
     else:
         for tool_dict in tools:
-            click.echo(tool_dict['name'])
+            click.echo(tool_dict.get('name', 'Unknown tool'))

aiq/cli/commands/sizing/__init__.py ADDED Viewed

@@ -0,0 +1,14 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

aiq/cli/commands/sizing/calc.py ADDED Viewed

@@ -0,0 +1,294 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import asyncio
+import logging
+from pathlib import Path
+import click
+from tabulate import tabulate
+from aiq.profiler.calc.calc_runner import CalcRunner
+from aiq.profiler.calc.data_models import CalcRunnerConfig
+from aiq.profiler.calc.data_models import CalcRunnerOutput
+logger = logging.getLogger(__name__)
+@click.command("calc", help="Estimate GPU count and plot metrics for a workflow")
+@click.option(
+    "--config_file",
+    type=click.Path(exists=True, file_okay=True, dir_okay=False, path_type=Path),
+    required=False,
+    default=None,
+    help="A YAML config file for the workflow and evaluation. This is not needed in offline mode.",
+)
+@click.option(
+    "--offline_mode",
+    is_flag=True,
+    required=False,
+    default=False,
+    help="Run in offline mode. This is used to estimate the GPU count for a workflow without running the workflow. ")
+@click.option(
+    "--target_llm_latency",
+    type=float,
+    required=False,
+    default=0,
+    help="Target p95 LLM latency (seconds). Can be set to 0 to ignore.",
+)
+@click.option(
+    "--target_workflow_runtime",
+    type=float,
+    required=False,
+    default=0,
+    help="Target p95 workflow runtime (seconds). Can be set to 0 to ignore.",
+)
+@click.option(
+    "--target_users",
+    type=int,
+    required=False,
+    default=0,
+    help="Target number of users to support.",
+)
+@click.option(
+    "--test_gpu_count",
+    type=int,
+    required=False,
+    default=0,
+    help="Number of GPUs used in the test.",
+)
+@click.option(
+    "--calc_output_dir",
+    type=click.Path(file_okay=False, dir_okay=True, path_type=Path),
+    required=False,
+    default=None,
+    help="Directory to save plots and results (optional).",
+)
+@click.option(
+    "--concurrencies",
+    type=str,
+    required=False,
+    default="1,2,3,4,5,6,7,8,9,10",
+    help="Comma-separated list of concurrency values to test (e.g., 1,2,4,8). Default: 1,2,3,4,5,6,7,8,9,10",
+)
+@click.option(
+    "--num_passes",
+    type=int,
+    required=False,
+    default=0,
+    help="Number of passes at each concurrency for the evaluation."
+    " If set to 0 the dataset is adjusted to a multiple of the concurrency. Default: 0",
+)
+@click.option(
+    "--append_calc_outputs",
+    is_flag=True,
+    required=False,
+    default=False,
+    help="Append calc outputs to the output directory. "
+    "By default append is set to False and the content of the online directory is overwritten.",
+)
+@click.option(
+    "--endpoint",
+    type=str,
+    required=False,
+    default=None,
+    help="Endpoint to use for the workflow if it is remote(optional).",
+)
+@click.option(
+    "--endpoint_timeout",
+    type=int,
+    required=False,
+    default=300,
+    help="Timeout for the remote workflow endpoint in seconds (default: 300).",
+)
+@click.pass_context
+def calc_command(ctx,
+                 config_file,
+                 offline_mode,
+                 target_llm_latency,
+                 target_workflow_runtime,
+                 target_users,
+                 test_gpu_count,
+                 calc_output_dir,
+                 concurrencies,
+                 num_passes,
+                 append_calc_outputs,
+                 endpoint,
+                 endpoint_timeout):
+    """Estimate GPU count and plot metrics for a workflow profile."""
+    # Only use CLI concurrencies, with default
+    concurrencies_list = [int(x) for x in concurrencies.split(",") if x.strip()]
+    # Dont allow a concurrency of 0
+    if 0 in concurrencies_list:
+        click.echo("Concurrency of 0 is not allowed.")
+        return
+    # Check if the parameters are valid in online and offline mode
+    if offline_mode:
+        # In offline mode target test parameters are needed to estimate the GPU count
+        if target_llm_latency == 0 and target_workflow_runtime == 0:
+            click.echo("Both --target_llm_latency and --target_workflow_runtime are 0. "
+                       "Cannot estimate the GPU count.")
+            return
+        if test_gpu_count <= 0:
+            click.echo("Test GPU count is 0. Cannot estimate the GPU count.")
+            return
+        if target_users <= 0:
+            click.echo("Target users is 0. Cannot estimate the GPU count.")
+            return
+        if append_calc_outputs:
+            click.echo("Appending calc outputs is not supported in offline mode.")
+            return
+        if not calc_output_dir:
+            click.echo("Output directory is required in offline mode.")
+            return
+    else:
+        if not config_file:
+            click.echo("Config file is required in online mode.")
+            return
+        if target_llm_latency == 0 and target_workflow_runtime == 0:
+            click.echo("Both --target_llm_latency and --target_workflow_runtime are 0. "
+                       "GPU count will not be estimated.")
+        if test_gpu_count <= 0:
+            click.echo("Test GPU count is 0. Tests will be run but the GPU count will not be estimated.")
+        if target_users <= 0:
+            click.echo("Target users is 0. Tests will be run but the GPU count will not be estimated.")
+    # Build CalcRunnerConfig
+    runner_config = CalcRunnerConfig(
+        config_file=config_file,
+        concurrencies=concurrencies_list,
+        target_llm_latency_p95=target_llm_latency,
+        target_workflow_runtime_p95=target_workflow_runtime,
+        target_users=target_users,
+        test_gpu_count=test_gpu_count,
+        output_dir=calc_output_dir,
+        num_passes=num_passes,
+        offline_mode=offline_mode,
+        append_job=append_calc_outputs,
+        endpoint=endpoint,
+        endpoint_timeout=endpoint_timeout,
+    )
+    async def run_calc() -> CalcRunnerOutput:
+        runner = CalcRunner(runner_config)
+        result = await runner.run()
+        return result
+    def print_results(results: CalcRunnerOutput):
+        # Print header with target numbers
+        click.echo(f"Targets: LLM Latency ≤ {runner_config.target_llm_latency_p95}s, "
+                   f"Workflow Runtime ≤ {runner_config.target_workflow_runtime_p95}s, "
+                   f"Users = {runner_config.target_users}")
+        click.echo(f"Test parameters: GPUs = {runner_config.test_gpu_count}")
+        # Check if there are any GPU estimates to determine if we should show GPU estimate columns
+        has_llm_latency_gpu_estimates = any(data.gpu_estimates.gpu_estimate_by_llm_latency is not None
+                                            for data in results.calc_data.values())
+        has_wf_runtime_gpu_estimates = any(data.gpu_estimates.gpu_estimate_by_wf_runtime is not None
+                                           for data in results.calc_data.values())
+        # Check if there are any interrupted workflows or outliers to determine if we should show the alerts column
+        has_alerts = any(data.sizing_metrics.alerts.workflow_interrupted or data.alerts.outlier_llm_latency
+                         or data.alerts.outlier_workflow_runtime for data in results.calc_data.values())
+        # Print per concurrency results as a table
+        click.echo("Per concurrency results:")
+        # Show alerts legend if there are any alerts
+        if has_alerts:
+            click.echo("Alerts!: W = Workflow interrupted, L = LLM latency outlier, R = Workflow runtime outlier")
+        table = []
+        for concurrency, data in results.calc_data.items():
+            metrics = data.sizing_metrics
+            gpu_estimates_per_concurrency = data.gpu_estimates
+            sizing_metrics_alerts = data.sizing_metrics.alerts
+            calc_alerts = data.alerts
+            row = []
+            # Only include alerts column if there are any interrupted workflows (first column)
+            if has_alerts:
+                alerts = []
+                if sizing_metrics_alerts.workflow_interrupted:
+                    alerts.append("W")
+                if calc_alerts.outlier_llm_latency:
+                    alerts.append("L")
+                if calc_alerts.outlier_workflow_runtime:
+                    alerts.append("R")
+                # Show ! followed by all alert characters
+                if alerts:
+                    row.append(f"!{''.join(alerts)}")
+                else:
+                    row.append("")
+            row.extend([
+                concurrency,
+                metrics.llm_latency_p95,
+                metrics.workflow_runtime_p95,
+                metrics.total_runtime,
+            ])
+            # Only include GPU estimate columns if there are actual estimates of that type
+            if has_llm_latency_gpu_estimates:
+                row.append(gpu_estimates_per_concurrency.gpu_estimate_by_llm_latency)
+            if has_wf_runtime_gpu_estimates:
+                row.append(gpu_estimates_per_concurrency.gpu_estimate_by_wf_runtime)
+            table.append(row)
+        headers = []
+        # Only include alerts header if there are any alerts (first column)
+        if has_alerts:
+            headers.append("Alerts")
+        headers.extend([
+            "Concurrency",
+            "p95 LLM Latency",
+            "p95 WF Runtime",
+            "Total Runtime",
+        ])
+        # Only include GPU estimate headers if there are actual estimates of that type
+        if has_llm_latency_gpu_estimates:
+            headers.append("GPUs (LLM Latency, Rough)")
+        if has_wf_runtime_gpu_estimates:
+            headers.append("GPUs (WF Runtime, Rough)")
+        click.echo(tabulate(table, headers=headers, tablefmt="github"))
+        # Display slope-based GPU estimates at the end
+        click.echo("")  # Add blank line for separation
+        click.echo(click.style("=== GPU ESTIMATES ===", fg="bright_blue", bold=True))
+        if results.gpu_estimates.gpu_estimate_by_wf_runtime is not None:
+            click.echo(
+                click.style(
+                    f"Estimated GPU count (Workflow Runtime): {results.gpu_estimates.gpu_estimate_by_wf_runtime:.1f}",
+                    fg="green",
+                    bold=True))
+        if results.gpu_estimates.gpu_estimate_by_llm_latency is not None:
+            click.echo(
+                click.style(
+                    f"Estimated GPU count (LLM Latency): {results.gpu_estimates.gpu_estimate_by_llm_latency:.1f}",
+                    fg="green",
+                    bold=True))
+    results = asyncio.run(run_calc())
+    print_results(results)

aiq/cli/commands/sizing/sizing.py ADDED Viewed

@@ -0,0 +1,27 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import click
+from .calc import calc_command
+@click.group(help="Size GPU clusters for workflows with the specified options.")
+def sizing():
+    """Sizing-related commands."""
+    pass
+sizing.add_command(calc_command)

aiq/cli/commands/start.py CHANGED Viewed

@@ -187,6 +187,7 @@ class StartCommandGroup(click.Group):
         config = validate_schema(config_dict, AIQConfig)
+        # Override default front end config with values from the config file for serverless execution modes.
         # Check that we have the right kind of front end
         if (not isinstance(config.general.front_end, front_end.config_type)):
             logger.warning(

aiq/cli/entrypoint.py CHANGED Viewed

@@ -34,6 +34,7 @@ from .commands.configure.configure import configure_command
 from .commands.evaluate import eval_command
 from .commands.info.info import info_command
 from .commands.registry.registry import registry_command
+from .commands.sizing.sizing import sizing
 from .commands.start import start_command
 from .commands.uninstall import uninstall_command
 from .commands.validate import validate_command
@@ -105,6 +106,7 @@ cli.add_command(start_command, name="start")
 cli.add_command(uninstall_command, name="uninstall")
 cli.add_command(validate_command, name="validate")
 cli.add_command(workflow_command, name="workflow")
+cli.add_command(sizing, name="sizing")
 # Aliases
 cli.add_command(start_command.get_command(None, "console"), name="run")  # type: ignore

aiq/cli/register_workflow.py CHANGED Viewed

@@ -16,6 +16,8 @@
 from contextlib import asynccontextmanager
 from aiq.builder.framework_enum import LLMFrameworkEnum
+from aiq.cli.type_registry import AuthProviderBuildCallableT
+from aiq.cli.type_registry import AuthProviderRegisteredCallableT
 from aiq.cli.type_registry import EmbedderClientBuildCallableT
 from aiq.cli.type_registry import EmbedderClientRegisteredCallableT
 from aiq.cli.type_registry import EmbedderProviderBuildCallableT
@@ -26,6 +28,8 @@ from aiq.cli.type_registry import FrontEndBuildCallableT
 from aiq.cli.type_registry import FrontEndRegisteredCallableT
 from aiq.cli.type_registry import FunctionBuildCallableT
 from aiq.cli.type_registry import FunctionRegisteredCallableT
+from aiq.cli.type_registry import ITSStrategyBuildCallableT
+from aiq.cli.type_registry import ITSStrategyRegisterCallableT
 from aiq.cli.type_registry import LLMClientBuildCallableT
 from aiq.cli.type_registry import LLMClientRegisteredCallableT
 from aiq.cli.type_registry import LLMProviderBuildCallableT
@@ -34,6 +38,8 @@ from aiq.cli.type_registry import LoggingMethodConfigT
 from aiq.cli.type_registry import LoggingMethodRegisteredCallableT
 from aiq.cli.type_registry import MemoryBuildCallableT
 from aiq.cli.type_registry import MemoryRegisteredCallableT
+from aiq.cli.type_registry import ObjectStoreBuildCallableT
+from aiq.cli.type_registry import ObjectStoreRegisteredCallableT
 from aiq.cli.type_registry import RegisteredLoggingMethod
 from aiq.cli.type_registry import RegisteredTelemetryExporter
 from aiq.cli.type_registry import RegisteredToolWrapper
@@ -47,6 +53,7 @@ from aiq.cli.type_registry import TeleExporterRegisteredCallableT
 from aiq.cli.type_registry import TelemetryExporterBuildCallableT
 from aiq.cli.type_registry import TelemetryExporterConfigT
 from aiq.cli.type_registry import ToolWrapperBuildCallableT
+from aiq.data_models.authentication import AuthProviderBaseConfigT
 from aiq.data_models.component import AIQComponentEnum
 from aiq.data_models.discovery_metadata import DiscoveryMetadata
 from aiq.data_models.embedder import EmbedderBaseConfigT
@@ -55,6 +62,7 @@ from aiq.data_models.front_end import FrontEndConfigT
 from aiq.data_models.function import FunctionConfigT
 from aiq.data_models.llm import LLMBaseConfigT
 from aiq.data_models.memory import MemoryBaseConfigT
+from aiq.data_models.object_store import ObjectStoreBaseConfigT
 from aiq.data_models.registry_handler import RegistryHandlerBaseConfigT
 from aiq.data_models.retriever import RetrieverBaseConfigT
@@ -192,6 +200,30 @@ def register_llm_provider(config_type: type[LLMBaseConfigT]):
     return register_llm_provider_inner
+def register_auth_provider(config_type: type[AuthProviderBaseConfigT]):
+    def register_auth_provider_inner(
+        fn: AuthProviderBuildCallableT[AuthProviderBaseConfigT]
+    ) -> AuthProviderRegisteredCallableT[AuthProviderBaseConfigT]:
+        from .type_registry import GlobalTypeRegistry
+        from .type_registry import RegisteredAuthProviderInfo
+        context_manager_fn = asynccontextmanager(fn)
+        discovery_metadata = DiscoveryMetadata.from_config_type(config_type=config_type,
+                                                                component_type=AIQComponentEnum.AUTHENTICATION_PROVIDER)
+        GlobalTypeRegistry.get().register_auth_provider(
+            RegisteredAuthProviderInfo(full_type=config_type.full_type,
+                                       config_type=config_type,
+                                       build_fn=context_manager_fn,
+                                       discovery_metadata=discovery_metadata))
+        return context_manager_fn
+    return register_auth_provider_inner
 def register_llm_client(config_type: type[LLMBaseConfigT], wrapper_type: LLMFrameworkEnum | str):
     def register_llm_client_inner(
@@ -315,6 +347,54 @@ def register_memory(config_type: type[MemoryBaseConfigT]):
     return register_memory_inner
+def register_object_store(config_type: type[ObjectStoreBaseConfigT]):
+    def register_kv_store_inner(
+        fn: ObjectStoreBuildCallableT[ObjectStoreBaseConfigT]
+    ) -> ObjectStoreRegisteredCallableT[ObjectStoreBaseConfigT]:
+        from .type_registry import GlobalTypeRegistry
+        from .type_registry import RegisteredObjectStoreInfo
+        context_manager_fn = asynccontextmanager(fn)
+        discovery_metadata = DiscoveryMetadata.from_config_type(config_type=config_type,
+                                                                component_type=AIQComponentEnum.OBJECT_STORE)
+        GlobalTypeRegistry.get().register_object_store(
+            RegisteredObjectStoreInfo(full_type=config_type.full_type,
+                                      config_type=config_type,
+                                      build_fn=context_manager_fn,
+                                      discovery_metadata=discovery_metadata))
+        return context_manager_fn
+    return register_kv_store_inner
+def register_its_strategy(config_type: type[ITSStrategyRegisterCallableT]):
+    def register_its_strategy_inner(
+        fn: ITSStrategyBuildCallableT[ITSStrategyRegisterCallableT]
+    ) -> ITSStrategyRegisterCallableT[ITSStrategyRegisterCallableT]:
+        from .type_registry import GlobalTypeRegistry
+        from .type_registry import RegisteredITSStrategyInfo
+        context_manager_fn = asynccontextmanager(fn)
+        discovery_metadata = DiscoveryMetadata.from_config_type(config_type=config_type,
+                                                                component_type=AIQComponentEnum.ITS_STRATEGY)
+        GlobalTypeRegistry.get().register_its_strategy(
+            RegisteredITSStrategyInfo(full_type=config_type.full_type,
+                                      config_type=config_type,
+                                      build_fn=context_manager_fn,
+                                      discovery_metadata=discovery_metadata))
+        return context_manager_fn
+    return register_its_strategy_inner
 def register_retriever_provider(config_type: type[RetrieverBaseConfigT]):
     def register_retriever_provider_inner(

aiqtoolkit 1.2.0a20250707__py3-none-any.whl → 1.2.0a20250730__py3-none-any.whl

Potentially problematic release.

aiqtoolkit 1.2.0a20250707__py3-none-any.whl → 1.2.0a20250730__py3-none-any.whl