aiqtoolkit 1.2.0a20250707__py3-none-any.whl → 1.2.0a20250730__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiqtoolkit might be problematic. Click here for more details.

Files changed (197)
  1. aiq/agent/base.py +171 -8
  2. aiq/agent/dual_node.py +1 -1
  3. aiq/agent/react_agent/agent.py +113 -113
  4. aiq/agent/react_agent/register.py +31 -14
  5. aiq/agent/rewoo_agent/agent.py +36 -35
  6. aiq/agent/rewoo_agent/register.py +2 -2
  7. aiq/agent/tool_calling_agent/agent.py +3 -7
  8. aiq/authentication/__init__.py +14 -0
  9. aiq/authentication/api_key/__init__.py +14 -0
  10. aiq/authentication/api_key/api_key_auth_provider.py +92 -0
  11. aiq/authentication/api_key/api_key_auth_provider_config.py +124 -0
  12. aiq/authentication/api_key/register.py +26 -0
  13. aiq/authentication/exceptions/__init__.py +14 -0
  14. aiq/authentication/exceptions/api_key_exceptions.py +38 -0
  15. aiq/authentication/exceptions/auth_code_grant_exceptions.py +86 -0
  16. aiq/authentication/exceptions/call_back_exceptions.py +38 -0
  17. aiq/authentication/exceptions/request_exceptions.py +54 -0
  18. aiq/authentication/http_basic_auth/__init__.py +0 -0
  19. aiq/authentication/http_basic_auth/http_basic_auth_provider.py +81 -0
  20. aiq/authentication/http_basic_auth/register.py +30 -0
  21. aiq/authentication/interfaces.py +93 -0
  22. aiq/authentication/oauth2/__init__.py +14 -0
  23. aiq/authentication/oauth2/oauth2_auth_code_flow_provider.py +107 -0
  24. aiq/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +39 -0
  25. aiq/authentication/oauth2/register.py +25 -0
  26. aiq/authentication/register.py +21 -0
  27. aiq/builder/builder.py +64 -2
  28. aiq/builder/component_utils.py +16 -3
  29. aiq/builder/context.py +26 -0
  30. aiq/builder/eval_builder.py +43 -2
  31. aiq/builder/function.py +32 -4
  32. aiq/builder/function_base.py +1 -1
  33. aiq/builder/intermediate_step_manager.py +6 -8
  34. aiq/builder/user_interaction_manager.py +3 -0
  35. aiq/builder/workflow.py +23 -18
  36. aiq/builder/workflow_builder.py +420 -73
  37. aiq/cli/commands/info/list_mcp.py +103 -16
  38. aiq/cli/commands/sizing/__init__.py +14 -0
  39. aiq/cli/commands/sizing/calc.py +294 -0
  40. aiq/cli/commands/sizing/sizing.py +27 -0
  41. aiq/cli/commands/start.py +1 -0
  42. aiq/cli/entrypoint.py +2 -0
  43. aiq/cli/register_workflow.py +80 -0
  44. aiq/cli/type_registry.py +151 -30
  45. aiq/data_models/api_server.py +117 -11
  46. aiq/data_models/authentication.py +231 -0
  47. aiq/data_models/common.py +35 -7
  48. aiq/data_models/component.py +17 -9
  49. aiq/data_models/component_ref.py +33 -0
  50. aiq/data_models/config.py +60 -3
  51. aiq/data_models/embedder.py +1 -0
  52. aiq/data_models/function_dependencies.py +8 -0
  53. aiq/data_models/interactive.py +10 -1
  54. aiq/data_models/intermediate_step.py +15 -5
  55. aiq/data_models/its_strategy.py +30 -0
  56. aiq/data_models/llm.py +1 -0
  57. aiq/data_models/memory.py +1 -0
  58. aiq/data_models/object_store.py +44 -0
  59. aiq/data_models/retry_mixin.py +35 -0
  60. aiq/data_models/span.py +187 -0
  61. aiq/data_models/telemetry_exporter.py +2 -2
  62. aiq/embedder/nim_embedder.py +2 -1
  63. aiq/embedder/openai_embedder.py +2 -1
  64. aiq/eval/config.py +19 -1
  65. aiq/eval/dataset_handler/dataset_handler.py +75 -1
  66. aiq/eval/evaluate.py +53 -10
  67. aiq/eval/rag_evaluator/evaluate.py +23 -12
  68. aiq/eval/remote_workflow.py +7 -2
  69. aiq/eval/runners/__init__.py +14 -0
  70. aiq/eval/runners/config.py +39 -0
  71. aiq/eval/runners/multi_eval_runner.py +54 -0
  72. aiq/eval/usage_stats.py +6 -0
  73. aiq/eval/utils/weave_eval.py +5 -1
  74. aiq/experimental/__init__.py +0 -0
  75. aiq/experimental/decorators/__init__.py +0 -0
  76. aiq/experimental/decorators/experimental_warning_decorator.py +130 -0
  77. aiq/experimental/inference_time_scaling/__init__.py +0 -0
  78. aiq/experimental/inference_time_scaling/editing/__init__.py +0 -0
  79. aiq/experimental/inference_time_scaling/editing/iterative_plan_refinement_editor.py +147 -0
  80. aiq/experimental/inference_time_scaling/editing/llm_as_a_judge_editor.py +204 -0
  81. aiq/experimental/inference_time_scaling/editing/motivation_aware_summarization.py +107 -0
  82. aiq/experimental/inference_time_scaling/functions/__init__.py +0 -0
  83. aiq/experimental/inference_time_scaling/functions/execute_score_select_function.py +105 -0
  84. aiq/experimental/inference_time_scaling/functions/its_tool_orchestration_function.py +205 -0
  85. aiq/experimental/inference_time_scaling/functions/its_tool_wrapper_function.py +146 -0
  86. aiq/experimental/inference_time_scaling/functions/plan_select_execute_function.py +224 -0
  87. aiq/experimental/inference_time_scaling/models/__init__.py +0 -0
  88. aiq/experimental/inference_time_scaling/models/editor_config.py +132 -0
  89. aiq/experimental/inference_time_scaling/models/its_item.py +48 -0
  90. aiq/experimental/inference_time_scaling/models/scoring_config.py +112 -0
  91. aiq/experimental/inference_time_scaling/models/search_config.py +120 -0
  92. aiq/experimental/inference_time_scaling/models/selection_config.py +154 -0
  93. aiq/experimental/inference_time_scaling/models/stage_enums.py +43 -0
  94. aiq/experimental/inference_time_scaling/models/strategy_base.py +66 -0
  95. aiq/experimental/inference_time_scaling/models/tool_use_config.py +41 -0
  96. aiq/experimental/inference_time_scaling/register.py +36 -0
  97. aiq/experimental/inference_time_scaling/scoring/__init__.py +0 -0
  98. aiq/experimental/inference_time_scaling/scoring/llm_based_agent_scorer.py +168 -0
  99. aiq/experimental/inference_time_scaling/scoring/llm_based_plan_scorer.py +168 -0
  100. aiq/experimental/inference_time_scaling/scoring/motivation_aware_scorer.py +111 -0
  101. aiq/experimental/inference_time_scaling/search/__init__.py +0 -0
  102. aiq/experimental/inference_time_scaling/search/multi_llm_planner.py +128 -0
  103. aiq/experimental/inference_time_scaling/search/multi_query_retrieval_search.py +122 -0
  104. aiq/experimental/inference_time_scaling/search/single_shot_multi_plan_planner.py +128 -0
  105. aiq/experimental/inference_time_scaling/selection/__init__.py +0 -0
  106. aiq/experimental/inference_time_scaling/selection/best_of_n_selector.py +63 -0
  107. aiq/experimental/inference_time_scaling/selection/llm_based_agent_output_selector.py +131 -0
  108. aiq/experimental/inference_time_scaling/selection/llm_based_output_merging_selector.py +159 -0
  109. aiq/experimental/inference_time_scaling/selection/llm_based_plan_selector.py +128 -0
  110. aiq/experimental/inference_time_scaling/selection/threshold_selector.py +58 -0
  111. aiq/front_ends/console/authentication_flow_handler.py +233 -0
  112. aiq/front_ends/console/console_front_end_plugin.py +11 -2
  113. aiq/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
  114. aiq/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +27 -0
  115. aiq/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +107 -0
  116. aiq/front_ends/fastapi/fastapi_front_end_config.py +20 -0
  117. aiq/front_ends/fastapi/fastapi_front_end_controller.py +68 -0
  118. aiq/front_ends/fastapi/fastapi_front_end_plugin.py +14 -1
  119. aiq/front_ends/fastapi/fastapi_front_end_plugin_worker.py +353 -31
  120. aiq/front_ends/fastapi/html_snippets/__init__.py +14 -0
  121. aiq/front_ends/fastapi/html_snippets/auth_code_grant_success.py +35 -0
  122. aiq/front_ends/fastapi/main.py +2 -0
  123. aiq/front_ends/fastapi/message_handler.py +102 -84
  124. aiq/front_ends/fastapi/step_adaptor.py +2 -1
  125. aiq/llm/aws_bedrock_llm.py +2 -1
  126. aiq/llm/nim_llm.py +2 -1
  127. aiq/llm/openai_llm.py +2 -1
  128. aiq/object_store/__init__.py +20 -0
  129. aiq/object_store/in_memory_object_store.py +74 -0
  130. aiq/object_store/interfaces.py +84 -0
  131. aiq/object_store/models.py +36 -0
  132. aiq/object_store/register.py +20 -0
  133. aiq/observability/__init__.py +14 -0
  134. aiq/observability/exporter/__init__.py +14 -0
  135. aiq/observability/exporter/base_exporter.py +449 -0
  136. aiq/observability/exporter/exporter.py +78 -0
  137. aiq/observability/exporter/file_exporter.py +33 -0
  138. aiq/observability/exporter/processing_exporter.py +269 -0
  139. aiq/observability/exporter/raw_exporter.py +52 -0
  140. aiq/observability/exporter/span_exporter.py +264 -0
  141. aiq/observability/exporter_manager.py +335 -0
  142. aiq/observability/mixin/__init__.py +14 -0
  143. aiq/observability/mixin/batch_config_mixin.py +26 -0
  144. aiq/observability/mixin/collector_config_mixin.py +23 -0
  145. aiq/observability/mixin/file_mixin.py +288 -0
  146. aiq/observability/mixin/file_mode.py +23 -0
  147. aiq/observability/mixin/resource_conflict_mixin.py +134 -0
  148. aiq/observability/mixin/serialize_mixin.py +61 -0
  149. aiq/observability/mixin/type_introspection_mixin.py +183 -0
  150. aiq/observability/processor/__init__.py +14 -0
  151. aiq/observability/processor/batching_processor.py +316 -0
  152. aiq/observability/processor/intermediate_step_serializer.py +28 -0
  153. aiq/observability/processor/processor.py +68 -0
  154. aiq/observability/register.py +32 -116
  155. aiq/observability/utils/__init__.py +14 -0
  156. aiq/observability/utils/dict_utils.py +236 -0
  157. aiq/observability/utils/time_utils.py +31 -0
  158. aiq/profiler/calc/__init__.py +14 -0
  159. aiq/profiler/calc/calc_runner.py +623 -0
  160. aiq/profiler/calc/calculations.py +288 -0
  161. aiq/profiler/calc/data_models.py +176 -0
  162. aiq/profiler/calc/plot.py +345 -0
  163. aiq/profiler/data_models.py +2 -0
  164. aiq/profiler/profile_runner.py +16 -13
  165. aiq/runtime/loader.py +8 -2
  166. aiq/runtime/runner.py +23 -9
  167. aiq/runtime/session.py +16 -5
  168. aiq/tool/chat_completion.py +74 -0
  169. aiq/tool/code_execution/README.md +152 -0
  170. aiq/tool/code_execution/code_sandbox.py +151 -72
  171. aiq/tool/code_execution/local_sandbox/.gitignore +1 -0
  172. aiq/tool/code_execution/local_sandbox/local_sandbox_server.py +139 -24
  173. aiq/tool/code_execution/local_sandbox/sandbox.requirements.txt +3 -1
  174. aiq/tool/code_execution/local_sandbox/start_local_sandbox.sh +27 -2
  175. aiq/tool/code_execution/register.py +7 -3
  176. aiq/tool/code_execution/test_code_execution_sandbox.py +414 -0
  177. aiq/tool/mcp/exceptions.py +142 -0
  178. aiq/tool/mcp/mcp_client.py +17 -3
  179. aiq/tool/mcp/mcp_tool.py +1 -1
  180. aiq/tool/register.py +1 -0
  181. aiq/tool/server_tools.py +2 -2
  182. aiq/utils/exception_handlers/automatic_retries.py +289 -0
  183. aiq/utils/exception_handlers/mcp.py +211 -0
  184. aiq/utils/io/model_processing.py +28 -0
  185. aiq/utils/log_utils.py +37 -0
  186. aiq/utils/string_utils.py +38 -0
  187. aiq/utils/type_converter.py +18 -2
  188. aiq/utils/type_utils.py +87 -0
  189. {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/METADATA +37 -9
  190. {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/RECORD +195 -80
  191. {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/entry_points.txt +3 -0
  192. aiq/front_ends/fastapi/websocket.py +0 -153
  193. aiq/observability/async_otel_listener.py +0 -470
  194. {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/WHEEL +0 -0
  195. {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
  196. {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/licenses/LICENSE.md +0 -0
  197. {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/top_level.txt +0 -0
@@ -15,18 +15,34 @@
15
15
 
16
16
  import json
17
17
  import logging
18
+ from typing import Any
18
19
 
19
20
  import anyio
20
21
  import click
21
22
 
23
+ from aiq.tool.mcp.exceptions import MCPError
22
24
  from aiq.tool.mcp.mcp_client import MCPBuilder
25
+ from aiq.utils.exception_handlers.mcp import format_mcp_error
23
26
 
24
27
  # Suppress verbose logs from mcp.client.sse and httpx
25
28
  logging.getLogger("mcp.client.sse").setLevel(logging.WARNING)
26
29
  logging.getLogger("httpx").setLevel(logging.WARNING)
27
30
 
31
+ logger = logging.getLogger(__name__)
28
32
 
29
- def format_tool(tool):
33
+
34
+ def format_tool(tool: Any) -> dict[str, str | None]:
35
+ """Format an MCP tool into a dictionary for display.
36
+
37
+ Extracts name, description, and input schema from various MCP tool object types
38
+ and normalizes them into a consistent dictionary format for CLI display.
39
+
40
+ Args:
41
+ tool (Any): MCPToolClient or raw MCP Tool object (uses Any due to different types)
42
+
43
+ Returns:
44
+ dict[str, str | None]: Dictionary with name, description, and input_schema as keys
45
+ """
30
46
  name = getattr(tool, 'name', None)
31
47
  description = getattr(tool, 'description', '')
32
48
  input_schema = getattr(tool, 'input_schema', None) or getattr(tool, 'inputSchema', None)
@@ -45,19 +61,43 @@ def format_tool(tool):
45
61
  }
46
62
 
47
63
 
48
- def print_tool(tool_dict, detail=False):
49
- click.echo(f"Tool: {tool_dict['name']}")
64
+ def print_tool(tool_dict: dict[str, str | None], detail: bool = False) -> None:
65
+ """Print a formatted tool to the console with optional detailed information.
66
+
67
+ Outputs tool information in a user-friendly format to stdout. When detail=True
68
+ or when description/schema are available, shows full information with separator.
69
+
70
+ Args:
71
+ tool_dict (dict[str, str | None]): Dictionary containing tool information with name, description, and
72
+ input_schema as keys
73
+ detail (bool, optional): Whether to force detailed output. Defaults to False.
74
+ """
75
+ click.echo(f"Tool: {tool_dict.get('name', 'Unknown')}")
50
76
  if detail or tool_dict.get('input_schema') or tool_dict.get('description'):
51
- click.echo(f"Description: {tool_dict['description']}")
52
- if tool_dict["input_schema"]:
77
+ click.echo(f"Description: {tool_dict.get('description', 'No description available')}")
78
+ if tool_dict.get("input_schema"):
53
79
  click.echo("Input Schema:")
54
- click.echo(tool_dict["input_schema"])
80
+ click.echo(tool_dict.get("input_schema"))
55
81
  else:
56
82
  click.echo("Input Schema: None")
57
83
  click.echo("-" * 60)
58
84
 
59
85
 
60
- async def list_tools_and_schemas(url, tool_name=None):
86
+ async def list_tools_and_schemas(url: str, tool_name: str | None = None) -> list[dict[str, str | None]]:
87
+ """List MCP tools using MCPBuilder with structured exception handling.
88
+
89
+ Args:
90
+ url (str): MCP server URL to connect to
91
+ tool_name (str | None, optional): Specific tool name to retrieve.
92
+ If None, retrieves all available tools. Defaults to None.
93
+
94
+ Returns:
95
+ list[dict[str, str | None]]: List of formatted tool dictionaries, each containing name, description, and
96
+ input_schema as keys
97
+
98
+ Raises:
99
+ MCPError: Caught internally and logged, returns empty list instead
100
+ """
61
101
  builder = MCPBuilder(url=url)
62
102
  try:
63
103
  if tool_name:
@@ -66,12 +106,31 @@ async def list_tools_and_schemas(url, tool_name=None):
66
106
  else:
67
107
  tools = await builder.get_tools()
68
108
  return [format_tool(tool) for tool in tools.values()]
69
- except Exception as e:
70
- click.echo(f"[ERROR] Failed to fetch tools via MCPBuilder: {e}", err=True)
109
+ except MCPError as e:
110
+ format_mcp_error(e, include_traceback=False)
71
111
  return []
72
112
 
73
113
 
74
- async def list_tools_direct(url, tool_name=None):
114
+ async def list_tools_direct(url: str, tool_name: str | None = None) -> list[dict[str, str | None]]:
115
+ """List MCP tools using direct MCP protocol with exception conversion.
116
+
117
+ Bypasses MCPBuilder and uses raw MCP ClientSession and SSE client directly.
118
+ Converts raw exceptions to structured MCPErrors for consistent user experience.
119
+ Used when --direct flag is specified in CLI.
120
+
121
+ Args:
122
+ url (str): MCP server URL to connect to
123
+ tool_name (str | None, optional): Specific tool name to retrieve.
124
+ If None, retrieves all available tools. Defaults to None.
125
+
126
+ Returns:
127
+ list[dict[str, str | None]]: List of formatted tool dictionaries, each containing name, description, and
128
+ input_schema as keys
129
+
130
+ Note:
131
+ This function handles ExceptionGroup by extracting the most relevant exception
132
+ and converting it to MCPError for consistent error reporting.
133
+ """
75
134
  from mcp import ClientSession
76
135
  from mcp.client.sse import sse_client
77
136
 
@@ -92,7 +151,17 @@ async def list_tools_direct(url, tool_name=None):
92
151
  click.echo(f"[INFO] Tool '{tool_name}' not found.")
93
152
  return tools
94
153
  except Exception as e:
95
- click.echo(f"[ERROR] Failed to fetch tools via direct protocol: {e}", err=True)
154
+ # Convert raw exceptions to structured MCPError for consistency
155
+ from aiq.utils.exception_handlers.mcp import convert_to_mcp_error
156
+ from aiq.utils.exception_handlers.mcp import extract_primary_exception
157
+
158
+ if isinstance(e, ExceptionGroup): # noqa: F821
159
+ primary_exception = extract_primary_exception(list(e.exceptions))
160
+ mcp_error = convert_to_mcp_error(primary_exception, url)
161
+ else:
162
+ mcp_error = convert_to_mcp_error(e, url)
163
+
164
+ format_mcp_error(mcp_error, include_traceback=False)
96
165
  return []
97
166
 
98
167
 
@@ -103,10 +172,28 @@ async def list_tools_direct(url, tool_name=None):
103
172
  @click.option('--detail', is_flag=True, help='Show full details for all tools')
104
173
  @click.option('--json-output', is_flag=True, help='Output tool metadata in JSON format')
105
174
  @click.pass_context
106
- def list_mcp(ctx, direct, url, tool, detail, json_output):
107
- """
108
- List tool names (default). Use --detail for full output. If --tool is provided,
109
- always show full output for that tool.
175
+ def list_mcp(ctx: click.Context, direct: bool, url: str, tool: str | None, detail: bool, json_output: bool) -> None:
176
+ """List MCP tool names (default) or show detailed tool information.
177
+
178
+ Use --detail for full output including descriptions and input schemas.
179
+ If --tool is provided, always shows full output for that specific tool.
180
+ Use --direct to bypass MCPBuilder and use raw MCP protocol.
181
+ Use --json-output to get structured JSON data instead of formatted text.
182
+
183
+ Args:
184
+ ctx (click.Context): Click context object for command invocation
185
+ direct (bool): Whether to bypass MCPBuilder and use direct MCP protocol
186
+ url (str): MCP server URL to connect to (default: http://localhost:9901/sse)
187
+ tool (str | None): Optional specific tool name to retrieve detailed info for
188
+ detail (bool): Whether to show full details (description + schema) for all tools
189
+ json_output (bool): Whether to output tool metadata in JSON format instead of text
190
+
191
+ Examples:
192
+ aiq info mcp # List tool names only
193
+ aiq info mcp --detail # Show all tools with full details
194
+ aiq info mcp --tool my_tool # Show details for specific tool
195
+ aiq info mcp --json-output # Get JSON format output
196
+ aiq info mcp --direct --url http://... # Use direct protocol with custom URL
110
197
  """
111
198
  if ctx.invoked_subcommand is not None:
112
199
  return
@@ -123,4 +210,4 @@ def list_mcp(ctx, direct, url, tool, detail, json_output):
123
210
  print_tool(tool_dict, detail=True)
124
211
  else:
125
212
  for tool_dict in tools:
126
- click.echo(tool_dict['name'])
213
+ click.echo(tool_dict.get('name', 'Unknown tool'))
@@ -0,0 +1,14 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
@@ -0,0 +1,294 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import asyncio
17
+ import logging
18
+ from pathlib import Path
19
+
20
+ import click
21
+ from tabulate import tabulate
22
+
23
+ from aiq.profiler.calc.calc_runner import CalcRunner
24
+ from aiq.profiler.calc.data_models import CalcRunnerConfig
25
+ from aiq.profiler.calc.data_models import CalcRunnerOutput
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
+ @click.command("calc", help="Estimate GPU count and plot metrics for a workflow")
31
+ @click.option(
32
+ "--config_file",
33
+ type=click.Path(exists=True, file_okay=True, dir_okay=False, path_type=Path),
34
+ required=False,
35
+ default=None,
36
+ help="A YAML config file for the workflow and evaluation. This is not needed in offline mode.",
37
+ )
38
+ @click.option(
39
+ "--offline_mode",
40
+ is_flag=True,
41
+ required=False,
42
+ default=False,
43
+ help="Run in offline mode. This is used to estimate the GPU count for a workflow without running the workflow. ")
44
+ @click.option(
45
+ "--target_llm_latency",
46
+ type=float,
47
+ required=False,
48
+ default=0,
49
+ help="Target p95 LLM latency (seconds). Can be set to 0 to ignore.",
50
+ )
51
+ @click.option(
52
+ "--target_workflow_runtime",
53
+ type=float,
54
+ required=False,
55
+ default=0,
56
+ help="Target p95 workflow runtime (seconds). Can be set to 0 to ignore.",
57
+ )
58
+ @click.option(
59
+ "--target_users",
60
+ type=int,
61
+ required=False,
62
+ default=0,
63
+ help="Target number of users to support.",
64
+ )
65
+ @click.option(
66
+ "--test_gpu_count",
67
+ type=int,
68
+ required=False,
69
+ default=0,
70
+ help="Number of GPUs used in the test.",
71
+ )
72
+ @click.option(
73
+ "--calc_output_dir",
74
+ type=click.Path(file_okay=False, dir_okay=True, path_type=Path),
75
+ required=False,
76
+ default=None,
77
+ help="Directory to save plots and results (optional).",
78
+ )
79
+ @click.option(
80
+ "--concurrencies",
81
+ type=str,
82
+ required=False,
83
+ default="1,2,3,4,5,6,7,8,9,10",
84
+ help="Comma-separated list of concurrency values to test (e.g., 1,2,4,8). Default: 1,2,3,4,5,6,7,8,9,10",
85
+ )
86
+ @click.option(
87
+ "--num_passes",
88
+ type=int,
89
+ required=False,
90
+ default=0,
91
+ help="Number of passes at each concurrency for the evaluation."
92
+ " If set to 0 the dataset is adjusted to a multiple of the concurrency. Default: 0",
93
+ )
94
+ @click.option(
95
+ "--append_calc_outputs",
96
+ is_flag=True,
97
+ required=False,
98
+ default=False,
99
+ help="Append calc outputs to the output directory. "
100
+ "By default append is set to False and the content of the online directory is overwritten.",
101
+ )
102
+ @click.option(
103
+ "--endpoint",
104
+ type=str,
105
+ required=False,
106
+ default=None,
107
+ help="Endpoint to use for the workflow if it is remote(optional).",
108
+ )
109
+ @click.option(
110
+ "--endpoint_timeout",
111
+ type=int,
112
+ required=False,
113
+ default=300,
114
+ help="Timeout for the remote workflow endpoint in seconds (default: 300).",
115
+ )
116
+ @click.pass_context
117
+ def calc_command(ctx,
118
+ config_file,
119
+ offline_mode,
120
+ target_llm_latency,
121
+ target_workflow_runtime,
122
+ target_users,
123
+ test_gpu_count,
124
+ calc_output_dir,
125
+ concurrencies,
126
+ num_passes,
127
+ append_calc_outputs,
128
+ endpoint,
129
+ endpoint_timeout):
130
+ """Estimate GPU count and plot metrics for a workflow profile."""
131
+ # Only use CLI concurrencies, with default
132
+ concurrencies_list = [int(x) for x in concurrencies.split(",") if x.strip()]
133
+
134
+ # Dont allow a concurrency of 0
135
+ if 0 in concurrencies_list:
136
+ click.echo("Concurrency of 0 is not allowed.")
137
+ return
138
+
139
+ # Check if the parameters are valid in online and offline mode
140
+ if offline_mode:
141
+ # In offline mode target test parameters are needed to estimate the GPU count
142
+ if target_llm_latency == 0 and target_workflow_runtime == 0:
143
+ click.echo("Both --target_llm_latency and --target_workflow_runtime are 0. "
144
+ "Cannot estimate the GPU count.")
145
+ return
146
+ if test_gpu_count <= 0:
147
+ click.echo("Test GPU count is 0. Cannot estimate the GPU count.")
148
+ return
149
+ if target_users <= 0:
150
+ click.echo("Target users is 0. Cannot estimate the GPU count.")
151
+ return
152
+ if append_calc_outputs:
153
+ click.echo("Appending calc outputs is not supported in offline mode.")
154
+ return
155
+ if not calc_output_dir:
156
+ click.echo("Output directory is required in offline mode.")
157
+ return
158
+ else:
159
+ if not config_file:
160
+ click.echo("Config file is required in online mode.")
161
+ return
162
+ if target_llm_latency == 0 and target_workflow_runtime == 0:
163
+ click.echo("Both --target_llm_latency and --target_workflow_runtime are 0. "
164
+ "GPU count will not be estimated.")
165
+ if test_gpu_count <= 0:
166
+ click.echo("Test GPU count is 0. Tests will be run but the GPU count will not be estimated.")
167
+ if target_users <= 0:
168
+ click.echo("Target users is 0. Tests will be run but the GPU count will not be estimated.")
169
+
170
+ # Build CalcRunnerConfig
171
+ runner_config = CalcRunnerConfig(
172
+ config_file=config_file,
173
+ concurrencies=concurrencies_list,
174
+ target_llm_latency_p95=target_llm_latency,
175
+ target_workflow_runtime_p95=target_workflow_runtime,
176
+ target_users=target_users,
177
+ test_gpu_count=test_gpu_count,
178
+ output_dir=calc_output_dir,
179
+ num_passes=num_passes,
180
+ offline_mode=offline_mode,
181
+ append_job=append_calc_outputs,
182
+ endpoint=endpoint,
183
+ endpoint_timeout=endpoint_timeout,
184
+ )
185
+
186
+ async def run_calc() -> CalcRunnerOutput:
187
+ runner = CalcRunner(runner_config)
188
+ result = await runner.run()
189
+ return result
190
+
191
+ def print_results(results: CalcRunnerOutput):
192
+
193
+ # Print header with target numbers
194
+ click.echo(f"Targets: LLM Latency ≤ {runner_config.target_llm_latency_p95}s, "
195
+ f"Workflow Runtime ≤ {runner_config.target_workflow_runtime_p95}s, "
196
+ f"Users = {runner_config.target_users}")
197
+ click.echo(f"Test parameters: GPUs = {runner_config.test_gpu_count}")
198
+
199
+ # Check if there are any GPU estimates to determine if we should show GPU estimate columns
200
+ has_llm_latency_gpu_estimates = any(data.gpu_estimates.gpu_estimate_by_llm_latency is not None
201
+ for data in results.calc_data.values())
202
+ has_wf_runtime_gpu_estimates = any(data.gpu_estimates.gpu_estimate_by_wf_runtime is not None
203
+ for data in results.calc_data.values())
204
+
205
+ # Check if there are any interrupted workflows or outliers to determine if we should show the alerts column
206
+ has_alerts = any(data.sizing_metrics.alerts.workflow_interrupted or data.alerts.outlier_llm_latency
207
+ or data.alerts.outlier_workflow_runtime for data in results.calc_data.values())
208
+
209
+ # Print per concurrency results as a table
210
+ click.echo("Per concurrency results:")
211
+
212
+ # Show alerts legend if there are any alerts
213
+ if has_alerts:
214
+ click.echo("Alerts!: W = Workflow interrupted, L = LLM latency outlier, R = Workflow runtime outlier")
215
+
216
+ table = []
217
+ for concurrency, data in results.calc_data.items():
218
+ metrics = data.sizing_metrics
219
+ gpu_estimates_per_concurrency = data.gpu_estimates
220
+ sizing_metrics_alerts = data.sizing_metrics.alerts
221
+ calc_alerts = data.alerts
222
+
223
+ row = []
224
+
225
+ # Only include alerts column if there are any interrupted workflows (first column)
226
+ if has_alerts:
227
+ alerts = []
228
+ if sizing_metrics_alerts.workflow_interrupted:
229
+ alerts.append("W")
230
+ if calc_alerts.outlier_llm_latency:
231
+ alerts.append("L")
232
+ if calc_alerts.outlier_workflow_runtime:
233
+ alerts.append("R")
234
+
235
+ # Show ! followed by all alert characters
236
+ if alerts:
237
+ row.append(f"!{''.join(alerts)}")
238
+ else:
239
+ row.append("")
240
+
241
+ row.extend([
242
+ concurrency,
243
+ metrics.llm_latency_p95,
244
+ metrics.workflow_runtime_p95,
245
+ metrics.total_runtime,
246
+ ])
247
+
248
+ # Only include GPU estimate columns if there are actual estimates of that type
249
+ if has_llm_latency_gpu_estimates:
250
+ row.append(gpu_estimates_per_concurrency.gpu_estimate_by_llm_latency)
251
+ if has_wf_runtime_gpu_estimates:
252
+ row.append(gpu_estimates_per_concurrency.gpu_estimate_by_wf_runtime)
253
+
254
+ table.append(row)
255
+
256
+ headers = []
257
+
258
+ # Only include alerts header if there are any alerts (first column)
259
+ if has_alerts:
260
+ headers.append("Alerts")
261
+
262
+ headers.extend([
263
+ "Concurrency",
264
+ "p95 LLM Latency",
265
+ "p95 WF Runtime",
266
+ "Total Runtime",
267
+ ])
268
+
269
+ # Only include GPU estimate headers if there are actual estimates of that type
270
+ if has_llm_latency_gpu_estimates:
271
+ headers.append("GPUs (LLM Latency, Rough)")
272
+ if has_wf_runtime_gpu_estimates:
273
+ headers.append("GPUs (WF Runtime, Rough)")
274
+
275
+ click.echo(tabulate(table, headers=headers, tablefmt="github"))
276
+
277
+ # Display slope-based GPU estimates at the end
278
+ click.echo("") # Add blank line for separation
279
+ click.echo(click.style("=== GPU ESTIMATES ===", fg="bright_blue", bold=True))
280
+ if results.gpu_estimates.gpu_estimate_by_wf_runtime is not None:
281
+ click.echo(
282
+ click.style(
283
+ f"Estimated GPU count (Workflow Runtime): {results.gpu_estimates.gpu_estimate_by_wf_runtime:.1f}",
284
+ fg="green",
285
+ bold=True))
286
+ if results.gpu_estimates.gpu_estimate_by_llm_latency is not None:
287
+ click.echo(
288
+ click.style(
289
+ f"Estimated GPU count (LLM Latency): {results.gpu_estimates.gpu_estimate_by_llm_latency:.1f}",
290
+ fg="green",
291
+ bold=True))
292
+
293
+ results = asyncio.run(run_calc())
294
+ print_results(results)
@@ -0,0 +1,27 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import click
17
+
18
+ from .calc import calc_command
19
+
20
+
21
@click.group(help="Size GPU clusters for workflows with the specified options.")
def sizing():
    """Sizing-related commands.

    The group callback has no behavior of its own; it only serves as the
    parent under which sizing subcommands are attached.
    """


# Attach the imported calc command to the group.
sizing.add_command(calc_command)
aiq/cli/commands/start.py CHANGED
@@ -187,6 +187,7 @@ class StartCommandGroup(click.Group):
187
187
 
188
188
  config = validate_schema(config_dict, AIQConfig)
189
189
 
190
+ # Override default front end config with values from the config file for serverless execution modes.
190
191
  # Check that we have the right kind of front end
191
192
  if (not isinstance(config.general.front_end, front_end.config_type)):
192
193
  logger.warning(
aiq/cli/entrypoint.py CHANGED
@@ -34,6 +34,7 @@ from .commands.configure.configure import configure_command
34
34
  from .commands.evaluate import eval_command
35
35
  from .commands.info.info import info_command
36
36
  from .commands.registry.registry import registry_command
37
+ from .commands.sizing.sizing import sizing
37
38
  from .commands.start import start_command
38
39
  from .commands.uninstall import uninstall_command
39
40
  from .commands.validate import validate_command
@@ -105,6 +106,7 @@ cli.add_command(start_command, name="start")
105
106
  cli.add_command(uninstall_command, name="uninstall")
106
107
  cli.add_command(validate_command, name="validate")
107
108
  cli.add_command(workflow_command, name="workflow")
109
+ cli.add_command(sizing, name="sizing")
108
110
 
109
111
  # Aliases
110
112
  cli.add_command(start_command.get_command(None, "console"), name="run") # type: ignore
@@ -16,6 +16,8 @@
16
16
  from contextlib import asynccontextmanager
17
17
 
18
18
  from aiq.builder.framework_enum import LLMFrameworkEnum
19
+ from aiq.cli.type_registry import AuthProviderBuildCallableT
20
+ from aiq.cli.type_registry import AuthProviderRegisteredCallableT
19
21
  from aiq.cli.type_registry import EmbedderClientBuildCallableT
20
22
  from aiq.cli.type_registry import EmbedderClientRegisteredCallableT
21
23
  from aiq.cli.type_registry import EmbedderProviderBuildCallableT
@@ -26,6 +28,8 @@ from aiq.cli.type_registry import FrontEndBuildCallableT
26
28
  from aiq.cli.type_registry import FrontEndRegisteredCallableT
27
29
  from aiq.cli.type_registry import FunctionBuildCallableT
28
30
  from aiq.cli.type_registry import FunctionRegisteredCallableT
31
+ from aiq.cli.type_registry import ITSStrategyBuildCallableT
32
+ from aiq.cli.type_registry import ITSStrategyRegisterCallableT
29
33
  from aiq.cli.type_registry import LLMClientBuildCallableT
30
34
  from aiq.cli.type_registry import LLMClientRegisteredCallableT
31
35
  from aiq.cli.type_registry import LLMProviderBuildCallableT
@@ -34,6 +38,8 @@ from aiq.cli.type_registry import LoggingMethodConfigT
34
38
  from aiq.cli.type_registry import LoggingMethodRegisteredCallableT
35
39
  from aiq.cli.type_registry import MemoryBuildCallableT
36
40
  from aiq.cli.type_registry import MemoryRegisteredCallableT
41
+ from aiq.cli.type_registry import ObjectStoreBuildCallableT
42
+ from aiq.cli.type_registry import ObjectStoreRegisteredCallableT
37
43
  from aiq.cli.type_registry import RegisteredLoggingMethod
38
44
  from aiq.cli.type_registry import RegisteredTelemetryExporter
39
45
  from aiq.cli.type_registry import RegisteredToolWrapper
@@ -47,6 +53,7 @@ from aiq.cli.type_registry import TeleExporterRegisteredCallableT
47
53
  from aiq.cli.type_registry import TelemetryExporterBuildCallableT
48
54
  from aiq.cli.type_registry import TelemetryExporterConfigT
49
55
  from aiq.cli.type_registry import ToolWrapperBuildCallableT
56
+ from aiq.data_models.authentication import AuthProviderBaseConfigT
50
57
  from aiq.data_models.component import AIQComponentEnum
51
58
  from aiq.data_models.discovery_metadata import DiscoveryMetadata
52
59
  from aiq.data_models.embedder import EmbedderBaseConfigT
@@ -55,6 +62,7 @@ from aiq.data_models.front_end import FrontEndConfigT
55
62
  from aiq.data_models.function import FunctionConfigT
56
63
  from aiq.data_models.llm import LLMBaseConfigT
57
64
  from aiq.data_models.memory import MemoryBaseConfigT
65
+ from aiq.data_models.object_store import ObjectStoreBaseConfigT
58
66
  from aiq.data_models.registry_handler import RegistryHandlerBaseConfigT
59
67
  from aiq.data_models.retriever import RetrieverBaseConfigT
60
68
 
@@ -192,6 +200,30 @@ def register_llm_provider(config_type: type[LLMBaseConfigT]):
192
200
  return register_llm_provider_inner
193
201
 
194
202
 
203
def register_auth_provider(config_type: type[AuthProviderBaseConfigT]):
    """Build a decorator that registers an authentication provider build function.

    The decorator wraps the decorated function with ``asynccontextmanager``
    (so it is expected to be an async generator function), registers the
    wrapped function with the global type registry under
    ``config_type.full_type`` and returns the wrapped function.

    Args:
        config_type: Configuration class the provider is registered for.

    Returns:
        A decorator that accepts the provider build function.
    """

    def register_auth_provider_inner(
        fn: AuthProviderBuildCallableT[AuthProviderBaseConfigT]
    ) -> AuthProviderRegisteredCallableT[AuthProviderBaseConfigT]:
        # Resolved when the decorator runs, not when this module is imported.
        from .type_registry import GlobalTypeRegistry
        from .type_registry import RegisteredAuthProviderInfo

        build_cm = asynccontextmanager(fn)

        metadata = DiscoveryMetadata.from_config_type(
            config_type=config_type,
            component_type=AIQComponentEnum.AUTHENTICATION_PROVIDER,
        )

        register = GlobalTypeRegistry.get().register_auth_provider
        register(
            RegisteredAuthProviderInfo(
                full_type=config_type.full_type,
                config_type=config_type,
                build_fn=build_cm,
                discovery_metadata=metadata,
            ))

        return build_cm

    return register_auth_provider_inner
225
+
226
+
195
227
  def register_llm_client(config_type: type[LLMBaseConfigT], wrapper_type: LLMFrameworkEnum | str):
196
228
 
197
229
  def register_llm_client_inner(
@@ -315,6 +347,54 @@ def register_memory(config_type: type[MemoryBaseConfigT]):
315
347
  return register_memory_inner
316
348
 
317
349
 
350
def register_object_store(config_type: type[ObjectStoreBaseConfigT]):
    """Build a decorator that registers an object store build function.

    The decorator wraps the decorated function with ``asynccontextmanager``
    (so it is expected to be an async generator function), registers the
    wrapped function with the global type registry under
    ``config_type.full_type`` and returns the wrapped function.

    Args:
        config_type: Configuration class the object store is registered for.

    Returns:
        A decorator that accepts the object store build function.
    """

    # Named after the component it registers, matching the
    # ``register_<component>_inner`` convention of the sibling decorators.
    def register_object_store_inner(
        fn: ObjectStoreBuildCallableT[ObjectStoreBaseConfigT]
    ) -> ObjectStoreRegisteredCallableT[ObjectStoreBaseConfigT]:
        # Resolved when the decorator runs, not when this module is imported.
        from .type_registry import GlobalTypeRegistry
        from .type_registry import RegisteredObjectStoreInfo

        context_manager_fn = asynccontextmanager(fn)

        discovery_metadata = DiscoveryMetadata.from_config_type(config_type=config_type,
                                                                component_type=AIQComponentEnum.OBJECT_STORE)

        GlobalTypeRegistry.get().register_object_store(
            RegisteredObjectStoreInfo(full_type=config_type.full_type,
                                      config_type=config_type,
                                      build_fn=context_manager_fn,
                                      discovery_metadata=discovery_metadata))

        return context_manager_fn

    return register_object_store_inner
372
+
373
+
374
def register_its_strategy(config_type: type[ITSStrategyRegisterCallableT]):
    """Build a decorator that registers an ITS strategy build function.

    The decorator wraps the decorated function with ``asynccontextmanager``
    (so it is expected to be an async generator function), registers the
    wrapped function with the global type registry under
    ``config_type.full_type`` and returns the wrapped function.

    NOTE(review): the annotations use ``ITSStrategyRegisterCallableT`` where
    the sibling decorators use a ``*BaseConfigT`` type variable; this looks
    like it should be the ITS strategy config type variable -- confirm.

    Args:
        config_type: Configuration class the strategy is registered for.

    Returns:
        A decorator that accepts the strategy build function.
    """

    def register_its_strategy_inner(
        fn: ITSStrategyBuildCallableT[ITSStrategyRegisterCallableT]
    ) -> ITSStrategyRegisterCallableT[ITSStrategyRegisterCallableT]:
        # Resolved when the decorator runs, not when this module is imported.
        from .type_registry import GlobalTypeRegistry
        from .type_registry import RegisteredITSStrategyInfo

        build_cm = asynccontextmanager(fn)

        metadata = DiscoveryMetadata.from_config_type(
            config_type=config_type,
            component_type=AIQComponentEnum.ITS_STRATEGY,
        )

        register = GlobalTypeRegistry.get().register_its_strategy
        register(
            RegisteredITSStrategyInfo(
                full_type=config_type.full_type,
                config_type=config_type,
                build_fn=build_cm,
                discovery_metadata=metadata,
            ))

        return build_cm

    return register_its_strategy_inner
396
+
397
+
318
398
  def register_retriever_provider(config_type: type[RetrieverBaseConfigT]):
319
399
 
320
400
  def register_retriever_provider_inner(