aiqtoolkit 1.2.0a20250707__py3-none-any.whl → 1.2.0a20250730__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiqtoolkit might be problematic. (The original page linked to more details here; the link target was not preserved in this extract.)
- aiq/agent/base.py +171 -8
- aiq/agent/dual_node.py +1 -1
- aiq/agent/react_agent/agent.py +113 -113
- aiq/agent/react_agent/register.py +31 -14
- aiq/agent/rewoo_agent/agent.py +36 -35
- aiq/agent/rewoo_agent/register.py +2 -2
- aiq/agent/tool_calling_agent/agent.py +3 -7
- aiq/authentication/__init__.py +14 -0
- aiq/authentication/api_key/__init__.py +14 -0
- aiq/authentication/api_key/api_key_auth_provider.py +92 -0
- aiq/authentication/api_key/api_key_auth_provider_config.py +124 -0
- aiq/authentication/api_key/register.py +26 -0
- aiq/authentication/exceptions/__init__.py +14 -0
- aiq/authentication/exceptions/api_key_exceptions.py +38 -0
- aiq/authentication/exceptions/auth_code_grant_exceptions.py +86 -0
- aiq/authentication/exceptions/call_back_exceptions.py +38 -0
- aiq/authentication/exceptions/request_exceptions.py +54 -0
- aiq/authentication/http_basic_auth/__init__.py +0 -0
- aiq/authentication/http_basic_auth/http_basic_auth_provider.py +81 -0
- aiq/authentication/http_basic_auth/register.py +30 -0
- aiq/authentication/interfaces.py +93 -0
- aiq/authentication/oauth2/__init__.py +14 -0
- aiq/authentication/oauth2/oauth2_auth_code_flow_provider.py +107 -0
- aiq/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +39 -0
- aiq/authentication/oauth2/register.py +25 -0
- aiq/authentication/register.py +21 -0
- aiq/builder/builder.py +64 -2
- aiq/builder/component_utils.py +16 -3
- aiq/builder/context.py +26 -0
- aiq/builder/eval_builder.py +43 -2
- aiq/builder/function.py +32 -4
- aiq/builder/function_base.py +1 -1
- aiq/builder/intermediate_step_manager.py +6 -8
- aiq/builder/user_interaction_manager.py +3 -0
- aiq/builder/workflow.py +23 -18
- aiq/builder/workflow_builder.py +420 -73
- aiq/cli/commands/info/list_mcp.py +103 -16
- aiq/cli/commands/sizing/__init__.py +14 -0
- aiq/cli/commands/sizing/calc.py +294 -0
- aiq/cli/commands/sizing/sizing.py +27 -0
- aiq/cli/commands/start.py +1 -0
- aiq/cli/entrypoint.py +2 -0
- aiq/cli/register_workflow.py +80 -0
- aiq/cli/type_registry.py +151 -30
- aiq/data_models/api_server.py +117 -11
- aiq/data_models/authentication.py +231 -0
- aiq/data_models/common.py +35 -7
- aiq/data_models/component.py +17 -9
- aiq/data_models/component_ref.py +33 -0
- aiq/data_models/config.py +60 -3
- aiq/data_models/embedder.py +1 -0
- aiq/data_models/function_dependencies.py +8 -0
- aiq/data_models/interactive.py +10 -1
- aiq/data_models/intermediate_step.py +15 -5
- aiq/data_models/its_strategy.py +30 -0
- aiq/data_models/llm.py +1 -0
- aiq/data_models/memory.py +1 -0
- aiq/data_models/object_store.py +44 -0
- aiq/data_models/retry_mixin.py +35 -0
- aiq/data_models/span.py +187 -0
- aiq/data_models/telemetry_exporter.py +2 -2
- aiq/embedder/nim_embedder.py +2 -1
- aiq/embedder/openai_embedder.py +2 -1
- aiq/eval/config.py +19 -1
- aiq/eval/dataset_handler/dataset_handler.py +75 -1
- aiq/eval/evaluate.py +53 -10
- aiq/eval/rag_evaluator/evaluate.py +23 -12
- aiq/eval/remote_workflow.py +7 -2
- aiq/eval/runners/__init__.py +14 -0
- aiq/eval/runners/config.py +39 -0
- aiq/eval/runners/multi_eval_runner.py +54 -0
- aiq/eval/usage_stats.py +6 -0
- aiq/eval/utils/weave_eval.py +5 -1
- aiq/experimental/__init__.py +0 -0
- aiq/experimental/decorators/__init__.py +0 -0
- aiq/experimental/decorators/experimental_warning_decorator.py +130 -0
- aiq/experimental/inference_time_scaling/__init__.py +0 -0
- aiq/experimental/inference_time_scaling/editing/__init__.py +0 -0
- aiq/experimental/inference_time_scaling/editing/iterative_plan_refinement_editor.py +147 -0
- aiq/experimental/inference_time_scaling/editing/llm_as_a_judge_editor.py +204 -0
- aiq/experimental/inference_time_scaling/editing/motivation_aware_summarization.py +107 -0
- aiq/experimental/inference_time_scaling/functions/__init__.py +0 -0
- aiq/experimental/inference_time_scaling/functions/execute_score_select_function.py +105 -0
- aiq/experimental/inference_time_scaling/functions/its_tool_orchestration_function.py +205 -0
- aiq/experimental/inference_time_scaling/functions/its_tool_wrapper_function.py +146 -0
- aiq/experimental/inference_time_scaling/functions/plan_select_execute_function.py +224 -0
- aiq/experimental/inference_time_scaling/models/__init__.py +0 -0
- aiq/experimental/inference_time_scaling/models/editor_config.py +132 -0
- aiq/experimental/inference_time_scaling/models/its_item.py +48 -0
- aiq/experimental/inference_time_scaling/models/scoring_config.py +112 -0
- aiq/experimental/inference_time_scaling/models/search_config.py +120 -0
- aiq/experimental/inference_time_scaling/models/selection_config.py +154 -0
- aiq/experimental/inference_time_scaling/models/stage_enums.py +43 -0
- aiq/experimental/inference_time_scaling/models/strategy_base.py +66 -0
- aiq/experimental/inference_time_scaling/models/tool_use_config.py +41 -0
- aiq/experimental/inference_time_scaling/register.py +36 -0
- aiq/experimental/inference_time_scaling/scoring/__init__.py +0 -0
- aiq/experimental/inference_time_scaling/scoring/llm_based_agent_scorer.py +168 -0
- aiq/experimental/inference_time_scaling/scoring/llm_based_plan_scorer.py +168 -0
- aiq/experimental/inference_time_scaling/scoring/motivation_aware_scorer.py +111 -0
- aiq/experimental/inference_time_scaling/search/__init__.py +0 -0
- aiq/experimental/inference_time_scaling/search/multi_llm_planner.py +128 -0
- aiq/experimental/inference_time_scaling/search/multi_query_retrieval_search.py +122 -0
- aiq/experimental/inference_time_scaling/search/single_shot_multi_plan_planner.py +128 -0
- aiq/experimental/inference_time_scaling/selection/__init__.py +0 -0
- aiq/experimental/inference_time_scaling/selection/best_of_n_selector.py +63 -0
- aiq/experimental/inference_time_scaling/selection/llm_based_agent_output_selector.py +131 -0
- aiq/experimental/inference_time_scaling/selection/llm_based_output_merging_selector.py +159 -0
- aiq/experimental/inference_time_scaling/selection/llm_based_plan_selector.py +128 -0
- aiq/experimental/inference_time_scaling/selection/threshold_selector.py +58 -0
- aiq/front_ends/console/authentication_flow_handler.py +233 -0
- aiq/front_ends/console/console_front_end_plugin.py +11 -2
- aiq/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
- aiq/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +27 -0
- aiq/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +107 -0
- aiq/front_ends/fastapi/fastapi_front_end_config.py +20 -0
- aiq/front_ends/fastapi/fastapi_front_end_controller.py +68 -0
- aiq/front_ends/fastapi/fastapi_front_end_plugin.py +14 -1
- aiq/front_ends/fastapi/fastapi_front_end_plugin_worker.py +353 -31
- aiq/front_ends/fastapi/html_snippets/__init__.py +14 -0
- aiq/front_ends/fastapi/html_snippets/auth_code_grant_success.py +35 -0
- aiq/front_ends/fastapi/main.py +2 -0
- aiq/front_ends/fastapi/message_handler.py +102 -84
- aiq/front_ends/fastapi/step_adaptor.py +2 -1
- aiq/llm/aws_bedrock_llm.py +2 -1
- aiq/llm/nim_llm.py +2 -1
- aiq/llm/openai_llm.py +2 -1
- aiq/object_store/__init__.py +20 -0
- aiq/object_store/in_memory_object_store.py +74 -0
- aiq/object_store/interfaces.py +84 -0
- aiq/object_store/models.py +36 -0
- aiq/object_store/register.py +20 -0
- aiq/observability/__init__.py +14 -0
- aiq/observability/exporter/__init__.py +14 -0
- aiq/observability/exporter/base_exporter.py +449 -0
- aiq/observability/exporter/exporter.py +78 -0
- aiq/observability/exporter/file_exporter.py +33 -0
- aiq/observability/exporter/processing_exporter.py +269 -0
- aiq/observability/exporter/raw_exporter.py +52 -0
- aiq/observability/exporter/span_exporter.py +264 -0
- aiq/observability/exporter_manager.py +335 -0
- aiq/observability/mixin/__init__.py +14 -0
- aiq/observability/mixin/batch_config_mixin.py +26 -0
- aiq/observability/mixin/collector_config_mixin.py +23 -0
- aiq/observability/mixin/file_mixin.py +288 -0
- aiq/observability/mixin/file_mode.py +23 -0
- aiq/observability/mixin/resource_conflict_mixin.py +134 -0
- aiq/observability/mixin/serialize_mixin.py +61 -0
- aiq/observability/mixin/type_introspection_mixin.py +183 -0
- aiq/observability/processor/__init__.py +14 -0
- aiq/observability/processor/batching_processor.py +316 -0
- aiq/observability/processor/intermediate_step_serializer.py +28 -0
- aiq/observability/processor/processor.py +68 -0
- aiq/observability/register.py +32 -116
- aiq/observability/utils/__init__.py +14 -0
- aiq/observability/utils/dict_utils.py +236 -0
- aiq/observability/utils/time_utils.py +31 -0
- aiq/profiler/calc/__init__.py +14 -0
- aiq/profiler/calc/calc_runner.py +623 -0
- aiq/profiler/calc/calculations.py +288 -0
- aiq/profiler/calc/data_models.py +176 -0
- aiq/profiler/calc/plot.py +345 -0
- aiq/profiler/data_models.py +2 -0
- aiq/profiler/profile_runner.py +16 -13
- aiq/runtime/loader.py +8 -2
- aiq/runtime/runner.py +23 -9
- aiq/runtime/session.py +16 -5
- aiq/tool/chat_completion.py +74 -0
- aiq/tool/code_execution/README.md +152 -0
- aiq/tool/code_execution/code_sandbox.py +151 -72
- aiq/tool/code_execution/local_sandbox/.gitignore +1 -0
- aiq/tool/code_execution/local_sandbox/local_sandbox_server.py +139 -24
- aiq/tool/code_execution/local_sandbox/sandbox.requirements.txt +3 -1
- aiq/tool/code_execution/local_sandbox/start_local_sandbox.sh +27 -2
- aiq/tool/code_execution/register.py +7 -3
- aiq/tool/code_execution/test_code_execution_sandbox.py +414 -0
- aiq/tool/mcp/exceptions.py +142 -0
- aiq/tool/mcp/mcp_client.py +17 -3
- aiq/tool/mcp/mcp_tool.py +1 -1
- aiq/tool/register.py +1 -0
- aiq/tool/server_tools.py +2 -2
- aiq/utils/exception_handlers/automatic_retries.py +289 -0
- aiq/utils/exception_handlers/mcp.py +211 -0
- aiq/utils/io/model_processing.py +28 -0
- aiq/utils/log_utils.py +37 -0
- aiq/utils/string_utils.py +38 -0
- aiq/utils/type_converter.py +18 -2
- aiq/utils/type_utils.py +87 -0
- {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/METADATA +37 -9
- {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/RECORD +195 -80
- {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/entry_points.txt +3 -0
- aiq/front_ends/fastapi/websocket.py +0 -153
- aiq/observability/async_otel_listener.py +0 -470
- {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/WHEEL +0 -0
- {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
- {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/licenses/LICENSE.md +0 -0
- {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/top_level.txt +0 -0
|
@@ -15,18 +15,34 @@
|
|
|
15
15
|
|
|
16
16
|
import json
|
|
17
17
|
import logging
|
|
18
|
+
from typing import Any
|
|
18
19
|
|
|
19
20
|
import anyio
|
|
20
21
|
import click
|
|
21
22
|
|
|
23
|
+
from aiq.tool.mcp.exceptions import MCPError
|
|
22
24
|
from aiq.tool.mcp.mcp_client import MCPBuilder
|
|
25
|
+
from aiq.utils.exception_handlers.mcp import format_mcp_error
|
|
23
26
|
|
|
24
27
|
# Suppress verbose logs from mcp.client.sse and httpx
|
|
25
28
|
logging.getLogger("mcp.client.sse").setLevel(logging.WARNING)
|
|
26
29
|
logging.getLogger("httpx").setLevel(logging.WARNING)
|
|
27
30
|
|
|
31
|
+
logger = logging.getLogger(__name__)
|
|
28
32
|
|
|
29
|
-
|
|
33
|
+
|
|
34
|
+
def format_tool(tool: Any) -> dict[str, str | None]:
|
|
35
|
+
"""Format an MCP tool into a dictionary for display.
|
|
36
|
+
|
|
37
|
+
Extracts name, description, and input schema from various MCP tool object types
|
|
38
|
+
and normalizes them into a consistent dictionary format for CLI display.
|
|
39
|
+
|
|
40
|
+
Args:
|
|
41
|
+
tool (Any): MCPToolClient or raw MCP Tool object (uses Any due to different types)
|
|
42
|
+
|
|
43
|
+
Returns:
|
|
44
|
+
dict[str, str | None]: Dictionary with name, description, and input_schema as keys
|
|
45
|
+
"""
|
|
30
46
|
name = getattr(tool, 'name', None)
|
|
31
47
|
description = getattr(tool, 'description', '')
|
|
32
48
|
input_schema = getattr(tool, 'input_schema', None) or getattr(tool, 'inputSchema', None)
|
|
@@ -45,19 +61,43 @@ def format_tool(tool):
|
|
|
45
61
|
}
|
|
46
62
|
|
|
47
63
|
|
|
48
|
-
def print_tool(tool_dict, detail=False):
|
|
49
|
-
|
|
64
|
+
def print_tool(tool_dict: dict[str, str | None], detail: bool = False) -> None:
|
|
65
|
+
"""Print a formatted tool to the console with optional detailed information.
|
|
66
|
+
|
|
67
|
+
Outputs tool information in a user-friendly format to stdout. When detail=True
|
|
68
|
+
or when description/schema are available, shows full information with separator.
|
|
69
|
+
|
|
70
|
+
Args:
|
|
71
|
+
tool_dict (dict[str, str | None]): Dictionary containing tool information with name, description, and
|
|
72
|
+
input_schema as keys
|
|
73
|
+
detail (bool, optional): Whether to force detailed output. Defaults to False.
|
|
74
|
+
"""
|
|
75
|
+
click.echo(f"Tool: {tool_dict.get('name', 'Unknown')}")
|
|
50
76
|
if detail or tool_dict.get('input_schema') or tool_dict.get('description'):
|
|
51
|
-
click.echo(f"Description: {tool_dict
|
|
52
|
-
if tool_dict
|
|
77
|
+
click.echo(f"Description: {tool_dict.get('description', 'No description available')}")
|
|
78
|
+
if tool_dict.get("input_schema"):
|
|
53
79
|
click.echo("Input Schema:")
|
|
54
|
-
click.echo(tool_dict
|
|
80
|
+
click.echo(tool_dict.get("input_schema"))
|
|
55
81
|
else:
|
|
56
82
|
click.echo("Input Schema: None")
|
|
57
83
|
click.echo("-" * 60)
|
|
58
84
|
|
|
59
85
|
|
|
60
|
-
async def list_tools_and_schemas(url, tool_name=None):
|
|
86
|
+
async def list_tools_and_schemas(url: str, tool_name: str | None = None) -> list[dict[str, str | None]]:
|
|
87
|
+
"""List MCP tools using MCPBuilder with structured exception handling.
|
|
88
|
+
|
|
89
|
+
Args:
|
|
90
|
+
url (str): MCP server URL to connect to
|
|
91
|
+
tool_name (str | None, optional): Specific tool name to retrieve.
|
|
92
|
+
If None, retrieves all available tools. Defaults to None.
|
|
93
|
+
|
|
94
|
+
Returns:
|
|
95
|
+
list[dict[str, str | None]]: List of formatted tool dictionaries, each containing name, description, and
|
|
96
|
+
input_schema as keys
|
|
97
|
+
|
|
98
|
+
Raises:
|
|
99
|
+
MCPError: Caught internally and logged, returns empty list instead
|
|
100
|
+
"""
|
|
61
101
|
builder = MCPBuilder(url=url)
|
|
62
102
|
try:
|
|
63
103
|
if tool_name:
|
|
@@ -66,12 +106,31 @@ async def list_tools_and_schemas(url, tool_name=None):
|
|
|
66
106
|
else:
|
|
67
107
|
tools = await builder.get_tools()
|
|
68
108
|
return [format_tool(tool) for tool in tools.values()]
|
|
69
|
-
except
|
|
70
|
-
|
|
109
|
+
except MCPError as e:
|
|
110
|
+
format_mcp_error(e, include_traceback=False)
|
|
71
111
|
return []
|
|
72
112
|
|
|
73
113
|
|
|
74
|
-
async def list_tools_direct(url, tool_name=None):
|
|
114
|
+
async def list_tools_direct(url: str, tool_name: str | None = None) -> list[dict[str, str | None]]:
|
|
115
|
+
"""List MCP tools using direct MCP protocol with exception conversion.
|
|
116
|
+
|
|
117
|
+
Bypasses MCPBuilder and uses raw MCP ClientSession and SSE client directly.
|
|
118
|
+
Converts raw exceptions to structured MCPErrors for consistent user experience.
|
|
119
|
+
Used when --direct flag is specified in CLI.
|
|
120
|
+
|
|
121
|
+
Args:
|
|
122
|
+
url (str): MCP server URL to connect to
|
|
123
|
+
tool_name (str | None, optional): Specific tool name to retrieve.
|
|
124
|
+
If None, retrieves all available tools. Defaults to None.
|
|
125
|
+
|
|
126
|
+
Returns:
|
|
127
|
+
list[dict[str, str | None]]: List of formatted tool dictionaries, each containing name, description, and
|
|
128
|
+
input_schema as keys
|
|
129
|
+
|
|
130
|
+
Note:
|
|
131
|
+
This function handles ExceptionGroup by extracting the most relevant exception
|
|
132
|
+
and converting it to MCPError for consistent error reporting.
|
|
133
|
+
"""
|
|
75
134
|
from mcp import ClientSession
|
|
76
135
|
from mcp.client.sse import sse_client
|
|
77
136
|
|
|
@@ -92,7 +151,17 @@ async def list_tools_direct(url, tool_name=None):
|
|
|
92
151
|
click.echo(f"[INFO] Tool '{tool_name}' not found.")
|
|
93
152
|
return tools
|
|
94
153
|
except Exception as e:
|
|
95
|
-
|
|
154
|
+
# Convert raw exceptions to structured MCPError for consistency
|
|
155
|
+
from aiq.utils.exception_handlers.mcp import convert_to_mcp_error
|
|
156
|
+
from aiq.utils.exception_handlers.mcp import extract_primary_exception
|
|
157
|
+
|
|
158
|
+
if isinstance(e, ExceptionGroup): # noqa: F821
|
|
159
|
+
primary_exception = extract_primary_exception(list(e.exceptions))
|
|
160
|
+
mcp_error = convert_to_mcp_error(primary_exception, url)
|
|
161
|
+
else:
|
|
162
|
+
mcp_error = convert_to_mcp_error(e, url)
|
|
163
|
+
|
|
164
|
+
format_mcp_error(mcp_error, include_traceback=False)
|
|
96
165
|
return []
|
|
97
166
|
|
|
98
167
|
|
|
@@ -103,10 +172,28 @@ async def list_tools_direct(url, tool_name=None):
|
|
|
103
172
|
@click.option('--detail', is_flag=True, help='Show full details for all tools')
|
|
104
173
|
@click.option('--json-output', is_flag=True, help='Output tool metadata in JSON format')
|
|
105
174
|
@click.pass_context
|
|
106
|
-
def list_mcp(ctx, direct, url, tool, detail, json_output):
|
|
107
|
-
"""
|
|
108
|
-
|
|
109
|
-
|
|
175
|
+
def list_mcp(ctx: click.Context, direct: bool, url: str, tool: str | None, detail: bool, json_output: bool) -> None:
|
|
176
|
+
"""List MCP tool names (default) or show detailed tool information.
|
|
177
|
+
|
|
178
|
+
Use --detail for full output including descriptions and input schemas.
|
|
179
|
+
If --tool is provided, always shows full output for that specific tool.
|
|
180
|
+
Use --direct to bypass MCPBuilder and use raw MCP protocol.
|
|
181
|
+
Use --json-output to get structured JSON data instead of formatted text.
|
|
182
|
+
|
|
183
|
+
Args:
|
|
184
|
+
ctx (click.Context): Click context object for command invocation
|
|
185
|
+
direct (bool): Whether to bypass MCPBuilder and use direct MCP protocol
|
|
186
|
+
url (str): MCP server URL to connect to (default: http://localhost:9901/sse)
|
|
187
|
+
tool (str | None): Optional specific tool name to retrieve detailed info for
|
|
188
|
+
detail (bool): Whether to show full details (description + schema) for all tools
|
|
189
|
+
json_output (bool): Whether to output tool metadata in JSON format instead of text
|
|
190
|
+
|
|
191
|
+
Examples:
|
|
192
|
+
aiq info mcp # List tool names only
|
|
193
|
+
aiq info mcp --detail # Show all tools with full details
|
|
194
|
+
aiq info mcp --tool my_tool # Show details for specific tool
|
|
195
|
+
aiq info mcp --json-output # Get JSON format output
|
|
196
|
+
aiq info mcp --direct --url http://... # Use direct protocol with custom URL
|
|
110
197
|
"""
|
|
111
198
|
if ctx.invoked_subcommand is not None:
|
|
112
199
|
return
|
|
@@ -123,4 +210,4 @@ def list_mcp(ctx, direct, url, tool, detail, json_output):
|
|
|
123
210
|
print_tool(tool_dict, detail=True)
|
|
124
211
|
else:
|
|
125
212
|
for tool_dict in tools:
|
|
126
|
-
click.echo(tool_dict
|
|
213
|
+
click.echo(tool_dict.get('name', 'Unknown tool'))
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
import asyncio
|
|
17
|
+
import logging
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
|
|
20
|
+
import click
|
|
21
|
+
from tabulate import tabulate
|
|
22
|
+
|
|
23
|
+
from aiq.profiler.calc.calc_runner import CalcRunner
|
|
24
|
+
from aiq.profiler.calc.data_models import CalcRunnerConfig
|
|
25
|
+
from aiq.profiler.calc.data_models import CalcRunnerOutput
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@click.command("calc", help="Estimate GPU count and plot metrics for a workflow")
|
|
31
|
+
@click.option(
|
|
32
|
+
"--config_file",
|
|
33
|
+
type=click.Path(exists=True, file_okay=True, dir_okay=False, path_type=Path),
|
|
34
|
+
required=False,
|
|
35
|
+
default=None,
|
|
36
|
+
help="A YAML config file for the workflow and evaluation. This is not needed in offline mode.",
|
|
37
|
+
)
|
|
38
|
+
@click.option(
|
|
39
|
+
"--offline_mode",
|
|
40
|
+
is_flag=True,
|
|
41
|
+
required=False,
|
|
42
|
+
default=False,
|
|
43
|
+
help="Run in offline mode. This is used to estimate the GPU count for a workflow without running the workflow. ")
|
|
44
|
+
@click.option(
|
|
45
|
+
"--target_llm_latency",
|
|
46
|
+
type=float,
|
|
47
|
+
required=False,
|
|
48
|
+
default=0,
|
|
49
|
+
help="Target p95 LLM latency (seconds). Can be set to 0 to ignore.",
|
|
50
|
+
)
|
|
51
|
+
@click.option(
|
|
52
|
+
"--target_workflow_runtime",
|
|
53
|
+
type=float,
|
|
54
|
+
required=False,
|
|
55
|
+
default=0,
|
|
56
|
+
help="Target p95 workflow runtime (seconds). Can be set to 0 to ignore.",
|
|
57
|
+
)
|
|
58
|
+
@click.option(
|
|
59
|
+
"--target_users",
|
|
60
|
+
type=int,
|
|
61
|
+
required=False,
|
|
62
|
+
default=0,
|
|
63
|
+
help="Target number of users to support.",
|
|
64
|
+
)
|
|
65
|
+
@click.option(
|
|
66
|
+
"--test_gpu_count",
|
|
67
|
+
type=int,
|
|
68
|
+
required=False,
|
|
69
|
+
default=0,
|
|
70
|
+
help="Number of GPUs used in the test.",
|
|
71
|
+
)
|
|
72
|
+
@click.option(
|
|
73
|
+
"--calc_output_dir",
|
|
74
|
+
type=click.Path(file_okay=False, dir_okay=True, path_type=Path),
|
|
75
|
+
required=False,
|
|
76
|
+
default=None,
|
|
77
|
+
help="Directory to save plots and results (optional).",
|
|
78
|
+
)
|
|
79
|
+
@click.option(
|
|
80
|
+
"--concurrencies",
|
|
81
|
+
type=str,
|
|
82
|
+
required=False,
|
|
83
|
+
default="1,2,3,4,5,6,7,8,9,10",
|
|
84
|
+
help="Comma-separated list of concurrency values to test (e.g., 1,2,4,8). Default: 1,2,3,4,5,6,7,8,9,10",
|
|
85
|
+
)
|
|
86
|
+
@click.option(
|
|
87
|
+
"--num_passes",
|
|
88
|
+
type=int,
|
|
89
|
+
required=False,
|
|
90
|
+
default=0,
|
|
91
|
+
help="Number of passes at each concurrency for the evaluation."
|
|
92
|
+
" If set to 0 the dataset is adjusted to a multiple of the concurrency. Default: 0",
|
|
93
|
+
)
|
|
94
|
+
@click.option(
|
|
95
|
+
"--append_calc_outputs",
|
|
96
|
+
is_flag=True,
|
|
97
|
+
required=False,
|
|
98
|
+
default=False,
|
|
99
|
+
help="Append calc outputs to the output directory. "
|
|
100
|
+
"By default append is set to False and the content of the online directory is overwritten.",
|
|
101
|
+
)
|
|
102
|
+
@click.option(
|
|
103
|
+
"--endpoint",
|
|
104
|
+
type=str,
|
|
105
|
+
required=False,
|
|
106
|
+
default=None,
|
|
107
|
+
help="Endpoint to use for the workflow if it is remote(optional).",
|
|
108
|
+
)
|
|
109
|
+
@click.option(
|
|
110
|
+
"--endpoint_timeout",
|
|
111
|
+
type=int,
|
|
112
|
+
required=False,
|
|
113
|
+
default=300,
|
|
114
|
+
help="Timeout for the remote workflow endpoint in seconds (default: 300).",
|
|
115
|
+
)
|
|
116
|
+
@click.pass_context
|
|
117
|
+
def calc_command(ctx,
|
|
118
|
+
config_file,
|
|
119
|
+
offline_mode,
|
|
120
|
+
target_llm_latency,
|
|
121
|
+
target_workflow_runtime,
|
|
122
|
+
target_users,
|
|
123
|
+
test_gpu_count,
|
|
124
|
+
calc_output_dir,
|
|
125
|
+
concurrencies,
|
|
126
|
+
num_passes,
|
|
127
|
+
append_calc_outputs,
|
|
128
|
+
endpoint,
|
|
129
|
+
endpoint_timeout):
|
|
130
|
+
"""Estimate GPU count and plot metrics for a workflow profile."""
|
|
131
|
+
# Only use CLI concurrencies, with default
|
|
132
|
+
concurrencies_list = [int(x) for x in concurrencies.split(",") if x.strip()]
|
|
133
|
+
|
|
134
|
+
# Dont allow a concurrency of 0
|
|
135
|
+
if 0 in concurrencies_list:
|
|
136
|
+
click.echo("Concurrency of 0 is not allowed.")
|
|
137
|
+
return
|
|
138
|
+
|
|
139
|
+
# Check if the parameters are valid in online and offline mode
|
|
140
|
+
if offline_mode:
|
|
141
|
+
# In offline mode target test parameters are needed to estimate the GPU count
|
|
142
|
+
if target_llm_latency == 0 and target_workflow_runtime == 0:
|
|
143
|
+
click.echo("Both --target_llm_latency and --target_workflow_runtime are 0. "
|
|
144
|
+
"Cannot estimate the GPU count.")
|
|
145
|
+
return
|
|
146
|
+
if test_gpu_count <= 0:
|
|
147
|
+
click.echo("Test GPU count is 0. Cannot estimate the GPU count.")
|
|
148
|
+
return
|
|
149
|
+
if target_users <= 0:
|
|
150
|
+
click.echo("Target users is 0. Cannot estimate the GPU count.")
|
|
151
|
+
return
|
|
152
|
+
if append_calc_outputs:
|
|
153
|
+
click.echo("Appending calc outputs is not supported in offline mode.")
|
|
154
|
+
return
|
|
155
|
+
if not calc_output_dir:
|
|
156
|
+
click.echo("Output directory is required in offline mode.")
|
|
157
|
+
return
|
|
158
|
+
else:
|
|
159
|
+
if not config_file:
|
|
160
|
+
click.echo("Config file is required in online mode.")
|
|
161
|
+
return
|
|
162
|
+
if target_llm_latency == 0 and target_workflow_runtime == 0:
|
|
163
|
+
click.echo("Both --target_llm_latency and --target_workflow_runtime are 0. "
|
|
164
|
+
"GPU count will not be estimated.")
|
|
165
|
+
if test_gpu_count <= 0:
|
|
166
|
+
click.echo("Test GPU count is 0. Tests will be run but the GPU count will not be estimated.")
|
|
167
|
+
if target_users <= 0:
|
|
168
|
+
click.echo("Target users is 0. Tests will be run but the GPU count will not be estimated.")
|
|
169
|
+
|
|
170
|
+
# Build CalcRunnerConfig
|
|
171
|
+
runner_config = CalcRunnerConfig(
|
|
172
|
+
config_file=config_file,
|
|
173
|
+
concurrencies=concurrencies_list,
|
|
174
|
+
target_llm_latency_p95=target_llm_latency,
|
|
175
|
+
target_workflow_runtime_p95=target_workflow_runtime,
|
|
176
|
+
target_users=target_users,
|
|
177
|
+
test_gpu_count=test_gpu_count,
|
|
178
|
+
output_dir=calc_output_dir,
|
|
179
|
+
num_passes=num_passes,
|
|
180
|
+
offline_mode=offline_mode,
|
|
181
|
+
append_job=append_calc_outputs,
|
|
182
|
+
endpoint=endpoint,
|
|
183
|
+
endpoint_timeout=endpoint_timeout,
|
|
184
|
+
)
|
|
185
|
+
|
|
186
|
+
async def run_calc() -> CalcRunnerOutput:
|
|
187
|
+
runner = CalcRunner(runner_config)
|
|
188
|
+
result = await runner.run()
|
|
189
|
+
return result
|
|
190
|
+
|
|
191
|
+
def print_results(results: CalcRunnerOutput):
|
|
192
|
+
|
|
193
|
+
# Print header with target numbers
|
|
194
|
+
click.echo(f"Targets: LLM Latency ≤ {runner_config.target_llm_latency_p95}s, "
|
|
195
|
+
f"Workflow Runtime ≤ {runner_config.target_workflow_runtime_p95}s, "
|
|
196
|
+
f"Users = {runner_config.target_users}")
|
|
197
|
+
click.echo(f"Test parameters: GPUs = {runner_config.test_gpu_count}")
|
|
198
|
+
|
|
199
|
+
# Check if there are any GPU estimates to determine if we should show GPU estimate columns
|
|
200
|
+
has_llm_latency_gpu_estimates = any(data.gpu_estimates.gpu_estimate_by_llm_latency is not None
|
|
201
|
+
for data in results.calc_data.values())
|
|
202
|
+
has_wf_runtime_gpu_estimates = any(data.gpu_estimates.gpu_estimate_by_wf_runtime is not None
|
|
203
|
+
for data in results.calc_data.values())
|
|
204
|
+
|
|
205
|
+
# Check if there are any interrupted workflows or outliers to determine if we should show the alerts column
|
|
206
|
+
has_alerts = any(data.sizing_metrics.alerts.workflow_interrupted or data.alerts.outlier_llm_latency
|
|
207
|
+
or data.alerts.outlier_workflow_runtime for data in results.calc_data.values())
|
|
208
|
+
|
|
209
|
+
# Print per concurrency results as a table
|
|
210
|
+
click.echo("Per concurrency results:")
|
|
211
|
+
|
|
212
|
+
# Show alerts legend if there are any alerts
|
|
213
|
+
if has_alerts:
|
|
214
|
+
click.echo("Alerts!: W = Workflow interrupted, L = LLM latency outlier, R = Workflow runtime outlier")
|
|
215
|
+
|
|
216
|
+
table = []
|
|
217
|
+
for concurrency, data in results.calc_data.items():
|
|
218
|
+
metrics = data.sizing_metrics
|
|
219
|
+
gpu_estimates_per_concurrency = data.gpu_estimates
|
|
220
|
+
sizing_metrics_alerts = data.sizing_metrics.alerts
|
|
221
|
+
calc_alerts = data.alerts
|
|
222
|
+
|
|
223
|
+
row = []
|
|
224
|
+
|
|
225
|
+
# Only include alerts column if there are any interrupted workflows (first column)
|
|
226
|
+
if has_alerts:
|
|
227
|
+
alerts = []
|
|
228
|
+
if sizing_metrics_alerts.workflow_interrupted:
|
|
229
|
+
alerts.append("W")
|
|
230
|
+
if calc_alerts.outlier_llm_latency:
|
|
231
|
+
alerts.append("L")
|
|
232
|
+
if calc_alerts.outlier_workflow_runtime:
|
|
233
|
+
alerts.append("R")
|
|
234
|
+
|
|
235
|
+
# Show ! followed by all alert characters
|
|
236
|
+
if alerts:
|
|
237
|
+
row.append(f"!{''.join(alerts)}")
|
|
238
|
+
else:
|
|
239
|
+
row.append("")
|
|
240
|
+
|
|
241
|
+
row.extend([
|
|
242
|
+
concurrency,
|
|
243
|
+
metrics.llm_latency_p95,
|
|
244
|
+
metrics.workflow_runtime_p95,
|
|
245
|
+
metrics.total_runtime,
|
|
246
|
+
])
|
|
247
|
+
|
|
248
|
+
# Only include GPU estimate columns if there are actual estimates of that type
|
|
249
|
+
if has_llm_latency_gpu_estimates:
|
|
250
|
+
row.append(gpu_estimates_per_concurrency.gpu_estimate_by_llm_latency)
|
|
251
|
+
if has_wf_runtime_gpu_estimates:
|
|
252
|
+
row.append(gpu_estimates_per_concurrency.gpu_estimate_by_wf_runtime)
|
|
253
|
+
|
|
254
|
+
table.append(row)
|
|
255
|
+
|
|
256
|
+
headers = []
|
|
257
|
+
|
|
258
|
+
# Only include alerts header if there are any alerts (first column)
|
|
259
|
+
if has_alerts:
|
|
260
|
+
headers.append("Alerts")
|
|
261
|
+
|
|
262
|
+
headers.extend([
|
|
263
|
+
"Concurrency",
|
|
264
|
+
"p95 LLM Latency",
|
|
265
|
+
"p95 WF Runtime",
|
|
266
|
+
"Total Runtime",
|
|
267
|
+
])
|
|
268
|
+
|
|
269
|
+
# Only include GPU estimate headers if there are actual estimates of that type
|
|
270
|
+
if has_llm_latency_gpu_estimates:
|
|
271
|
+
headers.append("GPUs (LLM Latency, Rough)")
|
|
272
|
+
if has_wf_runtime_gpu_estimates:
|
|
273
|
+
headers.append("GPUs (WF Runtime, Rough)")
|
|
274
|
+
|
|
275
|
+
click.echo(tabulate(table, headers=headers, tablefmt="github"))
|
|
276
|
+
|
|
277
|
+
# Display slope-based GPU estimates at the end
|
|
278
|
+
click.echo("") # Add blank line for separation
|
|
279
|
+
click.echo(click.style("=== GPU ESTIMATES ===", fg="bright_blue", bold=True))
|
|
280
|
+
if results.gpu_estimates.gpu_estimate_by_wf_runtime is not None:
|
|
281
|
+
click.echo(
|
|
282
|
+
click.style(
|
|
283
|
+
f"Estimated GPU count (Workflow Runtime): {results.gpu_estimates.gpu_estimate_by_wf_runtime:.1f}",
|
|
284
|
+
fg="green",
|
|
285
|
+
bold=True))
|
|
286
|
+
if results.gpu_estimates.gpu_estimate_by_llm_latency is not None:
|
|
287
|
+
click.echo(
|
|
288
|
+
click.style(
|
|
289
|
+
f"Estimated GPU count (LLM Latency): {results.gpu_estimates.gpu_estimate_by_llm_latency:.1f}",
|
|
290
|
+
fg="green",
|
|
291
|
+
bold=True))
|
|
292
|
+
|
|
293
|
+
results = asyncio.run(run_calc())
|
|
294
|
+
print_results(results)
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
import click
|
|
17
|
+
|
|
18
|
+
from .calc import calc_command
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@click.group(help="Size GPU clusters for workflows with the specified options.")
def sizing():
    """Container group for the sizing sub-commands.

    The group itself performs no work; the user-facing help text comes from the
    ``help`` argument passed to ``click.group`` above.
    """


# Attach the calculator command imported from ``.calc`` to the group.
sizing.add_command(calc_command)
|
aiq/cli/commands/start.py
CHANGED
|
@@ -187,6 +187,7 @@ class StartCommandGroup(click.Group):
|
|
|
187
187
|
|
|
188
188
|
config = validate_schema(config_dict, AIQConfig)
|
|
189
189
|
|
|
190
|
+
# Override default front end config with values from the config file for serverless execution modes.
|
|
190
191
|
# Check that we have the right kind of front end
|
|
191
192
|
if (not isinstance(config.general.front_end, front_end.config_type)):
|
|
192
193
|
logger.warning(
|
aiq/cli/entrypoint.py
CHANGED
|
@@ -34,6 +34,7 @@ from .commands.configure.configure import configure_command
|
|
|
34
34
|
from .commands.evaluate import eval_command
|
|
35
35
|
from .commands.info.info import info_command
|
|
36
36
|
from .commands.registry.registry import registry_command
|
|
37
|
+
from .commands.sizing.sizing import sizing
|
|
37
38
|
from .commands.start import start_command
|
|
38
39
|
from .commands.uninstall import uninstall_command
|
|
39
40
|
from .commands.validate import validate_command
|
|
@@ -105,6 +106,7 @@ cli.add_command(start_command, name="start")
|
|
|
105
106
|
cli.add_command(uninstall_command, name="uninstall")
|
|
106
107
|
cli.add_command(validate_command, name="validate")
|
|
107
108
|
cli.add_command(workflow_command, name="workflow")
|
|
109
|
+
cli.add_command(sizing, name="sizing")
|
|
108
110
|
|
|
109
111
|
# Aliases
|
|
110
112
|
cli.add_command(start_command.get_command(None, "console"), name="run") # type: ignore
|
aiq/cli/register_workflow.py
CHANGED
|
@@ -16,6 +16,8 @@
|
|
|
16
16
|
from contextlib import asynccontextmanager
|
|
17
17
|
|
|
18
18
|
from aiq.builder.framework_enum import LLMFrameworkEnum
|
|
19
|
+
from aiq.cli.type_registry import AuthProviderBuildCallableT
|
|
20
|
+
from aiq.cli.type_registry import AuthProviderRegisteredCallableT
|
|
19
21
|
from aiq.cli.type_registry import EmbedderClientBuildCallableT
|
|
20
22
|
from aiq.cli.type_registry import EmbedderClientRegisteredCallableT
|
|
21
23
|
from aiq.cli.type_registry import EmbedderProviderBuildCallableT
|
|
@@ -26,6 +28,8 @@ from aiq.cli.type_registry import FrontEndBuildCallableT
|
|
|
26
28
|
from aiq.cli.type_registry import FrontEndRegisteredCallableT
|
|
27
29
|
from aiq.cli.type_registry import FunctionBuildCallableT
|
|
28
30
|
from aiq.cli.type_registry import FunctionRegisteredCallableT
|
|
31
|
+
from aiq.cli.type_registry import ITSStrategyBuildCallableT
|
|
32
|
+
from aiq.cli.type_registry import ITSStrategyRegisterCallableT
|
|
29
33
|
from aiq.cli.type_registry import LLMClientBuildCallableT
|
|
30
34
|
from aiq.cli.type_registry import LLMClientRegisteredCallableT
|
|
31
35
|
from aiq.cli.type_registry import LLMProviderBuildCallableT
|
|
@@ -34,6 +38,8 @@ from aiq.cli.type_registry import LoggingMethodConfigT
|
|
|
34
38
|
from aiq.cli.type_registry import LoggingMethodRegisteredCallableT
|
|
35
39
|
from aiq.cli.type_registry import MemoryBuildCallableT
|
|
36
40
|
from aiq.cli.type_registry import MemoryRegisteredCallableT
|
|
41
|
+
from aiq.cli.type_registry import ObjectStoreBuildCallableT
|
|
42
|
+
from aiq.cli.type_registry import ObjectStoreRegisteredCallableT
|
|
37
43
|
from aiq.cli.type_registry import RegisteredLoggingMethod
|
|
38
44
|
from aiq.cli.type_registry import RegisteredTelemetryExporter
|
|
39
45
|
from aiq.cli.type_registry import RegisteredToolWrapper
|
|
@@ -47,6 +53,7 @@ from aiq.cli.type_registry import TeleExporterRegisteredCallableT
|
|
|
47
53
|
from aiq.cli.type_registry import TelemetryExporterBuildCallableT
|
|
48
54
|
from aiq.cli.type_registry import TelemetryExporterConfigT
|
|
49
55
|
from aiq.cli.type_registry import ToolWrapperBuildCallableT
|
|
56
|
+
from aiq.data_models.authentication import AuthProviderBaseConfigT
|
|
50
57
|
from aiq.data_models.component import AIQComponentEnum
|
|
51
58
|
from aiq.data_models.discovery_metadata import DiscoveryMetadata
|
|
52
59
|
from aiq.data_models.embedder import EmbedderBaseConfigT
|
|
@@ -55,6 +62,7 @@ from aiq.data_models.front_end import FrontEndConfigT
|
|
|
55
62
|
from aiq.data_models.function import FunctionConfigT
|
|
56
63
|
from aiq.data_models.llm import LLMBaseConfigT
|
|
57
64
|
from aiq.data_models.memory import MemoryBaseConfigT
|
|
65
|
+
from aiq.data_models.object_store import ObjectStoreBaseConfigT
|
|
58
66
|
from aiq.data_models.registry_handler import RegistryHandlerBaseConfigT
|
|
59
67
|
from aiq.data_models.retriever import RetrieverBaseConfigT
|
|
60
68
|
|
|
@@ -192,6 +200,30 @@ def register_llm_provider(config_type: type[LLMBaseConfigT]):
|
|
|
192
200
|
return register_llm_provider_inner
|
|
193
201
|
|
|
194
202
|
|
|
203
|
+
def register_auth_provider(config_type: type[AuthProviderBaseConfigT]):
    """Build a decorator that registers an authentication provider builder.

    Args:
        config_type: Configuration class the provider is registered under. Its
            ``full_type`` attribute is stored as the registry key field.

    Returns:
        A decorator. Applied to a builder function, it wraps the function with
        ``asynccontextmanager``, records it in the ``GlobalTypeRegistry`` and
        evaluates to the wrapped function.
    """

    def register_auth_provider_inner(
        fn: AuthProviderBuildCallableT[AuthProviderBaseConfigT]
    ) -> AuthProviderRegisteredCallableT[AuthProviderBaseConfigT]:
        # Resolved when the decorator is applied rather than at module import time.
        from .type_registry import GlobalTypeRegistry
        from .type_registry import RegisteredAuthProviderInfo

        # The registry stores the builder in its async-context-manager form.
        wrapped_fn = asynccontextmanager(fn)

        metadata = DiscoveryMetadata.from_config_type(config_type=config_type,
                                                      component_type=AIQComponentEnum.AUTHENTICATION_PROVIDER)

        registry = GlobalTypeRegistry.get()
        provider_info = RegisteredAuthProviderInfo(full_type=config_type.full_type,
                                                   config_type=config_type,
                                                   build_fn=wrapped_fn,
                                                   discovery_metadata=metadata)
        registry.register_auth_provider(provider_info)

        return wrapped_fn

    return register_auth_provider_inner
|
|
225
|
+
|
|
226
|
+
|
|
195
227
|
def register_llm_client(config_type: type[LLMBaseConfigT], wrapper_type: LLMFrameworkEnum | str):
|
|
196
228
|
|
|
197
229
|
def register_llm_client_inner(
|
|
@@ -315,6 +347,54 @@ def register_memory(config_type: type[MemoryBaseConfigT]):
|
|
|
315
347
|
return register_memory_inner
|
|
316
348
|
|
|
317
349
|
|
|
350
|
+
def register_object_store(config_type: type[ObjectStoreBaseConfigT]):
    """Build a decorator that registers an object store builder.

    Args:
        config_type: Configuration class the object store is registered under.
            Its ``full_type`` attribute is stored alongside the builder.

    Returns:
        A decorator. Applied to a builder function, it wraps the function with
        ``asynccontextmanager``, records it in the ``GlobalTypeRegistry`` and
        evaluates to the wrapped function.
    """

    # Named after the component it registers, matching the sibling
    # ``register_<component>_inner`` helpers (previously ``register_kv_store_inner``).
    def register_object_store_inner(
        fn: ObjectStoreBuildCallableT[ObjectStoreBaseConfigT]
    ) -> ObjectStoreRegisteredCallableT[ObjectStoreBaseConfigT]:
        # Resolved when the decorator is applied rather than at module import time.
        from .type_registry import GlobalTypeRegistry
        from .type_registry import RegisteredObjectStoreInfo

        # The registry stores the builder in its async-context-manager form.
        context_manager_fn = asynccontextmanager(fn)

        discovery_metadata = DiscoveryMetadata.from_config_type(config_type=config_type,
                                                                component_type=AIQComponentEnum.OBJECT_STORE)

        GlobalTypeRegistry.get().register_object_store(
            RegisteredObjectStoreInfo(full_type=config_type.full_type,
                                      config_type=config_type,
                                      build_fn=context_manager_fn,
                                      discovery_metadata=discovery_metadata))

        return context_manager_fn

    return register_object_store_inner
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
def register_its_strategy(config_type: type[ITSStrategyRegisterCallableT]):
    """Build a decorator that registers an ITS strategy builder.

    Args:
        config_type: Configuration class the strategy is registered under. Its
            ``full_type`` attribute is stored alongside the builder.

    Returns:
        A decorator. Applied to a builder function, it wraps the function with
        ``asynccontextmanager``, records it in the ``GlobalTypeRegistry`` and
        evaluates to the wrapped function.
    """

    # NOTE(review): the sibling registrars annotate ``config_type`` and the generic
    # parameters with a ``*BaseConfigT`` type variable; here the registered-callable
    # alias is used in those positions instead. Confirm whether a config TypeVar
    # was intended before changing the signature.
    def register_its_strategy_inner(
        fn: ITSStrategyBuildCallableT[ITSStrategyRegisterCallableT]
    ) -> ITSStrategyRegisterCallableT[ITSStrategyRegisterCallableT]:
        # Resolved when the decorator is applied rather than at module import time.
        from .type_registry import GlobalTypeRegistry
        from .type_registry import RegisteredITSStrategyInfo

        # The registry stores the builder in its async-context-manager form.
        wrapped_fn = asynccontextmanager(fn)

        metadata = DiscoveryMetadata.from_config_type(config_type=config_type,
                                                      component_type=AIQComponentEnum.ITS_STRATEGY)

        registry = GlobalTypeRegistry.get()
        strategy_info = RegisteredITSStrategyInfo(full_type=config_type.full_type,
                                                  config_type=config_type,
                                                  build_fn=wrapped_fn,
                                                  discovery_metadata=metadata)
        registry.register_its_strategy(strategy_info)

        return wrapped_fn

    return register_its_strategy_inner
|
|
396
|
+
|
|
397
|
+
|
|
318
398
|
def register_retriever_provider(config_type: type[RetrieverBaseConfigT]):
|
|
319
399
|
|
|
320
400
|
def register_retriever_provider_inner(
|