aiqtoolkit 1.1.0a20250429__py3-none-any.whl → 1.1.0a20250502__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiqtoolkit might be problematic. Click here for more details.
- aiq/agent/react_agent/register.py +2 -2
- aiq/agent/reasoning_agent/reasoning_agent.py +1 -1
- aiq/agent/rewoo_agent/register.py +2 -2
- aiq/builder/component_utils.py +5 -5
- aiq/builder/front_end.py +4 -4
- aiq/builder/function_base.py +4 -4
- aiq/builder/function_info.py +1 -1
- aiq/builder/intermediate_step_manager.py +10 -8
- aiq/builder/workflow_builder.py +1 -1
- aiq/cli/cli_utils/validation.py +1 -1
- aiq/cli/commands/configure/channel/add.py +1 -1
- aiq/cli/commands/configure/channel/channel.py +3 -1
- aiq/cli/commands/configure/channel/remove.py +1 -1
- aiq/cli/commands/configure/channel/update.py +1 -1
- aiq/cli/commands/configure/configure.py +2 -2
- aiq/cli/commands/info/info.py +2 -2
- aiq/cli/commands/info/list_components.py +2 -2
- aiq/cli/commands/registry/publish.py +3 -3
- aiq/cli/commands/registry/pull.py +3 -3
- aiq/cli/commands/registry/registry.py +3 -1
- aiq/cli/commands/registry/remove.py +3 -3
- aiq/cli/commands/registry/search.py +3 -3
- aiq/cli/commands/start.py +4 -4
- aiq/cli/commands/uninstall.py +2 -2
- aiq/cli/commands/workflow/templates/pyproject.toml.j2 +2 -2
- aiq/cli/commands/workflow/workflow_commands.py +14 -8
- aiq/cli/entrypoint.py +1 -1
- aiq/data_models/api_server.py +73 -57
- aiq/data_models/component_ref.py +7 -7
- aiq/data_models/discovery_metadata.py +7 -7
- aiq/data_models/intermediate_step.py +2 -2
- aiq/eval/register.py +1 -0
- aiq/eval/remote_workflow.py +1 -1
- aiq/eval/tunable_rag_evaluator/__init__.py +0 -0
- aiq/eval/tunable_rag_evaluator/evaluate.py +263 -0
- aiq/eval/tunable_rag_evaluator/register.py +50 -0
- aiq/front_ends/console/console_front_end_config.py +1 -1
- aiq/front_ends/fastapi/fastapi_front_end_config.py +5 -5
- aiq/front_ends/fastapi/fastapi_front_end_plugin_worker.py +27 -18
- aiq/front_ends/fastapi/response_helpers.py +33 -19
- aiq/memory/__init__.py +2 -2
- aiq/meta/pypi.md +18 -18
- aiq/observability/async_otel_listener.py +157 -10
- aiq/profiler/callbacks/agno_callback_handler.py +2 -2
- aiq/profiler/callbacks/langchain_callback_handler.py +1 -1
- aiq/profiler/callbacks/llama_index_callback_handler.py +1 -1
- aiq/profiler/callbacks/semantic_kernel_callback_handler.py +1 -1
- aiq/profiler/decorators/function_tracking.py +1 -1
- aiq/profiler/profile_runner.py +1 -1
- aiq/registry_handlers/local/local_handler.py +5 -5
- aiq/registry_handlers/local/register_local.py +1 -1
- aiq/registry_handlers/package_utils.py +2 -2
- aiq/registry_handlers/pypi/pypi_handler.py +5 -5
- aiq/registry_handlers/pypi/register_pypi.py +3 -3
- aiq/registry_handlers/registry_handler_base.py +7 -7
- aiq/registry_handlers/rest/register_rest.py +4 -4
- aiq/registry_handlers/rest/rest_handler.py +5 -5
- aiq/registry_handlers/schemas/package.py +1 -1
- aiq/registry_handlers/schemas/publish.py +4 -4
- aiq/registry_handlers/schemas/pull.py +5 -4
- aiq/registry_handlers/schemas/search.py +7 -7
- aiq/retriever/models.py +1 -1
- aiq/runtime/loader.py +6 -6
- aiq/tool/mcp/mcp_tool.py +3 -2
- aiq/tool/retriever.py +1 -1
- aiq/utils/io/yaml_tools.py +75 -6
- aiq/utils/settings/global_settings.py +1 -1
- {aiqtoolkit-1.1.0a20250429.dist-info → aiqtoolkit-1.1.0a20250502.dist-info}/METADATA +24 -21
- {aiqtoolkit-1.1.0a20250429.dist-info → aiqtoolkit-1.1.0a20250502.dist-info}/RECORD +74 -71
- {aiqtoolkit-1.1.0a20250429.dist-info → aiqtoolkit-1.1.0a20250502.dist-info}/WHEEL +1 -1
- {aiqtoolkit-1.1.0a20250429.dist-info → aiqtoolkit-1.1.0a20250502.dist-info}/entry_points.txt +0 -0
- {aiqtoolkit-1.1.0a20250429.dist-info → aiqtoolkit-1.1.0a20250502.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
- {aiqtoolkit-1.1.0a20250429.dist-info → aiqtoolkit-1.1.0a20250502.dist-info}/licenses/LICENSE.md +0 -0
- {aiqtoolkit-1.1.0a20250429.dist-info → aiqtoolkit-1.1.0a20250502.dist-info}/top_level.txt +0 -0
|
@@ -57,7 +57,7 @@ class AIQEvaluateStatusResponse(BaseModel):
|
|
|
57
57
|
|
|
58
58
|
class FastApiFrontEndConfig(FrontEndBaseConfig, name="fastapi"):
|
|
59
59
|
"""
|
|
60
|
-
A FastAPI based front end that allows an
|
|
60
|
+
A FastAPI based front end that allows an AIQ Toolkit workflow to be served as a microservice.
|
|
61
61
|
"""
|
|
62
62
|
|
|
63
63
|
class EndpointBase(BaseModel):
|
|
@@ -119,7 +119,7 @@ class FastApiFrontEndConfig(FrontEndBaseConfig, name="fastapi"):
|
|
|
119
119
|
path="/generate",
|
|
120
120
|
websocket_path="/websocket",
|
|
121
121
|
openai_api_path="/chat",
|
|
122
|
-
description="Executes the default
|
|
122
|
+
description="Executes the default AIQ Toolkit workflow from the loaded configuration ",
|
|
123
123
|
)
|
|
124
124
|
|
|
125
125
|
evaluate: typing.Annotated[EndpointBase, Field(description="Endpoint for evaluating workflows.")] = EndpointBase(
|
|
@@ -131,7 +131,7 @@ class FastApiFrontEndConfig(FrontEndBaseConfig, name="fastapi"):
|
|
|
131
131
|
endpoints: list[Endpoint] = Field(
|
|
132
132
|
default_factory=list,
|
|
133
133
|
description=(
|
|
134
|
-
"Additional endpoints to add to the FastAPI app which run functions within the
|
|
134
|
+
"Additional endpoints to add to the FastAPI app which run functions within the AIQ Toolkit configuration. "
|
|
135
135
|
"Each endpoint must have a unique path."))
|
|
136
136
|
|
|
137
137
|
cors: CrossOriginResourceSharing = Field(
|
|
@@ -144,7 +144,7 @@ class FastApiFrontEndConfig(FrontEndBaseConfig, name="fastapi"):
|
|
|
144
144
|
)
|
|
145
145
|
runner_class: str | None = Field(
|
|
146
146
|
default=None,
|
|
147
|
-
description=("The
|
|
148
|
-
"Each runner is responsible for loading and running the
|
|
147
|
+
description=("The AIQ Toolkit runner class to use when launching the FastAPI app from multiple processes. "
|
|
148
|
+
"Each runner is responsible for loading and running the AIQ Toolkit workflow. "
|
|
149
149
|
"Note: This is different from the worker class used by Gunicorn."),
|
|
150
150
|
)
|
|
@@ -49,7 +49,7 @@ from aiq.front_ends.fastapi.job_store import JobInfo
|
|
|
49
49
|
from aiq.front_ends.fastapi.job_store import JobStore
|
|
50
50
|
from aiq.front_ends.fastapi.response_helpers import generate_single_response
|
|
51
51
|
from aiq.front_ends.fastapi.response_helpers import generate_streaming_response_as_str
|
|
52
|
-
from aiq.front_ends.fastapi.response_helpers import
|
|
52
|
+
from aiq.front_ends.fastapi.response_helpers import generate_streaming_response_full_as_str
|
|
53
53
|
from aiq.front_ends.fastapi.step_adaptor import StepAdaptor
|
|
54
54
|
from aiq.front_ends.fastapi.websocket import AIQWebSocket
|
|
55
55
|
from aiq.runtime.session import AIQSessionManager
|
|
@@ -82,7 +82,7 @@ class FastApiFrontEndPluginWorkerBase(ABC):
|
|
|
82
82
|
@asynccontextmanager
|
|
83
83
|
async def lifespan(starting_app: FastAPI):
|
|
84
84
|
|
|
85
|
-
logger.debug("Starting
|
|
85
|
+
logger.debug("Starting AIQ Toolkit server from process %s", os.getpid())
|
|
86
86
|
|
|
87
87
|
async with WorkflowBuilder.from_config(self.config) as builder:
|
|
88
88
|
|
|
@@ -96,7 +96,7 @@ class FastApiFrontEndPluginWorkerBase(ABC):
|
|
|
96
96
|
logger.info("Cancelling cleanup task")
|
|
97
97
|
cleanup_task.cancel()
|
|
98
98
|
|
|
99
|
-
logger.debug("Closing
|
|
99
|
+
logger.debug("Closing AIQ Toolkit server from process %s", os.getpid())
|
|
100
100
|
|
|
101
101
|
aiq_app = FastAPI(lifespan=lifespan)
|
|
102
102
|
|
|
@@ -396,14 +396,16 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
|
|
|
396
396
|
|
|
397
397
|
def get_streaming_raw_endpoint(streaming: bool, result_type: type | None, output_type: type | None):
|
|
398
398
|
|
|
399
|
-
async def get_stream():
|
|
399
|
+
async def get_stream(filter_steps: str | None = None):
|
|
400
400
|
|
|
401
401
|
return StreamingResponse(headers={"Content-Type": "text/event-stream; charset=utf-8"},
|
|
402
|
-
content=
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
402
|
+
content=generate_streaming_response_full_as_str(
|
|
403
|
+
None,
|
|
404
|
+
session_manager=session_manager,
|
|
405
|
+
streaming=streaming,
|
|
406
|
+
result_type=result_type,
|
|
407
|
+
output_type=output_type,
|
|
408
|
+
filter_steps=filter_steps))
|
|
407
409
|
|
|
408
410
|
return get_stream
|
|
409
411
|
|
|
@@ -443,14 +445,16 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
|
|
|
443
445
|
Stream raw intermediate steps without any step adaptor translations.
|
|
444
446
|
"""
|
|
445
447
|
|
|
446
|
-
async def post_stream(payload: request_type):
|
|
448
|
+
async def post_stream(payload: request_type, filter_steps: str | None = None):
|
|
447
449
|
|
|
448
450
|
return StreamingResponse(headers={"Content-Type": "text/event-stream; charset=utf-8"},
|
|
449
|
-
content=
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
451
|
+
content=generate_streaming_response_full_as_str(
|
|
452
|
+
payload,
|
|
453
|
+
session_manager=session_manager,
|
|
454
|
+
streaming=streaming,
|
|
455
|
+
result_type=result_type,
|
|
456
|
+
output_type=output_type,
|
|
457
|
+
filter_steps=filter_steps))
|
|
454
458
|
|
|
455
459
|
return post_stream
|
|
456
460
|
|
|
@@ -478,11 +482,14 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
|
|
|
478
482
|
)
|
|
479
483
|
|
|
480
484
|
app.add_api_route(
|
|
481
|
-
path=f"{endpoint.path}/
|
|
485
|
+
path=f"{endpoint.path}/full",
|
|
482
486
|
endpoint=get_streaming_raw_endpoint(streaming=True,
|
|
483
487
|
result_type=GenerateStreamResponseType,
|
|
484
488
|
output_type=GenerateStreamResponseType),
|
|
485
489
|
methods=[endpoint.method],
|
|
490
|
+
description="Stream raw intermediate steps without any step adaptor translations.\n"
|
|
491
|
+
"Use filter_steps query parameter to filter steps by type (comma-separated list) or\
|
|
492
|
+
set to 'none' to suppress all intermediate steps.",
|
|
486
493
|
)
|
|
487
494
|
|
|
488
495
|
elif (endpoint.method == "POST"):
|
|
@@ -510,14 +517,16 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
|
|
|
510
517
|
)
|
|
511
518
|
|
|
512
519
|
app.add_api_route(
|
|
513
|
-
path=f"{endpoint.path}/
|
|
520
|
+
path=f"{endpoint.path}/full",
|
|
514
521
|
endpoint=post_streaming_raw_endpoint(request_type=GenerateBodyType,
|
|
515
522
|
streaming=True,
|
|
516
523
|
result_type=GenerateStreamResponseType,
|
|
517
524
|
output_type=GenerateStreamResponseType),
|
|
518
525
|
methods=[endpoint.method],
|
|
519
526
|
response_model=GenerateStreamResponseType,
|
|
520
|
-
description="Stream raw intermediate steps without any step adaptor translations"
|
|
527
|
+
description="Stream raw intermediate steps without any step adaptor translations.\n"
|
|
528
|
+
"Use filter_steps query parameter to filter steps by type (comma-separated list) or \
|
|
529
|
+
set to 'none' to suppress all intermediate steps.",
|
|
521
530
|
responses={500: response_500},
|
|
522
531
|
)
|
|
523
532
|
|
|
@@ -117,16 +117,26 @@ async def generate_single_response(
|
|
|
117
117
|
return await runner.result(to_type=result_type)
|
|
118
118
|
|
|
119
119
|
|
|
120
|
-
async def
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
120
|
+
async def generate_streaming_response_full(payload: typing.Any,
|
|
121
|
+
*,
|
|
122
|
+
session_manager: AIQSessionManager,
|
|
123
|
+
streaming: bool,
|
|
124
|
+
result_type: type | None = None,
|
|
125
|
+
output_type: type | None = None,
|
|
126
|
+
filter_steps: str | None = None) -> AsyncGenerator[AIQResponseSerializable]:
|
|
126
127
|
"""
|
|
127
128
|
Similar to generate_streaming_response but provides raw AIQResponseIntermediateStep objects
|
|
128
129
|
without any step adaptor translations.
|
|
129
130
|
"""
|
|
131
|
+
# Parse filter_steps into a set of allowed types if provided
|
|
132
|
+
# Special case: if filter_steps is "none", suppress all steps
|
|
133
|
+
allowed_types = None
|
|
134
|
+
if filter_steps:
|
|
135
|
+
if filter_steps.lower() == "none":
|
|
136
|
+
allowed_types = set() # Empty set means no steps allowed
|
|
137
|
+
else:
|
|
138
|
+
allowed_types = set(filter_steps.split(','))
|
|
139
|
+
|
|
130
140
|
async with session_manager.run(payload) as runner:
|
|
131
141
|
q: AsyncIOProducerConsumerQueue[AIQResponseSerializable] = AsyncIOProducerConsumerQueue()
|
|
132
142
|
|
|
@@ -150,7 +160,9 @@ async def generate_streaming_response_raw(payload: typing.Any,
|
|
|
150
160
|
|
|
151
161
|
async for item in q:
|
|
152
162
|
if (isinstance(item, AIQResponseIntermediateStep)):
|
|
153
|
-
|
|
163
|
+
# Filter intermediate steps if filter_steps is provided
|
|
164
|
+
if allowed_types is None or item.type in allowed_types:
|
|
165
|
+
yield item
|
|
154
166
|
else:
|
|
155
167
|
yield AIQResponsePayloadOutput(payload=item)
|
|
156
168
|
except Exception as e:
|
|
@@ -160,20 +172,22 @@ async def generate_streaming_response_raw(payload: typing.Any,
|
|
|
160
172
|
await q.close()
|
|
161
173
|
|
|
162
174
|
|
|
163
|
-
async def
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
175
|
+
async def generate_streaming_response_full_as_str(payload: typing.Any,
|
|
176
|
+
*,
|
|
177
|
+
session_manager: AIQSessionManager,
|
|
178
|
+
streaming: bool,
|
|
179
|
+
result_type: type | None = None,
|
|
180
|
+
output_type: type | None = None,
|
|
181
|
+
filter_steps: str | None = None) -> AsyncGenerator[str]:
|
|
169
182
|
"""
|
|
170
|
-
Similar to
|
|
183
|
+
Similar to generate_streaming_response but converts the response to a string format.
|
|
171
184
|
"""
|
|
172
|
-
async for item in
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
185
|
+
async for item in generate_streaming_response_full(payload,
|
|
186
|
+
session_manager=session_manager,
|
|
187
|
+
streaming=streaming,
|
|
188
|
+
result_type=result_type,
|
|
189
|
+
output_type=output_type,
|
|
190
|
+
filter_steps=filter_steps):
|
|
177
191
|
if (isinstance(item, AIQResponseIntermediateStep) or isinstance(item, AIQResponsePayloadOutput)):
|
|
178
192
|
yield item.get_stream_data()
|
|
179
193
|
else:
|
aiq/memory/__init__.py
CHANGED
|
@@ -13,8 +13,8 @@
|
|
|
13
13
|
# See the License for the specific language governing permissions and
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
"""
|
|
16
|
-
|
|
16
|
+
AIQ Toolkit Memory Module
|
|
17
17
|
|
|
18
18
|
This package provides foundational classes and interfaces
|
|
19
|
-
for managing text-based memory in
|
|
19
|
+
for managing text-based memory in AIQ Toolkit's LLM-based agents.
|
|
20
20
|
"""
|
aiq/meta/pypi.md
CHANGED
|
@@ -6,7 +6,7 @@ Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
6
6
|
you may not use this file except in compliance with the License.
|
|
7
7
|
You may obtain a copy of the License at
|
|
8
8
|
|
|
9
|
-
http
|
|
9
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
10
10
|
|
|
11
11
|
Unless required by applicable law or agreed to in writing, software
|
|
12
12
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
@@ -15,39 +15,39 @@ See the License for the specific language governing permissions and
|
|
|
15
15
|
limitations under the License.
|
|
16
16
|
-->
|
|
17
17
|
|
|
18
|
-

|
|
19
19
|
|
|
20
|
-
# NVIDIA
|
|
20
|
+
# NVIDIA Agent Intelligence Toolkit
|
|
21
21
|
|
|
22
|
-
|
|
22
|
+
AIQ Toolkit is a flexible library designed to seamlessly integrate your enterprise agents—regardless of framework—with various data sources and tools. By treating agents, tools, and agentic workflows as simple function calls, AIQ Toolkit enables true composability: build once and reuse anywhere.
|
|
23
23
|
|
|
24
24
|
## Key Features
|
|
25
25
|
|
|
26
|
-
- [**Framework Agnostic:**](https://docs.nvidia.com/
|
|
27
|
-
- [**Reusability:**](https://docs.nvidia.com/
|
|
28
|
-
- [**Rapid Development:**](https://docs.nvidia.com/
|
|
29
|
-
- [**Profiling:**](https://docs.nvidia.com/
|
|
30
|
-
- [**Observability:**](https://docs.nvidia.com/
|
|
31
|
-
- [**Evaluation System:**](https://docs.nvidia.com/
|
|
32
|
-
- [**User Interface:**](https://docs.nvidia.com/
|
|
33
|
-
- [**MCP Compatibility**](https://docs.nvidia.com/
|
|
26
|
+
- [**Framework Agnostic:**](https://docs.nvidia.com/aiqtoolkit/latest/concepts/plugins.html) Works with any agentic framework, so you can use your current technology stack without replatforming.
|
|
27
|
+
- [**Reusability:**](https://docs.nvidia.com/aiqtoolkit/latest/guides/sharing-workflows-and-tools.html) Every agent, tool, or workflow can be combined and repurposed, allowing developers to leverage existing work in new scenarios.
|
|
28
|
+
- [**Rapid Development:**](https://docs.nvidia.com/aiqtoolkit/latest/guides/create-customize-workflows.html) Start with a pre-built agent, tool, or workflow, and customize it to your needs.
|
|
29
|
+
- [**Profiling:**](https://docs.nvidia.com/aiqtoolkit/latest/guides/profiler.html) Profile entire workflows down to the tool and agent level, track input/output tokens and timings, and identify bottlenecks.
|
|
30
|
+
- [**Observability:**](https://docs.nvidia.com/aiqtoolkit/latest/guides/observe-workflow-with-phoenix.html) Monitor and debug your workflows with any OpenTelemetry-compatible observability tool.
|
|
31
|
+
- [**Evaluation System:**](https://docs.nvidia.com/aiqtoolkit/latest/guides/evaluate.html) Validate and maintain accuracy of agentic workflows with built-in evaluation tools.
|
|
32
|
+
- [**User Interface:**](https://docs.nvidia.com/aiqtoolkit/latest/guides/using-aiqtoolkit-ui-and-server.html) Use the AIQ Toolkit UI chat interface to interact with your agents, visualize output, and debug workflows.
|
|
33
|
+
- [**MCP Compatibility**](https://docs.nvidia.com/aiqtoolkit/latest/components/mcp.html) Compatible with Model Context Protocol (MCP), allowing tools served by MCP Servers to be used as AIQ Toolkit functions.
|
|
34
34
|
|
|
35
|
-
With
|
|
35
|
+
With AIQ Toolkit, you can move quickly, experiment freely, and ensure reliability across all your agent-driven projects.
|
|
36
36
|
|
|
37
37
|
## Links
|
|
38
|
-
* [Documentation](https://docs.nvidia.com/
|
|
39
|
-
* [About
|
|
38
|
+
* [Documentation](https://docs.nvidia.com/aiqtoolkit/latest/index.html): Explore the full documentation for AIQ Toolkit.
|
|
39
|
+
* [About AIQ Toolkit](https://docs.nvidia.com/aiqtoolkit/latest/intro/why-aiqtoolkit.html): Learn more about the benefits of using AIQ Toolkit.
|
|
40
40
|
|
|
41
41
|
## First time user?
|
|
42
|
-
If this is your first time using
|
|
42
|
+
If this is your first time using AIQ Toolkit, it is recommended to install the latest version from the [source repository](https://github.com/NVIDIA/AIQToolkit?tab=readme-ov-file#get-started) on GitHub. This package is intended for users who are familiar with AIQ Toolkit applications and need to add AIQ Toolkit as a dependency to their project.
|
|
43
43
|
|
|
44
44
|
## Feedback
|
|
45
45
|
|
|
46
|
-
We would love to hear from you! Please file an issue on [GitHub](https://github.com/NVIDIA/
|
|
46
|
+
We would love to hear from you! Please file an issue on [GitHub](https://github.com/NVIDIA/AIQToolkit/issues) if you have any feedback or feature requests.
|
|
47
47
|
|
|
48
48
|
## Acknowledgements
|
|
49
49
|
|
|
50
|
-
We would like to thank the following open source projects that made
|
|
50
|
+
We would like to thank the following open source projects that made AIQ Toolkit possible:
|
|
51
51
|
|
|
52
52
|
- [CrewAI](https://github.com/crewAIInc/crewAI)
|
|
53
53
|
- [FastAPI](https://github.com/tiangolo/fastapi)
|
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
import logging
|
|
17
17
|
import re
|
|
18
18
|
from contextlib import asynccontextmanager
|
|
19
|
+
from contextlib import contextmanager
|
|
19
20
|
from typing import Any
|
|
20
21
|
|
|
21
22
|
from openinference.semconv.trace import OpenInferenceSpanKindValues
|
|
@@ -30,6 +31,17 @@ from aiq.builder.context import AIQContextState
|
|
|
30
31
|
from aiq.data_models.intermediate_step import IntermediateStep
|
|
31
32
|
from aiq.data_models.intermediate_step import IntermediateStepState
|
|
32
33
|
|
|
34
|
+
try:
|
|
35
|
+
from weave.trace.context import weave_client_context
|
|
36
|
+
from weave.trace.context.call_context import get_current_call
|
|
37
|
+
from weave.trace.context.call_context import set_call_stack
|
|
38
|
+
from weave.trace.weave_client import Call
|
|
39
|
+
WEAVE_AVAILABLE = True
|
|
40
|
+
except ImportError:
|
|
41
|
+
WEAVE_AVAILABLE = False
|
|
42
|
+
# we simply don't do anything if weave is not available
|
|
43
|
+
pass
|
|
44
|
+
|
|
33
45
|
logger = logging.getLogger(__name__)
|
|
34
46
|
|
|
35
47
|
OPENINFERENCE_SPAN_KIND = SpanAttributes.OPENINFERENCE_SPAN_KIND
|
|
@@ -37,7 +49,7 @@ OPENINFERENCE_SPAN_KIND = SpanAttributes.OPENINFERENCE_SPAN_KIND
|
|
|
37
49
|
|
|
38
50
|
def _ns_timestamp(seconds_float: float) -> int:
|
|
39
51
|
"""
|
|
40
|
-
Convert
|
|
52
|
+
Convert AIQ Toolkit’s float `event_timestamp` (in seconds) into an integer number
|
|
41
53
|
of nanoseconds, as OpenTelemetry expects.
|
|
42
54
|
"""
|
|
43
55
|
return int(seconds_float * 1e9)
|
|
@@ -45,14 +57,14 @@ def _ns_timestamp(seconds_float: float) -> int:
|
|
|
45
57
|
|
|
46
58
|
class AsyncOtelSpanListener:
|
|
47
59
|
"""
|
|
48
|
-
A separate, async class that listens to the
|
|
60
|
+
A separate, async class that listens to the AIQ Toolkit intermediate step
|
|
49
61
|
event stream and creates proper Otel spans:
|
|
50
62
|
|
|
51
63
|
- On FUNCTION_START => open a new top-level span
|
|
52
64
|
- On any other intermediate step => open a child subspan (immediate open/close)
|
|
53
65
|
- On FUNCTION_END => close the function’s top-level span
|
|
54
66
|
|
|
55
|
-
This runs fully independently from the normal
|
|
67
|
+
This runs fully independently from the normal AIQ Toolkit workflow, so that
|
|
56
68
|
the workflow is not blocking or entangled by OTel calls.
|
|
57
69
|
"""
|
|
58
70
|
|
|
@@ -70,7 +82,7 @@ class AsyncOtelSpanListener:
|
|
|
70
82
|
self._outstanding_spans: dict[str, Span] = {}
|
|
71
83
|
|
|
72
84
|
# Stack of spans, for when we need to create a child span
|
|
73
|
-
self._span_stack:
|
|
85
|
+
self._span_stack: dict[str, Span] = {}
|
|
74
86
|
|
|
75
87
|
self._running = False
|
|
76
88
|
|
|
@@ -84,6 +96,17 @@ class AsyncOtelSpanListener:
|
|
|
84
96
|
|
|
85
97
|
self._tracer = trace.get_tracer("aiq-async-otel-listener")
|
|
86
98
|
|
|
99
|
+
# Initialize Weave-specific components if available
|
|
100
|
+
self.gc = None
|
|
101
|
+
self._weave_calls = {}
|
|
102
|
+
if WEAVE_AVAILABLE:
|
|
103
|
+
try:
|
|
104
|
+
# Try to get the weave client, but don't fail if Weave isn't initialized
|
|
105
|
+
self.gc = weave_client_context.require_weave_client()
|
|
106
|
+
except Exception:
|
|
107
|
+
# Weave is not initialized, so we don't do anything
|
|
108
|
+
pass
|
|
109
|
+
|
|
87
110
|
def _on_next(self, step: IntermediateStep) -> None:
|
|
88
111
|
"""
|
|
89
112
|
The main logic that reacts to each IntermediateStep.
|
|
@@ -109,11 +132,11 @@ class AsyncOtelSpanListener:
|
|
|
109
132
|
|
|
110
133
|
otel_listener = AsyncOtelSpanListener()
|
|
111
134
|
async with otel_listener.start():
|
|
112
|
-
# run your
|
|
135
|
+
# run your AIQ Toolkit workflow
|
|
113
136
|
...
|
|
114
137
|
# cleans up
|
|
115
138
|
|
|
116
|
-
This sets up the subscription to the
|
|
139
|
+
This sets up the subscription to the AIQ Toolkit event stream and starts the background loop.
|
|
117
140
|
"""
|
|
118
141
|
try:
|
|
119
142
|
# Subscribe to the event stream
|
|
@@ -152,13 +175,19 @@ class AsyncOtelSpanListener:
|
|
|
152
175
|
|
|
153
176
|
self._outstanding_spans.clear()
|
|
154
177
|
|
|
155
|
-
if self._span_stack:
|
|
178
|
+
if len(self._span_stack) > 0:
|
|
156
179
|
logger.error(
|
|
157
180
|
"Not all spans were closed. Ensure all start events have a corresponding end event. Remaining: %s",
|
|
158
181
|
self._span_stack)
|
|
159
182
|
|
|
160
183
|
self._span_stack.clear()
|
|
161
184
|
|
|
185
|
+
# Clean up any lingering Weave calls if Weave is available and initialized
|
|
186
|
+
if self.gc is not None and self._weave_calls:
|
|
187
|
+
for _, call in list(self._weave_calls.items()):
|
|
188
|
+
self.gc.finish_call(call, {"status": "incomplete"})
|
|
189
|
+
self._weave_calls.clear()
|
|
190
|
+
|
|
162
191
|
def _serialize_payload(self, input_value: Any) -> tuple[str, bool]:
|
|
163
192
|
"""
|
|
164
193
|
Serialize the input value to a string. Returns a tuple with the serialized value and a boolean indicating if the
|
|
@@ -175,7 +204,10 @@ class AsyncOtelSpanListener:
|
|
|
175
204
|
parent_ctx = None
|
|
176
205
|
|
|
177
206
|
if (len(self._span_stack) > 0):
|
|
178
|
-
parent_span = self._span_stack
|
|
207
|
+
parent_span = self._span_stack.get(step.function_ancestry.parent_id, None)
|
|
208
|
+
if parent_span is None:
|
|
209
|
+
logger.warning("No parent span found for step %s", step.UUID)
|
|
210
|
+
return
|
|
179
211
|
|
|
180
212
|
parent_ctx = set_span_in_context(parent_span)
|
|
181
213
|
|
|
@@ -230,10 +262,14 @@ class AsyncOtelSpanListener:
|
|
|
230
262
|
sub_span.set_attribute(SpanAttributes.INPUT_VALUE, serialized_input)
|
|
231
263
|
sub_span.set_attribute(SpanAttributes.INPUT_MIME_TYPE, "application/json" if is_json else "text/plain")
|
|
232
264
|
|
|
233
|
-
self._span_stack.
|
|
265
|
+
self._span_stack[step.UUID] = sub_span
|
|
234
266
|
|
|
235
267
|
self._outstanding_spans[step.UUID] = sub_span
|
|
236
268
|
|
|
269
|
+
# Create corresponding Weave call if Weave is available and initialized
|
|
270
|
+
if self.gc is not None:
|
|
271
|
+
self._create_weave_call(step, sub_span)
|
|
272
|
+
|
|
237
273
|
def _process_end_event(self, step: IntermediateStep):
|
|
238
274
|
|
|
239
275
|
# Find the subspan that was created in the start event
|
|
@@ -243,7 +279,7 @@ class AsyncOtelSpanListener:
|
|
|
243
279
|
logger.warning("No subspan found for step %s", step.UUID)
|
|
244
280
|
return
|
|
245
281
|
|
|
246
|
-
self._span_stack.pop()
|
|
282
|
+
self._span_stack.pop(step.UUID, None)
|
|
247
283
|
|
|
248
284
|
# Optionally add more attributes from usage_info or data
|
|
249
285
|
usage_info = step.payload.usage_info
|
|
@@ -268,3 +304,114 @@ class AsyncOtelSpanListener:
|
|
|
268
304
|
|
|
269
305
|
# End the subspan
|
|
270
306
|
sub_span.end(end_time=end_ns)
|
|
307
|
+
|
|
308
|
+
# Finish corresponding Weave call if Weave is available and initialized
|
|
309
|
+
if self.gc is not None:
|
|
310
|
+
self._finish_weave_call(step, sub_span)
|
|
311
|
+
|
|
312
|
+
@contextmanager
|
|
313
|
+
def parent_call(self, trace_id: str, parent_call_id: str):
|
|
314
|
+
"""Context manager to set a parent call context for Weave.
|
|
315
|
+
This allows connecting AIQ spans to existing traces from other frameworks.
|
|
316
|
+
"""
|
|
317
|
+
dummy_call = Call(trace_id=trace_id, id=parent_call_id, _op_name="", project_id="", parent_id=None, inputs={})
|
|
318
|
+
with set_call_stack([dummy_call]):
|
|
319
|
+
yield
|
|
320
|
+
|
|
321
|
+
def _create_weave_call(self, step: IntermediateStep, span: Span) -> None:
|
|
322
|
+
"""
|
|
323
|
+
Create a Weave call directly from the span and step data,
|
|
324
|
+
connecting to existing framework traces if available.
|
|
325
|
+
"""
|
|
326
|
+
# Check for existing Weave trace/call
|
|
327
|
+
existing_call = get_current_call()
|
|
328
|
+
|
|
329
|
+
# Extract parent call if applicable
|
|
330
|
+
parent_call = None
|
|
331
|
+
|
|
332
|
+
# If we have an existing Weave call from another framework (e.g., LangChain),
|
|
333
|
+
# use it as the parent
|
|
334
|
+
if existing_call is not None:
|
|
335
|
+
parent_call = existing_call
|
|
336
|
+
logger.debug(f"Found existing Weave call: {existing_call.id} from trace: {existing_call.trace_id}")
|
|
337
|
+
# Otherwise, check our internal stack for parent relationships
|
|
338
|
+
elif len(self._weave_calls) > 0 and len(self._span_stack) > 1:
|
|
339
|
+
# Get the parent span using stack position (one level up)
|
|
340
|
+
parent_span_id = self._span_stack[-2].get_span_context().span_id
|
|
341
|
+
# Find the corresponding weave call for this parent span
|
|
342
|
+
for uuid, call in self._weave_calls.items():
|
|
343
|
+
if getattr(call, "span_id", None) == parent_span_id:
|
|
344
|
+
parent_call = call
|
|
345
|
+
break
|
|
346
|
+
|
|
347
|
+
# Generate a meaningful operation name based on event type
|
|
348
|
+
event_type = step.payload.event_type.split(".")[-1]
|
|
349
|
+
if step.payload.name:
|
|
350
|
+
op_name = f"aiq.{event_type}.{step.payload.name}"
|
|
351
|
+
else:
|
|
352
|
+
op_name = f"aiq.{event_type}"
|
|
353
|
+
|
|
354
|
+
# Create input dictionary
|
|
355
|
+
inputs = {}
|
|
356
|
+
if step.payload.data and step.payload.data.input is not None:
|
|
357
|
+
try:
|
|
358
|
+
# Add the input to the Weave call
|
|
359
|
+
inputs["input"] = step.payload.data.input
|
|
360
|
+
except Exception:
|
|
361
|
+
# If serialization fails, use string representation
|
|
362
|
+
inputs["input"] = str(step.payload.data.input)
|
|
363
|
+
|
|
364
|
+
# Create the Weave call
|
|
365
|
+
call = self.gc.create_call(
|
|
366
|
+
op_name,
|
|
367
|
+
inputs=inputs,
|
|
368
|
+
parent=parent_call,
|
|
369
|
+
attributes=span.attributes,
|
|
370
|
+
display_name=op_name,
|
|
371
|
+
)
|
|
372
|
+
|
|
373
|
+
# Store the call with step UUID as key
|
|
374
|
+
self._weave_calls[step.UUID] = call
|
|
375
|
+
|
|
376
|
+
# Store span ID for parent reference
|
|
377
|
+
setattr(call, "span_id", span.get_span_context().span_id)
|
|
378
|
+
|
|
379
|
+
return call
|
|
380
|
+
|
|
381
|
+
def _finish_weave_call(self, step: IntermediateStep, span: Span) -> None:
|
|
382
|
+
"""
|
|
383
|
+
Finish a previously created Weave call
|
|
384
|
+
"""
|
|
385
|
+
# Find the call for this step
|
|
386
|
+
call = self._weave_calls.pop(step.UUID, None)
|
|
387
|
+
|
|
388
|
+
if call is None:
|
|
389
|
+
logger.warning("No Weave call found for step %s", step.UUID)
|
|
390
|
+
return
|
|
391
|
+
|
|
392
|
+
# Create output dictionary
|
|
393
|
+
outputs = {}
|
|
394
|
+
if step.payload.data and step.payload.data.output is not None:
|
|
395
|
+
try:
|
|
396
|
+
# Add the output to the Weave call
|
|
397
|
+
outputs["output"] = step.payload.data.output
|
|
398
|
+
except Exception:
|
|
399
|
+
# If serialization fails, use string representation
|
|
400
|
+
outputs["output"] = str(step.payload.data.output)
|
|
401
|
+
|
|
402
|
+
# Add usage information if available
|
|
403
|
+
usage_info = step.payload.usage_info
|
|
404
|
+
if usage_info:
|
|
405
|
+
if usage_info.token_usage:
|
|
406
|
+
outputs["prompt_tokens"] = usage_info.token_usage.prompt_tokens
|
|
407
|
+
outputs["completion_tokens"] = usage_info.token_usage.completion_tokens
|
|
408
|
+
outputs["total_tokens"] = usage_info.token_usage.total_tokens
|
|
409
|
+
|
|
410
|
+
if usage_info.num_llm_calls:
|
|
411
|
+
outputs["num_llm_calls"] = usage_info.num_llm_calls
|
|
412
|
+
|
|
413
|
+
if usage_info.seconds_between_calls:
|
|
414
|
+
outputs["seconds_between_calls"] = usage_info.seconds_between_calls
|
|
415
|
+
|
|
416
|
+
# Finish the call with outputs
|
|
417
|
+
self.gc.finish_call(call, outputs)
|
|
@@ -44,7 +44,7 @@ class AgnoProfilerHandler(BaseProfilerCallback):
|
|
|
44
44
|
- LLM Calls
|
|
45
45
|
|
|
46
46
|
to collect usage statistics (tokens, inputs, outputs, time intervals, etc.)
|
|
47
|
-
and store them in
|
|
47
|
+
and store them in AIQ Toolkit's usage_stats queue for subsequent analysis.
|
|
48
48
|
"""
|
|
49
49
|
|
|
50
50
|
def __init__(self) -> None:
|
|
@@ -73,7 +73,7 @@ class AgnoProfilerHandler(BaseProfilerCallback):
|
|
|
73
73
|
|
|
74
74
|
# Note: Agno doesn't have a class-based tool structure to patch directly.
|
|
75
75
|
# Instead, it uses decorators to convert functions to tools.
|
|
76
|
-
# In
|
|
76
|
+
# In AIQ Toolkit, tool executions are captured at the execute_agno_tool level
|
|
77
77
|
# in packages/aiqtoolkit_agno/src/aiq/plugins/agno/tool_wrapper.py
|
|
78
78
|
|
|
79
79
|
# To properly monitor Agno tool executions, we would need to either:
|
|
@@ -50,7 +50,7 @@ class LangchainProfilerHandler(AsyncCallbackHandler, BaseProfilerCallback): # p
|
|
|
50
50
|
completion_tokens: int = 0
|
|
51
51
|
successful_requests: int = 0
|
|
52
52
|
raise_error = True # Override to raise error and run inline
|
|
53
|
-
run_inline =
|
|
53
|
+
run_inline = True
|
|
54
54
|
|
|
55
55
|
def __init__(self) -> None:
|
|
56
56
|
super().__init__()
|
|
@@ -167,7 +167,7 @@ class LlamaIndexProfilerHandler(BaseCallbackHandler, BaseProfilerCallback):
|
|
|
167
167
|
except Exception as e:
|
|
168
168
|
logger.exception("Error getting model name: %s", e, exc_info=True)
|
|
169
169
|
|
|
170
|
-
# Append usage data to
|
|
170
|
+
# Append usage data to AIQ Toolkit usage stats
|
|
171
171
|
with self._lock:
|
|
172
172
|
stats = IntermediateStepPayload(
|
|
173
173
|
event_type=IntermediateStepType.LLM_END,
|
|
@@ -55,7 +55,7 @@ class SemanticKernelProfilerHandler(BaseProfilerCallback):
|
|
|
55
55
|
- Tool calls
|
|
56
56
|
|
|
57
57
|
to collect usage statistics (tokens, inputs, outputs, time intervals, etc.)
|
|
58
|
-
and store them in
|
|
58
|
+
and store them in AIQ Toolkit's usage_stats queue for subsequent analysis.
|
|
59
59
|
"""
|
|
60
60
|
|
|
61
61
|
def __init__(self, workflow_llms: dict) -> None:
|
|
@@ -61,7 +61,7 @@ def push_intermediate_step(step_manager: IntermediateStepManager,
|
|
|
61
61
|
kwargs: Any = None,
|
|
62
62
|
output: Any = None,
|
|
63
63
|
metadata: dict[str, Any] | None = None) -> None:
|
|
64
|
-
"""Push an intermediate step to the
|
|
64
|
+
"""Push an intermediate step to the AIQ Toolkit Event Stream."""
|
|
65
65
|
|
|
66
66
|
payload = IntermediateStepPayload(UUID=identifier,
|
|
67
67
|
event_type=event_type,
|
aiq/profiler/profile_runner.py
CHANGED
|
@@ -48,7 +48,7 @@ class InferenceOptimizationHolder(BaseModel):
|
|
|
48
48
|
|
|
49
49
|
class ProfilerRunner:
|
|
50
50
|
"""
|
|
51
|
-
A utility to run a series of prompts through an
|
|
51
|
+
A utility to run a series of prompts through an AIQ Toolkit workflow for profiling:
|
|
52
52
|
|
|
53
53
|
- can load prompts from a file
|
|
54
54
|
- or generate them via an LLM
|