aiqtoolkit 1.2.0rc2__py3-none-any.whl → 1.2.0rc4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of aiqtoolkit might be problematic.
- aiq/agent/base.py +8 -7
- aiq/agent/react_agent/agent.py +2 -3
- aiq/agent/react_agent/register.py +1 -1
- aiq/agent/reasoning_agent/reasoning_agent.py +2 -1
- aiq/agent/tool_calling_agent/register.py +2 -1
- aiq/authentication/api_key/api_key_auth_provider.py +6 -2
- aiq/builder/function.py +21 -6
- aiq/builder/function_base.py +6 -2
- aiq/cli/commands/sizing/calc.py +6 -3
- aiq/cli/commands/start.py +0 -5
- aiq/cli/commands/uninstall.py +2 -4
- aiq/data_models/api_server.py +6 -12
- aiq/data_models/component_ref.py +1 -1
- aiq/data_models/discovery_metadata.py +62 -13
- aiq/front_ends/console/console_front_end_plugin.py +2 -22
- aiq/front_ends/simple_base/simple_front_end_plugin_base.py +4 -2
- aiq/object_store/in_memory_object_store.py +18 -16
- aiq/observability/exporter/processing_exporter.py +99 -46
- aiq/observability/exporter/span_exporter.py +1 -0
- aiq/observability/processor/batching_processor.py +52 -59
- aiq/observability/processor/callback_processor.py +42 -0
- aiq/observability/processor/processor.py +4 -1
- aiq/profiler/calc/calc_runner.py +5 -1
- aiq/profiler/calc/data_models.py +18 -6
- aiq/registry_handlers/package_utils.py +397 -28
- aiq/runtime/loader.py +23 -2
- aiq/tool/code_execution/README.md +0 -1
- aiq/tool/server_tools.py +1 -1
- aiq/utils/dump_distro_mapping.py +32 -0
- aiq/utils/type_converter.py +52 -10
- {aiqtoolkit-1.2.0rc2.dist-info → aiqtoolkit-1.2.0rc4.dist-info}/METADATA +1 -1
- {aiqtoolkit-1.2.0rc2.dist-info → aiqtoolkit-1.2.0rc4.dist-info}/RECORD +37 -35
- {aiqtoolkit-1.2.0rc2.dist-info → aiqtoolkit-1.2.0rc4.dist-info}/WHEEL +0 -0
- {aiqtoolkit-1.2.0rc2.dist-info → aiqtoolkit-1.2.0rc4.dist-info}/entry_points.txt +0 -0
- {aiqtoolkit-1.2.0rc2.dist-info → aiqtoolkit-1.2.0rc4.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
- {aiqtoolkit-1.2.0rc2.dist-info → aiqtoolkit-1.2.0rc4.dist-info}/licenses/LICENSE.md +0 -0
- {aiqtoolkit-1.2.0rc2.dist-info → aiqtoolkit-1.2.0rc4.dist-info}/top_level.txt +0 -0
aiq/observability/exporter/processing_exporter.py
CHANGED
@@ -17,6 +17,7 @@ import asyncio
 import logging
 from abc import abstractmethod
 from collections.abc import Coroutine
+from typing import Any
 from typing import Generic
 from typing import TypeVar
 
@@ -24,6 +25,7 @@ from aiq.builder.context import AIQContextState
 from aiq.data_models.intermediate_step import IntermediateStep
 from aiq.observability.exporter.base_exporter import BaseExporter
 from aiq.observability.mixin.type_introspection_mixin import TypeIntrospectionMixin
+from aiq.observability.processor.callback_processor import CallbackProcessor
 from aiq.observability.processor.processor import Processor
 from aiq.utils.type_utils import DecomposedType
 from aiq.utils.type_utils import override
@@ -89,6 +91,14 @@ class ProcessingExporter(Generic[PipelineInputT, PipelineOutputT], BaseExporter,
                                  self._processors[-1].output_type)
         self._processors.append(processor)
 
+        # Set up pipeline continuation callback for processors that support it
+        if isinstance(processor, CallbackProcessor):
+            # Create a callback that continues processing through the rest of the pipeline
+            async def pipeline_callback(item):
+                await self._continue_pipeline_after(processor, item)
+
+            processor.set_done_callback(pipeline_callback)
+
     def remove_processor(self, processor: Processor) -> None:
         """Remove a processor from the processing pipeline.
 
@@ -143,20 +153,82 @@ class ProcessingExporter(Generic[PipelineInputT, PipelineOutputT], BaseExporter,
         """Process item through all registered processors.
 
         Args:
-            item: The item to process (starts as PipelineInputT, can transform to PipelineOutputT)
+            item (PipelineInputT): The item to process (starts as PipelineInputT, can transform to PipelineOutputT)
+
+        Returns:
+            PipelineOutputT: The processed item after running through all processors
+        """
+        return await self._process_through_processors(self._processors, item)  # type: ignore
+
+    async def _process_through_processors(self, processors: list[Processor], item: Any) -> Any:
+        """Process an item through a list of processors.
+
+        Args:
+            processors (list[Processor]): List of processors to run the item through
+            item (Any): The item to process
 
         Returns:
            The processed item after running through all processors
        """
        processed_item = item
-        for processor in
+        for processor in processors:
             try:
                 processed_item = await processor.process(processed_item)
             except Exception as e:
                 logger.error("Error in processor %s: %s", processor.__class__.__name__, e, exc_info=True)
-                # Continue with unprocessed item rather than failing
+                # Continue with unprocessed item rather than failing
+        return processed_item
+
+    async def _export_final_item(self, processed_item: Any, raise_on_invalid: bool = False) -> None:
+        """Export a processed item with proper type handling.
+
+        Args:
+            processed_item (Any): The item to export
+            raise_on_invalid (bool): If True, raise ValueError for invalid types instead of logging warning
+        """
+        if isinstance(processed_item, list):
+            if len(processed_item) > 0:
+                await self.export_processed(processed_item)
+            else:
+                logger.debug("Skipping export of empty batch")
+        elif isinstance(processed_item, self.output_class):
+            await self.export_processed(processed_item)
+        else:
+            if raise_on_invalid:
+                raise ValueError(f"Processed item {processed_item} is not a valid output type. "
+                                 f"Expected {self.output_class} or list[{self.output_class}]")
+            logger.warning("Processed item %s is not a valid output type for export", processed_item)
+
+    async def _continue_pipeline_after(self, source_processor: Processor, item: Any) -> None:
+        """Continue processing an item through the pipeline after a specific processor.
 
-
+        This is used when processors (like BatchingProcessor) need to inject items
+        back into the pipeline flow to continue through downstream processors.
+
+        Args:
+            source_processor (Processor): The processor that generated the item
+            item (Any): The item to continue processing through the remaining pipeline
+        """
+        try:
+            # Find the source processor's position
+            try:
+                source_index = self._processors.index(source_processor)
+            except ValueError:
+                logger.error("Source processor %s not found in pipeline", source_processor.__class__.__name__)
+                return
+
+            # Process through remaining processors (skip the source processor)
+            remaining_processors = self._processors[source_index + 1:]
+            processed_item = await self._process_through_processors(remaining_processors, item)
+
+            # Export the final result
+            await self._export_final_item(processed_item)
+
+        except Exception as e:
+            logger.error("Failed to continue pipeline processing after %s: %s",
+                         source_processor.__class__.__name__,
+                         e,
+                         exc_info=True)
 
     async def _export_with_processing(self, item: PipelineInputT) -> None:
         """Export an item after processing it through the pipeline.
@@ -169,20 +241,11 @@ class ProcessingExporter(Generic[PipelineInputT, PipelineOutputT], BaseExporter,
             final_item: PipelineOutputT = await self._process_pipeline(item)
 
             # Handle different output types from batch processors
-            if isinstance(final_item, list):
-
-
-
-
-
-                # Non-empty lists should be exported (batch processors)
-                await self.export_processed(final_item)
-            elif isinstance(final_item, self.output_class):
-                # Single items should be exported normally
-                await self.export_processed(final_item)
-            else:
-                raise ValueError(f"Processed item {final_item} is not a valid output type. "
-                                 f"Expected {self.output_class} or list[{self.output_class}]")
+            if isinstance(final_item, list) and len(final_item) == 0:
+                logger.debug("Skipping export of empty batch from processor pipeline")
+                return
+
+            await self._export_final_item(final_item, raise_on_invalid=True)
 
         except Exception as e:
             logger.error("Failed to export item '%s': %s", item, e, exc_info=True)
@@ -235,35 +298,25 @@ class ProcessingExporter(Generic[PipelineInputT, PipelineOutputT], BaseExporter,
 
     @override
     async def _cleanup(self):
-        """Enhanced cleanup that shuts down all shutdown-aware processors.
+        """Enhanced cleanup that shuts down all shutdown-aware processors.
+
+        Each processor is responsible for its own cleanup, including routing
+        any final batches through the remaining pipeline via their done callbacks.
+        """
         # Shutdown all processors that support it
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        # Process final batches from batch processors
-        for processor in getattr(self, '_processors', []):
-            if hasattr(processor, 'has_final_batch') and hasattr(processor, 'get_final_batch'):
-                if processor.has_final_batch():
-                    final_batch = processor.get_final_batch()
-                    if final_batch:
-                        logger.info("Processing final batch of %d items from %s during cleanup",
-                                    len(final_batch),
-                                    processor.__class__.__name__)
-                        try:
-                            await self.export_processed(final_batch)
-                        except Exception as e:
-                            logger.error("Error processing final batch during cleanup: %s", e, exc_info=True)
+        shutdown_tasks = []
+        for processor in getattr(self, '_processors', []):
+            shutdown_method = getattr(processor, 'shutdown', None)
+            if shutdown_method:
+                logger.debug("Shutting down processor: %s", processor.__class__.__name__)
+                shutdown_tasks.append(shutdown_method())
+
+        if shutdown_tasks:
+            try:
+                await asyncio.gather(*shutdown_tasks, return_exceptions=True)
+                logger.info("Successfully shut down %d processors", len(shutdown_tasks))
+            except Exception as e:
+                logger.error("Error shutting down processors: %s", e, exc_info=True)
 
         # Call parent cleanup
         await super()._cleanup()
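Taken together, these changes let a callback-aware processor re-enter the pipeline downstream of itself instead of exporting directly. The following is a minimal, self-contained sketch of that wiring pattern; the class names (`MiniExporter`, `Collector`, `BatchUpper`) are illustrative stand-ins, not the aiqtoolkit API:

```python
import asyncio


class BatchUpper:
    """Downstream stage: uppercases every string in a batch."""

    async def process(self, batch: list[str]) -> list[str]:
        return [s.upper() for s in batch]


class Collector:
    """Stand-in for a callback-aware processor: buffers items, flushes via callback."""

    def __init__(self) -> None:
        self._buffer: list[str] = []
        self._done = None

    def set_done_callback(self, callback) -> None:
        self._done = callback

    async def process(self, item: str) -> str:
        self._buffer.append(item)
        return item

    async def flush(self) -> None:
        # Emit the buffered batch back into the pipeline *after* this stage.
        if self._buffer and self._done is not None:
            batch, self._buffer = self._buffer, []
            await self._done(batch)


class MiniExporter:
    """Wires a continuation callback when a processor supports it."""

    def __init__(self) -> None:
        self._processors = []

    def add_processor(self, processor) -> None:
        self._processors.append(processor)
        if hasattr(processor, "set_done_callback"):
            async def continue_after(item, _source=processor):
                # Resume with the processors *after* the source, then export.
                index = self._processors.index(_source)
                for downstream in self._processors[index + 1:]:
                    item = await downstream.process(item)
                print("exported:", item)

            processor.set_done_callback(continue_after)


async def main() -> None:
    exporter = MiniExporter()
    collector = Collector()
    exporter.add_processor(collector)
    exporter.add_processor(BatchUpper())

    for word in ("alpha", "beta"):
        await collector.process(word)
    await collector.flush()  # prints: exported: ['ALPHA', 'BETA']


asyncio.run(main())
```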
aiq/observability/processor/batching_processor.py
CHANGED
@@ -23,17 +23,17 @@ from typing import Any
 from typing import Generic
 from typing import TypeVar
 
-from aiq.observability.processor.
+from aiq.observability.processor.callback_processor import CallbackProcessor
 
 logger = logging.getLogger(__name__)
 
 T = TypeVar('T')
 
 
-class BatchingProcessor(
+class BatchingProcessor(CallbackProcessor[T, list[T]], Generic[T]):
     """Pass-through batching processor that accumulates items and outputs batched lists.
 
-    This processor
+    This processor extends CallbackProcessor[T, List[T]] to provide batching functionality.
     It accumulates individual items and outputs them as batches when size or time thresholds
     are met. The batched output continues through the processing pipeline.
 
@@ -43,25 +43,31 @@ class BatchingProcessor(Processor[T, list[T]], Generic[T]):
     Key Features:
     - Pass-through design: Processor[T, List[T]]
     - Size-based and time-based batching
-    -
+    - Pipeline flow: batches continue through downstream processors
     - GUARANTEED: No items lost during cleanup
     - Comprehensive statistics and monitoring
     - Proper cleanup and shutdown handling
     - High-performance async implementation
     - Back-pressure handling with queue limits
 
+    Pipeline Flow:
+    Normal processing: Individual items → BatchingProcessor → List[items] → downstream processors → export
+    Time-based flush: Scheduled batches automatically continue through remaining pipeline
+    Shutdown: Final batch immediately routed through remaining pipeline
+
     Cleanup Guarantee:
-    When
+    When shutdown() is called, this processor:
     1. Stops accepting new items
-    2.
-    3.
-    4. Ensures zero data loss
+    2. Creates final batch from all queued items
+    3. Immediately routes final batch through remaining pipeline via callback
+    4. Ensures zero data loss with no external coordination needed
 
     Usage in Pipeline:
     ```python
-    # Individual spans → Batched spans → Continue
-    exporter.add_processor(BatchingProcessor[Span](batch_size=100))
-    exporter.add_processor(
+    # Individual spans → Batched spans → Continue through downstream processors
+    exporter.add_processor(BatchingProcessor[Span](batch_size=100))  # Auto-wired with pipeline callback
+    exporter.add_processor(FilterProcessor())  # Processes List[Span] from batching
+    exporter.add_processor(TransformProcessor())  # Further processing
     ```
 
     Args:
@@ -70,6 +76,10 @@ class BatchingProcessor(Processor[T, list[T]], Generic[T]):
         max_queue_size: Maximum items to queue before blocking (default: 1000)
         drop_on_overflow: If True, drop items when queue is full (default: False)
         shutdown_timeout: Max seconds to wait for final batch processing (default: 10.0)
+
+    Note:
+        The done_callback for pipeline integration is automatically set by ProcessingExporter
+        when the processor is added to a pipeline. For standalone usage, call set_done_callback().
     """
 
     def __init__(self,
@@ -77,14 +87,13 @@ class BatchingProcessor(Processor[T, list[T]], Generic[T]):
                  flush_interval: float = 5.0,
                  max_queue_size: int = 1000,
                  drop_on_overflow: bool = False,
-                 shutdown_timeout: float = 10.0
-                 done_callback: Callable[[list[T]], Awaitable[None]] | None = None):
+                 shutdown_timeout: float = 10.0):
         self._batch_size = batch_size
         self._flush_interval = flush_interval
         self._max_queue_size = max_queue_size
         self._drop_on_overflow = drop_on_overflow
         self._shutdown_timeout = shutdown_timeout
-        self._done_callback =
+        self._done_callback: Callable[[list[T]], Awaitable[None]] | None = None
 
         # Batching state
         self._batch_queue: deque[T] = deque()
@@ -93,11 +102,7 @@ class BatchingProcessor(Processor[T, list[T]], Generic[T]):
         self._batch_lock = asyncio.Lock()
         self._shutdown_requested = False
         self._shutdown_complete = False
-        self._shutdown_complete_event
-
-        # Final batch handling for cleanup
-        self._final_batch: list[T] | None = None
-        self._final_batch_processed = False
+        self._shutdown_complete_event = asyncio.Event()
 
         # Callback for immediate export of scheduled batches
         self._done = None
@@ -167,7 +172,11 @@ class BatchingProcessor(Processor[T, list[T]], Generic[T]):
             return []
 
     def set_done_callback(self, callback: Callable[[list[T]], Awaitable[None]]):
-        """Set callback function for
+        """Set callback function for routing batches through the remaining pipeline.
+
+        This is automatically set by ProcessingExporter.add_processor() to continue
+        batches through downstream processors before final export.
+        """
         self._done_callback = callback
 
     async def _schedule_flush(self):
@@ -178,15 +187,15 @@ class BatchingProcessor(Processor[T, list[T]], Generic[T]):
             if not self._shutdown_requested and self._batch_queue:
                 batch = await self._create_batch()
                 if batch:
-                    #
+                    # Route scheduled batches through pipeline via callback
                     if self._done_callback is not None:
                         try:
                             await self._done_callback(batch)
-                            logger.debug("Scheduled flush
+                            logger.debug("Scheduled flush routed batch of %d items through pipeline", len(batch))
                         except Exception as e:
-                            logger.error("Error
+                            logger.error("Error routing scheduled batch through pipeline: %s", e, exc_info=True)
                     else:
-                        logger.warning("Scheduled flush created batch of %d items but no
+                        logger.warning("Scheduled flush created batch of %d items but no pipeline callback set",
                                        len(batch))
         except asyncio.CancelledError:
             pass
@@ -223,11 +232,8 @@ class BatchingProcessor(Processor[T, list[T]], Generic[T]):
         """Shutdown the processor and ensure all items are processed.
 
         CRITICAL: This method is called by ProcessingExporter._cleanup() to ensure
-        no items are lost during shutdown. It
-
-
-        The final batch will be processed by the next process() call or can be
-        retrieved via get_final_batch().
+        no items are lost during shutdown. It immediately routes any remaining
+        items as a final batch through the rest of the processing pipeline.
         """
         if self._shutdown_requested:
             logger.debug("Shutdown already requested, waiting for completion")
@@ -251,13 +257,26 @@ class BatchingProcessor(Processor[T, list[T]], Generic[T]):
         except asyncio.CancelledError:
             pass
 
-        # Create final batch
+        # Create and route final batch through pipeline
        async with self._batch_lock:
            if self._batch_queue:
-
-                logger.info("Created final batch of %d items during shutdown", len(
+                final_batch = await self._create_batch()
+                logger.info("Created final batch of %d items during shutdown", len(final_batch))
+
+                # Route final batch through pipeline via callback
+                if self._done_callback is not None:
+                    try:
+                        await self._done_callback(final_batch)
+                        logger.info("Successfully routed final batch of %d items through pipeline during shutdown",
+                                    len(final_batch))
+                    except Exception as e:
+                        logger.error("Error routing final batch through pipeline during shutdown: %s",
+                                     e,
+                                     exc_info=True)
+                else:
+                    logger.warning("Final batch of %d items created during shutdown but no pipeline callback set",
+                                   len(final_batch))
             else:
-                self._final_batch = []
                 logger.info("No items remaining during shutdown")
 
         self._shutdown_complete = True
@@ -269,30 +288,6 @@ class BatchingProcessor(Processor[T, list[T]], Generic[T]):
         self._shutdown_complete = True
         self._shutdown_complete_event.set()
 
-    def get_final_batch(self) -> list[T]:
-        """Get the final batch created during shutdown.
-
-        This method allows the exporter to retrieve and process any items
-        that were queued when shutdown was called.
-
-        Returns:
-            List[T]: Final batch of items, empty list if none
-        """
-        if self._final_batch is not None:
-            final_batch = self._final_batch
-            self._final_batch = None  # Clear to avoid double processing
-            self._final_batch_processed = True
-            return final_batch
-        return []
-
-    def has_final_batch(self) -> bool:
-        """Check if there's a final batch waiting to be processed.
-
-        Returns:
-            bool: True if final batch exists and hasn't been processed
-        """
-        return self._final_batch is not None and not self._final_batch_processed
-
     def get_stats(self) -> dict[str, Any]:
         """Get comprehensive batching statistics."""
         return {
@@ -309,8 +304,6 @@ class BatchingProcessor(Processor[T, list[T]], Generic[T]):
             "shutdown_batches": self._shutdown_batches,
             "shutdown_requested": self._shutdown_requested,
             "shutdown_complete": self._shutdown_complete,
-            "final_batch_size": len(self._final_batch) if self._final_batch else 0,
-            "final_batch_processed": self._final_batch_processed,
             "avg_items_per_batch": self._items_processed / max(1, self._batches_created),
             "drop_rate": self._items_dropped / max(1, self._items_processed) * 100 if self._items_processed > 0 else 0
         }
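Per the new docstring Note, the callback is auto-wired when the processor joins a pipeline, while standalone use requires set_done_callback(). Below is a hedged standalone-usage sketch; it assumes process() returns an empty list until a size-based batch is ready (as the `return []` path above suggests), which is an inference from this diff rather than documented behavior:

```python
import asyncio

from aiq.observability.processor.batching_processor import BatchingProcessor


async def main() -> None:
    received: list[list[int]] = []

    async def on_batch(batch: list[int]) -> None:
        # In a pipeline this callback is installed by ProcessingExporter;
        # standalone, we collect the routed batches ourselves.
        received.append(batch)

    # Long flush_interval so only size-based batching and shutdown fire here.
    batcher = BatchingProcessor[int](batch_size=3, flush_interval=60.0)
    batcher.set_done_callback(on_batch)

    for i in range(7):
        batch = await batcher.process(i)
        if batch:  # assumption: a full size-based batch is returned inline
            received.append(batch)

    await batcher.shutdown()  # remaining items routed through on_batch
    print(received)           # expected shape: [[0, 1, 2], [3, 4, 5], [6]]


asyncio.run(main())
```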
aiq/observability/processor/callback_processor.py
ADDED
@@ -0,0 +1,42 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from abc import abstractmethod
+from collections.abc import Awaitable
+from collections.abc import Callable
+from typing import Any
+from typing import TypeVar
+
+from aiq.observability.processor.processor import Processor
+
+InputT = TypeVar('InputT')
+OutputT = TypeVar('OutputT')
+
+
+class CallbackProcessor(Processor[InputT, OutputT]):
+    """Abstract base class for processors that support done callbacks.
+
+    Processors inheriting from this class can register callbacks that are
+    invoked when items are ready for further processing or export.
+    """
+
+    @abstractmethod
+    def set_done_callback(self, callback: Callable[[Any], Awaitable[None]]) -> None:
+        """Set a callback function to be invoked when items are processed.
+
+        Args:
+            callback (Callable[[Any], Awaitable[None]]): Function to call with processed items
+        """
+        pass
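CallbackProcessor adds a single abstract method on top of Processor.process. A minimal concrete subclass might look like the following sketch; the PassThroughProcessor name and the deferral comment are illustrative, not part of the package:

```python
from collections.abc import Awaitable
from collections.abc import Callable
from typing import Any

from aiq.observability.processor.callback_processor import CallbackProcessor


class PassThroughProcessor(CallbackProcessor[dict, dict]):
    """Illustrative subclass: stores the callback and passes items through."""

    def __init__(self) -> None:
        self._done: Callable[[Any], Awaitable[None]] | None = None

    def set_done_callback(self, callback: Callable[[Any], Awaitable[None]]) -> None:
        self._done = callback

    async def process(self, item: dict) -> dict:
        # A real implementation could hold items and later replay them with
        # `await self._done(item)` to re-enter the downstream pipeline.
        return item
```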
aiq/observability/processor/processor.py
CHANGED
@@ -63,6 +63,9 @@ class Processor(Generic[InputT, OutputT], TypeIntrospectionMixin, ABC):
         """Process an item and return a potentially different type.
 
         Args:
-            item: The item to process
+            item (InputT): The item to process
+
+        Returns:
+            OutputT: The processed item
         """
         pass
aiq/profiler/calc/calc_runner.py
CHANGED
@@ -34,6 +34,7 @@ from aiq.profiler.calc.data_models import CalcData
 from aiq.profiler.calc.data_models import CalcRunnerConfig
 from aiq.profiler.calc.data_models import CalcRunnerOutput
 from aiq.profiler.calc.data_models import FitConfig
+from aiq.profiler.calc.data_models import FitResults
 from aiq.profiler.calc.data_models import GPUEstimates
 from aiq.profiler.calc.data_models import SizingMetricPerItem
 from aiq.profiler.calc.data_models import SizingMetrics
@@ -408,7 +409,10 @@
         if gpu_estimates.gpu_estimate_by_llm_latency is not None:
             logger.info("GPU estimate by LLM latency: %.2f", gpu_estimates.gpu_estimate_by_llm_latency)
 
-        return CalcRunnerOutput(gpu_estimates=gpu_estimates,
+        return CalcRunnerOutput(gpu_estimates=gpu_estimates,
+                                calc_data=calc_data,
+                                fit_results=FitResults(llm_latency_fit=self.linear_analyzer.llm_latency_fit,
+                                                       wf_runtime_fit=self.linear_analyzer.wf_runtime_fit))
 
     def plot_concurrency_vs_time_metrics(self, output_dir: Path):
         """Plots concurrency vs. time metrics using pre-computed fits."""
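CalcRunnerOutput now carries the linear fits alongside the GPU estimates. A small illustrative consumer of the new field follows; only outliers_removed is visible in this diff, so no other LinearFitResult attributes are assumed:

```python
from aiq.profiler.calc.data_models import CalcRunnerOutput


def summarize_fits(output: CalcRunnerOutput) -> None:
    """Illustrative helper: report which data points each fit discarded."""
    fits = output.fit_results
    if fits.llm_latency_fit is not None:
        print("LLM latency fit removed outliers:", fits.llm_latency_fit.outliers_removed)
    if fits.wf_runtime_fit is not None:
        print("Workflow runtime fit removed outliers:", fits.wf_runtime_fit.outliers_removed)
```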
aiq/profiler/calc/data_models.py
CHANGED
@@ -17,6 +17,7 @@ import typing
 from pathlib import Path
 
 from pydantic import BaseModel
+from pydantic import Field
 
 
 class FitConfig(BaseModel):
@@ -76,7 +77,7 @@ class CalcRunnerConfig(BaseModel):
     plot_data: bool = True
 
     # Configuration for linear fit and outlier detection
-    fit_config: FitConfig = FitConfig
+    fit_config: FitConfig = Field(default_factory=FitConfig)
 
 
 # Sizing metrics are gathered from the evaluation runs and used as input by the calculator.
@@ -103,7 +104,7 @@ class SizingMetrics(BaseModel):
     Sizing metrics for a single concurrency.
     """
     # alerts associated with the sizing metrics
-    alerts: SizingMetricsAlerts = SizingMetricsAlerts
+    alerts: SizingMetricsAlerts = Field(default_factory=SizingMetricsAlerts)
 
     # p95 LLM latency
     llm_latency_p95: float = 0.0
@@ -125,6 +126,14 @@ class LinearFitResult(BaseModel):
     outliers_removed: list[int]
 
 
+class FitResults(BaseModel):
+    """
+    Linear fit results for both LLM latency and workflow runtime analysis.
+    """
+    llm_latency_fit: LinearFitResult | None = None
+    wf_runtime_fit: LinearFitResult | None = None
+
+
 # GPU estimates are generated by the calculator.
 class GPUEstimates(BaseModel):
     """
@@ -158,11 +167,11 @@ class CalcData(BaseModel):
     """
     # ROUGH GPU estimates per concurrency: these are not used for the final GPU estimation
     # they are only available for information purposes
-    gpu_estimates: GPUEstimates = GPUEstimates
+    gpu_estimates: GPUEstimates = Field(default_factory=GPUEstimates)
     # Calc runner alerts
-    alerts: CalcAlerts = CalcAlerts
+    alerts: CalcAlerts = Field(default_factory=CalcAlerts)
     # Sizing metrics
-    sizing_metrics: SizingMetrics = SizingMetrics
+    sizing_metrics: SizingMetrics = Field(default_factory=SizingMetrics)
 
 
 class CalcRunnerOutput(BaseModel):
@@ -170,7 +179,10 @@ class CalcRunnerOutput(BaseModel):
     Output of the calc runner.
     """
     # GPU estimates based on the slope of the time vs concurrency, calculated online or offline
-    gpu_estimates: GPUEstimates
+    gpu_estimates: GPUEstimates = Field(default_factory=GPUEstimates)
+
+    # Linear fit results for analysis and debugging
+    fit_results: FitResults = Field(default_factory=FitResults)
 
     # Per-concurrency data (GPU estimates, out-of-range runs, and sizing metrics)
     calc_data: dict[int, CalcData] = {}
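The repeated change in this file fixes a subtle Pydantic bug: `field: Model = Model` assigns the class object itself as the default rather than an instance, whereas `Field(default_factory=...)` constructs a fresh instance per model. A self-contained illustration with stand-in models:

```python
from pydantic import BaseModel
from pydantic import Field


class Alerts(BaseModel):
    out_of_range: bool = False


class Metrics(BaseModel):
    # Before the fix, `alerts: Alerts = Alerts` made the default the *class*,
    # so `Metrics().alerts` would not be an Alerts instance at all.
    # default_factory builds a new, independent instance for each model.
    alerts: Alerts = Field(default_factory=Alerts)


m = Metrics()
assert isinstance(m.alerts, Alerts)
print(m.alerts)  # out_of_range=False
```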