ai-pipeline-core 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +84 -4
- ai_pipeline_core/documents/__init__.py +9 -0
- ai_pipeline_core/documents/document.py +1044 -152
- ai_pipeline_core/documents/document_list.py +147 -38
- ai_pipeline_core/documents/flow_document.py +112 -11
- ai_pipeline_core/documents/mime_type.py +173 -15
- ai_pipeline_core/documents/task_document.py +117 -12
- ai_pipeline_core/documents/temporary_document.py +84 -5
- ai_pipeline_core/documents/utils.py +41 -9
- ai_pipeline_core/exceptions.py +47 -11
- ai_pipeline_core/flow/__init__.py +2 -0
- ai_pipeline_core/flow/config.py +236 -27
- ai_pipeline_core/flow/options.py +50 -1
- ai_pipeline_core/llm/__init__.py +6 -0
- ai_pipeline_core/llm/ai_messages.py +125 -27
- ai_pipeline_core/llm/client.py +278 -26
- ai_pipeline_core/llm/model_options.py +130 -1
- ai_pipeline_core/llm/model_response.py +239 -35
- ai_pipeline_core/llm/model_types.py +67 -0
- ai_pipeline_core/logging/__init__.py +13 -0
- ai_pipeline_core/logging/logging_config.py +72 -20
- ai_pipeline_core/logging/logging_mixin.py +38 -32
- ai_pipeline_core/pipeline.py +363 -60
- ai_pipeline_core/prefect.py +48 -1
- ai_pipeline_core/prompt_manager.py +209 -24
- ai_pipeline_core/settings.py +108 -4
- ai_pipeline_core/simple_runner/__init__.py +5 -0
- ai_pipeline_core/simple_runner/cli.py +96 -11
- ai_pipeline_core/simple_runner/simple_runner.py +237 -4
- ai_pipeline_core/tracing.py +253 -30
- ai_pipeline_core-0.1.12.dist-info/METADATA +450 -0
- ai_pipeline_core-0.1.12.dist-info/RECORD +36 -0
- ai_pipeline_core-0.1.10.dist-info/METADATA +0 -538
- ai_pipeline_core-0.1.10.dist-info/RECORD +0 -36
- {ai_pipeline_core-0.1.10.dist-info → ai_pipeline_core-0.1.12.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.1.10.dist-info → ai_pipeline_core-0.1.12.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/pipeline.py
CHANGED
|
@@ -1,51 +1,26 @@
|
|
|
1
|
-
"""
|
|
2
|
-
ai_pipeline_core.pipeline
|
|
3
|
-
=========================
|
|
4
|
-
|
|
5
|
-
Tiny wrappers around Prefect's public ``@task`` and ``@flow`` that add our
|
|
6
|
-
``trace`` decorator and **require async functions**.
|
|
7
|
-
|
|
8
|
-
Why this exists
|
|
9
|
-
---------------
|
|
10
|
-
Prefect tasks/flows are awaitable at runtime, but their public type stubs
|
|
11
|
-
don’t declare that clearly. We therefore:
|
|
12
|
-
|
|
13
|
-
1) Return the **real Prefect objects** (so you keep every Prefect method).
|
|
14
|
-
2) Type them as small Protocols that say “this is awaitable and has common
|
|
15
|
-
helpers like `.submit`/`.map`”.
|
|
1
|
+
"""Pipeline decorators with Prefect integration and tracing.
|
|
16
2
|
|
|
17
|
-
|
|
18
|
-
leaking advanced typing constructs (like ``ParamSpec``) that confuse tools
|
|
19
|
-
that introspect callables (e.g., Pydantic).
|
|
3
|
+
@public
|
|
20
4
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
from ai_pipeline_core.pipeline import pipeline_task, pipeline_flow
|
|
24
|
-
from ai_pipeline_core.documents import DocumentList
|
|
25
|
-
from ai_pipeline_core.flow.options import FlowOptions
|
|
26
|
-
|
|
27
|
-
@pipeline_task
|
|
28
|
-
async def add(x: int, y: int) -> int:
|
|
29
|
-
return x + y
|
|
30
|
-
|
|
31
|
-
@pipeline_flow
|
|
32
|
-
async def my_flow(project_name: str, docs: DocumentList, opts: FlowOptions) -> DocumentList:
|
|
33
|
-
await add(1, 2) # awaitable and typed
|
|
34
|
-
return docs
|
|
35
|
-
|
|
36
|
-
Rules
|
|
37
|
-
-----
|
|
38
|
-
• Your decorated function **must** be ``async def``.
|
|
39
|
-
• ``@pipeline_flow`` functions must accept at least:
|
|
40
|
-
(project_name: str, documents: DocumentList, flow_options: FlowOptions | subclass).
|
|
41
|
-
• Both wrappers return the same Prefect objects you’d get from Prefect directly.
|
|
5
|
+
Wrappers around Prefect's @task and @flow that add Laminar tracing
|
|
6
|
+
and enforce async-only execution for consistency.
|
|
42
7
|
"""
|
|
43
8
|
|
|
44
9
|
from __future__ import annotations
|
|
45
10
|
|
|
46
11
|
import datetime
|
|
47
12
|
import inspect
|
|
48
|
-
from typing import
|
|
13
|
+
from typing import (
|
|
14
|
+
Any,
|
|
15
|
+
Callable,
|
|
16
|
+
Coroutine,
|
|
17
|
+
Iterable,
|
|
18
|
+
Protocol,
|
|
19
|
+
TypeVar,
|
|
20
|
+
Union,
|
|
21
|
+
cast,
|
|
22
|
+
overload,
|
|
23
|
+
)
|
|
49
24
|
|
|
50
25
|
from prefect.assets import Asset
|
|
51
26
|
from prefect.cache_policies import CachePolicy
|
|
@@ -79,7 +54,27 @@ FO_contra = TypeVar("FO_contra", bound=FlowOptions, contravariant=True)
|
|
|
79
54
|
|
|
80
55
|
|
|
81
56
|
class _TaskLike(Protocol[R_co]):
|
|
82
|
-
"""
|
|
57
|
+
"""Protocol for type-safe Prefect task representation.
|
|
58
|
+
|
|
59
|
+
Defines the minimal interface for a Prefect task as seen by
|
|
60
|
+
type checkers. Ensures tasks are awaitable and have common
|
|
61
|
+
Prefect task methods.
|
|
62
|
+
|
|
63
|
+
Type Parameter:
|
|
64
|
+
R_co: Covariant return type of the task.
|
|
65
|
+
|
|
66
|
+
Methods:
|
|
67
|
+
__call__: Makes the task awaitable.
|
|
68
|
+
submit: Submit task for asynchronous execution.
|
|
69
|
+
map: Map task over multiple inputs.
|
|
70
|
+
|
|
71
|
+
Attributes:
|
|
72
|
+
name: Optional task name.
|
|
73
|
+
|
|
74
|
+
Note:
|
|
75
|
+
This is a typing Protocol, not a runtime class.
|
|
76
|
+
__getattr__ allows accessing Prefect-specific helpers.
|
|
77
|
+
"""
|
|
83
78
|
|
|
84
79
|
def __call__(self, *args: Any, **kwargs: Any) -> Coroutine[Any, Any, R_co]: ...
|
|
85
80
|
|
|
@@ -91,7 +86,27 @@ class _TaskLike(Protocol[R_co]):
|
|
|
91
86
|
|
|
92
87
|
|
|
93
88
|
class _DocumentsFlowCallable(Protocol[FO_contra]):
|
|
94
|
-
"""
|
|
89
|
+
"""Protocol for user-defined flow functions.
|
|
90
|
+
|
|
91
|
+
Defines the required signature for functions that will be
|
|
92
|
+
decorated with @pipeline_flow. Enforces the standard parameters
|
|
93
|
+
for document processing flows.
|
|
94
|
+
|
|
95
|
+
Type Parameter:
|
|
96
|
+
FO_contra: Contravariant FlowOptions type (or subclass).
|
|
97
|
+
|
|
98
|
+
Required Parameters:
|
|
99
|
+
project_name: Name of the project/pipeline.
|
|
100
|
+
documents: Input DocumentList to process.
|
|
101
|
+
flow_options: Configuration options (FlowOptions or subclass).
|
|
102
|
+
*args, **kwargs: Additional flow-specific parameters.
|
|
103
|
+
|
|
104
|
+
Returns:
|
|
105
|
+
DocumentList: Processed documents.
|
|
106
|
+
|
|
107
|
+
Note:
|
|
108
|
+
Functions must be async and return DocumentList.
|
|
109
|
+
"""
|
|
95
110
|
|
|
96
111
|
def __call__(
|
|
97
112
|
self,
|
|
@@ -104,7 +119,26 @@ class _DocumentsFlowCallable(Protocol[FO_contra]):
|
|
|
104
119
|
|
|
105
120
|
|
|
106
121
|
class _FlowLike(Protocol[FO_contra]):
|
|
107
|
-
"""
|
|
122
|
+
"""Protocol for decorated flow objects returned to users.
|
|
123
|
+
|
|
124
|
+
Represents the callable object returned by @pipeline_flow,
|
|
125
|
+
which wraps the original flow function with Prefect and
|
|
126
|
+
tracing capabilities.
|
|
127
|
+
|
|
128
|
+
Type Parameter:
|
|
129
|
+
FO_contra: Contravariant FlowOptions type.
|
|
130
|
+
|
|
131
|
+
Callable Signature:
|
|
132
|
+
Same as _DocumentsFlowCallable - accepts project_name,
|
|
133
|
+
documents, flow_options, plus additional arguments.
|
|
134
|
+
|
|
135
|
+
Attributes:
|
|
136
|
+
name: Optional flow name from decorator.
|
|
137
|
+
|
|
138
|
+
Note:
|
|
139
|
+
__getattr__ provides access to all Prefect flow methods
|
|
140
|
+
without explicit typing (e.g., .serve(), .deploy()).
|
|
141
|
+
"""
|
|
108
142
|
|
|
109
143
|
def __call__(
|
|
110
144
|
self,
|
|
@@ -124,6 +158,18 @@ class _FlowLike(Protocol[FO_contra]):
|
|
|
124
158
|
# Small helper: safely get a callable's name without upsetting the type checker
|
|
125
159
|
# --------------------------------------------------------------------------- #
|
|
126
160
|
def _callable_name(obj: Any, fallback: str) -> str:
|
|
161
|
+
"""Safely extract callable's name for error messages.
|
|
162
|
+
|
|
163
|
+
Args:
|
|
164
|
+
obj: Any object that might have a __name__ attribute.
|
|
165
|
+
fallback: Default name if extraction fails.
|
|
166
|
+
|
|
167
|
+
Returns:
|
|
168
|
+
The callable's __name__ if available, fallback otherwise.
|
|
169
|
+
|
|
170
|
+
Note:
|
|
171
|
+
Internal helper that never raises exceptions.
|
|
172
|
+
"""
|
|
127
173
|
try:
|
|
128
174
|
n = getattr(obj, "__name__", None)
|
|
129
175
|
return n if isinstance(n, str) else fallback
|
|
@@ -131,6 +177,38 @@ def _callable_name(obj: Any, fallback: str) -> str:
|
|
|
131
177
|
return fallback
|
|
132
178
|
|
|
133
179
|
|
|
180
|
+
def _is_already_traced(func: Callable[..., Any]) -> bool:
|
|
181
|
+
"""Check if a function has already been wrapped by the trace decorator.
|
|
182
|
+
|
|
183
|
+
This checks both for the explicit __is_traced__ marker and walks
|
|
184
|
+
the __wrapped__ chain to detect nested trace decorations.
|
|
185
|
+
|
|
186
|
+
Args:
|
|
187
|
+
func: Function to check for existing trace decoration.
|
|
188
|
+
|
|
189
|
+
Returns:
|
|
190
|
+
True if the function is already traced, False otherwise.
|
|
191
|
+
"""
|
|
192
|
+
# Check for explicit marker
|
|
193
|
+
if hasattr(func, "__is_traced__") and func.__is_traced__: # type: ignore[attr-defined]
|
|
194
|
+
return True
|
|
195
|
+
|
|
196
|
+
# Walk the __wrapped__ chain to detect nested traces
|
|
197
|
+
current = func
|
|
198
|
+
depth = 0
|
|
199
|
+
max_depth = 10 # Prevent infinite loops
|
|
200
|
+
|
|
201
|
+
while hasattr(current, "__wrapped__") and depth < max_depth:
|
|
202
|
+
wrapped = current.__wrapped__ # type: ignore[attr-defined]
|
|
203
|
+
# Check if the wrapped function has the trace marker
|
|
204
|
+
if hasattr(wrapped, "__is_traced__") and wrapped.__is_traced__: # type: ignore[attr-defined]
|
|
205
|
+
return True
|
|
206
|
+
current = wrapped
|
|
207
|
+
depth += 1
|
|
208
|
+
|
|
209
|
+
return False
|
|
210
|
+
|
|
211
|
+
|
|
134
212
|
# --------------------------------------------------------------------------- #
|
|
135
213
|
# @pipeline_task — async-only, traced, returns Prefect's Task object
|
|
136
214
|
# --------------------------------------------------------------------------- #
|
|
@@ -211,18 +289,125 @@ def pipeline_task(
|
|
|
211
289
|
viz_return_value: bool | None = None,
|
|
212
290
|
asset_deps: list[str | Asset] | None = None,
|
|
213
291
|
) -> _TaskLike[R_co] | Callable[[Callable[..., Coroutine[Any, Any, R_co]]], _TaskLike[R_co]]:
|
|
214
|
-
"""Decorate an
|
|
292
|
+
"""Decorate an async function as a traced Prefect task.
|
|
293
|
+
|
|
294
|
+
@public
|
|
295
|
+
|
|
296
|
+
Wraps an async function with both Prefect task functionality and
|
|
297
|
+
LMNR tracing. The function MUST be async (declared with 'async def').
|
|
298
|
+
|
|
299
|
+
IMPORTANT: Never combine with the @trace decorator - @pipeline_task already applies tracing automatically.
|
|
300
|
+
The framework will raise TypeError if you try to use both decorators together.
|
|
301
|
+
|
|
302
|
+
Best Practice - Use Defaults:
|
|
303
|
+
For 90% of use cases, use this decorator WITHOUT any parameters.
|
|
304
|
+
Only specify parameters when you have EXPLICIT requirements.
|
|
305
|
+
|
|
306
|
+
Args:
|
|
307
|
+
__fn: Function to decorate (when used without parentheses).
|
|
308
|
+
|
|
309
|
+
Tracing parameters:
|
|
310
|
+
trace_level: When to trace ("always", "debug", "off").
|
|
311
|
+
- "always": Always trace (default)
|
|
312
|
+
- "debug": Only trace when LMNR_DEBUG="true"
|
|
313
|
+
- "off": Disable tracing
|
|
314
|
+
trace_ignore_input: Don't trace input arguments.
|
|
315
|
+
trace_ignore_output: Don't trace return value.
|
|
316
|
+
trace_ignore_inputs: List of parameter names to exclude from tracing.
|
|
317
|
+
trace_input_formatter: Custom formatter for input tracing.
|
|
318
|
+
trace_output_formatter: Custom formatter for output tracing.
|
|
319
|
+
|
|
320
|
+
Prefect task parameters:
|
|
321
|
+
name: Task name (defaults to function name).
|
|
322
|
+
description: Human-readable task description.
|
|
323
|
+
tags: Tags for organization and filtering.
|
|
324
|
+
version: Task version string.
|
|
325
|
+
cache_policy: Caching policy for task results.
|
|
326
|
+
cache_key_fn: Custom cache key generation.
|
|
327
|
+
cache_expiration: How long to cache results.
|
|
328
|
+
task_run_name: Dynamic or static run name.
|
|
329
|
+
retries: Number of retry attempts (default 0).
|
|
330
|
+
retry_delay_seconds: Delay between retries.
|
|
331
|
+
retry_jitter_factor: Random jitter for retry delays.
|
|
332
|
+
persist_result: Whether to persist results.
|
|
333
|
+
result_storage: Where to store results.
|
|
334
|
+
result_serializer: How to serialize results.
|
|
335
|
+
result_storage_key: Custom storage key.
|
|
336
|
+
cache_result_in_memory: Keep results in memory.
|
|
337
|
+
timeout_seconds: Task execution timeout.
|
|
338
|
+
log_prints: Capture print() statements.
|
|
339
|
+
refresh_cache: Force cache refresh.
|
|
340
|
+
on_completion: Hooks for successful completion.
|
|
341
|
+
on_failure: Hooks for task failure.
|
|
342
|
+
retry_condition_fn: Custom retry condition.
|
|
343
|
+
viz_return_value: Include return value in visualization.
|
|
344
|
+
asset_deps: Upstream asset dependencies.
|
|
345
|
+
|
|
346
|
+
Returns:
|
|
347
|
+
Decorated task callable that is awaitable and has Prefect
|
|
348
|
+
task methods (submit, map, etc.).
|
|
349
|
+
|
|
350
|
+
Example:
|
|
351
|
+
>>> # RECOMMENDED - No parameters needed!
|
|
352
|
+
>>> @pipeline_task
|
|
353
|
+
... async def process_document(doc: Document) -> Document:
|
|
354
|
+
... result = await analyze(doc)
|
|
355
|
+
... return result
|
|
356
|
+
>>>
|
|
357
|
+
>>> # With parameters (only when necessary):
|
|
358
|
+
>>> @pipeline_task(retries=5) # Only for known flaky operations
|
|
359
|
+
... async def unreliable_api_call(url: str) -> dict:
|
|
360
|
+
... # This API fails often, needs extra retries
|
|
361
|
+
... return await fetch_with_retry(url)
|
|
362
|
+
>>>
|
|
363
|
+
>>> # AVOID specifying defaults - they're already optimal:
|
|
364
|
+
>>> # - Automatic task naming
|
|
365
|
+
>>> # - Standard retry policy
|
|
366
|
+
>>> # - Sensible timeout
|
|
367
|
+
>>> # - Full observability
|
|
368
|
+
|
|
369
|
+
Performance:
|
|
370
|
+
- Task decoration overhead: ~1-2ms
|
|
371
|
+
- Tracing overhead: ~1-2ms per call
|
|
372
|
+
- Prefect state tracking: ~5-10ms
|
|
373
|
+
|
|
374
|
+
Note:
|
|
375
|
+
Tasks are automatically traced with LMNR and appear in
|
|
376
|
+
both Prefect and LMNR dashboards.
|
|
377
|
+
|
|
378
|
+
See Also:
|
|
379
|
+
- pipeline_flow: For flow-level decoration
|
|
380
|
+
- trace: Lower-level tracing decorator
|
|
381
|
+
- prefect.task: Standard Prefect task (no tracing)
|
|
382
|
+
"""
|
|
215
383
|
task_decorator: Callable[..., Any] = _prefect_task # helps the type checker
|
|
216
384
|
|
|
217
385
|
def _apply(fn: Callable[..., Coroutine[Any, Any, R_co]]) -> _TaskLike[R_co]:
|
|
386
|
+
"""Apply pipeline_task decorator to async function.
|
|
387
|
+
|
|
388
|
+
Returns:
|
|
389
|
+
Wrapped task with tracing and Prefect functionality.
|
|
390
|
+
|
|
391
|
+
Raises:
|
|
392
|
+
TypeError: If function is not async or already traced.
|
|
393
|
+
"""
|
|
218
394
|
if not inspect.iscoroutinefunction(fn):
|
|
219
395
|
raise TypeError(
|
|
220
396
|
f"@pipeline_task target '{_callable_name(fn, 'task')}' must be 'async def'"
|
|
221
397
|
)
|
|
222
398
|
|
|
399
|
+
# Check if function is already traced
|
|
400
|
+
if _is_already_traced(fn):
|
|
401
|
+
raise TypeError(
|
|
402
|
+
f"@pipeline_task target '{_callable_name(fn, 'task')}' is already decorated "
|
|
403
|
+
f"with @trace. Remove the @trace decorator - @pipeline_task includes "
|
|
404
|
+
f"tracing automatically."
|
|
405
|
+
)
|
|
406
|
+
|
|
407
|
+
fname = _callable_name(fn, "task")
|
|
223
408
|
traced_fn = trace(
|
|
224
409
|
level=trace_level,
|
|
225
|
-
name=name or
|
|
410
|
+
name=name or fname,
|
|
226
411
|
ignore_input=trace_ignore_input,
|
|
227
412
|
ignore_output=trace_ignore_output,
|
|
228
413
|
ignore_inputs=trace_ignore_inputs,
|
|
@@ -233,14 +418,14 @@ def pipeline_task(
|
|
|
233
418
|
return cast(
|
|
234
419
|
_TaskLike[R_co],
|
|
235
420
|
task_decorator(
|
|
236
|
-
name=name,
|
|
421
|
+
name=name or fname,
|
|
237
422
|
description=description,
|
|
238
423
|
tags=tags,
|
|
239
424
|
version=version,
|
|
240
425
|
cache_policy=cache_policy,
|
|
241
426
|
cache_key_fn=cache_key_fn,
|
|
242
427
|
cache_expiration=cache_expiration,
|
|
243
|
-
task_run_name=task_run_name,
|
|
428
|
+
task_run_name=task_run_name or name or fname,
|
|
244
429
|
retries=0 if retries is None else retries,
|
|
245
430
|
retry_delay_seconds=retry_delay_seconds,
|
|
246
431
|
retry_jitter_factor=retry_jitter_factor,
|
|
@@ -264,7 +449,7 @@ def pipeline_task(
|
|
|
264
449
|
|
|
265
450
|
|
|
266
451
|
# --------------------------------------------------------------------------- #
|
|
267
|
-
# @pipeline_flow — async-only, traced, returns Prefect
|
|
452
|
+
# @pipeline_flow — async-only, traced, returns Prefect's flow wrapper
|
|
268
453
|
# --------------------------------------------------------------------------- #
|
|
269
454
|
@overload
|
|
270
455
|
def pipeline_flow(__fn: _DocumentsFlowCallable[FO_contra], /) -> _FlowLike[FO_contra]: ...
|
|
@@ -333,26 +518,140 @@ def pipeline_flow(
|
|
|
333
518
|
on_crashed: list[FlowStateHook[Any, Any]] | None = None,
|
|
334
519
|
on_running: list[FlowStateHook[Any, Any]] | None = None,
|
|
335
520
|
) -> _FlowLike[FO_contra] | Callable[[_DocumentsFlowCallable[FO_contra]], _FlowLike[FO_contra]]:
|
|
336
|
-
"""Decorate an
|
|
521
|
+
"""Decorate an async flow for document processing.
|
|
337
522
|
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
flow_options: FlowOptions, # or any subclass
|
|
343
|
-
*args,
|
|
344
|
-
**kwargs
|
|
345
|
-
) -> DocumentList
|
|
523
|
+
@public
|
|
524
|
+
|
|
525
|
+
Wraps an async function as a Prefect flow with tracing and type safety.
|
|
526
|
+
The decorated function MUST be async and follow the required signature.
|
|
346
527
|
|
|
347
|
-
|
|
528
|
+
IMPORTANT: Never combine with the @trace decorator - @pipeline_flow already applies tracing automatically.
|
|
529
|
+
The framework will raise TypeError if you try to use both decorators together.
|
|
530
|
+
|
|
531
|
+
Best Practice - Use Defaults:
|
|
532
|
+
For 90% of use cases, use this decorator WITHOUT any parameters.
|
|
533
|
+
Only specify parameters when you have EXPLICIT requirements.
|
|
534
|
+
|
|
535
|
+
Required function signature:
|
|
536
|
+
async def flow_fn(
|
|
537
|
+
project_name: str, # Project/pipeline identifier
|
|
538
|
+
documents: DocumentList, # Input documents to process
|
|
539
|
+
flow_options: FlowOptions, # Configuration (or subclass)
|
|
540
|
+
*args, # Additional positional args for custom parameters
|
|
541
|
+
**kwargs # Additional keyword args for custom parameters
|
|
542
|
+
) -> DocumentList # Must return DocumentList
|
|
543
|
+
|
|
544
|
+
Note: *args and **kwargs allow for defining custom parameters on your flow
|
|
545
|
+
function, which can be passed during execution for flow-specific needs.
|
|
546
|
+
|
|
547
|
+
Args:
|
|
548
|
+
__fn: Function to decorate (when used without parentheses).
|
|
549
|
+
|
|
550
|
+
Tracing parameters:
|
|
551
|
+
trace_level: When to trace ("always", "debug", "off").
|
|
552
|
+
- "always": Always trace (default)
|
|
553
|
+
- "debug": Only trace when LMNR_DEBUG="true"
|
|
554
|
+
- "off": Disable tracing
|
|
555
|
+
trace_ignore_input: Don't trace input arguments.
|
|
556
|
+
trace_ignore_output: Don't trace return value.
|
|
557
|
+
trace_ignore_inputs: Parameter names to exclude from tracing.
|
|
558
|
+
trace_input_formatter: Custom input formatter.
|
|
559
|
+
trace_output_formatter: Custom output formatter.
|
|
560
|
+
|
|
561
|
+
Prefect flow parameters:
|
|
562
|
+
name: Flow name (defaults to function name).
|
|
563
|
+
version: Flow version identifier.
|
|
564
|
+
flow_run_name: Static or dynamic run name.
|
|
565
|
+
retries: Number of flow retry attempts (default 0).
|
|
566
|
+
retry_delay_seconds: Delay between flow retries.
|
|
567
|
+
task_runner: Task execution strategy (sequential/concurrent).
|
|
568
|
+
description: Human-readable flow description.
|
|
569
|
+
timeout_seconds: Flow execution timeout.
|
|
570
|
+
validate_parameters: Validate input parameters.
|
|
571
|
+
persist_result: Persist flow results.
|
|
572
|
+
result_storage: Where to store results.
|
|
573
|
+
result_serializer: How to serialize results.
|
|
574
|
+
cache_result_in_memory: Keep results in memory.
|
|
575
|
+
log_prints: Capture print() statements.
|
|
576
|
+
on_completion: Hooks for successful completion.
|
|
577
|
+
on_failure: Hooks for flow failure.
|
|
578
|
+
on_cancellation: Hooks for flow cancellation.
|
|
579
|
+
on_crashed: Hooks for flow crashes.
|
|
580
|
+
on_running: Hooks for flow start.
|
|
581
|
+
|
|
582
|
+
Returns:
|
|
583
|
+
Decorated flow callable that maintains Prefect flow interface
|
|
584
|
+
while enforcing document processing conventions.
|
|
585
|
+
|
|
586
|
+
Example:
|
|
587
|
+
>>> from ai_pipeline_core import FlowOptions
|
|
588
|
+
>>>
|
|
589
|
+
>>> # RECOMMENDED - No parameters needed!
|
|
590
|
+
>>> @pipeline_flow
|
|
591
|
+
... async def analyze_documents(
|
|
592
|
+
... project_name: str,
|
|
593
|
+
... documents: DocumentList,
|
|
594
|
+
... flow_options: FlowOptions
|
|
595
|
+
... ) -> DocumentList:
|
|
596
|
+
... # Process each document
|
|
597
|
+
... results = []
|
|
598
|
+
... for doc in documents:
|
|
599
|
+
... result = await process(doc)
|
|
600
|
+
... results.append(result)
|
|
601
|
+
... return DocumentList(results)
|
|
602
|
+
>>>
|
|
603
|
+
>>> # With parameters (only when necessary):
|
|
604
|
+
>>> @pipeline_flow(retries=2) # Only for flows that need retry logic
|
|
605
|
+
... async def critical_flow(
|
|
606
|
+
... project_name: str,
|
|
607
|
+
... documents: DocumentList,
|
|
608
|
+
... flow_options: FlowOptions
|
|
609
|
+
... ) -> DocumentList:
|
|
610
|
+
... # Critical processing that might fail
|
|
611
|
+
... return await process_critical(documents)
|
|
612
|
+
>>>
|
|
613
|
+
>>> # AVOID specifying defaults - they're already optimal:
|
|
614
|
+
>>> # - Automatic flow naming
|
|
615
|
+
>>> # - Standard retry policy
|
|
616
|
+
>>> # - Full observability
|
|
617
|
+
|
|
618
|
+
Note:
|
|
619
|
+
- Flow is wrapped with both Prefect and LMNR tracing
|
|
620
|
+
- Return type is validated at runtime
|
|
621
|
+
- FlowOptions can be subclassed for custom configuration
|
|
622
|
+
- All Prefect flow methods (.serve(), .deploy()) are available
|
|
623
|
+
|
|
624
|
+
See Also:
|
|
625
|
+
- pipeline_task: For task-level decoration
|
|
626
|
+
- FlowConfig: Type-safe flow configuration
|
|
627
|
+
- FlowOptions: Base class for flow options
|
|
628
|
+
- simple_runner.run_pipeline: Execute flows locally
|
|
348
629
|
"""
|
|
349
630
|
flow_decorator: Callable[..., Any] = _prefect_flow
|
|
350
631
|
|
|
351
632
|
def _apply(fn: _DocumentsFlowCallable[FO_contra]) -> _FlowLike[FO_contra]:
|
|
633
|
+
"""Apply pipeline_flow decorator to flow function.
|
|
634
|
+
|
|
635
|
+
Returns:
|
|
636
|
+
Wrapped flow with tracing and Prefect functionality.
|
|
637
|
+
|
|
638
|
+
Raises:
|
|
639
|
+
TypeError: If function is not async, already traced, doesn't have
|
|
640
|
+
required parameters, or doesn't return DocumentList.
|
|
641
|
+
"""
|
|
352
642
|
fname = _callable_name(fn, "flow")
|
|
353
643
|
|
|
354
644
|
if not inspect.iscoroutinefunction(fn):
|
|
355
645
|
raise TypeError(f"@pipeline_flow '{fname}' must be declared with 'async def'")
|
|
646
|
+
|
|
647
|
+
# Check if function is already traced
|
|
648
|
+
if _is_already_traced(fn):
|
|
649
|
+
raise TypeError(
|
|
650
|
+
f"@pipeline_flow target '{fname}' is already decorated "
|
|
651
|
+
f"with @trace. Remove the @trace decorator - @pipeline_flow includes "
|
|
652
|
+
f"tracing automatically."
|
|
653
|
+
)
|
|
654
|
+
|
|
356
655
|
if len(inspect.signature(fn).parameters) < 3:
|
|
357
656
|
raise TypeError(
|
|
358
657
|
f"@pipeline_flow '{fname}' must accept "
|
|
@@ -373,6 +672,10 @@ def pipeline_flow(
|
|
|
373
672
|
)
|
|
374
673
|
return result
|
|
375
674
|
|
|
675
|
+
# Preserve the original function name for Prefect
|
|
676
|
+
_wrapper.__name__ = fname
|
|
677
|
+
_wrapper.__qualname__ = getattr(fn, "__qualname__", fname)
|
|
678
|
+
|
|
376
679
|
traced = trace(
|
|
377
680
|
level=trace_level,
|
|
378
681
|
name=name or fname,
|
|
@@ -386,9 +689,9 @@ def pipeline_flow(
|
|
|
386
689
|
return cast(
|
|
387
690
|
_FlowLike[FO_contra],
|
|
388
691
|
flow_decorator(
|
|
389
|
-
name=name,
|
|
692
|
+
name=name or fname,
|
|
390
693
|
version=version,
|
|
391
|
-
flow_run_name=flow_run_name,
|
|
694
|
+
flow_run_name=flow_run_name or name or fname,
|
|
392
695
|
retries=0 if retries is None else retries,
|
|
393
696
|
retry_delay_seconds=retry_delay_seconds,
|
|
394
697
|
task_runner=task_runner,
|
ai_pipeline_core/prefect.py
CHANGED
|
@@ -1,4 +1,51 @@
|
|
|
1
|
-
"""Prefect core features.
|
|
1
|
+
"""Prefect core features for pipeline orchestration.
|
|
2
|
+
|
|
3
|
+
This module provides clean re-exports of essential Prefect functionality.
|
|
4
|
+
|
|
5
|
+
IMPORTANT: You should NEVER use the `task` and `flow` decorators directly
|
|
6
|
+
unless it is 100% impossible to use `pipeline_task` and `pipeline_flow`.
|
|
7
|
+
The standard Prefect decorators are exported here only for extremely
|
|
8
|
+
limited edge cases where the pipeline decorators cannot be used.
|
|
9
|
+
|
|
10
|
+
Always prefer:
|
|
11
|
+
>>> from ai_pipeline_core import pipeline_task, pipeline_flow
|
|
12
|
+
>>>
|
|
13
|
+
>>> @pipeline_task
|
|
14
|
+
... async def my_task(...): ...
|
|
15
|
+
>>>
|
|
16
|
+
>>> @pipeline_flow
|
|
17
|
+
... async def my_flow(...): ...
|
|
18
|
+
|
|
19
|
+
The `task` and `flow` decorators should only be used when:
|
|
20
|
+
- You absolutely cannot convert to async (pipeline decorators require async)
|
|
21
|
+
- You have a very specific Prefect integration that conflicts with tracing
|
|
22
|
+
- You are writing test utilities or infrastructure code
|
|
23
|
+
|
|
24
|
+
Exported components:
|
|
25
|
+
task: Prefect task decorator (AVOID - use pipeline_task instead).
|
|
26
|
+
flow: Prefect flow decorator (AVOID - use pipeline_flow instead).
|
|
27
|
+
disable_run_logger: Context manager to suppress Prefect logging.
|
|
28
|
+
prefect_test_harness: Test harness for unit testing flows/tasks.
|
|
29
|
+
|
|
30
|
+
Testing utilities (use as fixtures):
|
|
31
|
+
The disable_run_logger and prefect_test_harness should be used as
|
|
32
|
+
pytest fixtures as shown in tests/conftest.py:
|
|
33
|
+
|
|
34
|
+
>>> @pytest.fixture(autouse=True, scope="session")
|
|
35
|
+
... def prefect_test_fixture():
|
|
36
|
+
... with prefect_test_harness():
|
|
37
|
+
... yield
|
|
38
|
+
>>>
|
|
39
|
+
>>> @pytest.fixture(autouse=True)
|
|
40
|
+
... def disable_prefect_logging():
|
|
41
|
+
... with disable_run_logger():
|
|
42
|
+
... yield
|
|
43
|
+
|
|
44
|
+
Note:
|
|
45
|
+
The pipeline_task and pipeline_flow decorators from
|
|
46
|
+
ai_pipeline_core.pipeline provide async-only execution with
|
|
47
|
+
integrated LMNR tracing and are the standard for this library.
|
|
48
|
+
"""
|
|
2
49
|
|
|
3
50
|
from prefect import flow, task
|
|
4
51
|
from prefect.logging import disable_run_logger
|