ai-pipeline-core 0.1.8__py3-none-any.whl → 0.1.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +86 -4
- ai_pipeline_core/documents/__init__.py +11 -0
- ai_pipeline_core/documents/document.py +1107 -131
- ai_pipeline_core/documents/document_list.py +147 -38
- ai_pipeline_core/documents/flow_document.py +112 -11
- ai_pipeline_core/documents/mime_type.py +173 -15
- ai_pipeline_core/documents/task_document.py +117 -12
- ai_pipeline_core/documents/temporary_document.py +95 -0
- ai_pipeline_core/documents/utils.py +41 -9
- ai_pipeline_core/exceptions.py +47 -11
- ai_pipeline_core/flow/__init__.py +2 -0
- ai_pipeline_core/flow/config.py +250 -23
- ai_pipeline_core/flow/options.py +50 -1
- ai_pipeline_core/llm/__init__.py +6 -0
- ai_pipeline_core/llm/ai_messages.py +125 -27
- ai_pipeline_core/llm/client.py +278 -26
- ai_pipeline_core/llm/model_options.py +130 -1
- ai_pipeline_core/llm/model_response.py +239 -35
- ai_pipeline_core/llm/model_types.py +67 -0
- ai_pipeline_core/logging/__init__.py +13 -0
- ai_pipeline_core/logging/logging_config.py +72 -20
- ai_pipeline_core/logging/logging_mixin.py +38 -32
- ai_pipeline_core/pipeline.py +308 -60
- ai_pipeline_core/prefect.py +48 -1
- ai_pipeline_core/prompt_manager.py +215 -24
- ai_pipeline_core/settings.py +108 -4
- ai_pipeline_core/simple_runner/__init__.py +5 -0
- ai_pipeline_core/simple_runner/cli.py +145 -17
- ai_pipeline_core/simple_runner/simple_runner.py +244 -6
- ai_pipeline_core/tracing.py +232 -30
- ai_pipeline_core-0.1.11.dist-info/METADATA +450 -0
- ai_pipeline_core-0.1.11.dist-info/RECORD +36 -0
- ai_pipeline_core-0.1.8.dist-info/METADATA +0 -558
- ai_pipeline_core-0.1.8.dist-info/RECORD +0 -35
- {ai_pipeline_core-0.1.8.dist-info → ai_pipeline_core-0.1.11.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.1.8.dist-info → ai_pipeline_core-0.1.11.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,4 +1,7 @@
|
|
|
1
|
-
"""Logging mixin for consistent logging across components using Prefect logging
|
|
1
|
+
"""Logging mixin for consistent logging across components using Prefect logging.
|
|
2
|
+
|
|
3
|
+
@public
|
|
4
|
+
"""
|
|
2
5
|
|
|
3
6
|
import contextlib
|
|
4
7
|
import time
|
|
@@ -12,12 +15,17 @@ from prefect.logging import get_logger
|
|
|
12
15
|
|
|
13
16
|
|
|
14
17
|
class LoggerMixin:
|
|
15
|
-
"""
|
|
16
|
-
|
|
18
|
+
"""Mixin class that provides consistent logging functionality using Prefect's logging system.
|
|
19
|
+
|
|
20
|
+
@public
|
|
21
|
+
|
|
22
|
+
Note for users: In your code, always obtain loggers via get_pipeline_logger(__name__).
|
|
23
|
+
The mixin's internal behavior routes to the appropriate backend; you should not call
|
|
24
|
+
logging.getLogger directly.
|
|
17
25
|
|
|
18
26
|
Automatically uses appropriate logger based on context:
|
|
19
|
-
- get_run_logger() when in flow/task context
|
|
20
|
-
-
|
|
27
|
+
- prefect.get_run_logger() when in flow/task context
|
|
28
|
+
- Internal routing when outside flow/task context
|
|
21
29
|
"""
|
|
22
30
|
|
|
23
31
|
_logger_name: Optional[str] = None
|
|
@@ -30,7 +38,11 @@ class LoggerMixin:
|
|
|
30
38
|
return get_logger(self._logger_name or self.__class__.__module__)
|
|
31
39
|
|
|
32
40
|
def _get_run_logger(self):
|
|
33
|
-
"""Attempt to get Prefect run logger.
|
|
41
|
+
"""Attempt to get Prefect run logger.
|
|
42
|
+
|
|
43
|
+
Returns:
|
|
44
|
+
The Prefect run logger if in a flow/task context, None otherwise.
|
|
45
|
+
"""
|
|
34
46
|
# Intentionally broad: Must handle any exception when checking context
|
|
35
47
|
with contextlib.suppress(Exception):
|
|
36
48
|
if FlowRunContext.get() or TaskRunContext.get():
|
|
@@ -38,28 +50,27 @@ class LoggerMixin:
|
|
|
38
50
|
return None
|
|
39
51
|
|
|
40
52
|
def log_debug(self, message: str, **kwargs: Any) -> None:
|
|
41
|
-
"""Log debug message with optional context"""
|
|
53
|
+
"""Log debug message with optional context."""
|
|
42
54
|
self.logger.debug(message, extra=kwargs)
|
|
43
55
|
|
|
44
56
|
def log_info(self, message: str, **kwargs: Any) -> None:
|
|
45
|
-
"""Log info message with optional context"""
|
|
57
|
+
"""Log info message with optional context."""
|
|
46
58
|
self.logger.info(message, extra=kwargs)
|
|
47
59
|
|
|
48
60
|
def log_warning(self, message: str, **kwargs: Any) -> None:
|
|
49
|
-
"""Log warning message with optional context"""
|
|
61
|
+
"""Log warning message with optional context."""
|
|
50
62
|
self.logger.warning(message, extra=kwargs)
|
|
51
63
|
|
|
52
64
|
def log_error(self, message: str, exc_info: bool = False, **kwargs: Any) -> None:
|
|
53
|
-
"""Log error message with optional exception info"""
|
|
65
|
+
"""Log error message with optional exception info."""
|
|
54
66
|
self.logger.error(message, exc_info=exc_info, extra=kwargs)
|
|
55
67
|
|
|
56
68
|
def log_critical(self, message: str, exc_info: bool = False, **kwargs: Any) -> None:
|
|
57
|
-
"""Log critical message with optional exception info"""
|
|
69
|
+
"""Log critical message with optional exception info."""
|
|
58
70
|
self.logger.critical(message, exc_info=exc_info, extra=kwargs)
|
|
59
71
|
|
|
60
72
|
def log_with_context(self, level: str, message: str, context: Dict[str, Any]) -> None:
|
|
61
|
-
"""
|
|
62
|
-
Log message with structured context
|
|
73
|
+
"""Log message with structured context.
|
|
63
74
|
|
|
64
75
|
Args:
|
|
65
76
|
level: Log level (debug, info, warning, error, critical)
|
|
@@ -83,13 +94,13 @@ class LoggerMixin:
|
|
|
83
94
|
|
|
84
95
|
|
|
85
96
|
class StructuredLoggerMixin(LoggerMixin):
|
|
86
|
-
"""
|
|
87
|
-
|
|
97
|
+
"""Extended mixin for structured logging with Prefect.
|
|
98
|
+
|
|
99
|
+
@public
|
|
88
100
|
"""
|
|
89
101
|
|
|
90
102
|
def log_event(self, event: str, **kwargs: Any) -> None:
|
|
91
|
-
"""
|
|
92
|
-
Log a structured event
|
|
103
|
+
"""Log a structured event.
|
|
93
104
|
|
|
94
105
|
Args:
|
|
95
106
|
event: Event name
|
|
@@ -104,8 +115,7 @@ class StructuredLoggerMixin(LoggerMixin):
|
|
|
104
115
|
self.logger.info(event, extra={"event": event, "structured": True, **kwargs})
|
|
105
116
|
|
|
106
117
|
def log_metric(self, metric_name: str, value: float, unit: str = "", **tags: Any) -> None:
|
|
107
|
-
"""
|
|
108
|
-
Log a metric value
|
|
118
|
+
"""Log a metric value.
|
|
109
119
|
|
|
110
120
|
Args:
|
|
111
121
|
metric_name: Name of the metric
|
|
@@ -129,8 +139,7 @@ class StructuredLoggerMixin(LoggerMixin):
|
|
|
129
139
|
)
|
|
130
140
|
|
|
131
141
|
def log_span(self, operation: str, duration_ms: float, **attributes: Any) -> None:
|
|
132
|
-
"""
|
|
133
|
-
Log a span (operation with duration)
|
|
142
|
+
"""Log a span (operation with duration).
|
|
134
143
|
|
|
135
144
|
Args:
|
|
136
145
|
operation: Operation name
|
|
@@ -153,8 +162,7 @@ class StructuredLoggerMixin(LoggerMixin):
|
|
|
153
162
|
|
|
154
163
|
@contextmanager
|
|
155
164
|
def log_operation(self, operation: str, **context: Any) -> Generator[None, None, None]:
|
|
156
|
-
"""
|
|
157
|
-
Context manager for logging operations with timing
|
|
165
|
+
"""Context manager for logging operations with timing.
|
|
158
166
|
|
|
159
167
|
Args:
|
|
160
168
|
operation: Operation name
|
|
@@ -188,36 +196,34 @@ class StructuredLoggerMixin(LoggerMixin):
|
|
|
188
196
|
|
|
189
197
|
|
|
190
198
|
class PrefectLoggerMixin(StructuredLoggerMixin):
|
|
191
|
-
"""
|
|
192
|
-
Enhanced mixin specifically for Prefect flows and tasks
|
|
193
|
-
"""
|
|
199
|
+
"""Enhanced mixin specifically for Prefect flows and tasks."""
|
|
194
200
|
|
|
195
201
|
def log_flow_start(self, flow_name: str, parameters: Dict[str, Any]) -> None:
|
|
196
|
-
"""Log flow start with parameters"""
|
|
202
|
+
"""Log flow start with parameters."""
|
|
197
203
|
self.log_event("flow_started", flow_name=flow_name, parameters=parameters)
|
|
198
204
|
|
|
199
205
|
def log_flow_end(self, flow_name: str, status: str, duration_ms: float) -> None:
|
|
200
|
-
"""Log flow completion"""
|
|
206
|
+
"""Log flow completion."""
|
|
201
207
|
self.log_event(
|
|
202
208
|
"flow_completed", flow_name=flow_name, status=status, duration_ms=duration_ms
|
|
203
209
|
)
|
|
204
210
|
|
|
205
211
|
def log_task_start(self, task_name: str, inputs: Dict[str, Any]) -> None:
|
|
206
|
-
"""Log task start with inputs"""
|
|
212
|
+
"""Log task start with inputs."""
|
|
207
213
|
self.log_event("task_started", task_name=task_name, inputs=inputs)
|
|
208
214
|
|
|
209
215
|
def log_task_end(self, task_name: str, status: str, duration_ms: float) -> None:
|
|
210
|
-
"""Log task completion"""
|
|
216
|
+
"""Log task completion."""
|
|
211
217
|
self.log_event(
|
|
212
218
|
"task_completed", task_name=task_name, status=status, duration_ms=duration_ms
|
|
213
219
|
)
|
|
214
220
|
|
|
215
221
|
def log_retry(self, operation: str, attempt: int, max_attempts: int, error: str) -> None:
|
|
216
|
-
"""Log retry attempt"""
|
|
222
|
+
"""Log retry attempt."""
|
|
217
223
|
self.log_warning(
|
|
218
224
|
f"Retrying {operation}", attempt=attempt, max_attempts=max_attempts, error=error
|
|
219
225
|
)
|
|
220
226
|
|
|
221
227
|
def log_checkpoint(self, checkpoint_name: str, **data: Any) -> None:
|
|
222
|
-
"""Log a checkpoint in processing"""
|
|
228
|
+
"""Log a checkpoint in processing."""
|
|
223
229
|
self.log_info(f"Checkpoint: {checkpoint_name}", checkpoint=checkpoint_name, **data)
|
ai_pipeline_core/pipeline.py
CHANGED
|
@@ -1,51 +1,26 @@
|
|
|
1
|
-
"""
|
|
2
|
-
ai_pipeline_core.pipeline
|
|
3
|
-
=========================
|
|
4
|
-
|
|
5
|
-
Tiny wrappers around Prefect's public ``@task`` and ``@flow`` that add our
|
|
6
|
-
``trace`` decorator and **require async functions**.
|
|
7
|
-
|
|
8
|
-
Why this exists
|
|
9
|
-
---------------
|
|
10
|
-
Prefect tasks/flows are awaitable at runtime, but their public type stubs
|
|
11
|
-
don’t declare that clearly. We therefore:
|
|
1
|
+
"""Pipeline decorators with Prefect integration and tracing.
|
|
12
2
|
|
|
13
|
-
|
|
14
|
-
2) Type them as small Protocols that say “this is awaitable and has common
|
|
15
|
-
helpers like `.submit`/`.map`”.
|
|
3
|
+
@public
|
|
16
4
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
that introspect callables (e.g., Pydantic).
|
|
20
|
-
|
|
21
|
-
Quick start
|
|
22
|
-
-----------
|
|
23
|
-
from ai_pipeline_core.pipeline import pipeline_task, pipeline_flow
|
|
24
|
-
from ai_pipeline_core.documents import DocumentList
|
|
25
|
-
from ai_pipeline_core.flow.options import FlowOptions
|
|
26
|
-
|
|
27
|
-
@pipeline_task
|
|
28
|
-
async def add(x: int, y: int) -> int:
|
|
29
|
-
return x + y
|
|
30
|
-
|
|
31
|
-
@pipeline_flow
|
|
32
|
-
async def my_flow(project_name: str, docs: DocumentList, opts: FlowOptions) -> DocumentList:
|
|
33
|
-
await add(1, 2) # awaitable and typed
|
|
34
|
-
return docs
|
|
35
|
-
|
|
36
|
-
Rules
|
|
37
|
-
-----
|
|
38
|
-
• Your decorated function **must** be ``async def``.
|
|
39
|
-
• ``@pipeline_flow`` functions must accept at least:
|
|
40
|
-
(project_name: str, documents: DocumentList, flow_options: FlowOptions | subclass).
|
|
41
|
-
• Both wrappers return the same Prefect objects you’d get from Prefect directly.
|
|
5
|
+
Wrappers around Prefect's @task and @flow that add Laminar tracing
|
|
6
|
+
and enforce async-only execution for consistency.
|
|
42
7
|
"""
|
|
43
8
|
|
|
44
9
|
from __future__ import annotations
|
|
45
10
|
|
|
46
11
|
import datetime
|
|
47
12
|
import inspect
|
|
48
|
-
from typing import
|
|
13
|
+
from typing import (
|
|
14
|
+
Any,
|
|
15
|
+
Callable,
|
|
16
|
+
Coroutine,
|
|
17
|
+
Iterable,
|
|
18
|
+
Protocol,
|
|
19
|
+
TypeVar,
|
|
20
|
+
Union,
|
|
21
|
+
cast,
|
|
22
|
+
overload,
|
|
23
|
+
)
|
|
49
24
|
|
|
50
25
|
from prefect.assets import Asset
|
|
51
26
|
from prefect.cache_policies import CachePolicy
|
|
@@ -79,7 +54,27 @@ FO_contra = TypeVar("FO_contra", bound=FlowOptions, contravariant=True)
|
|
|
79
54
|
|
|
80
55
|
|
|
81
56
|
class _TaskLike(Protocol[R_co]):
|
|
82
|
-
"""
|
|
57
|
+
"""Protocol for type-safe Prefect task representation.
|
|
58
|
+
|
|
59
|
+
Defines the minimal interface for a Prefect task as seen by
|
|
60
|
+
type checkers. Ensures tasks are awaitable and have common
|
|
61
|
+
Prefect task methods.
|
|
62
|
+
|
|
63
|
+
Type Parameter:
|
|
64
|
+
R_co: Covariant return type of the task.
|
|
65
|
+
|
|
66
|
+
Methods:
|
|
67
|
+
__call__: Makes the task awaitable.
|
|
68
|
+
submit: Submit task for asynchronous execution.
|
|
69
|
+
map: Map task over multiple inputs.
|
|
70
|
+
|
|
71
|
+
Attributes:
|
|
72
|
+
name: Optional task name.
|
|
73
|
+
|
|
74
|
+
Note:
|
|
75
|
+
This is a typing Protocol, not a runtime class.
|
|
76
|
+
__getattr__ allows accessing Prefect-specific helpers.
|
|
77
|
+
"""
|
|
83
78
|
|
|
84
79
|
def __call__(self, *args: Any, **kwargs: Any) -> Coroutine[Any, Any, R_co]: ...
|
|
85
80
|
|
|
@@ -91,7 +86,27 @@ class _TaskLike(Protocol[R_co]):
|
|
|
91
86
|
|
|
92
87
|
|
|
93
88
|
class _DocumentsFlowCallable(Protocol[FO_contra]):
|
|
94
|
-
"""
|
|
89
|
+
"""Protocol for user-defined flow functions.
|
|
90
|
+
|
|
91
|
+
Defines the required signature for functions that will be
|
|
92
|
+
decorated with @pipeline_flow. Enforces the standard parameters
|
|
93
|
+
for document processing flows.
|
|
94
|
+
|
|
95
|
+
Type Parameter:
|
|
96
|
+
FO_contra: Contravariant FlowOptions type (or subclass).
|
|
97
|
+
|
|
98
|
+
Required Parameters:
|
|
99
|
+
project_name: Name of the project/pipeline.
|
|
100
|
+
documents: Input DocumentList to process.
|
|
101
|
+
flow_options: Configuration options (FlowOptions or subclass).
|
|
102
|
+
*args, **kwargs: Additional flow-specific parameters.
|
|
103
|
+
|
|
104
|
+
Returns:
|
|
105
|
+
DocumentList: Processed documents.
|
|
106
|
+
|
|
107
|
+
Note:
|
|
108
|
+
Functions must be async and return DocumentList.
|
|
109
|
+
"""
|
|
95
110
|
|
|
96
111
|
def __call__(
|
|
97
112
|
self,
|
|
@@ -104,7 +119,26 @@ class _DocumentsFlowCallable(Protocol[FO_contra]):
|
|
|
104
119
|
|
|
105
120
|
|
|
106
121
|
class _FlowLike(Protocol[FO_contra]):
|
|
107
|
-
"""
|
|
122
|
+
"""Protocol for decorated flow objects returned to users.
|
|
123
|
+
|
|
124
|
+
Represents the callable object returned by @pipeline_flow,
|
|
125
|
+
which wraps the original flow function with Prefect and
|
|
126
|
+
tracing capabilities.
|
|
127
|
+
|
|
128
|
+
Type Parameter:
|
|
129
|
+
FO_contra: Contravariant FlowOptions type.
|
|
130
|
+
|
|
131
|
+
Callable Signature:
|
|
132
|
+
Same as _DocumentsFlowCallable - accepts project_name,
|
|
133
|
+
documents, flow_options, plus additional arguments.
|
|
134
|
+
|
|
135
|
+
Attributes:
|
|
136
|
+
name: Optional flow name from decorator.
|
|
137
|
+
|
|
138
|
+
Note:
|
|
139
|
+
__getattr__ provides access to all Prefect flow methods
|
|
140
|
+
without explicit typing (e.g., .serve(), .deploy()).
|
|
141
|
+
"""
|
|
108
142
|
|
|
109
143
|
def __call__(
|
|
110
144
|
self,
|
|
@@ -124,6 +158,18 @@ class _FlowLike(Protocol[FO_contra]):
|
|
|
124
158
|
# Small helper: safely get a callable's name without upsetting the type checker
|
|
125
159
|
# --------------------------------------------------------------------------- #
|
|
126
160
|
def _callable_name(obj: Any, fallback: str) -> str:
|
|
161
|
+
"""Safely extract callable's name for error messages.
|
|
162
|
+
|
|
163
|
+
Args:
|
|
164
|
+
obj: Any object that might have a __name__ attribute.
|
|
165
|
+
fallback: Default name if extraction fails.
|
|
166
|
+
|
|
167
|
+
Returns:
|
|
168
|
+
The callable's __name__ if available, fallback otherwise.
|
|
169
|
+
|
|
170
|
+
Note:
|
|
171
|
+
Internal helper that never raises exceptions.
|
|
172
|
+
"""
|
|
127
173
|
try:
|
|
128
174
|
n = getattr(obj, "__name__", None)
|
|
129
175
|
return n if isinstance(n, str) else fallback
|
|
@@ -211,18 +257,114 @@ def pipeline_task(
|
|
|
211
257
|
viz_return_value: bool | None = None,
|
|
212
258
|
asset_deps: list[str | Asset] | None = None,
|
|
213
259
|
) -> _TaskLike[R_co] | Callable[[Callable[..., Coroutine[Any, Any, R_co]]], _TaskLike[R_co]]:
|
|
214
|
-
"""Decorate an
|
|
260
|
+
"""Decorate an async function as a traced Prefect task.
|
|
261
|
+
|
|
262
|
+
@public
|
|
263
|
+
|
|
264
|
+
Wraps an async function with both Prefect task functionality and
|
|
265
|
+
LMNR tracing. The function MUST be async (declared with 'async def').
|
|
266
|
+
|
|
267
|
+
Best Practice - Use Defaults:
|
|
268
|
+
For 90% of use cases, use this decorator WITHOUT any parameters.
|
|
269
|
+
Only specify parameters when you have EXPLICIT requirements.
|
|
270
|
+
|
|
271
|
+
Args:
|
|
272
|
+
__fn: Function to decorate (when used without parentheses).
|
|
273
|
+
|
|
274
|
+
Tracing parameters:
|
|
275
|
+
trace_level: When to trace ("always", "debug", "off").
|
|
276
|
+
- "always": Always trace (default)
|
|
277
|
+
- "debug": Only trace when LMNR_DEBUG="true"
|
|
278
|
+
- "off": Disable tracing
|
|
279
|
+
trace_ignore_input: Don't trace input arguments.
|
|
280
|
+
trace_ignore_output: Don't trace return value.
|
|
281
|
+
trace_ignore_inputs: List of parameter names to exclude from tracing.
|
|
282
|
+
trace_input_formatter: Custom formatter for input tracing.
|
|
283
|
+
trace_output_formatter: Custom formatter for output tracing.
|
|
284
|
+
|
|
285
|
+
Prefect task parameters:
|
|
286
|
+
name: Task name (defaults to function name).
|
|
287
|
+
description: Human-readable task description.
|
|
288
|
+
tags: Tags for organization and filtering.
|
|
289
|
+
version: Task version string.
|
|
290
|
+
cache_policy: Caching policy for task results.
|
|
291
|
+
cache_key_fn: Custom cache key generation.
|
|
292
|
+
cache_expiration: How long to cache results.
|
|
293
|
+
task_run_name: Dynamic or static run name.
|
|
294
|
+
retries: Number of retry attempts (default 0).
|
|
295
|
+
retry_delay_seconds: Delay between retries.
|
|
296
|
+
retry_jitter_factor: Random jitter for retry delays.
|
|
297
|
+
persist_result: Whether to persist results.
|
|
298
|
+
result_storage: Where to store results.
|
|
299
|
+
result_serializer: How to serialize results.
|
|
300
|
+
result_storage_key: Custom storage key.
|
|
301
|
+
cache_result_in_memory: Keep results in memory.
|
|
302
|
+
timeout_seconds: Task execution timeout.
|
|
303
|
+
log_prints: Capture print() statements.
|
|
304
|
+
refresh_cache: Force cache refresh.
|
|
305
|
+
on_completion: Hooks for successful completion.
|
|
306
|
+
on_failure: Hooks for task failure.
|
|
307
|
+
retry_condition_fn: Custom retry condition.
|
|
308
|
+
viz_return_value: Include return value in visualization.
|
|
309
|
+
asset_deps: Upstream asset dependencies.
|
|
310
|
+
|
|
311
|
+
Returns:
|
|
312
|
+
Decorated task callable that is awaitable and has Prefect
|
|
313
|
+
task methods (submit, map, etc.).
|
|
314
|
+
|
|
315
|
+
Example:
|
|
316
|
+
>>> # RECOMMENDED - No parameters needed!
|
|
317
|
+
>>> @pipeline_task
|
|
318
|
+
>>> async def process_document(doc: Document) -> Document:
|
|
319
|
+
... result = await analyze(doc)
|
|
320
|
+
... return result
|
|
321
|
+
>>>
|
|
322
|
+
>>> # With parameters (only when necessary):
|
|
323
|
+
>>> @pipeline_task(retries=5) # Only for known flaky operations
|
|
324
|
+
>>> async def unreliable_api_call(url: str) -> dict:
|
|
325
|
+
... # This API fails often, needs extra retries
|
|
326
|
+
... return await fetch_with_retry(url)
|
|
327
|
+
>>>
|
|
328
|
+
>>> # AVOID specifying defaults - they're already optimal:
|
|
329
|
+
>>> # - Automatic task naming
|
|
330
|
+
>>> # - Standard retry policy
|
|
331
|
+
>>> # - Sensible timeout
|
|
332
|
+
>>> # - Full observability
|
|
333
|
+
|
|
334
|
+
Performance:
|
|
335
|
+
- Task decoration overhead: ~1-2ms
|
|
336
|
+
- Tracing overhead: ~1-2ms per call
|
|
337
|
+
- Prefect state tracking: ~5-10ms
|
|
338
|
+
|
|
339
|
+
Note:
|
|
340
|
+
Tasks are automatically traced with LMNR and appear in
|
|
341
|
+
both Prefect and LMNR dashboards.
|
|
342
|
+
|
|
343
|
+
See Also:
|
|
344
|
+
- pipeline_flow: For flow-level decoration
|
|
345
|
+
- trace: Lower-level tracing decorator
|
|
346
|
+
- prefect.task: Standard Prefect task (no tracing)
|
|
347
|
+
"""
|
|
215
348
|
task_decorator: Callable[..., Any] = _prefect_task # helps the type checker
|
|
216
349
|
|
|
217
350
|
def _apply(fn: Callable[..., Coroutine[Any, Any, R_co]]) -> _TaskLike[R_co]:
|
|
351
|
+
"""Apply pipeline_task decorator to async function.
|
|
352
|
+
|
|
353
|
+
Returns:
|
|
354
|
+
Wrapped task with tracing and Prefect functionality.
|
|
355
|
+
|
|
356
|
+
Raises:
|
|
357
|
+
TypeError: If function is not async.
|
|
358
|
+
"""
|
|
218
359
|
if not inspect.iscoroutinefunction(fn):
|
|
219
360
|
raise TypeError(
|
|
220
361
|
f"@pipeline_task target '{_callable_name(fn, 'task')}' must be 'async def'"
|
|
221
362
|
)
|
|
222
363
|
|
|
364
|
+
fname = _callable_name(fn, "task")
|
|
223
365
|
traced_fn = trace(
|
|
224
366
|
level=trace_level,
|
|
225
|
-
name=name or
|
|
367
|
+
name=name or fname,
|
|
226
368
|
ignore_input=trace_ignore_input,
|
|
227
369
|
ignore_output=trace_ignore_output,
|
|
228
370
|
ignore_inputs=trace_ignore_inputs,
|
|
@@ -233,14 +375,14 @@ def pipeline_task(
|
|
|
233
375
|
return cast(
|
|
234
376
|
_TaskLike[R_co],
|
|
235
377
|
task_decorator(
|
|
236
|
-
name=name,
|
|
378
|
+
name=name or fname,
|
|
237
379
|
description=description,
|
|
238
380
|
tags=tags,
|
|
239
381
|
version=version,
|
|
240
382
|
cache_policy=cache_policy,
|
|
241
383
|
cache_key_fn=cache_key_fn,
|
|
242
384
|
cache_expiration=cache_expiration,
|
|
243
|
-
task_run_name=task_run_name,
|
|
385
|
+
task_run_name=task_run_name or name or fname,
|
|
244
386
|
retries=0 if retries is None else retries,
|
|
245
387
|
retry_delay_seconds=retry_delay_seconds,
|
|
246
388
|
retry_jitter_factor=retry_jitter_factor,
|
|
@@ -264,7 +406,7 @@ def pipeline_task(
|
|
|
264
406
|
|
|
265
407
|
|
|
266
408
|
# --------------------------------------------------------------------------- #
|
|
267
|
-
# @pipeline_flow — async-only, traced, returns Prefect
|
|
409
|
+
# @pipeline_flow — async-only, traced, returns Prefect's flow wrapper
|
|
268
410
|
# --------------------------------------------------------------------------- #
|
|
269
411
|
@overload
|
|
270
412
|
def pipeline_flow(__fn: _DocumentsFlowCallable[FO_contra], /) -> _FlowLike[FO_contra]: ...
|
|
@@ -333,22 +475,124 @@ def pipeline_flow(
|
|
|
333
475
|
on_crashed: list[FlowStateHook[Any, Any]] | None = None,
|
|
334
476
|
on_running: list[FlowStateHook[Any, Any]] | None = None,
|
|
335
477
|
) -> _FlowLike[FO_contra] | Callable[[_DocumentsFlowCallable[FO_contra]], _FlowLike[FO_contra]]:
|
|
336
|
-
"""Decorate an
|
|
478
|
+
"""Decorate an async flow for document processing.
|
|
337
479
|
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
flow_options: FlowOptions, # or any subclass
|
|
343
|
-
*args,
|
|
344
|
-
**kwargs
|
|
345
|
-
) -> DocumentList
|
|
480
|
+
@public
|
|
481
|
+
|
|
482
|
+
Wraps an async function as a Prefect flow with tracing and type safety.
|
|
483
|
+
The decorated function MUST be async and follow the required signature.
|
|
346
484
|
|
|
347
|
-
|
|
485
|
+
Best Practice - Use Defaults:
|
|
486
|
+
For 90% of use cases, use this decorator WITHOUT any parameters.
|
|
487
|
+
Only specify parameters when you have EXPLICIT requirements.
|
|
488
|
+
|
|
489
|
+
Required function signature:
|
|
490
|
+
async def flow_fn(
|
|
491
|
+
project_name: str, # Project/pipeline identifier
|
|
492
|
+
documents: DocumentList, # Input documents to process
|
|
493
|
+
flow_options: FlowOptions, # Configuration (or subclass)
|
|
494
|
+
*args, # Additional positional args for custom parameters
|
|
495
|
+
**kwargs # Additional keyword args for custom parameters
|
|
496
|
+
) -> DocumentList # Must return DocumentList
|
|
497
|
+
|
|
498
|
+
Note: *args and **kwargs allow for defining custom parameters on your flow
|
|
499
|
+
function, which can be passed during execution for flow-specific needs.
|
|
500
|
+
|
|
501
|
+
Args:
|
|
502
|
+
__fn: Function to decorate (when used without parentheses).
|
|
503
|
+
|
|
504
|
+
Tracing parameters:
|
|
505
|
+
trace_level: When to trace ("always", "debug", "off").
|
|
506
|
+
- "always": Always trace (default)
|
|
507
|
+
- "debug": Only trace when LMNR_DEBUG="true"
|
|
508
|
+
- "off": Disable tracing
|
|
509
|
+
trace_ignore_input: Don't trace input arguments.
|
|
510
|
+
trace_ignore_output: Don't trace return value.
|
|
511
|
+
trace_ignore_inputs: Parameter names to exclude from tracing.
|
|
512
|
+
trace_input_formatter: Custom input formatter.
|
|
513
|
+
trace_output_formatter: Custom output formatter.
|
|
514
|
+
|
|
515
|
+
Prefect flow parameters:
|
|
516
|
+
name: Flow name (defaults to function name).
|
|
517
|
+
version: Flow version identifier.
|
|
518
|
+
flow_run_name: Static or dynamic run name.
|
|
519
|
+
retries: Number of flow retry attempts (default 0).
|
|
520
|
+
retry_delay_seconds: Delay between flow retries.
|
|
521
|
+
task_runner: Task execution strategy (sequential/concurrent).
|
|
522
|
+
description: Human-readable flow description.
|
|
523
|
+
timeout_seconds: Flow execution timeout.
|
|
524
|
+
validate_parameters: Validate input parameters.
|
|
525
|
+
persist_result: Persist flow results.
|
|
526
|
+
result_storage: Where to store results.
|
|
527
|
+
result_serializer: How to serialize results.
|
|
528
|
+
cache_result_in_memory: Keep results in memory.
|
|
529
|
+
log_prints: Capture print() statements.
|
|
530
|
+
on_completion: Hooks for successful completion.
|
|
531
|
+
on_failure: Hooks for flow failure.
|
|
532
|
+
on_cancellation: Hooks for flow cancellation.
|
|
533
|
+
on_crashed: Hooks for flow crashes.
|
|
534
|
+
on_running: Hooks for flow start.
|
|
535
|
+
|
|
536
|
+
Returns:
|
|
537
|
+
Decorated flow callable that maintains Prefect flow interface
|
|
538
|
+
while enforcing document processing conventions.
|
|
539
|
+
|
|
540
|
+
Example:
|
|
541
|
+
>>> from ai_pipeline_core import FlowOptions
|
|
542
|
+
>>>
|
|
543
|
+
>>> # RECOMMENDED - No parameters needed!
|
|
544
|
+
>>> @pipeline_flow
|
|
545
|
+
>>> async def analyze_documents(
|
|
546
|
+
... project_name: str,
|
|
547
|
+
... documents: DocumentList,
|
|
548
|
+
... flow_options: FlowOptions
|
|
549
|
+
>>> ) -> DocumentList:
|
|
550
|
+
... # Process each document
|
|
551
|
+
... results = []
|
|
552
|
+
... for doc in documents:
|
|
553
|
+
... result = await process(doc)
|
|
554
|
+
... results.append(result)
|
|
555
|
+
... return DocumentList(results)
|
|
556
|
+
>>>
|
|
557
|
+
>>> # With parameters (only when necessary):
|
|
558
|
+
>>> @pipeline_flow(retries=2) # Only for flows that need retry logic
|
|
559
|
+
>>> async def critical_flow(
|
|
560
|
+
... project_name: str,
|
|
561
|
+
... documents: DocumentList,
|
|
562
|
+
... flow_options: FlowOptions
|
|
563
|
+
>>> ) -> DocumentList:
|
|
564
|
+
... # Critical processing that might fail
|
|
565
|
+
... return await process_critical(documents)
|
|
566
|
+
>>>
|
|
567
|
+
>>> # AVOID specifying defaults - they're already optimal:
|
|
568
|
+
>>> # - Automatic flow naming
|
|
569
|
+
>>> # - Standard retry policy
|
|
570
|
+
>>> # - Full observability
|
|
571
|
+
|
|
572
|
+
Note:
|
|
573
|
+
- Flow is wrapped with both Prefect and LMNR tracing
|
|
574
|
+
- Return type is validated at runtime
|
|
575
|
+
- FlowOptions can be subclassed for custom configuration
|
|
576
|
+
- All Prefect flow methods (.serve(), .deploy()) are available
|
|
577
|
+
|
|
578
|
+
See Also:
|
|
579
|
+
- pipeline_task: For task-level decoration
|
|
580
|
+
- FlowConfig: Type-safe flow configuration
|
|
581
|
+
- FlowOptions: Base class for flow options
|
|
582
|
+
- simple_runner.run_pipeline: Execute flows locally
|
|
348
583
|
"""
|
|
349
584
|
flow_decorator: Callable[..., Any] = _prefect_flow
|
|
350
585
|
|
|
351
586
|
def _apply(fn: _DocumentsFlowCallable[FO_contra]) -> _FlowLike[FO_contra]:
|
|
587
|
+
"""Apply pipeline_flow decorator to flow function.
|
|
588
|
+
|
|
589
|
+
Returns:
|
|
590
|
+
Wrapped flow with tracing and Prefect functionality.
|
|
591
|
+
|
|
592
|
+
Raises:
|
|
593
|
+
TypeError: If function is not async, doesn't have required
|
|
594
|
+
parameters, or doesn't return DocumentList.
|
|
595
|
+
"""
|
|
352
596
|
fname = _callable_name(fn, "flow")
|
|
353
597
|
|
|
354
598
|
if not inspect.iscoroutinefunction(fn):
|
|
@@ -373,6 +617,10 @@ def pipeline_flow(
|
|
|
373
617
|
)
|
|
374
618
|
return result
|
|
375
619
|
|
|
620
|
+
# Preserve the original function name for Prefect
|
|
621
|
+
_wrapper.__name__ = fname
|
|
622
|
+
_wrapper.__qualname__ = getattr(fn, "__qualname__", fname)
|
|
623
|
+
|
|
376
624
|
traced = trace(
|
|
377
625
|
level=trace_level,
|
|
378
626
|
name=name or fname,
|
|
@@ -386,9 +634,9 @@ def pipeline_flow(
|
|
|
386
634
|
return cast(
|
|
387
635
|
_FlowLike[FO_contra],
|
|
388
636
|
flow_decorator(
|
|
389
|
-
name=name,
|
|
637
|
+
name=name or fname,
|
|
390
638
|
version=version,
|
|
391
|
-
flow_run_name=flow_run_name,
|
|
639
|
+
flow_run_name=flow_run_name or name or fname,
|
|
392
640
|
retries=0 if retries is None else retries,
|
|
393
641
|
retry_delay_seconds=retry_delay_seconds,
|
|
394
642
|
task_runner=task_runner,
|
ai_pipeline_core/prefect.py
CHANGED
|
@@ -1,4 +1,51 @@
|
|
|
1
|
-
"""Prefect core features.
|
|
1
|
+
"""Prefect core features for pipeline orchestration.
|
|
2
|
+
|
|
3
|
+
This module provides clean re-exports of essential Prefect functionality.
|
|
4
|
+
|
|
5
|
+
IMPORTANT: You should NEVER use the `task` and `flow` decorators directly
|
|
6
|
+
unless it is 100% impossible to use `pipeline_task` and `pipeline_flow`.
|
|
7
|
+
The standard Prefect decorators are exported here only for extremely
|
|
8
|
+
limited edge cases where the pipeline decorators cannot be used.
|
|
9
|
+
|
|
10
|
+
Always prefer:
|
|
11
|
+
>>> from ai_pipeline_core import pipeline_task, pipeline_flow
|
|
12
|
+
>>>
|
|
13
|
+
>>> @pipeline_task
|
|
14
|
+
>>> async def my_task(...): ...
|
|
15
|
+
>>>
|
|
16
|
+
>>> @pipeline_flow
|
|
17
|
+
>>> async def my_flow(...): ...
|
|
18
|
+
|
|
19
|
+
The `task` and `flow` decorators should only be used when:
|
|
20
|
+
- You absolutely cannot convert to async (pipeline decorators require async)
|
|
21
|
+
- You have a very specific Prefect integration that conflicts with tracing
|
|
22
|
+
- You are writing test utilities or infrastructure code
|
|
23
|
+
|
|
24
|
+
Exported components:
|
|
25
|
+
task: Prefect task decorator (AVOID - use pipeline_task instead).
|
|
26
|
+
flow: Prefect flow decorator (AVOID - use pipeline_flow instead).
|
|
27
|
+
disable_run_logger: Context manager to suppress Prefect logging.
|
|
28
|
+
prefect_test_harness: Test harness for unit testing flows/tasks.
|
|
29
|
+
|
|
30
|
+
Testing utilities (use as fixtures):
|
|
31
|
+
The disable_run_logger and prefect_test_harness should be used as
|
|
32
|
+
pytest fixtures as shown in tests/conftest.py:
|
|
33
|
+
|
|
34
|
+
>>> @pytest.fixture(autouse=True, scope="session")
|
|
35
|
+
>>> def prefect_test_fixture():
|
|
36
|
+
... with prefect_test_harness():
|
|
37
|
+
... yield
|
|
38
|
+
>>>
|
|
39
|
+
>>> @pytest.fixture(autouse=True)
|
|
40
|
+
>>> def disable_prefect_logging():
|
|
41
|
+
... with disable_run_logger():
|
|
42
|
+
... yield
|
|
43
|
+
|
|
44
|
+
Note:
|
|
45
|
+
The pipeline_task and pipeline_flow decorators from
|
|
46
|
+
ai_pipeline_core.pipeline provide async-only execution with
|
|
47
|
+
integrated LMNR tracing and are the standard for this library.
|
|
48
|
+
"""
|
|
2
49
|
|
|
3
50
|
from prefect import flow, task
|
|
4
51
|
from prefect.logging import disable_run_logger
|