ai-pipeline-core 0.3.3-py3-none-any.whl → 0.4.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +70 -144
- ai_pipeline_core/deployment/__init__.py +6 -18
- ai_pipeline_core/deployment/base.py +392 -212
- ai_pipeline_core/deployment/contract.py +6 -10
- ai_pipeline_core/{utils → deployment}/deploy.py +50 -69
- ai_pipeline_core/deployment/helpers.py +16 -17
- ai_pipeline_core/{progress.py → deployment/progress.py} +23 -24
- ai_pipeline_core/{utils/remote_deployment.py → deployment/remote.py} +11 -14
- ai_pipeline_core/docs_generator/__init__.py +54 -0
- ai_pipeline_core/docs_generator/__main__.py +5 -0
- ai_pipeline_core/docs_generator/cli.py +196 -0
- ai_pipeline_core/docs_generator/extractor.py +324 -0
- ai_pipeline_core/docs_generator/guide_builder.py +644 -0
- ai_pipeline_core/docs_generator/trimmer.py +35 -0
- ai_pipeline_core/docs_generator/validator.py +114 -0
- ai_pipeline_core/document_store/__init__.py +13 -0
- ai_pipeline_core/document_store/_summary.py +9 -0
- ai_pipeline_core/document_store/_summary_worker.py +170 -0
- ai_pipeline_core/document_store/clickhouse.py +492 -0
- ai_pipeline_core/document_store/factory.py +38 -0
- ai_pipeline_core/document_store/local.py +312 -0
- ai_pipeline_core/document_store/memory.py +85 -0
- ai_pipeline_core/document_store/protocol.py +68 -0
- ai_pipeline_core/documents/__init__.py +12 -14
- ai_pipeline_core/documents/_context_vars.py +85 -0
- ai_pipeline_core/documents/_hashing.py +52 -0
- ai_pipeline_core/documents/attachment.py +85 -0
- ai_pipeline_core/documents/context.py +128 -0
- ai_pipeline_core/documents/document.py +318 -1434
- ai_pipeline_core/documents/mime_type.py +37 -82
- ai_pipeline_core/documents/utils.py +4 -12
- ai_pipeline_core/exceptions.py +10 -62
- ai_pipeline_core/images/__init__.py +32 -85
- ai_pipeline_core/images/_processing.py +5 -11
- ai_pipeline_core/llm/__init__.py +6 -4
- ai_pipeline_core/llm/ai_messages.py +106 -81
- ai_pipeline_core/llm/client.py +267 -158
- ai_pipeline_core/llm/model_options.py +12 -84
- ai_pipeline_core/llm/model_response.py +53 -99
- ai_pipeline_core/llm/model_types.py +8 -23
- ai_pipeline_core/logging/__init__.py +2 -7
- ai_pipeline_core/logging/logging.yml +1 -1
- ai_pipeline_core/logging/logging_config.py +27 -37
- ai_pipeline_core/logging/logging_mixin.py +15 -41
- ai_pipeline_core/observability/__init__.py +32 -0
- ai_pipeline_core/observability/_debug/__init__.py +30 -0
- ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
- ai_pipeline_core/{debug/config.py → observability/_debug/_config.py} +11 -7
- ai_pipeline_core/{debug/content.py → observability/_debug/_content.py} +134 -75
- ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py} +16 -17
- ai_pipeline_core/{debug/summary.py → observability/_debug/_summary.py} +113 -37
- ai_pipeline_core/observability/_debug/_types.py +75 -0
- ai_pipeline_core/{debug/writer.py → observability/_debug/_writer.py} +126 -196
- ai_pipeline_core/observability/_document_tracking.py +146 -0
- ai_pipeline_core/observability/_initialization.py +194 -0
- ai_pipeline_core/observability/_logging_bridge.py +57 -0
- ai_pipeline_core/observability/_summary.py +81 -0
- ai_pipeline_core/observability/_tracking/__init__.py +6 -0
- ai_pipeline_core/observability/_tracking/_client.py +178 -0
- ai_pipeline_core/observability/_tracking/_internal.py +28 -0
- ai_pipeline_core/observability/_tracking/_models.py +138 -0
- ai_pipeline_core/observability/_tracking/_processor.py +158 -0
- ai_pipeline_core/observability/_tracking/_service.py +311 -0
- ai_pipeline_core/observability/_tracking/_writer.py +229 -0
- ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -335
- ai_pipeline_core/pipeline/__init__.py +10 -0
- ai_pipeline_core/pipeline/decorators.py +915 -0
- ai_pipeline_core/pipeline/options.py +16 -0
- ai_pipeline_core/prompt_manager.py +16 -102
- ai_pipeline_core/settings.py +26 -31
- ai_pipeline_core/testing.py +9 -0
- ai_pipeline_core-0.4.0.dist-info/METADATA +807 -0
- ai_pipeline_core-0.4.0.dist-info/RECORD +76 -0
- ai_pipeline_core/debug/__init__.py +0 -26
- ai_pipeline_core/documents/document_list.py +0 -420
- ai_pipeline_core/documents/flow_document.py +0 -112
- ai_pipeline_core/documents/task_document.py +0 -117
- ai_pipeline_core/documents/temporary_document.py +0 -74
- ai_pipeline_core/flow/__init__.py +0 -9
- ai_pipeline_core/flow/config.py +0 -494
- ai_pipeline_core/flow/options.py +0 -75
- ai_pipeline_core/pipeline.py +0 -718
- ai_pipeline_core/prefect.py +0 -63
- ai_pipeline_core/prompt_builder/__init__.py +0 -5
- ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -23
- ai_pipeline_core/prompt_builder/global_cache.py +0 -78
- ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -6
- ai_pipeline_core/prompt_builder/prompt_builder.py +0 -253
- ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -41
- ai_pipeline_core/storage/__init__.py +0 -8
- ai_pipeline_core/storage/storage.py +0 -628
- ai_pipeline_core/utils/__init__.py +0 -8
- ai_pipeline_core-0.3.3.dist-info/METADATA +0 -569
- ai_pipeline_core-0.3.3.dist-info/RECORD +0 -57
- {ai_pipeline_core-0.3.3.dist-info → ai_pipeline_core-0.4.0.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.3.3.dist-info → ai_pipeline_core-0.4.0.dist-info}/licenses/LICENSE +0 -0
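
The listing above amounts to a broad internal reorganization: the pipeline decorators move from pipeline.py into a pipeline/ package, tracing.py and the debug/ helpers move under observability/, and the deployment utilities consolidate under deployment/. A minimal sketch of what the moves imply for imports, under the assumption (not verifiable from this file list alone) that the public names remain re-exported from the package root:

    # Hedged sketch: module paths are taken from the rename entries above; whether each
    # symbol is still re-exported from ai_pipeline_core/__init__.py in 0.4.0 is an assumption.
    from ai_pipeline_core import pipeline_flow, pipeline_task  # assumed stable via the package root

    # Deep imports of moved modules would need updating, for example:
    # 0.3.3: from ai_pipeline_core.tracing import trace
    # 0.4.0: from ai_pipeline_core.observability.tracing import trace
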
ai_pipeline_core/pipeline.py
DELETED
@@ -1,718 +0,0 @@
"""Pipeline decorators with Prefect integration and tracing.

@public

Wrappers around Prefect's @task and @flow that add Laminar tracing
and enforce async-only execution for consistency.
"""

from __future__ import annotations

import datetime
import inspect
from functools import wraps
from typing import (
    Any,
    Callable,
    Coroutine,
    Iterable,
    Protocol,
    TypeVar,
    Union,
    cast,
    overload,
)

from prefect.assets import Asset
from prefect.cache_policies import CachePolicy
from prefect.context import TaskRunContext
from prefect.flows import FlowStateHook
from prefect.flows import flow as _prefect_flow  # public import
from prefect.futures import PrefectFuture
from prefect.results import ResultSerializer, ResultStorage
from prefect.task_runners import TaskRunner
from prefect.tasks import task as _prefect_task  # public import
from prefect.utilities.annotations import NotSet
from typing_extensions import TypeAlias

from ai_pipeline_core.documents import DocumentList
from ai_pipeline_core.flow.config import FlowConfig
from ai_pipeline_core.flow.options import FlowOptions
from ai_pipeline_core.tracing import TraceLevel, set_trace_cost, trace

# --------------------------------------------------------------------------- #
# Public callback aliases (Prefect stubs omit these exact types)
# --------------------------------------------------------------------------- #
RetryConditionCallable: TypeAlias = Callable[[Any, Any, Any], bool]
StateHookCallable: TypeAlias = Callable[[Any, Any, Any], None]
TaskRunNameValueOrCallable: TypeAlias = Union[str, Callable[[], str]]

# --------------------------------------------------------------------------- #
# Typing helpers
# --------------------------------------------------------------------------- #
R_co = TypeVar("R_co", covariant=True)
FO_contra = TypeVar("FO_contra", bound=FlowOptions, contravariant=True)
"""Flow options are an *input* type, so contravariant fits the callable model."""


class _TaskLike(Protocol[R_co]):
    """Protocol for type-safe Prefect task representation.

    Defines the minimal interface for a Prefect task as seen by
    type checkers. Ensures tasks are awaitable and have common
    Prefect task methods.

    Type Parameter:
        R_co: Covariant return type of the task.

    Methods:
        __call__: Makes the task awaitable.
        submit: Submit task for asynchronous execution.
        map: Map task over multiple inputs.

    Attributes:
        name: Optional task name.

    Note:
        This is a typing Protocol, not a runtime class.
        __getattr__ allows accessing Prefect-specific helpers.
    """

    def __call__(self, *args: Any, **kwargs: Any) -> Coroutine[Any, Any, R_co]: ...

    submit: Callable[..., Any]
    map: Callable[..., Any]
    name: str | None

    def __getattr__(self, name: str) -> Any: ...  # allow unknown helpers without type errors


class _DocumentsFlowCallable(Protocol[FO_contra]):
    """Protocol for user-defined flow functions.

    Defines the required signature for functions that will be
    decorated with @pipeline_flow. Enforces the standard parameters
    for document processing flows.

    Type Parameter:
        FO_contra: Contravariant FlowOptions type (or subclass).

    Required Parameters:
        project_name: Name of the project/pipeline.
        documents: Input DocumentList to process.
        flow_options: Configuration options (FlowOptions or subclass).

    Returns:
        DocumentList: Processed documents.

    Note:
        Functions must be async and return DocumentList.
    """

    def __call__(
        self,
        project_name: str,
        documents: DocumentList,
        flow_options: FO_contra,
    ) -> Coroutine[Any, Any, DocumentList]: ...


class _FlowLike(Protocol[FO_contra]):
    """Protocol for decorated flow objects returned to users.

    Represents the callable object returned by @pipeline_flow,
    which wraps the original flow function with Prefect and
    tracing capabilities.

    Type Parameter:
        FO_contra: Contravariant FlowOptions type.

    Callable Signature:
        Same as _DocumentsFlowCallable - accepts project_name,
        documents, flow_options, plus additional arguments.

    Attributes:
        name: Optional flow name from decorator.

    Note:
        __getattr__ provides access to all Prefect flow methods
        without explicit typing (e.g., .serve(), .deploy()).
    """

    def __call__(
        self,
        project_name: str,
        documents: DocumentList,
        flow_options: FO_contra,
    ) -> Coroutine[Any, Any, DocumentList]: ...

    name: str | None

    def __getattr__(self, name: str) -> Any: ...  # allow unknown helpers without type errors


# --------------------------------------------------------------------------- #
# Small helper: safely get a callable's name without upsetting the type checker
# --------------------------------------------------------------------------- #
def _callable_name(obj: Any, fallback: str) -> str:
    """Safely extract callable's name for error messages.

    Args:
        obj: Any object that might have a __name__ attribute.
        fallback: Default name if extraction fails.

    Returns:
        The callable's __name__ if available, fallback otherwise.

    Note:
        Internal helper that never raises exceptions.
    """
    try:
        n = getattr(obj, "__name__", None)
        return n if isinstance(n, str) else fallback
    except Exception:
        return fallback


def _is_already_traced(func: Callable[..., Any]) -> bool:
    """Check if a function has already been wrapped by the trace decorator.

    This checks both for the explicit __is_traced__ marker and walks
    the __wrapped__ chain to detect nested trace decorations.

    Args:
        func: Function to check for existing trace decoration.

    Returns:
        True if the function is already traced, False otherwise.
    """
    # Check for explicit marker
    if hasattr(func, "__is_traced__") and func.__is_traced__:  # type: ignore[attr-defined]
        return True

    # Walk the __wrapped__ chain to detect nested traces
    current = func
    depth = 0
    max_depth = 10  # Prevent infinite loops

    while hasattr(current, "__wrapped__") and depth < max_depth:
        wrapped = current.__wrapped__  # type: ignore[attr-defined]
        # Check if the wrapped function has the trace marker
        if hasattr(wrapped, "__is_traced__") and wrapped.__is_traced__:  # type: ignore[attr-defined]
            return True
        current = wrapped
        depth += 1

    return False


# --------------------------------------------------------------------------- #
# @pipeline_task — async-only, traced, returns Prefect's Task object
# --------------------------------------------------------------------------- #
@overload
def pipeline_task(__fn: Callable[..., Coroutine[Any, Any, R_co]], /) -> _TaskLike[R_co]: ...
@overload
def pipeline_task(
    *,
    # tracing
    trace_level: TraceLevel = "always",
    trace_ignore_input: bool = False,
    trace_ignore_output: bool = False,
    trace_ignore_inputs: list[str] | None = None,
    trace_input_formatter: Callable[..., str] | None = None,
    trace_output_formatter: Callable[..., str] | None = None,
    trace_cost: float | None = None,
    trace_trim_documents: bool = True,
    # prefect passthrough
    name: str | None = None,
    description: str | None = None,
    tags: Iterable[str] | None = None,
    version: str | None = None,
    cache_policy: CachePolicy | type[NotSet] = NotSet,
    cache_key_fn: Callable[[TaskRunContext, dict[str, Any]], str | None] | None = None,
    cache_expiration: datetime.timedelta | None = None,
    task_run_name: TaskRunNameValueOrCallable | None = None,
    retries: int | None = None,
    retry_delay_seconds: int | float | list[float] | Callable[[int], list[float]] | None = None,
    retry_jitter_factor: float | None = None,
    persist_result: bool | None = None,
    result_storage: ResultStorage | str | None = None,
    result_serializer: ResultSerializer | str | None = None,
    result_storage_key: str | None = None,
    cache_result_in_memory: bool = True,
    timeout_seconds: int | float | None = None,
    log_prints: bool | None = False,
    refresh_cache: bool | None = None,
    on_completion: list[StateHookCallable] | None = None,
    on_failure: list[StateHookCallable] | None = None,
    retry_condition_fn: RetryConditionCallable | None = None,
    viz_return_value: bool | None = None,
    asset_deps: list[str | Asset] | None = None,
) -> Callable[[Callable[..., Coroutine[Any, Any, R_co]]], _TaskLike[R_co]]: ...


def pipeline_task(
    __fn: Callable[..., Coroutine[Any, Any, R_co]] | None = None,
    /,
    *,
    # tracing
    trace_level: TraceLevel = "always",
    trace_ignore_input: bool = False,
    trace_ignore_output: bool = False,
    trace_ignore_inputs: list[str] | None = None,
    trace_input_formatter: Callable[..., str] | None = None,
    trace_output_formatter: Callable[..., str] | None = None,
    trace_cost: float | None = None,
    trace_trim_documents: bool = True,
    # prefect passthrough
    name: str | None = None,
    description: str | None = None,
    tags: Iterable[str] | None = None,
    version: str | None = None,
    cache_policy: CachePolicy | type[NotSet] = NotSet,
    cache_key_fn: Callable[[TaskRunContext, dict[str, Any]], str | None] | None = None,
    cache_expiration: datetime.timedelta | None = None,
    task_run_name: TaskRunNameValueOrCallable | None = None,
    retries: int | None = None,
    retry_delay_seconds: int | float | list[float] | Callable[[int], list[float]] | None = None,
    retry_jitter_factor: float | None = None,
    persist_result: bool | None = None,
    result_storage: ResultStorage | str | None = None,
    result_serializer: ResultSerializer | str | None = None,
    result_storage_key: str | None = None,
    cache_result_in_memory: bool = True,
    timeout_seconds: int | float | None = None,
    log_prints: bool | None = False,
    refresh_cache: bool | None = None,
    on_completion: list[StateHookCallable] | None = None,
    on_failure: list[StateHookCallable] | None = None,
    retry_condition_fn: RetryConditionCallable | None = None,
    viz_return_value: bool | None = None,
    asset_deps: list[str | Asset] | None = None,
) -> _TaskLike[R_co] | Callable[[Callable[..., Coroutine[Any, Any, R_co]]], _TaskLike[R_co]]:
    """Decorate an async function as a traced Prefect task.

    @public

    Wraps an async function with both Prefect task functionality and
    LMNR tracing. The function MUST be async (declared with 'async def').

    IMPORTANT: Never combine with @trace decorator - this includes tracing automatically.
    The framework will raise TypeError if you try to use both decorators together.

    Best Practice - Use Defaults:
        For 90% of use cases, use this decorator WITHOUT any parameters.
        Only specify parameters when you have EXPLICIT requirements.

    Args:
        __fn: Function to decorate (when used without parentheses).
        trace_level: When to trace ("always", "debug", "off").
            - "always": Always trace (default)
            - "debug": Only trace when LMNR_DEBUG="true"
            - "off": Disable tracing
        trace_ignore_input: Don't trace input arguments.
        trace_ignore_output: Don't trace return value.
        trace_ignore_inputs: List of parameter names to exclude from tracing.
        trace_input_formatter: Custom formatter for input tracing.
        trace_output_formatter: Custom formatter for output tracing.
        trace_cost: Optional cost value to track in metadata. When provided and > 0,
            sets gen_ai.usage.output_cost, gen_ai.usage.cost, and cost metadata.
            Also forces trace level to "always" if not already set.
        trace_trim_documents: Trim document content in traces to first 100 chars (default True).
            Reduces trace size with large documents.
        name: Task name (defaults to function name).
        description: Human-readable task description.
        tags: Tags for organization and filtering.
        version: Task version string.
        cache_policy: Caching policy for task results.
        cache_key_fn: Custom cache key generation.
        cache_expiration: How long to cache results.
        task_run_name: Dynamic or static run name.
        retries: Number of retry attempts (default 0).
        retry_delay_seconds: Delay between retries.
        retry_jitter_factor: Random jitter for retry delays.
        persist_result: Whether to persist results.
        result_storage: Where to store results.
        result_serializer: How to serialize results.
        result_storage_key: Custom storage key.
        cache_result_in_memory: Keep results in memory.
        timeout_seconds: Task execution timeout.
        log_prints: Capture print() statements.
        refresh_cache: Force cache refresh.
        on_completion: Hooks for successful completion.
        on_failure: Hooks for task failure.
        retry_condition_fn: Custom retry condition.
        viz_return_value: Include return value in visualization.
        asset_deps: Upstream asset dependencies.

    Returns:
        Decorated task callable that is awaitable and has Prefect
        task methods (submit, map, etc.).

    Example:
        >>> # RECOMMENDED - No parameters needed!
        >>> @pipeline_task
        >>> async def process_document(doc: Document) -> Document:
        ...     result = await analyze(doc)
        ...     return result
        >>>
        >>> # With parameters (only when necessary):
        >>> @pipeline_task(retries=5)  # Only for known flaky operations
        >>> async def unreliable_api_call(url: str) -> dict:
        ...     # This API fails often, needs extra retries
        ...     return await fetch_with_retry(url)
        >>>
        >>> # AVOID specifying defaults - they're already optimal:
        >>> # - Automatic task naming
        >>> # - Standard retry policy
        >>> # - Sensible timeout
        >>> # - Full observability

    Performance:
        - Task decoration overhead: ~1-2ms
        - Tracing overhead: ~1-2ms per call
        - Prefect state tracking: ~5-10ms

    Note:
        Tasks are automatically traced with LMNR and appear in
        both Prefect and LMNR dashboards.

    See Also:
        - pipeline_flow: For flow-level decoration
        - trace: Lower-level tracing decorator
        - prefect.task: Standard Prefect task (no tracing)
    """
    task_decorator: Callable[..., Any] = _prefect_task  # helps the type checker

    def _apply(fn: Callable[..., Coroutine[Any, Any, R_co]]) -> _TaskLike[R_co]:
        """Apply pipeline_task decorator to async function.

        Returns:
            Wrapped task with tracing and Prefect functionality.

        Raises:
            TypeError: If function is not async or already traced.
        """
        if not inspect.iscoroutinefunction(fn):
            raise TypeError(
                f"@pipeline_task target '{_callable_name(fn, 'task')}' must be 'async def'"
            )

        # Check if function is already traced
        if _is_already_traced(fn):
            raise TypeError(
                f"@pipeline_task target '{_callable_name(fn, 'task')}' is already decorated "
                f"with @trace. Remove the @trace decorator - @pipeline_task includes "
                f"tracing automatically."
            )

        fname = _callable_name(fn, "task")

        # Create wrapper to handle trace_cost if provided
        @wraps(fn)
        async def _wrapper(*args: Any, **kwargs: Any) -> R_co:
            result = await fn(*args, **kwargs)
            if trace_cost is not None and trace_cost > 0:
                set_trace_cost(trace_cost)
            return result

        traced_fn = trace(
            level=trace_level,
            name=name or fname,
            ignore_input=trace_ignore_input,
            ignore_output=trace_ignore_output,
            ignore_inputs=trace_ignore_inputs,
            input_formatter=trace_input_formatter,
            output_formatter=trace_output_formatter,
            trim_documents=trace_trim_documents,
        )(_wrapper)

        return cast(
            _TaskLike[R_co],
            task_decorator(
                name=name or fname,
                description=description,
                tags=tags,
                version=version,
                cache_policy=cache_policy,
                cache_key_fn=cache_key_fn,
                cache_expiration=cache_expiration,
                task_run_name=task_run_name or name or fname,
                retries=0 if retries is None else retries,
                retry_delay_seconds=retry_delay_seconds,
                retry_jitter_factor=retry_jitter_factor,
                persist_result=persist_result,
                result_storage=result_storage,
                result_serializer=result_serializer,
                result_storage_key=result_storage_key,
                cache_result_in_memory=cache_result_in_memory,
                timeout_seconds=timeout_seconds,
                log_prints=log_prints,
                refresh_cache=refresh_cache,
                on_completion=on_completion,
                on_failure=on_failure,
                retry_condition_fn=retry_condition_fn,
                viz_return_value=viz_return_value,
                asset_deps=asset_deps,
            )(traced_fn),
        )

    return _apply(__fn) if __fn else _apply


# --------------------------------------------------------------------------- #
# @pipeline_flow — async-only, traced, returns Prefect's flow wrapper
# --------------------------------------------------------------------------- #
def pipeline_flow(
    *,
    # config
    config: type[FlowConfig],
    # tracing
    trace_level: TraceLevel = "always",
    trace_ignore_input: bool = False,
    trace_ignore_output: bool = False,
    trace_ignore_inputs: list[str] | None = None,
    trace_input_formatter: Callable[..., str] | None = None,
    trace_output_formatter: Callable[..., str] | None = None,
    trace_cost: float | None = None,
    trace_trim_documents: bool = True,
    # prefect passthrough
    name: str | None = None,
    version: str | None = None,
    flow_run_name: Union[Callable[[], str], str] | None = None,
    retries: int | None = None,
    retry_delay_seconds: int | float | None = None,
    task_runner: TaskRunner[PrefectFuture[Any]] | None = None,
    description: str | None = None,
    timeout_seconds: int | float | None = None,
    validate_parameters: bool = True,
    persist_result: bool | None = None,
    result_storage: ResultStorage | str | None = None,
    result_serializer: ResultSerializer | str | None = None,
    cache_result_in_memory: bool = True,
    log_prints: bool | None = None,
    on_completion: list[FlowStateHook[Any, Any]] | None = None,
    on_failure: list[FlowStateHook[Any, Any]] | None = None,
    on_cancellation: list[FlowStateHook[Any, Any]] | None = None,
    on_crashed: list[FlowStateHook[Any, Any]] | None = None,
    on_running: list[FlowStateHook[Any, Any]] | None = None,
) -> Callable[[_DocumentsFlowCallable[FO_contra]], _FlowLike[FO_contra]]:
    """Decorate an async flow for document processing.

    @public

    Wraps an async function as a Prefect flow with tracing and type safety.
    The decorated function MUST be async and follow the required signature.

    IMPORTANT: Never combine with @trace decorator - this includes tracing automatically.
    The framework will raise TypeError if you try to use both decorators together.

    Best Practice - Use Defaults:
        For 90% of use cases, use this decorator WITHOUT any parameters.
        Only specify parameters when you have EXPLICIT requirements.

    Required function signature:
        async def flow_fn(
            project_name: str,          # Project/pipeline identifier
            documents: DocumentList,    # Input documents to process
            flow_options: FlowOptions,  # Configuration (or subclass)
        ) -> DocumentList               # Must return DocumentList

    Args:
        config: Required FlowConfig class for document loading/saving. Enables
            automatic loading from string paths and saving outputs.
        trace_level: When to trace ("always", "debug", "off").
            - "always": Always trace (default)
            - "debug": Only trace when LMNR_DEBUG="true"
            - "off": Disable tracing
        trace_ignore_input: Don't trace input arguments.
        trace_ignore_output: Don't trace return value.
        trace_ignore_inputs: Parameter names to exclude from tracing.
        trace_input_formatter: Custom input formatter.
        trace_output_formatter: Custom output formatter.
        trace_cost: Optional cost value to track in metadata. When provided and > 0,
            sets gen_ai.usage.output_cost, gen_ai.usage.cost, and cost metadata.
            Also forces trace level to "always" if not already set.
        trace_trim_documents: Trim document content in traces to first 100 chars (default True).
            Reduces trace size with large documents.
        name: Flow name (defaults to function name).
        version: Flow version identifier.
        flow_run_name: Static or dynamic run name.
        retries: Number of flow retry attempts (default 0).
        retry_delay_seconds: Delay between flow retries.
        task_runner: Task execution strategy (sequential/concurrent).
        description: Human-readable flow description.
        timeout_seconds: Flow execution timeout.
        validate_parameters: Validate input parameters.
        persist_result: Persist flow results.
        result_storage: Where to store results.
        result_serializer: How to serialize results.
        cache_result_in_memory: Keep results in memory.
        log_prints: Capture print() statements.
        on_completion: Hooks for successful completion.
        on_failure: Hooks for flow failure.
        on_cancellation: Hooks for flow cancellation.
        on_crashed: Hooks for flow crashes.
        on_running: Hooks for flow start.

    Returns:
        Decorated flow callable that maintains Prefect flow interface
        while enforcing document processing conventions.

    Example:
        >>> from ai_pipeline_core import FlowOptions, FlowConfig
        >>>
        >>> class MyFlowConfig(FlowConfig):
        ...     INPUT_DOCUMENT_TYPES = [InputDoc]
        ...     OUTPUT_DOCUMENT_TYPE = OutputDoc
        >>>
        >>> # Standard usage with config
        >>> @pipeline_flow(config=MyFlowConfig)
        >>> async def analyze_documents(
        ...     project_name: str,
        ...     documents: DocumentList,
        ...     flow_options: FlowOptions
        >>> ) -> DocumentList:
        ...     # Process each document
        ...     results = []
        ...     for doc in documents:
        ...         result = await process(doc)
        ...         results.append(result)
        ...     return DocumentList(results)
        >>>
        >>> # With additional parameters:
        >>> @pipeline_flow(config=MyFlowConfig, retries=2)
        >>> async def critical_flow(
        ...     project_name: str,
        ...     documents: DocumentList,
        ...     flow_options: FlowOptions
        >>> ) -> DocumentList:
        ...     # Critical processing that might fail
        ...     return await process_critical(documents)
        >>>
        >>> # AVOID specifying defaults - they're already optimal:
        >>> # - Automatic flow naming
        >>> # - Standard retry policy
        >>> # - Full observability

    Note:
        - Flow is wrapped with both Prefect and LMNR tracing
        - Return type is validated at runtime
        - FlowOptions can be subclassed for custom configuration
        - All Prefect flow methods (.serve(), .deploy()) are available

    See Also:
        - pipeline_task: For task-level decoration
        - FlowConfig: Type-safe flow configuration
        - FlowOptions: Base class for flow options
        - PipelineDeployment: Execute flows locally or remotely
    """
    flow_decorator: Callable[..., Any] = _prefect_flow

    def _apply(fn: _DocumentsFlowCallable[FO_contra]) -> _FlowLike[FO_contra]:
        """Apply pipeline_flow decorator to flow function.

        Returns:
            Wrapped flow with tracing and Prefect functionality.

        Raises:
            TypeError: If function is not async, already traced, doesn't have
                required parameters, or doesn't return DocumentList.
        """
        fname = _callable_name(fn, "flow")

        if not inspect.iscoroutinefunction(fn):
            raise TypeError(f"@pipeline_flow '{fname}' must be declared with 'async def'")

        # Check if function is already traced
        if _is_already_traced(fn):
            raise TypeError(
                f"@pipeline_flow target '{fname}' is already decorated "
                f"with @trace. Remove the @trace decorator - @pipeline_flow includes "
                f"tracing automatically."
            )

        if len(inspect.signature(fn).parameters) < 3:
            raise TypeError(
                f"@pipeline_flow '{fname}' must accept "
                "'project_name, documents, flow_options' as its first three parameters"
            )

        @wraps(fn)
        async def _wrapper(
            project_name: str,
            documents: str | DocumentList,
            flow_options: FO_contra,
        ) -> DocumentList:
            save_path: str | None = None
            if isinstance(documents, str):
                save_path = documents
                documents = await config.load_documents(documents)
            result = await fn(project_name, documents, flow_options)
            if save_path:
                await config.save_documents(save_path, result)
            if trace_cost is not None and trace_cost > 0:
                set_trace_cost(trace_cost)
            if not isinstance(result, DocumentList):  # pyright: ignore[reportUnnecessaryIsInstance]
                raise TypeError(
                    f"Flow '{fname}' must return DocumentList, got {type(result).__name__}"
                )
            return result

        traced = trace(
            level=trace_level,
            name=name or fname,
            ignore_input=trace_ignore_input,
            ignore_output=trace_ignore_output,
            ignore_inputs=trace_ignore_inputs,
            input_formatter=trace_input_formatter,
            output_formatter=trace_output_formatter,
            trim_documents=trace_trim_documents,
        )(_wrapper)

        # --- Publish a schema where `documents` accepts str (path) OR DocumentList ---
        _sig = inspect.signature(fn)
        _params = [
            p.replace(annotation=(str | DocumentList)) if p.name == "documents" else p
            for p in _sig.parameters.values()
        ]
        if hasattr(traced, "__signature__"):
            setattr(traced, "__signature__", _sig.replace(parameters=_params))
        if hasattr(traced, "__annotations__"):
            traced.__annotations__ = {
                **getattr(traced, "__annotations__", {}),
                "documents": str | DocumentList,
            }

        flow_obj = cast(
            _FlowLike[FO_contra],
            flow_decorator(
                name=name or fname,
                version=version,
                flow_run_name=flow_run_name or name or fname,
                retries=0 if retries is None else retries,
                retry_delay_seconds=retry_delay_seconds,
                task_runner=task_runner,
                description=description,
                timeout_seconds=timeout_seconds,
                validate_parameters=validate_parameters,
                persist_result=persist_result,
                result_storage=result_storage,
                result_serializer=result_serializer,
                cache_result_in_memory=cache_result_in_memory,
                log_prints=log_prints,
                on_completion=on_completion,
                on_failure=on_failure,
                on_cancellation=on_cancellation,
                on_crashed=on_crashed,
                on_running=on_running,
            )(traced),
        )
        # Attach config to the flow object for later access
        flow_obj.config = config  # type: ignore[attr-defined]
        return flow_obj

    return _apply


__all__ = ["pipeline_task", "pipeline_flow"]
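
For reference, the usage pattern documented by this deleted module (the 0.3.3 API), consolidated from the docstring examples above into a single sketch. InputDoc, OutputDoc, and the body of process_document are placeholders carried over from those docstrings, and importing every name from the package root is an assumption rather than something this diff confirms:

    # Hedged sketch of 0.3.3-style usage, assembled from the docstring examples above.
    from ai_pipeline_core import DocumentList, FlowConfig, FlowOptions, pipeline_flow, pipeline_task

    @pipeline_task  # recommended form: no parameters; naming, tracing, and retries use the defaults
    async def process_document(doc):
        return doc  # placeholder body; the docstring example awaits an analyze() helper here

    class MyFlowConfig(FlowConfig):
        INPUT_DOCUMENT_TYPES = [InputDoc]  # InputDoc/OutputDoc are placeholder document classes
        OUTPUT_DOCUMENT_TYPE = OutputDoc   # carried over from the docstring example

    @pipeline_flow(config=MyFlowConfig)
    async def analyze_documents(
        project_name: str,
        documents: DocumentList,
        flow_options: FlowOptions,
    ) -> DocumentList:
        results = [await process_document(doc) for doc in documents]
        return DocumentList(results)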