ai-pipeline-core 0.1.12__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. ai_pipeline_core/__init__.py +83 -119
  2. ai_pipeline_core/deployment/__init__.py +34 -0
  3. ai_pipeline_core/deployment/base.py +861 -0
  4. ai_pipeline_core/deployment/contract.py +80 -0
  5. ai_pipeline_core/deployment/deploy.py +561 -0
  6. ai_pipeline_core/deployment/helpers.py +97 -0
  7. ai_pipeline_core/deployment/progress.py +126 -0
  8. ai_pipeline_core/deployment/remote.py +116 -0
  9. ai_pipeline_core/docs_generator/__init__.py +54 -0
  10. ai_pipeline_core/docs_generator/__main__.py +5 -0
  11. ai_pipeline_core/docs_generator/cli.py +196 -0
  12. ai_pipeline_core/docs_generator/extractor.py +324 -0
  13. ai_pipeline_core/docs_generator/guide_builder.py +644 -0
  14. ai_pipeline_core/docs_generator/trimmer.py +35 -0
  15. ai_pipeline_core/docs_generator/validator.py +114 -0
  16. ai_pipeline_core/document_store/__init__.py +13 -0
  17. ai_pipeline_core/document_store/_summary.py +9 -0
  18. ai_pipeline_core/document_store/_summary_worker.py +170 -0
  19. ai_pipeline_core/document_store/clickhouse.py +492 -0
  20. ai_pipeline_core/document_store/factory.py +38 -0
  21. ai_pipeline_core/document_store/local.py +312 -0
  22. ai_pipeline_core/document_store/memory.py +85 -0
  23. ai_pipeline_core/document_store/protocol.py +68 -0
  24. ai_pipeline_core/documents/__init__.py +14 -15
  25. ai_pipeline_core/documents/_context_vars.py +85 -0
  26. ai_pipeline_core/documents/_hashing.py +52 -0
  27. ai_pipeline_core/documents/attachment.py +85 -0
  28. ai_pipeline_core/documents/context.py +128 -0
  29. ai_pipeline_core/documents/document.py +349 -1062
  30. ai_pipeline_core/documents/mime_type.py +40 -85
  31. ai_pipeline_core/documents/utils.py +62 -7
  32. ai_pipeline_core/exceptions.py +10 -62
  33. ai_pipeline_core/images/__init__.py +309 -0
  34. ai_pipeline_core/images/_processing.py +151 -0
  35. ai_pipeline_core/llm/__init__.py +5 -3
  36. ai_pipeline_core/llm/ai_messages.py +284 -73
  37. ai_pipeline_core/llm/client.py +462 -209
  38. ai_pipeline_core/llm/model_options.py +86 -53
  39. ai_pipeline_core/llm/model_response.py +187 -241
  40. ai_pipeline_core/llm/model_types.py +34 -54
  41. ai_pipeline_core/logging/__init__.py +2 -9
  42. ai_pipeline_core/logging/logging.yml +1 -1
  43. ai_pipeline_core/logging/logging_config.py +27 -43
  44. ai_pipeline_core/logging/logging_mixin.py +17 -51
  45. ai_pipeline_core/observability/__init__.py +32 -0
  46. ai_pipeline_core/observability/_debug/__init__.py +30 -0
  47. ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
  48. ai_pipeline_core/observability/_debug/_config.py +95 -0
  49. ai_pipeline_core/observability/_debug/_content.py +764 -0
  50. ai_pipeline_core/observability/_debug/_processor.py +98 -0
  51. ai_pipeline_core/observability/_debug/_summary.py +312 -0
  52. ai_pipeline_core/observability/_debug/_types.py +75 -0
  53. ai_pipeline_core/observability/_debug/_writer.py +843 -0
  54. ai_pipeline_core/observability/_document_tracking.py +146 -0
  55. ai_pipeline_core/observability/_initialization.py +194 -0
  56. ai_pipeline_core/observability/_logging_bridge.py +57 -0
  57. ai_pipeline_core/observability/_summary.py +81 -0
  58. ai_pipeline_core/observability/_tracking/__init__.py +6 -0
  59. ai_pipeline_core/observability/_tracking/_client.py +178 -0
  60. ai_pipeline_core/observability/_tracking/_internal.py +28 -0
  61. ai_pipeline_core/observability/_tracking/_models.py +138 -0
  62. ai_pipeline_core/observability/_tracking/_processor.py +158 -0
  63. ai_pipeline_core/observability/_tracking/_service.py +311 -0
  64. ai_pipeline_core/observability/_tracking/_writer.py +229 -0
  65. ai_pipeline_core/observability/tracing.py +640 -0
  66. ai_pipeline_core/pipeline/__init__.py +10 -0
  67. ai_pipeline_core/pipeline/decorators.py +915 -0
  68. ai_pipeline_core/pipeline/options.py +16 -0
  69. ai_pipeline_core/prompt_manager.py +26 -105
  70. ai_pipeline_core/settings.py +41 -32
  71. ai_pipeline_core/testing.py +9 -0
  72. ai_pipeline_core-0.4.1.dist-info/METADATA +807 -0
  73. ai_pipeline_core-0.4.1.dist-info/RECORD +76 -0
  74. {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/WHEEL +1 -1
  75. ai_pipeline_core/documents/document_list.py +0 -240
  76. ai_pipeline_core/documents/flow_document.py +0 -128
  77. ai_pipeline_core/documents/task_document.py +0 -133
  78. ai_pipeline_core/documents/temporary_document.py +0 -95
  79. ai_pipeline_core/flow/__init__.py +0 -9
  80. ai_pipeline_core/flow/config.py +0 -314
  81. ai_pipeline_core/flow/options.py +0 -75
  82. ai_pipeline_core/pipeline.py +0 -717
  83. ai_pipeline_core/prefect.py +0 -54
  84. ai_pipeline_core/simple_runner/__init__.py +0 -24
  85. ai_pipeline_core/simple_runner/cli.py +0 -255
  86. ai_pipeline_core/simple_runner/simple_runner.py +0 -385
  87. ai_pipeline_core/tracing.py +0 -475
  88. ai_pipeline_core-0.1.12.dist-info/METADATA +0 -450
  89. ai_pipeline_core-0.1.12.dist-info/RECORD +0 -36
  90. {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,717 +0,0 @@
1
- """Pipeline decorators with Prefect integration and tracing.
2
-
3
- @public
4
-
5
- Wrappers around Prefect's @task and @flow that add Laminar tracing
6
- and enforce async-only execution for consistency.
7
- """
8
-
9
- from __future__ import annotations
10
-
11
- import datetime
12
- import inspect
13
- from typing import (
14
- Any,
15
- Callable,
16
- Coroutine,
17
- Iterable,
18
- Protocol,
19
- TypeVar,
20
- Union,
21
- cast,
22
- overload,
23
- )
24
-
25
- from prefect.assets import Asset
26
- from prefect.cache_policies import CachePolicy
27
- from prefect.context import TaskRunContext
28
- from prefect.flows import FlowStateHook
29
- from prefect.flows import flow as _prefect_flow # public import
30
- from prefect.futures import PrefectFuture
31
- from prefect.results import ResultSerializer, ResultStorage
32
- from prefect.task_runners import TaskRunner
33
- from prefect.tasks import task as _prefect_task # public import
34
- from prefect.utilities.annotations import NotSet
35
- from typing_extensions import TypeAlias
36
-
37
- from ai_pipeline_core.documents import DocumentList
38
- from ai_pipeline_core.flow.options import FlowOptions
39
- from ai_pipeline_core.tracing import TraceLevel, trace
40
-
41
# --------------------------------------------------------------------------- #
# Public callback aliases (Prefect stubs omit these exact types)
# --------------------------------------------------------------------------- #
# Predicate taking three positional arguments and returning a bool; annotates
# the ``retry_condition_fn`` parameter of ``pipeline_task``.
RetryConditionCallable: TypeAlias = Callable[[Any, Any, Any], bool]
# Hook taking three positional arguments and returning nothing; annotates the
# ``on_completion`` / ``on_failure`` lists of ``pipeline_task``.
StateHookCallable: TypeAlias = Callable[[Any, Any, Any], None]
# Either a fixed run name or a zero-argument callable that produces one;
# annotates the ``task_run_name`` parameter of ``pipeline_task``.
TaskRunNameValueOrCallable: TypeAlias = Union[str, Callable[[], str]]

# --------------------------------------------------------------------------- #
# Typing helpers
# --------------------------------------------------------------------------- #
# Result type of the coroutine produced by calling a decorated task.
R_co = TypeVar("R_co", covariant=True)
# Options type a flow accepts as its third parameter; bounded by FlowOptions.
FO_contra = TypeVar("FO_contra", bound=FlowOptions, contravariant=True)
"""Flow options are an *input* type, so contravariant fits the callable model."""
54
-
55
-
56
class _TaskLike(Protocol[R_co]):
    """Structural type for the object that ``@pipeline_task`` returns.

    Exists purely for static type checkers: it declares the small surface
    of a task that this module relies on and lets every other attribute
    through untyped.

    Type Parameter:
        R_co: Covariant result type of the coroutine returned by a call.

    Attributes:
        name: Optional task name.
        submit: Callable attribute, accepted with any arguments.
        map: Callable attribute, accepted with any arguments.

    Methods:
        __call__: Returns a coroutine that resolves to ``R_co``.
        __getattr__: Types any other attribute access as ``Any``.

    Note:
        This is a typing Protocol, not a runtime class.
    """

    name: str | None
    submit: Callable[..., Any]
    map: Callable[..., Any]

    def __getattr__(self, name: str) -> Any: ...  # allow unknown helpers without type errors

    def __call__(self, *args: Any, **kwargs: Any) -> Coroutine[Any, Any, R_co]: ...
86
-
87
-
88
class _DocumentsFlowCallable(Protocol[FO_contra]):
    """Call signature a function must have to be accepted by ``@pipeline_flow``.

    Type Parameter:
        FO_contra: Contravariant options type (FlowOptions or a subclass).

    Call parameters, in order:
        project_name: Name of the project/pipeline.
        documents: Input DocumentList to process.
        flow_options: Configuration options of type ``FO_contra``.
        *args, **kwargs: Any additional flow-specific parameters.

    Returns:
        A coroutine resolving to a DocumentList, i.e. the function is
        expected to be ``async def`` and to return a DocumentList.
    """

    def __call__(
        self,
        project_name: str,
        documents: DocumentList,
        flow_options: FO_contra,
        *args: Any,
        **kwargs: Any,
    ) -> Coroutine[Any, Any, DocumentList]: ...
119
-
120
-
121
class _FlowLike(Protocol[FO_contra]):
    """Structural type for the object that ``@pipeline_flow`` returns.

    Type Parameter:
        FO_contra: Contravariant options type (FlowOptions or a subclass).

    Attributes:
        name: Optional flow name.

    Methods:
        __call__: Same parameters as ``_DocumentsFlowCallable`` -
            ``project_name``, ``documents``, ``flow_options`` plus extra
            positional/keyword arguments - returning a coroutine that
            resolves to a DocumentList.
        __getattr__: Types any other attribute access as ``Any`` so the
            wrapped flow's own methods can be used without explicit
            declarations here (the original notes mention ``.serve()`` and
            ``.deploy()`` as examples).

    Note:
        This is a typing Protocol, not a runtime class.
    """

    name: str | None

    def __getattr__(self, name: str) -> Any: ...  # allow unknown helpers without type errors

    def __call__(
        self,
        project_name: str,
        documents: DocumentList,
        flow_options: FO_contra,
        *args: Any,
        **kwargs: Any,
    ) -> Coroutine[Any, Any, DocumentList]: ...
155
-
156
-
157
- # --------------------------------------------------------------------------- #
158
- # Small helper: safely get a callable's name without upsetting the type checker
159
- # --------------------------------------------------------------------------- #
160
def _callable_name(obj: Any, fallback: str) -> str:
    """Return a printable name for *obj*, or *fallback* when none is usable.

    Args:
        obj: Any object; typically the function being decorated.
        fallback: Name to use when ``obj.__name__`` is missing, is not a
            string, is empty, or cannot be read.

    Returns:
        ``obj.__name__`` when it is a non-empty string, otherwise *fallback*.

    Note:
        Never raises. The result only feeds error messages and default
        task/flow names, so any failure while reading the attribute is
        deliberately swallowed.
    """
    try:
        candidate = getattr(obj, "__name__", None)
    except Exception:  # a custom __getattr__ may raise something other than AttributeError
        return fallback
    # An empty name is as useless as a missing one for messages and labels.
    if isinstance(candidate, str) and candidate:
        return candidate
    return fallback
178
-
179
-
180
def _is_already_traced(func: Callable[..., Any], *, max_depth: int = 10) -> bool:
    """Report whether *func* or anything it wraps carries the trace marker.

    The marker is a truthy ``__is_traced__`` attribute. It is looked for on
    *func* itself and then on each object reached by following
    ``__wrapped__``.

    Args:
        func: Function to check for existing trace decoration.
        max_depth: Maximum number of ``__wrapped__`` hops to follow. The
            limit guarantees termination if the chain is cyclic.

    Returns:
        True if a marker is found within ``max_depth`` hops, False otherwise.
    """
    current: Any = func
    hops = 0
    while True:
        if getattr(current, "__is_traced__", False):
            return True
        # Stop at the end of the chain, or at the hop limit (cycle guard).
        if hops >= max_depth or not hasattr(current, "__wrapped__"):
            return False
        current = current.__wrapped__
        hops += 1
210
-
211
-
212
- # --------------------------------------------------------------------------- #
213
- # @pipeline_task — async-only, traced, returns Prefect's Task object
214
- # --------------------------------------------------------------------------- #
215
@overload
def pipeline_task(__fn: Callable[..., Coroutine[Any, Any, R_co]], /) -> _TaskLike[R_co]: ...
@overload
def pipeline_task(
    *,
    # tracing
    trace_level: TraceLevel = "always",
    trace_ignore_input: bool = False,
    trace_ignore_output: bool = False,
    trace_ignore_inputs: list[str] | None = None,
    trace_input_formatter: Callable[..., str] | None = None,
    trace_output_formatter: Callable[..., str] | None = None,
    # prefect passthrough
    name: str | None = None,
    description: str | None = None,
    tags: Iterable[str] | None = None,
    version: str | None = None,
    cache_policy: CachePolicy | type[NotSet] = NotSet,
    cache_key_fn: Callable[[TaskRunContext, dict[str, Any]], str | None] | None = None,
    cache_expiration: datetime.timedelta | None = None,
    task_run_name: TaskRunNameValueOrCallable | None = None,
    retries: int | None = None,
    retry_delay_seconds: int | float | list[float] | Callable[[int], list[float]] | None = None,
    retry_jitter_factor: float | None = None,
    persist_result: bool | None = None,
    result_storage: ResultStorage | str | None = None,
    result_serializer: ResultSerializer | str | None = None,
    result_storage_key: str | None = None,
    cache_result_in_memory: bool = True,
    timeout_seconds: int | float | None = None,
    log_prints: bool | None = False,
    refresh_cache: bool | None = None,
    on_completion: list[StateHookCallable] | None = None,
    on_failure: list[StateHookCallable] | None = None,
    retry_condition_fn: RetryConditionCallable | None = None,
    viz_return_value: bool | None = None,
    asset_deps: list[str | Asset] | None = None,
) -> Callable[[Callable[..., Coroutine[Any, Any, R_co]]], _TaskLike[R_co]]: ...


def pipeline_task(
    __fn: Callable[..., Coroutine[Any, Any, R_co]] | None = None,
    /,
    *,
    # tracing
    trace_level: TraceLevel = "always",
    trace_ignore_input: bool = False,
    trace_ignore_output: bool = False,
    trace_ignore_inputs: list[str] | None = None,
    trace_input_formatter: Callable[..., str] | None = None,
    trace_output_formatter: Callable[..., str] | None = None,
    # prefect passthrough
    name: str | None = None,
    description: str | None = None,
    tags: Iterable[str] | None = None,
    version: str | None = None,
    cache_policy: CachePolicy | type[NotSet] = NotSet,
    cache_key_fn: Callable[[TaskRunContext, dict[str, Any]], str | None] | None = None,
    cache_expiration: datetime.timedelta | None = None,
    task_run_name: TaskRunNameValueOrCallable | None = None,
    retries: int | None = None,
    retry_delay_seconds: int | float | list[float] | Callable[[int], list[float]] | None = None,
    retry_jitter_factor: float | None = None,
    persist_result: bool | None = None,
    result_storage: ResultStorage | str | None = None,
    result_serializer: ResultSerializer | str | None = None,
    result_storage_key: str | None = None,
    cache_result_in_memory: bool = True,
    timeout_seconds: int | float | None = None,
    log_prints: bool | None = False,
    refresh_cache: bool | None = None,
    on_completion: list[StateHookCallable] | None = None,
    on_failure: list[StateHookCallable] | None = None,
    retry_condition_fn: RetryConditionCallable | None = None,
    viz_return_value: bool | None = None,
    asset_deps: list[str | Asset] | None = None,
) -> _TaskLike[R_co] | Callable[[Callable[..., Coroutine[Any, Any, R_co]]], _TaskLike[R_co]]:
    """Decorate an async function as a traced Prefect task.

    @public

    The target is first wrapped with ``trace`` and the traced callable is
    then handed to Prefect's ``task`` decorator. The target MUST be declared
    with ``async def``.

    IMPORTANT: Never combine with @trace - tracing is applied here already,
    and a target that carries the trace marker is rejected with TypeError.

    Best Practice - Use Defaults:
        For most use cases, use this decorator WITHOUT any parameters.
        Only specify parameters when you have EXPLICIT requirements.

    Args:
        __fn: Function to decorate (when used without parentheses).

        Tracing parameters (forwarded to ``trace``):
            trace_level: When to trace ("always", "debug", "off").
            trace_ignore_input: Don't trace input arguments.
            trace_ignore_output: Don't trace return value.
            trace_ignore_inputs: Parameter names to exclude from tracing.
            trace_input_formatter: Custom formatter for input tracing.
            trace_output_formatter: Custom formatter for output tracing.

        Prefect task parameters (forwarded to Prefect's ``task``):
            name: Task name (defaults to the function name); also used as
                the name passed to ``trace``.
            description: Human-readable task description.
            tags: Tags for organization and filtering.
            version: Task version string.
            cache_policy: Caching policy for task results.
            cache_key_fn: Custom cache key generation.
            cache_expiration: How long to cache results.
            task_run_name: Dynamic or static run name (defaults to ``name``,
                then to the function name).
            retries: Number of retry attempts (``None`` is sent as 0).
            retry_delay_seconds: Delay between retries.
            retry_jitter_factor: Random jitter for retry delays.
            persist_result: Whether to persist results.
            result_storage: Where to store results.
            result_serializer: How to serialize results.
            result_storage_key: Custom storage key.
            cache_result_in_memory: Keep results in memory.
            timeout_seconds: Task execution timeout (``None`` is passed
                through unchanged).
            log_prints: Capture print() statements.
            refresh_cache: Force cache refresh.
            on_completion: Hooks for successful completion.
            on_failure: Hooks for task failure.
            retry_condition_fn: Custom retry condition.
            viz_return_value: Include return value in visualization.
            asset_deps: Upstream asset dependencies.

    Returns:
        The decorated task when used without parentheses, otherwise a
        decorator that produces it.

    Raises:
        TypeError: When the decorator is applied to a target that is not
            ``async def`` or that is already decorated with @trace.

    Example:
        >>> # RECOMMENDED - No parameters needed!
        >>> @pipeline_task
        ... async def process_document(doc: Document) -> Document:
        ...     result = await analyze(doc)
        ...     return result
        >>>
        >>> # With parameters (only when necessary):
        >>> @pipeline_task(retries=5)  # Only for known flaky operations
        ... async def unreliable_api_call(url: str) -> dict:
        ...     return await fetch_with_retry(url)

    See Also:
        - pipeline_flow: For flow-level decoration
        - trace: Lower-level tracing decorator
        - prefect.task: Standard Prefect task (no tracing)
    """
    task_decorator: Callable[..., Any] = _prefect_task  # helps the type checker

    def _apply(fn: Callable[..., Coroutine[Any, Any, R_co]]) -> _TaskLike[R_co]:
        """Validate *fn*, wrap it with ``trace``, then with Prefect's task.

        Returns:
            Wrapped task with tracing and Prefect functionality.

        Raises:
            TypeError: If function is not async or already traced.
        """
        fname = _callable_name(fn, "task")

        if not inspect.iscoroutinefunction(fn):
            raise TypeError(f"@pipeline_task target '{fname}' must be 'async def'")

        # Double tracing is rejected up front rather than silently nested.
        if _is_already_traced(fn):
            raise TypeError(
                f"@pipeline_task target '{fname}' is already decorated "
                f"with @trace. Remove the @trace decorator - @pipeline_task includes "
                f"tracing automatically."
            )

        # The same label names the trace and the Prefect task.
        label = name or fname

        traced_fn = trace(
            level=trace_level,
            name=label,
            ignore_input=trace_ignore_input,
            ignore_output=trace_ignore_output,
            ignore_inputs=trace_ignore_inputs,
            input_formatter=trace_input_formatter,
            output_formatter=trace_output_formatter,
        )(fn)

        return cast(
            _TaskLike[R_co],
            task_decorator(
                name=label,
                description=description,
                tags=tags,
                version=version,
                cache_policy=cache_policy,
                cache_key_fn=cache_key_fn,
                cache_expiration=cache_expiration,
                task_run_name=task_run_name or label,
                retries=0 if retries is None else retries,
                retry_delay_seconds=retry_delay_seconds,
                retry_jitter_factor=retry_jitter_factor,
                persist_result=persist_result,
                result_storage=result_storage,
                result_serializer=result_serializer,
                result_storage_key=result_storage_key,
                cache_result_in_memory=cache_result_in_memory,
                timeout_seconds=timeout_seconds,
                log_prints=log_prints,
                refresh_cache=refresh_cache,
                on_completion=on_completion,
                on_failure=on_failure,
                retry_condition_fn=retry_condition_fn,
                viz_return_value=viz_return_value,
                asset_deps=asset_deps,
            )(traced_fn),
        )

    # Compare against None explicitly: a callable's truthiness is not a
    # reliable signal of whether the decorator was used with parentheses.
    return _apply(__fn) if __fn is not None else _apply
449
-
450
-
451
- # --------------------------------------------------------------------------- #
452
- # @pipeline_flow — async-only, traced, returns Prefect's flow wrapper
453
- # --------------------------------------------------------------------------- #
454
@overload
def pipeline_flow(__fn: _DocumentsFlowCallable[FO_contra], /) -> _FlowLike[FO_contra]: ...
@overload
def pipeline_flow(
    *,
    # tracing
    trace_level: TraceLevel = "always",
    trace_ignore_input: bool = False,
    trace_ignore_output: bool = False,
    trace_ignore_inputs: list[str] | None = None,
    trace_input_formatter: Callable[..., str] | None = None,
    trace_output_formatter: Callable[..., str] | None = None,
    # prefect passthrough
    name: str | None = None,
    version: str | None = None,
    flow_run_name: Union[Callable[[], str], str] | None = None,
    retries: int | None = None,
    retry_delay_seconds: int | float | None = None,
    task_runner: TaskRunner[PrefectFuture[Any]] | None = None,
    description: str | None = None,
    timeout_seconds: int | float | None = None,
    validate_parameters: bool = True,
    persist_result: bool | None = None,
    result_storage: ResultStorage | str | None = None,
    result_serializer: ResultSerializer | str | None = None,
    cache_result_in_memory: bool = True,
    log_prints: bool | None = None,
    on_completion: list[FlowStateHook[Any, Any]] | None = None,
    on_failure: list[FlowStateHook[Any, Any]] | None = None,
    on_cancellation: list[FlowStateHook[Any, Any]] | None = None,
    on_crashed: list[FlowStateHook[Any, Any]] | None = None,
    on_running: list[FlowStateHook[Any, Any]] | None = None,
) -> Callable[[_DocumentsFlowCallable[FO_contra]], _FlowLike[FO_contra]]: ...


def pipeline_flow(
    __fn: _DocumentsFlowCallable[FO_contra] | None = None,
    /,
    *,
    # tracing
    trace_level: TraceLevel = "always",
    trace_ignore_input: bool = False,
    trace_ignore_output: bool = False,
    trace_ignore_inputs: list[str] | None = None,
    trace_input_formatter: Callable[..., str] | None = None,
    trace_output_formatter: Callable[..., str] | None = None,
    # prefect passthrough
    name: str | None = None,
    version: str | None = None,
    flow_run_name: Union[Callable[[], str], str] | None = None,
    retries: int | None = None,
    retry_delay_seconds: int | float | None = None,
    task_runner: TaskRunner[PrefectFuture[Any]] | None = None,
    description: str | None = None,
    timeout_seconds: int | float | None = None,
    validate_parameters: bool = True,
    persist_result: bool | None = None,
    result_storage: ResultStorage | str | None = None,
    result_serializer: ResultSerializer | str | None = None,
    cache_result_in_memory: bool = True,
    log_prints: bool | None = None,
    on_completion: list[FlowStateHook[Any, Any]] | None = None,
    on_failure: list[FlowStateHook[Any, Any]] | None = None,
    on_cancellation: list[FlowStateHook[Any, Any]] | None = None,
    on_crashed: list[FlowStateHook[Any, Any]] | None = None,
    on_running: list[FlowStateHook[Any, Any]] | None = None,
) -> _FlowLike[FO_contra] | Callable[[_DocumentsFlowCallable[FO_contra]], _FlowLike[FO_contra]]:
    """Decorate an async flow for document processing.

    @public

    The target is wrapped in a small coroutine that checks the return type,
    that wrapper is passed through ``trace``, and the traced callable is
    handed to Prefect's ``flow`` decorator. The target MUST be declared with
    ``async def`` and follow the required signature.

    IMPORTANT: Never combine with @trace - tracing is applied here already,
    and a target that carries the trace marker is rejected with TypeError.

    Best Practice - Use Defaults:
        For most use cases, use this decorator WITHOUT any parameters.
        Only specify parameters when you have EXPLICIT requirements.

    Required function signature:
        async def flow_fn(
            project_name: str,          # Project/pipeline identifier
            documents: DocumentList,    # Input documents to process
            flow_options: FlowOptions,  # Configuration (or subclass)
            *args,                      # Additional positional args
            **kwargs                    # Additional keyword args
        ) -> DocumentList               # Must return DocumentList

        Only the parameter count (at least three) is checked at decoration
        time; the first three arguments are always passed positionally.

    Args:
        __fn: Function to decorate (when used without parentheses).

        Tracing parameters (forwarded to ``trace``):
            trace_level: When to trace ("always", "debug", "off").
            trace_ignore_input: Don't trace input arguments.
            trace_ignore_output: Don't trace return value.
            trace_ignore_inputs: Parameter names to exclude from tracing.
            trace_input_formatter: Custom input formatter.
            trace_output_formatter: Custom output formatter.

        Prefect flow parameters (forwarded to Prefect's ``flow``):
            name: Flow name (defaults to the function name); also used as
                the name passed to ``trace``.
            version: Flow version identifier.
            flow_run_name: Static or dynamic run name (defaults to ``name``,
                then to the function name).
            retries: Number of flow retry attempts (``None`` is sent as 0).
            retry_delay_seconds: Delay between flow retries.
            task_runner: Task execution strategy.
            description: Human-readable flow description.
            timeout_seconds: Flow execution timeout.
            validate_parameters: Validate input parameters.
            persist_result: Persist flow results.
            result_storage: Where to store results.
            result_serializer: How to serialize results.
            cache_result_in_memory: Keep results in memory.
            log_prints: Capture print() statements.
            on_completion: Hooks for successful completion.
            on_failure: Hooks for flow failure.
            on_cancellation: Hooks for flow cancellation.
            on_crashed: Hooks for flow crashes.
            on_running: Hooks for flow start.

    Returns:
        The decorated flow when used without parentheses, otherwise a
        decorator that produces it.

    Raises:
        TypeError: At decoration time, if the target is not ``async def``,
            is already decorated with @trace, or declares fewer than three
            parameters. At call time, if the flow returns something other
            than a DocumentList.

    Example:
        >>> from ai_pipeline_core import FlowOptions
        >>>
        >>> # RECOMMENDED - No parameters needed!
        >>> @pipeline_flow
        ... async def analyze_documents(
        ...     project_name: str,
        ...     documents: DocumentList,
        ...     flow_options: FlowOptions
        ... ) -> DocumentList:
        ...     results = []
        ...     for doc in documents:
        ...         result = await process(doc)
        ...         results.append(result)
        ...     return DocumentList(results)
        >>>
        >>> # With parameters (only when necessary):
        >>> @pipeline_flow(retries=2)  # Only for flows that need retry logic
        ... async def critical_flow(
        ...     project_name: str,
        ...     documents: DocumentList,
        ...     flow_options: FlowOptions
        ... ) -> DocumentList:
        ...     return await process_critical(documents)

    See Also:
        - pipeline_task: For task-level decoration
        - FlowConfig: Type-safe flow configuration
        - FlowOptions: Base class for flow options
        - simple_runner.run_pipeline: Execute flows locally
    """
    flow_decorator: Callable[..., Any] = _prefect_flow

    def _apply(fn: _DocumentsFlowCallable[FO_contra]) -> _FlowLike[FO_contra]:
        """Validate *fn*, add the return-type check, trace it, make it a flow.

        Returns:
            Wrapped flow with tracing and Prefect functionality.

        Raises:
            TypeError: If function is not async, already traced, or doesn't
                have the required parameters.
        """
        fname = _callable_name(fn, "flow")

        if not inspect.iscoroutinefunction(fn):
            raise TypeError(f"@pipeline_flow '{fname}' must be declared with 'async def'")

        # Double tracing is rejected up front rather than silently nested.
        if _is_already_traced(fn):
            raise TypeError(
                f"@pipeline_flow target '{fname}' is already decorated "
                f"with @trace. Remove the @trace decorator - @pipeline_flow includes "
                f"tracing automatically."
            )

        # Arity check only: parameter names and kinds are not inspected.
        if len(inspect.signature(fn).parameters) < 3:
            raise TypeError(
                f"@pipeline_flow '{fname}' must accept "
                "'project_name, documents, flow_options' as its first three parameters"
            )

        async def _wrapper(
            project_name: str,
            documents: DocumentList,
            flow_options: FO_contra,
            *args: Any,
            **kwargs: Any,
        ) -> DocumentList:
            result = await fn(project_name, documents, flow_options, *args, **kwargs)
            if not isinstance(result, DocumentList):  # pyright: ignore[reportUnnecessaryIsInstance]
                raise TypeError(
                    f"Flow '{fname}' must return DocumentList, got {type(result).__name__}"
                )
            return result

        # Give the wrapper the target's identity, so tools downstream see the
        # user's function rather than "_wrapper".
        _wrapper.__name__ = fname
        _wrapper.__qualname__ = getattr(fn, "__qualname__", fname)
        # Carry the docstring across as well; without this the user's
        # documentation never reaches trace/Prefect.
        # NOTE(review): Prefect is understood to fall back to the function
        # docstring for the flow description - confirm for the pinned version.
        _wrapper.__doc__ = getattr(fn, "__doc__", None)

        # The same label names the trace and the Prefect flow.
        label = name or fname

        traced = trace(
            level=trace_level,
            name=label,
            ignore_input=trace_ignore_input,
            ignore_output=trace_ignore_output,
            ignore_inputs=trace_ignore_inputs,
            input_formatter=trace_input_formatter,
            output_formatter=trace_output_formatter,
        )(_wrapper)

        return cast(
            _FlowLike[FO_contra],
            flow_decorator(
                name=label,
                version=version,
                flow_run_name=flow_run_name or label,
                retries=0 if retries is None else retries,
                retry_delay_seconds=retry_delay_seconds,
                task_runner=task_runner,
                description=description,
                timeout_seconds=timeout_seconds,
                validate_parameters=validate_parameters,
                persist_result=persist_result,
                result_storage=result_storage,
                result_serializer=result_serializer,
                cache_result_in_memory=cache_result_in_memory,
                log_prints=log_prints,
                on_completion=on_completion,
                on_failure=on_failure,
                on_cancellation=on_cancellation,
                on_crashed=on_crashed,
                on_running=on_running,
            )(traced),
        )

    # Compare against None explicitly: a callable's truthiness is not a
    # reliable signal of whether the decorator was used with parentheses.
    return _apply(__fn) if __fn is not None else _apply
715
-
716
-
717
# Public names of this module: only the two decorators are exported by
# ``from ... import *``; the Protocols and helpers above stay private.
__all__ = ["pipeline_task", "pipeline_flow"]