ai-pipeline-core 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96):
  1. ai_pipeline_core/__init__.py +64 -158
  2. ai_pipeline_core/deployment/__init__.py +6 -18
  3. ai_pipeline_core/deployment/base.py +392 -212
  4. ai_pipeline_core/deployment/contract.py +6 -10
  5. ai_pipeline_core/{utils → deployment}/deploy.py +50 -69
  6. ai_pipeline_core/deployment/helpers.py +16 -17
  7. ai_pipeline_core/{progress.py → deployment/progress.py} +23 -24
  8. ai_pipeline_core/{utils/remote_deployment.py → deployment/remote.py} +11 -14
  9. ai_pipeline_core/docs_generator/__init__.py +54 -0
  10. ai_pipeline_core/docs_generator/__main__.py +5 -0
  11. ai_pipeline_core/docs_generator/cli.py +196 -0
  12. ai_pipeline_core/docs_generator/extractor.py +324 -0
  13. ai_pipeline_core/docs_generator/guide_builder.py +644 -0
  14. ai_pipeline_core/docs_generator/trimmer.py +35 -0
  15. ai_pipeline_core/docs_generator/validator.py +114 -0
  16. ai_pipeline_core/document_store/__init__.py +13 -0
  17. ai_pipeline_core/document_store/_summary.py +9 -0
  18. ai_pipeline_core/document_store/_summary_worker.py +170 -0
  19. ai_pipeline_core/document_store/clickhouse.py +492 -0
  20. ai_pipeline_core/document_store/factory.py +38 -0
  21. ai_pipeline_core/document_store/local.py +312 -0
  22. ai_pipeline_core/document_store/memory.py +85 -0
  23. ai_pipeline_core/document_store/protocol.py +68 -0
  24. ai_pipeline_core/documents/__init__.py +12 -14
  25. ai_pipeline_core/documents/_context_vars.py +85 -0
  26. ai_pipeline_core/documents/_hashing.py +52 -0
  27. ai_pipeline_core/documents/attachment.py +85 -0
  28. ai_pipeline_core/documents/context.py +128 -0
  29. ai_pipeline_core/documents/document.py +318 -1434
  30. ai_pipeline_core/documents/mime_type.py +11 -84
  31. ai_pipeline_core/documents/utils.py +4 -12
  32. ai_pipeline_core/exceptions.py +10 -62
  33. ai_pipeline_core/images/__init__.py +32 -85
  34. ai_pipeline_core/images/_processing.py +5 -11
  35. ai_pipeline_core/llm/__init__.py +6 -4
  36. ai_pipeline_core/llm/ai_messages.py +102 -90
  37. ai_pipeline_core/llm/client.py +229 -183
  38. ai_pipeline_core/llm/model_options.py +12 -84
  39. ai_pipeline_core/llm/model_response.py +53 -99
  40. ai_pipeline_core/llm/model_types.py +8 -23
  41. ai_pipeline_core/logging/__init__.py +2 -7
  42. ai_pipeline_core/logging/logging.yml +1 -1
  43. ai_pipeline_core/logging/logging_config.py +27 -37
  44. ai_pipeline_core/logging/logging_mixin.py +15 -41
  45. ai_pipeline_core/observability/__init__.py +32 -0
  46. ai_pipeline_core/observability/_debug/__init__.py +30 -0
  47. ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
  48. ai_pipeline_core/{debug/config.py → observability/_debug/_config.py} +11 -7
  49. ai_pipeline_core/{debug/content.py → observability/_debug/_content.py} +133 -75
  50. ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py} +16 -17
  51. ai_pipeline_core/{debug/summary.py → observability/_debug/_summary.py} +113 -37
  52. ai_pipeline_core/observability/_debug/_types.py +75 -0
  53. ai_pipeline_core/{debug/writer.py → observability/_debug/_writer.py} +126 -196
  54. ai_pipeline_core/observability/_document_tracking.py +146 -0
  55. ai_pipeline_core/observability/_initialization.py +194 -0
  56. ai_pipeline_core/observability/_logging_bridge.py +57 -0
  57. ai_pipeline_core/observability/_summary.py +81 -0
  58. ai_pipeline_core/observability/_tracking/__init__.py +6 -0
  59. ai_pipeline_core/observability/_tracking/_client.py +178 -0
  60. ai_pipeline_core/observability/_tracking/_internal.py +28 -0
  61. ai_pipeline_core/observability/_tracking/_models.py +138 -0
  62. ai_pipeline_core/observability/_tracking/_processor.py +158 -0
  63. ai_pipeline_core/observability/_tracking/_service.py +311 -0
  64. ai_pipeline_core/observability/_tracking/_writer.py +229 -0
  65. ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -335
  66. ai_pipeline_core/pipeline/__init__.py +10 -0
  67. ai_pipeline_core/pipeline/decorators.py +915 -0
  68. ai_pipeline_core/pipeline/options.py +16 -0
  69. ai_pipeline_core/prompt_manager.py +16 -102
  70. ai_pipeline_core/settings.py +26 -31
  71. ai_pipeline_core/testing.py +9 -0
  72. ai_pipeline_core-0.4.0.dist-info/METADATA +807 -0
  73. ai_pipeline_core-0.4.0.dist-info/RECORD +76 -0
  74. ai_pipeline_core/debug/__init__.py +0 -26
  75. ai_pipeline_core/documents/document_list.py +0 -420
  76. ai_pipeline_core/documents/flow_document.py +0 -112
  77. ai_pipeline_core/documents/task_document.py +0 -117
  78. ai_pipeline_core/documents/temporary_document.py +0 -74
  79. ai_pipeline_core/flow/__init__.py +0 -9
  80. ai_pipeline_core/flow/config.py +0 -494
  81. ai_pipeline_core/flow/options.py +0 -75
  82. ai_pipeline_core/pipeline.py +0 -718
  83. ai_pipeline_core/prefect.py +0 -63
  84. ai_pipeline_core/prompt_builder/__init__.py +0 -5
  85. ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -23
  86. ai_pipeline_core/prompt_builder/global_cache.py +0 -78
  87. ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -6
  88. ai_pipeline_core/prompt_builder/prompt_builder.py +0 -253
  89. ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -41
  90. ai_pipeline_core/storage/__init__.py +0 -8
  91. ai_pipeline_core/storage/storage.py +0 -628
  92. ai_pipeline_core/utils/__init__.py +0 -8
  93. ai_pipeline_core-0.3.4.dist-info/METADATA +0 -569
  94. ai_pipeline_core-0.3.4.dist-info/RECORD +0 -57
  95. {ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/WHEEL +0 -0
  96. {ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,718 +0,0 @@
1
- """Pipeline decorators with Prefect integration and tracing.
2
-
3
- @public
4
-
5
- Wrappers around Prefect's @task and @flow that add Laminar tracing
6
- and enforce async-only execution for consistency.
7
- """
8
-
9
- from __future__ import annotations
10
-
11
- import datetime
12
- import inspect
13
- from functools import wraps
14
- from typing import (
15
- Any,
16
- Callable,
17
- Coroutine,
18
- Iterable,
19
- Protocol,
20
- TypeVar,
21
- Union,
22
- cast,
23
- overload,
24
- )
25
-
26
- from prefect.assets import Asset
27
- from prefect.cache_policies import CachePolicy
28
- from prefect.context import TaskRunContext
29
- from prefect.flows import FlowStateHook
30
- from prefect.flows import flow as _prefect_flow # public import
31
- from prefect.futures import PrefectFuture
32
- from prefect.results import ResultSerializer, ResultStorage
33
- from prefect.task_runners import TaskRunner
34
- from prefect.tasks import task as _prefect_task # public import
35
- from prefect.utilities.annotations import NotSet
36
- from typing_extensions import TypeAlias
37
-
38
- from ai_pipeline_core.documents import DocumentList
39
- from ai_pipeline_core.flow.config import FlowConfig
40
- from ai_pipeline_core.flow.options import FlowOptions
41
- from ai_pipeline_core.tracing import TraceLevel, set_trace_cost, trace
42
-
43
- # --------------------------------------------------------------------------- #
44
- # Public callback aliases (Prefect stubs omit these exact types)
45
- # --------------------------------------------------------------------------- #
46
- RetryConditionCallable: TypeAlias = Callable[[Any, Any, Any], bool]
47
- StateHookCallable: TypeAlias = Callable[[Any, Any, Any], None]
48
- TaskRunNameValueOrCallable: TypeAlias = Union[str, Callable[[], str]]
49
-
50
- # --------------------------------------------------------------------------- #
51
- # Typing helpers
52
- # --------------------------------------------------------------------------- #
53
- R_co = TypeVar("R_co", covariant=True)
54
- FO_contra = TypeVar("FO_contra", bound=FlowOptions, contravariant=True)
55
- """Flow options are an *input* type, so contravariant fits the callable model."""
56
-
57
-
58
- class _TaskLike(Protocol[R_co]):
59
- """Protocol for type-safe Prefect task representation.
60
-
61
- Defines the minimal interface for a Prefect task as seen by
62
- type checkers. Ensures tasks are awaitable and have common
63
- Prefect task methods.
64
-
65
- Type Parameter:
66
- R_co: Covariant return type of the task.
67
-
68
- Methods:
69
- __call__: Makes the task awaitable.
70
- submit: Submit task for asynchronous execution.
71
- map: Map task over multiple inputs.
72
-
73
- Attributes:
74
- name: Optional task name.
75
-
76
- Note:
77
- This is a typing Protocol, not a runtime class.
78
- __getattr__ allows accessing Prefect-specific helpers.
79
- """
80
-
81
- def __call__(self, *args: Any, **kwargs: Any) -> Coroutine[Any, Any, R_co]: ...
82
-
83
- submit: Callable[..., Any]
84
- map: Callable[..., Any]
85
- name: str | None
86
-
87
- def __getattr__(self, name: str) -> Any: ... # allow unknown helpers without type errors
88
-
89
-
90
- class _DocumentsFlowCallable(Protocol[FO_contra]):
91
- """Protocol for user-defined flow functions.
92
-
93
- Defines the required signature for functions that will be
94
- decorated with @pipeline_flow. Enforces the standard parameters
95
- for document processing flows.
96
-
97
- Type Parameter:
98
- FO_contra: Contravariant FlowOptions type (or subclass).
99
-
100
- Required Parameters:
101
- project_name: Name of the project/pipeline.
102
- documents: Input DocumentList to process.
103
- flow_options: Configuration options (FlowOptions or subclass).
104
-
105
- Returns:
106
- DocumentList: Processed documents.
107
-
108
- Note:
109
- Functions must be async and return DocumentList.
110
- """
111
-
112
- def __call__(
113
- self,
114
- project_name: str,
115
- documents: DocumentList,
116
- flow_options: FO_contra,
117
- ) -> Coroutine[Any, Any, DocumentList]: ...
118
-
119
-
120
- class _FlowLike(Protocol[FO_contra]):
121
- """Protocol for decorated flow objects returned to users.
122
-
123
- Represents the callable object returned by @pipeline_flow,
124
- which wraps the original flow function with Prefect and
125
- tracing capabilities.
126
-
127
- Type Parameter:
128
- FO_contra: Contravariant FlowOptions type.
129
-
130
- Callable Signature:
131
- Same as _DocumentsFlowCallable - accepts project_name,
132
- documents, flow_options, plus additional arguments.
133
-
134
- Attributes:
135
- name: Optional flow name from decorator.
136
-
137
- Note:
138
- __getattr__ provides access to all Prefect flow methods
139
- without explicit typing (e.g., .serve(), .deploy()).
140
- """
141
-
142
- def __call__(
143
- self,
144
- project_name: str,
145
- documents: DocumentList,
146
- flow_options: FO_contra,
147
- ) -> Coroutine[Any, Any, DocumentList]: ...
148
-
149
- name: str | None
150
-
151
- def __getattr__(self, name: str) -> Any: ... # allow unknown helpers without type errors
152
-
153
-
154
- # --------------------------------------------------------------------------- #
155
- # Small helper: safely get a callable's name without upsetting the type checker
156
- # --------------------------------------------------------------------------- #
157
- def _callable_name(obj: Any, fallback: str) -> str:
158
- """Safely extract callable's name for error messages.
159
-
160
- Args:
161
- obj: Any object that might have a __name__ attribute.
162
- fallback: Default name if extraction fails.
163
-
164
- Returns:
165
- The callable's __name__ if available, fallback otherwise.
166
-
167
- Note:
168
- Internal helper that never raises exceptions.
169
- """
170
- try:
171
- n = getattr(obj, "__name__", None)
172
- return n if isinstance(n, str) else fallback
173
- except Exception:
174
- return fallback
175
-
176
-
177
- def _is_already_traced(func: Callable[..., Any]) -> bool:
178
- """Check if a function has already been wrapped by the trace decorator.
179
-
180
- This checks both for the explicit __is_traced__ marker and walks
181
- the __wrapped__ chain to detect nested trace decorations.
182
-
183
- Args:
184
- func: Function to check for existing trace decoration.
185
-
186
- Returns:
187
- True if the function is already traced, False otherwise.
188
- """
189
- # Check for explicit marker
190
- if hasattr(func, "__is_traced__") and func.__is_traced__: # type: ignore[attr-defined]
191
- return True
192
-
193
- # Walk the __wrapped__ chain to detect nested traces
194
- current = func
195
- depth = 0
196
- max_depth = 10 # Prevent infinite loops
197
-
198
- while hasattr(current, "__wrapped__") and depth < max_depth:
199
- wrapped = current.__wrapped__ # type: ignore[attr-defined]
200
- # Check if the wrapped function has the trace marker
201
- if hasattr(wrapped, "__is_traced__") and wrapped.__is_traced__: # type: ignore[attr-defined]
202
- return True
203
- current = wrapped
204
- depth += 1
205
-
206
- return False
207
-
208
-
209
- # --------------------------------------------------------------------------- #
210
- # @pipeline_task — async-only, traced, returns Prefect's Task object
211
- # --------------------------------------------------------------------------- #
212
- @overload
213
- def pipeline_task(__fn: Callable[..., Coroutine[Any, Any, R_co]], /) -> _TaskLike[R_co]: ...
214
- @overload
215
- def pipeline_task(
216
- *,
217
- # tracing
218
- trace_level: TraceLevel = "always",
219
- trace_ignore_input: bool = False,
220
- trace_ignore_output: bool = False,
221
- trace_ignore_inputs: list[str] | None = None,
222
- trace_input_formatter: Callable[..., str] | None = None,
223
- trace_output_formatter: Callable[..., str] | None = None,
224
- trace_cost: float | None = None,
225
- trace_trim_documents: bool = True,
226
- # prefect passthrough
227
- name: str | None = None,
228
- description: str | None = None,
229
- tags: Iterable[str] | None = None,
230
- version: str | None = None,
231
- cache_policy: CachePolicy | type[NotSet] = NotSet,
232
- cache_key_fn: Callable[[TaskRunContext, dict[str, Any]], str | None] | None = None,
233
- cache_expiration: datetime.timedelta | None = None,
234
- task_run_name: TaskRunNameValueOrCallable | None = None,
235
- retries: int | None = None,
236
- retry_delay_seconds: int | float | list[float] | Callable[[int], list[float]] | None = None,
237
- retry_jitter_factor: float | None = None,
238
- persist_result: bool | None = None,
239
- result_storage: ResultStorage | str | None = None,
240
- result_serializer: ResultSerializer | str | None = None,
241
- result_storage_key: str | None = None,
242
- cache_result_in_memory: bool = True,
243
- timeout_seconds: int | float | None = None,
244
- log_prints: bool | None = False,
245
- refresh_cache: bool | None = None,
246
- on_completion: list[StateHookCallable] | None = None,
247
- on_failure: list[StateHookCallable] | None = None,
248
- retry_condition_fn: RetryConditionCallable | None = None,
249
- viz_return_value: bool | None = None,
250
- asset_deps: list[str | Asset] | None = None,
251
- ) -> Callable[[Callable[..., Coroutine[Any, Any, R_co]]], _TaskLike[R_co]]: ...
252
-
253
-
254
- def pipeline_task(
255
- __fn: Callable[..., Coroutine[Any, Any, R_co]] | None = None,
256
- /,
257
- *,
258
- # tracing
259
- trace_level: TraceLevel = "always",
260
- trace_ignore_input: bool = False,
261
- trace_ignore_output: bool = False,
262
- trace_ignore_inputs: list[str] | None = None,
263
- trace_input_formatter: Callable[..., str] | None = None,
264
- trace_output_formatter: Callable[..., str] | None = None,
265
- trace_cost: float | None = None,
266
- trace_trim_documents: bool = True,
267
- # prefect passthrough
268
- name: str | None = None,
269
- description: str | None = None,
270
- tags: Iterable[str] | None = None,
271
- version: str | None = None,
272
- cache_policy: CachePolicy | type[NotSet] = NotSet,
273
- cache_key_fn: Callable[[TaskRunContext, dict[str, Any]], str | None] | None = None,
274
- cache_expiration: datetime.timedelta | None = None,
275
- task_run_name: TaskRunNameValueOrCallable | None = None,
276
- retries: int | None = None,
277
- retry_delay_seconds: int | float | list[float] | Callable[[int], list[float]] | None = None,
278
- retry_jitter_factor: float | None = None,
279
- persist_result: bool | None = None,
280
- result_storage: ResultStorage | str | None = None,
281
- result_serializer: ResultSerializer | str | None = None,
282
- result_storage_key: str | None = None,
283
- cache_result_in_memory: bool = True,
284
- timeout_seconds: int | float | None = None,
285
- log_prints: bool | None = False,
286
- refresh_cache: bool | None = None,
287
- on_completion: list[StateHookCallable] | None = None,
288
- on_failure: list[StateHookCallable] | None = None,
289
- retry_condition_fn: RetryConditionCallable | None = None,
290
- viz_return_value: bool | None = None,
291
- asset_deps: list[str | Asset] | None = None,
292
- ) -> _TaskLike[R_co] | Callable[[Callable[..., Coroutine[Any, Any, R_co]]], _TaskLike[R_co]]:
293
- """Decorate an async function as a traced Prefect task.
294
-
295
- @public
296
-
297
- Wraps an async function with both Prefect task functionality and
298
- LMNR tracing. The function MUST be async (declared with 'async def').
299
-
300
- IMPORTANT: Never combine with @trace decorator - this includes tracing automatically.
301
- The framework will raise TypeError if you try to use both decorators together.
302
-
303
- Best Practice - Use Defaults:
304
- For 90% of use cases, use this decorator WITHOUT any parameters.
305
- Only specify parameters when you have EXPLICIT requirements.
306
-
307
- Args:
308
- __fn: Function to decorate (when used without parentheses).
309
- trace_level: When to trace ("always", "debug", "off").
310
- - "always": Always trace (default)
311
- - "debug": Only trace when LMNR_DEBUG="true"
312
- - "off": Disable tracing
313
- trace_ignore_input: Don't trace input arguments.
314
- trace_ignore_output: Don't trace return value.
315
- trace_ignore_inputs: List of parameter names to exclude from tracing.
316
- trace_input_formatter: Custom formatter for input tracing.
317
- trace_output_formatter: Custom formatter for output tracing.
318
- trace_cost: Optional cost value to track in metadata. When provided and > 0,
319
- sets gen_ai.usage.output_cost, gen_ai.usage.cost, and cost metadata.
320
- Also forces trace level to "always" if not already set.
321
- trace_trim_documents: Trim document content in traces to first 100 chars (default True).
322
- Reduces trace size with large documents.
323
- name: Task name (defaults to function name).
324
- description: Human-readable task description.
325
- tags: Tags for organization and filtering.
326
- version: Task version string.
327
- cache_policy: Caching policy for task results.
328
- cache_key_fn: Custom cache key generation.
329
- cache_expiration: How long to cache results.
330
- task_run_name: Dynamic or static run name.
331
- retries: Number of retry attempts (default 0).
332
- retry_delay_seconds: Delay between retries.
333
- retry_jitter_factor: Random jitter for retry delays.
334
- persist_result: Whether to persist results.
335
- result_storage: Where to store results.
336
- result_serializer: How to serialize results.
337
- result_storage_key: Custom storage key.
338
- cache_result_in_memory: Keep results in memory.
339
- timeout_seconds: Task execution timeout.
340
- log_prints: Capture print() statements.
341
- refresh_cache: Force cache refresh.
342
- on_completion: Hooks for successful completion.
343
- on_failure: Hooks for task failure.
344
- retry_condition_fn: Custom retry condition.
345
- viz_return_value: Include return value in visualization.
346
- asset_deps: Upstream asset dependencies.
347
-
348
- Returns:
349
- Decorated task callable that is awaitable and has Prefect
350
- task methods (submit, map, etc.).
351
-
352
- Example:
353
- >>> # RECOMMENDED - No parameters needed!
354
- >>> @pipeline_task
355
- >>> async def process_document(doc: Document) -> Document:
356
- ... result = await analyze(doc)
357
- ... return result
358
- >>>
359
- >>> # With parameters (only when necessary):
360
- >>> @pipeline_task(retries=5) # Only for known flaky operations
361
- >>> async def unreliable_api_call(url: str) -> dict:
362
- ... # This API fails often, needs extra retries
363
- ... return await fetch_with_retry(url)
364
- >>>
365
- >>> # AVOID specifying defaults - they're already optimal:
366
- >>> # - Automatic task naming
367
- >>> # - Standard retry policy
368
- >>> # - Sensible timeout
369
- >>> # - Full observability
370
-
371
- Performance:
372
- - Task decoration overhead: ~1-2ms
373
- - Tracing overhead: ~1-2ms per call
374
- - Prefect state tracking: ~5-10ms
375
-
376
- Note:
377
- Tasks are automatically traced with LMNR and appear in
378
- both Prefect and LMNR dashboards.
379
-
380
- See Also:
381
- - pipeline_flow: For flow-level decoration
382
- - trace: Lower-level tracing decorator
383
- - prefect.task: Standard Prefect task (no tracing)
384
- """
385
- task_decorator: Callable[..., Any] = _prefect_task # helps the type checker
386
-
387
- def _apply(fn: Callable[..., Coroutine[Any, Any, R_co]]) -> _TaskLike[R_co]:
388
- """Apply pipeline_task decorator to async function.
389
-
390
- Returns:
391
- Wrapped task with tracing and Prefect functionality.
392
-
393
- Raises:
394
- TypeError: If function is not async or already traced.
395
- """
396
- if not inspect.iscoroutinefunction(fn):
397
- raise TypeError(
398
- f"@pipeline_task target '{_callable_name(fn, 'task')}' must be 'async def'"
399
- )
400
-
401
- # Check if function is already traced
402
- if _is_already_traced(fn):
403
- raise TypeError(
404
- f"@pipeline_task target '{_callable_name(fn, 'task')}' is already decorated "
405
- f"with @trace. Remove the @trace decorator - @pipeline_task includes "
406
- f"tracing automatically."
407
- )
408
-
409
- fname = _callable_name(fn, "task")
410
-
411
- # Create wrapper to handle trace_cost if provided
412
- @wraps(fn)
413
- async def _wrapper(*args: Any, **kwargs: Any) -> R_co:
414
- result = await fn(*args, **kwargs)
415
- if trace_cost is not None and trace_cost > 0:
416
- set_trace_cost(trace_cost)
417
- return result
418
-
419
- traced_fn = trace(
420
- level=trace_level,
421
- name=name or fname,
422
- ignore_input=trace_ignore_input,
423
- ignore_output=trace_ignore_output,
424
- ignore_inputs=trace_ignore_inputs,
425
- input_formatter=trace_input_formatter,
426
- output_formatter=trace_output_formatter,
427
- trim_documents=trace_trim_documents,
428
- )(_wrapper)
429
-
430
- return cast(
431
- _TaskLike[R_co],
432
- task_decorator(
433
- name=name or fname,
434
- description=description,
435
- tags=tags,
436
- version=version,
437
- cache_policy=cache_policy,
438
- cache_key_fn=cache_key_fn,
439
- cache_expiration=cache_expiration,
440
- task_run_name=task_run_name or name or fname,
441
- retries=0 if retries is None else retries,
442
- retry_delay_seconds=retry_delay_seconds,
443
- retry_jitter_factor=retry_jitter_factor,
444
- persist_result=persist_result,
445
- result_storage=result_storage,
446
- result_serializer=result_serializer,
447
- result_storage_key=result_storage_key,
448
- cache_result_in_memory=cache_result_in_memory,
449
- timeout_seconds=timeout_seconds,
450
- log_prints=log_prints,
451
- refresh_cache=refresh_cache,
452
- on_completion=on_completion,
453
- on_failure=on_failure,
454
- retry_condition_fn=retry_condition_fn,
455
- viz_return_value=viz_return_value,
456
- asset_deps=asset_deps,
457
- )(traced_fn),
458
- )
459
-
460
- return _apply(__fn) if __fn else _apply
461
-
462
-
463
- # --------------------------------------------------------------------------- #
464
- # @pipeline_flow — async-only, traced, returns Prefect's flow wrapper
465
- # --------------------------------------------------------------------------- #
466
- def pipeline_flow(
467
- *,
468
- # config
469
- config: type[FlowConfig],
470
- # tracing
471
- trace_level: TraceLevel = "always",
472
- trace_ignore_input: bool = False,
473
- trace_ignore_output: bool = False,
474
- trace_ignore_inputs: list[str] | None = None,
475
- trace_input_formatter: Callable[..., str] | None = None,
476
- trace_output_formatter: Callable[..., str] | None = None,
477
- trace_cost: float | None = None,
478
- trace_trim_documents: bool = True,
479
- # prefect passthrough
480
- name: str | None = None,
481
- version: str | None = None,
482
- flow_run_name: Union[Callable[[], str], str] | None = None,
483
- retries: int | None = None,
484
- retry_delay_seconds: int | float | None = None,
485
- task_runner: TaskRunner[PrefectFuture[Any]] | None = None,
486
- description: str | None = None,
487
- timeout_seconds: int | float | None = None,
488
- validate_parameters: bool = True,
489
- persist_result: bool | None = None,
490
- result_storage: ResultStorage | str | None = None,
491
- result_serializer: ResultSerializer | str | None = None,
492
- cache_result_in_memory: bool = True,
493
- log_prints: bool | None = None,
494
- on_completion: list[FlowStateHook[Any, Any]] | None = None,
495
- on_failure: list[FlowStateHook[Any, Any]] | None = None,
496
- on_cancellation: list[FlowStateHook[Any, Any]] | None = None,
497
- on_crashed: list[FlowStateHook[Any, Any]] | None = None,
498
- on_running: list[FlowStateHook[Any, Any]] | None = None,
499
- ) -> Callable[[_DocumentsFlowCallable[FO_contra]], _FlowLike[FO_contra]]:
500
- """Decorate an async flow for document processing.
501
-
502
- @public
503
-
504
- Wraps an async function as a Prefect flow with tracing and type safety.
505
- The decorated function MUST be async and follow the required signature.
506
-
507
- IMPORTANT: Never combine with @trace decorator - this includes tracing automatically.
508
- The framework will raise TypeError if you try to use both decorators together.
509
-
510
- Best Practice - Use Defaults:
511
- For 90% of use cases, use this decorator WITHOUT any parameters.
512
- Only specify parameters when you have EXPLICIT requirements.
513
-
514
- Required function signature:
515
- async def flow_fn(
516
- project_name: str, # Project/pipeline identifier
517
- documents: DocumentList, # Input documents to process
518
- flow_options: FlowOptions, # Configuration (or subclass)
519
- ) -> DocumentList # Must return DocumentList
520
-
521
- Args:
522
- config: Required FlowConfig class for document loading/saving. Enables
523
- automatic loading from string paths and saving outputs.
524
- trace_level: When to trace ("always", "debug", "off").
525
- - "always": Always trace (default)
526
- - "debug": Only trace when LMNR_DEBUG="true"
527
- - "off": Disable tracing
528
- trace_ignore_input: Don't trace input arguments.
529
- trace_ignore_output: Don't trace return value.
530
- trace_ignore_inputs: Parameter names to exclude from tracing.
531
- trace_input_formatter: Custom input formatter.
532
- trace_output_formatter: Custom output formatter.
533
- trace_cost: Optional cost value to track in metadata. When provided and > 0,
534
- sets gen_ai.usage.output_cost, gen_ai.usage.cost, and cost metadata.
535
- Also forces trace level to "always" if not already set.
536
- trace_trim_documents: Trim document content in traces to first 100 chars (default True).
537
- Reduces trace size with large documents.
538
- name: Flow name (defaults to function name).
539
- version: Flow version identifier.
540
- flow_run_name: Static or dynamic run name.
541
- retries: Number of flow retry attempts (default 0).
542
- retry_delay_seconds: Delay between flow retries.
543
- task_runner: Task execution strategy (sequential/concurrent).
544
- description: Human-readable flow description.
545
- timeout_seconds: Flow execution timeout.
546
- validate_parameters: Validate input parameters.
547
- persist_result: Persist flow results.
548
- result_storage: Where to store results.
549
- result_serializer: How to serialize results.
550
- cache_result_in_memory: Keep results in memory.
551
- log_prints: Capture print() statements.
552
- on_completion: Hooks for successful completion.
553
- on_failure: Hooks for flow failure.
554
- on_cancellation: Hooks for flow cancellation.
555
- on_crashed: Hooks for flow crashes.
556
- on_running: Hooks for flow start.
557
-
558
- Returns:
559
- Decorated flow callable that maintains Prefect flow interface
560
- while enforcing document processing conventions.
561
-
562
- Example:
563
- >>> from ai_pipeline_core import FlowOptions, FlowConfig
564
- >>>
565
- >>> class MyFlowConfig(FlowConfig):
566
- ... INPUT_DOCUMENT_TYPES = [InputDoc]
567
- ... OUTPUT_DOCUMENT_TYPE = OutputDoc
568
- >>>
569
- >>> # Standard usage with config
570
- >>> @pipeline_flow(config=MyFlowConfig)
571
- >>> async def analyze_documents(
572
- ... project_name: str,
573
- ... documents: DocumentList,
574
- ... flow_options: FlowOptions
575
- >>> ) -> DocumentList:
576
- ... # Process each document
577
- ... results = []
578
- ... for doc in documents:
579
- ... result = await process(doc)
580
- ... results.append(result)
581
- ... return DocumentList(results)
582
- >>>
583
- >>> # With additional parameters:
584
- >>> @pipeline_flow(config=MyFlowConfig, retries=2)
585
- >>> async def critical_flow(
586
- ... project_name: str,
587
- ... documents: DocumentList,
588
- ... flow_options: FlowOptions
589
- >>> ) -> DocumentList:
590
- ... # Critical processing that might fail
591
- ... return await process_critical(documents)
592
- >>>
593
- >>> # AVOID specifying defaults - they're already optimal:
594
- >>> # - Automatic flow naming
595
- >>> # - Standard retry policy
596
- >>> # - Full observability
597
-
598
- Note:
599
- - Flow is wrapped with both Prefect and LMNR tracing
600
- - Return type is validated at runtime
601
- - FlowOptions can be subclassed for custom configuration
602
- - All Prefect flow methods (.serve(), .deploy()) are available
603
-
604
- See Also:
605
- - pipeline_task: For task-level decoration
606
- - FlowConfig: Type-safe flow configuration
607
- - FlowOptions: Base class for flow options
608
- - PipelineDeployment: Execute flows locally or remotely
609
- """
610
- flow_decorator: Callable[..., Any] = _prefect_flow
611
-
612
- def _apply(fn: _DocumentsFlowCallable[FO_contra]) -> _FlowLike[FO_contra]:
613
- """Apply pipeline_flow decorator to flow function.
614
-
615
- Returns:
616
- Wrapped flow with tracing and Prefect functionality.
617
-
618
- Raises:
619
- TypeError: If function is not async, already traced, doesn't have
620
- required parameters, or doesn't return DocumentList.
621
- """
622
- fname = _callable_name(fn, "flow")
623
-
624
- if not inspect.iscoroutinefunction(fn):
625
- raise TypeError(f"@pipeline_flow '{fname}' must be declared with 'async def'")
626
-
627
- # Check if function is already traced
628
- if _is_already_traced(fn):
629
- raise TypeError(
630
- f"@pipeline_flow target '{fname}' is already decorated "
631
- f"with @trace. Remove the @trace decorator - @pipeline_flow includes "
632
- f"tracing automatically."
633
- )
634
-
635
- if len(inspect.signature(fn).parameters) < 3:
636
- raise TypeError(
637
- f"@pipeline_flow '{fname}' must accept "
638
- "'project_name, documents, flow_options' as its first three parameters"
639
- )
640
-
641
- @wraps(fn)
642
- async def _wrapper(
643
- project_name: str,
644
- documents: str | DocumentList,
645
- flow_options: FO_contra,
646
- ) -> DocumentList:
647
- save_path: str | None = None
648
- if isinstance(documents, str):
649
- save_path = documents
650
- documents = await config.load_documents(documents)
651
- result = await fn(project_name, documents, flow_options)
652
- if save_path:
653
- await config.save_documents(save_path, result)
654
- if trace_cost is not None and trace_cost > 0:
655
- set_trace_cost(trace_cost)
656
- if not isinstance(result, DocumentList): # pyright: ignore[reportUnnecessaryIsInstance]
657
- raise TypeError(
658
- f"Flow '{fname}' must return DocumentList, got {type(result).__name__}"
659
- )
660
- return result
661
-
662
- traced = trace(
663
- level=trace_level,
664
- name=name or fname,
665
- ignore_input=trace_ignore_input,
666
- ignore_output=trace_ignore_output,
667
- ignore_inputs=trace_ignore_inputs,
668
- input_formatter=trace_input_formatter,
669
- output_formatter=trace_output_formatter,
670
- trim_documents=trace_trim_documents,
671
- )(_wrapper)
672
-
673
- # --- Publish a schema where `documents` accepts str (path) OR DocumentList ---
674
- _sig = inspect.signature(fn)
675
- _params = [
676
- p.replace(annotation=(str | DocumentList)) if p.name == "documents" else p
677
- for p in _sig.parameters.values()
678
- ]
679
- if hasattr(traced, "__signature__"):
680
- setattr(traced, "__signature__", _sig.replace(parameters=_params))
681
- if hasattr(traced, "__annotations__"):
682
- traced.__annotations__ = {
683
- **getattr(traced, "__annotations__", {}),
684
- "documents": str | DocumentList,
685
- }
686
-
687
- flow_obj = cast(
688
- _FlowLike[FO_contra],
689
- flow_decorator(
690
- name=name or fname,
691
- version=version,
692
- flow_run_name=flow_run_name or name or fname,
693
- retries=0 if retries is None else retries,
694
- retry_delay_seconds=retry_delay_seconds,
695
- task_runner=task_runner,
696
- description=description,
697
- timeout_seconds=timeout_seconds,
698
- validate_parameters=validate_parameters,
699
- persist_result=persist_result,
700
- result_storage=result_storage,
701
- result_serializer=result_serializer,
702
- cache_result_in_memory=cache_result_in_memory,
703
- log_prints=log_prints,
704
- on_completion=on_completion,
705
- on_failure=on_failure,
706
- on_cancellation=on_cancellation,
707
- on_crashed=on_crashed,
708
- on_running=on_running,
709
- )(traced),
710
- )
711
- # Attach config to the flow object for later access
712
- flow_obj.config = config # type: ignore[attr-defined]
713
- return flow_obj
714
-
715
- return _apply
716
-
717
-
718
- __all__ = ["pipeline_task", "pipeline_flow"]