ai-pipeline-core 0.1.10__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. ai_pipeline_core/__init__.py +84 -4
  2. ai_pipeline_core/documents/__init__.py +9 -0
  3. ai_pipeline_core/documents/document.py +1034 -151
  4. ai_pipeline_core/documents/document_list.py +147 -38
  5. ai_pipeline_core/documents/flow_document.py +112 -11
  6. ai_pipeline_core/documents/mime_type.py +173 -15
  7. ai_pipeline_core/documents/task_document.py +117 -12
  8. ai_pipeline_core/documents/temporary_document.py +84 -5
  9. ai_pipeline_core/documents/utils.py +41 -9
  10. ai_pipeline_core/exceptions.py +47 -11
  11. ai_pipeline_core/flow/__init__.py +2 -0
  12. ai_pipeline_core/flow/config.py +232 -23
  13. ai_pipeline_core/flow/options.py +50 -1
  14. ai_pipeline_core/llm/__init__.py +6 -0
  15. ai_pipeline_core/llm/ai_messages.py +125 -27
  16. ai_pipeline_core/llm/client.py +278 -26
  17. ai_pipeline_core/llm/model_options.py +130 -1
  18. ai_pipeline_core/llm/model_response.py +239 -35
  19. ai_pipeline_core/llm/model_types.py +67 -0
  20. ai_pipeline_core/logging/__init__.py +13 -0
  21. ai_pipeline_core/logging/logging_config.py +72 -20
  22. ai_pipeline_core/logging/logging_mixin.py +38 -32
  23. ai_pipeline_core/pipeline.py +308 -60
  24. ai_pipeline_core/prefect.py +48 -1
  25. ai_pipeline_core/prompt_manager.py +209 -24
  26. ai_pipeline_core/settings.py +108 -4
  27. ai_pipeline_core/simple_runner/__init__.py +5 -0
  28. ai_pipeline_core/simple_runner/cli.py +96 -11
  29. ai_pipeline_core/simple_runner/simple_runner.py +237 -4
  30. ai_pipeline_core/tracing.py +232 -30
  31. ai_pipeline_core-0.1.11.dist-info/METADATA +450 -0
  32. ai_pipeline_core-0.1.11.dist-info/RECORD +36 -0
  33. ai_pipeline_core-0.1.10.dist-info/METADATA +0 -538
  34. ai_pipeline_core-0.1.10.dist-info/RECORD +0 -36
  35. {ai_pipeline_core-0.1.10.dist-info → ai_pipeline_core-0.1.11.dist-info}/WHEEL +0 -0
  36. {ai_pipeline_core-0.1.10.dist-info → ai_pipeline_core-0.1.11.dist-info}/licenses/LICENSE +0 -0
@@ -1,4 +1,7 @@
1
- """Logging mixin for consistent logging across components using Prefect logging"""
1
+ """Logging mixin for consistent logging across components using Prefect logging.
2
+
3
+ @public
4
+ """
2
5
 
3
6
  import contextlib
4
7
  import time
@@ -12,12 +15,17 @@ from prefect.logging import get_logger
12
15
 
13
16
 
14
17
  class LoggerMixin:
15
- """
16
- Mixin class that provides consistent logging functionality using Prefect's logging system
18
+ """Mixin class that provides consistent logging functionality using Prefect's logging system.
19
+
20
+ @public
21
+
22
+ Note for users: In your code, always obtain loggers via get_pipeline_logger(__name__).
23
+ The mixin's internal behavior routes to the appropriate backend; you should not call
24
+ logging.getLogger directly.
17
25
 
18
26
  Automatically uses appropriate logger based on context:
19
- - get_run_logger() when in flow/task context
20
- - get_logger() when outside flow/task context
27
+ - prefect.get_run_logger() when in flow/task context
28
+ - Internal routing when outside flow/task context
21
29
  """
22
30
 
23
31
  _logger_name: Optional[str] = None
@@ -30,7 +38,11 @@ class LoggerMixin:
30
38
  return get_logger(self._logger_name or self.__class__.__module__)
31
39
 
32
40
  def _get_run_logger(self):
33
- """Attempt to get Prefect run logger."""
41
+ """Attempt to get Prefect run logger.
42
+
43
+ Returns:
44
+ The Prefect run logger if in a flow/task context, None otherwise.
45
+ """
34
46
  # Intentionally broad: Must handle any exception when checking context
35
47
  with contextlib.suppress(Exception):
36
48
  if FlowRunContext.get() or TaskRunContext.get():
@@ -38,28 +50,27 @@ class LoggerMixin:
38
50
  return None
39
51
 
40
52
  def log_debug(self, message: str, **kwargs: Any) -> None:
41
- """Log debug message with optional context"""
53
+ """Log debug message with optional context."""
42
54
  self.logger.debug(message, extra=kwargs)
43
55
 
44
56
  def log_info(self, message: str, **kwargs: Any) -> None:
45
- """Log info message with optional context"""
57
+ """Log info message with optional context."""
46
58
  self.logger.info(message, extra=kwargs)
47
59
 
48
60
  def log_warning(self, message: str, **kwargs: Any) -> None:
49
- """Log warning message with optional context"""
61
+ """Log warning message with optional context."""
50
62
  self.logger.warning(message, extra=kwargs)
51
63
 
52
64
  def log_error(self, message: str, exc_info: bool = False, **kwargs: Any) -> None:
53
- """Log error message with optional exception info"""
65
+ """Log error message with optional exception info."""
54
66
  self.logger.error(message, exc_info=exc_info, extra=kwargs)
55
67
 
56
68
  def log_critical(self, message: str, exc_info: bool = False, **kwargs: Any) -> None:
57
- """Log critical message with optional exception info"""
69
+ """Log critical message with optional exception info."""
58
70
  self.logger.critical(message, exc_info=exc_info, extra=kwargs)
59
71
 
60
72
  def log_with_context(self, level: str, message: str, context: Dict[str, Any]) -> None:
61
- """
62
- Log message with structured context
73
+ """Log message with structured context.
63
74
 
64
75
  Args:
65
76
  level: Log level (debug, info, warning, error, critical)
@@ -83,13 +94,13 @@ class LoggerMixin:
83
94
 
84
95
 
85
96
  class StructuredLoggerMixin(LoggerMixin):
86
- """
87
- Extended mixin for structured logging with Prefect
97
+ """Extended mixin for structured logging with Prefect.
98
+
99
+ @public
88
100
  """
89
101
 
90
102
  def log_event(self, event: str, **kwargs: Any) -> None:
91
- """
92
- Log a structured event
103
+ """Log a structured event.
93
104
 
94
105
  Args:
95
106
  event: Event name
@@ -104,8 +115,7 @@ class StructuredLoggerMixin(LoggerMixin):
104
115
  self.logger.info(event, extra={"event": event, "structured": True, **kwargs})
105
116
 
106
117
  def log_metric(self, metric_name: str, value: float, unit: str = "", **tags: Any) -> None:
107
- """
108
- Log a metric value
118
+ """Log a metric value.
109
119
 
110
120
  Args:
111
121
  metric_name: Name of the metric
@@ -129,8 +139,7 @@ class StructuredLoggerMixin(LoggerMixin):
129
139
  )
130
140
 
131
141
  def log_span(self, operation: str, duration_ms: float, **attributes: Any) -> None:
132
- """
133
- Log a span (operation with duration)
142
+ """Log a span (operation with duration).
134
143
 
135
144
  Args:
136
145
  operation: Operation name
@@ -153,8 +162,7 @@ class StructuredLoggerMixin(LoggerMixin):
153
162
 
154
163
  @contextmanager
155
164
  def log_operation(self, operation: str, **context: Any) -> Generator[None, None, None]:
156
- """
157
- Context manager for logging operations with timing
165
+ """Context manager for logging operations with timing.
158
166
 
159
167
  Args:
160
168
  operation: Operation name
@@ -188,36 +196,34 @@ class StructuredLoggerMixin(LoggerMixin):
188
196
 
189
197
 
190
198
  class PrefectLoggerMixin(StructuredLoggerMixin):
191
- """
192
- Enhanced mixin specifically for Prefect flows and tasks
193
- """
199
+ """Enhanced mixin specifically for Prefect flows and tasks."""
194
200
 
195
201
  def log_flow_start(self, flow_name: str, parameters: Dict[str, Any]) -> None:
196
- """Log flow start with parameters"""
202
+ """Log flow start with parameters."""
197
203
  self.log_event("flow_started", flow_name=flow_name, parameters=parameters)
198
204
 
199
205
  def log_flow_end(self, flow_name: str, status: str, duration_ms: float) -> None:
200
- """Log flow completion"""
206
+ """Log flow completion."""
201
207
  self.log_event(
202
208
  "flow_completed", flow_name=flow_name, status=status, duration_ms=duration_ms
203
209
  )
204
210
 
205
211
  def log_task_start(self, task_name: str, inputs: Dict[str, Any]) -> None:
206
- """Log task start with inputs"""
212
+ """Log task start with inputs."""
207
213
  self.log_event("task_started", task_name=task_name, inputs=inputs)
208
214
 
209
215
  def log_task_end(self, task_name: str, status: str, duration_ms: float) -> None:
210
- """Log task completion"""
216
+ """Log task completion."""
211
217
  self.log_event(
212
218
  "task_completed", task_name=task_name, status=status, duration_ms=duration_ms
213
219
  )
214
220
 
215
221
  def log_retry(self, operation: str, attempt: int, max_attempts: int, error: str) -> None:
216
- """Log retry attempt"""
222
+ """Log retry attempt."""
217
223
  self.log_warning(
218
224
  f"Retrying {operation}", attempt=attempt, max_attempts=max_attempts, error=error
219
225
  )
220
226
 
221
227
  def log_checkpoint(self, checkpoint_name: str, **data: Any) -> None:
222
- """Log a checkpoint in processing"""
228
+ """Log a checkpoint in processing."""
223
229
  self.log_info(f"Checkpoint: {checkpoint_name}", checkpoint=checkpoint_name, **data)
@@ -1,51 +1,26 @@
1
- """
2
- ai_pipeline_core.pipeline
3
- =========================
4
-
5
- Tiny wrappers around Prefect's public ``@task`` and ``@flow`` that add our
6
- ``trace`` decorator and **require async functions**.
7
-
8
- Why this exists
9
- ---------------
10
- Prefect tasks/flows are awaitable at runtime, but their public type stubs
11
- don’t declare that clearly. We therefore:
1
+ """Pipeline decorators with Prefect integration and tracing.
12
2
 
13
- 1) Return the **real Prefect objects** (so you keep every Prefect method).
14
- 2) Type them as small Protocols that say “this is awaitable and has common
15
- helpers like `.submit`/`.map`”.
3
+ @public
16
4
 
17
- This keeps Pyright happy without altering runtime behavior and avoids
18
- leaking advanced typing constructs (like ``ParamSpec``) that confuse tools
19
- that introspect callables (e.g., Pydantic).
20
-
21
- Quick start
22
- -----------
23
- from ai_pipeline_core.pipeline import pipeline_task, pipeline_flow
24
- from ai_pipeline_core.documents import DocumentList
25
- from ai_pipeline_core.flow.options import FlowOptions
26
-
27
- @pipeline_task
28
- async def add(x: int, y: int) -> int:
29
- return x + y
30
-
31
- @pipeline_flow
32
- async def my_flow(project_name: str, docs: DocumentList, opts: FlowOptions) -> DocumentList:
33
- await add(1, 2) # awaitable and typed
34
- return docs
35
-
36
- Rules
37
- -----
38
- • Your decorated function **must** be ``async def``.
39
- • ``@pipeline_flow`` functions must accept at least:
40
- (project_name: str, documents: DocumentList, flow_options: FlowOptions | subclass).
41
- • Both wrappers return the same Prefect objects you’d get from Prefect directly.
5
+ Wrappers around Prefect's @task and @flow that add Laminar tracing
6
+ and enforce async-only execution for consistency.
42
7
  """
43
8
 
44
9
  from __future__ import annotations
45
10
 
46
11
  import datetime
47
12
  import inspect
48
- from typing import Any, Callable, Coroutine, Iterable, Protocol, TypeVar, Union, cast, overload
13
+ from typing import (
14
+ Any,
15
+ Callable,
16
+ Coroutine,
17
+ Iterable,
18
+ Protocol,
19
+ TypeVar,
20
+ Union,
21
+ cast,
22
+ overload,
23
+ )
49
24
 
50
25
  from prefect.assets import Asset
51
26
  from prefect.cache_policies import CachePolicy
@@ -79,7 +54,27 @@ FO_contra = TypeVar("FO_contra", bound=FlowOptions, contravariant=True)
79
54
 
80
55
 
81
56
  class _TaskLike(Protocol[R_co]):
82
- """Minimal 'task-like' view: awaitable call + common helpers."""
57
+ """Protocol for type-safe Prefect task representation.
58
+
59
+ Defines the minimal interface for a Prefect task as seen by
60
+ type checkers. Ensures tasks are awaitable and have common
61
+ Prefect task methods.
62
+
63
+ Type Parameter:
64
+ R_co: Covariant return type of the task.
65
+
66
+ Methods:
67
+ __call__: Makes the task awaitable.
68
+ submit: Submit task for asynchronous execution.
69
+ map: Map task over multiple inputs.
70
+
71
+ Attributes:
72
+ name: Optional task name.
73
+
74
+ Note:
75
+ This is a typing Protocol, not a runtime class.
76
+ __getattr__ allows accessing Prefect-specific helpers.
77
+ """
83
78
 
84
79
  def __call__(self, *args: Any, **kwargs: Any) -> Coroutine[Any, Any, R_co]: ...
85
80
 
@@ -91,7 +86,27 @@ class _TaskLike(Protocol[R_co]):
91
86
 
92
87
 
93
88
  class _DocumentsFlowCallable(Protocol[FO_contra]):
94
- """User async flow signature (first three params fixed)."""
89
+ """Protocol for user-defined flow functions.
90
+
91
+ Defines the required signature for functions that will be
92
+ decorated with @pipeline_flow. Enforces the standard parameters
93
+ for document processing flows.
94
+
95
+ Type Parameter:
96
+ FO_contra: Contravariant FlowOptions type (or subclass).
97
+
98
+ Required Parameters:
99
+ project_name: Name of the project/pipeline.
100
+ documents: Input DocumentList to process.
101
+ flow_options: Configuration options (FlowOptions or subclass).
102
+ *args, **kwargs: Additional flow-specific parameters.
103
+
104
+ Returns:
105
+ DocumentList: Processed documents.
106
+
107
+ Note:
108
+ Functions must be async and return DocumentList.
109
+ """
95
110
 
96
111
  def __call__(
97
112
  self,
@@ -104,7 +119,26 @@ class _DocumentsFlowCallable(Protocol[FO_contra]):
104
119
 
105
120
 
106
121
  class _FlowLike(Protocol[FO_contra]):
107
- """Callable returned by Prefect ``@flow`` wrapper that we expose to users."""
122
+ """Protocol for decorated flow objects returned to users.
123
+
124
+ Represents the callable object returned by @pipeline_flow,
125
+ which wraps the original flow function with Prefect and
126
+ tracing capabilities.
127
+
128
+ Type Parameter:
129
+ FO_contra: Contravariant FlowOptions type.
130
+
131
+ Callable Signature:
132
+ Same as _DocumentsFlowCallable - accepts project_name,
133
+ documents, flow_options, plus additional arguments.
134
+
135
+ Attributes:
136
+ name: Optional flow name from decorator.
137
+
138
+ Note:
139
+ __getattr__ provides access to all Prefect flow methods
140
+ without explicit typing (e.g., .serve(), .deploy()).
141
+ """
108
142
 
109
143
  def __call__(
110
144
  self,
@@ -124,6 +158,18 @@ class _FlowLike(Protocol[FO_contra]):
124
158
  # Small helper: safely get a callable's name without upsetting the type checker
125
159
  # --------------------------------------------------------------------------- #
126
160
  def _callable_name(obj: Any, fallback: str) -> str:
161
+ """Safely extract callable's name for error messages.
162
+
163
+ Args:
164
+ obj: Any object that might have a __name__ attribute.
165
+ fallback: Default name if extraction fails.
166
+
167
+ Returns:
168
+ The callable's __name__ if available, fallback otherwise.
169
+
170
+ Note:
171
+ Internal helper that never raises exceptions.
172
+ """
127
173
  try:
128
174
  n = getattr(obj, "__name__", None)
129
175
  return n if isinstance(n, str) else fallback
@@ -211,18 +257,114 @@ def pipeline_task(
211
257
  viz_return_value: bool | None = None,
212
258
  asset_deps: list[str | Asset] | None = None,
213
259
  ) -> _TaskLike[R_co] | Callable[[Callable[..., Coroutine[Any, Any, R_co]]], _TaskLike[R_co]]:
214
- """Decorate an **async** function as a traced Prefect task."""
260
+ """Decorate an async function as a traced Prefect task.
261
+
262
+ @public
263
+
264
+ Wraps an async function with both Prefect task functionality and
265
+ LMNR tracing. The function MUST be async (declared with 'async def').
266
+
267
+ Best Practice - Use Defaults:
268
+ For 90% of use cases, use this decorator WITHOUT any parameters.
269
+ Only specify parameters when you have EXPLICIT requirements.
270
+
271
+ Args:
272
+ __fn: Function to decorate (when used without parentheses).
273
+
274
+ Tracing parameters:
275
+ trace_level: When to trace ("always", "debug", "off").
276
+ - "always": Always trace (default)
277
+ - "debug": Only trace when LMNR_DEBUG="true"
278
+ - "off": Disable tracing
279
+ trace_ignore_input: Don't trace input arguments.
280
+ trace_ignore_output: Don't trace return value.
281
+ trace_ignore_inputs: List of parameter names to exclude from tracing.
282
+ trace_input_formatter: Custom formatter for input tracing.
283
+ trace_output_formatter: Custom formatter for output tracing.
284
+
285
+ Prefect task parameters:
286
+ name: Task name (defaults to function name).
287
+ description: Human-readable task description.
288
+ tags: Tags for organization and filtering.
289
+ version: Task version string.
290
+ cache_policy: Caching policy for task results.
291
+ cache_key_fn: Custom cache key generation.
292
+ cache_expiration: How long to cache results.
293
+ task_run_name: Dynamic or static run name.
294
+ retries: Number of retry attempts (default 0).
295
+ retry_delay_seconds: Delay between retries.
296
+ retry_jitter_factor: Random jitter for retry delays.
297
+ persist_result: Whether to persist results.
298
+ result_storage: Where to store results.
299
+ result_serializer: How to serialize results.
300
+ result_storage_key: Custom storage key.
301
+ cache_result_in_memory: Keep results in memory.
302
+ timeout_seconds: Task execution timeout.
303
+ log_prints: Capture print() statements.
304
+ refresh_cache: Force cache refresh.
305
+ on_completion: Hooks for successful completion.
306
+ on_failure: Hooks for task failure.
307
+ retry_condition_fn: Custom retry condition.
308
+ viz_return_value: Include return value in visualization.
309
+ asset_deps: Upstream asset dependencies.
310
+
311
+ Returns:
312
+ Decorated task callable that is awaitable and has Prefect
313
+ task methods (submit, map, etc.).
314
+
315
+ Example:
316
+ >>> # RECOMMENDED - No parameters needed!
317
+ >>> @pipeline_task
318
+ >>> async def process_document(doc: Document) -> Document:
319
+ ... result = await analyze(doc)
320
+ ... return result
321
+ >>>
322
+ >>> # With parameters (only when necessary):
323
+ >>> @pipeline_task(retries=5) # Only for known flaky operations
324
+ >>> async def unreliable_api_call(url: str) -> dict:
325
+ ... # This API fails often, needs extra retries
326
+ ... return await fetch_with_retry(url)
327
+ >>>
328
+ >>> # AVOID specifying defaults - they're already optimal:
329
+ >>> # - Automatic task naming
330
+ >>> # - Standard retry policy
331
+ >>> # - Sensible timeout
332
+ >>> # - Full observability
333
+
334
+ Performance:
335
+ - Task decoration overhead: ~1-2ms
336
+ - Tracing overhead: ~1-2ms per call
337
+ - Prefect state tracking: ~5-10ms
338
+
339
+ Note:
340
+ Tasks are automatically traced with LMNR and appear in
341
+ both Prefect and LMNR dashboards.
342
+
343
+ See Also:
344
+ - pipeline_flow: For flow-level decoration
345
+ - trace: Lower-level tracing decorator
346
+ - prefect.task: Standard Prefect task (no tracing)
347
+ """
215
348
  task_decorator: Callable[..., Any] = _prefect_task # helps the type checker
216
349
 
217
350
  def _apply(fn: Callable[..., Coroutine[Any, Any, R_co]]) -> _TaskLike[R_co]:
351
+ """Apply pipeline_task decorator to async function.
352
+
353
+ Returns:
354
+ Wrapped task with tracing and Prefect functionality.
355
+
356
+ Raises:
357
+ TypeError: If function is not async.
358
+ """
218
359
  if not inspect.iscoroutinefunction(fn):
219
360
  raise TypeError(
220
361
  f"@pipeline_task target '{_callable_name(fn, 'task')}' must be 'async def'"
221
362
  )
222
363
 
364
+ fname = _callable_name(fn, "task")
223
365
  traced_fn = trace(
224
366
  level=trace_level,
225
- name=name or _callable_name(fn, "task"),
367
+ name=name or fname,
226
368
  ignore_input=trace_ignore_input,
227
369
  ignore_output=trace_ignore_output,
228
370
  ignore_inputs=trace_ignore_inputs,
@@ -233,14 +375,14 @@ def pipeline_task(
233
375
  return cast(
234
376
  _TaskLike[R_co],
235
377
  task_decorator(
236
- name=name,
378
+ name=name or fname,
237
379
  description=description,
238
380
  tags=tags,
239
381
  version=version,
240
382
  cache_policy=cache_policy,
241
383
  cache_key_fn=cache_key_fn,
242
384
  cache_expiration=cache_expiration,
243
- task_run_name=task_run_name,
385
+ task_run_name=task_run_name or name or fname,
244
386
  retries=0 if retries is None else retries,
245
387
  retry_delay_seconds=retry_delay_seconds,
246
388
  retry_jitter_factor=retry_jitter_factor,
@@ -264,7 +406,7 @@ def pipeline_task(
264
406
 
265
407
 
266
408
  # --------------------------------------------------------------------------- #
267
- # @pipeline_flow — async-only, traced, returns Prefects flow wrapper
409
+ # @pipeline_flow — async-only, traced, returns Prefect's flow wrapper
268
410
  # --------------------------------------------------------------------------- #
269
411
  @overload
270
412
  def pipeline_flow(__fn: _DocumentsFlowCallable[FO_contra], /) -> _FlowLike[FO_contra]: ...
@@ -333,22 +475,124 @@ def pipeline_flow(
333
475
  on_crashed: list[FlowStateHook[Any, Any]] | None = None,
334
476
  on_running: list[FlowStateHook[Any, Any]] | None = None,
335
477
  ) -> _FlowLike[FO_contra] | Callable[[_DocumentsFlowCallable[FO_contra]], _FlowLike[FO_contra]]:
336
- """Decorate an **async** flow.
478
+ """Decorate an async flow for document processing.
337
479
 
338
- Required signature:
339
- async def flow_fn(
340
- project_name: str,
341
- documents: DocumentList,
342
- flow_options: FlowOptions, # or any subclass
343
- *args,
344
- **kwargs
345
- ) -> DocumentList
480
+ @public
481
+
482
+ Wraps an async function as a Prefect flow with tracing and type safety.
483
+ The decorated function MUST be async and follow the required signature.
346
484
 
347
- Returns the same callable object Prefect’s ``@flow`` would return.
485
+ Best Practice - Use Defaults:
486
+ For 90% of use cases, use this decorator WITHOUT any parameters.
487
+ Only specify parameters when you have EXPLICIT requirements.
488
+
489
+ Required function signature:
490
+ async def flow_fn(
491
+ project_name: str, # Project/pipeline identifier
492
+ documents: DocumentList, # Input documents to process
493
+ flow_options: FlowOptions, # Configuration (or subclass)
494
+ *args, # Additional positional args for custom parameters
495
+ **kwargs # Additional keyword args for custom parameters
496
+ ) -> DocumentList # Must return DocumentList
497
+
498
+ Note: *args and **kwargs allow for defining custom parameters on your flow
499
+ function, which can be passed during execution for flow-specific needs.
500
+
501
+ Args:
502
+ __fn: Function to decorate (when used without parentheses).
503
+
504
+ Tracing parameters:
505
+ trace_level: When to trace ("always", "debug", "off").
506
+ - "always": Always trace (default)
507
+ - "debug": Only trace when LMNR_DEBUG="true"
508
+ - "off": Disable tracing
509
+ trace_ignore_input: Don't trace input arguments.
510
+ trace_ignore_output: Don't trace return value.
511
+ trace_ignore_inputs: Parameter names to exclude from tracing.
512
+ trace_input_formatter: Custom input formatter.
513
+ trace_output_formatter: Custom output formatter.
514
+
515
+ Prefect flow parameters:
516
+ name: Flow name (defaults to function name).
517
+ version: Flow version identifier.
518
+ flow_run_name: Static or dynamic run name.
519
+ retries: Number of flow retry attempts (default 0).
520
+ retry_delay_seconds: Delay between flow retries.
521
+ task_runner: Task execution strategy (sequential/concurrent).
522
+ description: Human-readable flow description.
523
+ timeout_seconds: Flow execution timeout.
524
+ validate_parameters: Validate input parameters.
525
+ persist_result: Persist flow results.
526
+ result_storage: Where to store results.
527
+ result_serializer: How to serialize results.
528
+ cache_result_in_memory: Keep results in memory.
529
+ log_prints: Capture print() statements.
530
+ on_completion: Hooks for successful completion.
531
+ on_failure: Hooks for flow failure.
532
+ on_cancellation: Hooks for flow cancellation.
533
+ on_crashed: Hooks for flow crashes.
534
+ on_running: Hooks for flow start.
535
+
536
+ Returns:
537
+ Decorated flow callable that maintains Prefect flow interface
538
+ while enforcing document processing conventions.
539
+
540
+ Example:
541
+ >>> from ai_pipeline_core import FlowOptions
542
+ >>>
543
+ >>> # RECOMMENDED - No parameters needed!
544
+ >>> @pipeline_flow
545
+ >>> async def analyze_documents(
546
+ ... project_name: str,
547
+ ... documents: DocumentList,
548
+ ... flow_options: FlowOptions
549
+ >>> ) -> DocumentList:
550
+ ... # Process each document
551
+ ... results = []
552
+ ... for doc in documents:
553
+ ... result = await process(doc)
554
+ ... results.append(result)
555
+ ... return DocumentList(results)
556
+ >>>
557
+ >>> # With parameters (only when necessary):
558
+ >>> @pipeline_flow(retries=2) # Only for flows that need retry logic
559
+ >>> async def critical_flow(
560
+ ... project_name: str,
561
+ ... documents: DocumentList,
562
+ ... flow_options: FlowOptions
563
+ >>> ) -> DocumentList:
564
+ ... # Critical processing that might fail
565
+ ... return await process_critical(documents)
566
+ >>>
567
+ >>> # AVOID specifying defaults - they're already optimal:
568
+ >>> # - Automatic flow naming
569
+ >>> # - Standard retry policy
570
+ >>> # - Full observability
571
+
572
+ Note:
573
+ - Flow is wrapped with both Prefect and LMNR tracing
574
+ - Return type is validated at runtime
575
+ - FlowOptions can be subclassed for custom configuration
576
+ - All Prefect flow methods (.serve(), .deploy()) are available
577
+
578
+ See Also:
579
+ - pipeline_task: For task-level decoration
580
+ - FlowConfig: Type-safe flow configuration
581
+ - FlowOptions: Base class for flow options
582
+ - simple_runner.run_pipeline: Execute flows locally
348
583
  """
349
584
  flow_decorator: Callable[..., Any] = _prefect_flow
350
585
 
351
586
  def _apply(fn: _DocumentsFlowCallable[FO_contra]) -> _FlowLike[FO_contra]:
587
+ """Apply pipeline_flow decorator to flow function.
588
+
589
+ Returns:
590
+ Wrapped flow with tracing and Prefect functionality.
591
+
592
+ Raises:
593
+ TypeError: If function is not async, doesn't have required
594
+ parameters, or doesn't return DocumentList.
595
+ """
352
596
  fname = _callable_name(fn, "flow")
353
597
 
354
598
  if not inspect.iscoroutinefunction(fn):
@@ -373,6 +617,10 @@ def pipeline_flow(
373
617
  )
374
618
  return result
375
619
 
620
+ # Preserve the original function name for Prefect
621
+ _wrapper.__name__ = fname
622
+ _wrapper.__qualname__ = getattr(fn, "__qualname__", fname)
623
+
376
624
  traced = trace(
377
625
  level=trace_level,
378
626
  name=name or fname,
@@ -386,9 +634,9 @@ def pipeline_flow(
386
634
  return cast(
387
635
  _FlowLike[FO_contra],
388
636
  flow_decorator(
389
- name=name,
637
+ name=name or fname,
390
638
  version=version,
391
- flow_run_name=flow_run_name,
639
+ flow_run_name=flow_run_name or name or fname,
392
640
  retries=0 if retries is None else retries,
393
641
  retry_delay_seconds=retry_delay_seconds,
394
642
  task_runner=task_runner,
@@ -1,4 +1,51 @@
1
- """Prefect core features."""
1
+ """Prefect core features for pipeline orchestration.
2
+
3
+ This module provides clean re-exports of essential Prefect functionality.
4
+
5
+ IMPORTANT: You should NEVER use the `task` and `flow` decorators directly
6
+ unless it is 100% impossible to use `pipeline_task` and `pipeline_flow`.
7
+ The standard Prefect decorators are exported here only for extremely
8
+ limited edge cases where the pipeline decorators cannot be used.
9
+
10
+ Always prefer:
11
+ >>> from ai_pipeline_core import pipeline_task, pipeline_flow
12
+ >>>
13
+ >>> @pipeline_task
14
+ >>> async def my_task(...): ...
15
+ >>>
16
+ >>> @pipeline_flow
17
+ >>> async def my_flow(...): ...
18
+
19
+ The `task` and `flow` decorators should only be used when:
20
+ - You absolutely cannot convert to async (pipeline decorators require async)
21
+ - You have a very specific Prefect integration that conflicts with tracing
22
+ - You are writing test utilities or infrastructure code
23
+
24
+ Exported components:
25
+ task: Prefect task decorator (AVOID - use pipeline_task instead).
26
+ flow: Prefect flow decorator (AVOID - use pipeline_flow instead).
27
+ disable_run_logger: Context manager to suppress Prefect logging.
28
+ prefect_test_harness: Test harness for unit testing flows/tasks.
29
+
30
+ Testing utilities (use as fixtures):
31
+ The disable_run_logger and prefect_test_harness should be used as
32
+ pytest fixtures as shown in tests/conftest.py:
33
+
34
+ >>> @pytest.fixture(autouse=True, scope="session")
35
+ >>> def prefect_test_fixture():
36
+ ... with prefect_test_harness():
37
+ ... yield
38
+ >>>
39
+ >>> @pytest.fixture(autouse=True)
40
+ >>> def disable_prefect_logging():
41
+ ... with disable_run_logger():
42
+ ... yield
43
+
44
+ Note:
45
+ The pipeline_task and pipeline_flow decorators from
46
+ ai_pipeline_core.pipeline provide async-only execution with
47
+ integrated LMNR tracing and are the standard for this library.
48
+ """
2
49
 
3
50
  from prefect import flow, task
4
51
  from prefect.logging import disable_run_logger