ai-pipeline-core 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +5 -5
- ai_pipeline_core/documents/document.py +16 -1
- ai_pipeline_core/flow/config.py +21 -0
- ai_pipeline_core/llm/model_options.py +4 -0
- ai_pipeline_core/pipeline.py +313 -317
- ai_pipeline_core/simple_runner/cli.py +43 -11
- {ai_pipeline_core-0.1.7.dist-info → ai_pipeline_core-0.1.8.dist-info}/METADATA +51 -34
- {ai_pipeline_core-0.1.7.dist-info → ai_pipeline_core-0.1.8.dist-info}/RECORD +10 -10
- {ai_pipeline_core-0.1.7.dist-info → ai_pipeline_core-0.1.8.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.1.7.dist-info → ai_pipeline_core-0.1.8.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/__init__.py
CHANGED
@@ -27,12 +27,12 @@ from .logging import (
 )
 from .logging import get_pipeline_logger as get_logger
 from .pipeline import pipeline_flow, pipeline_task
-from .prefect import
+from .prefect import disable_run_logger, prefect_test_harness
 from .prompt_manager import PromptManager
 from .settings import settings
 from .tracing import TraceInfo, TraceLevel, trace

-__version__ = "0.1.7"
+__version__ = "0.1.8"

 __all__ = [
     # Config/Settings
@@ -54,12 +54,12 @@ __all__ = [
     # Flow/Task
     "FlowConfig",
     "FlowOptions",
-    # Prefect decorators (clean, no tracing)
-    "task",
-    "flow",
     # Pipeline decorators (with tracing)
     "pipeline_task",
     "pipeline_flow",
+    # Prefect decorators (clean, no tracing)
+    "prefect_test_harness",
+    "disable_run_logger",
     # LLM
     "llm",
     "ModelName",
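Since `prefect_test_harness` and `disable_run_logger` are now re-exported from the package root, a test suite can pull them straight from `ai_pipeline_core`. A minimal sketch of a session-scoped pytest fixture (the fixture name and scope are illustrative, not part of the package):

```python
import pytest

from ai_pipeline_core import disable_run_logger, prefect_test_harness


@pytest.fixture(autouse=True, scope="session")
def prefect_test_env():
    # Run flows against Prefect's ephemeral test harness instead of a real server,
    # and silence per-run logger noise while tests execute.
    with prefect_test_harness():
        with disable_run_logger():
            yield
```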
ai_pipeline_core/documents/document.py
CHANGED

@@ -26,12 +26,27 @@ TModel = TypeVar("TModel", bound=BaseModel)


 class Document(BaseModel, ABC):
-    """Abstract base class for all documents
+    """Abstract base class for all documents.
+
+    Warning: Document subclasses should NOT start with 'Test' prefix as this
+    causes conflicts with pytest test discovery. Classes with 'Test' prefix
+    will be rejected at definition time.
+    """

     MAX_CONTENT_SIZE: ClassVar[int] = 25 * 1024 * 1024  # 25MB default
     DESCRIPTION_EXTENSION: ClassVar[str] = ".description.md"
     MARKDOWN_LIST_SEPARATOR: ClassVar[str] = "\n\n---\n\n"

+    def __init_subclass__(cls, **kwargs: Any) -> None:
+        """Validate subclass names to prevent pytest conflicts."""
+        super().__init_subclass__(**kwargs)
+        if cls.__name__.startswith("Test"):
+            raise TypeError(
+                f"Document subclass '{cls.__name__}' cannot start with 'Test' prefix. "
+                "This causes conflicts with pytest test discovery. "
+                "Please use a different name (e.g., 'SampleDocument', 'ExampleDocument')."
+            )
+
     def __init__(self, **data: Any) -> None:
         """Prevent direct instantiation of abstract Document class."""
         if type(self) is Document:
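The new `__init_subclass__` hook means a badly named document class fails as soon as its class body is evaluated, not when pytest later trips over it. A minimal sketch, assuming `FlowDocument` (a concrete `Document` subclass) is importable from `ai_pipeline_core.documents` as it is in `flow/config.py`; the class names below are illustrative:

```python
from ai_pipeline_core.documents import FlowDocument


class SampleDocument(FlowDocument):  # fine: no 'Test' prefix
    pass


try:
    class TestReport(FlowDocument):  # rejected by Document.__init_subclass__
        pass
except TypeError as exc:
    print(exc)  # "Document subclass 'TestReport' cannot start with 'Test' prefix. ..."
```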
ai_pipeline_core/flow/config.py
CHANGED
@@ -14,6 +14,27 @@ class FlowConfig(ABC):
     INPUT_DOCUMENT_TYPES: ClassVar[list[type[FlowDocument]]]
     OUTPUT_DOCUMENT_TYPE: ClassVar[type[FlowDocument]]

+    def __init_subclass__(cls, **kwargs):
+        """Validate that OUTPUT_DOCUMENT_TYPE is not in INPUT_DOCUMENT_TYPES."""
+        super().__init_subclass__(**kwargs)
+
+        # Skip validation for the abstract base class itself
+        if cls.__name__ == "FlowConfig":
+            return
+
+        # Ensure required attributes are defined
+        if not hasattr(cls, "INPUT_DOCUMENT_TYPES"):
+            raise TypeError(f"FlowConfig {cls.__name__} must define INPUT_DOCUMENT_TYPES")
+        if not hasattr(cls, "OUTPUT_DOCUMENT_TYPE"):
+            raise TypeError(f"FlowConfig {cls.__name__} must define OUTPUT_DOCUMENT_TYPE")
+
+        # Validate that output type is not in input types
+        if cls.OUTPUT_DOCUMENT_TYPE in cls.INPUT_DOCUMENT_TYPES:
+            raise TypeError(
+                f"FlowConfig {cls.__name__}: OUTPUT_DOCUMENT_TYPE "
+                f"({cls.OUTPUT_DOCUMENT_TYPE.__name__}) cannot be in INPUT_DOCUMENT_TYPES"
+            )
+
     @classmethod
     def get_input_document_types(cls) -> list[type[FlowDocument]]:
         """
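Because the check runs in `__init_subclass__`, a config whose output type also appears among its inputs is rejected the moment the class is defined. A sketch with hypothetical document classes (`FlowConfig` is exported at the package root per `__all__` above; the document classes are illustrative):

```python
from ai_pipeline_core import FlowConfig
from ai_pipeline_core.documents import FlowDocument


class InputDoc(FlowDocument):
    pass


class OutputDoc(FlowDocument):
    pass


class SummarizeConfig(FlowConfig):  # passes validation
    INPUT_DOCUMENT_TYPES = [InputDoc]
    OUTPUT_DOCUMENT_TYPE = OutputDoc


try:
    class CircularConfig(FlowConfig):  # output type also listed as an input
        INPUT_DOCUMENT_TYPES = [InputDoc]
        OUTPUT_DOCUMENT_TYPE = InputDoc
except TypeError as exc:
    print(exc)  # "FlowConfig CircularConfig: OUTPUT_DOCUMENT_TYPE (InputDoc) cannot be in INPUT_DOCUMENT_TYPES"
```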
ai_pipeline_core/llm/model_options.py
CHANGED

@@ -4,6 +4,7 @@ from pydantic import BaseModel


 class ModelOptions(BaseModel):
+    temperature: float | None = None
     system_prompt: str | None = None
     search_context_size: Literal["low", "medium", "high"] | None = None
     reasoning_effort: Literal["low", "medium", "high"] | None = None
@@ -21,6 +22,9 @@ class ModelOptions(BaseModel):
             "extra_body": {},
         }

+        if self.temperature:
+            kwargs["temperature"] = self.temperature
+
         if self.max_completion_tokens:
             kwargs["max_completion_tokens"] = self.max_completion_tokens

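The new field only reaches the request kwargs when it is set and, as written, only when it is truthy, so an explicit `temperature=0.0` still falls back to the provider default. A minimal usage sketch (the kwargs-building method itself is not shown in this diff):

```python
from ai_pipeline_core.llm import ModelOptions

# Explicit sampling temperature alongside the existing options.
opts = ModelOptions(temperature=0.2, reasoning_effort="low")

# Leaving temperature unset (None) keeps it out of the generated request kwargs.
default_opts = ModelOptions()
```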
ai_pipeline_core/pipeline.py
CHANGED
@@ -1,397 +1,395 @@
-"""
+"""
+ai_pipeline_core.pipeline
+=========================
+
+Tiny wrappers around Prefect's public ``@task`` and ``@flow`` that add our
+``trace`` decorator and **require async functions**.
+
+Why this exists
+---------------
+Prefect tasks/flows are awaitable at runtime, but their public type stubs
+don’t declare that clearly. We therefore:
+
+1) Return the **real Prefect objects** (so you keep every Prefect method).
+2) Type them as small Protocols that say “this is awaitable and has common
+   helpers like `.submit`/`.map`”.

-
+This keeps Pyright happy without altering runtime behavior and avoids
+leaking advanced typing constructs (like ``ParamSpec``) that confuse tools
+that introspect callables (e.g., Pydantic).
+
+Quick start
+-----------
+    from ai_pipeline_core.pipeline import pipeline_task, pipeline_flow
+    from ai_pipeline_core.documents import DocumentList
+    from ai_pipeline_core.flow.options import FlowOptions
+
+    @pipeline_task
+    async def add(x: int, y: int) -> int:
+        return x + y
+
+    @pipeline_flow
+    async def my_flow(project_name: str, docs: DocumentList, opts: FlowOptions) -> DocumentList:
+        await add(1, 2)  # awaitable and typed
+        return docs
+
+Rules
+-----
+• Your decorated function **must** be ``async def``.
+• ``@pipeline_flow`` functions must accept at least:
+  (project_name: str, documents: DocumentList, flow_options: FlowOptions | subclass).
+• Both wrappers return the same Prefect objects you’d get from Prefect directly.
 """

+from __future__ import annotations
+
 import datetime
-import functools
 import inspect
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Callable,
-    Coroutine,
-    Dict,
-    Iterable,
-    Optional,
-    TypeVar,
-    Union,
-    cast,
-    overload,
-)
+from typing import Any, Callable, Coroutine, Iterable, Protocol, TypeVar, Union, cast, overload

 from prefect.assets import Asset
 from prefect.cache_policies import CachePolicy
 from prefect.context import TaskRunContext
-from prefect.flows import
+from prefect.flows import FlowStateHook
+from prefect.flows import flow as _prefect_flow  # public import
 from prefect.futures import PrefectFuture
 from prefect.results import ResultSerializer, ResultStorage
 from prefect.task_runners import TaskRunner
-from prefect.tasks import (
-    RetryConditionCallable,
-    StateHookCallable,
-    Task,
-    TaskRunNameValueOrCallable,
-)
+from prefect.tasks import task as _prefect_task  # public import
 from prefect.utilities.annotations import NotSet
-from typing_extensions import
+from typing_extensions import TypeAlias

 from ai_pipeline_core.documents import DocumentList
 from ai_pipeline_core.flow.options import FlowOptions
-from ai_pipeline_core.prefect import flow, task
 from ai_pipeline_core.tracing import TraceLevel, trace

-
-
+# --------------------------------------------------------------------------- #
+# Public callback aliases (Prefect stubs omit these exact types)
+# --------------------------------------------------------------------------- #
+RetryConditionCallable: TypeAlias = Callable[[Any, Any, Any], bool]
+StateHookCallable: TypeAlias = Callable[[Any, Any, Any], None]
+TaskRunNameValueOrCallable: TypeAlias = Union[str, Callable[[], str]]

-
-
+# --------------------------------------------------------------------------- #
+# Typing helpers
+# --------------------------------------------------------------------------- #
+R_co = TypeVar("R_co", covariant=True)
+FO_contra = TypeVar("FO_contra", bound=FlowOptions, contravariant=True)
+"""Flow options are an *input* type, so contravariant fits the callable model."""

-# ============================================================================
-# PIPELINE TASK DECORATOR
-# ============================================================================

+class _TaskLike(Protocol[R_co]):
+    """Minimal 'task-like' view: awaitable call + common helpers."""
+
+    def __call__(self, *args: Any, **kwargs: Any) -> Coroutine[Any, Any, R_co]: ...
+
+    submit: Callable[..., Any]
+    map: Callable[..., Any]
+    name: str | None
+
+    def __getattr__(self, name: str) -> Any: ...  # allow unknown helpers without type errors

-@overload
-def pipeline_task(__fn: Callable[P, R], /) -> Task[P, R]: ...

+class _DocumentsFlowCallable(Protocol[FO_contra]):
+    """User async flow signature (first three params fixed)."""

+    def __call__(
+        self,
+        project_name: str,
+        documents: DocumentList,
+        flow_options: FO_contra,
+        *args: Any,
+        **kwargs: Any,
+    ) -> Coroutine[Any, Any, DocumentList]: ...
+
+
+class _FlowLike(Protocol[FO_contra]):
+    """Callable returned by Prefect ``@flow`` wrapper that we expose to users."""
+
+    def __call__(
+        self,
+        project_name: str,
+        documents: DocumentList,
+        flow_options: FO_contra,
+        *args: Any,
+        **kwargs: Any,
+    ) -> Coroutine[Any, Any, DocumentList]: ...
+
+    name: str | None
+
+    def __getattr__(self, name: str) -> Any: ...  # allow unknown helpers without type errors
+
+
+# --------------------------------------------------------------------------- #
+# Small helper: safely get a callable's name without upsetting the type checker
+# --------------------------------------------------------------------------- #
+def _callable_name(obj: Any, fallback: str) -> str:
+    try:
+        n = getattr(obj, "__name__", None)
+        return n if isinstance(n, str) else fallback
+    except Exception:
+        return fallback
+
+
+# --------------------------------------------------------------------------- #
+# @pipeline_task — async-only, traced, returns Prefect's Task object
+# --------------------------------------------------------------------------- #
+@overload
+def pipeline_task(__fn: Callable[..., Coroutine[Any, Any, R_co]], /) -> _TaskLike[R_co]: ...
 @overload
 def pipeline_task(
     *,
-    #
+    # tracing
     trace_level: TraceLevel = "always",
     trace_ignore_input: bool = False,
     trace_ignore_output: bool = False,
     trace_ignore_inputs: list[str] | None = None,
-    trace_input_formatter:
-    trace_output_formatter:
-    #
-    name:
-    description:
-    tags:
-    version:
-    cache_policy:
-    cache_key_fn:
-    cache_expiration:
-    task_run_name:
-    retries:
-    retry_delay_seconds:
-
-
-
-
-
-    result_serializer: Optional[Union[ResultSerializer, str]] = None,
-    result_storage_key: Optional[str] = None,
+    trace_input_formatter: Callable[..., str] | None = None,
+    trace_output_formatter: Callable[..., str] | None = None,
+    # prefect passthrough
+    name: str | None = None,
+    description: str | None = None,
+    tags: Iterable[str] | None = None,
+    version: str | None = None,
+    cache_policy: CachePolicy | type[NotSet] = NotSet,
+    cache_key_fn: Callable[[TaskRunContext, dict[str, Any]], str | None] | None = None,
+    cache_expiration: datetime.timedelta | None = None,
+    task_run_name: TaskRunNameValueOrCallable | None = None,
+    retries: int | None = None,
+    retry_delay_seconds: int | float | list[float] | Callable[[int], list[float]] | None = None,
+    retry_jitter_factor: float | None = None,
+    persist_result: bool | None = None,
+    result_storage: ResultStorage | str | None = None,
+    result_serializer: ResultSerializer | str | None = None,
+    result_storage_key: str | None = None,
     cache_result_in_memory: bool = True,
-    timeout_seconds:
-    log_prints:
-    refresh_cache:
-    on_completion:
-    on_failure:
-    retry_condition_fn:
-    viz_return_value:
-    asset_deps:
-) -> Callable[[Callable[
+    timeout_seconds: int | float | None = None,
+    log_prints: bool | None = False,
+    refresh_cache: bool | None = None,
+    on_completion: list[StateHookCallable] | None = None,
+    on_failure: list[StateHookCallable] | None = None,
+    retry_condition_fn: RetryConditionCallable | None = None,
+    viz_return_value: bool | None = None,
+    asset_deps: list[str | Asset] | None = None,
+) -> Callable[[Callable[..., Coroutine[Any, Any, R_co]]], _TaskLike[R_co]]: ...


 def pipeline_task(
-    __fn:
+    __fn: Callable[..., Coroutine[Any, Any, R_co]] | None = None,
     /,
     *,
-    #
+    # tracing
     trace_level: TraceLevel = "always",
     trace_ignore_input: bool = False,
     trace_ignore_output: bool = False,
     trace_ignore_inputs: list[str] | None = None,
-    trace_input_formatter:
-    trace_output_formatter:
-    #
-    name:
-    description:
-    tags:
-    version:
-    cache_policy:
-    cache_key_fn:
-    cache_expiration:
-    task_run_name:
-    retries:
-    retry_delay_seconds:
-
-
-
-
-
-    result_serializer: Optional[Union[ResultSerializer, str]] = None,
-    result_storage_key: Optional[str] = None,
+    trace_input_formatter: Callable[..., str] | None = None,
+    trace_output_formatter: Callable[..., str] | None = None,
+    # prefect passthrough
+    name: str | None = None,
+    description: str | None = None,
+    tags: Iterable[str] | None = None,
+    version: str | None = None,
+    cache_policy: CachePolicy | type[NotSet] = NotSet,
+    cache_key_fn: Callable[[TaskRunContext, dict[str, Any]], str | None] | None = None,
+    cache_expiration: datetime.timedelta | None = None,
+    task_run_name: TaskRunNameValueOrCallable | None = None,
+    retries: int | None = None,
+    retry_delay_seconds: int | float | list[float] | Callable[[int], list[float]] | None = None,
+    retry_jitter_factor: float | None = None,
+    persist_result: bool | None = None,
+    result_storage: ResultStorage | str | None = None,
+    result_serializer: ResultSerializer | str | None = None,
+    result_storage_key: str | None = None,
     cache_result_in_memory: bool = True,
-    timeout_seconds:
-    log_prints:
-    refresh_cache:
-    on_completion:
-    on_failure:
-    retry_condition_fn:
-    viz_return_value:
-    asset_deps:
-) ->
-    """
-
-
-
-
-
-
-
-        trace_ignore_input: Whether to ignore input in traces
-        trace_ignore_output: Whether to ignore output in traces
-        trace_ignore_inputs: List of input parameter names to ignore
-        trace_input_formatter: Custom formatter for inputs
-        trace_output_formatter: Custom formatter for outputs
+    timeout_seconds: int | float | None = None,
+    log_prints: bool | None = False,
+    refresh_cache: bool | None = None,
+    on_completion: list[StateHookCallable] | None = None,
+    on_failure: list[StateHookCallable] | None = None,
+    retry_condition_fn: RetryConditionCallable | None = None,
+    viz_return_value: bool | None = None,
+    asset_deps: list[str | Asset] | None = None,
+) -> _TaskLike[R_co] | Callable[[Callable[..., Coroutine[Any, Any, R_co]]], _TaskLike[R_co]]:
+    """Decorate an **async** function as a traced Prefect task."""
+    task_decorator: Callable[..., Any] = _prefect_task  # helps the type checker
+
+    def _apply(fn: Callable[..., Coroutine[Any, Any, R_co]]) -> _TaskLike[R_co]:
+        if not inspect.iscoroutinefunction(fn):
+            raise TypeError(
+                f"@pipeline_task target '{_callable_name(fn, 'task')}' must be 'async def'"
+            )

-
-
+        traced_fn = trace(
+            level=trace_level,
+            name=name or _callable_name(fn, "task"),
+            ignore_input=trace_ignore_input,
+            ignore_output=trace_ignore_output,
+            ignore_inputs=trace_ignore_inputs,
+            input_formatter=trace_input_formatter,
+            output_formatter=trace_output_formatter,
+        )(fn)

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        retry_jitter_factor=retry_jitter_factor,
-        persist_result=persist_result,
-        result_storage=result_storage,
-        result_serializer=result_serializer,
-        result_storage_key=result_storage_key,
-        cache_result_in_memory=cache_result_in_memory,
-        timeout_seconds=timeout_seconds,
-        log_prints=log_prints,
-        refresh_cache=refresh_cache,
-        on_completion=on_completion,
-        on_failure=on_failure,
-        retry_condition_fn=retry_condition_fn,
-        viz_return_value=viz_return_value,
-        asset_deps=asset_deps,
+        return cast(
+            _TaskLike[R_co],
+            task_decorator(
+                name=name,
+                description=description,
+                tags=tags,
+                version=version,
+                cache_policy=cache_policy,
+                cache_key_fn=cache_key_fn,
+                cache_expiration=cache_expiration,
+                task_run_name=task_run_name,
+                retries=0 if retries is None else retries,
+                retry_delay_seconds=retry_delay_seconds,
+                retry_jitter_factor=retry_jitter_factor,
+                persist_result=persist_result,
+                result_storage=result_storage,
+                result_serializer=result_serializer,
+                result_storage_key=result_storage_key,
+                cache_result_in_memory=cache_result_in_memory,
+                timeout_seconds=timeout_seconds,
+                log_prints=log_prints,
+                refresh_cache=refresh_cache,
+                on_completion=on_completion,
+                on_failure=on_failure,
+                retry_condition_fn=retry_condition_fn,
+                viz_return_value=viz_return_value,
+                asset_deps=asset_deps,
+            )(traced_fn),
         )

-    if __fn
-        return decorator(__fn)
-    return decorator
-
-
-# ============================================================================
-# PIPELINE FLOW DECORATOR WITH DOCUMENT PROCESSING
-# ============================================================================
-
-# Type aliases for document flow signatures
-DocumentsFlowSig = Callable[
-    Concatenate[str, DocumentList, FlowOptions, P],
-    Union[DocumentList, Coroutine[Any, Any, DocumentList]],
-]
-
-DocumentsFlowResult = Flow[Concatenate[str, DocumentList, FlowOptions, P], DocumentList]
+    return _apply(__fn) if __fn else _apply


+# --------------------------------------------------------------------------- #
+# @pipeline_flow — async-only, traced, returns Prefect’s flow wrapper
+# --------------------------------------------------------------------------- #
 @overload
-def pipeline_flow(
-    __fn: DocumentsFlowSig[P],
-    /,
-) -> DocumentsFlowResult[P]: ...
-
-
+def pipeline_flow(__fn: _DocumentsFlowCallable[FO_contra], /) -> _FlowLike[FO_contra]: ...
 @overload
 def pipeline_flow(
     *,
-    #
+    # tracing
     trace_level: TraceLevel = "always",
     trace_ignore_input: bool = False,
     trace_ignore_output: bool = False,
     trace_ignore_inputs: list[str] | None = None,
-    trace_input_formatter:
-    trace_output_formatter:
-    #
-    name:
-    version:
-    flow_run_name:
-    retries:
-    retry_delay_seconds:
-    task_runner:
-    description:
-    timeout_seconds:
+    trace_input_formatter: Callable[..., str] | None = None,
+    trace_output_formatter: Callable[..., str] | None = None,
+    # prefect passthrough
+    name: str | None = None,
+    version: str | None = None,
+    flow_run_name: Union[Callable[[], str], str] | None = None,
+    retries: int | None = None,
+    retry_delay_seconds: int | float | None = None,
+    task_runner: TaskRunner[PrefectFuture[Any]] | None = None,
+    description: str | None = None,
+    timeout_seconds: int | float | None = None,
     validate_parameters: bool = True,
-    persist_result:
-    result_storage:
-    result_serializer:
+    persist_result: bool | None = None,
+    result_storage: ResultStorage | str | None = None,
+    result_serializer: ResultSerializer | str | None = None,
     cache_result_in_memory: bool = True,
-    log_prints:
-    on_completion:
-    on_failure:
-    on_cancellation:
-    on_crashed:
-    on_running:
-) -> Callable[[
+    log_prints: bool | None = None,
+    on_completion: list[FlowStateHook[Any, Any]] | None = None,
+    on_failure: list[FlowStateHook[Any, Any]] | None = None,
+    on_cancellation: list[FlowStateHook[Any, Any]] | None = None,
+    on_crashed: list[FlowStateHook[Any, Any]] | None = None,
+    on_running: list[FlowStateHook[Any, Any]] | None = None,
+) -> Callable[[_DocumentsFlowCallable[FO_contra]], _FlowLike[FO_contra]]: ...


 def pipeline_flow(
-    __fn:
+    __fn: _DocumentsFlowCallable[FO_contra] | None = None,
     /,
     *,
-    #
+    # tracing
     trace_level: TraceLevel = "always",
     trace_ignore_input: bool = False,
     trace_ignore_output: bool = False,
     trace_ignore_inputs: list[str] | None = None,
-    trace_input_formatter:
-    trace_output_formatter:
-    #
-    name:
-    version:
-    flow_run_name:
-    retries:
-    retry_delay_seconds:
-    task_runner:
-    description:
-    timeout_seconds:
+    trace_input_formatter: Callable[..., str] | None = None,
+    trace_output_formatter: Callable[..., str] | None = None,
+    # prefect passthrough
+    name: str | None = None,
+    version: str | None = None,
+    flow_run_name: Union[Callable[[], str], str] | None = None,
+    retries: int | None = None,
+    retry_delay_seconds: int | float | None = None,
+    task_runner: TaskRunner[PrefectFuture[Any]] | None = None,
+    description: str | None = None,
+    timeout_seconds: int | float | None = None,
     validate_parameters: bool = True,
-    persist_result:
-    result_storage:
-    result_serializer:
+    persist_result: bool | None = None,
+    result_storage: ResultStorage | str | None = None,
+    result_serializer: ResultSerializer | str | None = None,
     cache_result_in_memory: bool = True,
-    log_prints:
-    on_completion:
-    on_failure:
-    on_cancellation:
-    on_crashed:
-    on_running:
-) ->
-    """
-
-
-
-
-
-
-
-
-
-
-
-    Args:
-        trace_level: Control tracing ("always", "debug", "off")
-        trace_ignore_input: Whether to ignore input in traces
-        trace_ignore_output: Whether to ignore output in traces
-        trace_ignore_inputs: List of input parameter names to ignore
-        trace_input_formatter: Custom formatter for inputs
-        trace_output_formatter: Custom formatter for outputs
-
-    Plus all standard Prefect flow parameters...
+    log_prints: bool | None = None,
+    on_completion: list[FlowStateHook[Any, Any]] | None = None,
+    on_failure: list[FlowStateHook[Any, Any]] | None = None,
+    on_cancellation: list[FlowStateHook[Any, Any]] | None = None,
+    on_crashed: list[FlowStateHook[Any, Any]] | None = None,
+    on_running: list[FlowStateHook[Any, Any]] | None = None,
+) -> _FlowLike[FO_contra] | Callable[[_DocumentsFlowCallable[FO_contra]], _FlowLike[FO_contra]]:
+    """Decorate an **async** flow.
+
+    Required signature:
+        async def flow_fn(
+            project_name: str,
+            documents: DocumentList,
+            flow_options: FlowOptions,  # or any subclass
+            *args,
+            **kwargs
+        ) -> DocumentList
+
+    Returns the same callable object Prefect’s ``@flow`` would return.
     """
+    flow_decorator: Callable[..., Any] = _prefect_flow

-    def
-
-        params = list(sig.parameters.values())
+    def _apply(fn: _DocumentsFlowCallable[FO_contra]) -> _FlowLike[FO_contra]:
+        fname = _callable_name(fn, "flow")

-        if
+        if not inspect.iscoroutinefunction(fn):
+            raise TypeError(f"@pipeline_flow '{fname}' must be declared with 'async def'")
+        if len(inspect.signature(fn).parameters) < 3:
             raise TypeError(
-                f"@pipeline_flow '{
-                "
+                f"@pipeline_flow '{fname}' must accept "
+                "'project_name, documents, flow_options' as its first three parameters"
             )

-
-
-
-
-
-
-
-
+        async def _wrapper(
+            project_name: str,
+            documents: DocumentList,
+            flow_options: FO_contra,
+            *args: Any,
+            **kwargs: Any,
+        ) -> DocumentList:
+            result = await fn(project_name, documents, flow_options, *args, **kwargs)
+            if not isinstance(result, DocumentList):  # pyright: ignore[reportUnnecessaryIsInstance]
+                raise TypeError(
+                    f"Flow '{fname}' must return DocumentList, got {type(result).__name__}"
                 )
+            return result
+
+        traced = trace(
+            level=trace_level,
+            name=name or fname,
+            ignore_input=trace_ignore_input,
+            ignore_output=trace_ignore_output,
+            ignore_inputs=trace_ignore_inputs,
+            input_formatter=trace_input_formatter,
+            output_formatter=trace_output_formatter,
+        )(_wrapper)

-        # Create wrapper that ensures return type
-        if inspect.iscoroutinefunction(func):
-
-            @functools.wraps(func)
-            async def wrapper(  # pyright: ignore[reportRedeclaration]
-                project_name: str,
-                documents: DocumentList,
-                flow_options: FlowOptions,
-                *args,  # pyright: ignore[reportMissingParameterType]
-                **kwargs,  # pyright: ignore[reportMissingParameterType]
-            ) -> DocumentList:
-                result = await func(project_name, documents, flow_options, *args, **kwargs)
-                # Runtime type checking
-                DL = DocumentList  # Avoid recomputation
-                if not isinstance(result, DL):
-                    raise TypeError(
-                        f"Flow '{func.__name__}' must return a DocumentList, "
-                        f"but returned {type(result).__name__}"
-                    )
-                return result
-        else:
-
-            @functools.wraps(func)
-            def wrapper(  # pyright: ignore[reportRedeclaration]
-                project_name: str,
-                documents: DocumentList,
-                flow_options: FlowOptions,
-                *args,  # pyright: ignore[reportMissingParameterType]
-                **kwargs,  # pyright: ignore[reportMissingParameterType]
-            ) -> DocumentList:
-                result = func(project_name, documents, flow_options, *args, **kwargs)
-                # Runtime type checking
-                DL = DocumentList  # Avoid recomputation
-                if not isinstance(result, DL):
-                    raise TypeError(
-                        f"Flow '{func.__name__}' must return a DocumentList, "
-                        f"but returned {type(result).__name__}"
-                    )
-                return result
-
-        # Apply tracing first if enabled
-        if trace_level != "off":
-            traced_wrapper = trace(
-                level=trace_level,
-                name=name or func.__name__,
-                ignore_input=trace_ignore_input,
-                ignore_output=trace_ignore_output,
-                ignore_inputs=trace_ignore_inputs,
-                input_formatter=trace_input_formatter,
-                output_formatter=trace_output_formatter,
-            )(wrapper)
-        else:
-            traced_wrapper = wrapper
-
-        # Then apply Prefect flow decorator
         return cast(
-
-
-            traced_wrapper,  # pyright: ignore[reportArgumentType]
+            _FlowLike[FO_contra],
+            flow_decorator(
                 name=name,
                 version=version,
                 flow_run_name=flow_run_name,
-                retries=retries,
+                retries=0 if retries is None else retries,
                 retry_delay_seconds=retry_delay_seconds,
                 task_runner=task_runner,
                 description=description,
@@ -407,12 +405,10 @@ def pipeline_flow(
                 on_cancellation=on_cancellation,
                 on_crashed=on_crashed,
                 on_running=on_running,
-            ),
+            )(traced),
         )

-    if __fn
-        return decorator(__fn)
-    return decorator
+    return _apply(__fn) if __fn else _apply


 __all__ = ["pipeline_task", "pipeline_flow"]
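The rewritten decorators enforce their contract twice: at decoration time (the target must be `async def`) and at run time (a flow must hand back a `DocumentList`). A small sketch of both checks, using the import paths from the module docstring above:

```python
from ai_pipeline_core import pipeline_flow, pipeline_task
from ai_pipeline_core.documents import DocumentList
from ai_pipeline_core.flow.options import FlowOptions


@pipeline_task
async def double(x: int) -> int:
    return x * 2


try:
    @pipeline_task
    def not_async(x: int) -> int:  # sync target -> rejected immediately
        return x
except TypeError as exc:
    print(exc)  # "@pipeline_task target 'not_async' must be 'async def'"


@pipeline_flow
async def passthrough(
    project_name: str, documents: DocumentList, flow_options: FlowOptions
) -> DocumentList:
    await double(2)
    return documents  # returning anything else raises TypeError at run time
```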
ai_pipeline_core/simple_runner/cli.py
CHANGED

@@ -1,6 +1,9 @@
 from __future__ import annotations

 import asyncio
+import os
+import sys
+from contextlib import ExitStack
 from pathlib import Path
 from typing import Callable, Type, TypeVar, cast

@@ -10,6 +13,8 @@ from pydantic_settings import CliPositionalArg, SettingsConfigDict
 from ai_pipeline_core.documents import DocumentList
 from ai_pipeline_core.flow.options import FlowOptions
 from ai_pipeline_core.logging import get_pipeline_logger, setup_logging
+from ai_pipeline_core.prefect import disable_run_logger, prefect_test_harness
+from ai_pipeline_core.settings import settings

 from .simple_runner import ConfigSequence, FlowSequence, run_pipelines, save_documents_to_directory

@@ -28,12 +33,18 @@ def _initialize_environment() -> None:
         logger.warning(f"Failed to initialize LMNR tracing: {e}")


+def _running_under_pytest() -> bool:  # NEW
+    """Return True when invoked by pytest (so fixtures will supply test contexts)."""
+    return "PYTEST_CURRENT_TEST" in os.environ or "pytest" in sys.modules
+
+
 def run_cli(
     *,
     flows: FlowSequence,
     flow_configs: ConfigSequence,
     options_cls: Type[TOptions],
     initializer: InitializerFunc = None,
+    trace_name: str | None = None,
 ) -> None:
     """
     Parse CLI+env into options, then run the pipeline.
@@ -43,13 +54,20 @@ def run_cli(
     - --start/--end: optional, 1-based step bounds
     - all other flags come from options_cls (fields & Field descriptions)
     """
+    # Check if no arguments provided before initialization
+    if len(sys.argv) == 1:
+        # Add --help to show usage
+        sys.argv.append("--help")
+
     _initialize_environment()

     class _RunnerOptions(  # type: ignore[reportRedeclaration]
         options_cls,
         cli_parse_args=True,
         cli_kebab_case=True,
-        cli_exit_on_error=
+        cli_exit_on_error=True,  # Let it exit normally on error
+        cli_prog_name="ai-pipeline",
+        cli_use_class_docs_for_groups=True,
     ):
         working_directory: CliPositionalArg[Path]
         project_name: str | None = None
@@ -82,14 +100,28 @@
     if getattr(opts, "start", 1) == 1 and initial_documents:
         save_documents_to_directory(wd, initial_documents)

-
-
-
-
-
-
-
-
-
+    # Setup context stack with optional test harness and tracing
+
+    with ExitStack() as stack:
+        if not settings.prefect_api_key and not _running_under_pytest():
+            stack.enter_context(prefect_test_harness())
+            stack.enter_context(disable_run_logger())
+
+        if trace_name:
+            stack.enter_context(
+                Laminar.start_span(
+                    name=f"{trace_name}-{project_name}", input=[opts.model_dump_json()]
+                )
+            )
+
+        asyncio.run(
+            run_pipelines(
+                project_name=project_name,
+                output_dir=wd,
+                flows=flows,
+                flow_configs=flow_configs,
+                flow_options=opts,
+                start_step=getattr(opts, "start", 1),
+                end_step=getattr(opts, "end", None),
+            )
         )
-    )
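Put together, `run_cli` now spins up Prefect's test harness when no `PREFECT_API_KEY` is configured (and pytest is not already driving the process), and can wrap the whole run in a Laminar span via the new `trace_name` parameter. A sketch of a caller; the project module and its flow lists are hypothetical:

```python
from ai_pipeline_core.flow.options import FlowOptions
from ai_pipeline_core.simple_runner import run_cli

from my_project.flows import FLOWS, FLOW_CONFIGS  # hypothetical project module


class MyOptions(FlowOptions):
    temperature: float = 0.7


if __name__ == "__main__":
    run_cli(
        flows=FLOWS,
        flow_configs=FLOW_CONFIGS,
        options_cls=MyOptions,
        trace_name="my-pipeline",  # span is named f"{trace_name}-{project_name}"
    )
```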
{ai_pipeline_core-0.1.7.dist-info → ai_pipeline_core-0.1.8.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-pipeline-core
-Version: 0.1.7
+Version: 0.1.8
 Summary: Core utilities for AI-powered processing pipelines using prefect
 Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
 Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -151,7 +151,7 @@ async def process_document(doc: Document):
     return response.parsed
 ```

-### Enhanced Pipeline Decorators
+### Enhanced Pipeline Decorators
 ```python
 from ai_pipeline_core import pipeline_flow, pipeline_task
 from ai_pipeline_core.flow import FlowOptions
@@ -182,7 +182,7 @@ async def my_pipeline(
     return DocumentList(results)
 ```

-### Simple Runner Utility
+### Simple Runner Utility
 ```python
 from ai_pipeline_core.simple_runner import run_cli, run_pipeline
 from ai_pipeline_core.flow import FlowOptions
@@ -206,7 +206,7 @@ async def main():
     )
 ```

-### Clean Prefect Decorators
+### Clean Prefect Decorators
 ```python
 # Import clean Prefect decorators without tracing
 from ai_pipeline_core.prefect import flow, task
@@ -214,12 +214,12 @@ from ai_pipeline_core.prefect import flow, task
 # Or use pipeline decorators with tracing
 from ai_pipeline_core import pipeline_flow, pipeline_task

-@task  # Clean Prefect task
+@task  # Clean Prefect task (supports both sync and async)
 def compute(x: int) -> int:
     return x * 2

-@pipeline_task(trace_level="always")  # With tracing
-def compute_traced(x: int) -> int:
+@pipeline_task(trace_level="always")  # With tracing (async only)
+async def compute_traced(x: int) -> int:
     return x * 2
 ```

@@ -246,12 +246,12 @@ docs = DocumentList([doc1, doc2])
 Managed AI interactions with built-in retry logic, cost tracking, and structured outputs.

 **Supported Models** (via LiteLLM proxy):
-- OpenAI:
-- Anthropic:
-- Google:
-- xAI:
-- Perplexity:
-- And many more through LiteLLM compatibility
+- OpenAI: gpt-5
+- Anthropic: claude-4
+- Google: gemini-2.5
+- xAI: grok-3, grok-4
+- Perplexity: sonar-pro-search
+- And many more through LiteLLM compatibility. Every model from openrouter should work.

 ```python
 from ai_pipeline_core.llm import generate_structured, AIMessages, ModelOptions
@@ -328,13 +328,13 @@ ai_pipeline_core/
 │   └── model_options.py   # Configuration models
 ├── flow/                  # Prefect flow utilities
 │   ├── config.py          # Type-safe flow configuration
-│   └── options.py         # FlowOptions base class
-├── simple_runner/         # Pipeline execution utilities
+│   └── options.py         # FlowOptions base class
+├── simple_runner/         # Pipeline execution utilities
 │   ├── cli.py             # CLI interface
 │   └── simple_runner.py   # Core runner logic
 ├── logging/               # Structured logging
-├── pipeline.py            # Enhanced decorators
-├── prefect.py             # Clean Prefect exports
+├── pipeline.py            # Enhanced decorators
+├── prefect.py             # Clean Prefect exports
 ├── tracing.py             # Observability decorators
 └── settings.py            # Centralized configuration
 ```
@@ -345,6 +345,7 @@ ai_pipeline_core/
 ```bash
 make test           # Run all tests
 make test-cov       # Run with coverage report
+make test-showcase  # Test the showcase.py CLI example
 pytest tests/test_documents.py::TestDocument::test_creation  # Single test
 ```

@@ -481,6 +482,22 @@ For learning purposes, see [CLAUDE.md](CLAUDE.md) for our comprehensive coding s

 - [CLAUDE.md](CLAUDE.md) - Detailed coding standards and architecture guide

+## Examples
+
+### In This Repository
+- [showcase.py](examples/showcase.py) - Complete example demonstrating all core features including the CLI runner
+  ```bash
+  # Run the showcase example with CLI
+  python examples/showcase.py ./output --temperature 0.7 --batch-size 5
+
+  # Show help
+  python examples/showcase.py --help
+  ```
+- [showcase.jinja2](examples/showcase.jinja2) - Example Jinja2 prompt template
+
+### Real-World Application
+- [AI Documentation Writer](https://github.com/bbarwik/ai-documentation-writer) - Production-ready example showing how to build sophisticated AI pipelines for automated documentation generation. See [examples/ai-documentation-writer.md](examples/ai-documentation-writer.md) for a detailed overview.
+
 ### dependencies_docs/ Directory
 > [!NOTE]
 > The `dependencies_docs/` directory contains guides for AI assistants (like Claude Code) on how to interact with the project's external dependencies and tooling, NOT user documentation for ai-pipeline-core itself. These files are excluded from repository listings to avoid confusion.
@@ -511,29 +528,29 @@ Built with:
 - [LiteLLM](https://litellm.ai/) - LLM proxy
 - [Pydantic](https://pydantic-docs.helpmanual.io/) - Data validation

-## What's New in v0.1.7
+## What's New in v0.1.8

-###
-- **
-- **
-- **
-- **
-- **Expanded Exports**: All major components now accessible from top-level package import
+### Breaking Changes
+- **Async-Only Pipeline Decorators**: `@pipeline_flow` and `@pipeline_task` now require `async def` functions (raises TypeError for sync)
+- **Document Class Name Validation**: Document subclasses cannot start with "Test" prefix (pytest conflict prevention)
+- **FlowConfig Validation**: OUTPUT_DOCUMENT_TYPE cannot be in INPUT_DOCUMENT_TYPES (prevents circular dependencies)
+- **Temperature Field**: Added optional `temperature` field to `ModelOptions` for explicit control

-###
--
--
--
--
+### Major Improvements
+- **Pipeline Module Refactoring**: Reduced from ~400 to ~150 lines with cleaner Protocol-based typing
+- **Enhanced Validation**: FlowConfig and Document classes now validate at definition time
+- **Better CLI Support**: Auto-displays help when no arguments provided, improved context management
+- **Test Suite Updates**: All tests updated to use async/await consistently

-###
--
--
--
+### Documentation Updates
+- Added Document naming rules to CLAUDE.md
+- Added FlowConfig validation rules
+- Added code elegance principles section
+- Updated guide_for_ai.md to API reference format

 ## Stability Notice

-**Current Version**: 0.1.7
+**Current Version**: 0.1.8
 **Status**: Internal Preview
 **API Stability**: Unstable - Breaking changes expected
 **Recommended Use**: Learning and reference only
{ai_pipeline_core-0.1.7.dist-info → ai_pipeline_core-0.1.8.dist-info}/RECORD
CHANGED

@@ -1,25 +1,25 @@
-ai_pipeline_core/__init__.py,sha256=
+ai_pipeline_core/__init__.py,sha256=dWkrDbW3oqrplHH7oBQ59dOc0wtJr0AcKVtQo63C_wM,1662
 ai_pipeline_core/exceptions.py,sha256=_vW0Hbw2LGb5tcVvH0YzTKMff7QOPfCRr3w-w_zPyCE,968
-ai_pipeline_core/pipeline.py,sha256=
+ai_pipeline_core/pipeline.py,sha256=f-pEDwrEhMLfcSEvPP2b74xb0WzFI05IQcl-NDFzH7w,16565
 ai_pipeline_core/prefect.py,sha256=VHYkkRcUmSpdwyWosOOxuExVCncIQgT6MypqGdjcYnM,241
 ai_pipeline_core/prompt_manager.py,sha256=XmNUdMIC0WrE9fF0LIcfozAKOGrlYwj8AfXvCndIH-o,4693
 ai_pipeline_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ai_pipeline_core/settings.py,sha256=Zl2BPa6IHzh-B5V7cg5mtySr1dhWZQYYKxXz3BwrHlQ,615
 ai_pipeline_core/tracing.py,sha256=T-3fTyA37TejXxotkVzTNqL2a5nOfZ0bcHg9TClLvmg,9471
 ai_pipeline_core/documents/__init__.py,sha256=TLW8eOEmthfDHOTssXjyBlqhgrZe9ZIyxlkd0LBJ3_s,340
-ai_pipeline_core/documents/document.py,sha256=
+ai_pipeline_core/documents/document.py,sha256=AIdkg2RIyYB5Tje1GmnQqtk8qesOIQwtwzEztypYIhg,13121
 ai_pipeline_core/documents/document_list.py,sha256=HOG_uZDazA9CJB7Lr_tNcDFzb5Ff9RUt0ELWQK_eYNM,4940
 ai_pipeline_core/documents/flow_document.py,sha256=qsV-2JYOMhkvAj7lW54ZNH_4QUclld9h06CoU59tWww,815
 ai_pipeline_core/documents/mime_type.py,sha256=sBhNRoBJQ35JoHWhJzBGpp00WFDfMdEX0JZKKkR7QH0,3371
 ai_pipeline_core/documents/task_document.py,sha256=WjHqtl1d60XFBBqewNRdz1OqBErGI0jRx15oQYCTHo8,907
 ai_pipeline_core/documents/utils.py,sha256=BdE4taSl1vrBhxnFbOP5nDA7lXIcvY__AMRTHoaNb5M,2764
 ai_pipeline_core/flow/__init__.py,sha256=54DRfZnjXQVrimgtKEVEm5u5ErImx31cjK2PpBvHjU4,116
-ai_pipeline_core/flow/config.py,sha256=
+ai_pipeline_core/flow/config.py,sha256=gRCtiahTA7h6_xVPY3su85pZbu5gu41yXUgGLILey2E,3220
 ai_pipeline_core/flow/options.py,sha256=WygJEwjqOa14l23a_Hp36hJX-WgxHMq-YzSieC31Z4Y,701
 ai_pipeline_core/llm/__init__.py,sha256=3XVK-bSJdOe0s6KmmO7PDbsXHfjlcZEG1MVBmaz3EeU,442
 ai_pipeline_core/llm/ai_messages.py,sha256=DwJJe05BtYdnMZeHbBbyEbDCqrW63SRvprxptoJUCn4,4586
 ai_pipeline_core/llm/client.py,sha256=VMs1nQKCfoxbcvE2mypn5QF19u90Ua87-5IiZxWOj98,7784
-ai_pipeline_core/llm/model_options.py,sha256=
+ai_pipeline_core/llm/model_options.py,sha256=7O5y-qtYtmTXzIUS7vxKOQlRAM3TTggqHw2_dOnS_a8,1441
 ai_pipeline_core/llm/model_response.py,sha256=fIWueaemgo0cMruvToMZyKsRPzKwL6IlvUJN7DLG710,5558
 ai_pipeline_core/llm/model_types.py,sha256=rIwY6voT8-xdfsKPDC0Gkdl2iTp9Q2LuvWGSRU9Mp3k,342
 ai_pipeline_core/logging/__init__.py,sha256=DOO6ckgnMVXl29Sy7q6jhO-iW96h54pCHQDzgA2Pu6I,272
@@ -27,9 +27,9 @@ ai_pipeline_core/logging/logging.yml,sha256=YTW48keO_K5bkkb-KXGM7ZuaYKiquLsjsURe
 ai_pipeline_core/logging/logging_config.py,sha256=6MBz9nnVNvqiLDoyy9-R3sWkn6927Re5hdz4hwTptpI,4903
 ai_pipeline_core/logging/logging_mixin.py,sha256=RDaR2ju2-vKTJRzXGa0DquGPT8_UxahWjvKJnaD0IV8,7810
 ai_pipeline_core/simple_runner/__init__.py,sha256=OPbTCZvqpnYdwi1Knnkj-MpmD0Nvtg5O7UwIdAKz_AY,384
-ai_pipeline_core/simple_runner/cli.py,sha256=
+ai_pipeline_core/simple_runner/cli.py,sha256=1X2kkdsGFIewYMxtoRVDS1RY6cx5wNVEIw-TeShbLCc,4281
 ai_pipeline_core/simple_runner/simple_runner.py,sha256=70BHT1iz-G368H2t4tsWAVni0jw2VkWVdnKICuVtLPw,5009
-ai_pipeline_core-0.1.
-ai_pipeline_core-0.1.
-ai_pipeline_core-0.1.
-ai_pipeline_core-0.1.
+ai_pipeline_core-0.1.8.dist-info/METADATA,sha256=RX8VUdE5M5DUE7S4LebGvnJkse87s23SxzxsDC7Rys4,19119
+ai_pipeline_core-0.1.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ai_pipeline_core-0.1.8.dist-info/licenses/LICENSE,sha256=kKj8mfbdWwkyG3U6n7ztB3bAZlEwShTkAsvaY657i3I,1074
+ai_pipeline_core-0.1.8.dist-info/RECORD,,

{ai_pipeline_core-0.1.7.dist-info → ai_pipeline_core-0.1.8.dist-info}/WHEEL
File without changes

{ai_pipeline_core-0.1.7.dist-info → ai_pipeline_core-0.1.8.dist-info}/licenses/LICENSE
File without changes