ai-pipeline-core 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +54 -13
- ai_pipeline_core/documents/__init__.py +3 -0
- ai_pipeline_core/documents/document.py +16 -1
- ai_pipeline_core/flow/__init__.py +5 -1
- ai_pipeline_core/flow/config.py +21 -0
- ai_pipeline_core/flow/options.py +26 -0
- ai_pipeline_core/llm/client.py +5 -3
- ai_pipeline_core/llm/model_options.py +4 -0
- ai_pipeline_core/pipeline.py +414 -0
- ai_pipeline_core/prefect.py +7 -0
- ai_pipeline_core/simple_runner/__init__.py +19 -0
- ai_pipeline_core/simple_runner/cli.py +127 -0
- ai_pipeline_core/simple_runner/simple_runner.py +147 -0
- ai_pipeline_core/tracing.py +63 -20
- {ai_pipeline_core-0.1.6.dist-info → ai_pipeline_core-0.1.8.dist-info}/METADATA +115 -36
- {ai_pipeline_core-0.1.6.dist-info → ai_pipeline_core-0.1.8.dist-info}/RECORD +18 -12
- {ai_pipeline_core-0.1.6.dist-info → ai_pipeline_core-0.1.8.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.1.6.dist-info → ai_pipeline_core-0.1.8.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/__init__.py
CHANGED
@@ -1,7 +1,23 @@
 """Pipeline Core - Shared infrastructure for AI pipelines."""
 
-from .
-from .
+from . import llm
+from .documents import (
+    Document,
+    DocumentList,
+    FlowDocument,
+    TaskDocument,
+    canonical_name_key,
+    sanitize_url,
+)
+from .flow import FlowConfig, FlowOptions
+from .llm import (
+    AIMessages,
+    AIMessageType,
+    ModelName,
+    ModelOptions,
+    ModelResponse,
+    StructuredModelResponse,
+)
 from .logging import (
     LoggerMixin,
     LoggingConfig,
@@ -9,28 +25,53 @@ from .logging import (
     get_pipeline_logger,
     setup_logging,
 )
-from .logging import
-
-
+from .logging import get_pipeline_logger as get_logger
+from .pipeline import pipeline_flow, pipeline_task
+from .prefect import disable_run_logger, prefect_test_harness
 from .prompt_manager import PromptManager
 from .settings import settings
-from .tracing import trace
+from .tracing import TraceInfo, TraceLevel, trace
 
-__version__ = "0.1.6"
+__version__ = "0.1.8"
 
 __all__ = [
-
-    "
-
-    "FlowDocument",
+    # Config/Settings
+    "settings",
+    # Logging
     "get_logger",
     "get_pipeline_logger",
     "LoggerMixin",
     "LoggingConfig",
-    "PromptManager",
-    "settings",
     "setup_logging",
     "StructuredLoggerMixin",
+    # Documents
+    "Document",
+    "DocumentList",
+    "FlowDocument",
     "TaskDocument",
+    "canonical_name_key",
+    "sanitize_url",
+    # Flow/Task
+    "FlowConfig",
+    "FlowOptions",
+    # Pipeline decorators (with tracing)
+    "pipeline_task",
+    "pipeline_flow",
+    # Prefect decorators (clean, no tracing)
+    "prefect_test_harness",
+    "disable_run_logger",
+    # LLM
+    "llm",
+    "ModelName",
+    "ModelOptions",
+    "ModelResponse",
+    "StructuredModelResponse",
+    "AIMessages",
+    "AIMessageType",
+    # Tracing
     "trace",
+    "TraceLevel",
+    "TraceInfo",
+    # Utils
+    "PromptManager",
 ]
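Net effect of the re-exports above: in 0.1.8 most of the toolkit is importable directly from the package root. A minimal sketch using only names present in the new `__all__`:

    from ai_pipeline_core import (
        AIMessages,
        DocumentList,
        FlowOptions,
        ModelOptions,
        get_logger,
        pipeline_flow,
        pipeline_task,
    )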
ai_pipeline_core/documents/__init__.py
CHANGED
@@ -2,10 +2,13 @@ from .document import Document
 from .document_list import DocumentList
 from .flow_document import FlowDocument
 from .task_document import TaskDocument
+from .utils import canonical_name_key, sanitize_url
 
 __all__ = [
     "Document",
     "DocumentList",
     "FlowDocument",
     "TaskDocument",
+    "canonical_name_key",
+    "sanitize_url",
 ]

ai_pipeline_core/documents/document.py
CHANGED
@@ -26,12 +26,27 @@ TModel = TypeVar("TModel", bound=BaseModel)
 
 
 class Document(BaseModel, ABC):
-    """Abstract base class for all documents
+    """Abstract base class for all documents.
+
+    Warning: Document subclasses should NOT start with 'Test' prefix as this
+    causes conflicts with pytest test discovery. Classes with 'Test' prefix
+    will be rejected at definition time.
+    """
 
     MAX_CONTENT_SIZE: ClassVar[int] = 25 * 1024 * 1024  # 25MB default
     DESCRIPTION_EXTENSION: ClassVar[str] = ".description.md"
     MARKDOWN_LIST_SEPARATOR: ClassVar[str] = "\n\n---\n\n"
 
+    def __init_subclass__(cls, **kwargs: Any) -> None:
+        """Validate subclass names to prevent pytest conflicts."""
+        super().__init_subclass__(**kwargs)
+        if cls.__name__.startswith("Test"):
+            raise TypeError(
+                f"Document subclass '{cls.__name__}' cannot start with 'Test' prefix. "
+                "This causes conflicts with pytest test discovery. "
+                "Please use a different name (e.g., 'SampleDocument', 'ExampleDocument')."
+            )
+
     def __init__(self, **data: Any) -> None:
         """Prevent direct instantiation of abstract Document class."""
         if type(self) is Document:
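The `__init_subclass__` guard above fires at class-definition time. A minimal sketch of the effect, assuming a `FlowDocument` subclass can be declared with an empty body (`SampleDocument` and `TestDocument` are made-up names):

    from ai_pipeline_core.documents import FlowDocument

    class SampleDocument(FlowDocument):  # accepted
        pass

    try:
        class TestDocument(FlowDocument):  # rejected: name starts with 'Test'
            pass
    except TypeError as exc:
        print(exc)  # suggests a different name, e.g. 'SampleDocument'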
ai_pipeline_core/flow/config.py
CHANGED
@@ -14,6 +14,27 @@ class FlowConfig(ABC):
     INPUT_DOCUMENT_TYPES: ClassVar[list[type[FlowDocument]]]
     OUTPUT_DOCUMENT_TYPE: ClassVar[type[FlowDocument]]
 
+    def __init_subclass__(cls, **kwargs):
+        """Validate that OUTPUT_DOCUMENT_TYPE is not in INPUT_DOCUMENT_TYPES."""
+        super().__init_subclass__(**kwargs)
+
+        # Skip validation for the abstract base class itself
+        if cls.__name__ == "FlowConfig":
+            return
+
+        # Ensure required attributes are defined
+        if not hasattr(cls, "INPUT_DOCUMENT_TYPES"):
+            raise TypeError(f"FlowConfig {cls.__name__} must define INPUT_DOCUMENT_TYPES")
+        if not hasattr(cls, "OUTPUT_DOCUMENT_TYPE"):
+            raise TypeError(f"FlowConfig {cls.__name__} must define OUTPUT_DOCUMENT_TYPE")
+
+        # Validate that output type is not in input types
+        if cls.OUTPUT_DOCUMENT_TYPE in cls.INPUT_DOCUMENT_TYPES:
+            raise TypeError(
+                f"FlowConfig {cls.__name__}: OUTPUT_DOCUMENT_TYPE "
+                f"({cls.OUTPUT_DOCUMENT_TYPE.__name__}) cannot be in INPUT_DOCUMENT_TYPES"
+            )
+
     @classmethod
     def get_input_document_types(cls) -> list[type[FlowDocument]]:
         """
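Concretely, every concrete `FlowConfig` subclass must now declare both class attributes, and the output type may not appear among the inputs. A sketch under the assumption that `FlowDocument` subclasses can be declared with empty bodies (`InputDoc`, `OutputDoc`, and the config names are hypothetical):

    from ai_pipeline_core.documents import FlowDocument
    from ai_pipeline_core.flow import FlowConfig

    class InputDoc(FlowDocument):
        pass

    class OutputDoc(FlowDocument):
        pass

    class GoodConfig(FlowConfig):   # passes validation
        INPUT_DOCUMENT_TYPES = [InputDoc]
        OUTPUT_DOCUMENT_TYPE = OutputDoc

    class BadConfig(FlowConfig):    # TypeError: output type is also an input
        INPUT_DOCUMENT_TYPES = [OutputDoc]
        OUTPUT_DOCUMENT_TYPE = OutputDoc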
ai_pipeline_core/flow/options.py
ADDED
@@ -0,0 +1,26 @@
+from typing import TypeVar
+
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+from ai_pipeline_core.llm import ModelName
+
+T = TypeVar("T", bound="FlowOptions")
+
+
+class FlowOptions(BaseSettings):
+    """Base configuration for AI Pipeline flows."""
+
+    core_model: ModelName | str = Field(
+        default="gpt-5",
+        description="Primary model for complex analysis and generation tasks.",
+    )
+    small_model: ModelName | str = Field(
+        default="gpt-5-mini",
+        description="Fast, cost-effective model for simple tasks and orchestration.",
+    )
+
+    model_config = SettingsConfigDict(frozen=True, extra="ignore")
+
+
+__all__ = ["FlowOptions"]
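Because `FlowOptions` extends pydantic-settings' `BaseSettings`, field values can come from keyword arguments or the environment, and `frozen=True` makes instances immutable. A small usage sketch (the subclass and its extra field are hypothetical):

    from pydantic import Field

    from ai_pipeline_core.flow import FlowOptions

    class MyOptions(FlowOptions):
        max_iterations: int = Field(default=3)

    opts = MyOptions(core_model="gpt-5-mini")
    # opts.small_model == "gpt-5-mini" (default); reassigning a field
    # raises a validation error because the model is frozen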
ai_pipeline_core/llm/client.py
CHANGED
@@ -118,11 +118,13 @@ async def _generate_with_retry(
             span.set_attributes(response.get_laminar_metadata())
             Laminar.set_span_output(response.content)
             if not response.content:
-                # disable cache in case of empty response
-                completion_kwargs["extra_body"]["cache"] = {"no-cache": True}
                 raise ValueError(f"Model {model} returned an empty response.")
             return response
         except (asyncio.TimeoutError, ValueError, Exception) as e:
+            if not isinstance(e, asyncio.TimeoutError):
+                # disable cache if it's not a timeout because it may cause an error
+                completion_kwargs["extra_body"]["cache"] = {"no-cache": True}
+
             logger.warning(
                 "LLM generation failed (attempt %d/%d): %s",
                 attempt + 1,
@@ -167,7 +169,7 @@ T = TypeVar("T", bound=BaseModel)
 
 @trace(ignore_inputs=["context"])
 async def generate_structured(
-    model: ModelName,
+    model: ModelName | str,
     response_format: type[T],
     *,
     context: AIMessages = AIMessages(),
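The first hunk moves cache-busting out of the empty-response branch and into the retry handler: any non-timeout failure now causes the next attempt to be sent with `cache: {"no-cache": true}` in `extra_body`. Since an empty response raises `ValueError` (not a timeout), empty responses still trigger the no-cache retry as before. A minimal sketch of the pattern in isolation (generic names, not the library's internals):

    import asyncio

    async def call_with_retry(call, kwargs, attempts=3):
        for attempt in range(attempts):
            try:
                return await call(**kwargs)
            except asyncio.TimeoutError:
                continue  # timeouts retry with the cache untouched
            except Exception:
                # any other failure: bypass the provider-side cache on retry
                kwargs.setdefault("extra_body", {})["cache"] = {"no-cache": True}
        raise RuntimeError("all attempts failed")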
ai_pipeline_core/llm/model_options.py
CHANGED
@@ -4,6 +4,7 @@ from pydantic import BaseModel
 
 
 class ModelOptions(BaseModel):
+    temperature: float | None = None
     system_prompt: str | None = None
     search_context_size: Literal["low", "medium", "high"] | None = None
     reasoning_effort: Literal["low", "medium", "high"] | None = None
@@ -21,6 +22,9 @@ class ModelOptions(BaseModel):
             "extra_body": {},
         }
 
+        if self.temperature:
+            kwargs["temperature"] = self.temperature
+
         if self.max_completion_tokens:
             kwargs["max_completion_tokens"] = self.max_completion_tokens
 
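One subtlety in the new kwargs plumbing: `if self.temperature:` is a truthiness test, so an explicit `temperature=0.0` is treated like `None` and never forwarded (an `is not None` check would forward it). Usage is otherwise straightforward:

    from ai_pipeline_core.llm import ModelOptions

    opts = ModelOptions(temperature=0.7, reasoning_effort="low")  # 0.7 is forwarded
    cold = ModelOptions(temperature=0.0)                          # 0.0 is silently dropped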
ai_pipeline_core/pipeline.py
ADDED
@@ -0,0 +1,414 @@
+"""
+ai_pipeline_core.pipeline
+=========================
+
+Tiny wrappers around Prefect's public ``@task`` and ``@flow`` that add our
+``trace`` decorator and **require async functions**.
+
+Why this exists
+---------------
+Prefect tasks/flows are awaitable at runtime, but their public type stubs
+don’t declare that clearly. We therefore:
+
+1) Return the **real Prefect objects** (so you keep every Prefect method).
+2) Type them as small Protocols that say “this is awaitable and has common
+   helpers like `.submit`/`.map`”.
+
+This keeps Pyright happy without altering runtime behavior and avoids
+leaking advanced typing constructs (like ``ParamSpec``) that confuse tools
+that introspect callables (e.g., Pydantic).
+
+Quick start
+-----------
+    from ai_pipeline_core.pipeline import pipeline_task, pipeline_flow
+    from ai_pipeline_core.documents import DocumentList
+    from ai_pipeline_core.flow.options import FlowOptions
+
+    @pipeline_task
+    async def add(x: int, y: int) -> int:
+        return x + y
+
+    @pipeline_flow
+    async def my_flow(project_name: str, docs: DocumentList, opts: FlowOptions) -> DocumentList:
+        await add(1, 2)  # awaitable and typed
+        return docs
+
+Rules
+-----
+• Your decorated function **must** be ``async def``.
+• ``@pipeline_flow`` functions must accept at least:
+  (project_name: str, documents: DocumentList, flow_options: FlowOptions | subclass).
+• Both wrappers return the same Prefect objects you’d get from Prefect directly.
+"""
+
+from __future__ import annotations
+
+import datetime
+import inspect
+from typing import Any, Callable, Coroutine, Iterable, Protocol, TypeVar, Union, cast, overload
+
+from prefect.assets import Asset
+from prefect.cache_policies import CachePolicy
+from prefect.context import TaskRunContext
+from prefect.flows import FlowStateHook
+from prefect.flows import flow as _prefect_flow  # public import
+from prefect.futures import PrefectFuture
+from prefect.results import ResultSerializer, ResultStorage
+from prefect.task_runners import TaskRunner
+from prefect.tasks import task as _prefect_task  # public import
+from prefect.utilities.annotations import NotSet
+from typing_extensions import TypeAlias
+
+from ai_pipeline_core.documents import DocumentList
+from ai_pipeline_core.flow.options import FlowOptions
+from ai_pipeline_core.tracing import TraceLevel, trace
+
+# --------------------------------------------------------------------------- #
+# Public callback aliases (Prefect stubs omit these exact types)
+# --------------------------------------------------------------------------- #
+RetryConditionCallable: TypeAlias = Callable[[Any, Any, Any], bool]
+StateHookCallable: TypeAlias = Callable[[Any, Any, Any], None]
+TaskRunNameValueOrCallable: TypeAlias = Union[str, Callable[[], str]]
+
+# --------------------------------------------------------------------------- #
+# Typing helpers
+# --------------------------------------------------------------------------- #
+R_co = TypeVar("R_co", covariant=True)
+FO_contra = TypeVar("FO_contra", bound=FlowOptions, contravariant=True)
+"""Flow options are an *input* type, so contravariant fits the callable model."""
+
+
+class _TaskLike(Protocol[R_co]):
+    """Minimal 'task-like' view: awaitable call + common helpers."""
+
+    def __call__(self, *args: Any, **kwargs: Any) -> Coroutine[Any, Any, R_co]: ...
+
+    submit: Callable[..., Any]
+    map: Callable[..., Any]
+    name: str | None
+
+    def __getattr__(self, name: str) -> Any: ...  # allow unknown helpers without type errors
+
+
+class _DocumentsFlowCallable(Protocol[FO_contra]):
+    """User async flow signature (first three params fixed)."""
+
+    def __call__(
+        self,
+        project_name: str,
+        documents: DocumentList,
+        flow_options: FO_contra,
+        *args: Any,
+        **kwargs: Any,
+    ) -> Coroutine[Any, Any, DocumentList]: ...
+
+
+class _FlowLike(Protocol[FO_contra]):
+    """Callable returned by Prefect ``@flow`` wrapper that we expose to users."""
+
+    def __call__(
+        self,
+        project_name: str,
+        documents: DocumentList,
+        flow_options: FO_contra,
+        *args: Any,
+        **kwargs: Any,
+    ) -> Coroutine[Any, Any, DocumentList]: ...
+
+    name: str | None
+
+    def __getattr__(self, name: str) -> Any: ...  # allow unknown helpers without type errors
+
+
+# --------------------------------------------------------------------------- #
+# Small helper: safely get a callable's name without upsetting the type checker
+# --------------------------------------------------------------------------- #
+def _callable_name(obj: Any, fallback: str) -> str:
+    try:
+        n = getattr(obj, "__name__", None)
+        return n if isinstance(n, str) else fallback
+    except Exception:
+        return fallback
+
+
+# --------------------------------------------------------------------------- #
+# @pipeline_task — async-only, traced, returns Prefect's Task object
+# --------------------------------------------------------------------------- #
+@overload
+def pipeline_task(__fn: Callable[..., Coroutine[Any, Any, R_co]], /) -> _TaskLike[R_co]: ...
+@overload
+def pipeline_task(
+    *,
+    # tracing
+    trace_level: TraceLevel = "always",
+    trace_ignore_input: bool = False,
+    trace_ignore_output: bool = False,
+    trace_ignore_inputs: list[str] | None = None,
+    trace_input_formatter: Callable[..., str] | None = None,
+    trace_output_formatter: Callable[..., str] | None = None,
+    # prefect passthrough
+    name: str | None = None,
+    description: str | None = None,
+    tags: Iterable[str] | None = None,
+    version: str | None = None,
+    cache_policy: CachePolicy | type[NotSet] = NotSet,
+    cache_key_fn: Callable[[TaskRunContext, dict[str, Any]], str | None] | None = None,
+    cache_expiration: datetime.timedelta | None = None,
+    task_run_name: TaskRunNameValueOrCallable | None = None,
+    retries: int | None = None,
+    retry_delay_seconds: int | float | list[float] | Callable[[int], list[float]] | None = None,
+    retry_jitter_factor: float | None = None,
+    persist_result: bool | None = None,
+    result_storage: ResultStorage | str | None = None,
+    result_serializer: ResultSerializer | str | None = None,
+    result_storage_key: str | None = None,
+    cache_result_in_memory: bool = True,
+    timeout_seconds: int | float | None = None,
+    log_prints: bool | None = False,
+    refresh_cache: bool | None = None,
+    on_completion: list[StateHookCallable] | None = None,
+    on_failure: list[StateHookCallable] | None = None,
+    retry_condition_fn: RetryConditionCallable | None = None,
+    viz_return_value: bool | None = None,
+    asset_deps: list[str | Asset] | None = None,
+) -> Callable[[Callable[..., Coroutine[Any, Any, R_co]]], _TaskLike[R_co]]: ...
+
+
+def pipeline_task(
+    __fn: Callable[..., Coroutine[Any, Any, R_co]] | None = None,
+    /,
+    *,
+    # tracing
+    trace_level: TraceLevel = "always",
+    trace_ignore_input: bool = False,
+    trace_ignore_output: bool = False,
+    trace_ignore_inputs: list[str] | None = None,
+    trace_input_formatter: Callable[..., str] | None = None,
+    trace_output_formatter: Callable[..., str] | None = None,
+    # prefect passthrough
+    name: str | None = None,
+    description: str | None = None,
+    tags: Iterable[str] | None = None,
+    version: str | None = None,
+    cache_policy: CachePolicy | type[NotSet] = NotSet,
+    cache_key_fn: Callable[[TaskRunContext, dict[str, Any]], str | None] | None = None,
+    cache_expiration: datetime.timedelta | None = None,
+    task_run_name: TaskRunNameValueOrCallable | None = None,
+    retries: int | None = None,
+    retry_delay_seconds: int | float | list[float] | Callable[[int], list[float]] | None = None,
+    retry_jitter_factor: float | None = None,
+    persist_result: bool | None = None,
+    result_storage: ResultStorage | str | None = None,
+    result_serializer: ResultSerializer | str | None = None,
+    result_storage_key: str | None = None,
+    cache_result_in_memory: bool = True,
+    timeout_seconds: int | float | None = None,
+    log_prints: bool | None = False,
+    refresh_cache: bool | None = None,
+    on_completion: list[StateHookCallable] | None = None,
+    on_failure: list[StateHookCallable] | None = None,
+    retry_condition_fn: RetryConditionCallable | None = None,
+    viz_return_value: bool | None = None,
+    asset_deps: list[str | Asset] | None = None,
+) -> _TaskLike[R_co] | Callable[[Callable[..., Coroutine[Any, Any, R_co]]], _TaskLike[R_co]]:
+    """Decorate an **async** function as a traced Prefect task."""
+    task_decorator: Callable[..., Any] = _prefect_task  # helps the type checker
+
+    def _apply(fn: Callable[..., Coroutine[Any, Any, R_co]]) -> _TaskLike[R_co]:
+        if not inspect.iscoroutinefunction(fn):
+            raise TypeError(
+                f"@pipeline_task target '{_callable_name(fn, 'task')}' must be 'async def'"
+            )
+
+        traced_fn = trace(
+            level=trace_level,
+            name=name or _callable_name(fn, "task"),
+            ignore_input=trace_ignore_input,
+            ignore_output=trace_ignore_output,
+            ignore_inputs=trace_ignore_inputs,
+            input_formatter=trace_input_formatter,
+            output_formatter=trace_output_formatter,
+        )(fn)
+
+        return cast(
+            _TaskLike[R_co],
+            task_decorator(
+                name=name,
+                description=description,
+                tags=tags,
+                version=version,
+                cache_policy=cache_policy,
+                cache_key_fn=cache_key_fn,
+                cache_expiration=cache_expiration,
+                task_run_name=task_run_name,
+                retries=0 if retries is None else retries,
+                retry_delay_seconds=retry_delay_seconds,
+                retry_jitter_factor=retry_jitter_factor,
+                persist_result=persist_result,
+                result_storage=result_storage,
+                result_serializer=result_serializer,
+                result_storage_key=result_storage_key,
+                cache_result_in_memory=cache_result_in_memory,
+                timeout_seconds=timeout_seconds,
+                log_prints=log_prints,
+                refresh_cache=refresh_cache,
+                on_completion=on_completion,
+                on_failure=on_failure,
+                retry_condition_fn=retry_condition_fn,
+                viz_return_value=viz_return_value,
+                asset_deps=asset_deps,
+            )(traced_fn),
+        )
+
+    return _apply(__fn) if __fn else _apply
+
+
+# --------------------------------------------------------------------------- #
+# @pipeline_flow — async-only, traced, returns Prefect’s flow wrapper
+# --------------------------------------------------------------------------- #
+@overload
+def pipeline_flow(__fn: _DocumentsFlowCallable[FO_contra], /) -> _FlowLike[FO_contra]: ...
+@overload
+def pipeline_flow(
+    *,
+    # tracing
+    trace_level: TraceLevel = "always",
+    trace_ignore_input: bool = False,
+    trace_ignore_output: bool = False,
+    trace_ignore_inputs: list[str] | None = None,
+    trace_input_formatter: Callable[..., str] | None = None,
+    trace_output_formatter: Callable[..., str] | None = None,
+    # prefect passthrough
+    name: str | None = None,
+    version: str | None = None,
+    flow_run_name: Union[Callable[[], str], str] | None = None,
+    retries: int | None = None,
+    retry_delay_seconds: int | float | None = None,
+    task_runner: TaskRunner[PrefectFuture[Any]] | None = None,
+    description: str | None = None,
+    timeout_seconds: int | float | None = None,
+    validate_parameters: bool = True,
+    persist_result: bool | None = None,
+    result_storage: ResultStorage | str | None = None,
+    result_serializer: ResultSerializer | str | None = None,
+    cache_result_in_memory: bool = True,
+    log_prints: bool | None = None,
+    on_completion: list[FlowStateHook[Any, Any]] | None = None,
+    on_failure: list[FlowStateHook[Any, Any]] | None = None,
+    on_cancellation: list[FlowStateHook[Any, Any]] | None = None,
+    on_crashed: list[FlowStateHook[Any, Any]] | None = None,
+    on_running: list[FlowStateHook[Any, Any]] | None = None,
+) -> Callable[[_DocumentsFlowCallable[FO_contra]], _FlowLike[FO_contra]]: ...
+
+
+def pipeline_flow(
+    __fn: _DocumentsFlowCallable[FO_contra] | None = None,
+    /,
+    *,
+    # tracing
+    trace_level: TraceLevel = "always",
+    trace_ignore_input: bool = False,
+    trace_ignore_output: bool = False,
+    trace_ignore_inputs: list[str] | None = None,
+    trace_input_formatter: Callable[..., str] | None = None,
+    trace_output_formatter: Callable[..., str] | None = None,
+    # prefect passthrough
+    name: str | None = None,
+    version: str | None = None,
+    flow_run_name: Union[Callable[[], str], str] | None = None,
+    retries: int | None = None,
+    retry_delay_seconds: int | float | None = None,
+    task_runner: TaskRunner[PrefectFuture[Any]] | None = None,
+    description: str | None = None,
+    timeout_seconds: int | float | None = None,
+    validate_parameters: bool = True,
+    persist_result: bool | None = None,
+    result_storage: ResultStorage | str | None = None,
+    result_serializer: ResultSerializer | str | None = None,
+    cache_result_in_memory: bool = True,
+    log_prints: bool | None = None,
+    on_completion: list[FlowStateHook[Any, Any]] | None = None,
+    on_failure: list[FlowStateHook[Any, Any]] | None = None,
+    on_cancellation: list[FlowStateHook[Any, Any]] | None = None,
+    on_crashed: list[FlowStateHook[Any, Any]] | None = None,
+    on_running: list[FlowStateHook[Any, Any]] | None = None,
+) -> _FlowLike[FO_contra] | Callable[[_DocumentsFlowCallable[FO_contra]], _FlowLike[FO_contra]]:
+    """Decorate an **async** flow.
+
+    Required signature:
+        async def flow_fn(
+            project_name: str,
+            documents: DocumentList,
+            flow_options: FlowOptions,  # or any subclass
+            *args,
+            **kwargs
+        ) -> DocumentList
+
+    Returns the same callable object Prefect’s ``@flow`` would return.
+    """
+    flow_decorator: Callable[..., Any] = _prefect_flow
+
+    def _apply(fn: _DocumentsFlowCallable[FO_contra]) -> _FlowLike[FO_contra]:
+        fname = _callable_name(fn, "flow")
+
+        if not inspect.iscoroutinefunction(fn):
+            raise TypeError(f"@pipeline_flow '{fname}' must be declared with 'async def'")
+        if len(inspect.signature(fn).parameters) < 3:
+            raise TypeError(
+                f"@pipeline_flow '{fname}' must accept "
+                "'project_name, documents, flow_options' as its first three parameters"
+            )
+
+        async def _wrapper(
+            project_name: str,
+            documents: DocumentList,
+            flow_options: FO_contra,
+            *args: Any,
+            **kwargs: Any,
+        ) -> DocumentList:
+            result = await fn(project_name, documents, flow_options, *args, **kwargs)
+            if not isinstance(result, DocumentList):  # pyright: ignore[reportUnnecessaryIsInstance]
+                raise TypeError(
+                    f"Flow '{fname}' must return DocumentList, got {type(result).__name__}"
+                )
+            return result
+
+        traced = trace(
+            level=trace_level,
+            name=name or fname,
+            ignore_input=trace_ignore_input,
+            ignore_output=trace_ignore_output,
+            ignore_inputs=trace_ignore_inputs,
+            input_formatter=trace_input_formatter,
+            output_formatter=trace_output_formatter,
+        )(_wrapper)
+
+        return cast(
+            _FlowLike[FO_contra],
+            flow_decorator(
+                name=name,
+                version=version,
+                flow_run_name=flow_run_name,
+                retries=0 if retries is None else retries,
+                retry_delay_seconds=retry_delay_seconds,
+                task_runner=task_runner,
+                description=description,
+                timeout_seconds=timeout_seconds,
+                validate_parameters=validate_parameters,
+                persist_result=persist_result,
+                result_storage=result_storage,
+                result_serializer=result_serializer,
+                cache_result_in_memory=cache_result_in_memory,
+                log_prints=log_prints,
+                on_completion=on_completion,
+                on_failure=on_failure,
+                on_cancellation=on_cancellation,
+                on_crashed=on_crashed,
+                on_running=on_running,
+            )(traced),
+        )
+
+    return _apply(__fn) if __fn else _apply
+
+
+__all__ = ["pipeline_task", "pipeline_flow"]
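Beyond the docstring's Quick start, both decorators also take the keyword form defined by the overloads above. A short sketch (the task and flow names are made up):

    from ai_pipeline_core import pipeline_flow, pipeline_task
    from ai_pipeline_core.documents import DocumentList
    from ai_pipeline_core.flow import FlowOptions

    @pipeline_task(retries=2, trace_level="always")
    async def summarize(text: str) -> str:
        return text[:100]

    @pipeline_flow(name="demo-flow")
    async def demo(
        project_name: str, documents: DocumentList, flow_options: FlowOptions
    ) -> DocumentList:
        await summarize("hello world")
        return documents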
ai_pipeline_core/simple_runner/__init__.py
ADDED
@@ -0,0 +1,19 @@
+from .cli import run_cli
+from .simple_runner import (
+    ConfigSequence,
+    FlowSequence,
+    load_documents_from_directory,
+    run_pipeline,
+    run_pipelines,
+    save_documents_to_directory,
+)
+
+__all__ = [
+    "run_cli",
+    "run_pipeline",
+    "run_pipelines",
+    "load_documents_from_directory",
+    "save_documents_to_directory",
+    "FlowSequence",
+    "ConfigSequence",
+]
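The implementations (`cli.py`, `simple_runner.py`) appear in the diffstat above but their bodies are not shown in this excerpt, so the signatures are unknown; a purely illustrative sketch with guessed arguments:

    from ai_pipeline_core.simple_runner import (
        load_documents_from_directory,
        save_documents_to_directory,
    )

    # hypothetical call shapes; consult simple_runner.py for the real parameters
    docs = load_documents_from_directory("./input")
    save_documents_to_directory(docs, "./output")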