ai-pipeline-core 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +3 -1
- ai_pipeline_core/documents/__init__.py +2 -0
- ai_pipeline_core/documents/document.py +115 -22
- ai_pipeline_core/documents/temporary_document.py +16 -0
- ai_pipeline_core/flow/config.py +20 -2
- ai_pipeline_core/prompt_manager.py +7 -1
- ai_pipeline_core/simple_runner/cli.py +49 -6
- ai_pipeline_core/simple_runner/simple_runner.py +7 -2
- {ai_pipeline_core-0.1.8.dist-info → ai_pipeline_core-0.1.10.dist-info}/METADATA +2 -22
- {ai_pipeline_core-0.1.8.dist-info → ai_pipeline_core-0.1.10.dist-info}/RECORD +12 -11
- {ai_pipeline_core-0.1.8.dist-info → ai_pipeline_core-0.1.10.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.1.8.dist-info → ai_pipeline_core-0.1.10.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/__init__.py
CHANGED
|
@@ -6,6 +6,7 @@ from .documents import (
|
|
|
6
6
|
DocumentList,
|
|
7
7
|
FlowDocument,
|
|
8
8
|
TaskDocument,
|
|
9
|
+
TemporaryDocument,
|
|
9
10
|
canonical_name_key,
|
|
10
11
|
sanitize_url,
|
|
11
12
|
)
|
|
@@ -32,7 +33,7 @@ from .prompt_manager import PromptManager
|
|
|
32
33
|
from .settings import settings
|
|
33
34
|
from .tracing import TraceInfo, TraceLevel, trace
|
|
34
35
|
|
|
35
|
-
__version__ = "0.1.8"
|
|
36
|
+
__version__ = "0.1.10"
|
|
36
37
|
|
|
37
38
|
__all__ = [
|
|
38
39
|
# Config/Settings
|
|
@@ -49,6 +50,7 @@ __all__ = [
|
|
|
49
50
|
"DocumentList",
|
|
50
51
|
"FlowDocument",
|
|
51
52
|
"TaskDocument",
|
|
53
|
+
"TemporaryDocument",
|
|
52
54
|
"canonical_name_key",
|
|
53
55
|
"sanitize_url",
|
|
54
56
|
# Flow/Task
|
|
@@ -2,6 +2,7 @@ from .document import Document
|
|
|
2
2
|
from .document_list import DocumentList
|
|
3
3
|
from .flow_document import FlowDocument
|
|
4
4
|
from .task_document import TaskDocument
|
|
5
|
+
from .temporary_document import TemporaryDocument
|
|
5
6
|
from .utils import canonical_name_key, sanitize_url
|
|
6
7
|
|
|
7
8
|
__all__ = [
|
|
@@ -9,6 +10,7 @@ __all__ = [
|
|
|
9
10
|
"DocumentList",
|
|
10
11
|
"FlowDocument",
|
|
11
12
|
"TaskDocument",
|
|
13
|
+
"TemporaryDocument",
|
|
12
14
|
"canonical_name_key",
|
|
13
15
|
"sanitize_url",
|
|
14
16
|
]
|
|
@@ -6,7 +6,19 @@ from abc import ABC, abstractmethod
|
|
|
6
6
|
from base64 import b32encode
|
|
7
7
|
from enum import StrEnum
|
|
8
8
|
from functools import cached_property
|
|
9
|
-
from
|
|
9
|
+
from io import BytesIO
|
|
10
|
+
from typing import (
|
|
11
|
+
Any,
|
|
12
|
+
ClassVar,
|
|
13
|
+
Literal,
|
|
14
|
+
Self,
|
|
15
|
+
TypeVar,
|
|
16
|
+
cast,
|
|
17
|
+
final,
|
|
18
|
+
get_args,
|
|
19
|
+
get_origin,
|
|
20
|
+
overload,
|
|
21
|
+
)
|
|
10
22
|
|
|
11
23
|
from pydantic import BaseModel, ConfigDict, field_serializer, field_validator
|
|
12
24
|
from ruamel.yaml import YAML
|
|
@@ -23,6 +35,7 @@ from .mime_type import (
|
|
|
23
35
|
)
|
|
24
36
|
|
|
25
37
|
TModel = TypeVar("TModel", bound=BaseModel)
|
|
38
|
+
ContentInput = bytes | str | BaseModel | list[str] | Any
|
|
26
39
|
|
|
27
40
|
|
|
28
41
|
class Document(BaseModel, ABC):
|
|
@@ -46,6 +59,22 @@ class Document(BaseModel, ABC):
|
|
|
46
59
|
"This causes conflicts with pytest test discovery. "
|
|
47
60
|
"Please use a different name (e.g., 'SampleDocument', 'ExampleDocument')."
|
|
48
61
|
)
|
|
62
|
+
if hasattr(cls, "FILES"):
|
|
63
|
+
files = getattr(cls, "FILES")
|
|
64
|
+
if not issubclass(files, StrEnum):
|
|
65
|
+
raise TypeError(
|
|
66
|
+
f"Document subclass '{cls.__name__}'.FILES must be an Enum of string values"
|
|
67
|
+
)
|
|
68
|
+
# Check that the Document's model_fields only contain the allowed fields
|
|
69
|
+
# It prevents AI models from adding additional fields to documents
|
|
70
|
+
allowed = {"name", "description", "content"}
|
|
71
|
+
current = set(getattr(cls, "model_fields", {}).keys())
|
|
72
|
+
extras = current - allowed
|
|
73
|
+
if extras:
|
|
74
|
+
raise TypeError(
|
|
75
|
+
f"Document subclass '{cls.__name__}' cannot declare additional fields: "
|
|
76
|
+
f"{', '.join(sorted(extras))}. Only {', '.join(sorted(allowed))} are allowed."
|
|
77
|
+
)
|
|
49
78
|
|
|
50
79
|
def __init__(self, **data: Any) -> None:
|
|
51
80
|
"""Prevent direct instantiation of abstract Document class."""
|
|
@@ -53,49 +82,60 @@ class Document(BaseModel, ABC):
|
|
|
53
82
|
raise TypeError("Cannot instantiate abstract Document class directly")
|
|
54
83
|
super().__init__(**data)
|
|
55
84
|
|
|
56
|
-
# Optional enum of allowed file names. Subclasses may set this.
|
|
57
|
-
# This is used to validate the document name.
|
|
58
|
-
FILES: ClassVar[type[StrEnum] | None] = None
|
|
59
|
-
|
|
60
85
|
name: str
|
|
61
86
|
description: str | None = None
|
|
62
87
|
content: bytes
|
|
63
88
|
|
|
64
89
|
# Pydantic configuration
|
|
65
90
|
model_config = ConfigDict(
|
|
66
|
-
frozen=True,
|
|
91
|
+
frozen=True,
|
|
67
92
|
arbitrary_types_allowed=True,
|
|
93
|
+
extra="forbid",
|
|
68
94
|
)
|
|
69
95
|
|
|
70
96
|
@abstractmethod
|
|
71
|
-
def get_base_type(self) -> Literal["flow", "task"]:
|
|
97
|
+
def get_base_type(self) -> Literal["flow", "task", "temporary"]:
|
|
72
98
|
"""Get the type of the document - must be implemented by subclasses"""
|
|
73
99
|
raise NotImplementedError("Subclasses must implement this method")
|
|
74
100
|
|
|
101
|
+
@final
|
|
75
102
|
@property
|
|
76
|
-
def base_type(self) -> Literal["flow", "task"]:
|
|
103
|
+
def base_type(self) -> Literal["flow", "task", "temporary"]:
|
|
77
104
|
"""Alias for document_type for backward compatibility"""
|
|
78
105
|
return self.get_base_type()
|
|
79
106
|
|
|
107
|
+
@final
|
|
80
108
|
@property
|
|
81
109
|
def is_flow(self) -> bool:
|
|
82
110
|
"""Check if document is a flow document"""
|
|
83
111
|
return self.get_base_type() == "flow"
|
|
84
112
|
|
|
113
|
+
@final
|
|
85
114
|
@property
|
|
86
115
|
def is_task(self) -> bool:
|
|
87
116
|
"""Check if document is a task document"""
|
|
88
117
|
return self.get_base_type() == "task"
|
|
89
118
|
|
|
119
|
+
@final
|
|
120
|
+
@property
|
|
121
|
+
def is_temporary(self) -> bool:
|
|
122
|
+
"""Check if document is a temporary document"""
|
|
123
|
+
return self.get_base_type() == "temporary"
|
|
124
|
+
|
|
125
|
+
@final
|
|
90
126
|
@classmethod
|
|
91
127
|
def get_expected_files(cls) -> list[str] | None:
|
|
92
128
|
"""
|
|
93
129
|
Return the list of allowed file names for this document class, or None if unrestricted.
|
|
94
130
|
"""
|
|
95
|
-
if cls
|
|
131
|
+
if not hasattr(cls, "FILES"):
|
|
96
132
|
return None
|
|
133
|
+
files = getattr(cls, "FILES")
|
|
134
|
+
if not files:
|
|
135
|
+
return None
|
|
136
|
+
assert issubclass(files, StrEnum)
|
|
97
137
|
try:
|
|
98
|
-
values = [member.value for member in
|
|
138
|
+
values = [member.value for member in files]
|
|
99
139
|
except TypeError:
|
|
100
140
|
raise DocumentNameError(f"{cls.__name__}.FILES must be an Enum of string values")
|
|
101
141
|
if len(values) == 0:
|
|
@@ -115,14 +155,10 @@ class Document(BaseModel, ABC):
|
|
|
115
155
|
Override this method in subclasses for custom conventions (regex, prefixes, etc.).
|
|
116
156
|
Raise DocumentNameError when invalid.
|
|
117
157
|
"""
|
|
118
|
-
|
|
158
|
+
allowed = cls.get_expected_files()
|
|
159
|
+
if not allowed:
|
|
119
160
|
return
|
|
120
161
|
|
|
121
|
-
try:
|
|
122
|
-
allowed = {str(member.value) for member in cls.FILES} # type: ignore[arg-type]
|
|
123
|
-
except TypeError:
|
|
124
|
-
raise DocumentNameError(f"{cls.__name__}.FILES must be an Enum of string values")
|
|
125
|
-
|
|
126
162
|
if len(allowed) > 0 and name not in allowed:
|
|
127
163
|
allowed_str = ", ".join(sorted(allowed))
|
|
128
164
|
raise DocumentNameError(f"Invalid filename '{name}'. Allowed names: {allowed_str}")
|
|
@@ -166,16 +202,19 @@ class Document(BaseModel, ABC):
|
|
|
166
202
|
# Fall back to base64 for binary content
|
|
167
203
|
return base64.b64encode(v).decode("ascii")
|
|
168
204
|
|
|
205
|
+
@final
|
|
169
206
|
@property
|
|
170
207
|
def id(self) -> str:
|
|
171
208
|
"""Return the first 6 characters of the SHA256 hash of the content, encoded in base32"""
|
|
172
209
|
return self.sha256[:6]
|
|
173
210
|
|
|
211
|
+
@final
|
|
174
212
|
@cached_property
|
|
175
213
|
def sha256(self) -> str:
|
|
176
214
|
"""Full SHA256 hash of content, encoded in base32"""
|
|
177
215
|
return b32encode(hashlib.sha256(self.content).digest()).decode("ascii").upper()
|
|
178
216
|
|
|
217
|
+
@final
|
|
179
218
|
@property
|
|
180
219
|
def size(self) -> int:
|
|
181
220
|
"""Size of content in bytes"""
|
|
@@ -225,23 +264,61 @@ class Document(BaseModel, ABC):
|
|
|
225
264
|
"""Parse document as JSON"""
|
|
226
265
|
return json.loads(self.as_text())
|
|
227
266
|
|
|
228
|
-
|
|
267
|
+
@overload
|
|
268
|
+
def as_pydantic_model(self, model_type: type[TModel]) -> TModel: ...
|
|
269
|
+
|
|
270
|
+
@overload
|
|
271
|
+
def as_pydantic_model(self, model_type: type[list[TModel]]) -> list[TModel]: ...
|
|
272
|
+
|
|
273
|
+
def as_pydantic_model(
|
|
274
|
+
self, model_type: type[TModel] | type[list[TModel]]
|
|
275
|
+
) -> TModel | list[TModel]:
|
|
229
276
|
"""Parse document as a pydantic model and return the validated instance"""
|
|
230
277
|
data = self.as_yaml() if is_yaml_mime_type(self.mime_type) else self.as_json()
|
|
231
|
-
|
|
278
|
+
|
|
279
|
+
if get_origin(model_type) is list:
|
|
280
|
+
if not isinstance(data, list):
|
|
281
|
+
raise ValueError(f"Expected list data for {model_type}, got {type(data)}")
|
|
282
|
+
item_type = get_args(model_type)[0]
|
|
283
|
+
return [item_type.model_validate(item) for item in data]
|
|
284
|
+
|
|
285
|
+
# At this point model_type must be type[TModel], not type[list[TModel]]
|
|
286
|
+
single_model = cast(type[TModel], model_type)
|
|
287
|
+
return single_model.model_validate(data)
|
|
232
288
|
|
|
233
289
|
def as_markdown_list(self) -> list[str]:
|
|
234
290
|
"""Parse document as a markdown list"""
|
|
235
291
|
return self.as_text().split(self.MARKDOWN_LIST_SEPARATOR)
|
|
236
292
|
|
|
293
|
+
@overload
|
|
294
|
+
@classmethod
|
|
295
|
+
def create(cls, name: str, content: ContentInput, /) -> Self: ...
|
|
296
|
+
@overload
|
|
297
|
+
@classmethod
|
|
298
|
+
def create(cls, name: str, *, content: ContentInput) -> Self: ...
|
|
299
|
+
@overload
|
|
300
|
+
@classmethod
|
|
301
|
+
def create(cls, name: str, description: str | None, content: ContentInput, /) -> Self: ...
|
|
302
|
+
@overload
|
|
303
|
+
@classmethod
|
|
304
|
+
def create(cls, name: str, description: str | None, *, content: ContentInput) -> Self: ...
|
|
305
|
+
|
|
237
306
|
@classmethod
|
|
238
307
|
def create(
|
|
239
308
|
cls,
|
|
240
309
|
name: str,
|
|
241
|
-
description:
|
|
242
|
-
content:
|
|
310
|
+
description: ContentInput = None,
|
|
311
|
+
content: ContentInput = None,
|
|
243
312
|
) -> Self:
|
|
244
313
|
"""Create a document from a name, description, and content"""
|
|
314
|
+
if content is None:
|
|
315
|
+
if description is None:
|
|
316
|
+
raise ValueError(f"Unsupported content type: {type(content)} for {name}")
|
|
317
|
+
content = description
|
|
318
|
+
description = None
|
|
319
|
+
else:
|
|
320
|
+
assert description is None or isinstance(description, str)
|
|
321
|
+
|
|
245
322
|
is_yaml_extension = name.endswith(".yaml") or name.endswith(".yml")
|
|
246
323
|
is_json_extension = name.endswith(".json")
|
|
247
324
|
is_markdown_extension = name.endswith(".md")
|
|
@@ -252,6 +329,14 @@ class Document(BaseModel, ABC):
|
|
|
252
329
|
content = content.encode("utf-8")
|
|
253
330
|
elif is_str_list and is_markdown_extension:
|
|
254
331
|
return cls.create_as_markdown_list(name, description, content) # type: ignore[arg-type]
|
|
332
|
+
elif isinstance(content, list) and all(isinstance(item, BaseModel) for item in content):
|
|
333
|
+
# Handle list[BaseModel] for JSON/YAML files
|
|
334
|
+
if is_yaml_extension:
|
|
335
|
+
return cls.create_as_yaml(name, description, content)
|
|
336
|
+
elif is_json_extension:
|
|
337
|
+
return cls.create_as_json(name, description, content)
|
|
338
|
+
else:
|
|
339
|
+
raise ValueError(f"list[BaseModel] requires .json or .yaml extension, got {name}")
|
|
255
340
|
elif is_yaml_extension:
|
|
256
341
|
return cls.create_as_yaml(name, description, content)
|
|
257
342
|
elif is_json_extension:
|
|
@@ -261,6 +346,7 @@ class Document(BaseModel, ABC):
|
|
|
261
346
|
|
|
262
347
|
return cls(name=name, description=description, content=content)
|
|
263
348
|
|
|
349
|
+
@final
|
|
264
350
|
@classmethod
|
|
265
351
|
def create_as_markdown_list(cls, name: str, description: str | None, items: list[str]) -> Self:
|
|
266
352
|
"""Create a document from a name, description, and list of strings"""
|
|
@@ -273,15 +359,19 @@ class Document(BaseModel, ABC):
|
|
|
273
359
|
content = Document.MARKDOWN_LIST_SEPARATOR.join(cleaned_items)
|
|
274
360
|
return cls.create(name, description, content)
|
|
275
361
|
|
|
362
|
+
@final
|
|
276
363
|
@classmethod
|
|
277
364
|
def create_as_json(cls, name: str, description: str | None, data: Any) -> Self:
|
|
278
365
|
"""Create a document from a name, description, and JSON data"""
|
|
279
366
|
assert name.endswith(".json"), f"Document name must end with .json: {name}"
|
|
280
367
|
if isinstance(data, BaseModel):
|
|
281
368
|
data = data.model_dump(mode="json")
|
|
369
|
+
elif isinstance(data, list) and all(isinstance(item, BaseModel) for item in data):
|
|
370
|
+
data = [item.model_dump(mode="json") for item in data]
|
|
282
371
|
content = json.dumps(data, indent=2).encode("utf-8")
|
|
283
372
|
return cls.create(name, description, content)
|
|
284
373
|
|
|
374
|
+
@final
|
|
285
375
|
@classmethod
|
|
286
376
|
def create_as_yaml(cls, name: str, description: str | None, data: Any) -> Self:
|
|
287
377
|
"""Create a document from a name, description, and YAML data"""
|
|
@@ -289,16 +379,18 @@ class Document(BaseModel, ABC):
|
|
|
289
379
|
f"Document name must end with .yaml or .yml: {name}"
|
|
290
380
|
)
|
|
291
381
|
if isinstance(data, BaseModel):
|
|
292
|
-
data = data.model_dump()
|
|
382
|
+
data = data.model_dump(mode="json")
|
|
383
|
+
elif isinstance(data, list) and all(isinstance(item, BaseModel) for item in data):
|
|
384
|
+
data = [item.model_dump(mode="json") for item in data]
|
|
293
385
|
yaml = YAML()
|
|
294
386
|
yaml.indent(mapping=2, sequence=4, offset=2)
|
|
295
|
-
from io import BytesIO
|
|
296
387
|
|
|
297
388
|
stream = BytesIO()
|
|
298
389
|
yaml.dump(data, stream)
|
|
299
390
|
content = stream.getvalue()
|
|
300
391
|
return cls.create(name, description, content)
|
|
301
392
|
|
|
393
|
+
@final
|
|
302
394
|
def serialize_model(self) -> dict[str, Any]:
|
|
303
395
|
"""Serialize document to a dictionary with proper encoding."""
|
|
304
396
|
result = {
|
|
@@ -327,6 +419,7 @@ class Document(BaseModel, ABC):
|
|
|
327
419
|
|
|
328
420
|
return result
|
|
329
421
|
|
|
422
|
+
@final
|
|
330
423
|
@classmethod
|
|
331
424
|
def from_dict(cls, data: dict[str, Any]) -> Self:
|
|
332
425
|
"""Deserialize document from dictionary."""
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""Task-specific document base class."""
|
|
2
|
+
|
|
3
|
+
from typing import Literal, final
|
|
4
|
+
|
|
5
|
+
from .document import Document
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@final
|
|
9
|
+
class TemporaryDocument(Document):
|
|
10
|
+
"""
|
|
11
|
+
Temporary document is a document that is not persisted in any case.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
def get_base_type(self) -> Literal["temporary"]:
|
|
15
|
+
"""Get the document type."""
|
|
16
|
+
return "temporary"
|
ai_pipeline_core/flow/config.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""Flow configuration base class."""
|
|
2
2
|
|
|
3
3
|
from abc import ABC
|
|
4
|
-
from typing import ClassVar
|
|
4
|
+
from typing import Any, ClassVar
|
|
5
5
|
|
|
6
6
|
from ai_pipeline_core.documents import DocumentList, FlowDocument
|
|
7
7
|
|
|
@@ -14,7 +14,7 @@ class FlowConfig(ABC):
|
|
|
14
14
|
INPUT_DOCUMENT_TYPES: ClassVar[list[type[FlowDocument]]]
|
|
15
15
|
OUTPUT_DOCUMENT_TYPE: ClassVar[type[FlowDocument]]
|
|
16
16
|
|
|
17
|
-
def __init_subclass__(cls, **kwargs):
|
|
17
|
+
def __init_subclass__(cls, **kwargs: Any):
|
|
18
18
|
"""Validate that OUTPUT_DOCUMENT_TYPE is not in INPUT_DOCUMENT_TYPES."""
|
|
19
19
|
super().__init_subclass__(**kwargs)
|
|
20
20
|
|
|
@@ -85,3 +85,21 @@ class FlowConfig(ABC):
|
|
|
85
85
|
"Documents must be of the correct type. "
|
|
86
86
|
f"Expected: {output_document_class.__name__}, Got invalid: {invalid}"
|
|
87
87
|
)
|
|
88
|
+
|
|
89
|
+
@classmethod
|
|
90
|
+
def create_and_validate_output(
|
|
91
|
+
cls, output: FlowDocument | list[FlowDocument] | DocumentList
|
|
92
|
+
) -> DocumentList:
|
|
93
|
+
"""
|
|
94
|
+
Create the output documents for the flow.
|
|
95
|
+
"""
|
|
96
|
+
documents: DocumentList
|
|
97
|
+
if isinstance(output, FlowDocument):
|
|
98
|
+
documents = DocumentList([output])
|
|
99
|
+
elif isinstance(output, DocumentList):
|
|
100
|
+
documents = output
|
|
101
|
+
else:
|
|
102
|
+
assert isinstance(output, list)
|
|
103
|
+
documents = DocumentList(output) # type: ignore[arg-type]
|
|
104
|
+
cls.validate_output_documents(documents)
|
|
105
|
+
return documents
|
|
@@ -5,7 +5,7 @@ import jinja2
|
|
|
5
5
|
|
|
6
6
|
from ai_pipeline_core.logging import get_pipeline_logger
|
|
7
7
|
|
|
8
|
-
from .exceptions import PromptNotFoundError, PromptRenderError
|
|
8
|
+
from .exceptions import PromptError, PromptNotFoundError, PromptRenderError
|
|
9
9
|
|
|
10
10
|
logger = get_pipeline_logger(__name__)
|
|
11
11
|
|
|
@@ -28,6 +28,12 @@ class PromptManager:
|
|
|
28
28
|
|
|
29
29
|
# Start from the directory containing the calling file
|
|
30
30
|
current_path = Path(current_dir).resolve()
|
|
31
|
+
if not current_path.exists():
|
|
32
|
+
raise PromptError(
|
|
33
|
+
f"PromptManager expected __file__ (a valid file path), "
|
|
34
|
+
f"but got {current_dir!r}. Did you pass __name__ instead?"
|
|
35
|
+
)
|
|
36
|
+
|
|
31
37
|
if current_path.is_file():
|
|
32
38
|
current_path = current_path.parent
|
|
33
39
|
|
|
@@ -8,6 +8,7 @@ from pathlib import Path
|
|
|
8
8
|
from typing import Callable, Type, TypeVar, cast
|
|
9
9
|
|
|
10
10
|
from lmnr import Laminar
|
|
11
|
+
from pydantic import ValidationError
|
|
11
12
|
from pydantic_settings import CliPositionalArg, SettingsConfigDict
|
|
12
13
|
|
|
13
14
|
from ai_pipeline_core.documents import DocumentList
|
|
@@ -76,7 +77,49 @@ def run_cli(
|
|
|
76
77
|
|
|
77
78
|
model_config = SettingsConfigDict(frozen=True, extra="ignore")
|
|
78
79
|
|
|
79
|
-
|
|
80
|
+
try:
|
|
81
|
+
opts = cast(FlowOptions, _RunnerOptions()) # type: ignore[reportCallIssue]
|
|
82
|
+
except ValidationError as e:
|
|
83
|
+
print("\nError: Invalid command line arguments\n", file=sys.stderr)
|
|
84
|
+
for error in e.errors():
|
|
85
|
+
field = " -> ".join(str(loc) for loc in error["loc"])
|
|
86
|
+
msg = error["msg"]
|
|
87
|
+
value = error.get("input", "")
|
|
88
|
+
|
|
89
|
+
# Format the field name nicely (convert from snake_case to kebab-case for CLI)
|
|
90
|
+
cli_field = field.replace("_", "-")
|
|
91
|
+
|
|
92
|
+
print(f" --{cli_field}: {msg}", file=sys.stderr)
|
|
93
|
+
if value:
|
|
94
|
+
print(f" Provided value: '{value}'", file=sys.stderr)
|
|
95
|
+
|
|
96
|
+
# Add helpful hints for common errors
|
|
97
|
+
if error["type"] == "float_parsing":
|
|
98
|
+
print(" Hint: Please provide a valid number (e.g., 0.7)", file=sys.stderr)
|
|
99
|
+
elif error["type"] == "int_parsing":
|
|
100
|
+
print(" Hint: Please provide a valid integer (e.g., 10)", file=sys.stderr)
|
|
101
|
+
elif error["type"] == "literal_error":
|
|
102
|
+
ctx = error.get("ctx", {})
|
|
103
|
+
expected = ctx.get("expected", "valid options")
|
|
104
|
+
print(f" Hint: Valid options are: {expected}", file=sys.stderr)
|
|
105
|
+
elif error["type"] in [
|
|
106
|
+
"less_than_equal",
|
|
107
|
+
"greater_than_equal",
|
|
108
|
+
"less_than",
|
|
109
|
+
"greater_than",
|
|
110
|
+
]:
|
|
111
|
+
ctx = error.get("ctx", {})
|
|
112
|
+
if "le" in ctx:
|
|
113
|
+
print(f" Hint: Value must be ≤ {ctx['le']}", file=sys.stderr)
|
|
114
|
+
elif "ge" in ctx:
|
|
115
|
+
print(f" Hint: Value must be ≥ {ctx['ge']}", file=sys.stderr)
|
|
116
|
+
elif "lt" in ctx:
|
|
117
|
+
print(f" Hint: Value must be < {ctx['lt']}", file=sys.stderr)
|
|
118
|
+
elif "gt" in ctx:
|
|
119
|
+
print(f" Hint: Value must be > {ctx['gt']}", file=sys.stderr)
|
|
120
|
+
|
|
121
|
+
print("\nRun with --help to see all available options\n", file=sys.stderr)
|
|
122
|
+
sys.exit(1)
|
|
80
123
|
|
|
81
124
|
wd: Path = cast(Path, getattr(opts, "working_directory"))
|
|
82
125
|
wd.mkdir(parents=True, exist_ok=True)
|
|
@@ -103,17 +146,17 @@ def run_cli(
|
|
|
103
146
|
# Setup context stack with optional test harness and tracing
|
|
104
147
|
|
|
105
148
|
with ExitStack() as stack:
|
|
106
|
-
if not settings.prefect_api_key and not _running_under_pytest():
|
|
107
|
-
stack.enter_context(prefect_test_harness())
|
|
108
|
-
stack.enter_context(disable_run_logger())
|
|
109
|
-
|
|
110
149
|
if trace_name:
|
|
111
150
|
stack.enter_context(
|
|
112
|
-
Laminar.
|
|
151
|
+
Laminar.start_as_current_span(
|
|
113
152
|
name=f"{trace_name}-{project_name}", input=[opts.model_dump_json()]
|
|
114
153
|
)
|
|
115
154
|
)
|
|
116
155
|
|
|
156
|
+
if not settings.prefect_api_key and not _running_under_pytest():
|
|
157
|
+
stack.enter_context(prefect_test_harness())
|
|
158
|
+
stack.enter_context(disable_run_logger())
|
|
159
|
+
|
|
117
160
|
asyncio.run(
|
|
118
161
|
run_pipelines(
|
|
119
162
|
project_name=project_name,
|
|
@@ -77,7 +77,9 @@ async def run_pipeline(
|
|
|
77
77
|
) -> DocumentList:
|
|
78
78
|
"""Execute a single pipeline flow."""
|
|
79
79
|
if flow_name is None:
|
|
80
|
-
|
|
80
|
+
# For Prefect Flow objects, use their name attribute
|
|
81
|
+
# For regular functions, fall back to __name__
|
|
82
|
+
flow_name = getattr(flow_func, "name", None) or getattr(flow_func, "__name__", "flow")
|
|
81
83
|
|
|
82
84
|
logger.info(f"Running Flow: {flow_name}")
|
|
83
85
|
|
|
@@ -126,7 +128,10 @@ async def run_pipelines(
|
|
|
126
128
|
for i in range(start_index, end_index + 1):
|
|
127
129
|
flow_func = flows[i]
|
|
128
130
|
config = flow_configs[i]
|
|
129
|
-
|
|
131
|
+
# For Prefect Flow objects, use their name attribute; for functions, use __name__
|
|
132
|
+
flow_name = getattr(flow_func, "name", None) or getattr(
|
|
133
|
+
flow_func, "__name__", f"flow_{i + 1}"
|
|
134
|
+
)
|
|
130
135
|
|
|
131
136
|
logger.info(f"--- [Step {i + 1}/{num_steps}] Running Flow: {flow_name} ---")
|
|
132
137
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ai-pipeline-core
|
|
3
|
-
Version: 0.1.8
|
|
3
|
+
Version: 0.1.10
|
|
4
4
|
Summary: Core utilities for AI-powered processing pipelines using prefect
|
|
5
5
|
Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
|
|
6
6
|
Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
|
|
@@ -528,29 +528,9 @@ Built with:
|
|
|
528
528
|
- [LiteLLM](https://litellm.ai/) - LLM proxy
|
|
529
529
|
- [Pydantic](https://pydantic-docs.helpmanual.io/) - Data validation
|
|
530
530
|
|
|
531
|
-
## What's New in v0.1.8
|
|
532
|
-
|
|
533
|
-
### Breaking Changes
|
|
534
|
-
- **Async-Only Pipeline Decorators**: `@pipeline_flow` and `@pipeline_task` now require `async def` functions (raises TypeError for sync)
|
|
535
|
-
- **Document Class Name Validation**: Document subclasses cannot start with "Test" prefix (pytest conflict prevention)
|
|
536
|
-
- **FlowConfig Validation**: OUTPUT_DOCUMENT_TYPE cannot be in INPUT_DOCUMENT_TYPES (prevents circular dependencies)
|
|
537
|
-
- **Temperature Field**: Added optional `temperature` field to `ModelOptions` for explicit control
|
|
538
|
-
|
|
539
|
-
### Major Improvements
|
|
540
|
-
- **Pipeline Module Refactoring**: Reduced from ~400 to ~150 lines with cleaner Protocol-based typing
|
|
541
|
-
- **Enhanced Validation**: FlowConfig and Document classes now validate at definition time
|
|
542
|
-
- **Better CLI Support**: Auto-displays help when no arguments provided, improved context management
|
|
543
|
-
- **Test Suite Updates**: All tests updated to use async/await consistently
|
|
544
|
-
|
|
545
|
-
### Documentation Updates
|
|
546
|
-
- Added Document naming rules to CLAUDE.md
|
|
547
|
-
- Added FlowConfig validation rules
|
|
548
|
-
- Added code elegance principles section
|
|
549
|
-
- Updated guide_for_ai.md to API reference format
|
|
550
|
-
|
|
551
531
|
## Stability Notice
|
|
552
532
|
|
|
553
|
-
**Current Version**: 0.1.8
|
|
533
|
+
**Current Version**: 0.1.10
|
|
554
534
|
**Status**: Internal Preview
|
|
555
535
|
**API Stability**: Unstable - Breaking changes expected
|
|
556
536
|
**Recommended Use**: Learning and reference only
|
|
@@ -1,20 +1,21 @@
|
|
|
1
|
-
ai_pipeline_core/__init__.py,sha256=
|
|
1
|
+
ai_pipeline_core/__init__.py,sha256=qKdAEzvFaIG3FgMh-90X825rcL4vqshSM2xI_WIeiq0,1711
|
|
2
2
|
ai_pipeline_core/exceptions.py,sha256=_vW0Hbw2LGb5tcVvH0YzTKMff7QOPfCRr3w-w_zPyCE,968
|
|
3
3
|
ai_pipeline_core/pipeline.py,sha256=f-pEDwrEhMLfcSEvPP2b74xb0WzFI05IQcl-NDFzH7w,16565
|
|
4
4
|
ai_pipeline_core/prefect.py,sha256=VHYkkRcUmSpdwyWosOOxuExVCncIQgT6MypqGdjcYnM,241
|
|
5
|
-
ai_pipeline_core/prompt_manager.py,sha256=
|
|
5
|
+
ai_pipeline_core/prompt_manager.py,sha256=e6i9xOpgmyj-0FoJQg4Y4ZgnYSOUbCARU4UYNk_rT-0,4938
|
|
6
6
|
ai_pipeline_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
7
|
ai_pipeline_core/settings.py,sha256=Zl2BPa6IHzh-B5V7cg5mtySr1dhWZQYYKxXz3BwrHlQ,615
|
|
8
8
|
ai_pipeline_core/tracing.py,sha256=T-3fTyA37TejXxotkVzTNqL2a5nOfZ0bcHg9TClLvmg,9471
|
|
9
|
-
ai_pipeline_core/documents/__init__.py,sha256=
|
|
10
|
-
ai_pipeline_core/documents/document.py,sha256=
|
|
9
|
+
ai_pipeline_core/documents/__init__.py,sha256=a5fdAZxlIX5j-BEcawFnBN3jwyR1nHuvy5iw5G1MwC8,415
|
|
10
|
+
ai_pipeline_core/documents/document.py,sha256=3X1u78If77UqoHgl1uTiwqCc5QdQ9kbGmkL6LFoJVUM,16405
|
|
11
11
|
ai_pipeline_core/documents/document_list.py,sha256=HOG_uZDazA9CJB7Lr_tNcDFzb5Ff9RUt0ELWQK_eYNM,4940
|
|
12
12
|
ai_pipeline_core/documents/flow_document.py,sha256=qsV-2JYOMhkvAj7lW54ZNH_4QUclld9h06CoU59tWww,815
|
|
13
13
|
ai_pipeline_core/documents/mime_type.py,sha256=sBhNRoBJQ35JoHWhJzBGpp00WFDfMdEX0JZKKkR7QH0,3371
|
|
14
14
|
ai_pipeline_core/documents/task_document.py,sha256=WjHqtl1d60XFBBqewNRdz1OqBErGI0jRx15oQYCTHo8,907
|
|
15
|
+
ai_pipeline_core/documents/temporary_document.py,sha256=qaJYNza9EyvgBh7uch8Oic6DDk3QL0OEXGF-zuvhK_4,358
|
|
15
16
|
ai_pipeline_core/documents/utils.py,sha256=BdE4taSl1vrBhxnFbOP5nDA7lXIcvY__AMRTHoaNb5M,2764
|
|
16
17
|
ai_pipeline_core/flow/__init__.py,sha256=54DRfZnjXQVrimgtKEVEm5u5ErImx31cjK2PpBvHjU4,116
|
|
17
|
-
ai_pipeline_core/flow/config.py,sha256=
|
|
18
|
+
ai_pipeline_core/flow/config.py,sha256=j2FP56gTNqNrxrtUnkfn-mUnIs1Cayy2ge4TzoDqj8E,3856
|
|
18
19
|
ai_pipeline_core/flow/options.py,sha256=WygJEwjqOa14l23a_Hp36hJX-WgxHMq-YzSieC31Z4Y,701
|
|
19
20
|
ai_pipeline_core/llm/__init__.py,sha256=3XVK-bSJdOe0s6KmmO7PDbsXHfjlcZEG1MVBmaz3EeU,442
|
|
20
21
|
ai_pipeline_core/llm/ai_messages.py,sha256=DwJJe05BtYdnMZeHbBbyEbDCqrW63SRvprxptoJUCn4,4586
|
|
@@ -27,9 +28,9 @@ ai_pipeline_core/logging/logging.yml,sha256=YTW48keO_K5bkkb-KXGM7ZuaYKiquLsjsURe
|
|
|
27
28
|
ai_pipeline_core/logging/logging_config.py,sha256=6MBz9nnVNvqiLDoyy9-R3sWkn6927Re5hdz4hwTptpI,4903
|
|
28
29
|
ai_pipeline_core/logging/logging_mixin.py,sha256=RDaR2ju2-vKTJRzXGa0DquGPT8_UxahWjvKJnaD0IV8,7810
|
|
29
30
|
ai_pipeline_core/simple_runner/__init__.py,sha256=OPbTCZvqpnYdwi1Knnkj-MpmD0Nvtg5O7UwIdAKz_AY,384
|
|
30
|
-
ai_pipeline_core/simple_runner/cli.py,sha256=
|
|
31
|
-
ai_pipeline_core/simple_runner/simple_runner.py,sha256=
|
|
32
|
-
ai_pipeline_core-0.1.
|
|
33
|
-
ai_pipeline_core-0.1.
|
|
34
|
-
ai_pipeline_core-0.1.
|
|
35
|
-
ai_pipeline_core-0.1.
|
|
31
|
+
ai_pipeline_core/simple_runner/cli.py,sha256=vRB10SiBFCHD9eqjqSDNZkXYrL3oIQsxNUrHi6L7hu4,6310
|
|
32
|
+
ai_pipeline_core/simple_runner/simple_runner.py,sha256=TVFFVWAt2pxIJdMqVgxXJ1wHn1Xf_-N1fSMVNQH-Hqo,5253
|
|
33
|
+
ai_pipeline_core-0.1.10.dist-info/METADATA,sha256=HGRwvKh5XDGW7ufmmZ3lIwqVnZVr3eb8hLxYFCXhEbc,18010
|
|
34
|
+
ai_pipeline_core-0.1.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
35
|
+
ai_pipeline_core-0.1.10.dist-info/licenses/LICENSE,sha256=kKj8mfbdWwkyG3U6n7ztB3bAZlEwShTkAsvaY657i3I,1074
|
|
36
|
+
ai_pipeline_core-0.1.10.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|