ai-pipeline-core 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +84 -4
- ai_pipeline_core/documents/__init__.py +9 -0
- ai_pipeline_core/documents/document.py +1044 -152
- ai_pipeline_core/documents/document_list.py +147 -38
- ai_pipeline_core/documents/flow_document.py +112 -11
- ai_pipeline_core/documents/mime_type.py +173 -15
- ai_pipeline_core/documents/task_document.py +117 -12
- ai_pipeline_core/documents/temporary_document.py +84 -5
- ai_pipeline_core/documents/utils.py +41 -9
- ai_pipeline_core/exceptions.py +47 -11
- ai_pipeline_core/flow/__init__.py +2 -0
- ai_pipeline_core/flow/config.py +236 -27
- ai_pipeline_core/flow/options.py +50 -1
- ai_pipeline_core/llm/__init__.py +6 -0
- ai_pipeline_core/llm/ai_messages.py +125 -27
- ai_pipeline_core/llm/client.py +278 -26
- ai_pipeline_core/llm/model_options.py +130 -1
- ai_pipeline_core/llm/model_response.py +239 -35
- ai_pipeline_core/llm/model_types.py +67 -0
- ai_pipeline_core/logging/__init__.py +13 -0
- ai_pipeline_core/logging/logging_config.py +72 -20
- ai_pipeline_core/logging/logging_mixin.py +38 -32
- ai_pipeline_core/pipeline.py +363 -60
- ai_pipeline_core/prefect.py +48 -1
- ai_pipeline_core/prompt_manager.py +209 -24
- ai_pipeline_core/settings.py +108 -4
- ai_pipeline_core/simple_runner/__init__.py +5 -0
- ai_pipeline_core/simple_runner/cli.py +96 -11
- ai_pipeline_core/simple_runner/simple_runner.py +237 -4
- ai_pipeline_core/tracing.py +253 -30
- ai_pipeline_core-0.1.12.dist-info/METADATA +450 -0
- ai_pipeline_core-0.1.12.dist-info/RECORD +36 -0
- ai_pipeline_core-0.1.10.dist-info/METADATA +0 -538
- ai_pipeline_core-0.1.10.dist-info/RECORD +0 -36
- {ai_pipeline_core-0.1.10.dist-info → ai_pipeline_core-0.1.12.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.1.10.dist-info → ai_pipeline_core-0.1.12.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,28 +1,133 @@
|
|
|
1
|
-
"""Task-specific document base class.
|
|
1
|
+
"""Task-specific document base class for temporary pipeline data.
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
@public
|
|
4
|
+
|
|
5
|
+
This module provides the TaskDocument abstract base class for documents
|
|
6
|
+
that exist only during Prefect task execution and are not persisted.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from typing import Literal, final
|
|
4
10
|
|
|
5
11
|
from .document import Document
|
|
6
12
|
|
|
7
13
|
|
|
8
14
|
class TaskDocument(Document):
|
|
9
|
-
"""
|
|
10
|
-
|
|
15
|
+
"""Abstract base class for temporary documents within task execution.
|
|
16
|
+
|
|
17
|
+
@public
|
|
18
|
+
|
|
19
|
+
TaskDocument is used for intermediate data that exists only during
|
|
20
|
+
the execution of a Prefect task and is not persisted to disk. These
|
|
21
|
+
documents are ideal for temporary processing results, transformations,
|
|
22
|
+
and data that doesn't need to survive beyond the current task.
|
|
23
|
+
|
|
24
|
+
Key characteristics:
|
|
25
|
+
- Not persisted to file system
|
|
26
|
+
- Exists only during task execution
|
|
27
|
+
- Eligible for garbage collection once the task completes
|
|
28
|
+
- Used for intermediate processing results
|
|
29
|
+
- Reduces persistent I/O for temporary data
|
|
30
|
+
|
|
31
|
+
Creating TaskDocuments:
|
|
32
|
+
**Use the `create` classmethod** for most use cases. It handles automatic
|
|
33
|
+
conversion of various content types. Call __init__ directly only when the content is already bytes.
|
|
34
|
+
|
|
35
|
+
>>> from enum import StrEnum
|
|
36
|
+
>>>
|
|
37
|
+
>>> # Simple task document:
|
|
38
|
+
>>> class TempDoc(TaskDocument):
|
|
39
|
+
... pass
|
|
40
|
+
>>>
|
|
41
|
+
>>> # With restricted files:
|
|
42
|
+
>>> class CacheDoc(TaskDocument):
|
|
43
|
+
... class FILES(StrEnum):
|
|
44
|
+
... CACHE = "cache.json"
|
|
45
|
+
... INDEX = "index.dat"
|
|
46
|
+
>>>
|
|
47
|
+
>>> # RECOMMENDED - automatic conversion:
|
|
48
|
+
>>> doc = TempDoc.create(name="temp.json", content={"status": "processing"})
|
|
49
|
+
>>> doc = CacheDoc.create(name="cache.json", content={"data": [1, 2, 3]})
|
|
50
|
+
|
|
51
|
+
Use Cases:
|
|
52
|
+
- Intermediate transformation results
|
|
53
|
+
- Temporary buffers during processing
|
|
54
|
+
- Task-local cache data
|
|
55
|
+
- Processing status documents
|
|
11
56
|
|
|
12
|
-
|
|
13
|
-
|
|
57
|
+
Note:
|
|
58
|
+
- Cannot instantiate TaskDocument directly - must subclass
|
|
59
|
+
- Not saved by simple_runner utilities
|
|
60
|
+
- Reduces I/O overhead for temporary data
|
|
61
|
+
- No additional abstract methods to implement
|
|
14
62
|
|
|
15
|
-
|
|
16
|
-
|
|
63
|
+
See Also:
|
|
64
|
+
FlowDocument: For documents that persist across flow runs
|
|
65
|
+
TemporaryDocument: Alternative for non-persistent documents
|
|
17
66
|
"""
|
|
18
67
|
|
|
19
|
-
def __init__(
|
|
20
|
-
|
|
68
|
+
def __init__(
|
|
69
|
+
self,
|
|
70
|
+
*,
|
|
71
|
+
name: str,
|
|
72
|
+
content: bytes,
|
|
73
|
+
description: str | None = None,
|
|
74
|
+
) -> None:
|
|
75
|
+
"""Initialize a TaskDocument with raw bytes content.
|
|
76
|
+
|
|
77
|
+
Important:
|
|
78
|
+
**Most users should use the `create` classmethod instead of __init__.**
|
|
79
|
+
The create method provides automatic content conversion for various types
|
|
80
|
+
(str, dict, list, Pydantic models) while __init__ only accepts bytes.
|
|
81
|
+
|
|
82
|
+
Prevents direct instantiation of the abstract TaskDocument class.
|
|
83
|
+
TaskDocument must be subclassed for specific temporary document types.
|
|
84
|
+
|
|
85
|
+
Args:
|
|
86
|
+
name: Document filename (required, keyword-only)
|
|
87
|
+
content: Document content as raw bytes (required, keyword-only)
|
|
88
|
+
description: Optional human-readable description (keyword-only)
|
|
89
|
+
|
|
90
|
+
Raises:
|
|
91
|
+
TypeError: If attempting to instantiate TaskDocument directly
|
|
92
|
+
instead of using a concrete subclass.
|
|
93
|
+
|
|
94
|
+
Example:
|
|
95
|
+
>>> from enum import StrEnum
|
|
96
|
+
>>>
|
|
97
|
+
>>> # Simple subclass:
|
|
98
|
+
>>> class MyTaskDoc(TaskDocument):
|
|
99
|
+
... pass
|
|
100
|
+
>>>
|
|
101
|
+
>>> # With FILES restriction:
|
|
102
|
+
>>> class TempProcessDoc(TaskDocument):
|
|
103
|
+
... class FILES(StrEnum):
|
|
104
|
+
... BUFFER = "buffer.bin"
|
|
105
|
+
... STATUS = "status.json"
|
|
106
|
+
>>>
|
|
107
|
+
>>> # Direct constructor - only for bytes:
|
|
108
|
+
>>> doc = MyTaskDoc(name="temp.bin", content=b"raw data")
|
|
109
|
+
>>>
|
|
110
|
+
>>> # RECOMMENDED - use create for automatic conversion:
|
|
111
|
+
>>> doc = TempProcessDoc.create(name="status.json", content={"percent": 50})
|
|
112
|
+
>>> # This would raise DocumentNameError:
|
|
113
|
+
>>> # doc = TempProcessDoc.create(name="other.json", content={})
|
|
114
|
+
"""
|
|
21
115
|
if type(self) is TaskDocument:
|
|
22
116
|
raise TypeError("Cannot instantiate abstract TaskDocument class directly")
|
|
23
|
-
super().__init__(
|
|
117
|
+
super().__init__(name=name, content=content, description=description)
|
|
24
118
|
|
|
25
119
|
@final
|
|
26
120
|
def get_base_type(self) -> Literal["task"]:
|
|
27
|
-
"""
|
|
121
|
+
"""Return the base type identifier for task documents.
|
|
122
|
+
|
|
123
|
+
This method is final and cannot be overridden by subclasses.
|
|
124
|
+
It identifies this document as a task-scoped temporary document.
|
|
125
|
+
|
|
126
|
+
Returns:
|
|
127
|
+
"task" - Indicates this document is temporary within task execution.
|
|
128
|
+
|
|
129
|
+
Note:
|
|
130
|
+
This determines that the document will not be persisted and
|
|
131
|
+
exists only during task execution.
|
|
132
|
+
"""
|
|
28
133
|
return "task"
|
|
@@ -1,16 +1,95 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""Temporary document implementation for non-persistent data.
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
@public
|
|
4
|
+
|
|
5
|
+
This module provides the TemporaryDocument class for documents that
|
|
6
|
+
are never persisted, regardless of context.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from typing import Any, Literal, final
|
|
4
10
|
|
|
5
11
|
from .document import Document
|
|
6
12
|
|
|
7
13
|
|
|
8
14
|
@final
|
|
9
15
|
class TemporaryDocument(Document):
|
|
16
|
+
r"""Concrete document class for data that is never persisted.
|
|
17
|
+
|
|
18
|
+
@public
|
|
19
|
+
|
|
20
|
+
TemporaryDocument is a final (non-subclassable) document type for
|
|
21
|
+
data that should never be saved to disk, regardless of whether it's
|
|
22
|
+
used in a flow or task context. Unlike FlowDocument and TaskDocument
|
|
23
|
+
which are abstract, TemporaryDocument can be instantiated directly.
|
|
24
|
+
|
|
25
|
+
Key characteristics:
|
|
26
|
+
- Never persisted to file system
|
|
27
|
+
- Can be instantiated directly (not abstract)
|
|
28
|
+
- Cannot be subclassed (marked with @final and enforced at runtime by __init_subclass__)
|
|
29
|
+
- Useful for transient data like API responses or intermediate calculations
|
|
30
|
+
- Ignored by simple_runner save operations
|
|
31
|
+
|
|
32
|
+
Creating TemporaryDocuments:
|
|
33
|
+
**Use the `create` classmethod** for most use cases. It handles automatic
|
|
34
|
+
conversion of various content types. Call __init__ directly only when the content is already bytes.
|
|
35
|
+
|
|
36
|
+
>>> # RECOMMENDED - automatic conversion:
|
|
37
|
+
>>> doc = TemporaryDocument.create(
|
|
38
|
+
... name="api_response.json",
|
|
39
|
+
... content={"status": "ok", "data": [1, 2, 3]}
|
|
40
|
+
... )
|
|
41
|
+
>>> doc = TemporaryDocument.create(
|
|
42
|
+
... name="credentials.txt",
|
|
43
|
+
... content="secret_token_xyz"
|
|
44
|
+
... )
|
|
45
|
+
>>>
|
|
46
|
+
>>> # Direct constructor - only for bytes:
|
|
47
|
+
>>> doc = TemporaryDocument(
|
|
48
|
+
... name="binary.dat",
|
|
49
|
+
... content=b"\x00\x01\x02"
|
|
50
|
+
... )
|
|
51
|
+
>>>
|
|
52
|
+
>>> doc.is_temporary # Always True
|
|
53
|
+
|
|
54
|
+
Use Cases:
|
|
55
|
+
- API responses that shouldn't be cached
|
|
56
|
+
- Sensitive credentials or tokens
|
|
57
|
+
- Intermediate calculations
|
|
58
|
+
- Temporary transformations
|
|
59
|
+
- Data explicitly marked as non-persistent
|
|
60
|
+
|
|
61
|
+
Note:
|
|
62
|
+
- This is a final class and cannot be subclassed
|
|
63
|
+
- Use when you explicitly want to prevent persistence
|
|
64
|
+
- Useful for sensitive data that shouldn't be written to disk
|
|
65
|
+
|
|
66
|
+
See Also:
|
|
67
|
+
FlowDocument: For documents that persist across flow runs
|
|
68
|
+
TaskDocument: For documents temporary within task execution
|
|
10
69
|
"""
|
|
11
|
-
|
|
12
|
-
|
|
70
|
+
|
|
71
|
+
def __init_subclass__(cls, **kwargs: Any) -> None:
|
|
72
|
+
"""Disallow subclassing.
|
|
73
|
+
|
|
74
|
+
Args:
|
|
75
|
+
**kwargs: Additional keyword arguments (ignored).
|
|
76
|
+
|
|
77
|
+
Raises:
|
|
78
|
+
TypeError: Always raised to prevent subclassing of `TemporaryDocument`.
|
|
79
|
+
"""
|
|
80
|
+
raise TypeError("TemporaryDocument is final and cannot be subclassed")
|
|
13
81
|
|
|
14
82
|
def get_base_type(self) -> Literal["temporary"]:
|
|
15
|
-
"""
|
|
83
|
+
"""Return the base type identifier for temporary documents.
|
|
84
|
+
|
|
85
|
+
Identifies this document as temporary, ensuring it will
|
|
86
|
+
never be persisted by the pipeline system.
|
|
87
|
+
|
|
88
|
+
Returns:
|
|
89
|
+
"temporary" - Indicates this document is never persisted.
|
|
90
|
+
|
|
91
|
+
Note:
|
|
92
|
+
Documents with this type are explicitly excluded from
|
|
93
|
+
all persistence operations in the pipeline system.
|
|
94
|
+
"""
|
|
16
95
|
return "temporary"
|
|
@@ -1,12 +1,26 @@
|
|
|
1
|
+
"""Utility functions for document handling.
|
|
2
|
+
|
|
3
|
+
Provides helper functions for URL sanitization, naming conventions,
|
|
4
|
+
and canonical key generation used throughout the document system.
|
|
5
|
+
"""
|
|
6
|
+
|
|
1
7
|
import re
|
|
2
8
|
from typing import Any, Iterable, Type
|
|
3
9
|
from urllib.parse import urlparse
|
|
4
10
|
|
|
5
11
|
|
|
6
12
|
def sanitize_url(url: str) -> str:
|
|
7
|
-
"""
|
|
8
|
-
|
|
13
|
+
"""Sanitize URL or query string for use in filenames.
|
|
14
|
+
|
|
15
|
+
@public
|
|
16
|
+
|
|
9
17
|
Removes or replaces characters that are invalid in filenames.
|
|
18
|
+
|
|
19
|
+
Args:
|
|
20
|
+
url: The URL or query string to sanitize.
|
|
21
|
+
|
|
22
|
+
Returns:
|
|
23
|
+
A sanitized string safe for use as a filename.
|
|
10
24
|
"""
|
|
11
25
|
# Remove protocol if it's a URL
|
|
12
26
|
if url.startswith(("http://", "https://")):
|
|
@@ -35,7 +49,14 @@ def sanitize_url(url: str) -> str:
|
|
|
35
49
|
|
|
36
50
|
|
|
37
51
|
def camel_to_snake(name: str) -> str:
|
|
38
|
-
"""Convert CamelCase (incl. acronyms) to snake_case.
|
|
52
|
+
"""Convert CamelCase (incl. acronyms) to snake_case.
|
|
53
|
+
|
|
54
|
+
Args:
|
|
55
|
+
name: The CamelCase string to convert.
|
|
56
|
+
|
|
57
|
+
Returns:
|
|
58
|
+
The converted snake_case string.
|
|
59
|
+
"""
|
|
39
60
|
s1 = re.sub(r"(.)([A-Z][a-z0-9]+)", r"\1_\2", name)
|
|
40
61
|
s2 = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", s1)
|
|
41
62
|
return s2.replace("__", "_").strip("_").lower()
|
|
@@ -47,17 +68,28 @@ def canonical_name_key(
|
|
|
47
68
|
max_parent_suffixes: int = 3,
|
|
48
69
|
extra_suffixes: Iterable[str] = (),
|
|
49
70
|
) -> str:
|
|
50
|
-
"""
|
|
51
|
-
|
|
71
|
+
"""Produce a canonical snake_case key from a class or name.
|
|
72
|
+
|
|
73
|
+
@public
|
|
74
|
+
|
|
75
|
+
Process:
|
|
52
76
|
1) Starting with the class name (or given string),
|
|
53
77
|
2) Stripping any trailing parent class names (up to `max_parent_suffixes` from the MRO),
|
|
54
78
|
3) Stripping any `extra_suffixes`,
|
|
55
79
|
4) Converting to snake_case.
|
|
56
80
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
81
|
+
Args:
|
|
82
|
+
obj_or_name: A class or string to convert.
|
|
83
|
+
max_parent_suffixes: Maximum number of parent classes to consider for suffix removal.
|
|
84
|
+
extra_suffixes: Additional suffixes to strip.
|
|
85
|
+
|
|
86
|
+
Returns:
|
|
87
|
+
The canonical snake_case name.
|
|
88
|
+
|
|
89
|
+
Examples:
|
|
90
|
+
FinalReportDocument(WorkflowDocument -> Document) -> 'final_report'
|
|
91
|
+
FooWorkflowDocument(WorkflowDocument -> Document) -> 'foo'
|
|
92
|
+
BarFlow(Config -> Base -> Flow) -> 'bar'
|
|
61
93
|
"""
|
|
62
94
|
name = obj_or_name.__name__ if isinstance(obj_or_name, type) else str(obj_or_name)
|
|
63
95
|
|
ai_pipeline_core/exceptions.py
CHANGED
|
@@ -1,61 +1,97 @@
|
|
|
1
|
-
"""Exception hierarchy for AI Pipeline Core.
|
|
1
|
+
"""Exception hierarchy for AI Pipeline Core.
|
|
2
|
+
|
|
3
|
+
@public
|
|
4
|
+
|
|
5
|
+
This module defines the exception hierarchy used throughout the AI Pipeline Core library.
|
|
6
|
+
All exceptions inherit from PipelineCoreError, providing a consistent error handling interface.
|
|
7
|
+
"""
|
|
2
8
|
|
|
3
9
|
|
|
4
10
|
class PipelineCoreError(Exception):
|
|
5
|
-
"""Base exception for all
|
|
11
|
+
"""Base exception for all AI Pipeline Core errors.
|
|
12
|
+
|
|
13
|
+
@public
|
|
14
|
+
"""
|
|
6
15
|
|
|
7
16
|
pass
|
|
8
17
|
|
|
9
18
|
|
|
10
19
|
class DocumentError(PipelineCoreError):
|
|
11
|
-
"""
|
|
20
|
+
"""Base exception for document-related errors.
|
|
21
|
+
|
|
22
|
+
@public
|
|
23
|
+
"""
|
|
12
24
|
|
|
13
25
|
pass
|
|
14
26
|
|
|
15
27
|
|
|
16
28
|
class DocumentValidationError(DocumentError):
|
|
17
|
-
"""
|
|
29
|
+
"""Raised when document validation fails.
|
|
30
|
+
|
|
31
|
+
@public
|
|
32
|
+
"""
|
|
18
33
|
|
|
19
34
|
pass
|
|
20
35
|
|
|
21
36
|
|
|
22
37
|
class DocumentSizeError(DocumentValidationError):
|
|
23
|
-
"""
|
|
38
|
+
"""Raised when document content exceeds MAX_CONTENT_SIZE limit.
|
|
39
|
+
|
|
40
|
+
@public
|
|
41
|
+
"""
|
|
24
42
|
|
|
25
43
|
pass
|
|
26
44
|
|
|
27
45
|
|
|
28
46
|
class DocumentNameError(DocumentValidationError):
|
|
29
|
-
"""
|
|
47
|
+
"""Raised when document name contains invalid characters or patterns.
|
|
48
|
+
|
|
49
|
+
@public
|
|
50
|
+
"""
|
|
30
51
|
|
|
31
52
|
pass
|
|
32
53
|
|
|
33
54
|
|
|
34
55
|
class LLMError(PipelineCoreError):
|
|
35
|
-
"""LLM
|
|
56
|
+
"""Raised when LLM generation fails after all retries.
|
|
57
|
+
|
|
58
|
+
@public
|
|
59
|
+
"""
|
|
36
60
|
|
|
37
61
|
pass
|
|
38
62
|
|
|
39
63
|
|
|
40
64
|
class PromptError(PipelineCoreError):
|
|
41
|
-
"""
|
|
65
|
+
"""Base exception for prompt template errors.
|
|
66
|
+
|
|
67
|
+
@public
|
|
68
|
+
"""
|
|
42
69
|
|
|
43
70
|
pass
|
|
44
71
|
|
|
45
72
|
|
|
46
73
|
class PromptRenderError(PromptError):
|
|
47
|
-
"""
|
|
74
|
+
"""Raised when Jinja2 template rendering fails.
|
|
75
|
+
|
|
76
|
+
@public
|
|
77
|
+
"""
|
|
48
78
|
|
|
49
79
|
pass
|
|
50
80
|
|
|
51
81
|
|
|
52
82
|
class PromptNotFoundError(PromptError):
|
|
53
|
-
"""
|
|
83
|
+
"""Raised when prompt template file is not found in search paths.
|
|
84
|
+
|
|
85
|
+
@public
|
|
86
|
+
"""
|
|
54
87
|
|
|
55
88
|
pass
|
|
56
89
|
|
|
57
90
|
|
|
58
91
|
class MimeTypeError(DocumentError):
|
|
59
|
-
"""MIME type detection or validation
|
|
92
|
+
"""Raised when MIME type detection or validation fails.
|
|
93
|
+
|
|
94
|
+
@public
|
|
95
|
+
"""
|
|
60
96
|
|
|
61
97
|
pass
|