ai-pipeline-core 0.1.8__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (36):
  1. ai_pipeline_core/__init__.py +86 -4
  2. ai_pipeline_core/documents/__init__.py +11 -0
  3. ai_pipeline_core/documents/document.py +1107 -131
  4. ai_pipeline_core/documents/document_list.py +147 -38
  5. ai_pipeline_core/documents/flow_document.py +112 -11
  6. ai_pipeline_core/documents/mime_type.py +173 -15
  7. ai_pipeline_core/documents/task_document.py +117 -12
  8. ai_pipeline_core/documents/temporary_document.py +95 -0
  9. ai_pipeline_core/documents/utils.py +41 -9
  10. ai_pipeline_core/exceptions.py +47 -11
  11. ai_pipeline_core/flow/__init__.py +2 -0
  12. ai_pipeline_core/flow/config.py +250 -23
  13. ai_pipeline_core/flow/options.py +50 -1
  14. ai_pipeline_core/llm/__init__.py +6 -0
  15. ai_pipeline_core/llm/ai_messages.py +125 -27
  16. ai_pipeline_core/llm/client.py +278 -26
  17. ai_pipeline_core/llm/model_options.py +130 -1
  18. ai_pipeline_core/llm/model_response.py +239 -35
  19. ai_pipeline_core/llm/model_types.py +67 -0
  20. ai_pipeline_core/logging/__init__.py +13 -0
  21. ai_pipeline_core/logging/logging_config.py +72 -20
  22. ai_pipeline_core/logging/logging_mixin.py +38 -32
  23. ai_pipeline_core/pipeline.py +308 -60
  24. ai_pipeline_core/prefect.py +48 -1
  25. ai_pipeline_core/prompt_manager.py +215 -24
  26. ai_pipeline_core/settings.py +108 -4
  27. ai_pipeline_core/simple_runner/__init__.py +5 -0
  28. ai_pipeline_core/simple_runner/cli.py +145 -17
  29. ai_pipeline_core/simple_runner/simple_runner.py +244 -6
  30. ai_pipeline_core/tracing.py +232 -30
  31. ai_pipeline_core-0.1.11.dist-info/METADATA +450 -0
  32. ai_pipeline_core-0.1.11.dist-info/RECORD +36 -0
  33. ai_pipeline_core-0.1.8.dist-info/METADATA +0 -558
  34. ai_pipeline_core-0.1.8.dist-info/RECORD +0 -35
  35. {ai_pipeline_core-0.1.8.dist-info → ai_pipeline_core-0.1.11.dist-info}/WHEEL +0 -0
  36. {ai_pipeline_core-0.1.8.dist-info → ai_pipeline_core-0.1.11.dist-info}/licenses/LICENSE +0 -0
@@ -1,28 +1,133 @@
1
- """Task-specific document base class."""
1
+ """Task-specific document base class for temporary pipeline data.
2
2
 
3
- from typing import Any, Literal, final
3
+ @public
4
+
5
+ This module provides the TaskDocument abstract base class for documents
6
+ that exist only during Prefect task execution and are not persisted.
7
+ """
8
+
9
+ from typing import Literal, final
4
10
 
5
11
  from .document import Document
6
12
 
7
13
 
8
14
  class TaskDocument(Document):
9
- """
10
- Abstract base class for task-specific documents.
15
+ """Abstract base class for temporary documents within task execution.
16
+
17
+ @public
18
+
19
+ TaskDocument is used for intermediate data that exists only during
20
+ the execution of a Prefect task and is not persisted to disk. These
21
+ documents are ideal for temporary processing results, transformations,
22
+ and data that doesn't need to survive beyond the current task.
23
+
24
+ Key characteristics:
25
+ - Not persisted to file system
26
+ - Exists only during task execution
27
+ - Garbage collected after task completes
28
+ - Used for intermediate processing results
29
+ - Reduces persistent I/O for temporary data
30
+
31
+ Creating TaskDocuments:
32
+ **Use the `create` classmethod** for most use cases. It handles automatic
33
+ conversion of various content types. Only use __init__ when you have bytes.
34
+
35
+ >>> from enum import StrEnum
36
+ >>>
37
+ >>> # Simple task document:
38
+ >>> class TempDoc(TaskDocument):
39
+ ... pass
40
+ >>>
41
+ >>> # With restricted files:
42
+ >>> class CacheDoc(TaskDocument):
43
+ ... class FILES(StrEnum):
44
+ ... CACHE = "cache.json"
45
+ ... INDEX = "index.dat"
46
+ >>>
47
+ >>> # RECOMMENDED - automatic conversion:
48
+ >>> doc = TempDoc.create(name="temp.json", content={"status": "processing"})
49
+ >>> doc = CacheDoc.create(name="cache.json", content={"data": [1, 2, 3]})
50
+
51
+ Use Cases:
52
+ - Intermediate transformation results
53
+ - Temporary buffers during processing
54
+ - Task-local cache data
55
+ - Processing status documents
11
56
 
12
- Task documents represent inputs, outputs, and intermediate results
13
- within a Prefect task execution context.
57
+ Note:
58
+ - Cannot instantiate TaskDocument directly - must subclass
59
+ - Not saved by simple_runner utilities
60
+ - Reduces I/O overhead for temporary data
61
+ - No additional abstract methods to implement
14
62
 
15
- Compared to FlowDocument, TaskDocument are not persisted across Prefect task runs.
16
- They are used for intermediate results that are not needed after the task completes.
63
+ See Also:
64
+ FlowDocument: For documents that persist across flow runs
65
+ TemporaryDocument: Alternative for non-persistent documents
17
66
  """
18
67
 
19
- def __init__(self, **data: Any) -> None:
20
- """Prevent direct instantiation of abstract TaskDocument class."""
68
+ def __init__(
69
+ self,
70
+ *,
71
+ name: str,
72
+ content: bytes,
73
+ description: str | None = None,
74
+ ) -> None:
75
+ """Initialize a TaskDocument with raw bytes content.
76
+
77
+ Important:
78
+ **Most users should use the `create` classmethod instead of __init__.**
79
+ The create method provides automatic content conversion for various types
80
+ (str, dict, list, Pydantic models) while __init__ only accepts bytes.
81
+
82
+ Prevents direct instantiation of the abstract TaskDocument class.
83
+ TaskDocument must be subclassed for specific temporary document types.
84
+
85
+ Args:
86
+ name: Document filename (required, keyword-only)
87
+ content: Document content as raw bytes (required, keyword-only)
88
+ description: Optional human-readable description (keyword-only)
89
+
90
+ Raises:
91
+ TypeError: If attempting to instantiate TaskDocument directly
92
+ instead of using a concrete subclass.
93
+
94
+ Example:
95
+ >>> from enum import StrEnum
96
+ >>>
97
+ >>> # Simple subclass:
98
+ >>> class MyTaskDoc(TaskDocument):
99
+ ... pass
100
+ >>>
101
+ >>> # With FILES restriction:
102
+ >>> class TempProcessDoc(TaskDocument):
103
+ ... class FILES(StrEnum):
104
+ ... BUFFER = "buffer.bin"
105
+ ... STATUS = "status.json"
106
+ >>>
107
+ >>> # Direct constructor - only for bytes:
108
+ >>> doc = MyTaskDoc(name="temp.bin", content=b"raw data")
109
+ >>>
110
+ >>> # RECOMMENDED - use create for automatic conversion:
111
+ >>> doc = TempProcessDoc.create(name="status.json", content={"percent": 50})
112
+ >>> # This would raise DocumentNameError:
113
+ >>> # doc = TempProcessDoc.create(name="other.json", content={})
114
+ """
21
115
  if type(self) is TaskDocument:
22
116
  raise TypeError("Cannot instantiate abstract TaskDocument class directly")
23
- super().__init__(**data)
117
+ super().__init__(name=name, content=content, description=description)
24
118
 
25
119
  @final
26
120
  def get_base_type(self) -> Literal["task"]:
27
- """Get the document type."""
121
+ """Return the base type identifier for task documents.
122
+
123
+ This method is final and cannot be overridden by subclasses.
124
+ It identifies this document as a task-scoped temporary document.
125
+
126
+ Returns:
127
+ "task" - Indicates this document is temporary within task execution.
128
+
129
+ Note:
130
+ This determines that the document will not be persisted and
131
+ exists only during task execution.
132
+ """
28
133
  return "task"
@@ -0,0 +1,95 @@
1
+ """Temporary document implementation for non-persistent data.
2
+
3
+ @public
4
+
5
+ This module provides the TemporaryDocument class for documents that
6
+ are never persisted, regardless of context.
7
+ """
8
+
9
+ from typing import Any, Literal, final
10
+
11
+ from .document import Document
12
+
13
+
14
+ @final
15
+ class TemporaryDocument(Document):
16
+ r"""Concrete document class for data that is never persisted.
17
+
18
+ @public
19
+
20
+ TemporaryDocument is a final (non-subclassable) document type for
21
+ data that should never be saved to disk, regardless of whether it's
22
+ used in a flow or task context. Unlike FlowDocument and TaskDocument
23
+ which are abstract, TemporaryDocument can be instantiated directly.
24
+
25
+ Key characteristics:
26
+ - Never persisted to file system
27
+ - Can be instantiated directly (not abstract)
28
+ - Cannot be subclassed (annotated with Python's @final decorator in code)
29
+ - Useful for transient data like API responses or intermediate calculations
30
+ - Ignored by simple_runner save operations
31
+
32
+ Creating TemporaryDocuments:
33
+ **Use the `create` classmethod** for most use cases. It handles automatic
34
+ conversion of various content types. Only use __init__ when you have bytes.
35
+
36
+ >>> # RECOMMENDED - automatic conversion:
37
+ >>> doc = TemporaryDocument.create(
38
+ ... name="api_response.json",
39
+ ... content={"status": "ok", "data": [1, 2, 3]}
40
+ ... )
41
+ >>> doc = TemporaryDocument.create(
42
+ ... name="credentials.txt",
43
+ ... content="secret_token_xyz"
44
+ ... )
45
+ >>>
46
+ >>> # Direct constructor - only for bytes:
47
+ >>> doc = TemporaryDocument(
48
+ ... name="binary.dat",
49
+ ... content=b"\x00\x01\x02"
50
+ ... )
51
+ >>>
52
+ >>> doc.is_temporary # Always True
53
+
54
+ Use Cases:
55
+ - API responses that shouldn't be cached
56
+ - Sensitive credentials or tokens
57
+ - Intermediate calculations
58
+ - Temporary transformations
59
+ - Data explicitly marked as non-persistent
60
+
61
+ Note:
62
+ - This is a final class and cannot be subclassed
63
+ - Use when you explicitly want to prevent persistence
64
+ - Useful for sensitive data that shouldn't be written to disk
65
+
66
+ See Also:
67
+ FlowDocument: For documents that persist across flow runs
68
+ TaskDocument: For documents temporary within task execution
69
+ """
70
+
71
+ def __init_subclass__(cls, **kwargs: Any) -> None:
72
+ """Disallow subclassing.
73
+
74
+ Args:
75
+ **kwargs: Additional keyword arguments (ignored).
76
+
77
+ Raises:
78
+ TypeError: Always raised to prevent subclassing of `TemporaryDocument`.
79
+ """
80
+ raise TypeError("TemporaryDocument is final and cannot be subclassed")
81
+
82
+ def get_base_type(self) -> Literal["temporary"]:
83
+ """Return the base type identifier for temporary documents.
84
+
85
+ Identifies this document as temporary, ensuring it will
86
+ never be persisted by the pipeline system.
87
+
88
+ Returns:
89
+ "temporary" - Indicates this document is never persisted.
90
+
91
+ Note:
92
+ Documents with this type are explicitly excluded from
93
+ all persistence operations in the pipeline system.
94
+ """
95
+ return "temporary"
@@ -1,12 +1,26 @@
1
+ """Utility functions for document handling.
2
+
3
+ Provides helper functions for URL sanitization, naming conventions,
4
+ and canonical key generation used throughout the document system.
5
+ """
6
+
1
7
  import re
2
8
  from typing import Any, Iterable, Type
3
9
  from urllib.parse import urlparse
4
10
 
5
11
 
6
12
  def sanitize_url(url: str) -> str:
7
- """
8
- Sanitize URL or query string for use in filenames.
13
+ """Sanitize URL or query string for use in filenames.
14
+
15
+ @public
16
+
9
17
  Removes or replaces characters that are invalid in filenames.
18
+
19
+ Args:
20
+ url: The URL or query string to sanitize.
21
+
22
+ Returns:
23
+ A sanitized string safe for use as a filename.
10
24
  """
11
25
  # Remove protocol if it's a URL
12
26
  if url.startswith(("http://", "https://")):
@@ -35,7 +49,14 @@ def sanitize_url(url: str) -> str:
35
49
 
36
50
 
37
51
  def camel_to_snake(name: str) -> str:
38
- """Convert CamelCase (incl. acronyms) to snake_case."""
52
+ """Convert CamelCase (incl. acronyms) to snake_case.
53
+
54
+ Args:
55
+ name: The CamelCase string to convert.
56
+
57
+ Returns:
58
+ The converted snake_case string.
59
+ """
39
60
  s1 = re.sub(r"(.)([A-Z][a-z0-9]+)", r"\1_\2", name)
40
61
  s2 = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", s1)
41
62
  return s2.replace("__", "_").strip("_").lower()
@@ -47,17 +68,28 @@ def canonical_name_key(
47
68
  max_parent_suffixes: int = 3,
48
69
  extra_suffixes: Iterable[str] = (),
49
70
  ) -> str:
50
- """
51
- Produce a canonical snake_case key from a class or name by:
71
+ """Produce a canonical snake_case key from a class or name.
72
+
73
+ @public
74
+
75
+ Process:
52
76
  1) Starting with the class name (or given string),
53
77
  2) Stripping any trailing parent class names (up to `max_parent_suffixes` from the MRO),
54
78
  3) Stripping any `extra_suffixes`,
55
79
  4) Converting to snake_case.
56
80
 
57
- Examples (given typical MROs):
58
- FinalReportDocument(WorkflowDocument -> Document) -> 'final_report'
59
- FooWorkflowDocument(WorkflowDocument -> Document) -> 'foo'
60
- BarFlow(Config -> Base -> Flow) -> 'bar'
81
+ Args:
82
+ obj_or_name: A class or string to convert.
83
+ max_parent_suffixes: Maximum number of parent classes to consider for suffix removal.
84
+ extra_suffixes: Additional suffixes to strip.
85
+
86
+ Returns:
87
+ The canonical snake_case name.
88
+
89
+ Examples:
90
+ FinalReportDocument(WorkflowDocument -> Document) -> 'final_report'
91
+ FooWorkflowDocument(WorkflowDocument -> Document) -> 'foo'
92
+ BarFlow(Config -> Base -> Flow) -> 'bar'
61
93
  """
62
94
  name = obj_or_name.__name__ if isinstance(obj_or_name, type) else str(obj_or_name)
63
95
 
@@ -1,61 +1,97 @@
1
- """Exception hierarchy for AI Pipeline Core."""
1
+ """Exception hierarchy for AI Pipeline Core.
2
+
3
+ @public
4
+
5
+ This module defines the exception hierarchy used throughout the AI Pipeline Core library.
6
+ All exceptions inherit from PipelineCoreError, providing a consistent error handling interface.
7
+ """
2
8
 
3
9
 
4
10
  class PipelineCoreError(Exception):
5
- """Base exception for all pipeline errors."""
11
+ """Base exception for all AI Pipeline Core errors.
12
+
13
+ @public
14
+ """
6
15
 
7
16
  pass
8
17
 
9
18
 
10
19
  class DocumentError(PipelineCoreError):
11
- """Document-related errors."""
20
+ """Base exception for document-related errors.
21
+
22
+ @public
23
+ """
12
24
 
13
25
  pass
14
26
 
15
27
 
16
28
  class DocumentValidationError(DocumentError):
17
- """Document validation failed."""
29
+ """Raised when document validation fails.
30
+
31
+ @public
32
+ """
18
33
 
19
34
  pass
20
35
 
21
36
 
22
37
  class DocumentSizeError(DocumentValidationError):
23
- """Document size exceeds limits."""
38
+ """Raised when document content exceeds MAX_CONTENT_SIZE limit.
39
+
40
+ @public
41
+ """
24
42
 
25
43
  pass
26
44
 
27
45
 
28
46
  class DocumentNameError(DocumentValidationError):
29
- """Invalid document name."""
47
+ """Raised when document name contains invalid characters or patterns.
48
+
49
+ @public
50
+ """
30
51
 
31
52
  pass
32
53
 
33
54
 
34
55
  class LLMError(PipelineCoreError):
35
- """LLM-related errors."""
56
+ """Raised when LLM generation fails after all retries.
57
+
58
+ @public
59
+ """
36
60
 
37
61
  pass
38
62
 
39
63
 
40
64
  class PromptError(PipelineCoreError):
41
- """Prompt-related errors."""
65
+ """Base exception for prompt template errors.
66
+
67
+ @public
68
+ """
42
69
 
43
70
  pass
44
71
 
45
72
 
46
73
  class PromptRenderError(PromptError):
47
- """Failed to render prompt template."""
74
+ """Raised when Jinja2 template rendering fails.
75
+
76
+ @public
77
+ """
48
78
 
49
79
  pass
50
80
 
51
81
 
52
82
  class PromptNotFoundError(PromptError):
53
- """Prompt template not found."""
83
+ """Raised when prompt template file is not found in search paths.
84
+
85
+ @public
86
+ """
54
87
 
55
88
  pass
56
89
 
57
90
 
58
91
  class MimeTypeError(DocumentError):
59
- """MIME type detection or validation error."""
92
+ """Raised when MIME type detection or validation fails.
93
+
94
+ @public
95
+ """
60
96
 
61
97
  pass
@@ -1,3 +1,5 @@
1
+ """Flow configuration and options for Prefect-based pipeline flows."""
2
+
1
3
  from .config import FlowConfig
2
4
  from .options import FlowOptions
3
5