ai-pipeline-core 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,7 +16,7 @@ from .prompt_manager import PromptManager
16
16
  from .settings import settings
17
17
  from .tracing import trace
18
18
 
19
- __version__ = "0.1.1"
19
+ __version__ = "0.1.2"
20
20
 
21
21
  __all__ = [
22
22
  "Document",
@@ -10,6 +10,7 @@ from typing import Any, ClassVar, Literal, Self
10
10
  from pydantic import BaseModel, ConfigDict, field_serializer, field_validator
11
11
  from ruamel.yaml import YAML
12
12
 
13
+ from ai_pipeline_core.documents.utils import canonical_name_key
13
14
  from ai_pipeline_core.exceptions import DocumentNameError, DocumentSizeError
14
15
 
15
16
  from .mime_type import (
@@ -26,6 +27,12 @@ class Document(BaseModel, ABC):
26
27
  MAX_CONTENT_SIZE: ClassVar[int] = 10 * 1024 * 1024 # 10MB default
27
28
  DESCRIPTION_EXTENSION: ClassVar[str] = ".description.md"
28
29
 
30
+ def __init__(self, **data: Any) -> None:
31
+ """Prevent direct instantiation of abstract Document class."""
32
+ if type(self) is Document:
33
+ raise TypeError("Cannot instantiate abstract Document class directly")
34
+ super().__init__(**data)
35
+
29
36
  # Optional enum of allowed file names. Subclasses may set this.
30
37
  # This is used to validate the document name.
31
38
  FILES: ClassVar[type[StrEnum] | None] = None
@@ -179,10 +186,10 @@ class Document(BaseModel, ABC):
179
186
  """Check if document is an image"""
180
187
  return is_image_mime_type(self.mime_type)
181
188
 
182
- @property
183
- def should_be_cached(self) -> bool:
184
- """Check if document should be cached"""
185
- return False
189
+ @classmethod
190
+ def canonical_name(cls) -> str:
191
+ """Get the canonical name of the document"""
192
+ return canonical_name_key(cls)
186
193
 
187
194
  def as_text(self) -> str:
188
195
  """Parse document as text"""
@@ -1,6 +1,6 @@
1
1
  """Flow-specific document base class."""
2
2
 
3
- from typing import Literal, final
3
+ from typing import Any, Literal, final
4
4
 
5
5
  from .document import Document
6
6
 
@@ -15,6 +15,12 @@ class FlowDocument(Document):
15
15
  Compared to TaskDocument, FlowDocument are persistent across Prefect flow runs.
16
16
  """
17
17
 
18
+ def __init__(self, **data: Any) -> None:
19
+ """Prevent direct instantiation of abstract FlowDocument class."""
20
+ if type(self) is FlowDocument:
21
+ raise TypeError("Cannot instantiate abstract FlowDocument class directly")
22
+ super().__init__(**data)
23
+
18
24
  @final
19
25
  def get_base_type(self) -> Literal["flow"]:
20
26
  """Get the document type."""
@@ -1,6 +1,6 @@
1
1
  """Task-specific document base class."""
2
2
 
3
- from typing import Literal, final
3
+ from typing import Any, Literal, final
4
4
 
5
5
  from .document import Document
6
6
 
@@ -16,6 +16,12 @@ class TaskDocument(Document):
16
16
  They are used for intermediate results that are not needed after the task completes.
17
17
  """
18
18
 
19
+ def __init__(self, **data: Any) -> None:
20
+ """Prevent direct instantiation of abstract TaskDocument class."""
21
+ if type(self) is TaskDocument:
22
+ raise TypeError("Cannot instantiate abstract TaskDocument class directly")
23
+ super().__init__(**data)
24
+
19
25
  @final
20
26
  def get_base_type(self) -> Literal["task"]:
21
27
  """Get the document type."""
@@ -1,4 +1,5 @@
1
1
  import re
2
+ from typing import Any, Iterable, Type
2
3
  from urllib.parse import urlparse
3
4
 
4
5
 
@@ -31,3 +32,54 @@ def sanitize_url(url: str) -> str:
31
32
  sanitized = "unnamed"
32
33
 
33
34
  return sanitized
35
+
36
+
37
+ def camel_to_snake(name: str) -> str:
38
+ """Convert CamelCase (incl. acronyms) to snake_case."""
39
+ s1 = re.sub(r"(.)([A-Z][a-z0-9]+)", r"\1_\2", name)
40
+ s2 = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", s1)
41
+ return s2.replace("__", "_").strip("_").lower()
42
+
43
+
44
+ def canonical_name_key(
45
+ obj_or_name: Type[Any] | str,
46
+ *,
47
+ max_parent_suffixes: int = 3,
48
+ extra_suffixes: Iterable[str] = (),
49
+ ) -> str:
50
+ """
51
+ Produce a canonical snake_case key from a class or name by:
52
+ 1) Starting with the class name (or given string),
53
+ 2) Stripping any trailing parent class names (up to `max_parent_suffixes` from the MRO),
54
+ 3) Stripping any `extra_suffixes`,
55
+ 4) Converting to snake_case.
56
+
57
+ Examples (given typical MROs):
58
+ FinalReportDocument(WorkflowDocument -> Document) -> 'final_report'
59
+ FooWorkflowDocument(WorkflowDocument -> Document) -> 'foo'
60
+ BarFlow(Config -> Base -> Flow) -> 'bar'
61
+ """
62
+ name = obj_or_name.__name__ if isinstance(obj_or_name, type) else str(obj_or_name)
63
+
64
+ # From MRO, collect up to N parent names to consider as removable suffixes
65
+ suffixes: list[str] = []
66
+ if isinstance(obj_or_name, type):
67
+ for base in obj_or_name.mro()[1 : 1 + max_parent_suffixes]:
68
+ if base is object:
69
+ continue
70
+ suffixes.append(base.__name__)
71
+
72
+ # Add any custom suffixes the caller wants to strip (e.g., 'Config')
73
+ suffixes.extend(extra_suffixes)
74
+
75
+ # Iteratively trim the longest matching suffix first
76
+ trimmed = True
77
+ while trimmed and suffixes:
78
+ trimmed = False
79
+ for sfx in sorted(set(suffixes), key=len, reverse=True):
80
+ if sfx and name.endswith(sfx):
81
+ name = name[: -len(sfx)]
82
+ trimmed = True
83
+ break
84
+
85
+ return camel_to_snake(name)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ai-pipeline-core
3
- Version: 0.1.1
3
+ Version: 0.1.2
4
4
  Summary: Core utilities for AI-powered processing pipelines using prefect
5
5
  Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
6
6
  Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -438,9 +438,15 @@ For learning purposes, see [CLAUDE.md](CLAUDE.md) for our comprehensive coding s
438
438
  ## Documentation
439
439
 
440
440
  - [CLAUDE.md](CLAUDE.md) - Detailed coding standards and architecture guide
441
- - [Prefect Integration](docs/prefect.md) - Prefect patterns and best practices
442
- - [Deployment Guide](docs/prefect_deployment.md) - Production deployment
443
- - [Prefect Logging](docs/prefect_logging.md) - Logging configuration guide
441
+
442
+ ### dependencies_docs/ Directory
443
+ > [!NOTE]
444
+ > The `dependencies_docs/` directory contains guides for AI assistants (like Claude Code) on how to interact with the project's external dependencies and tooling, NOT user documentation for ai-pipeline-core itself. These files are excluded from repository listings to avoid confusion.
445
+
446
+ **AI Assistant Dependency Guides:**
447
+ - [Prefect Integration](dependencies_docs/prefect.md) - Prefect patterns and best practices for AI assistants
448
+ - [Deployment Guide](dependencies_docs/prefect_deployment.md) - Production deployment guide for AI assistants
449
+ - [Prefect Logging](dependencies_docs/prefect_logging.md) - Logging configuration guide for AI assistants
444
450
 
445
451
  ## License
446
452
 
@@ -465,13 +471,9 @@ Built with:
465
471
 
466
472
  ## Stability Notice
467
473
 
468
- **Current Version**: 0.1.1
474
+ **Current Version**: 0.1.2
469
475
  **Status**: Internal Preview
470
476
  **API Stability**: Unstable - Breaking changes expected
471
477
  **Recommended Use**: Learning and reference only
472
478
 
473
479
  For production use, please fork this repository and maintain your own stable version.
474
-
475
- ---
476
-
477
- **Remember**: The best code is no code. The second best is minimal, typed, async code that does exactly what's needed.
@@ -1,16 +1,16 @@
1
- ai_pipeline_core/__init__.py,sha256=pZsQw7615IfTfZ-4owfhB9QhzRXd71fQoUH0hzVnKt0,779
1
+ ai_pipeline_core/__init__.py,sha256=qJfrXgVvGcXERAsm0EB-5MgxlQjnrnirnRqi12O3B80,779
2
2
  ai_pipeline_core/exceptions.py,sha256=_vW0Hbw2LGb5tcVvH0YzTKMff7QOPfCRr3w-w_zPyCE,968
3
3
  ai_pipeline_core/prompt_manager.py,sha256=XmNUdMIC0WrE9fF0LIcfozAKOGrlYwj8AfXvCndIH-o,4693
4
4
  ai_pipeline_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  ai_pipeline_core/settings.py,sha256=Zl2BPa6IHzh-B5V7cg5mtySr1dhWZQYYKxXz3BwrHlQ,615
6
6
  ai_pipeline_core/tracing.py,sha256=UcQ_z1F8KrBLq5ZJaXyoIZsiMdqjHzBhADV07pYXY-w,7663
7
7
  ai_pipeline_core/documents/__init__.py,sha256=rEnKj-sSlZ9WnFlZAmSGVi1P8vnsHmU9O9_YwtP40ms,242
8
- ai_pipeline_core/documents/document.py,sha256=p2nlnvsjQhmbEzw5lkXXmb26GMpGZ-QICaNnVapQ5JA,8588
8
+ ai_pipeline_core/documents/document.py,sha256=Gj4WR57VW67hhRN1360oqHfVQg6Xxj4jx_XueK4cvl0,8941
9
9
  ai_pipeline_core/documents/document_list.py,sha256=HOG_uZDazA9CJB7Lr_tNcDFzb5Ff9RUt0ELWQK_eYNM,4940
10
- ai_pipeline_core/documents/flow_document.py,sha256=WxdPvavSCWzyTo7r-R5xKy6RzVNjl56705MoiSE76P8,530
10
+ ai_pipeline_core/documents/flow_document.py,sha256=qsV-2JYOMhkvAj7lW54ZNH_4QUclld9h06CoU59tWww,815
11
11
  ai_pipeline_core/documents/mime_type.py,sha256=tMWGH9PVmHe6a_IzdaJUqIHf4qnwQOwOCBhsgW2AyTE,2244
12
- ai_pipeline_core/documents/task_document.py,sha256=gxfXMeh85gwWd17bVTZ9xVWb_K91YbJ9N7w1zfHqZyw,622
13
- ai_pipeline_core/documents/utils.py,sha256=P46oaTG266YBiq-FnaD0iQ763ReKKPekhlykf9w-fWQ,905
12
+ ai_pipeline_core/documents/task_document.py,sha256=WjHqtl1d60XFBBqewNRdz1OqBErGI0jRx15oQYCTHo8,907
13
+ ai_pipeline_core/documents/utils.py,sha256=BdE4taSl1vrBhxnFbOP5nDA7lXIcvY__AMRTHoaNb5M,2764
14
14
  ai_pipeline_core/flow/__init__.py,sha256=_Sji2yY1ICkvVX6QiiGWKzqIXtg9UAiuvhjHSK_gdO8,57
15
15
  ai_pipeline_core/flow/config.py,sha256=crbe_OvNE6qulIKv1D8yKoe8xrEsIlvICyxjhqHHBxQ,2266
16
16
  ai_pipeline_core/llm/__init__.py,sha256=3XVK-bSJdOe0s6KmmO7PDbsXHfjlcZEG1MVBmaz3EeU,442
@@ -23,7 +23,7 @@ ai_pipeline_core/logging/__init__.py,sha256=DOO6ckgnMVXl29Sy7q6jhO-iW96h54pCHQDz
23
23
  ai_pipeline_core/logging/logging.yml,sha256=YTW48keO_K5bkkb-KXGM7ZuaYKiquLsjsURei8Ql0V4,1353
24
24
  ai_pipeline_core/logging/logging_config.py,sha256=6MBz9nnVNvqiLDoyy9-R3sWkn6927Re5hdz4hwTptpI,4903
25
25
  ai_pipeline_core/logging/logging_mixin.py,sha256=RDaR2ju2-vKTJRzXGa0DquGPT8_UxahWjvKJnaD0IV8,7810
26
- ai_pipeline_core-0.1.1.dist-info/METADATA,sha256=88CYHbskiMyW1FkjS0OZOloDaHRR04hWl7PpTge2H2s,15529
27
- ai_pipeline_core-0.1.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
28
- ai_pipeline_core-0.1.1.dist-info/licenses/LICENSE,sha256=kKj8mfbdWwkyG3U6n7ztB3bAZlEwShTkAsvaY657i3I,1074
29
- ai_pipeline_core-0.1.1.dist-info/RECORD,,
26
+ ai_pipeline_core-0.1.2.dist-info/METADATA,sha256=LO-DGxVRhZPcwRwb2_zibFzp8aRE59STOYp2BxDag8M,15869
27
+ ai_pipeline_core-0.1.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
28
+ ai_pipeline_core-0.1.2.dist-info/licenses/LICENSE,sha256=kKj8mfbdWwkyG3U6n7ztB3bAZlEwShTkAsvaY657i3I,1074
29
+ ai_pipeline_core-0.1.2.dist-info/RECORD,,