ai-pipeline-core 0.1.1__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.2}/PKG-INFO +11 -9
  2. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.2}/README.md +10 -8
  3. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.2}/ai_pipeline_core/__init__.py +1 -1
  4. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.2}/ai_pipeline_core/documents/document.py +11 -4
  5. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.2}/ai_pipeline_core/documents/flow_document.py +7 -1
  6. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.2}/ai_pipeline_core/documents/task_document.py +7 -1
  7. ai_pipeline_core-0.1.2/ai_pipeline_core/documents/utils.py +85 -0
  8. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.2}/pyproject.toml +2 -2
  9. ai_pipeline_core-0.1.1/ai_pipeline_core/documents/utils.py +0 -33
  10. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.2}/.gitignore +0 -0
  11. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.2}/LICENSE +0 -0
  12. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.2}/ai_pipeline_core/documents/__init__.py +0 -0
  13. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.2}/ai_pipeline_core/documents/document_list.py +0 -0
  14. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.2}/ai_pipeline_core/documents/mime_type.py +0 -0
  15. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.2}/ai_pipeline_core/exceptions.py +0 -0
  16. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.2}/ai_pipeline_core/flow/__init__.py +0 -0
  17. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.2}/ai_pipeline_core/flow/config.py +0 -0
  18. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.2}/ai_pipeline_core/llm/__init__.py +0 -0
  19. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.2}/ai_pipeline_core/llm/ai_messages.py +0 -0
  20. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.2}/ai_pipeline_core/llm/client.py +0 -0
  21. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.2}/ai_pipeline_core/llm/model_options.py +0 -0
  22. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.2}/ai_pipeline_core/llm/model_response.py +0 -0
  23. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.2}/ai_pipeline_core/llm/model_types.py +0 -0
  24. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.2}/ai_pipeline_core/logging/__init__.py +0 -0
  25. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.2}/ai_pipeline_core/logging/logging.yml +0 -0
  26. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.2}/ai_pipeline_core/logging/logging_config.py +0 -0
  27. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.2}/ai_pipeline_core/logging/logging_mixin.py +0 -0
  28. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.2}/ai_pipeline_core/prompt_manager.py +0 -0
  29. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.2}/ai_pipeline_core/py.typed +0 -0
  30. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.2}/ai_pipeline_core/settings.py +0 -0
  31. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.2}/ai_pipeline_core/tracing.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ai-pipeline-core
3
- Version: 0.1.1
3
+ Version: 0.1.2
4
4
  Summary: Core utilities for AI-powered processing pipelines using prefect
5
5
  Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
6
6
  Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -438,9 +438,15 @@ For learning purposes, see [CLAUDE.md](CLAUDE.md) for our comprehensive coding s
438
438
  ## Documentation
439
439
 
440
440
  - [CLAUDE.md](CLAUDE.md) - Detailed coding standards and architecture guide
441
- - [Prefect Integration](docs/prefect.md) - Prefect patterns and best practices
442
- - [Deployment Guide](docs/prefect_deployment.md) - Production deployment
443
- - [Prefect Logging](docs/prefect_logging.md) - Logging configuration guide
441
+
442
+ ### dependencies_docs/ Directory
443
+ > [!NOTE]
444
+ > The `dependencies_docs/` directory contains guides for AI assistants (like Claude Code) on how to interact with the project's external dependencies and tooling, NOT user documentation for ai-pipeline-core itself. These files are excluded from repository listings to avoid confusion.
445
+
446
+ **AI Assistant Dependency Guides:**
447
+ - [Prefect Integration](dependencies_docs/prefect.md) - Prefect patterns and best practices for AI assistants
448
+ - [Deployment Guide](dependencies_docs/prefect_deployment.md) - Production deployment guide for AI assistants
449
+ - [Prefect Logging](dependencies_docs/prefect_logging.md) - Logging configuration guide for AI assistants
444
450
 
445
451
  ## License
446
452
 
@@ -465,13 +471,9 @@ Built with:
465
471
 
466
472
  ## Stability Notice
467
473
 
468
- **Current Version**: 0.1.1
474
+ **Current Version**: 0.1.2
469
475
  **Status**: Internal Preview
470
476
  **API Stability**: Unstable - Breaking changes expected
471
477
  **Recommended Use**: Learning and reference only
472
478
 
473
479
  For production use, please fork this repository and maintain your own stable version.
474
-
475
- ---
476
-
477
- **Remember**: The best code is no code. The second best is minimal, typed, async code that does exactly what's needed.
@@ -396,9 +396,15 @@ For learning purposes, see [CLAUDE.md](CLAUDE.md) for our comprehensive coding s
396
396
  ## Documentation
397
397
 
398
398
  - [CLAUDE.md](CLAUDE.md) - Detailed coding standards and architecture guide
399
- - [Prefect Integration](docs/prefect.md) - Prefect patterns and best practices
400
- - [Deployment Guide](docs/prefect_deployment.md) - Production deployment
401
- - [Prefect Logging](docs/prefect_logging.md) - Logging configuration guide
399
+
400
+ ### dependencies_docs/ Directory
401
+ > [!NOTE]
402
+ > The `dependencies_docs/` directory contains guides for AI assistants (like Claude Code) on how to interact with the project's external dependencies and tooling, NOT user documentation for ai-pipeline-core itself. These files are excluded from repository listings to avoid confusion.
403
+
404
+ **AI Assistant Dependency Guides:**
405
+ - [Prefect Integration](dependencies_docs/prefect.md) - Prefect patterns and best practices for AI assistants
406
+ - [Deployment Guide](dependencies_docs/prefect_deployment.md) - Production deployment guide for AI assistants
407
+ - [Prefect Logging](dependencies_docs/prefect_logging.md) - Logging configuration guide for AI assistants
402
408
 
403
409
  ## License
404
410
 
@@ -423,13 +429,9 @@ Built with:
423
429
 
424
430
  ## Stability Notice
425
431
 
426
- **Current Version**: 0.1.1
432
+ **Current Version**: 0.1.2
427
433
  **Status**: Internal Preview
428
434
  **API Stability**: Unstable - Breaking changes expected
429
435
  **Recommended Use**: Learning and reference only
430
436
 
431
437
  For production use, please fork this repository and maintain your own stable version.
432
-
433
- ---
434
-
435
- **Remember**: The best code is no code. The second best is minimal, typed, async code that does exactly what's needed.
@@ -16,7 +16,7 @@ from .prompt_manager import PromptManager
16
16
  from .settings import settings
17
17
  from .tracing import trace
18
18
 
19
- __version__ = "0.1.1"
19
+ __version__ = "0.1.2"
20
20
 
21
21
  __all__ = [
22
22
  "Document",
@@ -10,6 +10,7 @@ from typing import Any, ClassVar, Literal, Self
10
10
  from pydantic import BaseModel, ConfigDict, field_serializer, field_validator
11
11
  from ruamel.yaml import YAML
12
12
 
13
+ from ai_pipeline_core.documents.utils import canonical_name_key
13
14
  from ai_pipeline_core.exceptions import DocumentNameError, DocumentSizeError
14
15
 
15
16
  from .mime_type import (
@@ -26,6 +27,12 @@ class Document(BaseModel, ABC):
26
27
  MAX_CONTENT_SIZE: ClassVar[int] = 10 * 1024 * 1024 # 10MB default
27
28
  DESCRIPTION_EXTENSION: ClassVar[str] = ".description.md"
28
29
 
30
+ def __init__(self, **data: Any) -> None:
31
+ """Prevent direct instantiation of abstract Document class."""
32
+ if type(self) is Document:
33
+ raise TypeError("Cannot instantiate abstract Document class directly")
34
+ super().__init__(**data)
35
+
29
36
  # Optional enum of allowed file names. Subclasses may set this.
30
37
  # This is used to validate the document name.
31
38
  FILES: ClassVar[type[StrEnum] | None] = None
@@ -179,10 +186,10 @@ class Document(BaseModel, ABC):
179
186
  """Check if document is an image"""
180
187
  return is_image_mime_type(self.mime_type)
181
188
 
182
- @property
183
- def should_be_cached(self) -> bool:
184
- """Check if document should be cached"""
185
- return False
189
+ @classmethod
190
+ def canonical_name(cls) -> str:
191
+ """Get the canonical name of the document"""
192
+ return canonical_name_key(cls)
186
193
 
187
194
  def as_text(self) -> str:
188
195
  """Parse document as text"""
@@ -1,6 +1,6 @@
1
1
  """Flow-specific document base class."""
2
2
 
3
- from typing import Literal, final
3
+ from typing import Any, Literal, final
4
4
 
5
5
  from .document import Document
6
6
 
@@ -15,6 +15,12 @@ class FlowDocument(Document):
15
15
  Compared to TaskDocument, FlowDocument are persistent across Prefect flow runs.
16
16
  """
17
17
 
18
+ def __init__(self, **data: Any) -> None:
19
+ """Prevent direct instantiation of abstract FlowDocument class."""
20
+ if type(self) is FlowDocument:
21
+ raise TypeError("Cannot instantiate abstract FlowDocument class directly")
22
+ super().__init__(**data)
23
+
18
24
  @final
19
25
  def get_base_type(self) -> Literal["flow"]:
20
26
  """Get the document type."""
@@ -1,6 +1,6 @@
1
1
  """Task-specific document base class."""
2
2
 
3
- from typing import Literal, final
3
+ from typing import Any, Literal, final
4
4
 
5
5
  from .document import Document
6
6
 
@@ -16,6 +16,12 @@ class TaskDocument(Document):
16
16
  They are used for intermediate results that are not needed after the task completes.
17
17
  """
18
18
 
19
+ def __init__(self, **data: Any) -> None:
20
+ """Prevent direct instantiation of abstract TaskDocument class."""
21
+ if type(self) is TaskDocument:
22
+ raise TypeError("Cannot instantiate abstract TaskDocument class directly")
23
+ super().__init__(**data)
24
+
19
25
  @final
20
26
  def get_base_type(self) -> Literal["task"]:
21
27
  """Get the document type."""
@@ -0,0 +1,85 @@
1
+ import re
2
+ from typing import Any, Iterable, Type
3
+ from urllib.parse import urlparse
4
+
5
+
6
+ def sanitize_url(url: str) -> str:
7
+ """
8
+ Sanitize URL or query string for use in filenames.
9
+ Removes or replaces characters that are invalid in filenames.
10
+ """
11
+ # Remove protocol if it's a URL
12
+ if url.startswith(("http://", "https://")):
13
+ parsed = urlparse(url)
14
+ # Use domain + path
15
+ url = parsed.netloc + parsed.path
16
+
17
+ # Replace invalid filename characters
18
+ sanitized = re.sub(r'[<>:"/\\|?*]', "_", url)
19
+
20
+ # Replace multiple underscores with single one
21
+ sanitized = re.sub(r"_+", "_", sanitized)
22
+
23
+ # Remove leading/trailing underscores and dots
24
+ sanitized = sanitized.strip("_.")
25
+
26
+ # Limit length to prevent too long filenames
27
+ if len(sanitized) > 100:
28
+ sanitized = sanitized[:100]
29
+
30
+ # Ensure we have something
31
+ if not sanitized:
32
+ sanitized = "unnamed"
33
+
34
+ return sanitized
35
+
36
+
37
+ def camel_to_snake(name: str) -> str:
38
+ """Convert CamelCase (incl. acronyms) to snake_case."""
39
+ s1 = re.sub(r"(.)([A-Z][a-z0-9]+)", r"\1_\2", name)
40
+ s2 = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", s1)
41
+ return s2.replace("__", "_").strip("_").lower()
42
+
43
+
44
+ def canonical_name_key(
45
+ obj_or_name: Type[Any] | str,
46
+ *,
47
+ max_parent_suffixes: int = 3,
48
+ extra_suffixes: Iterable[str] = (),
49
+ ) -> str:
50
+ """
51
+ Produce a canonical snake_case key from a class or name by:
52
+ 1) Starting with the class name (or given string),
53
+ 2) Stripping any trailing parent class names (up to `max_parent_suffixes` from the MRO),
54
+ 3) Stripping any `extra_suffixes`,
55
+ 4) Converting to snake_case.
56
+
57
+ Examples (given typical MROs):
58
+ FinalReportDocument(WorkflowDocument -> Document) -> 'final_report'
59
+ FooWorkflowDocument(WorkflowDocument -> Document) -> 'foo'
60
+ BarFlow(Config -> Base -> Flow) -> 'bar'
61
+ """
62
+ name = obj_or_name.__name__ if isinstance(obj_or_name, type) else str(obj_or_name)
63
+
64
+ # From MRO, collect up to N parent names to consider as removable suffixes
65
+ suffixes: list[str] = []
66
+ if isinstance(obj_or_name, type):
67
+ for base in obj_or_name.mro()[1 : 1 + max_parent_suffixes]:
68
+ if base is object:
69
+ continue
70
+ suffixes.append(base.__name__)
71
+
72
+ # Add any custom suffixes the caller wants to strip (e.g., 'Config')
73
+ suffixes.extend(extra_suffixes)
74
+
75
+ # Iteratively trim the longest matching suffix first
76
+ trimmed = True
77
+ while trimmed and suffixes:
78
+ trimmed = False
79
+ for sfx in sorted(set(suffixes), key=len, reverse=True):
80
+ if sfx and name.endswith(sfx):
81
+ name = name[: -len(sfx)]
82
+ trimmed = True
83
+ break
84
+
85
+ return camel_to_snake(name)
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "ai-pipeline-core"
3
- version = "0.1.1"
3
+ version = "0.1.2"
4
4
  description = "Core utilities for AI-powered processing pipelines using prefect"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -140,7 +140,7 @@ reportIncompatibleVariableOverride = "error"
140
140
  reportMissingParameterType = "warning"
141
141
 
142
142
  [tool.bumpversion]
143
- current_version = "0.1.1"
143
+ current_version = "0.1.2"
144
144
  commit = true
145
145
  tag = true
146
146
  tag_name = "v{new_version}"
@@ -1,33 +0,0 @@
1
- import re
2
- from urllib.parse import urlparse
3
-
4
-
5
- def sanitize_url(url: str) -> str:
6
- """
7
- Sanitize URL or query string for use in filenames.
8
- Removes or replaces characters that are invalid in filenames.
9
- """
10
- # Remove protocol if it's a URL
11
- if url.startswith(("http://", "https://")):
12
- parsed = urlparse(url)
13
- # Use domain + path
14
- url = parsed.netloc + parsed.path
15
-
16
- # Replace invalid filename characters
17
- sanitized = re.sub(r'[<>:"/\\|?*]', "_", url)
18
-
19
- # Replace multiple underscores with single one
20
- sanitized = re.sub(r"_+", "_", sanitized)
21
-
22
- # Remove leading/trailing underscores and dots
23
- sanitized = sanitized.strip("_.")
24
-
25
- # Limit length to prevent too long filenames
26
- if len(sanitized) > 100:
27
- sanitized = sanitized[:100]
28
-
29
- # Ensure we have something
30
- if not sanitized:
31
- sanitized = "unnamed"
32
-
33
- return sanitized