ai-pipeline-core 0.1.1__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/PKG-INFO +11 -9
  2. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/README.md +10 -8
  3. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/__init__.py +1 -1
  4. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/documents/document.py +38 -10
  5. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/documents/flow_document.py +7 -1
  6. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/documents/task_document.py +7 -1
  7. ai_pipeline_core-0.1.3/ai_pipeline_core/documents/utils.py +85 -0
  8. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/llm/client.py +1 -1
  9. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/pyproject.toml +2 -2
  10. ai_pipeline_core-0.1.1/ai_pipeline_core/documents/utils.py +0 -33
  11. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/.gitignore +0 -0
  12. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/LICENSE +0 -0
  13. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/documents/__init__.py +0 -0
  14. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/documents/document_list.py +0 -0
  15. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/documents/mime_type.py +0 -0
  16. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/exceptions.py +0 -0
  17. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/flow/__init__.py +0 -0
  18. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/flow/config.py +0 -0
  19. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/llm/__init__.py +0 -0
  20. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/llm/ai_messages.py +0 -0
  21. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/llm/model_options.py +0 -0
  22. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/llm/model_response.py +0 -0
  23. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/llm/model_types.py +0 -0
  24. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/logging/__init__.py +0 -0
  25. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/logging/logging.yml +0 -0
  26. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/logging/logging_config.py +0 -0
  27. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/logging/logging_mixin.py +0 -0
  28. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/prompt_manager.py +0 -0
  29. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/py.typed +0 -0
  30. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/settings.py +0 -0
  31. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/tracing.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ai-pipeline-core
3
- Version: 0.1.1
3
+ Version: 0.1.3
4
4
  Summary: Core utilities for AI-powered processing pipelines using prefect
5
5
  Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
6
6
  Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -438,9 +438,15 @@ For learning purposes, see [CLAUDE.md](CLAUDE.md) for our comprehensive coding s
438
438
  ## Documentation
439
439
 
440
440
  - [CLAUDE.md](CLAUDE.md) - Detailed coding standards and architecture guide
441
- - [Prefect Integration](docs/prefect.md) - Prefect patterns and best practices
442
- - [Deployment Guide](docs/prefect_deployment.md) - Production deployment
443
- - [Prefect Logging](docs/prefect_logging.md) - Logging configuration guide
441
+
442
+ ### dependencies_docs/ Directory
443
+ > [!NOTE]
444
+ > The `dependencies_docs/` directory contains guides for AI assistants (like Claude Code) on how to interact with the project's external dependencies and tooling, NOT user documentation for ai-pipeline-core itself. These files are excluded from repository listings to avoid confusion.
445
+
446
+ **AI Assistant Dependency Guides:**
447
+ - [Prefect Integration](dependencies_docs/prefect.md) - Prefect patterns and best practices for AI assistants
448
+ - [Deployment Guide](dependencies_docs/prefect_deployment.md) - Production deployment guide for AI assistants
449
+ - [Prefect Logging](dependencies_docs/prefect_logging.md) - Logging configuration guide for AI assistants
444
450
 
445
451
  ## License
446
452
 
@@ -465,13 +471,9 @@ Built with:
465
471
 
466
472
  ## Stability Notice
467
473
 
468
- **Current Version**: 0.1.1
474
+ **Current Version**: 0.1.2
469
475
  **Status**: Internal Preview
470
476
  **API Stability**: Unstable - Breaking changes expected
471
477
  **Recommended Use**: Learning and reference only
472
478
 
473
479
  For production use, please fork this repository and maintain your own stable version.
474
-
475
- ---
476
-
477
- **Remember**: The best code is no code. The second best is minimal, typed, async code that does exactly what's needed.
@@ -396,9 +396,15 @@ For learning purposes, see [CLAUDE.md](CLAUDE.md) for our comprehensive coding s
396
396
  ## Documentation
397
397
 
398
398
  - [CLAUDE.md](CLAUDE.md) - Detailed coding standards and architecture guide
399
- - [Prefect Integration](docs/prefect.md) - Prefect patterns and best practices
400
- - [Deployment Guide](docs/prefect_deployment.md) - Production deployment
401
- - [Prefect Logging](docs/prefect_logging.md) - Logging configuration guide
399
+
400
+ ### dependencies_docs/ Directory
401
+ > [!NOTE]
402
+ > The `dependencies_docs/` directory contains guides for AI assistants (like Claude Code) on how to interact with the project's external dependencies and tooling, NOT user documentation for ai-pipeline-core itself. These files are excluded from repository listings to avoid confusion.
403
+
404
+ **AI Assistant Dependency Guides:**
405
+ - [Prefect Integration](dependencies_docs/prefect.md) - Prefect patterns and best practices for AI assistants
406
+ - [Deployment Guide](dependencies_docs/prefect_deployment.md) - Production deployment guide for AI assistants
407
+ - [Prefect Logging](dependencies_docs/prefect_logging.md) - Logging configuration guide for AI assistants
402
408
 
403
409
  ## License
404
410
 
@@ -423,13 +429,9 @@ Built with:
423
429
 
424
430
  ## Stability Notice
425
431
 
426
- **Current Version**: 0.1.1
432
+ **Current Version**: 0.1.2
427
433
  **Status**: Internal Preview
428
434
  **API Stability**: Unstable - Breaking changes expected
429
435
  **Recommended Use**: Learning and reference only
430
436
 
431
437
  For production use, please fork this repository and maintain your own stable version.
432
-
433
- ---
434
-
435
- **Remember**: The best code is no code. The second best is minimal, typed, async code that does exactly what's needed.
@@ -16,7 +16,7 @@ from .prompt_manager import PromptManager
16
16
  from .settings import settings
17
17
  from .tracing import trace
18
18
 
19
- __version__ = "0.1.1"
19
+ __version__ = "0.1.2"
20
20
 
21
21
  __all__ = [
22
22
  "Document",
@@ -1,6 +1,7 @@
1
1
  import base64
2
2
  import hashlib
3
3
  import json
4
+ import re
4
5
  from abc import ABC, abstractmethod
5
6
  from base64 import b32encode
6
7
  from enum import StrEnum
@@ -10,6 +11,7 @@ from typing import Any, ClassVar, Literal, Self
10
11
  from pydantic import BaseModel, ConfigDict, field_serializer, field_validator
11
12
  from ruamel.yaml import YAML
12
13
 
14
+ from ai_pipeline_core.documents.utils import canonical_name_key
13
15
  from ai_pipeline_core.exceptions import DocumentNameError, DocumentSizeError
14
16
 
15
17
  from .mime_type import (
@@ -25,6 +27,13 @@ class Document(BaseModel, ABC):
25
27
 
26
28
  MAX_CONTENT_SIZE: ClassVar[int] = 10 * 1024 * 1024 # 10MB default
27
29
  DESCRIPTION_EXTENSION: ClassVar[str] = ".description.md"
30
+ MARKDOWN_LIST_SEPARATOR: ClassVar[str] = "\n\n---\n\n"
31
+
32
+ def __init__(self, **data: Any) -> None:
33
+ """Prevent direct instantiation of abstract Document class."""
34
+ if type(self) is Document:
35
+ raise TypeError("Cannot instantiate abstract Document class directly")
36
+ super().__init__(**data)
28
37
 
29
38
  # Optional enum of allowed file names. Subclasses may set this.
30
39
  # This is used to validate the document name.
@@ -179,10 +188,10 @@ class Document(BaseModel, ABC):
179
188
  """Check if document is an image"""
180
189
  return is_image_mime_type(self.mime_type)
181
190
 
182
- @property
183
- def should_be_cached(self) -> bool:
184
- """Check if document should be cached"""
185
- return False
191
+ @classmethod
192
+ def canonical_name(cls) -> str:
193
+ """Get the canonical name of the document"""
194
+ return canonical_name_key(cls)
186
195
 
187
196
  def as_text(self) -> str:
188
197
  """Parse document as text"""
@@ -192,15 +201,34 @@ class Document(BaseModel, ABC):
192
201
 
193
202
  def as_yaml(self) -> Any:
194
203
  """Parse document as YAML"""
195
- if not self.is_text:
196
- raise ValueError(f"Document is not text: {self.name}")
197
- return YAML().load(self.content.decode("utf-8")) # type: ignore
204
+ return YAML().load(self.as_text())
198
205
 
199
206
  def as_json(self) -> Any:
200
207
  """Parse document as JSON"""
201
- if not self.is_text:
202
- raise ValueError(f"Document is not text: {self.name}")
203
- return json.loads(self.content.decode("utf-8"))
208
+ return json.loads(self.as_text())
209
+
210
+ def as_markdown_list(self) -> list[str]:
211
+ """Parse document as a markdown list"""
212
+ return self.as_text().split(self.MARKDOWN_LIST_SEPARATOR)
213
+
214
+ @classmethod
215
+ def create(cls, name: str, description: str | None, content: bytes | str) -> Self:
216
+ """Create a document from a name, description, and content"""
217
+ if isinstance(content, str):
218
+ content = content.encode("utf-8")
219
+ return cls(name=name, description=description, content=content)
220
+
221
+ @classmethod
222
+ def create_as_markdown_list(cls, name: str, description: str | None, items: list[str]) -> Self:
223
+ """Create a document from a name, description, and list of strings"""
224
+ # remove other list separators (lines that are only the separator + whitespace)
225
+ separator = Document.MARKDOWN_LIST_SEPARATOR.strip()
226
+ pattern = re.compile(rf"^[ \t]*{re.escape(separator)}[ \t]*(?:\r?\n|$)", flags=re.MULTILINE)
227
+ # Normalize CRLF/CR to LF before cleaning to ensure consistent behavior
228
+ normalized_items = [re.sub(r"\r\n?", "\n", item) for item in items]
229
+ cleaned_items = [pattern.sub("", item) for item in normalized_items]
230
+ content = Document.MARKDOWN_LIST_SEPARATOR.join(cleaned_items)
231
+ return cls.create(name, description, content)
204
232
 
205
233
  def serialize_model(self) -> dict[str, Any]:
206
234
  """Serialize document to a dictionary with proper encoding."""
@@ -1,6 +1,6 @@
1
1
  """Flow-specific document base class."""
2
2
 
3
- from typing import Literal, final
3
+ from typing import Any, Literal, final
4
4
 
5
5
  from .document import Document
6
6
 
@@ -15,6 +15,12 @@ class FlowDocument(Document):
15
15
  Compared to TaskDocument, FlowDocument are persistent across Prefect flow runs.
16
16
  """
17
17
 
18
+ def __init__(self, **data: Any) -> None:
19
+ """Prevent direct instantiation of abstract FlowDocument class."""
20
+ if type(self) is FlowDocument:
21
+ raise TypeError("Cannot instantiate abstract FlowDocument class directly")
22
+ super().__init__(**data)
23
+
18
24
  @final
19
25
  def get_base_type(self) -> Literal["flow"]:
20
26
  """Get the document type."""
@@ -1,6 +1,6 @@
1
1
  """Task-specific document base class."""
2
2
 
3
- from typing import Literal, final
3
+ from typing import Any, Literal, final
4
4
 
5
5
  from .document import Document
6
6
 
@@ -16,6 +16,12 @@ class TaskDocument(Document):
16
16
  They are used for intermediate results that are not needed after the task completes.
17
17
  """
18
18
 
19
+ def __init__(self, **data: Any) -> None:
20
+ """Prevent direct instantiation of abstract TaskDocument class."""
21
+ if type(self) is TaskDocument:
22
+ raise TypeError("Cannot instantiate abstract TaskDocument class directly")
23
+ super().__init__(**data)
24
+
19
25
  @final
20
26
  def get_base_type(self) -> Literal["task"]:
21
27
  """Get the document type."""
@@ -0,0 +1,85 @@
1
+ import re
2
+ from typing import Any, Iterable, Type
3
+ from urllib.parse import urlparse
4
+
5
+
6
+ def sanitize_url(url: str) -> str:
7
+ """
8
+ Sanitize URL or query string for use in filenames.
9
+ Removes or replaces characters that are invalid in filenames.
10
+ """
11
+ # Remove protocol if it's a URL
12
+ if url.startswith(("http://", "https://")):
13
+ parsed = urlparse(url)
14
+ # Use domain + path
15
+ url = parsed.netloc + parsed.path
16
+
17
+ # Replace invalid filename characters
18
+ sanitized = re.sub(r'[<>:"/\\|?*]', "_", url)
19
+
20
+ # Replace multiple underscores with single one
21
+ sanitized = re.sub(r"_+", "_", sanitized)
22
+
23
+ # Remove leading/trailing underscores and dots
24
+ sanitized = sanitized.strip("_.")
25
+
26
+ # Limit length to prevent too long filenames
27
+ if len(sanitized) > 100:
28
+ sanitized = sanitized[:100]
29
+
30
+ # Ensure we have something
31
+ if not sanitized:
32
+ sanitized = "unnamed"
33
+
34
+ return sanitized
35
+
36
+
37
+ def camel_to_snake(name: str) -> str:
38
+ """Convert CamelCase (incl. acronyms) to snake_case."""
39
+ s1 = re.sub(r"(.)([A-Z][a-z0-9]+)", r"\1_\2", name)
40
+ s2 = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", s1)
41
+ return s2.replace("__", "_").strip("_").lower()
42
+
43
+
44
+ def canonical_name_key(
45
+ obj_or_name: Type[Any] | str,
46
+ *,
47
+ max_parent_suffixes: int = 3,
48
+ extra_suffixes: Iterable[str] = (),
49
+ ) -> str:
50
+ """
51
+ Produce a canonical snake_case key from a class or name by:
52
+ 1) Starting with the class name (or given string),
53
+ 2) Stripping any trailing parent class names (up to `max_parent_suffixes` from the MRO),
54
+ 3) Stripping any `extra_suffixes`,
55
+ 4) Converting to snake_case.
56
+
57
+ Examples (given typical MROs):
58
+ FinalReportDocument(WorkflowDocument -> Document) -> 'final_report'
59
+ FooWorkflowDocument(WorkflowDocument -> Document) -> 'foo'
60
+ BarFlow(Config -> Base -> Flow) -> 'bar'
61
+ """
62
+ name = obj_or_name.__name__ if isinstance(obj_or_name, type) else str(obj_or_name)
63
+
64
+ # From MRO, collect up to N parent names to consider as removable suffixes
65
+ suffixes: list[str] = []
66
+ if isinstance(obj_or_name, type):
67
+ for base in obj_or_name.mro()[1 : 1 + max_parent_suffixes]:
68
+ if base is object:
69
+ continue
70
+ suffixes.append(base.__name__)
71
+
72
+ # Add any custom suffixes the caller wants to strip (e.g., 'Config')
73
+ suffixes.extend(extra_suffixes)
74
+
75
+ # Iteratively trim the longest matching suffix first
76
+ trimmed = True
77
+ while trimmed and suffixes:
78
+ trimmed = False
79
+ for sfx in sorted(set(suffixes), key=len, reverse=True):
80
+ if sfx and name.endswith(sfx):
81
+ name = name[: -len(sfx)]
82
+ trimmed = True
83
+ break
84
+
85
+ return camel_to_snake(name)
@@ -162,7 +162,7 @@ async def generate(
162
162
  T = TypeVar("T", bound=BaseModel)
163
163
 
164
164
 
165
- @trace
165
+ @trace(ignore_inputs=["context"])
166
166
  async def generate_structured(
167
167
  model: ModelName,
168
168
  response_format: type[T],
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "ai-pipeline-core"
3
- version = "0.1.1"
3
+ version = "0.1.3"
4
4
  description = "Core utilities for AI-powered processing pipelines using prefect"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -140,7 +140,7 @@ reportIncompatibleVariableOverride = "error"
140
140
  reportMissingParameterType = "warning"
141
141
 
142
142
  [tool.bumpversion]
143
- current_version = "0.1.1"
143
+ current_version = "0.1.3"
144
144
  commit = true
145
145
  tag = true
146
146
  tag_name = "v{new_version}"
@@ -1,33 +0,0 @@
1
- import re
2
- from urllib.parse import urlparse
3
-
4
-
5
- def sanitize_url(url: str) -> str:
6
- """
7
- Sanitize URL or query string for use in filenames.
8
- Removes or replaces characters that are invalid in filenames.
9
- """
10
- # Remove protocol if it's a URL
11
- if url.startswith(("http://", "https://")):
12
- parsed = urlparse(url)
13
- # Use domain + path
14
- url = parsed.netloc + parsed.path
15
-
16
- # Replace invalid filename characters
17
- sanitized = re.sub(r'[<>:"/\\|?*]', "_", url)
18
-
19
- # Replace multiple underscores with single one
20
- sanitized = re.sub(r"_+", "_", sanitized)
21
-
22
- # Remove leading/trailing underscores and dots
23
- sanitized = sanitized.strip("_.")
24
-
25
- # Limit length to prevent too long filenames
26
- if len(sanitized) > 100:
27
- sanitized = sanitized[:100]
28
-
29
- # Ensure we have something
30
- if not sanitized:
31
- sanitized = "unnamed"
32
-
33
- return sanitized