PyPI - ai-pipeline-core - Versions diffs - 0.1.4__tar.gz → 0.1.6__tar.gz - Mend

ai-pipeline-core 0.1.4 (tar.gz) → 0.1.6 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31)

{ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-pipeline-core
-Version: 0.1.4
+Version: 0.1.6
 Summary: Core utilities for AI-powered processing pipelines using prefect
 Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
 Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -20,7 +20,7 @@ Classifier: Typing :: Typed
 Requires-Python: >=3.12
 Requires-Dist: httpx>=0.28.1
 Requires-Dist: jinja2>=3.1.6
-Requires-Dist: lmnr>=0.7.4
+Requires-Dist: lmnr>=0.7.5
 Requires-Dist: openai>=1.99.9
 Requires-Dist: prefect>=3.4.13
 Requires-Dist: pydantic-settings>=2.10.1
@@ -471,7 +471,7 @@ Built with:
 ## Stability Notice
-**Current Version**: 0.1.2
+**Current Version**: 0.1.6
 **Status**: Internal Preview
 **API Stability**: Unstable - Breaking changes expected
 **Recommended Use**: Learning and reference only

{ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/README.md RENAMED Viewed

@@ -429,7 +429,7 @@ Built with:
 ## Stability Notice
-**Current Version**: 0.1.2
+**Current Version**: 0.1.6
 **Status**: Internal Preview
 **API Stability**: Unstable - Breaking changes expected
 **Recommended Use**: Learning and reference only

{ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/ai_pipeline_core/__init__.py RENAMED Viewed

@@ -16,7 +16,7 @@ from .prompt_manager import PromptManager
 from .settings import settings
 from .tracing import trace
-__version__ = "0.1.2"
+__version__ = "0.1.6"
 __all__ = [
     "Document",

{ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/ai_pipeline_core/documents/document.py RENAMED Viewed

@@ -6,7 +6,7 @@ from abc import ABC, abstractmethod
 from base64 import b32encode
 from enum import StrEnum
 from functools import cached_property
-from typing import Any, ClassVar, Literal, Self
+from typing import Any, ClassVar, Literal, Self, TypeVar
 from pydantic import BaseModel, ConfigDict, field_serializer, field_validator
 from ruamel.yaml import YAML
@@ -19,13 +19,16 @@ from .mime_type import (
     is_image_mime_type,
     is_pdf_mime_type,
     is_text_mime_type,
+    is_yaml_mime_type,
 )
+TModel = TypeVar("TModel", bound=BaseModel)
 class Document(BaseModel, ABC):
     """Abstract base class for all documents"""
-    MAX_CONTENT_SIZE: ClassVar[int] = 10 * 1024 * 1024  # 10MB default
+    MAX_CONTENT_SIZE: ClassVar[int] = 25 * 1024 * 1024  # 25MB default
     DESCRIPTION_EXTENSION: ClassVar[str] = ".description.md"
     MARKDOWN_LIST_SEPARATOR: ClassVar[str] = "\n\n---\n\n"
@@ -105,7 +108,7 @@ class Document(BaseModel, ABC):
         except TypeError:
             raise DocumentNameError(f"{cls.__name__}.FILES must be an Enum of string values")
-        if name not in allowed:
+        if len(allowed) > 0 and name not in allowed:
             allowed_str = ", ".join(sorted(allowed))
             raise DocumentNameError(f"Invalid filename '{name}'. Allowed names: {allowed_str}")
@@ -207,15 +210,40 @@ class Document(BaseModel, ABC):
         """Parse document as JSON"""
         return json.loads(self.as_text())
+    def as_pydantic_model(self, model_type: type[TModel]) -> TModel:
+        """Parse document as a pydantic model and return the validated instance"""
+        data = self.as_yaml() if is_yaml_mime_type(self.mime_type) else self.as_json()
+        return model_type.model_validate(data)
     def as_markdown_list(self) -> list[str]:
         """Parse document as a markdown list"""
         return self.as_text().split(self.MARKDOWN_LIST_SEPARATOR)
     @classmethod
-    def create(cls, name: str, description: str | None, content: bytes | str) -> Self:
+    def create(
+        cls,
+        name: str,
+        description: str | None,
+        content: bytes | str | BaseModel | list[str] | Any,
+    ) -> Self:
         """Create a document from a name, description, and content"""
-        if isinstance(content, str):
+        is_yaml_extension = name.endswith(".yaml") or name.endswith(".yml")
+        is_json_extension = name.endswith(".json")
+        is_markdown_extension = name.endswith(".md")
+        is_str_list = isinstance(content, list) and all(isinstance(item, str) for item in content)
+        if isinstance(content, bytes):
+            pass
+        elif isinstance(content, str):
             content = content.encode("utf-8")
+        elif is_str_list and is_markdown_extension:
+            return cls.create_as_markdown_list(name, description, content)  # type: ignore[arg-type]
+        elif is_yaml_extension:
+            return cls.create_as_yaml(name, description, content)
+        elif is_json_extension:
+            return cls.create_as_json(name, description, content)
+        else:
+            raise ValueError(f"Unsupported content type: {type(content)} for {name}")
         return cls(name=name, description=description, content=content)
     @classmethod
@@ -230,6 +258,32 @@ class Document(BaseModel, ABC):
         content = Document.MARKDOWN_LIST_SEPARATOR.join(cleaned_items)
         return cls.create(name, description, content)
+    @classmethod
+    def create_as_json(cls, name: str, description: str | None, data: Any) -> Self:
+        """Create a document from a name, description, and JSON data"""
+        assert name.endswith(".json"), f"Document name must end with .json: {name}"
+        if isinstance(data, BaseModel):
+            data = data.model_dump(mode="json")
+        content = json.dumps(data, indent=2).encode("utf-8")
+        return cls.create(name, description, content)
+    @classmethod
+    def create_as_yaml(cls, name: str, description: str | None, data: Any) -> Self:
+        """Create a document from a name, description, and YAML data"""
+        assert name.endswith(".yaml") or name.endswith(".yml"), (
+            f"Document name must end with .yaml or .yml: {name}"
+        )
+        if isinstance(data, BaseModel):
+            data = data.model_dump()
+        yaml = YAML()
+        yaml.indent(mapping=2, sequence=4, offset=2)
+        from io import BytesIO
+        stream = BytesIO()
+        yaml.dump(data, stream)
+        content = stream.getvalue()
+        return cls.create(name, description, content)
     def serialize_model(self) -> dict[str, Any]:
         """Serialize document to a dictionary with proper encoding."""
         result = {

ai_pipeline_core-0.1.6/ai_pipeline_core/documents/mime_type.py ADDED Viewed

@@ -0,0 +1,110 @@
+"""MIME type detection utilities for documents"""
+import magic
+from ai_pipeline_core.logging import get_pipeline_logger
+logger = get_pipeline_logger(__name__)
+# Extension to MIME type mapping for common formats
+# These are formats where extension-based detection is more reliable
+EXTENSION_MIME_MAP = {
+    "md": "text/markdown",
+    "txt": "text/plain",
+    "pdf": "application/pdf",
+    "png": "image/png",
+    "jpg": "image/jpeg",
+    "jpeg": "image/jpeg",
+    "gif": "image/gif",
+    "bmp": "image/bmp",
+    "webp": "image/webp",
+    "json": "application/json",
+    "yaml": "application/yaml",
+    "yml": "application/yaml",
+    "xml": "text/xml",
+    "html": "text/html",
+    "htm": "text/html",
+    "py": "text/x-python",
+    "css": "text/css",
+    "js": "application/javascript",
+    "ts": "application/typescript",
+    "tsx": "application/typescript",
+    "jsx": "application/javascript",
+}
+def detect_mime_type(content: bytes, name: str) -> str:
+    """Detect MIME type from content and filename
+    Uses a hybrid approach:
+    1. Check for empty content
+    2. Try extension-based detection for known formats
+    3. Fall back to magic content detection
+    4. Final fallback to application/octet-stream
+    """
+    # Check for empty content
+    if len(content) == 0:
+        return "application/x-empty"
+    # Try extension-based detection first for known formats
+    # This is more reliable for text formats that magic might misidentify
+    ext = name.lower().split(".")[-1] if "." in name else ""
+    if ext in EXTENSION_MIME_MAP:
+        return EXTENSION_MIME_MAP[ext]
+    # Try content-based detection with magic
+    try:
+        mime = magic.from_buffer(content[:1024], mime=True)
+        # If magic returns a valid mime type, use it
+        if mime and mime != "application/octet-stream":
+            return mime
+    except (AttributeError, OSError, magic.MagicException) as e:
+        logger.warning(f"MIME detection failed for {name}: {e}")
+    except Exception as e:
+        logger.error(f"Unexpected error in MIME detection for {name}: {e}")
+    # Final fallback based on extension or default
+    return EXTENSION_MIME_MAP.get(ext, "application/octet-stream")
+def mime_type_from_extension(name: str) -> str:
+    """Get MIME type based on file extension
+    Legacy function kept for compatibility
+    """
+    ext = name.lower().split(".")[-1] if "." in name else ""
+    return EXTENSION_MIME_MAP.get(ext, "application/octet-stream")
+def is_text_mime_type(mime_type: str) -> bool:
+    """Check if MIME type represents text content"""
+    text_types = [
+        "text/",
+        "application/json",
+        "application/xml",
+        "application/javascript",
+        "application/yaml",
+        "application/x-yaml",
+    ]
+    return any(mime_type.startswith(t) for t in text_types)
+def is_json_mime_type(mime_type: str) -> bool:
+    """Check if MIME type is JSON"""
+    return mime_type == "application/json"
+def is_yaml_mime_type(mime_type: str) -> bool:
+    """Check if MIME type is YAML"""
+    return mime_type == "application/yaml" or mime_type == "application/x-yaml"
+def is_pdf_mime_type(mime_type: str) -> bool:
+    """Check if MIME type is PDF"""
+    return mime_type == "application/pdf"
+def is_image_mime_type(mime_type: str) -> bool:
+    """Check if MIME type is an image"""
+    return mime_type.startswith("image/")

{ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/ai_pipeline_core/tracing.py RENAMED Viewed

@@ -103,6 +103,7 @@ def trace(
     ignore_inputs: list[str] | None = None,
     input_formatter: Callable[..., str] | None = None,
     output_formatter: Callable[..., str] | None = None,
+    preserve_global_context: bool = True,
 ) -> Callable[[Callable[P, R]], Callable[P, R]] | Callable[P, R]:
     """Decorator that wires Laminar tracing and observation into a function.
@@ -136,6 +137,7 @@ def trace(
         _ignore_inputs = ignore_inputs
         _input_formatter = input_formatter
         _output_formatter = output_formatter
+        _preserve_global_context = preserve_global_context
         # --- Check debug_only flag and environment variable ---
         if debug_only and os.getenv("LMNR_DEBUG", "").lower() != "true":
@@ -173,6 +175,8 @@ def trace(
                 observe_params["input_formatter"] = _input_formatter
             if _output_formatter is not None:
                 observe_params["output_formatter"] = _output_formatter
+            if _preserve_global_context:
+                observe_params["preserve_global_context"] = _preserve_global_context
             return observe_params

{ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "ai-pipeline-core"
-version = "0.1.4"
+version = "0.1.6"
 description = "Core utilities for AI-powered processing pipelines using prefect"
 readme = "README.md"
 license = {text = "MIT"}
@@ -22,7 +22,7 @@ classifiers = [
 dependencies = [
     "httpx>=0.28.1",
     "Jinja2>=3.1.6",
-    "lmnr>=0.7.4",
+    "lmnr>=0.7.5",
     "openai>=1.99.9",
     "prefect>=3.4.13",
     "pydantic-settings>=2.10.1",
@@ -140,7 +140,7 @@ reportIncompatibleVariableOverride = "error"
 reportMissingParameterType = "warning"
 [tool.bumpversion]
-current_version = "0.1.4"
+current_version = "0.1.6"
 commit = true
 tag = true
 tag_name = "v{new_version}"

ai_pipeline_core-0.1.4/ai_pipeline_core/documents/mime_type.py DELETED Viewed

@@ -1,78 +0,0 @@
-"""MIME type detection utilities for documents"""
-import magic
-from ai_pipeline_core.logging import get_pipeline_logger
-logger = get_pipeline_logger(__name__)
-def detect_mime_type(content: bytes, name: str) -> str:
-    """Detect MIME type from content using python-magic"""
-    try:
-        if name.endswith(".md") and content.decode("utf-8"):
-            return "text/markdown"
-    except UnicodeDecodeError:
-        pass
-    if len(content) <= 4:
-        return "application/x-empty"
-    try:
-        mime = magic.from_buffer(content[:1024], mime=True)
-        return mime
-    except (AttributeError, OSError, magic.MagicException) as e:
-        logger.warning(f"MIME detection failed for {name}: {e}, falling back to extension")
-        return mime_type_from_extension(name)
-    except Exception as e:
-        logger.error(f"Unexpected error in MIME detection for {name}: {e}")
-        return mime_type_from_extension(name)
-def mime_type_from_extension(name: str) -> str:
-    """Get MIME type based on file extension"""
-    ext = name.lower().split(".")[-1] if "." in name else ""
-    mime_map = {
-        "md": "text/markdown",
-        "txt": "text/plain",
-        "pdf": "application/pdf",
-        "png": "image/png",
-        "jpg": "image/jpeg",
-        "jpeg": "image/jpeg",
-        "gif": "image/gif",
-        "bmp": "image/bmp",
-        "webp": "image/webp",
-        "json": "application/json",
-        "yaml": "application/yaml",
-        "yml": "application/yaml",
-        "xml": "text/xml",
-        "html": "text/html",
-        "htm": "text/html",
-    }
-    return mime_map.get(ext, "application/octet-stream")
-def is_text_mime_type(mime_type: str) -> bool:
-    """Check if MIME type represents text content"""
-    text_types = [
-        "text/",
-        "application/json",
-        "application/xml",
-        "application/javascript",
-        "application/yaml",
-        "application/x-yaml",
-    ]
-    return any(mime_type.startswith(t) for t in text_types)
-def is_pdf_mime_type(mime_type: str) -> bool:
-    """Check if MIME type is PDF"""
-    return mime_type == "application/pdf"
-def is_image_mime_type(mime_type: str) -> bool:
-    """Check if MIME type is an image"""
-    return mime_type.startswith("image/")