PyPI - ai-pipeline-core - Versions diffs - 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl - Mend

ai-pipeline-core 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

ai_pipeline_core/__init__.py +54 -13
ai_pipeline_core/documents/__init__.py +3 -0
ai_pipeline_core/documents/document.py +57 -3
ai_pipeline_core/documents/mime_type.py +64 -32
ai_pipeline_core/flow/__init__.py +5 -1
ai_pipeline_core/flow/options.py +26 -0
ai_pipeline_core/llm/client.py +5 -3
ai_pipeline_core/pipeline.py +418 -0
ai_pipeline_core/prefect.py +7 -0
ai_pipeline_core/simple_runner/__init__.py +19 -0
ai_pipeline_core/simple_runner/cli.py +95 -0
ai_pipeline_core/simple_runner/simple_runner.py +147 -0
ai_pipeline_core/tracing.py +63 -20
{ai_pipeline_core-0.1.5.dist-info → ai_pipeline_core-0.1.7.dist-info}/METADATA +92 -30
{ai_pipeline_core-0.1.5.dist-info → ai_pipeline_core-0.1.7.dist-info}/RECORD +17 -11
{ai_pipeline_core-0.1.5.dist-info → ai_pipeline_core-0.1.7.dist-info}/WHEEL +0 -0
{ai_pipeline_core-0.1.5.dist-info → ai_pipeline_core-0.1.7.dist-info}/licenses/LICENSE +0 -0

ai_pipeline_core/__init__.py CHANGED Viewed

@@ -1,7 +1,23 @@
 """Pipeline Core - Shared infrastructure for AI pipelines."""
-from .documents import Document, DocumentList, FlowDocument, TaskDocument
-from .flow import FlowConfig
+from . import llm
+from .documents import (
+    Document,
+    DocumentList,
+    FlowDocument,
+    TaskDocument,
+    canonical_name_key,
+    sanitize_url,
+)
+from .flow import FlowConfig, FlowOptions
+from .llm import (
+    AIMessages,
+    AIMessageType,
+    ModelName,
+    ModelOptions,
+    ModelResponse,
+    StructuredModelResponse,
+)
 from .logging import (
     LoggerMixin,
     LoggingConfig,
@@ -9,28 +25,53 @@ from .logging import (
     get_pipeline_logger,
     setup_logging,
 )
-from .logging import (
-    get_pipeline_logger as get_logger,
-)
+from .logging import get_pipeline_logger as get_logger
+from .pipeline import pipeline_flow, pipeline_task
+from .prefect import flow, task
 from .prompt_manager import PromptManager
 from .settings import settings
-from .tracing import trace
+from .tracing import TraceInfo, TraceLevel, trace
-__version__ = "0.1.4"
+__version__ = "0.1.7"
 __all__ = [
-    "Document",
-    "DocumentList",
-    "FlowConfig",
-    "FlowDocument",
+    # Config/Settings
+    "settings",
+    # Logging
     "get_logger",
     "get_pipeline_logger",
     "LoggerMixin",
     "LoggingConfig",
-    "PromptManager",
-    "settings",
     "setup_logging",
     "StructuredLoggerMixin",
+    # Documents
+    "Document",
+    "DocumentList",
+    "FlowDocument",
     "TaskDocument",
+    "canonical_name_key",
+    "sanitize_url",
+    # Flow/Task
+    "FlowConfig",
+    "FlowOptions",
+    # Prefect decorators (clean, no tracing)
+    "task",
+    "flow",
+    # Pipeline decorators (with tracing)
+    "pipeline_task",
+    "pipeline_flow",
+    # LLM
+    "llm",
+    "ModelName",
+    "ModelOptions",
+    "ModelResponse",
+    "StructuredModelResponse",
+    "AIMessages",
+    "AIMessageType",
+    # Tracing
     "trace",
+    "TraceLevel",
+    "TraceInfo",
+    # Utils
+    "PromptManager",
 ]

ai_pipeline_core/documents/__init__.py CHANGED Viewed

@@ -2,10 +2,13 @@ from .document import Document
 from .document_list import DocumentList
 from .flow_document import FlowDocument
 from .task_document import TaskDocument
+from .utils import canonical_name_key, sanitize_url
 __all__ = [
     "Document",
     "DocumentList",
     "FlowDocument",
     "TaskDocument",
+    "canonical_name_key",
+    "sanitize_url",
 ]

ai_pipeline_core/documents/document.py CHANGED Viewed

@@ -6,7 +6,7 @@ from abc import ABC, abstractmethod
 from base64 import b32encode
 from enum import StrEnum
 from functools import cached_property
-from typing import Any, ClassVar, Literal, Self
+from typing import Any, ClassVar, Literal, Self, TypeVar
 from pydantic import BaseModel, ConfigDict, field_serializer, field_validator
 from ruamel.yaml import YAML
@@ -19,8 +19,11 @@ from .mime_type import (
     is_image_mime_type,
     is_pdf_mime_type,
     is_text_mime_type,
+    is_yaml_mime_type,
 )
+TModel = TypeVar("TModel", bound=BaseModel)
 class Document(BaseModel, ABC):
     """Abstract base class for all documents"""
@@ -207,15 +210,40 @@ class Document(BaseModel, ABC):
         """Parse document as JSON"""
         return json.loads(self.as_text())
+    def as_pydantic_model(self, model_type: type[TModel]) -> TModel:
+        """Parse document as a pydantic model and return the validated instance"""
+        data = self.as_yaml() if is_yaml_mime_type(self.mime_type) else self.as_json()
+        return model_type.model_validate(data)
     def as_markdown_list(self) -> list[str]:
         """Parse document as a markdown list"""
         return self.as_text().split(self.MARKDOWN_LIST_SEPARATOR)
     @classmethod
-    def create(cls, name: str, description: str | None, content: bytes | str) -> Self:
+    def create(
+        cls,
+        name: str,
+        description: str | None,
+        content: bytes | str | BaseModel | list[str] | Any,
+    ) -> Self:
         """Create a document from a name, description, and content"""
-        if isinstance(content, str):
+        is_yaml_extension = name.endswith(".yaml") or name.endswith(".yml")
+        is_json_extension = name.endswith(".json")
+        is_markdown_extension = name.endswith(".md")
+        is_str_list = isinstance(content, list) and all(isinstance(item, str) for item in content)
+        if isinstance(content, bytes):
+            pass
+        elif isinstance(content, str):
             content = content.encode("utf-8")
+        elif is_str_list and is_markdown_extension:
+            return cls.create_as_markdown_list(name, description, content)  # type: ignore[arg-type]
+        elif is_yaml_extension:
+            return cls.create_as_yaml(name, description, content)
+        elif is_json_extension:
+            return cls.create_as_json(name, description, content)
+        else:
+            raise ValueError(f"Unsupported content type: {type(content)} for {name}")
         return cls(name=name, description=description, content=content)
     @classmethod
@@ -230,6 +258,32 @@ class Document(BaseModel, ABC):
         content = Document.MARKDOWN_LIST_SEPARATOR.join(cleaned_items)
         return cls.create(name, description, content)
+    @classmethod
+    def create_as_json(cls, name: str, description: str | None, data: Any) -> Self:
+        """Create a document from a name, description, and JSON data"""
+        assert name.endswith(".json"), f"Document name must end with .json: {name}"
+        if isinstance(data, BaseModel):
+            data = data.model_dump(mode="json")
+        content = json.dumps(data, indent=2).encode("utf-8")
+        return cls.create(name, description, content)
+    @classmethod
+    def create_as_yaml(cls, name: str, description: str | None, data: Any) -> Self:
+        """Create a document from a name, description, and YAML data"""
+        assert name.endswith(".yaml") or name.endswith(".yml"), (
+            f"Document name must end with .yaml or .yml: {name}"
+        )
+        if isinstance(data, BaseModel):
+            data = data.model_dump()
+        yaml = YAML()
+        yaml.indent(mapping=2, sequence=4, offset=2)
+        from io import BytesIO
+        stream = BytesIO()
+        yaml.dump(data, stream)
+        content = stream.getvalue()
+        return cls.create(name, description, content)
     def serialize_model(self) -> dict[str, Any]:
         """Serialize document to a dictionary with proper encoding."""
         result = {

ai_pipeline_core/documents/mime_type.py CHANGED Viewed

@@ -6,53 +6,75 @@ from ai_pipeline_core.logging import get_pipeline_logger
 logger = get_pipeline_logger(__name__)
+# Extension to MIME type mapping for common formats
+# These are formats where extension-based detection is more reliable
+EXTENSION_MIME_MAP = {
+    "md": "text/markdown",
+    "txt": "text/plain",
+    "pdf": "application/pdf",
+    "png": "image/png",
+    "jpg": "image/jpeg",
+    "jpeg": "image/jpeg",
+    "gif": "image/gif",
+    "bmp": "image/bmp",
+    "webp": "image/webp",
+    "json": "application/json",
+    "yaml": "application/yaml",
+    "yml": "application/yaml",
+    "xml": "text/xml",
+    "html": "text/html",
+    "htm": "text/html",
+    "py": "text/x-python",
+    "css": "text/css",
+    "js": "application/javascript",
+    "ts": "application/typescript",
+    "tsx": "application/typescript",
+    "jsx": "application/javascript",
+}
 def detect_mime_type(content: bytes, name: str) -> str:
-    """Detect MIME type from content using python-magic"""
+    """Detect MIME type from content and filename
-    try:
-        if name.endswith(".md") and content.decode("utf-8"):
-            return "text/markdown"
-    except UnicodeDecodeError:
-        pass
+    Uses a hybrid approach:
+    1. Check for empty content
+    2. Try extension-based detection for known formats
+    3. Fall back to magic content detection
+    4. Final fallback to application/octet-stream
+    """
-    if len(content) <= 4:
+    # Check for empty content
+    if len(content) == 0:
         return "application/x-empty"
+    # Try extension-based detection first for known formats
+    # This is more reliable for text formats that magic might misidentify
+    ext = name.lower().split(".")[-1] if "." in name else ""
+    if ext in EXTENSION_MIME_MAP:
+        return EXTENSION_MIME_MAP[ext]
+    # Try content-based detection with magic
     try:
         mime = magic.from_buffer(content[:1024], mime=True)
-        return mime
+        # If magic returns a valid mime type, use it
+        if mime and mime != "application/octet-stream":
+            return mime
     except (AttributeError, OSError, magic.MagicException) as e:
-        logger.warning(f"MIME detection failed for {name}: {e}, falling back to extension")
-        return mime_type_from_extension(name)
+        logger.warning(f"MIME detection failed for {name}: {e}")
     except Exception as e:
         logger.error(f"Unexpected error in MIME detection for {name}: {e}")
-        return mime_type_from_extension(name)
+    # Final fallback based on extension or default
+    return EXTENSION_MIME_MAP.get(ext, "application/octet-stream")
 def mime_type_from_extension(name: str) -> str:
-    """Get MIME type based on file extension"""
-    ext = name.lower().split(".")[-1] if "." in name else ""
+    """Get MIME type based on file extension
-    mime_map = {
-        "md": "text/markdown",
-        "txt": "text/plain",
-        "pdf": "application/pdf",
-        "png": "image/png",
-        "jpg": "image/jpeg",
-        "jpeg": "image/jpeg",
-        "gif": "image/gif",
-        "bmp": "image/bmp",
-        "webp": "image/webp",
-        "json": "application/json",
-        "yaml": "application/yaml",
-        "yml": "application/yaml",
-        "xml": "text/xml",
-        "html": "text/html",
-        "htm": "text/html",
-    }
-    return mime_map.get(ext, "application/octet-stream")
+    Legacy function kept for compatibility
+    """
+    ext = name.lower().split(".")[-1] if "." in name else ""
+    return EXTENSION_MIME_MAP.get(ext, "application/octet-stream")
 def is_text_mime_type(mime_type: str) -> bool:
@@ -68,6 +90,16 @@ def is_text_mime_type(mime_type: str) -> bool:
     return any(mime_type.startswith(t) for t in text_types)
+def is_json_mime_type(mime_type: str) -> bool:
+    """Check if MIME type is JSON"""
+    return mime_type == "application/json"
+def is_yaml_mime_type(mime_type: str) -> bool:
+    """Check if MIME type is YAML"""
+    return mime_type == "application/yaml" or mime_type == "application/x-yaml"
 def is_pdf_mime_type(mime_type: str) -> bool:
     """Check if MIME type is PDF"""
     return mime_type == "application/pdf"

ai_pipeline_core/flow/__init__.py CHANGED Viewed

@@ -1,3 +1,7 @@
 from .config import FlowConfig
+from .options import FlowOptions
-__all__ = ["FlowConfig"]
+__all__ = [
+    "FlowConfig",
+    "FlowOptions",
+]

ai_pipeline_core/flow/options.py ADDED Viewed

@@ -0,0 +1,26 @@
+from typing import TypeVar
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+from ai_pipeline_core.llm import ModelName
+T = TypeVar("T", bound="FlowOptions")
+class FlowOptions(BaseSettings):
+    """Base configuration for AI Pipeline flows."""
+    core_model: ModelName | str = Field(
+        default="gpt-5",
+        description="Primary model for complex analysis and generation tasks.",
+    )
+    small_model: ModelName | str = Field(
+        default="gpt-5-mini",
+        description="Fast, cost-effective model for simple tasks and orchestration.",
+    )
+    model_config = SettingsConfigDict(frozen=True, extra="ignore")
+__all__ = ["FlowOptions"]

ai_pipeline_core/llm/client.py CHANGED Viewed

@@ -118,11 +118,13 @@ async def _generate_with_retry(
                 span.set_attributes(response.get_laminar_metadata())
                 Laminar.set_span_output(response.content)
                 if not response.content:
-                    # disable cache in case of empty response
-                    completion_kwargs["extra_body"]["cache"] = {"no-cache": True}
                     raise ValueError(f"Model {model} returned an empty response.")
                 return response
         except (asyncio.TimeoutError, ValueError, Exception) as e:
+            if not isinstance(e, asyncio.TimeoutError):
+                # disable cache if it's not a timeout because it may cause an error
+                completion_kwargs["extra_body"]["cache"] = {"no-cache": True}
             logger.warning(
                 "LLM generation failed (attempt %d/%d): %s",
                 attempt + 1,
@@ -167,7 +169,7 @@ T = TypeVar("T", bound=BaseModel)
 @trace(ignore_inputs=["context"])
 async def generate_structured(
-    model: ModelName,
+    model: ModelName | str,
     response_format: type[T],
     *,
     context: AIMessages = AIMessages(),

ai-pipeline-core 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl

ai-pipeline-core 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl