ai_pipeline_core-0.1.1-py3-none-any.whl
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +36 -0
- ai_pipeline_core/documents/__init__.py +11 -0
- ai_pipeline_core/documents/document.py +252 -0
- ai_pipeline_core/documents/document_list.py +131 -0
- ai_pipeline_core/documents/flow_document.py +21 -0
- ai_pipeline_core/documents/mime_type.py +78 -0
- ai_pipeline_core/documents/task_document.py +22 -0
- ai_pipeline_core/documents/utils.py +33 -0
- ai_pipeline_core/exceptions.py +61 -0
- ai_pipeline_core/flow/__init__.py +3 -0
- ai_pipeline_core/flow/config.py +66 -0
- ai_pipeline_core/llm/__init__.py +19 -0
- ai_pipeline_core/llm/ai_messages.py +129 -0
- ai_pipeline_core/llm/client.py +218 -0
- ai_pipeline_core/llm/model_options.py +39 -0
- ai_pipeline_core/llm/model_response.py +149 -0
- ai_pipeline_core/llm/model_types.py +17 -0
- ai_pipeline_core/logging/__init__.py +10 -0
- ai_pipeline_core/logging/logging.yml +66 -0
- ai_pipeline_core/logging/logging_config.py +154 -0
- ai_pipeline_core/logging/logging_mixin.py +223 -0
- ai_pipeline_core/prompt_manager.py +115 -0
- ai_pipeline_core/py.typed +0 -0
- ai_pipeline_core/settings.py +24 -0
- ai_pipeline_core/tracing.py +205 -0
- ai_pipeline_core-0.1.1.dist-info/METADATA +477 -0
- ai_pipeline_core-0.1.1.dist-info/RECORD +29 -0
- ai_pipeline_core-0.1.1.dist-info/WHEEL +4 -0
- ai_pipeline_core-0.1.1.dist-info/licenses/LICENSE +21 -0
ai_pipeline_core/logging/logging.yml
@@ -0,0 +1,66 @@
+# AI Pipeline Core Logging Configuration
+# This configuration integrates with Prefect's logging system
+version: 1
+disable_existing_loggers: false
+
+formatters:
+  standard:
+    format: "%(asctime)s.%(msecs)03d | %(levelname)-7s | %(name)s - %(message)s"
+    datefmt: "%H:%M:%S"
+
+  detailed:
+    format: "%(asctime)s | %(levelname)-7s | %(name)s | %(funcName)s:%(lineno)d - %(message)s"
+    datefmt: "%Y-%m-%d %H:%M:%S"
+
+handlers:
+  console:
+    class: logging.StreamHandler
+    formatter: standard
+    stream: ext://sys.stdout
+
+  file:
+    class: logging.handlers.RotatingFileHandler
+    formatter: detailed
+    filename: ai_pipeline.log
+    maxBytes: 10485760  # 10MB
+    backupCount: 5
+    encoding: utf-8
+
+  error_file:
+    class: logging.handlers.RotatingFileHandler
+    formatter: detailed
+    filename: ai_pipeline_errors.log
+    maxBytes: 10485760  # 10MB
+    backupCount: 5
+    level: ERROR
+    encoding: utf-8
+
+loggers:
+  # AI Pipeline Core loggers
+  ai_pipeline_core:
+    level: INFO
+    handlers: [console]
+    propagate: false
+
+  ai_pipeline_core.documents:
+    level: INFO
+
+  ai_pipeline_core.llm:
+    level: INFO
+
+  ai_pipeline_core.flow:
+    level: INFO
+
+  ai_pipeline_core.testing:
+    level: DEBUG
+
+  # External libraries
+  httpx:
+    level: WARNING
+
+  openai:
+    level: WARNING
+
+root:
+  level: WARNING
+  handlers: [console, error_file]
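
This YAML follows the stdlib logging.config.dictConfig schema (version 1), so it can be applied even without the package's own helpers. A minimal sketch, assuming the file has been copied to a local path of your choosing:

    import logging.config

    import yaml

    # Load the dictConfig-schema YAML shown above and apply it; the path
    # here is illustrative, not something the package mandates.
    with open("logging.yml", "r", encoding="utf-8") as f:
        logging.config.dictConfig(yaml.safe_load(f))

    # The ai_pipeline_core logger now emits INFO and above to stdout; the
    # root logger routes WARNING+ to console plus ai_pipeline_errors.log.
    logging.getLogger("ai_pipeline_core").info("configured from logging.yml")

Note that the `file` handler is defined but not attached to any logger in this config, so ai_pipeline.log is only written if a consumer wires it up explicitly.
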
ai_pipeline_core/logging/logging_config.py
@@ -0,0 +1,154 @@
+"""Centralized logging configuration for AI Pipeline Core using Prefect logging"""
+
+import os
+from pathlib import Path
+from typing import Any, Dict, Optional
+
+import yaml
+from prefect.logging import get_logger
+
+# Default log levels for different components
+DEFAULT_LOG_LEVELS = {
+    "ai_pipeline_core": "INFO",
+    "ai_pipeline_core.documents": "INFO",
+    "ai_pipeline_core.llm": "INFO",
+    "ai_pipeline_core.flow": "INFO",
+    "ai_pipeline_core.testing": "DEBUG",
+}
+
+
+class LoggingConfig:
+    """Manages logging configuration for the pipeline using Prefect logging"""
+
+    def __init__(self, config_path: Optional[Path] = None):
+        self.config_path = config_path or self._get_default_config_path()
+        self._config: Optional[Dict[str, Any]] = None
+
+    @staticmethod
+    def _get_default_config_path() -> Optional[Path]:
+        """Get default config path from environment or package"""
+        # Check environment variable first
+        if env_path := os.environ.get("AI_PIPELINE_LOGGING_CONFIG"):
+            return Path(env_path)
+
+        # Check Prefect's setting
+        if prefect_path := os.environ.get("PREFECT_LOGGING_SETTINGS_PATH"):
+            return Path(prefect_path)
+
+        return None
+
+    def load_config(self) -> Dict[str, Any]:
+        """Load logging configuration from file"""
+        if self._config is None:
+            if self.config_path and self.config_path.exists():
+                with open(self.config_path, "r") as f:
+                    self._config = yaml.safe_load(f)
+            else:
+                self._config = self._get_default_config()
+        # self._config cannot be None at this point
+        assert self._config is not None
+        return self._config
+
+    @staticmethod
+    def _get_default_config() -> Dict[str, Any]:
+        """Get default logging configuration compatible with Prefect"""
+        return {
+            "version": 1,
+            "disable_existing_loggers": False,
+            "formatters": {
+                "standard": {
+                    "format": "%(asctime)s.%(msecs)03d | %(levelname)-7s | %(name)s - %(message)s",
+                    "datefmt": "%H:%M:%S",
+                },
+                "detailed": {
+                    "format": (
+                        "%(asctime)s | %(levelname)-7s | %(name)s | "
+                        "%(funcName)s:%(lineno)d - %(message)s"
+                    ),
+                    "datefmt": "%Y-%m-%d %H:%M:%S",
+                },
+            },
+            "handlers": {
+                "console": {
+                    "class": "logging.StreamHandler",
+                    "formatter": "standard",
+                    "stream": "ext://sys.stdout",
+                },
+            },
+            "loggers": {
+                "ai_pipeline_core": {
+                    "level": os.environ.get("AI_PIPELINE_LOG_LEVEL", "INFO"),
+                    "handlers": ["console"],
+                    "propagate": False,
+                },
+            },
+            "root": {
+                "level": "WARNING",
+                "handlers": ["console"],
+            },
+        }
+
+    def apply(self):
+        """Apply the logging configuration"""
+        import logging.config
+
+        config = self.load_config()
+        logging.config.dictConfig(config)
+
+        # Set Prefect logging environment variables if needed
+        if "prefect" in config.get("loggers", {}):
+            prefect_level = config["loggers"]["prefect"].get("level", "INFO")
+            os.environ.setdefault("PREFECT_LOGGING_LEVEL", prefect_level)
+
+
+# Global configuration instance
+_logging_config: Optional[LoggingConfig] = None
+
+
+def setup_logging(config_path: Optional[Path] = None, level: Optional[str] = None):
+    """
+    Set up logging for the AI Pipeline Core library
+
+    Args:
+        config_path: Optional path to logging configuration file
+        level: Optional default log level (overrides config)
+
+    Example:
+        >>> from ai_pipeline_core.logging.logging_config import setup_logging
+        >>> setup_logging(level="DEBUG")
+    """
+    global _logging_config
+
+    _logging_config = LoggingConfig(config_path)
+    _logging_config.apply()
+
+    # Override level if provided
+    if level:
+        # Set for our loggers
+        for logger_name in DEFAULT_LOG_LEVELS:
+            logger = get_logger(logger_name)
+            logger.setLevel(level)
+
+        # Also set for Prefect
+        os.environ["PREFECT_LOGGING_LEVEL"] = level
+
+
+def get_pipeline_logger(name: str):
+    """
+    Get a logger for pipeline components using Prefect's get_logger
+
+    Args:
+        name: Logger name (e.g., "ai_pipeline_core.documents")
+
+    Returns:
+        Logger instance
+
+    Example:
+        >>> logger = get_pipeline_logger("ai_pipeline_core.llm")
+        >>> logger.info("Starting LLM processing")
+    """
+    # Ensure logging is set up
+    if _logging_config is None:
+        setup_logging()
+
+    return get_logger(name)
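
A usage sketch for these two entry points. The import path below is assumed from the package layout shown in the manifest (ai_pipeline_core/logging/logging_config.py); logging/__init__.py, not shown here, may also re-export these names:

    from pathlib import Path

    from ai_pipeline_core.logging.logging_config import get_pipeline_logger, setup_logging

    # An explicit level overrides whatever the config file specifies, and is
    # also propagated to Prefect via PREFECT_LOGGING_LEVEL.
    setup_logging(config_path=Path("logging.yml"), level="DEBUG")

    logger = get_pipeline_logger("ai_pipeline_core.llm")
    logger.debug("debug output now visible")

If config_path is omitted, LoggingConfig falls back to AI_PIPELINE_LOGGING_CONFIG, then PREFECT_LOGGING_SETTINGS_PATH, then the built-in default dict.
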
ai_pipeline_core/logging/logging_mixin.py
@@ -0,0 +1,223 @@
+"""Logging mixin for consistent logging across components using Prefect logging"""
+
+import contextlib
+import time
+from contextlib import contextmanager
+from functools import cached_property
+from typing import Any, Dict, Generator, Optional
+
+from prefect import get_run_logger
+from prefect.context import FlowRunContext, TaskRunContext
+from prefect.logging import get_logger
+
+
+class LoggerMixin:
+    """
+    Mixin class that provides consistent logging functionality using Prefect's logging system
+
+    Automatically uses the appropriate logger based on context:
+    - get_run_logger() when in flow/task context
+    - get_logger() when outside flow/task context
+    """
+
+    _logger_name: Optional[str] = None
+
+    @cached_property
+    def logger(self):
+        """Get appropriate logger based on context."""
+        if logger := self._get_run_logger():
+            return logger
+        return get_logger(self._logger_name or self.__class__.__module__)
+
+    def _get_run_logger(self):
+        """Attempt to get Prefect run logger."""
+        # Intentionally broad: must handle any exception when checking context
+        with contextlib.suppress(Exception):
+            if FlowRunContext.get() or TaskRunContext.get():
+                return get_run_logger()
+        return None
+
+    def log_debug(self, message: str, **kwargs: Any) -> None:
+        """Log debug message with optional context"""
+        self.logger.debug(message, extra=kwargs)
+
+    def log_info(self, message: str, **kwargs: Any) -> None:
+        """Log info message with optional context"""
+        self.logger.info(message, extra=kwargs)
+
+    def log_warning(self, message: str, **kwargs: Any) -> None:
+        """Log warning message with optional context"""
+        self.logger.warning(message, extra=kwargs)
+
+    def log_error(self, message: str, exc_info: bool = False, **kwargs: Any) -> None:
+        """Log error message with optional exception info"""
+        self.logger.error(message, exc_info=exc_info, extra=kwargs)
+
+    def log_critical(self, message: str, exc_info: bool = False, **kwargs: Any) -> None:
+        """Log critical message with optional exception info"""
+        self.logger.critical(message, exc_info=exc_info, extra=kwargs)
+
+    def log_with_context(self, level: str, message: str, context: Dict[str, Any]) -> None:
+        """
+        Log message with structured context
+
+        Args:
+            level: Log level (debug, info, warning, error, critical)
+            message: Log message
+            context: Additional context as dictionary
+
+        Example:
+            self.log_with_context("info", "Processing document", {
+                "document_id": doc.id,
+                "document_size": doc.size,
+                "document_type": doc.type
+            })
+        """
+        log_method = getattr(self.logger, level.lower(), self.logger.info)
+
+        # Format context for logging
+        context_str = " | ".join(f"{k}={v}" for k, v in context.items())
+        full_message = f"{message} | {context_str}" if context else message
+
+        log_method(full_message, extra={"context": context})
+
+
+class StructuredLoggerMixin(LoggerMixin):
+    """
+    Extended mixin for structured logging with Prefect
+    """
+
+    def log_event(self, event: str, **kwargs: Any) -> None:
+        """
+        Log a structured event
+
+        Args:
+            event: Event name
+            **kwargs: Event attributes
+
+        Example:
+            self.log_event("document_processed",
+                           document_id=doc.id,
+                           duration_ms=processing_time,
+                           status="success")
+        """
+        self.logger.info(event, extra={"event": event, "structured": True, **kwargs})
+
+    def log_metric(self, metric_name: str, value: float, unit: str = "", **tags: Any) -> None:
+        """
+        Log a metric value
+
+        Args:
+            metric_name: Name of the metric
+            value: Metric value
+            unit: Unit of measurement
+            **tags: Additional tags
+
+        Example:
+            self.log_metric("processing_time", 1.23, "seconds",
+                            document_type="pdf", model="gpt-4")
+        """
+        self.logger.info(
+            f"Metric: {metric_name}",
+            extra={
+                "metric": metric_name,
+                "value": value,
+                "unit": unit,
+                "tags": tags,
+                "structured": True,
+            },
+        )
+
+    def log_span(self, operation: str, duration_ms: float, **attributes: Any) -> None:
+        """
+        Log a span (operation with duration)
+
+        Args:
+            operation: Operation name
+            duration_ms: Duration in milliseconds
+            **attributes: Additional attributes
+
+        Example:
+            self.log_span("llm_generation", 1234.5,
+                          model="gpt-4", tokens=500)
+        """
+        self.logger.info(
+            f"Span: {operation}",
+            extra={
+                "span": operation,
+                "duration_ms": duration_ms,
+                "attributes": attributes,
+                "structured": True,
+            },
+        )
+
+    @contextmanager
+    def log_operation(self, operation: str, **context: Any) -> Generator[None, None, None]:
+        """
+        Context manager for logging operations with timing
+
+        Args:
+            operation: Operation name
+            **context: Additional context
+
+        Example:
+            with self.log_operation("document_processing", doc_id=doc.id):
+                process_document(doc)
+        """
+        start_time = time.perf_counter()
+
+        self.log_debug(f"Starting {operation}", **context)
+
+        try:
+            yield
+            duration_ms = (time.perf_counter() - start_time) * 1000
+            self.log_info(
+                f"Completed {operation}", duration_ms=duration_ms, status="success", **context
+            )
+        except Exception as e:
+            # Intentionally broad: context manager must catch all exceptions to log them
+            duration_ms = (time.perf_counter() - start_time) * 1000
+            self.log_error(
+                f"Failed {operation}: {str(e)}",
+                exc_info=True,
+                duration_ms=duration_ms,
+                status="failure",
+                **context,
+            )
+            raise
+
+
+class PrefectLoggerMixin(StructuredLoggerMixin):
+    """
+    Enhanced mixin specifically for Prefect flows and tasks
+    """
+
+    def log_flow_start(self, flow_name: str, parameters: Dict[str, Any]) -> None:
+        """Log flow start with parameters"""
+        self.log_event("flow_started", flow_name=flow_name, parameters=parameters)
+
+    def log_flow_end(self, flow_name: str, status: str, duration_ms: float) -> None:
+        """Log flow completion"""
+        self.log_event(
+            "flow_completed", flow_name=flow_name, status=status, duration_ms=duration_ms
+        )
+
+    def log_task_start(self, task_name: str, inputs: Dict[str, Any]) -> None:
+        """Log task start with inputs"""
+        self.log_event("task_started", task_name=task_name, inputs=inputs)
+
+    def log_task_end(self, task_name: str, status: str, duration_ms: float) -> None:
+        """Log task completion"""
+        self.log_event(
+            "task_completed", task_name=task_name, status=status, duration_ms=duration_ms
+        )
+
+    def log_retry(self, operation: str, attempt: int, max_attempts: int, error: str) -> None:
+        """Log retry attempt"""
+        self.log_warning(
+            f"Retrying {operation}", attempt=attempt, max_attempts=max_attempts, error=error
+        )
+
+    def log_checkpoint(self, checkpoint_name: str, **data: Any) -> None:
+        """Log a checkpoint in processing"""
+        self.log_info(f"Checkpoint: {checkpoint_name}", checkpoint=checkpoint_name, **data)
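
A short sketch of how a component might adopt the mixin. DocumentProcessor and its workload are hypothetical, not part of the package:

    # Hypothetical consumer class. Outside a Prefect flow/task run the mixin
    # falls back to get_logger(); inside a run it logs through
    # get_run_logger(), so messages land in the run's log stream.
    from ai_pipeline_core.logging.logging_mixin import StructuredLoggerMixin


    class DocumentProcessor(StructuredLoggerMixin):
        _logger_name = "ai_pipeline_core.documents"

        def process(self, doc_id: str) -> None:
            # Logs "Starting ..." at DEBUG, then "Completed ..." with
            # duration_ms, or "Failed ..." with exc_info=True if this raises.
            with self.log_operation("document_processing", doc_id=doc_id):
                self.log_metric("bytes_read", 2048, "bytes", doc_id=doc_id)


    DocumentProcessor().process("doc-42")

One caveat of the design: `logger` is a cached_property, so the first logger resolved (run logger or module logger) is reused for the object's lifetime.
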
ai_pipeline_core/prompt_manager.py
@@ -0,0 +1,115 @@
+from pathlib import Path
+from typing import Any
+
+import jinja2
+
+from ai_pipeline_core.logging import get_pipeline_logger
+
+from .exceptions import PromptNotFoundError, PromptRenderError
+
+logger = get_pipeline_logger(__name__)
+
+
+class PromptManager:
+    """A utility to load and render prompts from a structured directory.
+
+    Searches for a 'prompts' directory in the current directory and parent directories
+    (as long as __init__.py exists in the parent directories).
+    """
+
+    def __init__(self, current_dir: str, prompts_dir: str = "prompts"):
+        """Initialize PromptManager with the current file path.
+
+        Args:
+            current_dir: The __file__ path of the calling module (required)
+            prompts_dir: Name of the prompts directory to search for (default: "prompts")
+        """
+        search_paths: list[Path] = []
+
+        # Start from the directory containing the calling file
+        current_path = Path(current_dir).resolve()
+        if current_path.is_file():
+            current_path = current_path.parent
+
+        # First, add the immediate directory if it has a prompts subdirectory
+        local_prompts = current_path / prompts_dir
+        if local_prompts.is_dir():
+            search_paths.append(local_prompts)
+
+        # Also add the current directory itself for local templates
+        search_paths.append(current_path)
+
+        # Search for a prompts directory in parent directories.
+        # Stop when we can't find __init__.py (indicating we've left the package).
+        parent_path = current_path.parent
+        max_depth = 4  # Reasonable limit to prevent infinite searching
+        depth = 0
+
+        while depth < max_depth:
+            # Check if we're still within a Python package
+            if not (parent_path / "__init__.py").exists():
+                break
+
+            # Check if this directory has a prompts subdirectory
+            parent_prompts = parent_path / prompts_dir
+            if parent_prompts.is_dir():
+                search_paths.append(parent_prompts)
+
+            # Move to the next parent
+            parent_path = parent_path.parent
+            depth += 1
+
+        # If no prompts directories were found, that's okay - we can still use local templates
+        if not search_paths:
+            search_paths = [current_path]
+
+        self.search_paths = search_paths
+
+        # Create Jinja2 environment with all found search paths
+        self.env = jinja2.Environment(
+            loader=jinja2.FileSystemLoader(self.search_paths),
+            trim_blocks=True,
+            lstrip_blocks=True,
+            autoescape=False,  # Important for prompt engineering
+        )
+
+    def get(self, prompt_path: str, **kwargs: Any) -> str:
+        """
+        Renders a specific prompt with the given context.
+
+        Args:
+            prompt_path: The path to the prompt file relative to the `prompts`
+                directory (e.g., 'step_01_process_inputs/summarize_document.jinja2').
+                The .jinja2 extension will be added automatically if missing.
+            **kwargs: Variables to be injected into the template.
+
+        Returns:
+            The rendered prompt string.
+        """
+        try:
+            template = self.env.get_template(prompt_path)
+            return template.render(**kwargs)
+        except jinja2.TemplateNotFound:
+            # If the template wasn't found and doesn't end with .jinja2, try adding the extension
+            if not prompt_path.endswith(".jinja2"):
+                try:
+                    template = self.env.get_template(prompt_path + ".jinja2")
+                    return template.render(**kwargs)
+                except jinja2.TemplateNotFound:
+                    pass  # Fall through to the original error
+            if not prompt_path.endswith(".jinja"):
+                try:
+                    template = self.env.get_template(prompt_path + ".jinja")
+                    return template.render(**kwargs)
+                except jinja2.TemplateNotFound:
+                    pass  # Fall through to the original error
+            raise PromptNotFoundError(
+                f"Prompt template '{prompt_path}' not found (searched in {self.search_paths})."
+            )
+        except jinja2.TemplateError as e:
+            raise PromptRenderError(f"Template error in '{prompt_path}': {e}") from e
+        except PromptNotFoundError:
+            raise  # Re-raise our custom exception
+        except (KeyError, TypeError, AttributeError, IOError, ValueError) as e:
+            logger.error(f"Unexpected error rendering '{prompt_path}'", exc_info=True)
+            raise PromptRenderError(f"Failed to render prompt '{prompt_path}': {e}") from e
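
A usage sketch; the module layout and template name below are hypothetical examples, not files shipped with the package:

    # Hypothetical caller layout:
    #   my_flow/
    #     __init__.py
    #     steps.py          <- this file
    #     prompts/
    #       summarize.jinja2
    from ai_pipeline_core.prompt_manager import PromptManager

    pm = PromptManager(__file__)  # pass the calling module's __file__
    # ".jinja2" (then ".jinja") is appended automatically when missing:
    prompt = pm.get("summarize", document="Quarterly report text...")
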
ai_pipeline_core/py.typed
File without changes (empty PEP 561 marker file)
ai_pipeline_core/settings.py
@@ -0,0 +1,24 @@
+"""Core configuration settings for pipeline operations."""
+
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class Settings(BaseSettings):
+    """Core settings for pipeline operations."""
+
+    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="ignore")
+
+    # LLM API Configuration
+    openai_base_url: str = ""
+    openai_api_key: str = ""
+
+    # Prefect Configuration
+    prefect_api_url: str = ""
+    prefect_api_key: str = ""
+
+    # Observability
+    lmnr_project_api_key: str = ""
+
+
+# Create a single, importable instance of the settings
+settings = Settings()
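
Because Settings subclasses pydantic-settings' BaseSettings, each field is populated case-insensitively from the environment or from a local .env file at import time. A brief sketch:

    # OPENAI_API_KEY in the environment (or in .env) fills
    # settings.openai_api_key; unset fields keep their "" defaults.
    from ai_pipeline_core.settings import settings

    if not settings.openai_api_key:
        raise RuntimeError("OPENAI_API_KEY is not set (environment or .env)")
    print(settings.openai_base_url or "no custom base URL configured")
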