ai-pipeline-core 0.1.12__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. ai_pipeline_core/__init__.py +83 -119
  2. ai_pipeline_core/deployment/__init__.py +34 -0
  3. ai_pipeline_core/deployment/base.py +861 -0
  4. ai_pipeline_core/deployment/contract.py +80 -0
  5. ai_pipeline_core/deployment/deploy.py +561 -0
  6. ai_pipeline_core/deployment/helpers.py +97 -0
  7. ai_pipeline_core/deployment/progress.py +126 -0
  8. ai_pipeline_core/deployment/remote.py +116 -0
  9. ai_pipeline_core/docs_generator/__init__.py +54 -0
  10. ai_pipeline_core/docs_generator/__main__.py +5 -0
  11. ai_pipeline_core/docs_generator/cli.py +196 -0
  12. ai_pipeline_core/docs_generator/extractor.py +324 -0
  13. ai_pipeline_core/docs_generator/guide_builder.py +644 -0
  14. ai_pipeline_core/docs_generator/trimmer.py +35 -0
  15. ai_pipeline_core/docs_generator/validator.py +114 -0
  16. ai_pipeline_core/document_store/__init__.py +13 -0
  17. ai_pipeline_core/document_store/_summary.py +9 -0
  18. ai_pipeline_core/document_store/_summary_worker.py +170 -0
  19. ai_pipeline_core/document_store/clickhouse.py +492 -0
  20. ai_pipeline_core/document_store/factory.py +38 -0
  21. ai_pipeline_core/document_store/local.py +312 -0
  22. ai_pipeline_core/document_store/memory.py +85 -0
  23. ai_pipeline_core/document_store/protocol.py +68 -0
  24. ai_pipeline_core/documents/__init__.py +14 -15
  25. ai_pipeline_core/documents/_context_vars.py +85 -0
  26. ai_pipeline_core/documents/_hashing.py +52 -0
  27. ai_pipeline_core/documents/attachment.py +85 -0
  28. ai_pipeline_core/documents/context.py +128 -0
  29. ai_pipeline_core/documents/document.py +349 -1062
  30. ai_pipeline_core/documents/mime_type.py +40 -85
  31. ai_pipeline_core/documents/utils.py +62 -7
  32. ai_pipeline_core/exceptions.py +10 -62
  33. ai_pipeline_core/images/__init__.py +309 -0
  34. ai_pipeline_core/images/_processing.py +151 -0
  35. ai_pipeline_core/llm/__init__.py +5 -3
  36. ai_pipeline_core/llm/ai_messages.py +284 -73
  37. ai_pipeline_core/llm/client.py +462 -209
  38. ai_pipeline_core/llm/model_options.py +86 -53
  39. ai_pipeline_core/llm/model_response.py +187 -241
  40. ai_pipeline_core/llm/model_types.py +34 -54
  41. ai_pipeline_core/logging/__init__.py +2 -9
  42. ai_pipeline_core/logging/logging.yml +1 -1
  43. ai_pipeline_core/logging/logging_config.py +27 -43
  44. ai_pipeline_core/logging/logging_mixin.py +17 -51
  45. ai_pipeline_core/observability/__init__.py +32 -0
  46. ai_pipeline_core/observability/_debug/__init__.py +30 -0
  47. ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
  48. ai_pipeline_core/observability/_debug/_config.py +95 -0
  49. ai_pipeline_core/observability/_debug/_content.py +764 -0
  50. ai_pipeline_core/observability/_debug/_processor.py +98 -0
  51. ai_pipeline_core/observability/_debug/_summary.py +312 -0
  52. ai_pipeline_core/observability/_debug/_types.py +75 -0
  53. ai_pipeline_core/observability/_debug/_writer.py +843 -0
  54. ai_pipeline_core/observability/_document_tracking.py +146 -0
  55. ai_pipeline_core/observability/_initialization.py +194 -0
  56. ai_pipeline_core/observability/_logging_bridge.py +57 -0
  57. ai_pipeline_core/observability/_summary.py +81 -0
  58. ai_pipeline_core/observability/_tracking/__init__.py +6 -0
  59. ai_pipeline_core/observability/_tracking/_client.py +178 -0
  60. ai_pipeline_core/observability/_tracking/_internal.py +28 -0
  61. ai_pipeline_core/observability/_tracking/_models.py +138 -0
  62. ai_pipeline_core/observability/_tracking/_processor.py +158 -0
  63. ai_pipeline_core/observability/_tracking/_service.py +311 -0
  64. ai_pipeline_core/observability/_tracking/_writer.py +229 -0
  65. ai_pipeline_core/observability/tracing.py +640 -0
  66. ai_pipeline_core/pipeline/__init__.py +10 -0
  67. ai_pipeline_core/pipeline/decorators.py +915 -0
  68. ai_pipeline_core/pipeline/options.py +16 -0
  69. ai_pipeline_core/prompt_manager.py +26 -105
  70. ai_pipeline_core/settings.py +41 -32
  71. ai_pipeline_core/testing.py +9 -0
  72. ai_pipeline_core-0.4.1.dist-info/METADATA +807 -0
  73. ai_pipeline_core-0.4.1.dist-info/RECORD +76 -0
  74. {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/WHEEL +1 -1
  75. ai_pipeline_core/documents/document_list.py +0 -240
  76. ai_pipeline_core/documents/flow_document.py +0 -128
  77. ai_pipeline_core/documents/task_document.py +0 -133
  78. ai_pipeline_core/documents/temporary_document.py +0 -95
  79. ai_pipeline_core/flow/__init__.py +0 -9
  80. ai_pipeline_core/flow/config.py +0 -314
  81. ai_pipeline_core/flow/options.py +0 -75
  82. ai_pipeline_core/pipeline.py +0 -717
  83. ai_pipeline_core/prefect.py +0 -54
  84. ai_pipeline_core/simple_runner/__init__.py +0 -24
  85. ai_pipeline_core/simple_runner/cli.py +0 -255
  86. ai_pipeline_core/simple_runner/simple_runner.py +0 -385
  87. ai_pipeline_core/tracing.py +0 -475
  88. ai_pipeline_core-0.1.12.dist-info/METADATA +0 -450
  89. ai_pipeline_core-0.1.12.dist-info/RECORD +0 -36
  90. {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,9 +0,0 @@
1
- """Flow configuration and options for Prefect-based pipeline flows."""
2
-
3
- from .config import FlowConfig
4
- from .options import FlowOptions
5
-
6
- __all__ = [
7
- "FlowConfig",
8
- "FlowOptions",
9
- ]
@@ -1,314 +0,0 @@
1
- """Flow configuration system for type-safe pipeline definitions.
2
-
3
- @public
4
-
5
- This module provides the FlowConfig abstract base class that enforces
6
- type safety for flow inputs and outputs in the pipeline system.
7
-
8
- Best Practice:
9
- Always finish @pipeline_flow functions with create_and_validate_output()
10
- to ensure type safety and proper validation of output documents.
11
- """
12
-
13
- from abc import ABC
14
- from typing import Any, ClassVar, Iterable
15
-
16
- from ai_pipeline_core.documents import DocumentList, FlowDocument
17
- from ai_pipeline_core.exceptions import DocumentValidationError
18
-
19
-
20
- class FlowConfig(ABC):
21
- """Abstract base class for type-safe flow configuration.
22
-
23
- @public
24
-
25
- FlowConfig defines the contract for flow inputs and outputs, ensuring
26
- type safety and preventing circular dependencies in pipeline flows.
27
- Each flow must have a corresponding FlowConfig subclass that specifies
28
- its input document types and output document type.
29
-
30
- CRITICAL RULE: OUTPUT_DOCUMENT_TYPE must NEVER be in INPUT_DOCUMENT_TYPES!
31
- This prevents circular dependencies as flows chain together.
32
- Each flow transforms input types to a DIFFERENT output type.
33
-
34
- Class Variables:
35
- INPUT_DOCUMENT_TYPES: List of FlowDocument types this flow accepts
36
- OUTPUT_DOCUMENT_TYPE: Single FlowDocument type this flow produces
37
-
38
- Validation Rules:
39
- - INPUT_DOCUMENT_TYPES and OUTPUT_DOCUMENT_TYPE must be defined
40
- - OUTPUT_DOCUMENT_TYPE cannot be in INPUT_DOCUMENT_TYPES (prevents cycles)
41
- - Field names must be exact (common typos are detected)
42
-
43
- Why this matters:
44
- Flows connect in pipelines where one flow's output becomes another's input.
45
- Same input/output types would create infinite loops or circular dependencies.
46
-
47
- Example:
48
- >>> # CORRECT - Different output type from inputs
49
- >>> class ProcessingFlowConfig(FlowConfig):
50
- ... INPUT_DOCUMENT_TYPES = [RawDataDocument]
51
- ... OUTPUT_DOCUMENT_TYPE = ProcessedDocument # Different type!
52
- >>>
53
- >>> # Use in @pipeline_flow - RECOMMENDED PATTERN
54
- >>> @pipeline_flow(name="processing")
55
- >>> async def process(config: ProcessingFlowConfig, docs: DocumentList) -> DocumentList:
56
- ... outputs = []
57
- ... # ... processing logic ...
58
- ... return config.create_and_validate_output(outputs)
59
-
60
- >>> # WRONG - Will raise TypeError
61
- >>> class BadConfig(FlowConfig):
62
- ... INPUT_DOCUMENT_TYPES = [DataDocument]
63
- ... OUTPUT_DOCUMENT_TYPE = DataDocument # SAME TYPE - NOT ALLOWED!
64
-
65
- Note:
66
- - Validation happens at class definition time
67
- - Helps catch configuration errors early
68
- - Used by simple_runner to manage document flow
69
- """
70
-
71
- INPUT_DOCUMENT_TYPES: ClassVar[list[type[FlowDocument]]]
72
- OUTPUT_DOCUMENT_TYPE: ClassVar[type[FlowDocument]]
73
-
74
- def __init_subclass__(cls, **kwargs: Any):
75
- """Validate flow configuration at subclass definition time.
76
-
77
- Performs comprehensive validation when a FlowConfig subclass is defined:
78
- 1. Checks for common field name mistakes (typos)
79
- 2. Ensures required fields are defined
80
- 3. Prevents circular dependencies (output != input)
81
-
82
- Args:
83
- **kwargs: Additional arguments for parent __init_subclass__.
84
-
85
- Raises:
86
- TypeError: If configuration violates any validation rules:
87
- - Missing required fields
88
- - Incorrect field names
89
- - Circular dependency detected
90
-
91
- Note:
92
- This runs at class definition time, not instantiation,
93
- providing immediate feedback during development.
94
- """
95
- super().__init_subclass__(**kwargs)
96
-
97
- # Skip validation for the abstract base class itself
98
- if cls.__name__ == "FlowConfig":
99
- return
100
-
101
- # Check for invalid field names (common mistakes)
102
- allowed_fields = {"INPUT_DOCUMENT_TYPES", "OUTPUT_DOCUMENT_TYPE"}
103
- class_attrs = {name for name in dir(cls) if not name.startswith("_") and name.isupper()}
104
-
105
- # Find fields that look like they might be mistakes
106
- suspicious_fields = class_attrs - allowed_fields
107
- common_mistakes = {
108
- "OUTPUT_DOCUMENT_TYPES": "OUTPUT_DOCUMENT_TYPE",
109
- "INPUT_DOCUMENT_TYPE": "INPUT_DOCUMENT_TYPES",
110
- }
111
-
112
- for field in suspicious_fields:
113
- # Skip inherited attributes from parent classes
114
- if any(hasattr(base, field) for base in cls.__bases__):
115
- continue
116
-
117
- if field in common_mistakes:
118
- raise TypeError(
119
- f"FlowConfig {cls.__name__}: Found '{field}' but expected "
120
- f"'{common_mistakes[field]}'. Please use the correct field name."
121
- )
122
- elif "DOCUMENT" in field:
123
- raise TypeError(
124
- f"FlowConfig {cls.__name__}: Invalid field '{field}'. "
125
- f"Only 'INPUT_DOCUMENT_TYPES' and 'OUTPUT_DOCUMENT_TYPE' are allowed."
126
- )
127
-
128
- # Ensure required attributes are defined
129
- if not hasattr(cls, "INPUT_DOCUMENT_TYPES"):
130
- raise TypeError(f"FlowConfig {cls.__name__} must define INPUT_DOCUMENT_TYPES")
131
- if not hasattr(cls, "OUTPUT_DOCUMENT_TYPE"):
132
- raise TypeError(f"FlowConfig {cls.__name__} must define OUTPUT_DOCUMENT_TYPE")
133
-
134
- # Validate that output type is not in input types
135
- if cls.OUTPUT_DOCUMENT_TYPE in cls.INPUT_DOCUMENT_TYPES:
136
- raise TypeError(
137
- f"FlowConfig {cls.__name__}: OUTPUT_DOCUMENT_TYPE "
138
- f"({cls.OUTPUT_DOCUMENT_TYPE.__name__}) cannot be in INPUT_DOCUMENT_TYPES"
139
- )
140
-
141
- @classmethod
142
- def get_input_document_types(cls) -> list[type[FlowDocument]]:
143
- """Get the list of input document types this flow accepts.
144
-
145
- Returns:
146
- List of FlowDocument subclasses that this flow requires
147
- as input.
148
-
149
- Example:
150
- >>> types = MyFlowConfig.get_input_document_types()
151
- >>> print([t.__name__ for t in types])
152
- ['InputDoc', 'ConfigDoc']
153
- """
154
- return cls.INPUT_DOCUMENT_TYPES
155
-
156
- @classmethod
157
- def get_output_document_type(cls) -> type[FlowDocument]:
158
- """Get the output document type this flow produces.
159
-
160
- Returns:
161
- Single FlowDocument subclass that this flow outputs.
162
-
163
- Example:
164
- >>> output_type = MyFlowConfig.get_output_document_type()
165
- >>> print(output_type.__name__)
166
- 'ProcessedDataDocument'
167
- """
168
- return cls.OUTPUT_DOCUMENT_TYPE
169
-
170
- @classmethod
171
- def has_input_documents(cls, documents: DocumentList) -> bool:
172
- """Check if all required input documents are present.
173
-
174
- Verifies that the document list contains at least one instance
175
- of each required input document type.
176
-
177
- Args:
178
- documents: DocumentList to check for required inputs.
179
-
180
- Returns:
181
- True if all required document types are present,
182
- False if any are missing.
183
-
184
- Example:
185
- >>> docs = DocumentList([input_doc, config_doc])
186
- >>> if MyFlowConfig.has_input_documents(docs):
187
- ... # Safe to proceed with flow
188
- ... pass
189
-
190
- Note:
191
- Use this before get_input_documents() to avoid exceptions.
192
- """
193
- for doc_cls in cls.INPUT_DOCUMENT_TYPES:
194
- if not any(isinstance(doc, doc_cls) for doc in documents):
195
- return False
196
- return True
197
-
198
- @classmethod
199
- def get_input_documents(cls, documents: DocumentList) -> DocumentList:
200
- """Extract and return all required input documents.
201
-
202
- Filters the provided document list to return only documents
203
- matching the required input types. Returns all matching documents,
204
- not just the first of each type.
205
-
206
- Args:
207
- documents: DocumentList containing mixed document types.
208
-
209
- Returns:
210
- DocumentList containing only the required input documents.
211
-
212
- Raises:
213
- ValueError: If any required document type is missing.
214
-
215
- Example:
216
- >>> all_docs = DocumentList([input1, input2, other_doc])
217
- >>> input_docs = MyFlowConfig.get_input_documents(all_docs)
218
- >>> len(input_docs) # Contains only input1 and input2
219
- 2
220
-
221
- Note:
222
- Call has_input_documents() first to check availability.
223
- """
224
- input_documents = DocumentList()
225
- for doc_cls in cls.INPUT_DOCUMENT_TYPES:
226
- filtered_documents = [doc for doc in documents if isinstance(doc, doc_cls)]
227
- if not filtered_documents:
228
- raise ValueError(f"No input document found for class {doc_cls.__name__}")
229
- input_documents.extend(filtered_documents)
230
- return input_documents
231
-
232
- @classmethod
233
- def validate_output_documents(cls, documents: Any) -> None:
234
- """Validate that output documents match the expected type.
235
-
236
- Ensures all documents in the list are instances of the
237
- declared OUTPUT_DOCUMENT_TYPE.
238
-
239
- Args:
240
- documents: DocumentList to validate.
241
-
242
- Raises:
243
- DocumentValidationError: If documents is not a DocumentList or if any
244
- document has incorrect type.
245
-
246
- Example:
247
- >>> output = DocumentList([ProcessedDoc(...)])
248
- >>> MyFlowConfig.validate_output_documents(output)
249
- >>> # No exception means valid
250
-
251
- Note:
252
- Used internally by create_and_validate_output().
253
- Uses explicit exceptions for validation (works with python -O).
254
- """
255
- if not isinstance(documents, DocumentList):
256
- raise DocumentValidationError("Documents must be a DocumentList")
257
-
258
- output_document_class = cls.get_output_document_type()
259
-
260
- for doc in documents:
261
- if not isinstance(doc, output_document_class):
262
- raise DocumentValidationError(
263
- f"Document '{doc.name}' has incorrect type. "
264
- f"Expected: {output_document_class.__name__}, "
265
- f"Got: {type(doc).__name__}"
266
- )
267
-
268
- @classmethod
269
- def create_and_validate_output(
270
- cls, output: FlowDocument | Iterable[FlowDocument] | DocumentList
271
- ) -> DocumentList:
272
- """Create and validate flow output documents.
273
-
274
- @public
275
-
276
- RECOMMENDED: Always use this method at the end of @pipeline_flow functions
277
- to ensure type safety and proper output validation.
278
-
279
- Convenience method that wraps output in a DocumentList if needed
280
- and validates it matches the expected OUTPUT_DOCUMENT_TYPE.
281
-
282
- Args:
283
- output: Single document, iterable of documents, or DocumentList.
284
-
285
- Returns:
286
- Validated DocumentList containing the output documents.
287
-
288
- Raises:
289
- DocumentValidationError: If output type doesn't match OUTPUT_DOCUMENT_TYPE.
290
-
291
- Example:
292
- >>> @pipeline_flow(name="my_flow")
293
- >>> async def process_flow(config: MyFlowConfig, ...) -> DocumentList:
294
- >>> outputs = []
295
- >>> # ... processing logic ...
296
- >>> outputs.append(OutputDoc(...))
297
- >>>
298
- >>> # Always finish with this validation
299
- >>> return config.create_and_validate_output(outputs)
300
-
301
- Note:
302
- This is the recommended pattern for all @pipeline_flow functions.
303
- It ensures type safety and catches output errors immediately.
304
- """
305
- documents: DocumentList
306
- if isinstance(output, FlowDocument):
307
- documents = DocumentList([output])
308
- elif isinstance(output, DocumentList):
309
- documents = output
310
- else:
311
- # Handle any iterable of FlowDocuments
312
- documents = DocumentList(list(output)) # type: ignore[arg-type]
313
- cls.validate_output_documents(documents)
314
- return documents
@@ -1,75 +0,0 @@
1
- """Flow options configuration for pipeline execution.
2
-
3
- @public
4
-
5
- Provides base configuration settings for AI pipeline flows,
6
- including model selection and runtime parameters.
7
- """
8
-
9
- from typing import TypeVar
10
-
11
- from pydantic import Field
12
- from pydantic_settings import BaseSettings, SettingsConfigDict
13
-
14
- from ai_pipeline_core.llm import ModelName
15
-
16
- T = TypeVar("T", bound="FlowOptions")
17
-
18
-
19
- class FlowOptions(BaseSettings):
20
- """Base configuration settings for AI pipeline flows.
21
-
22
- @public
23
-
24
- FlowOptions provides runtime configuration for pipeline flows,
25
- including model selection and other parameters. It uses pydantic-settings
26
- to support environment variable overrides and is immutable (frozen) by default.
27
-
28
- This class is designed to be subclassed for flow-specific configuration:
29
-
30
- Example:
31
- >>> class MyFlowOptions(FlowOptions):
32
- ... temperature: float = Field(0.7, ge=0, le=2)
33
- ... batch_size: int = Field(10, gt=0)
34
- ... custom_param: str = "default"
35
-
36
- >>> # Use in CLI with run_cli:
37
- >>> run_cli(
38
- ... flows=[my_flow],
39
- ... options_cls=MyFlowOptions # Will parse CLI args
40
- ... )
41
-
42
- >>> # Or create programmatically:
43
- >>> options = MyFlowOptions(
44
- ... core_model="gemini-2.5-pro",
45
- ... temperature=0.9
46
- ... )
47
-
48
- Attributes:
49
- core_model: Primary LLM for complex tasks (default: gpt-5)
50
- small_model: Fast model for simple tasks (default: gpt-5-mini)
51
-
52
- Configuration:
53
- - Frozen (immutable) after creation
54
- - Extra fields ignored (not strict)
55
- - Can be populated from environment variables
56
- - Used by simple_runner.cli for command-line parsing
57
-
58
- Note:
59
- The base class provides model selection. Subclasses should
60
- add flow-specific parameters with appropriate validation.
61
- """
62
-
63
- core_model: ModelName | str = Field(
64
- default="gpt-5",
65
- description="Primary model for complex analysis and generation tasks.",
66
- )
67
- small_model: ModelName | str = Field(
68
- default="gpt-5-mini",
69
- description="Fast, cost-effective model for simple tasks and orchestration.",
70
- )
71
-
72
- model_config = SettingsConfigDict(frozen=True, extra="ignore")
73
-
74
-
75
- __all__ = ["FlowOptions"]