ai-pipeline-core 0.1.14__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. ai_pipeline_core/__init__.py +21 -13
  2. ai_pipeline_core/documents/document.py +202 -51
  3. ai_pipeline_core/documents/document_list.py +148 -24
  4. ai_pipeline_core/documents/flow_document.py +2 -6
  5. ai_pipeline_core/documents/task_document.py +0 -4
  6. ai_pipeline_core/documents/temporary_document.py +1 -8
  7. ai_pipeline_core/flow/config.py +174 -5
  8. ai_pipeline_core/llm/__init__.py +1 -6
  9. ai_pipeline_core/llm/ai_messages.py +137 -4
  10. ai_pipeline_core/llm/client.py +118 -65
  11. ai_pipeline_core/llm/model_options.py +6 -7
  12. ai_pipeline_core/llm/model_response.py +17 -16
  13. ai_pipeline_core/llm/model_types.py +3 -7
  14. ai_pipeline_core/logging/__init__.py +0 -2
  15. ai_pipeline_core/logging/logging_config.py +0 -6
  16. ai_pipeline_core/logging/logging_mixin.py +2 -10
  17. ai_pipeline_core/pipeline.py +54 -68
  18. ai_pipeline_core/prefect.py +12 -3
  19. ai_pipeline_core/prompt_manager.py +14 -7
  20. ai_pipeline_core/settings.py +13 -5
  21. ai_pipeline_core/simple_runner/__init__.py +1 -11
  22. ai_pipeline_core/simple_runner/cli.py +13 -12
  23. ai_pipeline_core/simple_runner/simple_runner.py +34 -189
  24. ai_pipeline_core/storage/__init__.py +8 -0
  25. ai_pipeline_core/storage/storage.py +628 -0
  26. ai_pipeline_core/tracing.py +234 -30
  27. {ai_pipeline_core-0.1.14.dist-info → ai_pipeline_core-0.2.1.dist-info}/METADATA +35 -20
  28. ai_pipeline_core-0.2.1.dist-info/RECORD +38 -0
  29. ai_pipeline_core-0.1.14.dist-info/RECORD +0 -36
  30. {ai_pipeline_core-0.1.14.dist-info → ai_pipeline_core-0.2.1.dist-info}/WHEEL +0 -0
  31. {ai_pipeline_core-0.1.14.dist-info → ai_pipeline_core-0.2.1.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/__init__.py

@@ -7,7 +7,7 @@ It combines document processing, LLM integration, and workflow orchestration int
 system designed for production use.
 
 The framework enforces best practices through strong typing (Pydantic), automatic retries,
-cost tracking, and distributed tracing. All I/O operations are async for maximum throughput.
+and cost tracking. All I/O operations are async for maximum throughput.
 
 **CRITICAL IMPORT RULE**:
 Always import from the top-level package:
@@ -18,12 +18,12 @@ cost tracking, and distributed tracing. All I/O operations are async for maximum
 from ai_pipeline_core.llm import generate  # NO!
 from ai_pipeline_core.documents import FlowDocument  # NO!
 
-FRAMEWORK RULES (90% Use Cases):
-1. Decorators: Use @trace, @pipeline_task, @pipeline_flow WITHOUT parameters
+FRAMEWORK RULES (Use by default, unless instructed otherwise):
+1. Decorators: Use @pipeline_task WITHOUT parameters, @pipeline_flow WITH config
 2. Logging: Use get_pipeline_logger(__name__) - NEVER print() or logging module
 3. LLM calls: Use AIMessages or str. Wrap Documents in AIMessages; do not call .text yourself
-4. Options: Omit ModelOptions unless specifically needed (defaults are optimal)
-5. Documents: Create with just name and content - skip description
+4. Options: DO NOT use options parameter - omit it entirely (defaults are optimal)
+5. Documents: Create with just name and content - skip description unless needed
 6. FlowConfig: OUTPUT_DOCUMENT_TYPE must differ from all INPUT_DOCUMENT_TYPES
 7. Initialization: PromptManager and logger at module scope, not in functions
 8. DocumentList: Use default constructor - no validation flags needed
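
Taken together, rules 1-8 describe the default shape of task code in 0.2.x. A minimal sketch under those rules; summarize is an illustrative name, and it assumes get_pipeline_logger is exported at the top level per the import rule above:

    from ai_pipeline_core import AIMessages, get_pipeline_logger, llm, pipeline_task

    logger = get_pipeline_logger(__name__)  # rules 2 and 7: module scope, never print()

    @pipeline_task  # rule 1: no parameters on @pipeline_task
    async def summarize(doc):  # doc: any Document subclass instance
        # rule 3: wrap the Document in AIMessages; rule 4: no options argument
        response = await llm.generate("gpt-5", messages=AIMessages([doc]))
        logger.info("Summary generated")
        return response
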
@@ -36,18 +36,22 @@ Core Capabilities:
 - **LLM Integration**: Unified interface to any model via LiteLLM with caching
 - **Structured Output**: Type-safe generation with Pydantic model validation
 - **Workflow Orchestration**: Prefect-based flows and tasks with retries
-- **Observability**: Distributed tracing via Laminar (LMNR) for debugging
+- **Observability**: Built-in monitoring and debugging capabilities
 - **Local Development**: Simple runner for testing without infrastructure
 
 Quick Start:
     >>> from ai_pipeline_core import (
-    ...     pipeline_flow, FlowDocument, DocumentList, FlowOptions, llm, AIMessages
+    ...     pipeline_flow, FlowDocument, DocumentList, FlowOptions, FlowConfig, llm, AIMessages
     ... )
     >>>
     >>> class OutputDoc(FlowDocument):
    ...     '''Analysis result document.'''
    >>>
-    >>> @pipeline_flow
+    >>> class MyFlowConfig(FlowConfig):
+    ...     INPUT_DOCUMENT_TYPES = []
+    ...     OUTPUT_DOCUMENT_TYPE = OutputDoc
+    >>>
+    >>> @pipeline_flow(config=MyFlowConfig)
     >>> async def analyze_flow(
     ...     project_name: str,
     ...     documents: DocumentList,
@@ -55,7 +59,7 @@ Quick Start:
     ... ) -> DocumentList:
     ...     # Messages accept AIMessages or str. Wrap documents: AIMessages([doc])
     ...     response = await llm.generate(
-    ...         model="gpt-5",
+    ...         "gpt-5",
     ...         messages=AIMessages([documents[0]])
     ...     )
     ...     result = OutputDoc.create(
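
Assembled without diff markers, the 0.2.x Quick Start reads as below. A sketch only: the flow_options parameter and the closing lines of analyze_flow fall outside the hunks shown, so their exact spelling here is an assumption, as is extracting the response text with str():

    from ai_pipeline_core import (
        AIMessages, DocumentList, FlowConfig, FlowDocument, FlowOptions,
        llm, pipeline_flow,
    )

    class OutputDoc(FlowDocument):
        '''Analysis result document.'''

    class MyFlowConfig(FlowConfig):
        INPUT_DOCUMENT_TYPES = []
        OUTPUT_DOCUMENT_TYPE = OutputDoc

    @pipeline_flow(config=MyFlowConfig)
    async def analyze_flow(
        project_name: str,
        documents: DocumentList,
        flow_options: FlowOptions,  # assumed: this parameter sits between the two hunks
    ) -> DocumentList:
        response = await llm.generate("gpt-5", messages=AIMessages([documents[0]]))
        result = OutputDoc.create(name="analysis.md", content=str(response))  # str() assumed
        return DocumentList([result])
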
@@ -76,8 +80,6 @@ Optional Environment Variables:
 - PREFECT_API_KEY: Prefect API authentication key
 - LMNR_PROJECT_API_KEY: Laminar (LMNR) API key for tracing
 - LMNR_DEBUG: Set to "true" to enable debug-level traces
-- LMNR_SESSION_ID: Default session ID for traces
-- LMNR_USER_ID: Default user ID for traces
 """
 
 from . import llm
@@ -99,6 +101,8 @@ from .llm import (
     ModelOptions,
     ModelResponse,
     StructuredModelResponse,
+    generate,
+    generate_structured,
 )
 from .logging import (
     LoggerMixin,
@@ -114,7 +118,7 @@ from .prompt_manager import PromptManager
 from .settings import Settings
 from .tracing import TraceInfo, TraceLevel, set_trace_cost, trace
 
-__version__ = "0.1.14"
+__version__ = "0.2.1"
 
 __all__ = [
     # Config/Settings
@@ -145,7 +149,9 @@ __all__ = [
     "prefect_test_harness",
     "disable_run_logger",
     # LLM
-    "llm",
+    "llm",  # for backward compatibility
+    "generate",
+    "generate_structured",
     "ModelName",
     "ModelOptions",
     "ModelResponse",
@@ -159,4 +165,6 @@ __all__ = [
     "set_trace_cost",
     # Utils
     "PromptManager",
+    "generate",
+    "generate_structured",
 ]
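
The net effect of the export changes: generate and generate_structured are now reachable from the top-level package, which is exactly what the CRITICAL IMPORT RULE asks for. A minimal sketch, assuming the top-level generate shares llm.generate's signature:

    from ai_pipeline_core import AIMessages, generate

    async def ask(question: str):
        # messages accepts AIMessages or str; a bare string would also work here
        return await generate("gpt-5", messages=AIMessages([question]))
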
ai_pipeline_core/documents/document.py

@@ -51,6 +51,7 @@ from .mime_type import (
 )
 
 TModel = TypeVar("TModel", bound=BaseModel)
+TDocument = TypeVar("TDocument", bound="Document")
 
 
 class Document(BaseModel, ABC):
@@ -61,8 +62,7 @@ class Document(BaseModel, ABC):
     Document is the fundamental data abstraction for all content flowing through
     pipelines. It provides automatic encoding, MIME type detection, serialization,
     and validation. All documents must be subclassed from FlowDocument or TaskDocument
-    based on their persistence requirements. TemporaryDocument is a special concrete
-    class that can be instantiated directly (not abstract).
+    based on their persistence requirements.
 
     VALIDATION IS AUTOMATIC - Do not add manual validation!
     Size validation, name validation, and MIME type detection are built-in.
@@ -74,7 +74,7 @@ class Document(BaseModel, ABC):
         document.validate_file_name(document.name)  # NO! Automatic
 
     Best Practices:
-    - Use create() classmethod for automatic type conversion (90% of cases)
+    - Use create() classmethod for automatic type conversion (default preferred)
     - Omit description parameter unless truly needed for metadata
     - When using LLM functions, pass AIMessages or str. Wrap any Document values
       in AIMessages([...]). Do not call .text yourself
@@ -98,6 +98,8 @@ class Document(BaseModel, ABC):
     - Support for text, JSON, YAML, PDF, and image formats
     - Conversion utilities between different formats
     - Source provenance tracking via sources field
+    - Document type conversion via model_convert() method
+    - Standard Pydantic model_copy() for same-type copying
 
     Class Variables:
         MAX_CONTENT_SIZE: Maximum allowed content size in bytes (default 25MB)
@@ -131,10 +133,62 @@ class Document(BaseModel, ABC):
     2. Embed metadata in content (e.g., JSON with data + metadata fields)
     3. Create a separate MetadataDocument type to accompany data documents
     4. Use document naming conventions (e.g., "data_v2_2024.json")
-    5. Store metadata in flow_options or pass through TraceInfo
+    5. Store metadata in flow_options
+
+    FILES Enum Best Practice:
+        When defining a FILES enum, NEVER use magic strings to reference files.
+        Always use the enum values to maintain type safety and refactorability.
+
+        WRONG - Magic strings/numbers:
+            doc = ConfigDocument.create(name="config.yaml", content=data)  # NO!
+            doc = docs.get_by("settings.json")  # NO! Magic string
+            files = ["config.yaml", "settings.json"]  # NO! Magic strings
+
+        CORRECT - Use enum references:
+            doc = ConfigDocument.create(
+                name=ConfigDocument.FILES.CONFIG,  # YES! Type-safe
+                content=data
+            )
+            doc = docs.get_by(ConfigDocument.FILES.SETTINGS)  # YES!
+            files = [
+                ConfigDocument.FILES.CONFIG,
+                ConfigDocument.FILES.SETTINGS
+            ]  # YES! Refactorable
+
+    Pydantic Model Interaction:
+        Documents provide DIRECT support for Pydantic models. Use the built-in
+        methods instead of manual JSON conversion.
+
+        WRONG - Manual JSON conversion:
+            # Don't do this - manual JSON handling
+            json_str = doc.text
+            json_data = json.loads(json_str)
+            model = MyModel(**json_data)  # NO! Use as_pydantic_model
+
+            # Don't do this - manual serialization
+            json_str = model.model_dump_json()
+            doc = MyDocument.create(name="data.json", content=json_str)  # NO!
+
+        CORRECT - Direct Pydantic interaction:
+            # Reading Pydantic model from document
+            model = doc.as_pydantic_model(MyModel)  # Direct conversion
+            models = doc.as_pydantic_model(list[MyModel])  # List support
+
+            # Creating document from Pydantic model
+            doc = MyDocument.create(
+                name="data.json",
+                content=model  # Direct BaseModel support
+            )
+
+            # Round-trip is seamless
+            original_model = MyModel(field="value")
+            doc = MyDocument.create(name="data.json", content=original_model)
+            restored = doc.as_pydantic_model(MyModel)
+            assert restored == original_model  # Perfect round-trip
 
     Example:
         >>> from enum import StrEnum
+        >>> from pydantic import BaseModel
         >>>
         >>> # Simple document:
         >>> class MyDocument(FlowDocument):
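
The Pydantic guidance added above reduces to a short round trip. A runnable sketch, assuming MyDocument is a concrete FlowDocument subclass as in the surrounding example:

    from pydantic import BaseModel

    class Point(BaseModel):
        x: int
        y: int

    doc = MyDocument.create(name="point.json", content=Point(x=1, y=2))  # model -> JSON bytes
    restored = doc.as_pydantic_model(Point)                              # JSON bytes -> model
    assert restored == Point(x=1, y=2)                                   # lossless round trip
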
@@ -146,10 +200,23 @@ class Document(BaseModel, ABC):
         ...     CONFIG = "config.yaml"
         ...     SETTINGS = "settings.json"
         >>>
-        >>> # RECOMMENDED: Use create for automatic conversion
-        >>> doc = MyDocument.create(name="data.json", content={"key": "value"})
-        >>> print(doc.is_text)  # True
-        >>> data = doc.as_json()  # {'key': 'value'}
+        >>> # CORRECT FILES usage - no magic strings:
+        >>> doc = ConfigDocument.create(
+        ...     name=ConfigDocument.FILES.CONFIG,  # Use enum
+        ...     content={"key": "value"}
+        ... )
+        >>>
+        >>> # CORRECT Pydantic usage:
+        >>> class Config(BaseModel):
+        ...     key: str
+        >>>
+        >>> # Direct creation from Pydantic model
+        >>> config_model = Config(key="value")
+        >>> doc = MyDocument.create(name="data.json", content=config_model)
+        >>>
+        >>> # Direct extraction to Pydantic model
+        >>> restored = doc.as_pydantic_model(Config)
+        >>> print(restored.key)  # "value"
         >>>
         >>> # Track document provenance with sources
         >>> source_doc = MyDocument.create(name="input.txt", content="raw data")
@@ -159,6 +226,14 @@ class Document(BaseModel, ABC):
         ...     sources=[source_doc.sha256]  # Reference source document
         ... )
         >>> processed.has_source(source_doc)  # True
+        >>>
+        >>> # Document copying and type conversion:
+        >>> # Standard Pydantic model_copy (doesn't validate updates)
+        >>> copied = doc.model_copy(update={"name": "new_name.json"})
+        >>> # Type conversion with validation via model_convert
+        >>> task_doc = MyTaskDoc.create(name="temp.json", content={"data": "value"})
+        >>> flow_doc = task_doc.model_convert(MyFlowDoc)  # Convert to FlowDocument
+        >>> flow_doc.is_flow  # True
     """
 
     MAX_CONTENT_SIZE: ClassVar[int] = 25 * 1024 * 1024
@@ -170,6 +245,9 @@ class Document(BaseModel, ABC):
     DESCRIPTION_EXTENSION: ClassVar[str] = ".description.md"
     """File extension for description files."""
 
+    SOURCES_EXTENSION: ClassVar[str] = ".sources.json"
+    """File extension for sources metadata files."""
+
     MARKDOWN_LIST_SEPARATOR: ClassVar[str] = "\n\n-----------------\n\n"
     """Separator for markdown list items."""
 
@@ -288,7 +366,7 @@ class Document(BaseModel, ABC):
         content types and automatically converts them to bytes based on the file
         extension. Use the `parse` method to reverse this conversion.
 
-        Best Practice (90% of cases):
+        Best Practice (by default, unless instructed otherwise):
             Only provide name and content. The description parameter is RARELY needed.
 
         Args:
@@ -302,8 +380,8 @@ class Document(BaseModel, ABC):
                 - bytes: Used directly without conversion
                 - str: Encoded to UTF-8 bytes
                 - dict[str, Any]: Serialized to JSON (.json) or YAML (.yaml/.yml)
-                - list[str]: Joined with separator for .md (validates no items
-                  contain separator), else JSON/YAML
+                - list[str]: Joined automatically for .md (validates format compatibility),
+                  else JSON/YAML
                 - list[BaseModel]: Serialized to JSON or YAML based on extension
                 - BaseModel: Serialized to JSON or YAML based on extension
             description: Optional description - USUALLY OMIT THIS (defaults to None).
@@ -319,7 +397,7 @@ class Document(BaseModel, ABC):
 
         Raises:
             ValueError: If content type is not supported for the file extension,
-                or if markdown list items contain the separator
+                or if markdown list format is incompatible
             DocumentNameError: If filename violates validation rules
             DocumentSizeError: If content exceeds MAX_CONTENT_SIZE
 
@@ -329,7 +407,7 @@ class Document(BaseModel, ABC):
         returns the original dictionary {"key": "value"}.
 
         Example:
-            >>> # CORRECT - no description needed (90% of cases)
+            >>> # CORRECT - no description needed (by default, unless instructed otherwise)
            >>> doc = MyDocument.create(name="test.txt", content="Hello World")
            >>> doc.content  # b'Hello World'
            >>> doc.parse(str)  # "Hello World"
@@ -427,10 +505,6 @@ class Document(BaseModel, ABC):
            >>> doc = MyDocument.create(name="data.json", content={"key": "value"})
            >>> doc = MyDocument.create(name="config.yaml", content=my_model)
            >>> doc = MyDocument.create(name="items.md", content=["item1", "item2"])
-
-        See Also:
-            create: Recommended factory method with automatic type conversion
-            parse: Method to reverse the conversion done by create
         """
         if type(self) is Document:
             raise TypeError("Cannot instantiate abstract Document class directly")
@@ -467,8 +541,7 @@ class Document(BaseModel, ABC):
 
         Note:
             This method determines document persistence and lifecycle.
-            FlowDocument returns "flow", TaskDocument returns "task",
-            TemporaryDocument returns "temporary".
+            FlowDocument returns "flow", TaskDocument returns "task".
         """
         raise NotImplementedError("Subclasses must implement this method")
 
@@ -520,7 +593,7 @@ class Document(BaseModel, ABC):
         during execution.
 
         Returns:
-            True if this is a TemporaryDocument, False otherwise.
+            True if this document is temporary, False otherwise.
         """
         return self.get_base_type() == "temporary"
 
@@ -565,8 +638,6 @@ class Document(BaseModel, ABC):
     def validate_file_name(cls, name: str) -> None:
         """Validate that a file name matches allowed patterns.
 
-        @public
-
         DO NOT OVERRIDE this method if you define a FILES enum!
         The validation is automatic when FILES enum is present.
 
@@ -610,7 +681,7 @@ class Document(BaseModel, ABC):
 
         Ensures the document name is secure and follows conventions:
         - No path traversal characters (.., \\, /)
-        - Cannot end with .description.md
+        - Cannot end with .description.md or .sources.json
        - No leading/trailing whitespace
        - Must match FILES enum if defined
 
@@ -635,6 +706,9 @@ class Document(BaseModel, ABC):
                 f"Document names cannot end with {cls.DESCRIPTION_EXTENSION}: {v}"
             )
 
+        if v.endswith(cls.SOURCES_EXTENSION):
+            raise DocumentNameError(f"Document names cannot end with {cls.SOURCES_EXTENSION}: {v}")
+
         if ".." in v or "\\" in v or "/" in v:
             raise DocumentNameError(f"Invalid filename - contains path traversal characters: {v}")
 
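With SOURCES_EXTENSION added to the validator, two suffixes are now reserved for sidecar metadata files. A sketch of the observable behavior, assuming MyDocument is a concrete subclass:

    MyDocument.create(name="notes.md", content="x")              # fine
    MyDocument.create(name="notes.description.md", content="x")  # raises DocumentNameError
    MyDocument.create(name="notes.sources.json", content="x")    # raises DocumentNameError (new in 0.2.x)
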
@@ -659,7 +733,7 @@ class Document(BaseModel, ABC):
         2. str → UTF-8 encoding
         3. dict/BaseModel + .json → JSON serialization (indented)
         4. dict/BaseModel + .yaml/.yml → YAML serialization
-        5. list[str] + .md → Join with markdown separator (validates no items contain separator)
+        5. list[str] + .md → Join with markdown sections (validates format compatibility)
         6. list[Any] + .json/.yaml → JSON/YAML array
         7. int/float/bool + .json → JSON primitive
 
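The conversion rules are easiest to see side by side. A sketch, assuming MyDocument is a concrete subclass; the exact JSON whitespace is the library's choice:

    text_doc = MyDocument.create(name="a.txt", content="hello")            # rule 2: UTF-8 bytes
    json_doc = MyDocument.create(name="a.json", content={"k": "v"})        # rule 3: indented JSON
    md_doc = MyDocument.create(name="a.md", content=["Part 1", "Part 2"])  # rule 5: joined sections

    text_doc.parse(str)        # "hello"
    json_doc.as_json()         # {'k': 'v'}
    md_doc.as_markdown_list()  # ['Part 1', 'Part 2']
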
@@ -1028,8 +1102,6 @@ class Document(BaseModel, ABC):
     def as_yaml(self) -> Any:
         r"""Parse document content as YAML.
 
-        @public
-
         Parses the document's text content as YAML and returns Python objects.
         Uses ruamel.yaml which is safe by default (no code execution).
 
@@ -1057,8 +1129,6 @@ class Document(BaseModel, ABC):
     def as_json(self) -> Any:
         """Parse document content as JSON.
 
-        @public
-
         Parses the document's text content as JSON and returns Python objects.
         Document must contain valid JSON text.
 
@@ -1153,7 +1223,7 @@ class Document(BaseModel, ABC):
 
         @public
 
-        Splits text content using markdown separator ("\n\n-----------------\n\n").
+        Splits text content automatically using markdown section separators.
         Designed for markdown documents with multiple sections.
 
         Returns:
@@ -1168,9 +1238,9 @@ class Document(BaseModel, ABC):
             >>> doc = MyDocument.create(name="book.md", content=sections)
             >>> doc.as_markdown_list()  # Returns original sections
 
-            >>> # Manual creation with separator
-            >>> content = "Part 1\n\n-----------------\n\nPart 2\n\n-----------------\n\nPart 3"
-            >>> doc2 = MyDocument(name="parts.md", content=content.encode())
+            >>> # Round-trip conversion works automatically
+            >>> sections = ["Part 1", "Part 2", "Part 3"]
+            >>> doc2 = MyDocument.create(name="parts.md", content=sections)
             >>> doc2.as_markdown_list()  # ['Part 1', 'Part 2', 'Part 3']
         """
         return self.text.split(self.MARKDOWN_LIST_SEPARATOR)
@@ -1207,7 +1277,7 @@ class Document(BaseModel, ABC):
         Extension Rules:
         - .json → JSON parsing for dict/list/BaseModel
         - .yaml/.yml → YAML parsing for dict/list/BaseModel
-        - .md + list → Split by markdown separator
+        - .md + list → Split automatically into sections
         - Any + str → UTF-8 decode
         - Any + bytes → Raw content
 
@@ -1223,8 +1293,7 @@ class Document(BaseModel, ABC):
 
             >>> # Markdown list
             >>> items = ["Item 1", "Item 2"]
-            >>> content = "\n\n---\n\n".join(items).encode()
-            >>> doc = MyDocument(name="list.md", content=content)
+            >>> doc = MyDocument.create(name="list.md", content=items)
             >>> doc.parse(list)
             ['Item 1', 'Item 2']
         """
@@ -1330,11 +1399,6 @@ class Document(BaseModel, ABC):
             >>> # Check if specific document is a source
             >>> if source1.sha256 in doc_refs:
             ...     print("Document derived from source1")
-
-        See Also:
-            - get_source_references: Get non-document source references (URLs, etc.)
-            - has_source: Check if a specific source is tracked
-            - Document.create: Add sources when creating documents
         """
         return [src for src in self.sources if is_document_sha256(src)]
 
@@ -1372,11 +1436,6 @@ class Document(BaseModel, ABC):
             >>> # Use for attribution or debugging
             >>> for ref in refs:
             ...     print(f"Data sourced from: {ref}")
-
-        See Also:
-            - get_source_documents: Get document SHA256 references
-            - has_source: Check if a specific source is tracked
-            - Document.create: Add sources when creating documents
         """
         return [src for src in self.sources if not is_document_sha256(src)]
 
@@ -1422,11 +1481,6 @@ class Document(BaseModel, ABC):
             >>> # Check by SHA256 directly
             >>> if derived.has_source(source_doc.sha256):
             ...     print("Has specific hash")
-
-        See Also:
-            - get_source_documents: Get all document sources
-            - get_source_references: Get all reference sources
-            - Document.create: Add sources when creating documents
         """
         if isinstance(source, str):
             # Direct string comparison
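
The three source accessors split a single sources list by whether an entry looks like a document hash. A sketch, assuming MyDocument is a concrete subclass; the URL is an illustrative reference value:

    source = MyDocument.create(name="input.txt", content="raw data")
    derived = MyDocument.create(
        name="output.txt",
        content="processed",
        sources=[source.sha256, "https://example.com/dataset"],  # hash + free-form reference
    )

    derived.has_source(source)       # True - matched via the document's sha256
    derived.get_source_documents()   # [source.sha256]
    derived.get_source_references()  # ["https://example.com/dataset"]
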
@@ -1455,6 +1509,8 @@ class Document(BaseModel, ABC):
             - sha256: Full SHA256 hash in base32 encoding without padding (str)
             - mime_type: Detected MIME type (str)
             - sources: List of source strings (list[dict])
+            - canonical_name: Canonical snake_case name for debug tracing (str)
+            - class_name: Name of the actual document class for debug tracing (str)
             - content: Encoded content (str)
             - content_encoding: Either "utf-8" or "base64" (str)
 
@@ -1478,10 +1534,12 @@ class Document(BaseModel, ABC):
             "sha256": self.sha256,
             "mime_type": self.mime_type,
             "sources": self.sources,
+            "canonical_name": canonical_name_key(self.__class__),
+            "class_name": self.__class__.__name__,
         }
 
         # Try to encode content as UTF-8, fall back to base64
-        if self.is_text or self.mime_type.startswith("text/"):
+        if self.is_text:
             try:
                 result["content"] = self.content.decode("utf-8")
                 result["content_encoding"] = "utf-8"
@@ -1557,3 +1615,96 @@ class Document(BaseModel, ABC):
             description=data.get("description"),
             sources=data.get("sources", []),
         )
+
+    @final
+    def model_convert(
+        self,
+        new_type: type[TDocument],
+        *,
+        update: dict[str, Any] | None = None,
+        deep: bool = False,
+    ) -> TDocument:
+        """Convert document to a different Document type with optional updates.
+
+        @public
+
+        Creates a new document of a different type, preserving all attributes
+        while allowing updates. This is useful for converting between document
+        types (e.g., TaskDocument to FlowDocument) while maintaining data integrity.
+
+        Args:
+            new_type: Target Document class for conversion. Must be a concrete
+                subclass of Document (not abstract classes like Document,
+                FlowDocument, or TaskDocument).
+            update: Dictionary of attributes to update. Supports any attributes
+                that the Document constructor accepts (name, content,
+                description, sources).
+            deep: Whether to perform a deep copy of mutable attributes.
+
+        Returns:
+            New Document instance of the specified type.
+
+        Raises:
+            TypeError: If new_type is not a subclass of Document, is an abstract
+                class, or if update contains invalid attributes.
+            DocumentNameError: If the name violates the target type's FILES enum.
+            DocumentSizeError: If content exceeds MAX_CONTENT_SIZE.
+
+        Example:
+            >>> # Convert TaskDocument to FlowDocument
+            >>> task_doc = MyTaskDoc.create(name="temp.json", content={"data": "value"})
+            >>> flow_doc = task_doc.model_convert(MyFlowDoc)
+            >>> assert flow_doc.is_flow
+            >>> assert flow_doc.content == task_doc.content
+            >>>
+            >>> # Convert with updates
+            >>> updated = task_doc.model_convert(
+            ...     MyFlowDoc,
+            ...     update={"name": "permanent.json", "description": "Converted"}
+            ... )
+            >>>
+            >>> # Track document lineage
+            >>> derived = doc.model_convert(
+            ...     ProcessedDoc,
+            ...     update={"sources": [doc.sha256]}
+            ... )
+        """
+        # Validate new_type
+        try:
+            # Use a runtime check to ensure it's a class
+            if not isinstance(new_type, type):  # type: ignore[reportIncompatibleArgumentType]
+                raise TypeError(f"new_type must be a class, got {new_type}")
+            if not issubclass(new_type, Document):  # type: ignore[reportIncompatibleArgumentType]
+                raise TypeError(f"new_type must be a subclass of Document, got {new_type}")
+        except (TypeError, AttributeError):
+            # Not a class at all
+            raise TypeError(f"new_type must be a subclass of Document, got {new_type}")
+
+        # Check for abstract classes by name (avoid circular imports)
+        class_name = new_type.__name__
+        if class_name == "Document":
+            raise TypeError("Cannot instantiate abstract Document class directly")
+        if class_name == "FlowDocument":
+            raise TypeError("Cannot instantiate abstract FlowDocument class directly")
+        if class_name == "TaskDocument":
+            raise TypeError("Cannot instantiate abstract TaskDocument class directly")
+
+        # Get current document data with proper typing
+        data: dict[str, Any] = {
+            "name": self.name,
+            "content": self.content,
+            "description": self.description,
+            "sources": self.sources.copy() if deep else self.sources,
+        }
+
+        # Apply updates if provided
+        if update:
+            data.update(update)
+
+        # Create new document of target type
+        return new_type(
+            name=data["name"],
+            content=data["content"],
+            description=data.get("description"),
+            sources=data.get("sources", []),
+        )
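
The practical split the new method creates: model_copy() remains Pydantic's same-type copy and does not re-validate updates, while model_convert() re-runs the target constructor, so name, size, and FILES-enum rules are enforced against the new type. A usage sketch with illustrative MyTaskDoc and MyFlowDoc subclasses:

    task_doc = MyTaskDoc.create(name="temp.json", content={"data": "value"})

    # Cross-type conversion with lineage, validated against MyFlowDoc's rules
    flow_doc = task_doc.model_convert(MyFlowDoc, update={"sources": [task_doc.sha256]})
    assert flow_doc.is_flow
    assert flow_doc.has_source(task_doc)

    # Same-type rename: plain Pydantic copy, updates are not validated
    renamed = task_doc.model_copy(update={"name": "renamed.json"})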