PyPI - ai-pipeline-core - Versions diffs - 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

ai-pipeline-core 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (96)

ai_pipeline_core/__init__.py +64 -158
ai_pipeline_core/deployment/__init__.py +6 -18
ai_pipeline_core/deployment/base.py +392 -212
ai_pipeline_core/deployment/contract.py +6 -10
ai_pipeline_core/{utils → deployment}/deploy.py +50 -69
ai_pipeline_core/deployment/helpers.py +16 -17
ai_pipeline_core/{progress.py → deployment/progress.py} +23 -24
ai_pipeline_core/{utils/remote_deployment.py → deployment/remote.py} +11 -14
ai_pipeline_core/docs_generator/__init__.py +54 -0
ai_pipeline_core/docs_generator/__main__.py +5 -0
ai_pipeline_core/docs_generator/cli.py +196 -0
ai_pipeline_core/docs_generator/extractor.py +324 -0
ai_pipeline_core/docs_generator/guide_builder.py +644 -0
ai_pipeline_core/docs_generator/trimmer.py +35 -0
ai_pipeline_core/docs_generator/validator.py +114 -0
ai_pipeline_core/document_store/__init__.py +13 -0
ai_pipeline_core/document_store/_summary.py +9 -0
ai_pipeline_core/document_store/_summary_worker.py +170 -0
ai_pipeline_core/document_store/clickhouse.py +492 -0
ai_pipeline_core/document_store/factory.py +38 -0
ai_pipeline_core/document_store/local.py +312 -0
ai_pipeline_core/document_store/memory.py +85 -0
ai_pipeline_core/document_store/protocol.py +68 -0
ai_pipeline_core/documents/__init__.py +12 -14
ai_pipeline_core/documents/_context_vars.py +85 -0
ai_pipeline_core/documents/_hashing.py +52 -0
ai_pipeline_core/documents/attachment.py +85 -0
ai_pipeline_core/documents/context.py +128 -0
ai_pipeline_core/documents/document.py +318 -1434
ai_pipeline_core/documents/mime_type.py +11 -84
ai_pipeline_core/documents/utils.py +4 -12
ai_pipeline_core/exceptions.py +10 -62
ai_pipeline_core/images/__init__.py +32 -85
ai_pipeline_core/images/_processing.py +5 -11
ai_pipeline_core/llm/__init__.py +6 -4
ai_pipeline_core/llm/ai_messages.py +102 -90
ai_pipeline_core/llm/client.py +229 -183
ai_pipeline_core/llm/model_options.py +12 -84
ai_pipeline_core/llm/model_response.py +53 -99
ai_pipeline_core/llm/model_types.py +8 -23
ai_pipeline_core/logging/__init__.py +2 -7
ai_pipeline_core/logging/logging.yml +1 -1
ai_pipeline_core/logging/logging_config.py +27 -37
ai_pipeline_core/logging/logging_mixin.py +15 -41
ai_pipeline_core/observability/__init__.py +32 -0
ai_pipeline_core/observability/_debug/__init__.py +30 -0
ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
ai_pipeline_core/{debug/config.py → observability/_debug/_config.py} +11 -7
ai_pipeline_core/{debug/content.py → observability/_debug/_content.py} +133 -75
ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py} +16 -17
ai_pipeline_core/{debug/summary.py → observability/_debug/_summary.py} +113 -37
ai_pipeline_core/observability/_debug/_types.py +75 -0
ai_pipeline_core/{debug/writer.py → observability/_debug/_writer.py} +126 -196
ai_pipeline_core/observability/_document_tracking.py +146 -0
ai_pipeline_core/observability/_initialization.py +194 -0
ai_pipeline_core/observability/_logging_bridge.py +57 -0
ai_pipeline_core/observability/_summary.py +81 -0
ai_pipeline_core/observability/_tracking/__init__.py +6 -0
ai_pipeline_core/observability/_tracking/_client.py +178 -0
ai_pipeline_core/observability/_tracking/_internal.py +28 -0
ai_pipeline_core/observability/_tracking/_models.py +138 -0
ai_pipeline_core/observability/_tracking/_processor.py +158 -0
ai_pipeline_core/observability/_tracking/_service.py +311 -0
ai_pipeline_core/observability/_tracking/_writer.py +229 -0
ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -335
ai_pipeline_core/pipeline/__init__.py +10 -0
ai_pipeline_core/pipeline/decorators.py +915 -0
ai_pipeline_core/pipeline/options.py +16 -0
ai_pipeline_core/prompt_manager.py +16 -102
ai_pipeline_core/settings.py +26 -31
ai_pipeline_core/testing.py +9 -0
ai_pipeline_core-0.4.0.dist-info/METADATA +807 -0
ai_pipeline_core-0.4.0.dist-info/RECORD +76 -0
ai_pipeline_core/debug/__init__.py +0 -26
ai_pipeline_core/documents/document_list.py +0 -420
ai_pipeline_core/documents/flow_document.py +0 -112
ai_pipeline_core/documents/task_document.py +0 -117
ai_pipeline_core/documents/temporary_document.py +0 -74
ai_pipeline_core/flow/__init__.py +0 -9
ai_pipeline_core/flow/config.py +0 -494
ai_pipeline_core/flow/options.py +0 -75
ai_pipeline_core/pipeline.py +0 -718
ai_pipeline_core/prefect.py +0 -63
ai_pipeline_core/prompt_builder/__init__.py +0 -5
ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -23
ai_pipeline_core/prompt_builder/global_cache.py +0 -78
ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -6
ai_pipeline_core/prompt_builder/prompt_builder.py +0 -253
ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -41
ai_pipeline_core/storage/__init__.py +0 -8
ai_pipeline_core/storage/storage.py +0 -628
ai_pipeline_core/utils/__init__.py +0 -8
ai_pipeline_core-0.3.4.dist-info/METADATA +0 -569
ai_pipeline_core-0.3.4.dist-info/RECORD +0 -57
{ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/WHEEL +0 -0
{ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/licenses/LICENSE +0 -0

ai_pipeline_core-0.4.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,76 @@
+ai_pipeline_core/__init__.py,sha256=_GM0O3dDQuCvXQD44dyXKvzLOZk2htwwFS0mXpvxJQU,3270
+ai_pipeline_core/exceptions.py,sha256=csAl7vq6xjSFBF8-UM9WZODCbhsOdOG5zH6IbA8iteM,1280
+ai_pipeline_core/prompt_manager.py,sha256=3wFkL5rrjtUT1cLInkgyhS8hKnO4MeD1cdXAEuLhgoE,9459
+ai_pipeline_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ai_pipeline_core/settings.py,sha256=BUz8JEFfJQrdE4rNOhQWwxnTrfekLjWkoy-3wDZQ7PY,5142
+ai_pipeline_core/testing.py,sha256=jIRrLxNvTwdamucfJoHET2qMeRhhMZV9uEJXO5vAfis,279
+ai_pipeline_core/deployment/__init__.py,sha256=wTkVK6gcEQvqBajFMTAuodRONpN25yHbR1jtcumf0WQ,900
+ai_pipeline_core/deployment/base.py,sha256=ros0VzvkNCwbPgG9D49ceKSjTq857iRHzvW-uhiaNUE,34750
+ai_pipeline_core/deployment/contract.py,sha256=a1qbHhneTGB27oSOUy79CUIhOIzOoq37M63XoIMzA4Y,1952
+ai_pipeline_core/deployment/deploy.py,sha256=89W0w22cRkK_yMrn9iuH5L4dvnlMu31eojhJIKHtK2E,21991
+ai_pipeline_core/deployment/helpers.py,sha256=yVtGFUs4AFXkpLkiQ_ale0nXXt5btfWSb5PAbikQHNs,3312
+ai_pipeline_core/deployment/progress.py,sha256=5tVD9nW0N-b8Z2BxazcWCWHFpLu6pJ-eqPmRyj68X6Y,3591
+ai_pipeline_core/deployment/remote.py,sha256=tOexisKEeeBoHLGYZWqcjr2H-nqqYc6kvoDL72AW78w,4661
+ai_pipeline_core/docs_generator/__init__.py,sha256=JbWbk-Lw5GgWrCMRuw8zvKNTZY2jXv7XqoMiBYudvRI,1255
+ai_pipeline_core/docs_generator/__main__.py,sha256=CH4agiM2suFJ63MhTg5m0GuXdc40z-6o4ojR72JQWVA,145
+ai_pipeline_core/docs_generator/cli.py,sha256=8OjdMtzQraPxWN3uPapSNJnKyPLPtnygKL0rF5JL2GY,7172
+ai_pipeline_core/docs_generator/extractor.py,sha256=yHQfeb_LwgBZW5dBY65L6a4qvNvxnwWiFXhqeZV5y5w,10631
+ai_pipeline_core/docs_generator/guide_builder.py,sha256=cxVEoYMfwOsFWDLbXddJ7IBBCRshbfSUoQ84ZMw_YQE,22232
+ai_pipeline_core/docs_generator/trimmer.py,sha256=olsl4MSmMHqsIEeVu9HU7xjONmIbSU7NmPwWdhOH6AA,1052
+ai_pipeline_core/docs_generator/validator.py,sha256=w-UdE6h6LLCwVy9Qqmv-TavIttA_1mcRoAhF9_HKszc,4460
+ai_pipeline_core/document_store/__init__.py,sha256=5aHsCpRkfkaLhLo0sVaKXEjqCcRYgzkVJErULKUpDAk,366
+ai_pipeline_core/document_store/_summary.py,sha256=qwy4kHEEwHwXGN7LVol09qzf7RjOJ2-6qTme-mtE8aM,377
+ai_pipeline_core/document_store/_summary_worker.py,sha256=K4575wCot0EoKCEsOj8XMCS1O6aWC37S9L_3TZjONco,6659
+ai_pipeline_core/document_store/clickhouse.py,sha256=vUlN2rIxCn5A8ceBFbpaHPS2O3tYEuR_UZkffWdy7E4,20636
+ai_pipeline_core/document_store/factory.py,sha256=F56ZM8TxgzFNYUkdzZidbxTe-JDiIAqi_tlE30cdlp0,1499
+ai_pipeline_core/document_store/local.py,sha256=r_dCJ46fto89yxZfKuGNQonpocQ1TwFFaCUntW-ZSQw,13396
+ai_pipeline_core/document_store/memory.py,sha256=MlsWHLLaEK6MdHBPZUgsNkbkFPvd2d2gFcfdDMBYvXo,3679
+ai_pipeline_core/document_store/protocol.py,sha256=UhA60PuSMBwpX9yVLOtUAsKqdPnU2synDos6cB-WQng,2407
+ai_pipeline_core/documents/__init__.py,sha256=LphKH_CiN3BQ0gjtJps1Y1WF_Lt2Qg-75aq2U1_PvP8,723
+ai_pipeline_core/documents/_context_vars.py,sha256=JbgQoCNaHPrOAMlEa6HYB8Ti7iw_jQfZOi5eNrwHYWg,2687
+ai_pipeline_core/documents/_hashing.py,sha256=_u1P4z1bMNSREJ6GNf3sSqf0TCrDr9sVcAIG9bnORnU,1667
+ai_pipeline_core/documents/attachment.py,sha256=eVpb27Qu8mLO2Bxv_JYd5JXhgViaJusVM8RBGcU1iQE,2951
+ai_pipeline_core/documents/context.py,sha256=vlORnRk2klMTZk6X4jEJayeH4B2Xo6ZxZ-31mtdba6o,5482
+ai_pipeline_core/documents/document.py,sha256=lU3hqbyYswRlWAiS9YGW9IRZnVA-3qmMpnAKPmi8Bws,26157
+ai_pipeline_core/documents/mime_type.py,sha256=QeRX6GiQnTpqx3Fk8QLhi1lT0Z5uEs496dGc4_xqgsA,6530
+ai_pipeline_core/documents/utils.py,sha256=9WOW3zvKYxQPnM8LjYFy3V9-yqc6hwgCaiog3kUH274,5413
+ai_pipeline_core/images/__init__.py,sha256=Hc2QKR27Q2Q-h5nH-EbzfxdE3dHArBm-st5_xjOKFh0,8854
+ai_pipeline_core/images/_processing.py,sha256=MrCuPGsyyEl9UlXYIPhZs0wN8CPTMZmejV2Lo2wyCZk,4362
+ai_pipeline_core/llm/__init__.py,sha256=oyRvYD5DLDl7JIRTBUaiVz6jUC5dLLujkMNFpfRp2zc,795
+ai_pipeline_core/llm/ai_messages.py,sha256=Ycmntk5d6NUFqVVsnNR_IDwJUFuHYEH7CPvmmDfYaJI,17424
+ai_pipeline_core/llm/client.py,sha256=CjxOiniuy5CEsA_Xz0KPLCBthbnUfC43fTpuDcqkIUM,30276
+ai_pipeline_core/llm/model_options.py,sha256=hg8xR0RJdJKp8QJNA4EbLnfFsnkE4HnxD85aYxc--hM,9164
+ai_pipeline_core/llm/model_response.py,sha256=Ml9wcssSssqibReJxCc9EQu488pz69Cmq_XNBs_xmak,12219
+ai_pipeline_core/llm/model_types.py,sha256=qHoUPPEkHu9B4kJ5xcIC09fk72v667ZxvzigxtgLpVo,2174
+ai_pipeline_core/logging/__init__.py,sha256=H8G3bycxwNxc4e4Gjwi-al9e2ufTJbTV5iFKCF1Ticw,495
+ai_pipeline_core/logging/logging.yml,sha256=qsf6vcxtWIHD5xwJGtylibiuy_0KF_Ji7-qb-xvFtaU,1357
+ai_pipeline_core/logging/logging_config.py,sha256=JnTarGSSkpi7eqR7N13TLKeuwNCvZgwJUPlhObiwrHk,6095
+ai_pipeline_core/logging/logging_mixin.py,sha256=Jn3x0xvSwSjbAMfWELMOEfffWBB1u4IeIr7M2-55CJs,7191
+ai_pipeline_core/observability/__init__.py,sha256=km2nIiY3aYH13s2m4nR91erQG4qKnGuvQkrKDdVW3bw,720
+ai_pipeline_core/observability/_document_tracking.py,sha256=tXv6rbGIuxOYdq22aVbyn9Ve5EhYHPnrYCE-kj2NGXI,5428
+ai_pipeline_core/observability/_initialization.py,sha256=GfwRHpg90Og3PzmG1ZUilJVXoFx9BIWpbMgXxJ5Alqk,6747
+ai_pipeline_core/observability/_logging_bridge.py,sha256=T3PpkgoI0YKN2vvBJEHzR5rFMFNHq9REHJs7PQX2VQk,2053
+ai_pipeline_core/observability/_summary.py,sha256=GAZXzXVkwUcubSiGb5DgkHfO1gGwx6pYoDz6RUJmL5k,3390
+ai_pipeline_core/observability/tracing.py,sha256=KhIXSl5fe39UE1Eokz9-1fe5biX6anKbwZDmXY_Z2LU,27050
+ai_pipeline_core/observability/_debug/__init__.py,sha256=V8pbgdQOx-7oFKQ_sNzAZ1-oq5c73P4kVjEClZDXe8k,942
+ai_pipeline_core/observability/_debug/_auto_summary.py,sha256=LMvETvx_RPKF8srewCKwjigTiWs3KfDmQAYYSuVybIM,2687
+ai_pipeline_core/observability/_debug/_config.py,sha256=CWfnK-F3knUuOQ34y_CjmU3l67J85NIZ3siftYhevc0,3367
+ai_pipeline_core/observability/_debug/_content.py,sha256=ECy2vR8wDHJq0RD2X10XS-ed8uCq9VD3K8fnGOjQxgs,30657
+ai_pipeline_core/observability/_debug/_processor.py,sha256=FkX1xqeJds-Gctt5keYSTSaC85FM4QaeFIEevTn7Qh8,3875
+ai_pipeline_core/observability/_debug/_summary.py,sha256=gD7MtWldBRs2VniQxUBjr6XbD2Z8HhbqZdnkcr4HdzE,11274
+ai_pipeline_core/observability/_debug/_types.py,sha256=Cw80SWSVso02kkj6T7hICGU_vn3W2RUEv74h94V5ZfI,2220
+ai_pipeline_core/observability/_debug/_writer.py,sha256=0JOmaQtg9OuhqcAf15C2AAlkicIOGEoyWerKTiOmuTw,32497
+ai_pipeline_core/observability/_tracking/__init__.py,sha256=tiZhj_d0STL0ACq2mTktciGjXzpepfMtl5KA_OFElTE,245
+ai_pipeline_core/observability/_tracking/_client.py,sha256=q3YhKJVm3jEWDXzKclQmndZ6RYIu7_F4Az_uY98uA-k,6634
+ai_pipeline_core/observability/_tracking/_internal.py,sha256=zv4DI2a8pG3wM_QEuwTNxk2V_q0jEZe6HsL6P7eVO7Y,820
+ai_pipeline_core/observability/_tracking/_models.py,sha256=p3nZucNGr-JsdscqnbJOy8JL1B-w6p43I-1bXvOkfc8,3237
+ai_pipeline_core/observability/_tracking/_processor.py,sha256=H8D82gRs4JY6ya0ewojoVAg85FUQV-imi9bQO8M0kGU,5999
+ai_pipeline_core/observability/_tracking/_service.py,sha256=diK-0qJg4HU-BHgpN1NTyFEbgPXq2e0gluRq21B8IbE,10357
+ai_pipeline_core/observability/_tracking/_writer.py,sha256=xZjwYyIxDzzzPxqkKjYAYOyNP4uvKXZ-r_u-APSV_x8,9246
+ai_pipeline_core/pipeline/__init__.py,sha256=uMv1jwSyq8Ym8Hbn5097twBJLdwN1iMeqnVM4EWyrhA,282
+ai_pipeline_core/pipeline/decorators.py,sha256=CDJAeOjGLt5Ewc0Jc9zEuwLZwKyutOv89LSRS9dcXmI,37456
+ai_pipeline_core/pipeline/options.py,sha256=Y--5-DxzxR1Ul4GltGPP3JUIk8bw1GlUdZ3IDX8UIHQ,439
+ai_pipeline_core-0.4.0.dist-info/METADATA,sha256=hH5B9XsY_NT4bCB1W-LvBEG5LYR7SyaXdt4Z75PWvEE,29947
+ai_pipeline_core-0.4.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ai_pipeline_core-0.4.0.dist-info/licenses/LICENSE,sha256=kKj8mfbdWwkyG3U6n7ztB3bAZlEwShTkAsvaY657i3I,1074
+ai_pipeline_core-0.4.0.dist-info/RECORD,,

ai_pipeline_core/debug/__init__.py DELETED Viewed

@@ -1,26 +0,0 @@
-"""Local trace debugging system for AI pipelines.
-This module provides filesystem-based trace debugging that saves all spans
-with their inputs/outputs for LLM-assisted debugging.
-Enable by setting TRACE_DEBUG_PATH environment variable.
-"""
-from .config import TraceDebugConfig
-from .content import ArtifactStore, ContentRef, ContentWriter, reconstruct_span_content
-from .processor import LocalDebugSpanProcessor
-from .summary import generate_summary
-from .writer import LocalTraceWriter, TraceState, WriteJob
-__all__ = [
-    "TraceDebugConfig",
-    "ContentRef",
-    "ContentWriter",
-    "ArtifactStore",
-    "reconstruct_span_content",
-    "LocalDebugSpanProcessor",
-    "LocalTraceWriter",
-    "TraceState",
-    "WriteJob",
-    "generate_summary",
-]

ai_pipeline_core/documents/document_list.py DELETED Viewed

@@ -1,420 +0,0 @@
-"""Type-safe list container for Document objects.
-@public
-"""
-from copy import deepcopy
-from typing import Any, Callable, Iterable, SupportsIndex, Union, overload
-from typing_extensions import Self
-from .document import Document
-class DocumentList(list[Document]):
-    """Type-safe container for Document objects.
-    @public
-    Specialized list with validation and filtering for documents.
-    Best Practice: Use default constructor by default, unless instructed otherwise.
-    Only enable validate_same_type or validate_duplicates when you explicitly need them.
-    Example:
-        >>> # RECOMMENDED - default constructor for most cases
-        >>> docs = DocumentList([doc1, doc2])
-        >>> # Or empty initialization
-        >>> docs = DocumentList()
-        >>> docs.append(MyDocument(name="file.txt", content=b"data"))
-        >>>
-        >>> # Only use validation flags when specifically needed:
-        >>> docs = DocumentList(validate_same_type=True)  # Rare use case
-        >>> doc = docs.get_by("file.txt")  # Get by name
-    """
-    def __init__(
-        self,
-        documents: list[Document] | None = None,
-        validate_same_type: bool = False,
-        validate_duplicates: bool = False,
-        frozen: bool = False,
-    ) -> None:
-        """Initialize DocumentList.
-        @public
-        Args:
-            documents: Initial list of documents.
-            validate_same_type: Enforce same document type.
-            validate_duplicates: Prevent duplicate filenames.
-            frozen: If True, list is immutable from creation.
-        """
-        super().__init__()
-        self._validate_same_type = validate_same_type
-        self._validate_duplicates = validate_duplicates
-        self._frozen = False  # Initialize as unfrozen to allow initial population
-        if documents:
-            self.extend(documents)
-        self._frozen = frozen  # Set frozen state after initial population
-    def _validate_no_duplicates(self) -> None:
-        """Check for duplicate document names.
-        Raises:
-            ValueError: If duplicate document names are found.
-        """
-        if not self._validate_duplicates:
-            return
-        filenames = [doc.name for doc in self]
-        seen: set[str] = set()
-        duplicates: list[str] = []
-        for name in filenames:
-            if name in seen:
-                duplicates.append(name)
-            seen.add(name)
-        if duplicates:
-            unique_duplicates = list(set(duplicates))
-            raise ValueError(f"Duplicate document names found: {unique_duplicates}")
-    def _validate_no_description_files(self) -> None:
-        """Ensure no documents use reserved description file extension.
-        Raises:
-            ValueError: If any document uses the reserved description file extension.
-        """
-        description_files = [
-            doc.name for doc in self if doc.name.endswith(Document.DESCRIPTION_EXTENSION)
-        ]
-        if description_files:
-            raise ValueError(
-                f"Documents with {Document.DESCRIPTION_EXTENSION} suffix are not allowed: "
-                f"{description_files}"
-            )
-    def _validate_types(self) -> None:
-        """Ensure all documents are of the same class type.
-        Raises:
-            ValueError: If documents have different class types.
-        """
-        if not self._validate_same_type or not self:
-            return
-        first_class = type(self[0])
-        different_types = [doc for doc in self if type(doc) is not first_class]
-        if different_types:
-            types = list({type(doc).__name__ for doc in self})
-            raise ValueError(f"All documents must have the same type. Found types: {types}")
-    def _validate(self) -> None:
-        """Run all configured validation checks."""
-        self._validate_no_duplicates()
-        self._validate_no_description_files()
-        self._validate_types()
-    def freeze(self) -> None:
-        """Permanently freeze the list, preventing modifications.
-        Once frozen, the list cannot be unfrozen.
-        """
-        self._frozen = True
-    def copy(self) -> "DocumentList":
-        """Create an unfrozen deep copy of the list.
-        Returns:
-            New unfrozen DocumentList with deep-copied documents.
-        """
-        copied_docs = deepcopy(list(self))
-        return DocumentList(
-            documents=copied_docs,
-            validate_same_type=self._validate_same_type,
-            validate_duplicates=self._validate_duplicates,
-            frozen=False,  # Copies are always unfrozen
-        )
-    def _check_frozen(self) -> None:
-        """Check if list is frozen and raise if it is.
-        Raises:
-            RuntimeError: If the list is frozen.
-        """
-        if self._frozen:
-            raise RuntimeError("Cannot modify frozen DocumentList")
-    def append(self, document: Document) -> None:
-        """Add a document to the end of the list."""
-        self._check_frozen()
-        super().append(document)
-        self._validate()
-    def extend(self, documents: Iterable[Document]) -> None:
-        """Add multiple documents to the list."""
-        self._check_frozen()
-        super().extend(documents)
-        self._validate()
-    def insert(self, index: SupportsIndex, document: Document) -> None:
-        """Insert a document at the specified position."""
-        self._check_frozen()
-        super().insert(index, document)
-        self._validate()
-    @overload
-    def __setitem__(self, index: SupportsIndex, value: Document) -> None: ...
-    @overload
-    def __setitem__(self, index: slice, value: Iterable[Document]) -> None: ...
-    def __setitem__(self, index: Union[SupportsIndex, slice], value: Any) -> None:
-        """Set item or slice with validation."""
-        self._check_frozen()
-        super().__setitem__(index, value)
-        self._validate()
-    def __iadd__(self, other: Any) -> "Self":
-        """In-place addition (+=) with validation.
-        Returns:
-            Self: This DocumentList after modification.
-        """
-        self._check_frozen()
-        result = super().__iadd__(other)
-        self._validate()
-        return result
-    def __delitem__(self, index: Union[SupportsIndex, slice]) -> None:
-        """Delete item or slice from list."""
-        self._check_frozen()
-        super().__delitem__(index)
-    def pop(self, index: SupportsIndex = -1) -> Document:
-        """Remove and return item at index.
-        Returns:
-            Document removed from the list.
-        """
-        self._check_frozen()
-        return super().pop(index)
-    def remove(self, document: Document) -> None:
-        """Remove first occurrence of document."""
-        self._check_frozen()
-        super().remove(document)
-    def clear(self) -> None:
-        """Remove all items from list."""
-        self._check_frozen()
-        super().clear()
-    def reverse(self) -> None:
-        """Reverse list in place."""
-        self._check_frozen()
-        super().reverse()
-    def sort(self, *, key: Callable[[Document], Any] | None = None, reverse: bool = False) -> None:
-        """Sort list in place."""
-        self._check_frozen()
-        if key is None:
-            super().sort(reverse=reverse)  # type: ignore[call-arg]
-        else:
-            super().sort(key=key, reverse=reverse)
-    @overload
-    def filter_by(self, arg: str) -> "DocumentList": ...
-    @overload
-    def filter_by(self, arg: type[Document]) -> "DocumentList": ...
-    @overload
-    def filter_by(self, arg: Iterable[type[Document]]) -> "DocumentList": ...
-    @overload
-    def filter_by(self, arg: Iterable[str]) -> "DocumentList": ...
-    def filter_by(
-        self, arg: str | type[Document] | Iterable[type[Document]] | Iterable[str]
-    ) -> "DocumentList":
-        """Filter documents by name(s) or type(s).
-        @public
-        ALWAYS returns a DocumentList (which may be empty), never raises an exception
-        for no matches. Use this when you want to process all matching documents.
-        Args:
-            arg: Can be one of:
-                - str: Single document name to filter by
-                - type[Document]: Single document type to filter by (includes subclasses)
-                - Iterable[type[Document]]: Multiple document types to filter by
-                  (list, tuple, set, generator, or any iterable)
-                - Iterable[str]: Multiple document names to filter by
-                  (list, tuple, set, generator, or any iterable)
-        Returns:
-            New DocumentList with filtered documents (may be empty).
-            - Returns ALL matching documents
-            - Empty DocumentList if no matches found
-        Raises:
-            TypeError: If arg is not a valid type (not str, type, or iterable),
-                or if iterable contains mixed types (strings and types together).
-            AttributeError: If arg is expected to be iterable but doesn't support iteration.
-        Example:
-            >>> # Returns list with all matching documents
-            >>> matching_docs = docs.filter_by("file.txt")  # May be empty
-            >>> for doc in matching_docs:
-            ...     process(doc)
-            >>>
-            >>> # Filter by type - returns all instances
-            >>> config_docs = docs.filter_by(ConfigDocument)
-            >>> print(f"Found {len(config_docs)} config documents")
-            >>>
-            >>> # Filter by multiple names
-            >>> important_docs = docs.filter_by(["config.yaml", "settings.json"])
-            >>> if not important_docs:  # Check if empty
-            ...     print("No important documents found")
-        """
-        if isinstance(arg, str):
-            # Filter by single name
-            return DocumentList([doc for doc in self if doc.name == arg])
-        elif isinstance(arg, type):
-            # Filter by single type (including subclasses)
-            # The type system ensures arg is type[Document] due to overloads
-            return DocumentList([doc for doc in self if isinstance(doc, arg)])
-        else:
-            # Try to consume as iterable
-            try:
-                # Convert to list to check the first element and allow reuse
-                items = list(arg)  # type: ignore[arg-type]
-                if not items:
-                    return DocumentList()
-                first_item = items[0]
-                if isinstance(first_item, str):
-                    # Iterable of names - validate all items are strings
-                    for item in items:
-                        if not isinstance(item, str):
-                            raise TypeError(
-                                "Iterable must contain only strings or only Document types, "
-                                "not mixed types"
-                            )
-                    names_set = set(items)
-                    return DocumentList([doc for doc in self if doc.name in names_set])
-                elif isinstance(first_item, type):  # type: ignore[reportUnnecessaryIsInstance]
-                    # Iterable of document types - validate all items are types
-                    for item in items:
-                        if not isinstance(item, type):
-                            raise TypeError(
-                                "Iterable must contain only strings or only Document types, "
-                                "not mixed types"
-                            )
-                    # Convert to set for efficient lookup
-                    types_set = set(items)
-                    # Filter documents that match any of the requested types
-                    matching = [
-                        doc
-                        for doc in self
-                        if any(isinstance(doc, doc_type) for doc_type in types_set)  # type: ignore[arg-type]
-                    ]
-                    return DocumentList(matching)
-                else:
-                    raise TypeError(
-                        f"Iterable must contain strings or Document types, "
-                        f"got {type(first_item).__name__}"
-                    )
-            except (TypeError, AttributeError) as e:
-                # If the error message already mentions Iterable, re-raise it
-                if "Iterable" in str(e) or "strings or Document types" in str(e):
-                    raise
-                # Otherwise, provide a generic error message
-                raise TypeError(f"Invalid argument type for filter_by: {type(arg).__name__}") from e
-    @overload
-    def get_by(self, arg: str) -> Document: ...
-    @overload
-    def get_by(self, arg: type[Document]) -> Document: ...
-    @overload
-    def get_by(self, arg: str, required: bool = True) -> Document | None: ...
-    @overload
-    def get_by(self, arg: type[Document], required: bool = True) -> Document | None: ...
-    def get_by(self, arg: str | type[Document], required: bool = True) -> Document | None:
-        """Get EXACTLY ONE document by name or type.
-        @public
-        IMPORTANT: This method expects to find exactly one matching document.
-        - If no matches and required=True: raises ValueError
-        - If no matches and required=False: returns None
-        - If multiple matches: ALWAYS raises ValueError (ambiguous)
-        When required=True (default), you do NOT need to check for None:
-            >>> doc = docs.get_by("config.yaml")  # Will raise if not found
-            >>> # No need for: if doc is not None  <- This is redundant!
-            >>> print(doc.content)  # Safe to use directly
-        Args:
-            arg: Document name (str) or document type.
-            required: If True (default), raises ValueError when not found.
-                     If False, returns None when not found.
-        Returns:
-            The single matching document, or None if not found and required=False.
-        Raises:
-            ValueError: If required=True and document not found, OR if multiple
-                       documents match (ambiguous result).
-            TypeError: If arg is not a string or Document type.
-        Example:
-            >>> # CORRECT - No need to check for None when required=True (default)
-            >>> doc = docs.get_by("file.txt")  # Raises if not found
-            >>> print(doc.content)  # Safe to use directly
-            >>>
-            >>> # When using required=False, check for None
-            >>> doc = docs.get_by("optional.txt", required=False)
-            >>> if doc is not None:
-            ...     print(doc.content)
-            >>>
-            >>> # Will raise if multiple documents have same type
-            >>> # Use filter_by() instead if you want all matches
-            >>> try:
-            ...     doc = docs.get_by(ConfigDocument)  # Error if 2+ configs
-            >>> except ValueError as e:
-            ...     configs = docs.filter_by(ConfigDocument)  # Get all instead
-        """
-        if isinstance(arg, str):
-            # Get by name - collect all matches to check for duplicates
-            matches = [doc for doc in self if doc.name == arg]
-            if len(matches) > 1:
-                raise ValueError(
-                    f"Multiple documents found with name '{arg}'. "
-                    f"Found {len(matches)} matches. Use filter_by() to get all matches."
-                )
-            if matches:
-                return matches[0]
-            if required:
-                raise ValueError(f"Document with name '{arg}' not found")
-            return None
-        elif isinstance(arg, type):  # type: ignore[reportUnnecessaryIsInstance]
-            # Get by type (including subclasses) - collect all matches
-            matches = [doc for doc in self if isinstance(doc, arg)]
-            if len(matches) > 1:
-                raise ValueError(
-                    f"Multiple documents found of type '{arg.__name__}'. "
-                    f"Found {len(matches)} matches. Use filter_by() to get all matches."
-                )
-            if matches:
-                return matches[0]
-            if required:
-                raise ValueError(f"Document of type '{arg.__name__}' not found")
-            return None
-        else:
-            raise TypeError(f"Invalid argument type for get_by: {type(arg)}")

ai_pipeline_core/documents/flow_document.py DELETED Viewed

@@ -1,112 +0,0 @@
-"""Flow-specific document base class for persistent pipeline data.
-@public
-This module provides the FlowDocument abstract base class for documents
-that need to persist across Prefect flow runs and between pipeline steps.
-"""
-from typing import Literal, final
-from .document import Document
-class FlowDocument(Document):
-    """Abstract base class for documents that persist across flow runs.
-    @public
-    FlowDocument is used for data that needs to be saved between pipeline
-    steps and across multiple flow executions. These documents are typically
-    written to the file system using the deployment utilities.
-    Key characteristics:
-    - Persisted to file system between pipeline steps
-    - Survives across multiple flow runs
-    - Used for flow inputs and outputs
-    - Saved in directories organized by the document's type/name
-    Creating FlowDocuments:
-        Same as Document - use `create()` for automatic conversion, `__init__` for bytes.
-        See Document.create() for detailed usage examples.
-    Persistence:
-        Documents are saved under an output directory path associated with the document's type/name.
-        For example: output/my_doc/data.json
-    Note:
-        - Cannot instantiate FlowDocument directly - must subclass
-        - Used with FlowConfig to define flow input/output types
-        - No additional abstract methods to implement
-    """
-    def __init__(
-        self,
-        *,
-        name: str,
-        content: bytes,
-        description: str | None = None,
-        sources: list[str] | None = None,
-    ) -> None:
-        """Initialize a FlowDocument with raw bytes content.
-        See Document.__init__() for parameter details and usage notes.
-        Prevents direct instantiation of the abstract FlowDocument class.
-        FlowDocument must be subclassed for specific document types.
-        Args:
-            name: Document filename (required, keyword-only)
-            content: Document content as raw bytes (required, keyword-only)
-            description: Optional human-readable description (keyword-only)
-            sources: Optional list of strings for provenance tracking
-        Raises:
-            TypeError: If attempting to instantiate FlowDocument directly
-                      instead of using a concrete subclass.
-        Example:
-            >>> from enum import StrEnum
-            >>>
-            >>> # Simple subclass:
-            >>> class MyFlowDoc(FlowDocument):
-            ...     pass
-            >>>
-            >>> # With FILES restriction:
-            >>> class RestrictedDoc(FlowDocument):
-            ...     class FILES(StrEnum):
-            ...         DATA = "data.json"
-            ...         METADATA = "metadata.yaml"
-            >>>
-            >>> # Direct constructor - only for bytes:
-            >>> doc = MyFlowDoc(name="test.bin", content=b"raw data")
-            >>>
-            >>> # RECOMMENDED - use create for automatic conversion:
-            >>> doc = RestrictedDoc.create(name="data.json", content={"key": "value"})
-            >>> # This would raise DocumentNameError:
-            >>> # doc = RestrictedDoc.create(name="other.json", content={})
-        """
-        if type(self) is FlowDocument:
-            raise TypeError("Cannot instantiate abstract FlowDocument class directly")
-        # Only pass sources if not None to let Pydantic's default_factory handle it
-        if sources is not None:
-            super().__init__(name=name, content=content, description=description, sources=sources)
-        else:
-            super().__init__(name=name, content=content, description=description)
-    @final
-    def get_base_type(self) -> Literal["flow"]:
-        """Return the base type identifier for flow documents.
-        This method is final and cannot be overridden by subclasses.
-        It identifies this document as a flow-persistent document.
-        Returns:
-            "flow" - Indicates this document persists across flow runs.
-        Note:
-            This determines the document's lifecycle and persistence behavior
-            in the pipeline system.
-        """
-        return "flow"

ai-pipeline-core 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

ai-pipeline-core 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl