ai-pipeline-core 0.1.14__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/.gitignore +1 -0
  2. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/PKG-INFO +19 -17
  3. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/README.md +17 -16
  4. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/__init__.py +21 -13
  5. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/documents/document.py +93 -50
  6. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/documents/document_list.py +70 -23
  7. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/documents/flow_document.py +2 -6
  8. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/documents/task_document.py +0 -4
  9. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/documents/temporary_document.py +1 -8
  10. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/flow/config.py +174 -5
  11. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/llm/ai_messages.py +14 -4
  12. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/llm/client.py +116 -59
  13. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/llm/model_options.py +2 -5
  14. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/llm/model_response.py +17 -16
  15. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/llm/model_types.py +0 -4
  16. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/logging/__init__.py +0 -2
  17. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/logging/logging_config.py +0 -6
  18. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/logging/logging_mixin.py +2 -10
  19. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/pipeline.py +45 -68
  20. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/prefect.py +12 -3
  21. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/prompt_manager.py +6 -7
  22. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/settings.py +13 -5
  23. ai_pipeline_core-0.2.0/ai_pipeline_core/simple_runner/__init__.py +14 -0
  24. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/simple_runner/cli.py +13 -12
  25. ai_pipeline_core-0.2.0/ai_pipeline_core/simple_runner/simple_runner.py +247 -0
  26. ai_pipeline_core-0.2.0/ai_pipeline_core/storage/__init__.py +8 -0
  27. ai_pipeline_core-0.2.0/ai_pipeline_core/storage/storage.py +628 -0
  28. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/tracing.py +3 -26
  29. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/pyproject.toml +4 -2
  30. ai_pipeline_core-0.1.14/ai_pipeline_core/simple_runner/__init__.py +0 -24
  31. ai_pipeline_core-0.1.14/ai_pipeline_core/simple_runner/simple_runner.py +0 -402
  32. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/LICENSE +0 -0
  33. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/documents/__init__.py +0 -0
  34. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/documents/mime_type.py +0 -0
  35. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/documents/utils.py +0 -0
  36. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/exceptions.py +0 -0
  37. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/flow/__init__.py +0 -0
  38. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/flow/options.py +0 -0
  39. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/llm/__init__.py +1 -1
  40. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/logging/logging.yml +0 -0
  41. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/py.typed +0 -0
{ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/.gitignore
@@ -112,6 +112,7 @@ venv/
 ENV/
 env.bak/
 venv.bak/
+key.json
 
 # Spyder project settings
 .spyderproject
{ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-pipeline-core
-Version: 0.1.14
+Version: 0.2.0
 Summary: Core utilities for AI-powered processing pipelines using prefect
 Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
 Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -22,6 +22,7 @@ Requires-Dist: httpx>=0.28.1
 Requires-Dist: jinja2>=3.1.6
 Requires-Dist: lmnr>=0.7.6
 Requires-Dist: openai>=1.99.9
+Requires-Dist: prefect-gcp[cloud-storage]>=0.6.10
 Requires-Dist: prefect>=3.4.13
 Requires-Dist: pydantic-settings>=2.10.1
 Requires-Dist: pydantic>=2.11.7
@@ -111,15 +112,13 @@ class AnalysisConfig(FlowConfig):
     INPUT_DOCUMENT_TYPES = [InputDoc]
     OUTPUT_DOCUMENT_TYPE = OutputDoc
 
-# Create pipeline flow
-@pipeline_flow
+# Create pipeline flow with required config
+@pipeline_flow(config=AnalysisConfig)
 async def analyze_flow(
     project_name: str,
     documents: DocumentList,
     flow_options: FlowOptions
 ) -> DocumentList:
-    config = AnalysisConfig()
-
     # Process documents
     outputs = []
     for doc in documents:
@@ -136,7 +135,7 @@ async def analyze_flow(
         outputs.append(output)
 
     # RECOMMENDED: Always validate output
-    return config.create_and_validate_output(outputs)
+    return AnalysisConfig.create_and_validate_output(outputs)
 ```
 
 ### Structured Output
@@ -289,15 +288,15 @@ class ProcessingConfig(FlowConfig):
     INPUT_DOCUMENT_TYPES = [RawDataDocument]
     OUTPUT_DOCUMENT_TYPE = ProcessedDocument  # Must be different!
 
-# Use in flows for validation
-@pipeline_flow
-async def process(
-    config: ProcessingConfig,
-    documents: DocumentList,
-    flow_options: FlowOptions
-) -> DocumentList:
-    # ... processing logic ...
-    return config.create_and_validate_output(outputs)
+# Use in flows for validation
+@pipeline_flow(config=ProcessingConfig)
+async def process(
+    project_name: str,
+    documents: DocumentList,
+    flow_options: FlowOptions
+) -> DocumentList:
+    # ... processing logic ...
+    return ProcessingConfig.create_and_validate_output(outputs)
 ```
 
 ### Pipeline Decorators
@@ -313,7 +312,7 @@ async def process_chunk(data: str) -> str:
     set_trace_cost(0.05)  # Track costs (new in v0.1.14)
     return result
 
-@pipeline_flow  # Full observability and orchestration
+@pipeline_flow(config=MyFlowConfig)  # Full observability and orchestration
 async def main_flow(
     project_name: str,
     documents: DocumentList,
@@ -339,6 +338,9 @@ LMNR_DEBUG=true # Enable debug traces
 # Optional: Orchestration
 PREFECT_API_URL=http://localhost:4200/api
 PREFECT_API_KEY=your-prefect-key
+
+# Optional: Storage (for Google Cloud Storage)
+GCS_SERVICE_ACCOUNT_FILE=/path/to/service-account.json  # GCS auth file
 ```
 
 ### Settings Management
@@ -366,7 +368,7 @@ print(settings.app_name)
 
 ### Framework Rules (90% Use Cases)
 
-1. **Decorators**: Use `@trace`, `@pipeline_task`, `@pipeline_flow` WITHOUT parameters
+1. **Decorators**: Use `@pipeline_task` WITHOUT parameters, `@pipeline_flow` WITH config
 2. **Logging**: Use `get_pipeline_logger(__name__)` - NEVER `print()` or `logging` module
 3. **LLM calls**: Use `AIMessages` or `str`. Wrap Documents in `AIMessages`
 4. **Options**: Omit `ModelOptions` unless specifically needed (defaults are optimal)
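Note on the hunks above: 0.2.0 makes the `FlowConfig` a required decorator argument and turns `create_and_validate_output` into a classmethod call, so flows no longer instantiate their config. A minimal before/after sketch assembled from these hunks (document and flow names are illustrative, not from the package):

```python
from ai_pipeline_core import (
    DocumentList, FlowConfig, FlowDocument, FlowOptions, pipeline_flow
)

class InputDoc(FlowDocument):
    """Input document."""

class OutputDoc(FlowDocument):
    """Analysis result document."""

class AnalysisConfig(FlowConfig):
    INPUT_DOCUMENT_TYPES = [InputDoc]
    OUTPUT_DOCUMENT_TYPE = OutputDoc  # must differ from input types

# 0.1.x: bare @pipeline_flow, with config = AnalysisConfig() inside the body.
# 0.2.0: the config class is passed to the decorator and never instantiated.
@pipeline_flow(config=AnalysisConfig)
async def analyze_flow(
    project_name: str,
    documents: DocumentList,
    flow_options: FlowOptions,
) -> DocumentList:
    outputs = [
        OutputDoc.create(name="result.md", content=doc.text) for doc in documents
    ]
    # Classmethod call replaces the old config.create_and_validate_output(...)
    return AnalysisConfig.create_and_validate_output(outputs)
```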
{ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/README.md
@@ -67,15 +67,13 @@ class AnalysisConfig(FlowConfig):
     INPUT_DOCUMENT_TYPES = [InputDoc]
     OUTPUT_DOCUMENT_TYPE = OutputDoc
 
-# Create pipeline flow
-@pipeline_flow
+# Create pipeline flow with required config
+@pipeline_flow(config=AnalysisConfig)
 async def analyze_flow(
     project_name: str,
     documents: DocumentList,
     flow_options: FlowOptions
 ) -> DocumentList:
-    config = AnalysisConfig()
-
     # Process documents
     outputs = []
     for doc in documents:
@@ -92,7 +90,7 @@ async def analyze_flow(
         outputs.append(output)
 
     # RECOMMENDED: Always validate output
-    return config.create_and_validate_output(outputs)
+    return AnalysisConfig.create_and_validate_output(outputs)
 ```
 
 ### Structured Output
@@ -245,15 +243,15 @@ class ProcessingConfig(FlowConfig):
     INPUT_DOCUMENT_TYPES = [RawDataDocument]
     OUTPUT_DOCUMENT_TYPE = ProcessedDocument  # Must be different!
 
-# Use in flows for validation
-@pipeline_flow
-async def process(
-    config: ProcessingConfig,
-    documents: DocumentList,
-    flow_options: FlowOptions
-) -> DocumentList:
-    # ... processing logic ...
-    return config.create_and_validate_output(outputs)
+# Use in flows for validation
+@pipeline_flow(config=ProcessingConfig)
+async def process(
+    project_name: str,
+    documents: DocumentList,
+    flow_options: FlowOptions
+) -> DocumentList:
+    # ... processing logic ...
+    return ProcessingConfig.create_and_validate_output(outputs)
 ```
 
 ### Pipeline Decorators
@@ -269,7 +267,7 @@ async def process_chunk(data: str) -> str:
     set_trace_cost(0.05)  # Track costs (new in v0.1.14)
     return result
 
-@pipeline_flow  # Full observability and orchestration
+@pipeline_flow(config=MyFlowConfig)  # Full observability and orchestration
 async def main_flow(
     project_name: str,
     documents: DocumentList,
@@ -295,6 +293,9 @@ LMNR_DEBUG=true # Enable debug traces
 # Optional: Orchestration
 PREFECT_API_URL=http://localhost:4200/api
 PREFECT_API_KEY=your-prefect-key
+
+# Optional: Storage (for Google Cloud Storage)
+GCS_SERVICE_ACCOUNT_FILE=/path/to/service-account.json  # GCS auth file
 ```
 
 ### Settings Management
@@ -322,7 +323,7 @@ print(settings.app_name)
 
 ### Framework Rules (90% Use Cases)
 
-1. **Decorators**: Use `@trace`, `@pipeline_task`, `@pipeline_flow` WITHOUT parameters
+1. **Decorators**: Use `@pipeline_task` WITHOUT parameters, `@pipeline_flow` WITH config
 2. **Logging**: Use `get_pipeline_logger(__name__)` - NEVER `print()` or `logging` module
 3. **LLM calls**: Use `AIMessages` or `str`. Wrap Documents in `AIMessages`
 4. **Options**: Omit `ModelOptions` unless specifically needed (defaults are optimal)
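The storage additions are only hinted at in these README hunks: a new `prefect-gcp[cloud-storage]` dependency, a `GCS_SERVICE_ACCOUNT_FILE` variable, and a newly git-ignored `key.json` (presumably a local copy of such a key). The diff does not show how the new `ai_pipeline_core/storage/storage.py` consumes the variable; a neutral standard-library sketch of the configuration check an application might perform:

```python
import os
from pathlib import Path

# GCS_SERVICE_ACCOUNT_FILE is optional per the README; GCS-backed storage
# is only usable when it points at a readable service-account JSON file.
key_path = os.environ.get("GCS_SERVICE_ACCOUNT_FILE")
if key_path and Path(key_path).is_file():
    print(f"GCS storage available (credentials: {key_path})")
else:
    print("GCS_SERVICE_ACCOUNT_FILE not set; skipping GCS-backed storage")
```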
{ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/__init__.py
@@ -7,7 +7,7 @@ It combines document processing, LLM integration, and workflow orchestration int
 system designed for production use.
 
 The framework enforces best practices through strong typing (Pydantic), automatic retries,
-cost tracking, and distributed tracing. All I/O operations are async for maximum throughput.
+and cost tracking. All I/O operations are async for maximum throughput.
 
 **CRITICAL IMPORT RULE**:
 Always import from the top-level package:
@@ -18,12 +18,12 @@ cost tracking, and distributed tracing. All I/O operations are async for maximum
     from ai_pipeline_core.llm import generate  # NO!
     from ai_pipeline_core.documents import FlowDocument  # NO!
 
-FRAMEWORK RULES (90% Use Cases):
-1. Decorators: Use @trace, @pipeline_task, @pipeline_flow WITHOUT parameters
+FRAMEWORK RULES (Use by default, unless instructed otherwise):
+1. Decorators: Use @pipeline_task WITHOUT parameters, @pipeline_flow WITH config
 2. Logging: Use get_pipeline_logger(__name__) - NEVER print() or logging module
 3. LLM calls: Use AIMessages or str. Wrap Documents in AIMessages; do not call .text yourself
-4. Options: Omit ModelOptions unless specifically needed (defaults are optimal)
-5. Documents: Create with just name and content - skip description
+4. Options: DO NOT use options parameter - omit it entirely (defaults are optimal)
+5. Documents: Create with just name and content - skip description unless needed
 6. FlowConfig: OUTPUT_DOCUMENT_TYPE must differ from all INPUT_DOCUMENT_TYPES
 7. Initialization: PromptManager and logger at module scope, not in functions
 8. DocumentList: Use default constructor - no validation flags needed
@@ -36,18 +36,22 @@ Core Capabilities:
 - **LLM Integration**: Unified interface to any model via LiteLLM with caching
 - **Structured Output**: Type-safe generation with Pydantic model validation
 - **Workflow Orchestration**: Prefect-based flows and tasks with retries
-- **Observability**: Distributed tracing via Laminar (LMNR) for debugging
+- **Observability**: Built-in monitoring and debugging capabilities
 - **Local Development**: Simple runner for testing without infrastructure
 
 Quick Start:
     >>> from ai_pipeline_core import (
-    ...     pipeline_flow, FlowDocument, DocumentList, FlowOptions, llm, AIMessages
+    ...     pipeline_flow, FlowDocument, DocumentList, FlowOptions, FlowConfig, llm, AIMessages
     ... )
     >>>
     >>> class OutputDoc(FlowDocument):
    ...     '''Analysis result document.'''
     >>>
-    >>> @pipeline_flow
+    >>> class MyFlowConfig(FlowConfig):
+    ...     INPUT_DOCUMENT_TYPES = []
+    ...     OUTPUT_DOCUMENT_TYPE = OutputDoc
+    >>>
+    >>> @pipeline_flow(config=MyFlowConfig)
     >>> async def analyze_flow(
     ...     project_name: str,
     ...     documents: DocumentList,
@@ -55,7 +59,7 @@ Quick Start:
     ... ) -> DocumentList:
     ...     # Messages accept AIMessages or str. Wrap documents: AIMessages([doc])
     ...     response = await llm.generate(
-    ...         model="gpt-5",
+    ...         "gpt-5",
     ...         messages=AIMessages([documents[0]])
     ...     )
     ...     result = OutputDoc.create(
@@ -76,8 +80,6 @@ Optional Environment Variables:
 - PREFECT_API_KEY: Prefect API authentication key
 - LMNR_PROJECT_API_KEY: Laminar (LMNR) API key for tracing
 - LMNR_DEBUG: Set to "true" to enable debug-level traces
-- LMNR_SESSION_ID: Default session ID for traces
-- LMNR_USER_ID: Default user ID for traces
 """
 
 from . import llm
@@ -99,6 +101,8 @@ from .llm import (
     ModelOptions,
     ModelResponse,
     StructuredModelResponse,
+    generate,
+    generate_structured,
 )
 from .logging import (
     LoggerMixin,
@@ -114,7 +118,7 @@ from .prompt_manager import PromptManager
 from .settings import Settings
 from .tracing import TraceInfo, TraceLevel, set_trace_cost, trace
 
-__version__ = "0.1.14"
+__version__ = "0.2.0"
 
 __all__ = [
     # Config/Settings
@@ -145,7 +149,9 @@ __all__ = [
     "prefect_test_harness",
     "disable_run_logger",
     # LLM
-    "llm",
+    "llm",  # for backward compatibility
+    "generate",
+    "generate_structured",
     "ModelName",
     "ModelOptions",
     "ModelResponse",
@@ -159,4 +165,6 @@ __all__ = [
     "set_trace_cost",
     # Utils
     "PromptManager",
+    "generate",
+    "generate_structured",
 ]
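`generate` and `generate_structured` are now importable from the package root (note the diff appends them to `__all__` twice, once in the LLM group and again under Utils). Combined with the positional model argument shown in the Quick Start hunk, a sketch of the 0.2.0 call style:

```python
from ai_pipeline_core import AIMessages, generate

async def summarize(text: str):
    # Previously spelled llm.generate(model="gpt-5", ...); the llm module
    # export is kept for backward compatibility per the __all__ comment.
    return await generate("gpt-5", messages=AIMessages([text]))
```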
{ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.0}/ai_pipeline_core/documents/document.py
@@ -61,8 +61,7 @@ class Document(BaseModel, ABC):
     Document is the fundamental data abstraction for all content flowing through
     pipelines. It provides automatic encoding, MIME type detection, serialization,
     and validation. All documents must be subclassed from FlowDocument or TaskDocument
-    based on their persistence requirements. TemporaryDocument is a special concrete
-    class that can be instantiated directly (not abstract).
+    based on their persistence requirements.
 
     VALIDATION IS AUTOMATIC - Do not add manual validation!
     Size validation, name validation, and MIME type detection are built-in.
@@ -74,7 +73,7 @@ class Document(BaseModel, ABC):
         document.validate_file_name(document.name)  # NO! Automatic
 
     Best Practices:
-    - Use create() classmethod for automatic type conversion (90% of cases)
+    - Use create() classmethod for automatic type conversion (default preferred)
     - Omit description parameter unless truly needed for metadata
     - When using LLM functions, pass AIMessages or str. Wrap any Document values
       in AIMessages([...]). Do not call .text yourself
@@ -131,10 +130,62 @@ class Document(BaseModel, ABC):
     2. Embed metadata in content (e.g., JSON with data + metadata fields)
     3. Create a separate MetadataDocument type to accompany data documents
     4. Use document naming conventions (e.g., "data_v2_2024.json")
-    5. Store metadata in flow_options or pass through TraceInfo
+    5. Store metadata in flow_options
+
+    FILES Enum Best Practice:
+        When defining a FILES enum, NEVER use magic strings to reference files.
+        Always use the enum values to maintain type safety and refactorability.
+
+        WRONG - Magic strings/numbers:
+            doc = ConfigDocument.create(name="config.yaml", content=data)  # NO!
+            doc = docs.get_by("settings.json")  # NO! Magic string
+            files = ["config.yaml", "settings.json"]  # NO! Magic strings
+
+        CORRECT - Use enum references:
+            doc = ConfigDocument.create(
+                name=ConfigDocument.FILES.CONFIG,  # YES! Type-safe
+                content=data
+            )
+            doc = docs.get_by(ConfigDocument.FILES.SETTINGS)  # YES!
+            files = [
+                ConfigDocument.FILES.CONFIG,
+                ConfigDocument.FILES.SETTINGS
+            ]  # YES! Refactorable
+
+    Pydantic Model Interaction:
+        Documents provide DIRECT support for Pydantic models. Use the built-in
+        methods instead of manual JSON conversion.
+
+        WRONG - Manual JSON conversion:
+            # Don't do this - manual JSON handling
+            json_str = doc.text
+            json_data = json.loads(json_str)
+            model = MyModel(**json_data)  # NO! Use as_pydantic_model
+
+            # Don't do this - manual serialization
+            json_str = model.model_dump_json()
+            doc = MyDocument.create(name="data.json", content=json_str)  # NO!
+
+        CORRECT - Direct Pydantic interaction:
+            # Reading Pydantic model from document
+            model = doc.as_pydantic_model(MyModel)  # Direct conversion
+            models = doc.as_pydantic_model(list[MyModel])  # List support
+
+            # Creating document from Pydantic model
+            doc = MyDocument.create(
+                name="data.json",
+                content=model  # Direct BaseModel support
+            )
+
+            # Round-trip is seamless
+            original_model = MyModel(field="value")
+            doc = MyDocument.create(name="data.json", content=original_model)
+            restored = doc.as_pydantic_model(MyModel)
+            assert restored == original_model  # Perfect round-trip
 
     Example:
         >>> from enum import StrEnum
+        >>> from pydantic import BaseModel
         >>>
         >>> # Simple document:
         >>> class MyDocument(FlowDocument):
@@ -146,10 +197,23 @@ class Document(BaseModel, ABC):
         ...     CONFIG = "config.yaml"
         ...     SETTINGS = "settings.json"
         >>>
-        >>> # RECOMMENDED: Use create for automatic conversion
-        >>> doc = MyDocument.create(name="data.json", content={"key": "value"})
-        >>> print(doc.is_text)  # True
-        >>> data = doc.as_json()  # {'key': 'value'}
+        >>> # CORRECT FILES usage - no magic strings:
+        >>> doc = ConfigDocument.create(
+        ...     name=ConfigDocument.FILES.CONFIG,  # Use enum
+        ...     content={"key": "value"}
+        ... )
+        >>>
+        >>> # CORRECT Pydantic usage:
+        >>> class Config(BaseModel):
+        ...     key: str
+        >>>
+        >>> # Direct creation from Pydantic model
+        >>> config_model = Config(key="value")
+        >>> doc = MyDocument.create(name="data.json", content=config_model)
+        >>>
+        >>> # Direct extraction to Pydantic model
+        >>> restored = doc.as_pydantic_model(Config)
+        >>> print(restored.key)  # "value"
         >>>
         >>> # Track document provenance with sources
         >>> source_doc = MyDocument.create(name="input.txt", content="raw data")
@@ -170,6 +234,9 @@ class Document(BaseModel, ABC):
     DESCRIPTION_EXTENSION: ClassVar[str] = ".description.md"
     """File extension for description files."""
 
+    SOURCES_EXTENSION: ClassVar[str] = ".sources.json"
+    """File extension for sources metadata files."""
+
     MARKDOWN_LIST_SEPARATOR: ClassVar[str] = "\n\n-----------------\n\n"
     """Separator for markdown list items."""
 
@@ -288,7 +355,7 @@ class Document(BaseModel, ABC):
         content types and automatically converts them to bytes based on the file
         extension. Use the `parse` method to reverse this conversion.
 
-        Best Practice (90% of cases):
+        Best Practice (by default, unless instructed otherwise):
             Only provide name and content. The description parameter is RARELY needed.
 
         Args:
@@ -302,8 +369,8 @@ class Document(BaseModel, ABC):
                 - bytes: Used directly without conversion
                 - str: Encoded to UTF-8 bytes
                 - dict[str, Any]: Serialized to JSON (.json) or YAML (.yaml/.yml)
-                - list[str]: Joined with separator for .md (validates no items
-                  contain separator), else JSON/YAML
+                - list[str]: Joined automatically for .md (validates format compatibility),
+                  else JSON/YAML
                 - list[BaseModel]: Serialized to JSON or YAML based on extension
                 - BaseModel: Serialized to JSON or YAML based on extension
             description: Optional description - USUALLY OMIT THIS (defaults to None).
@@ -319,7 +386,7 @@ class Document(BaseModel, ABC):
 
         Raises:
             ValueError: If content type is not supported for the file extension,
-                or if markdown list items contain the separator
+                or if markdown list format is incompatible
             DocumentNameError: If filename violates validation rules
             DocumentSizeError: If content exceeds MAX_CONTENT_SIZE
 
@@ -329,7 +396,7 @@ class Document(BaseModel, ABC):
             returns the original dictionary {"key": "value"}.
 
         Example:
-            >>> # CORRECT - no description needed (90% of cases)
+            >>> # CORRECT - no description needed (by default, unless instructed otherwise)
             >>> doc = MyDocument.create(name="test.txt", content="Hello World")
             >>> doc.content  # b'Hello World'
             >>> doc.parse(str)  # "Hello World"
@@ -427,10 +494,6 @@ class Document(BaseModel, ABC):
             >>> doc = MyDocument.create(name="data.json", content={"key": "value"})
             >>> doc = MyDocument.create(name="config.yaml", content=my_model)
             >>> doc = MyDocument.create(name="items.md", content=["item1", "item2"])
-
-        See Also:
-            create: Recommended factory method with automatic type conversion
-            parse: Method to reverse the conversion done by create
         """
         if type(self) is Document:
             raise TypeError("Cannot instantiate abstract Document class directly")
@@ -467,8 +530,7 @@ class Document(BaseModel, ABC):
 
         Note:
             This method determines document persistence and lifecycle.
-            FlowDocument returns "flow", TaskDocument returns "task",
-            TemporaryDocument returns "temporary".
+            FlowDocument returns "flow", TaskDocument returns "task".
         """
         raise NotImplementedError("Subclasses must implement this method")
 
@@ -520,7 +582,7 @@ class Document(BaseModel, ABC):
         during execution.
 
         Returns:
-            True if this is a TemporaryDocument, False otherwise.
+            True if this document is temporary, False otherwise.
         """
         return self.get_base_type() == "temporary"
 
@@ -565,8 +627,6 @@ class Document(BaseModel, ABC):
     def validate_file_name(cls, name: str) -> None:
         """Validate that a file name matches allowed patterns.
 
-        @public
-
         DO NOT OVERRIDE this method if you define a FILES enum!
         The validation is automatic when FILES enum is present.
 
@@ -610,7 +670,7 @@ class Document(BaseModel, ABC):
 
         Ensures the document name is secure and follows conventions:
         - No path traversal characters (.., \\, /)
-        - Cannot end with .description.md
+        - Cannot end with .description.md or .sources.json
        - No leading/trailing whitespace
         - Must match FILES enum if defined
 
@@ -635,6 +695,9 @@ class Document(BaseModel, ABC):
                 f"Document names cannot end with {cls.DESCRIPTION_EXTENSION}: {v}"
             )
 
+        if v.endswith(cls.SOURCES_EXTENSION):
+            raise DocumentNameError(f"Document names cannot end with {cls.SOURCES_EXTENSION}: {v}")
+
         if ".." in v or "\\" in v or "/" in v:
             raise DocumentNameError(f"Invalid filename - contains path traversal characters: {v}")
 
@@ -659,7 +722,7 @@ class Document(BaseModel, ABC):
         2. str → UTF-8 encoding
         3. dict/BaseModel + .json → JSON serialization (indented)
         4. dict/BaseModel + .yaml/.yml → YAML serialization
-        5. list[str] + .md → Join with markdown separator (validates no items contain separator)
+        5. list[str] + .md → Join with markdown sections (validates format compatibility)
         6. list[Any] + .json/.yaml → JSON/YAML array
         7. int/float/bool + .json → JSON primitive
 
@@ -1028,8 +1091,6 @@ class Document(BaseModel, ABC):
     def as_yaml(self) -> Any:
         r"""Parse document content as YAML.
 
-        @public
-
         Parses the document's text content as YAML and returns Python objects.
         Uses ruamel.yaml which is safe by default (no code execution).
 
@@ -1057,8 +1118,6 @@ class Document(BaseModel, ABC):
     def as_json(self) -> Any:
         """Parse document content as JSON.
 
-        @public
-
         Parses the document's text content as JSON and returns Python objects.
         Document must contain valid JSON text.
 
@@ -1153,7 +1212,7 @@ class Document(BaseModel, ABC):
 
         @public
 
-        Splits text content using markdown separator ("\n\n-----------------\n\n").
+        Splits text content automatically using markdown section separators.
         Designed for markdown documents with multiple sections.
 
         Returns:
@@ -1168,9 +1227,9 @@ class Document(BaseModel, ABC):
             >>> doc = MyDocument.create(name="book.md", content=sections)
             >>> doc.as_markdown_list()  # Returns original sections
 
-            >>> # Manual creation with separator
-            >>> content = "Part 1\n\n-----------------\n\nPart 2\n\n-----------------\n\nPart 3"
-            >>> doc2 = MyDocument(name="parts.md", content=content.encode())
+            >>> # Round-trip conversion works automatically
+            >>> sections = ["Part 1", "Part 2", "Part 3"]
+            >>> doc2 = MyDocument.create(name="parts.md", content=sections)
             >>> doc2.as_markdown_list()  # ['Part 1', 'Part 2', 'Part 3']
         """
         return self.text.split(self.MARKDOWN_LIST_SEPARATOR)
@@ -1207,7 +1266,7 @@ class Document(BaseModel, ABC):
         Extension Rules:
         - .json → JSON parsing for dict/list/BaseModel
         - .yaml/.yml → YAML parsing for dict/list/BaseModel
-        - .md + list → Split by markdown separator
+        - .md + list → Split automatically into sections
         - Any + str → UTF-8 decode
         - Any + bytes → Raw content
 
@@ -1223,8 +1282,7 @@ class Document(BaseModel, ABC):
 
             >>> # Markdown list
             >>> items = ["Item 1", "Item 2"]
-            >>> content = "\n\n---\n\n".join(items).encode()
-            >>> doc = MyDocument(name="list.md", content=content)
+            >>> doc = MyDocument.create(name="list.md", content=items)
             >>> doc.parse(list)
             ['Item 1', 'Item 2']
         """
@@ -1330,11 +1388,6 @@ class Document(BaseModel, ABC):
             >>> # Check if specific document is a source
             >>> if source1.sha256 in doc_refs:
             ...     print("Document derived from source1")
-
-        See Also:
-            - get_source_references: Get non-document source references (URLs, etc.)
-            - has_source: Check if a specific source is tracked
-            - Document.create: Add sources when creating documents
         """
         return [src for src in self.sources if is_document_sha256(src)]
 
@@ -1372,11 +1425,6 @@ class Document(BaseModel, ABC):
             >>> # Use for attribution or debugging
             >>> for ref in refs:
             ...     print(f"Data sourced from: {ref}")
-
-        See Also:
-            - get_source_documents: Get document SHA256 references
-            - has_source: Check if a specific source is tracked
-            - Document.create: Add sources when creating documents
         """
         return [src for src in self.sources if not is_document_sha256(src)]
 
@@ -1422,11 +1470,6 @@ class Document(BaseModel, ABC):
             >>> # Check by SHA256 directly
             >>> if derived.has_source(source_doc.sha256):
             ...     print("Has specific hash")
-
-        See Also:
-            - get_source_documents: Get all document sources
-            - get_source_references: Get all reference sources
-            - Document.create: Add sources when creating documents
         """
         if isinstance(source, str):
             # Direct string comparison