ai-pipeline-core 0.1.14.tar.gz → 0.2.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/.gitignore +1 -0
  2. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/PKG-INFO +35 -20
  3. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/README.md +30 -16
  4. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/__init__.py +21 -13
  5. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/documents/document.py +202 -51
  6. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/documents/document_list.py +148 -24
  7. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/documents/flow_document.py +2 -6
  8. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/documents/task_document.py +0 -4
  9. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/documents/temporary_document.py +1 -8
  10. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/flow/config.py +174 -5
  11. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/llm/__init__.py +1 -6
  12. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/llm/ai_messages.py +137 -4
  13. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/llm/client.py +118 -65
  14. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/llm/model_options.py +6 -7
  15. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/llm/model_response.py +17 -16
  16. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/llm/model_types.py +3 -7
  17. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/logging/__init__.py +0 -2
  18. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/logging/logging_config.py +0 -6
  19. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/logging/logging_mixin.py +2 -10
  20. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/pipeline.py +54 -68
  21. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/prefect.py +12 -3
  22. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/prompt_manager.py +14 -7
  23. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/settings.py +13 -5
  24. ai_pipeline_core-0.2.1/ai_pipeline_core/simple_runner/__init__.py +14 -0
  25. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/simple_runner/cli.py +13 -12
  26. ai_pipeline_core-0.2.1/ai_pipeline_core/simple_runner/simple_runner.py +247 -0
  27. ai_pipeline_core-0.2.1/ai_pipeline_core/storage/__init__.py +8 -0
  28. ai_pipeline_core-0.2.1/ai_pipeline_core/storage/storage.py +628 -0
  29. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/tracing.py +234 -30
  30. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/pyproject.toml +7 -5
  31. ai_pipeline_core-0.1.14/ai_pipeline_core/simple_runner/__init__.py +0 -24
  32. ai_pipeline_core-0.1.14/ai_pipeline_core/simple_runner/simple_runner.py +0 -402
  33. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/LICENSE +0 -0
  34. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/documents/__init__.py +0 -0
  35. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/documents/mime_type.py +0 -0
  36. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/documents/utils.py +0 -0
  37. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/exceptions.py +0 -0
  38. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/flow/__init__.py +0 -0
  39. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/flow/options.py +0 -0
  40. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/logging/logging.yml +0 -0
  41. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/py.typed +0 -0
.gitignore

@@ -112,6 +112,7 @@ venv/
  ENV/
  env.bak/
  venv.bak/
+ key.json

  # Spyder project settings
  .spyderproject
PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ai-pipeline-core
- Version: 0.1.14
+ Version: 0.2.1
  Summary: Core utilities for AI-powered processing pipelines using prefect
  Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
  Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -20,9 +20,10 @@ Classifier: Typing :: Typed
  Requires-Python: >=3.12
  Requires-Dist: httpx>=0.28.1
  Requires-Dist: jinja2>=3.1.6
- Requires-Dist: lmnr>=0.7.6
- Requires-Dist: openai>=1.99.9
- Requires-Dist: prefect>=3.4.13
+ Requires-Dist: lmnr>=0.7.13
+ Requires-Dist: openai>=1.108.1
+ Requires-Dist: prefect-gcp[cloud-storage]>=0.6.10
+ Requires-Dist: prefect>=3.4.19
  Requires-Dist: pydantic-settings>=2.10.1
  Requires-Dist: pydantic>=2.11.7
  Requires-Dist: python-magic>=0.4.27
@@ -111,15 +112,13 @@ class AnalysisConfig(FlowConfig):
      INPUT_DOCUMENT_TYPES = [InputDoc]
      OUTPUT_DOCUMENT_TYPE = OutputDoc

- # Create pipeline flow
- @pipeline_flow
+ # Create pipeline flow with required config
+ @pipeline_flow(config=AnalysisConfig)
  async def analyze_flow(
      project_name: str,
      documents: DocumentList,
      flow_options: FlowOptions
  ) -> DocumentList:
-     config = AnalysisConfig()
-
      # Process documents
      outputs = []
      for doc in documents:
@@ -136,7 +135,7 @@ async def analyze_flow(
          outputs.append(output)

      # RECOMMENDED: Always validate output
-     return config.create_and_validate_output(outputs)
+     return AnalysisConfig.create_and_validate_output(outputs)
  ```

  ### Structured Output
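The two hunks above capture the headline API change in 0.2.1: `@pipeline_flow` now takes a `config=` argument, and output validation becomes a classmethod call on the `FlowConfig` subclass instead of on an instance. A minimal, self-contained sketch of the new style; the document classes, the `create(name=..., content=...)` keywords, and the `doc.text` access in the body are illustrative assumptions rather than content of this diff:

```python
from ai_pipeline_core import (
    DocumentList,
    FlowConfig,
    FlowDocument,
    FlowOptions,
    pipeline_flow,
)


class InputDoc(FlowDocument):
    """Input document (placeholder for illustration)."""


class OutputDoc(FlowDocument):
    """Output document (placeholder for illustration)."""


class AnalysisConfig(FlowConfig):
    INPUT_DOCUMENT_TYPES = [InputDoc]
    OUTPUT_DOCUMENT_TYPE = OutputDoc


# v0.2.1 style: the config is passed to the decorator, not instantiated in the body
@pipeline_flow(config=AnalysisConfig)
async def analyze_flow(
    project_name: str,
    documents: DocumentList,
    flow_options: FlowOptions,
) -> DocumentList:
    outputs = [
        # create() keywords and .text access are assumed for this sketch
        OutputDoc.create(name=f"analyzed_{doc.name}", content=doc.text)
        for doc in documents
    ]
    # Output validation is now a classmethod on the FlowConfig subclass
    return AnalysisConfig.create_and_validate_output(outputs)
```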
@@ -225,9 +224,17 @@ if doc.is_text:
  # Parse structured data
  data = doc.as_json() # or as_yaml(), as_pydantic_model()

+ # Convert between document types (new in v0.2.1)
+ task_doc = flow_doc.model_convert(TaskDocument) # Convert FlowDocument to TaskDocument
+ new_doc = doc.model_convert(OtherDocType, content={"new": "data"}) # With content update
+
  # Enhanced filtering (new in v0.1.14)
  filtered = documents.filter_by([Doc1, Doc2, Doc3]) # Multiple types
  named = documents.filter_by(["file1.txt", "file2.txt"]) # Multiple names
+
+ # Immutable collections (new in v0.2.1)
+ frozen_docs = DocumentList(docs, frozen=True) # Immutable document list
+ frozen_msgs = AIMessages(messages, frozen=True) # Immutable message list
  ```

  ### LLM Integration
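This hunk documents two 0.2.1 additions: `Document.model_convert` and `frozen=True` collections. A hedged sketch of how they fit together; `ReportDoc`/`ScratchDoc`, the `create()` keywords, and the top-level `TaskDocument` import are assumptions made for illustration:

```python
from ai_pipeline_core import AIMessages, DocumentList, FlowDocument, TaskDocument


class ReportDoc(FlowDocument):
    """Persistent flow output (illustrative)."""


class ScratchDoc(TaskDocument):
    """Intermediate working copy (illustrative)."""


report = ReportDoc.create(name="report.md", content="# Findings\n...")

# Re-type an existing document; optionally replace its content at the same time
scratch = report.model_convert(ScratchDoc)
draft = report.model_convert(ScratchDoc, content="# Findings (draft)\n...")

# frozen=True makes the collections read-only; later mutation attempts should fail
frozen_docs = DocumentList([report], frozen=True)
frozen_msgs = AIMessages([report], frozen=True)
```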
@@ -289,15 +296,15 @@ class ProcessingConfig(FlowConfig):
      INPUT_DOCUMENT_TYPES = [RawDataDocument]
      OUTPUT_DOCUMENT_TYPE = ProcessedDocument # Must be different!

- # Use in flows for validation
- @pipeline_flow
- async def process(
-     config: ProcessingConfig,
-     documents: DocumentList,
-     flow_options: FlowOptions
- ) -> DocumentList:
-     # ... processing logic ...
-     return config.create_and_validate_output(outputs)
+ # Use in flows for validation
+ @pipeline_flow(config=ProcessingConfig)
+ async def process(
+     project_name: str,
+     documents: DocumentList,
+     flow_options: FlowOptions
+ ) -> DocumentList:
+     # ... processing logic ...
+     return ProcessingConfig.create_and_validate_output(outputs)
  ```

  ### Pipeline Decorators
@@ -313,13 +320,18 @@ async def process_chunk(data: str) -> str:
      set_trace_cost(0.05) # Track costs (new in v0.1.14)
      return result

- @pipeline_flow # Full observability and orchestration
+ @pipeline_flow(
+     config=MyFlowConfig,
+     trace_trim_documents=True # Trim large documents in traces (new in v0.2.1)
+ )
  async def main_flow(
      project_name: str,
      documents: DocumentList,
      flow_options: FlowOptions
  ) -> DocumentList:
      # Your pipeline logic
+     # Large documents are automatically trimmed to 100 chars in traces
+     # for better observability without overwhelming the tracing UI
      return DocumentList(results)
  ```

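The unchanged context at the top of that hunk is still the canonical task pattern: `@pipeline_task` is applied without parameters and `set_trace_cost` attributes a cost to the current traced span. A tiny self-contained sketch of just that piece (the task body and the cost value are illustrative):

```python
from ai_pipeline_core import pipeline_task, set_trace_cost


@pipeline_task  # no parameters, per the framework rules
async def process_chunk(data: str) -> str:
    result = data.upper()  # placeholder processing step
    set_trace_cost(0.05)   # attribute an illustrative cost to this traced span
    return result
```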
@@ -339,6 +351,9 @@ LMNR_DEBUG=true # Enable debug traces
  # Optional: Orchestration
  PREFECT_API_URL=http://localhost:4200/api
  PREFECT_API_KEY=your-prefect-key
+
+ # Optional: Storage (for Google Cloud Storage)
+ GCS_SERVICE_ACCOUNT_FILE=/path/to/service-account.json # GCS auth file
  ```

  ### Settings Management
@@ -366,7 +381,7 @@ print(settings.app_name)

  ### Framework Rules (90% Use Cases)

- 1. **Decorators**: Use `@trace`, `@pipeline_task`, `@pipeline_flow` WITHOUT parameters
+ 1. **Decorators**: Use `@pipeline_task` WITHOUT parameters, `@pipeline_flow` WITH config
  2. **Logging**: Use `get_pipeline_logger(__name__)` - NEVER `print()` or `logging` module
  3. **LLM calls**: Use `AIMessages` or `str`. Wrap Documents in `AIMessages`
  4. **Options**: Omit `ModelOptions` unless specifically needed (defaults are optimal)
README.md

@@ -67,15 +67,13 @@ class AnalysisConfig(FlowConfig):
      INPUT_DOCUMENT_TYPES = [InputDoc]
      OUTPUT_DOCUMENT_TYPE = OutputDoc

- # Create pipeline flow
- @pipeline_flow
+ # Create pipeline flow with required config
+ @pipeline_flow(config=AnalysisConfig)
  async def analyze_flow(
      project_name: str,
      documents: DocumentList,
      flow_options: FlowOptions
  ) -> DocumentList:
-     config = AnalysisConfig()
-
      # Process documents
      outputs = []
      for doc in documents:
@@ -92,7 +90,7 @@ async def analyze_flow(
          outputs.append(output)

      # RECOMMENDED: Always validate output
-     return config.create_and_validate_output(outputs)
+     return AnalysisConfig.create_and_validate_output(outputs)
  ```

  ### Structured Output
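The `### Structured Output` section referenced in the trailing context is not shown in this diff, so the sketch below is purely hypothetical: the `response_format` keyword, the positional model argument, and the `.parsed` accessor on the structured response are all assumptions to be checked against the package documentation.

```python
from pydantic import BaseModel

from ai_pipeline_core import AIMessages, llm


class Summary(BaseModel):
    title: str
    key_points: list[str]


async def summarize(text: str) -> Summary:
    response = await llm.generate_structured(
        "gpt-5",                  # model passed positionally, mirroring generate() in 0.2.1
        response_format=Summary,  # keyword name is an assumption
        messages=AIMessages([text]),
    )
    return response.parsed        # accessor name is an assumption
```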
@@ -181,9 +179,17 @@ if doc.is_text:
  # Parse structured data
  data = doc.as_json() # or as_yaml(), as_pydantic_model()

+ # Convert between document types (new in v0.2.1)
+ task_doc = flow_doc.model_convert(TaskDocument) # Convert FlowDocument to TaskDocument
+ new_doc = doc.model_convert(OtherDocType, content={"new": "data"}) # With content update
+
  # Enhanced filtering (new in v0.1.14)
  filtered = documents.filter_by([Doc1, Doc2, Doc3]) # Multiple types
  named = documents.filter_by(["file1.txt", "file2.txt"]) # Multiple names
+
+ # Immutable collections (new in v0.2.1)
+ frozen_docs = DocumentList(docs, frozen=True) # Immutable document list
+ frozen_msgs = AIMessages(messages, frozen=True) # Immutable message list
  ```

  ### LLM Integration
@@ -245,15 +251,15 @@ class ProcessingConfig(FlowConfig):
      INPUT_DOCUMENT_TYPES = [RawDataDocument]
      OUTPUT_DOCUMENT_TYPE = ProcessedDocument # Must be different!

- # Use in flows for validation
- @pipeline_flow
- async def process(
-     config: ProcessingConfig,
-     documents: DocumentList,
-     flow_options: FlowOptions
- ) -> DocumentList:
-     # ... processing logic ...
-     return config.create_and_validate_output(outputs)
+ # Use in flows for validation
+ @pipeline_flow(config=ProcessingConfig)
+ async def process(
+     project_name: str,
+     documents: DocumentList,
+     flow_options: FlowOptions
+ ) -> DocumentList:
+     # ... processing logic ...
+     return ProcessingConfig.create_and_validate_output(outputs)
  ```

  ### Pipeline Decorators
@@ -269,13 +275,18 @@ async def process_chunk(data: str) -> str:
      set_trace_cost(0.05) # Track costs (new in v0.1.14)
      return result

- @pipeline_flow # Full observability and orchestration
+ @pipeline_flow(
+     config=MyFlowConfig,
+     trace_trim_documents=True # Trim large documents in traces (new in v0.2.1)
+ )
  async def main_flow(
      project_name: str,
      documents: DocumentList,
      flow_options: FlowOptions
  ) -> DocumentList:
      # Your pipeline logic
+     # Large documents are automatically trimmed to 100 chars in traces
+     # for better observability without overwhelming the tracing UI
      return DocumentList(results)
  ```

@@ -295,6 +306,9 @@ LMNR_DEBUG=true # Enable debug traces
  # Optional: Orchestration
  PREFECT_API_URL=http://localhost:4200/api
  PREFECT_API_KEY=your-prefect-key
+
+ # Optional: Storage (for Google Cloud Storage)
+ GCS_SERVICE_ACCOUNT_FILE=/path/to/service-account.json # GCS auth file
  ```

  ### Settings Management
@@ -322,7 +336,7 @@ print(settings.app_name)

  ### Framework Rules (90% Use Cases)

- 1. **Decorators**: Use `@trace`, `@pipeline_task`, `@pipeline_flow` WITHOUT parameters
+ 1. **Decorators**: Use `@pipeline_task` WITHOUT parameters, `@pipeline_flow` WITH config
  2. **Logging**: Use `get_pipeline_logger(__name__)` - NEVER `print()` or `logging` module
  3. **LLM calls**: Use `AIMessages` or `str`. Wrap Documents in `AIMessages`
  4. **Options**: Omit `ModelOptions` unless specifically needed (defaults are optimal)
ai_pipeline_core/__init__.py

@@ -7,7 +7,7 @@ It combines document processing, LLM integration, and workflow orchestration int
  system designed for production use.

  The framework enforces best practices through strong typing (Pydantic), automatic retries,
- cost tracking, and distributed tracing. All I/O operations are async for maximum throughput.
+ and cost tracking. All I/O operations are async for maximum throughput.

  **CRITICAL IMPORT RULE**:
  Always import from the top-level package:
@@ -18,12 +18,12 @@ cost tracking, and distributed tracing. All I/O operations are async for maximum
  from ai_pipeline_core.llm import generate # NO!
  from ai_pipeline_core.documents import FlowDocument # NO!

- FRAMEWORK RULES (90% Use Cases):
- 1. Decorators: Use @trace, @pipeline_task, @pipeline_flow WITHOUT parameters
+ FRAMEWORK RULES (Use by default, unless instructed otherwise):
+ 1. Decorators: Use @pipeline_task WITHOUT parameters, @pipeline_flow WITH config
  2. Logging: Use get_pipeline_logger(__name__) - NEVER print() or logging module
  3. LLM calls: Use AIMessages or str. Wrap Documents in AIMessages; do not call .text yourself
- 4. Options: Omit ModelOptions unless specifically needed (defaults are optimal)
- 5. Documents: Create with just name and content - skip description
+ 4. Options: DO NOT use options parameter - omit it entirely (defaults are optimal)
+ 5. Documents: Create with just name and content - skip description unless needed
  6. FlowConfig: OUTPUT_DOCUMENT_TYPE must differ from all INPUT_DOCUMENT_TYPES
  7. Initialization: PromptManager and logger at module scope, not in functions
  8. DocumentList: Use default constructor - no validation flags needed
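Rules 2 and 7 above are unchanged but easy to miss: the logger (and `PromptManager`) belong at module scope, and `print()` or the stdlib `logging` module are off limits. A small sketch of that convention; the top-level `get_pipeline_logger` import and the `logger.info` call are assumed to behave like a standard logger:

```python
from ai_pipeline_core import get_pipeline_logger

# Module scope, per rule 7; rule 2: use this logger, never print() or logging directly
logger = get_pipeline_logger(__name__)


async def do_work() -> None:
    logger.info("starting work")  # standard logger-style call (assumed)
```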
@@ -36,18 +36,22 @@ Core Capabilities:
  - **LLM Integration**: Unified interface to any model via LiteLLM with caching
  - **Structured Output**: Type-safe generation with Pydantic model validation
  - **Workflow Orchestration**: Prefect-based flows and tasks with retries
- - **Observability**: Distributed tracing via Laminar (LMNR) for debugging
+ - **Observability**: Built-in monitoring and debugging capabilities
  - **Local Development**: Simple runner for testing without infrastructure

  Quick Start:
  >>> from ai_pipeline_core import (
- ... pipeline_flow, FlowDocument, DocumentList, FlowOptions, llm, AIMessages
+ ... pipeline_flow, FlowDocument, DocumentList, FlowOptions, FlowConfig, llm, AIMessages
  ... )
  >>>
  >>> class OutputDoc(FlowDocument):
  ... '''Analysis result document.'''
  >>>
- >>> @pipeline_flow
+ >>> class MyFlowConfig(FlowConfig):
+ ... INPUT_DOCUMENT_TYPES = []
+ ... OUTPUT_DOCUMENT_TYPE = OutputDoc
+ >>>
+ >>> @pipeline_flow(config=MyFlowConfig)
  >>> async def analyze_flow(
  ... project_name: str,
  ... documents: DocumentList,
@@ -55,7 +59,7 @@ Quick Start:
  ... ) -> DocumentList:
  ... # Messages accept AIMessages or str. Wrap documents: AIMessages([doc])
  ... response = await llm.generate(
- ... model="gpt-5",
+ ... "gpt-5",
  ... messages=AIMessages([documents[0]])
  ... )
  ... result = OutputDoc.create(
@@ -76,8 +80,6 @@ Optional Environment Variables:
  - PREFECT_API_KEY: Prefect API authentication key
  - LMNR_PROJECT_API_KEY: Laminar (LMNR) API key for tracing
  - LMNR_DEBUG: Set to "true" to enable debug-level traces
- - LMNR_SESSION_ID: Default session ID for traces
- - LMNR_USER_ID: Default user ID for traces
  """

  from . import llm
@@ -99,6 +101,8 @@ from .llm import (
      ModelOptions,
      ModelResponse,
      StructuredModelResponse,
+     generate,
+     generate_structured,
  )
  from .logging import (
      LoggerMixin,
@@ -114,7 +118,7 @@ from .prompt_manager import PromptManager
  from .settings import Settings
  from .tracing import TraceInfo, TraceLevel, set_trace_cost, trace

- __version__ = "0.1.14"
+ __version__ = "0.2.1"

  __all__ = [
      # Config/Settings
@@ -145,7 +149,9 @@ __all__ = [
      "prefect_test_harness",
      "disable_run_logger",
      # LLM
-     "llm",
+     "llm", # for backward compatibility
+     "generate",
+     "generate_structured",
      "ModelName",
      "ModelOptions",
      "ModelResponse",
@@ -159,4 +165,6 @@ __all__ = [
      "set_trace_cost",
      # Utils
      "PromptManager",
+     "generate",
+     "generate_structured",
  ]
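With `generate` and `generate_structured` re-exported from the package root (and `llm` kept "for backward compatibility"), both import styles below should resolve in 0.2.1. The `ModelResponse` return annotation and wrapping a plain string in `AIMessages` are assumptions; the positional model argument matches the Quick Start change above.

```python
from ai_pipeline_core import AIMessages, ModelResponse, generate, llm


async def ask() -> tuple[ModelResponse, ModelResponse]:
    # New in 0.2.1: call generate straight from the package root ...
    direct = await generate("gpt-5", messages=AIMessages(["Say hello"]))
    # ... while the llm namespace keeps working for existing code
    legacy = await llm.generate("gpt-5", messages=AIMessages(["Say hello"]))
    return direct, legacy
```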