ai-pipeline-core 0.2.9__tar.gz → 0.3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/.gitignore +4 -0
  2. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/PKG-INFO +96 -27
  3. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/README.md +94 -26
  4. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/__init__.py +32 -5
  5. ai_pipeline_core-0.3.3/ai_pipeline_core/debug/__init__.py +26 -0
  6. ai_pipeline_core-0.3.3/ai_pipeline_core/debug/config.py +91 -0
  7. ai_pipeline_core-0.3.3/ai_pipeline_core/debug/content.py +705 -0
  8. ai_pipeline_core-0.3.3/ai_pipeline_core/debug/processor.py +99 -0
  9. ai_pipeline_core-0.3.3/ai_pipeline_core/debug/summary.py +236 -0
  10. ai_pipeline_core-0.3.3/ai_pipeline_core/debug/writer.py +913 -0
  11. ai_pipeline_core-0.3.3/ai_pipeline_core/deployment/__init__.py +46 -0
  12. ai_pipeline_core-0.3.3/ai_pipeline_core/deployment/base.py +681 -0
  13. ai_pipeline_core-0.3.3/ai_pipeline_core/deployment/contract.py +84 -0
  14. ai_pipeline_core-0.3.3/ai_pipeline_core/deployment/helpers.py +98 -0
  15. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/documents/flow_document.py +1 -1
  16. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/documents/task_document.py +1 -1
  17. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/documents/temporary_document.py +1 -1
  18. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/flow/config.py +13 -2
  19. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/flow/options.py +4 -4
  20. ai_pipeline_core-0.3.3/ai_pipeline_core/images/__init__.py +362 -0
  21. ai_pipeline_core-0.3.3/ai_pipeline_core/images/_processing.py +157 -0
  22. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/llm/ai_messages.py +25 -4
  23. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/llm/client.py +15 -19
  24. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/llm/model_response.py +5 -5
  25. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/llm/model_types.py +10 -13
  26. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/logging/logging_mixin.py +2 -2
  27. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/pipeline.py +1 -1
  28. ai_pipeline_core-0.3.3/ai_pipeline_core/progress.py +127 -0
  29. ai_pipeline_core-0.3.3/ai_pipeline_core/prompt_builder/__init__.py +5 -0
  30. ai_pipeline_core-0.3.3/ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +23 -0
  31. ai_pipeline_core-0.3.3/ai_pipeline_core/prompt_builder/global_cache.py +78 -0
  32. ai_pipeline_core-0.3.3/ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +6 -0
  33. ai_pipeline_core-0.3.3/ai_pipeline_core/prompt_builder/prompt_builder.py +253 -0
  34. ai_pipeline_core-0.3.3/ai_pipeline_core/prompt_builder/system_prompt.jinja2 +41 -0
  35. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/tracing.py +54 -2
  36. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/utils/deploy.py +214 -6
  37. ai_pipeline_core-0.3.3/ai_pipeline_core/utils/remote_deployment.py +119 -0
  38. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/pyproject.toml +8 -5
  39. ai_pipeline_core-0.2.9/ai_pipeline_core/simple_runner/__init__.py +0 -14
  40. ai_pipeline_core-0.2.9/ai_pipeline_core/simple_runner/cli.py +0 -254
  41. ai_pipeline_core-0.2.9/ai_pipeline_core/simple_runner/simple_runner.py +0 -247
  42. ai_pipeline_core-0.2.9/ai_pipeline_core/utils/remote_deployment.py +0 -269
  43. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/LICENSE +0 -0
  44. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/documents/__init__.py +0 -0
  45. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/documents/document.py +0 -0
  46. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/documents/document_list.py +0 -0
  47. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/documents/mime_type.py +0 -0
  48. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/documents/utils.py +0 -0
  49. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/exceptions.py +0 -0
  50. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/flow/__init__.py +0 -0
  51. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/llm/__init__.py +0 -0
  52. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/llm/model_options.py +0 -0
  53. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/logging/__init__.py +0 -0
  54. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/logging/logging.yml +0 -0
  55. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/logging/logging_config.py +0 -0
  56. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/prefect.py +0 -0
  57. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/prompt_manager.py +0 -0
  58. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/py.typed +0 -0
  59. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/settings.py +0 -0
  60. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/storage/__init__.py +0 -0
  61. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/storage/storage.py +0 -0
  62. {ai_pipeline_core-0.2.9 → ai_pipeline_core-0.3.3}/ai_pipeline_core/utils/__init__.py +0 -0
@@ -66,6 +66,10 @@ instance/
  # Scrapy stuff:
  .scrapy

+ # claude and cursor
+ .claude
+ .cursor
+
  # Sphinx documentation
  docs/_build/

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ai-pipeline-core
- Version: 0.2.9
+ Version: 0.3.3
  Summary: Core utilities for AI-powered processing pipelines using prefect
  Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
  Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -22,6 +22,7 @@ Requires-Dist: httpx>=0.28.1
  Requires-Dist: jinja2>=3.1.6
  Requires-Dist: lmnr>=0.7.18
  Requires-Dist: openai>=1.109.1
+ Requires-Dist: pillow>=10.0.0
  Requires-Dist: prefect-gcp[cloud-storage]>=0.6.10
  Requires-Dist: prefect>=3.4.21
  Requires-Dist: pydantic-settings>=2.10.1
@@ -63,7 +64,7 @@ AI Pipeline Core is a production-ready framework that combines document processi
  - **Structured Output**: Type-safe generation with Pydantic model validation
  - **Workflow Orchestration**: Prefect-based flows and tasks with automatic retries
  - **Observability**: Built-in distributed tracing via Laminar (LMNR) with cost tracking for debugging and monitoring
- - **Local Development**: Simple runner for testing pipelines without infrastructure
+ - **Deployment**: Unified pipeline execution for local, CLI, and production environments

  ## Installation

@@ -124,7 +125,7 @@ async def analyze_flow(
      for doc in documents:
          # Use AIMessages for LLM interaction
          response = await llm.generate(
-             model="gpt-5",
+             model="gpt-5.1",
              messages=AIMessages([doc])
          )

@@ -151,7 +152,7 @@ class Analysis(BaseModel):

  # Generate structured output
  response = await llm.generate_structured(
-     model="gpt-5",
+     model="gpt-5.1",
      response_format=Analysis,
      messages="Analyze this product review: ..."
  )
@@ -177,7 +178,7 @@ doc = MyDocument.create(
  # Parse back to original type
  data = doc.parse(dict) # Returns {"key": "value"}

- # Document provenance tracking (new in v0.1.14)
+ # Document provenance tracking
  doc_with_sources = MyDocument.create(
      name="derived.json",
      content={"result": "processed"},
@@ -224,15 +225,15 @@ if doc.is_text:
  # Parse structured data
  data = doc.as_json() # or as_yaml(), as_pydantic_model()

- # Convert between document types (new in v0.2.1)
+ # Convert between document types
  task_doc = flow_doc.model_convert(TaskDocument) # Convert FlowDocument to TaskDocument
  new_doc = doc.model_convert(OtherDocType, content={"new": "data"}) # With content update

- # Enhanced filtering (new in v0.1.14)
+ # Enhanced filtering
  filtered = documents.filter_by([Doc1, Doc2, Doc3]) # Multiple types
  named = documents.filter_by(["file1.txt", "file2.txt"]) # Multiple names

- # Immutable collections (new in v0.2.1)
+ # Immutable collections
  frozen_docs = DocumentList(docs, frozen=True) # Immutable document list
  frozen_msgs = AIMessages(messages, frozen=True) # Immutable message list
  ```
@@ -246,7 +247,7 @@ from ai_pipeline_core import llm, AIMessages, ModelOptions

  # Simple generation
  response = await llm.generate(
-     model="gpt-5",
+     model="gpt-5.1",
      messages="Explain quantum computing"
  )
  print(response.content)
@@ -256,21 +257,21 @@ static_context = AIMessages([large_document])

  # First call: caches context
  r1 = await llm.generate(
-     model="gpt-5",
+     model="gpt-5.1",
      context=static_context, # Cached for 120 seconds by default
      messages="Summarize" # Dynamic query
  )

  # Second call: reuses cache
  r2 = await llm.generate(
-     model="gpt-5",
+     model="gpt-5.1",
      context=static_context, # Reused from cache!
      messages="Key points?" # Different query
  )

- # Custom cache TTL (new in v0.1.14)
+ # Custom cache TTL
  response = await llm.generate(
-     model="gpt-5",
+     model="gpt-5.1",
      context=static_context,
      messages="Analyze",
      options=ModelOptions(cache_ttl="300s") # Cache for 5 minutes
@@ -278,7 +279,7 @@ response = await llm.generate(

  # Disable caching for dynamic contexts
  response = await llm.generate(
-     model="gpt-5",
+     model="gpt-5.1",
      context=dynamic_context,
      messages="Process",
      options=ModelOptions(cache_ttl=None) # No caching
@@ -317,12 +318,12 @@ from ai_pipeline_core import pipeline_flow, pipeline_task, set_trace_cost
  @pipeline_task # Automatic retry, tracing, and monitoring
  async def process_chunk(data: str) -> str:
      result = await transform(data)
-     set_trace_cost(0.05) # Track costs (new in v0.1.14)
+     set_trace_cost(0.05) # Track costs
      return result

  @pipeline_flow(
      config=MyFlowConfig,
-     trace_trim_documents=True # Trim large documents in traces (new in v0.2.1)
+     trace_trim_documents=True # Trim large documents in traces
  )
  async def main_flow(
      project_name: str,
@@ -335,6 +336,68 @@ async def main_flow(
      return DocumentList(results)
  ```

+ ### Local Trace Debugging
+
+ Save all trace spans to the local filesystem for LLM-assisted debugging:
+
+ ```bash
+ export TRACE_DEBUG_PATH=/path/to/debug/output
+ ```
+
+ This creates a hierarchical directory structure that mirrors the execution flow, with automatic deduplication:
+
+ ```
+ 20260128_152932_abc12345_my_flow/
+ ├── _trace.yaml                # Trace metadata
+ ├── _index.yaml                # Span ID → path mapping
+ ├── _summary.md                # Unified summary for human inspection and LLM debugging
+ ├── artifacts/                 # Deduplicated content storage
+ │   └── sha256/
+ │       └── ab/cd/             # Sharded by hash prefix
+ │           └── abcdef...1234.txt  # Large content (>10KB)
+ └── 0001_my_flow/              # Root span (numbered for execution order)
+     ├── _span.yaml             # Span metadata (timing, status, I/O refs)
+     ├── input.yaml             # Structured inputs (inline or refs)
+     ├── output.yaml            # Structured outputs (inline or refs)
+     ├── 0002_task_1/           # Child spans nested inside parent
+     │   ├── _span.yaml
+     │   ├── input.yaml
+     │   ├── output.yaml
+     │   └── 0003_llm_call/
+     │       ├── _span.yaml
+     │       ├── input.yaml     # LLM messages with inline/external content
+     │       └── output.yaml
+     └── 0004_task_2/
+         └── ...
+ ```
+
+ **Key Features:**
+ - **Automatic Deduplication**: Identical content (e.g., system prompts) stored once in `artifacts/`
+ - **Smart Externalization**: Large content (>10KB) externalized with 2KB inline previews
+ - **AI-Friendly**: Files capped at 50KB for easy LLM processing
+ - **Lossless**: Full content reconstruction via `content_ref` pointers
+
+ Example `input.yaml` with externalization:
+ ```yaml
+ format_version: 3
+ type: llm_messages
+ messages:
+   - role: system
+     parts:
+       - type: text
+         size_bytes: 28500
+         content_ref:           # Large content → artifact
+           hash: sha256:a1b2c3d4...
+           path: artifacts/sha256/a1/b2/a1b2c3d4...txt
+           excerpt: "You are a helpful assistant...\n[TRUNCATED]"
+   - role: user
+     parts:
+       - type: text
+         content: "Hello!"      # Small content stays inline
+ ```
+
+ Run `tree` on the output directory to visualize the entire execution hierarchy. Feed `_summary.md` to an LLM for debugging assistance; it combines a high-level overview with detailed navigation for comprehensive trace analysis.
+
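The `content_ref` entries above carry enough information to rebuild externalized content. Below is a minimal sketch of doing so, assuming only the documented `artifacts/` layout and PyYAML; `resolve_content_ref` is a hypothetical helper for illustration, not an API shipped by the package:

```python
import hashlib
from pathlib import Path

import yaml  # assumes PyYAML is available


def resolve_content_ref(trace_dir: Path, ref: dict) -> str:
    """Hypothetical helper: load the artifact behind a content_ref and verify its sha256 hash."""
    artifact = trace_dir / ref["path"]  # e.g. artifacts/sha256/a1/b2/a1b2...txt
    data = artifact.read_bytes()
    if "sha256:" + hashlib.sha256(data).hexdigest() != ref["hash"]:
        raise ValueError(f"hash mismatch for {artifact}")
    return data.decode("utf-8")


trace_dir = Path("20260128_152932_abc12345_my_flow")  # example trace directory
spec = yaml.safe_load((trace_dir / "0001_my_flow" / "input.yaml").read_text())
for message in spec["messages"]:
    for part in message["parts"]:
        if "content_ref" in part:  # externalized part: inline the full text
            part["content"] = resolve_content_ref(trace_dir, part["content_ref"])
```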
  ## Configuration

  ### Environment Variables
@@ -348,6 +411,9 @@ OPENAI_API_KEY=your-api-key
  LMNR_PROJECT_API_KEY=your-lmnr-key
  LMNR_DEBUG=true # Enable debug traces

+ # Optional: Local Trace Debugging
+ TRACE_DEBUG_PATH=/path/to/trace/output # Save traces locally for LLM-assisted debugging
+
  # Optional: Orchestration
  PREFECT_API_URL=http://localhost:4200/api
  PREFECT_API_KEY=your-prefect-key
@@ -458,18 +524,21 @@ For AI assistants:
  ```
  ai-pipeline-core/
  ├── ai_pipeline_core/
- │   ├── documents/        # Document abstraction system
- │   ├── flow/             # Flow configuration and options
- │   ├── llm/              # LLM client and response handling
- │   ├── logging/          # Logging infrastructure
- │   ├── tracing.py        # Distributed tracing
- │   ├── pipeline.py       # Pipeline decorators
+ │   ├── deployment/       # Pipeline deployment and execution
+ │   ├── documents/        # Document abstraction system
+ │   ├── flow/             # Flow configuration and options
+ │   ├── llm/              # LLM client and response handling
+ │   ├── logging/          # Logging infrastructure
+ │   ├── prompt_builder/   # Document-aware prompt construction
+ │   ├── pipeline.py       # Pipeline decorators
+ │   ├── progress.py       # Intra-flow progress tracking
  │   ├── prompt_manager.py # Jinja2 template management
- │   └── settings.py       # Configuration management
- ├── tests/                # Comprehensive test suite
- ├── examples/             # Usage examples
- ├── API.md                # Complete API reference
- └── pyproject.toml        # Project configuration
+ │   ├── settings.py       # Configuration management
+ │   └── tracing.py        # Distributed tracing
+ ├── tests/                # Comprehensive test suite
+ ├── examples/             # Usage examples
+ ├── API.md                # Complete API reference
+ └── pyproject.toml        # Project configuration
  ```

  ## Contributing
@@ -18,7 +18,7 @@ AI Pipeline Core is a production-ready framework that combines document processi
  - **Structured Output**: Type-safe generation with Pydantic model validation
  - **Workflow Orchestration**: Prefect-based flows and tasks with automatic retries
  - **Observability**: Built-in distributed tracing via Laminar (LMNR) with cost tracking for debugging and monitoring
- - **Local Development**: Simple runner for testing pipelines without infrastructure
+ - **Deployment**: Unified pipeline execution for local, CLI, and production environments

  ## Installation

@@ -79,7 +79,7 @@ async def analyze_flow(
      for doc in documents:
          # Use AIMessages for LLM interaction
          response = await llm.generate(
-             model="gpt-5",
+             model="gpt-5.1",
              messages=AIMessages([doc])
          )

@@ -106,7 +106,7 @@ class Analysis(BaseModel):

  # Generate structured output
  response = await llm.generate_structured(
-     model="gpt-5",
+     model="gpt-5.1",
      response_format=Analysis,
      messages="Analyze this product review: ..."
  )
@@ -132,7 +132,7 @@ doc = MyDocument.create(
  # Parse back to original type
  data = doc.parse(dict) # Returns {"key": "value"}

- # Document provenance tracking (new in v0.1.14)
+ # Document provenance tracking
  doc_with_sources = MyDocument.create(
      name="derived.json",
      content={"result": "processed"},
@@ -179,15 +179,15 @@ if doc.is_text:
  # Parse structured data
  data = doc.as_json() # or as_yaml(), as_pydantic_model()

- # Convert between document types (new in v0.2.1)
+ # Convert between document types
  task_doc = flow_doc.model_convert(TaskDocument) # Convert FlowDocument to TaskDocument
  new_doc = doc.model_convert(OtherDocType, content={"new": "data"}) # With content update

- # Enhanced filtering (new in v0.1.14)
+ # Enhanced filtering
  filtered = documents.filter_by([Doc1, Doc2, Doc3]) # Multiple types
  named = documents.filter_by(["file1.txt", "file2.txt"]) # Multiple names

- # Immutable collections (new in v0.2.1)
+ # Immutable collections
  frozen_docs = DocumentList(docs, frozen=True) # Immutable document list
  frozen_msgs = AIMessages(messages, frozen=True) # Immutable message list
  ```
@@ -201,7 +201,7 @@ from ai_pipeline_core import llm, AIMessages, ModelOptions

  # Simple generation
  response = await llm.generate(
-     model="gpt-5",
+     model="gpt-5.1",
      messages="Explain quantum computing"
  )
  print(response.content)
@@ -211,21 +211,21 @@ static_context = AIMessages([large_document])

  # First call: caches context
  r1 = await llm.generate(
-     model="gpt-5",
+     model="gpt-5.1",
      context=static_context, # Cached for 120 seconds by default
      messages="Summarize" # Dynamic query
  )

  # Second call: reuses cache
  r2 = await llm.generate(
-     model="gpt-5",
+     model="gpt-5.1",
      context=static_context, # Reused from cache!
      messages="Key points?" # Different query
  )

- # Custom cache TTL (new in v0.1.14)
+ # Custom cache TTL
  response = await llm.generate(
-     model="gpt-5",
+     model="gpt-5.1",
      context=static_context,
      messages="Analyze",
      options=ModelOptions(cache_ttl="300s") # Cache for 5 minutes
@@ -233,7 +233,7 @@ response = await llm.generate(

  # Disable caching for dynamic contexts
  response = await llm.generate(
-     model="gpt-5",
+     model="gpt-5.1",
      context=dynamic_context,
      messages="Process",
      options=ModelOptions(cache_ttl=None) # No caching
@@ -272,12 +272,12 @@ from ai_pipeline_core import pipeline_flow, pipeline_task, set_trace_cost
  @pipeline_task # Automatic retry, tracing, and monitoring
  async def process_chunk(data: str) -> str:
      result = await transform(data)
-     set_trace_cost(0.05) # Track costs (new in v0.1.14)
+     set_trace_cost(0.05) # Track costs
      return result

  @pipeline_flow(
      config=MyFlowConfig,
-     trace_trim_documents=True # Trim large documents in traces (new in v0.2.1)
+     trace_trim_documents=True # Trim large documents in traces
  )
  async def main_flow(
      project_name: str,
@@ -290,6 +290,68 @@ async def main_flow(
      return DocumentList(results)
  ```

+ ### Local Trace Debugging
+
+ Save all trace spans to the local filesystem for LLM-assisted debugging:
+
+ ```bash
+ export TRACE_DEBUG_PATH=/path/to/debug/output
+ ```
+
+ This creates a hierarchical directory structure that mirrors the execution flow, with automatic deduplication:
+
+ ```
+ 20260128_152932_abc12345_my_flow/
+ ├── _trace.yaml                # Trace metadata
+ ├── _index.yaml                # Span ID → path mapping
+ ├── _summary.md                # Unified summary for human inspection and LLM debugging
+ ├── artifacts/                 # Deduplicated content storage
+ │   └── sha256/
+ │       └── ab/cd/             # Sharded by hash prefix
+ │           └── abcdef...1234.txt  # Large content (>10KB)
+ └── 0001_my_flow/              # Root span (numbered for execution order)
+     ├── _span.yaml             # Span metadata (timing, status, I/O refs)
+     ├── input.yaml             # Structured inputs (inline or refs)
+     ├── output.yaml            # Structured outputs (inline or refs)
+     ├── 0002_task_1/           # Child spans nested inside parent
+     │   ├── _span.yaml
+     │   ├── input.yaml
+     │   ├── output.yaml
+     │   └── 0003_llm_call/
+     │       ├── _span.yaml
+     │       ├── input.yaml     # LLM messages with inline/external content
+     │       └── output.yaml
+     └── 0004_task_2/
+         └── ...
+ ```
+
+ **Key Features:**
+ - **Automatic Deduplication**: Identical content (e.g., system prompts) stored once in `artifacts/`
+ - **Smart Externalization**: Large content (>10KB) externalized with 2KB inline previews
+ - **AI-Friendly**: Files capped at 50KB for easy LLM processing
+ - **Lossless**: Full content reconstruction via `content_ref` pointers
+
+ Example `input.yaml` with externalization:
+ ```yaml
+ format_version: 3
+ type: llm_messages
+ messages:
+   - role: system
+     parts:
+       - type: text
+         size_bytes: 28500
+         content_ref:           # Large content → artifact
+           hash: sha256:a1b2c3d4...
+           path: artifacts/sha256/a1/b2/a1b2c3d4...txt
+           excerpt: "You are a helpful assistant...\n[TRUNCATED]"
+   - role: user
+     parts:
+       - type: text
+         content: "Hello!"      # Small content stays inline
+ ```
+
+ Run `tree` on the output directory to visualize the entire execution hierarchy. Feed `_summary.md` to an LLM for debugging assistance; it combines a high-level overview with detailed navigation for comprehensive trace analysis.
+
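Because span directories carry a 4-digit execution-order prefix, a trace can also be walked programmatically. A rough sketch, assuming only the directory layout shown above; the exact `_span.yaml` keys (such as `status`) are assumptions:

```python
from pathlib import Path
from typing import Iterator

import yaml  # assumes PyYAML is available


def iter_spans(trace_dir: Path, depth: int = 0) -> Iterator[tuple[int, Path]]:
    """Yield (depth, span_dir) for numbered span directories, depth-first."""
    # Span directories start with an execution-order prefix, e.g. 0001_my_flow;
    # this filter also skips non-span entries like artifacts/.
    for child in sorted(p for p in trace_dir.iterdir() if p.is_dir() and p.name[:4].isdigit()):
        yield depth, child
        yield from iter_spans(child, depth + 1)


trace_dir = Path("20260128_152932_abc12345_my_flow")  # example trace directory
for depth, span_dir in iter_spans(trace_dir):
    meta = yaml.safe_load((span_dir / "_span.yaml").read_text())
    print("  " * depth + span_dir.name, meta.get("status"))  # "status" key assumed
```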
  ## Configuration

  ### Environment Variables
@@ -303,6 +365,9 @@ OPENAI_API_KEY=your-api-key
  LMNR_PROJECT_API_KEY=your-lmnr-key
  LMNR_DEBUG=true # Enable debug traces

+ # Optional: Local Trace Debugging
+ TRACE_DEBUG_PATH=/path/to/trace/output # Save traces locally for LLM-assisted debugging
+
  # Optional: Orchestration
  PREFECT_API_URL=http://localhost:4200/api
  PREFECT_API_KEY=your-prefect-key
@@ -413,18 +478,21 @@ For AI assistants:
  ```
  ai-pipeline-core/
  ├── ai_pipeline_core/
- │   ├── documents/        # Document abstraction system
- │   ├── flow/             # Flow configuration and options
- │   ├── llm/              # LLM client and response handling
- │   ├── logging/          # Logging infrastructure
- │   ├── tracing.py        # Distributed tracing
- │   ├── pipeline.py       # Pipeline decorators
+ │   ├── deployment/       # Pipeline deployment and execution
+ │   ├── documents/        # Document abstraction system
+ │   ├── flow/             # Flow configuration and options
+ │   ├── llm/              # LLM client and response handling
+ │   ├── logging/          # Logging infrastructure
+ │   ├── prompt_builder/   # Document-aware prompt construction
+ │   ├── pipeline.py       # Pipeline decorators
+ │   ├── progress.py       # Intra-flow progress tracking
  │   ├── prompt_manager.py # Jinja2 template management
- │   └── settings.py       # Configuration management
- ├── tests/                # Comprehensive test suite
- ├── examples/             # Usage examples
- ├── API.md                # Complete API reference
- └── pyproject.toml        # Project configuration
+ │   ├── settings.py       # Configuration management
+ │   └── tracing.py        # Distributed tracing
+ ├── tests/                # Comprehensive test suite
+ ├── examples/             # Usage examples
+ ├── API.md                # Complete API reference
+ └── pyproject.toml        # Project configuration
  ```

  ## Contributing
@@ -59,7 +59,7 @@ Quick Start:
  ...     ) -> DocumentList:
  ...         # Messages accept AIMessages or str. Wrap documents: AIMessages([doc])
  ...         response = await llm.generate(
- ...             "gpt-5",
+ ...             "gpt-5.1",
  ...             messages=AIMessages([documents[0]])
  ...         )
  ...         result = OutputDoc.create(
@@ -82,7 +82,8 @@ Optional Environment Variables:
  - LMNR_DEBUG: Set to "true" to enable debug-level traces
  """

- from . import llm
+ from . import llm, progress
+ from .deployment import DeploymentContext, DeploymentResult, PipelineDeployment
  from .documents import (
      Document,
      DocumentList,
@@ -94,6 +95,15 @@ from .documents import (
      sanitize_url,
  )
  from .flow import FlowConfig, FlowOptions
+ from .images import (
+     ImagePart,
+     ImagePreset,
+     ImageProcessingConfig,
+     ImageProcessingError,
+     ProcessedImage,
+     process_image,
+     process_image_to_documents,
+ )
  from .llm import (
      AIMessages,
      AIMessageType,
@@ -114,11 +124,13 @@ from .logging import (
  from .logging import get_pipeline_logger as get_logger
  from .pipeline import pipeline_flow, pipeline_task
  from .prefect import disable_run_logger, prefect_test_harness
+ from .prompt_builder import EnvironmentVariable, PromptBuilder
  from .prompt_manager import PromptManager
  from .settings import Settings
  from .tracing import TraceInfo, TraceLevel, set_trace_cost, trace
+ from .utils.remote_deployment import remote_deployment

- __version__ = "0.2.9"
+ __version__ = "0.3.3"

  __all__ = [
      # Config/Settings
@@ -148,6 +160,12 @@ __all__ = [
      # Prefect decorators (clean, no tracing)
      "prefect_test_harness",
      "disable_run_logger",
+     # Deployment
+     "PipelineDeployment",
+     "DeploymentContext",
+     "DeploymentResult",
+     "remote_deployment",
+     "progress",
      # LLM
      "llm", # for backward compatibility
      "generate",
@@ -163,8 +181,17 @@ __all__ = [
      "TraceLevel",
      "TraceInfo",
      "set_trace_cost",
+     # Prompt Builder
+     "PromptBuilder",
+     "EnvironmentVariable",
+     # Images
+     "process_image",
+     "process_image_to_documents",
+     "ImagePreset",
+     "ImageProcessingConfig",
+     "ProcessedImage",
+     "ImagePart",
+     "ImageProcessingError",
      # Utils
      "PromptManager",
-     "generate",
-     "generate_structured",
  ]
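Taken together, the `__init__.py` changes promote the new deployment, prompt-builder, and image helpers to the package root (and drop the duplicate `generate`/`generate_structured` entries from `__all__`). A quick smoke test of the new surface, assuming ai-pipeline-core 0.3.3 is installed:

```python
# All of these names are exported at the package root as of 0.3.3
from ai_pipeline_core import (
    DeploymentContext,
    DeploymentResult,
    ImagePreset,
    PipelineDeployment,
    PromptBuilder,
    process_image,
    progress,
    remote_deployment,
)

print(PipelineDeployment, PromptBuilder, ImagePreset, process_image)
```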
@@ -0,0 +1,26 @@
+ """Local trace debugging system for AI pipelines.
+
+ This module provides filesystem-based trace debugging that saves all spans
+ with their inputs/outputs for LLM-assisted debugging.
+
+ Enable by setting TRACE_DEBUG_PATH environment variable.
+ """
+
+ from .config import TraceDebugConfig
+ from .content import ArtifactStore, ContentRef, ContentWriter, reconstruct_span_content
+ from .processor import LocalDebugSpanProcessor
+ from .summary import generate_summary
+ from .writer import LocalTraceWriter, TraceState, WriteJob
+
+ __all__ = [
+     "TraceDebugConfig",
+     "ContentRef",
+     "ContentWriter",
+     "ArtifactStore",
+     "reconstruct_span_content",
+     "LocalDebugSpanProcessor",
+     "LocalTraceWriter",
+     "TraceState",
+     "WriteJob",
+     "generate_summary",
+ ]
@@ -0,0 +1,91 @@
+ """Configuration for local trace debugging."""
+
+ from pathlib import Path
+
+ from pydantic import BaseModel, ConfigDict, Field
+
+
+ class TraceDebugConfig(BaseModel):
+     """Configuration for local trace debugging.
+
+     Controls how traces are written to the local filesystem for debugging.
+     Enable by setting TRACE_DEBUG_PATH environment variable.
+     """
+
+     model_config = ConfigDict(frozen=True)
+
+     path: Path = Field(description="Directory for debug traces")
+     enabled: bool = Field(default=True, description="Whether debug tracing is enabled")
+
+     # Content size limits (Issue #2)
+     max_file_bytes: int = Field(
+         default=50_000,
+         description="Max bytes for input.yaml or output.yaml. Elements externalized to stay under.",
+     )
+     max_element_bytes: int = Field(
+         default=10_000,
+         description="Max bytes for single element. Above this, partial + artifact ref.",
+     )
+     element_excerpt_bytes: int = Field(
+         default=2_000,
+         description="Bytes of content to keep inline when element exceeds max_element_bytes.",
+     )
+     max_content_bytes: int = Field(
+         default=10_000_000,
+         description="Max bytes for any single artifact. Above this, truncate.",
+     )
+
+     # Image handling (Issue #7 - no changes per user)
+     extract_base64_images: bool = Field(
+         default=True,
+         description="Extract base64 images to artifact files",
+     )
+
+     # Span optimization (Issue #4)
+     merge_wrapper_spans: bool = Field(
+         default=True,
+         description="Merge Prefect wrapper spans with inner traced function spans",
+     )
+
+     # Events (Issue #12)
+     events_file_mode: str = Field(
+         default="errors_only",
+         description="When to write events.yaml: 'all', 'errors_only', 'none'",
+     )
+
+     # Indexes (Issue #1)
+     include_llm_index: bool = Field(
+         default=True,
+         description="Generate _llm_calls.yaml with LLM-specific details",
+     )
+     include_error_index: bool = Field(
+         default=True,
+         description="Generate _errors.yaml with failed span details",
+     )
+
+     # Cleanup
+     max_traces: int | None = Field(
+         default=None,
+         description="Max number of traces to keep. None for unlimited.",
+     )
+
+     # Security - default redaction patterns for common secrets
+     redact_patterns: tuple[str, ...] = Field(
+         default=(
+             r"sk-[a-zA-Z0-9]{20,}", # OpenAI API keys
+             r"sk-proj-[a-zA-Z0-9\-_]{20,}", # OpenAI project keys
+             r"AKIA[0-9A-Z]{16}", # AWS access keys
+             r"ghp_[a-zA-Z0-9]{36}", # GitHub personal tokens
+             r"gho_[a-zA-Z0-9]{36}", # GitHub OAuth tokens
+             r"xoxb-[a-zA-Z0-9\-]+", # Slack bot tokens
+             r"xoxp-[a-zA-Z0-9\-]+", # Slack user tokens
+             r"(?i)password\s*[:=]\s*['\"]?[^\s'\"]+", # Passwords
+             r"(?i)secret\s*[:=]\s*['\"]?[^\s'\"]+", # Secrets
+             r"(?i)api[_\-]?key\s*[:=]\s*['\"]?[^\s'\"]+", # API keys
+             r"(?i)bearer\s+[a-zA-Z0-9\-_\.]+", # Bearer tokens
+         ),
+         description="Regex patterns for secrets to redact",
+     )
+
+     # Summary
+     generate_summary: bool = Field(default=True, description="Generate _summary.md")
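Since the full model is shown above, constructing it directly is straightforward. A sketch of direct construction; in normal use the path comes from `TRACE_DEBUG_PATH` and the remaining fields keep the defaults shown:

```python
from pathlib import Path

from ai_pipeline_core.debug import TraceDebugConfig

config = TraceDebugConfig(
    path=Path("/tmp/trace-debug"),
    max_traces=10,            # keep only the ten most recent traces
    events_file_mode="all",   # write events.yaml for every span, not just errors
)
assert config.enabled and config.max_file_bytes == 50_000
# The model is frozen (ConfigDict(frozen=True)), so fields cannot be reassigned.
```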