ai-pipeline-core 0.1.1 (ai_pipeline_core-0.1.1-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- /dev/null
+++ ai_pipeline_core/tracing.py
@@ -0,0 +1,205 @@
+ """Tracing utilities that integrate Laminar (``lmnr``) with our code-base.
+
+ This module centralises:
+ • ``TraceInfo`` - a small helper object for propagating contextual metadata.
+ • ``trace`` decorator - augments a callable with Laminar tracing, automatic
+   ``observe`` instrumentation, and optional support for test runs.
+ """
+
+ from __future__ import annotations
+
+ import inspect
+ import os
+ from functools import wraps
+ from typing import Any, Callable, ParamSpec, TypeVar, cast, overload
+
+ from lmnr import Instruments, Laminar, observe
+ from pydantic import BaseModel
+
+ from ai_pipeline_core.settings import settings
+
+ # ---------------------------------------------------------------------------
+ # Typing helpers
+ # ---------------------------------------------------------------------------
+ P = ParamSpec("P")
+ R = TypeVar("R")
+
+
+ # ---------------------------------------------------------------------------
+ # ``TraceInfo`` – metadata container
+ # ---------------------------------------------------------------------------
+ class TraceInfo(BaseModel):
+     """A container that holds contextual metadata for the current trace."""
+
+     session_id: str | None = None
+     user_id: str | None = None
+     metadata: dict[str, str] = {}
+     tags: list[str] = []
+
+     def get_observe_kwargs(self) -> dict[str, Any]:
+         """Return kwargs suitable for passing to the observe decorator."""
+         kwargs: dict[str, Any] = {}
+
+         # Use environment variable fallback for session_id
+         session_id = self.session_id or os.getenv("LMNR_SESSION_ID")
+         if session_id:
+             kwargs["session_id"] = session_id
+
+         # Use environment variable fallback for user_id
+         user_id = self.user_id or os.getenv("LMNR_USER_ID")
+         if user_id:
+             kwargs["user_id"] = user_id
+
+         if self.metadata:
+             kwargs["metadata"] = self.metadata
+         if self.tags:
+             kwargs["tags"] = self.tags
+         return kwargs
+
+
+ # ---------------------------------------------------------------------------
+ # ``trace`` decorator
+ # ---------------------------------------------------------------------------
+
+
+ def _initialise_laminar() -> None:
+     """Ensure Laminar is initialised once per process."""
+     if settings.lmnr_project_api_key:
+         Laminar.initialize(
+             project_api_key=settings.lmnr_project_api_key,
+             disabled_instruments=[Instruments.OPENAI],
+         )
+
+
+ # Overload for calls like @trace(name="...", test=True)
+ @overload
+ def trace(
+     *,
+     name: str | None = None,
+     test: bool = False,
+     debug_only: bool = False,
+     ignore_input: bool = False,
+     ignore_output: bool = False,
+     ignore_inputs: list[str] | None = None,
+     input_formatter: Callable[..., str] | None = None,
+     output_formatter: Callable[..., str] | None = None,
+ ) -> Callable[[Callable[P, R]], Callable[P, R]]: ...
+
+
+ # Overload for the bare @trace call
+ @overload
+ def trace(func: Callable[P, R]) -> Callable[P, R]: ...
+
+
+ # Actual implementation
+ def trace(
+     func: Callable[P, R] | None = None,
+     *,
+     name: str | None = None,
+     test: bool = False,
+     debug_only: bool = False,
+     ignore_input: bool = False,
+     ignore_output: bool = False,
+     ignore_inputs: list[str] | None = None,
+     input_formatter: Callable[..., str] | None = None,
+     output_formatter: Callable[..., str] | None = None,
+ ) -> Callable[[Callable[P, R]], Callable[P, R]] | Callable[P, R]:
+     """Decorator that wires Laminar tracing and observation into a function.
+
+     Args:
+         func: The function to be traced (when used as @trace)
+         name: Custom name for the observation (defaults to function name)
+         test: Mark this trace as a test run
+         debug_only: Only trace when LMNR_DEBUG=true environment variable is set
+         ignore_input: Ignore all inputs in the trace
+         ignore_output: Ignore the output in the trace
+         ignore_inputs: List of specific input parameter names to ignore
+         input_formatter: Custom formatter for inputs (takes any arguments, returns string)
+         output_formatter: Custom formatter for outputs (takes any arguments, returns string)
+
+     Returns:
+         The decorated function with Laminar tracing enabled
+     """
+
+     def decorator(f: Callable[P, R]) -> Callable[P, R]:
+         # --- Pre-computation (done once when the function is decorated) ---
+         _initialise_laminar()
+         sig = inspect.signature(f)
+         is_coroutine = inspect.iscoroutinefunction(f)
+         decorator_test_flag = test
+         observe_name = name or f.__name__
+         _observe = observe
+
+         # Store the new parameters
+         _ignore_input = ignore_input
+         _ignore_output = ignore_output
+         _ignore_inputs = ignore_inputs
+         _input_formatter = input_formatter
+         _output_formatter = output_formatter
+
+         # --- Check debug_only flag and environment variable ---
+         if debug_only and os.getenv("LMNR_DEBUG", "").lower() != "true":
+             # If debug_only is True but LMNR_DEBUG is not set to "true",
+             # return the original function without tracing
+             return f
+
+         # --- Helper function for runtime logic ---
+         def _prepare_and_get_observe_params(runtime_kwargs: dict[str, Any]) -> dict[str, Any]:
+             """
+             Inspects runtime args, manages TraceInfo, and returns params for lmnr.observe.
+             Modifies runtime_kwargs in place to inject TraceInfo if the function expects it.
+             """
+             trace_info = runtime_kwargs.get("trace_info")
+             if not isinstance(trace_info, TraceInfo):
+                 trace_info = TraceInfo()
+             if "trace_info" in sig.parameters:
+                 runtime_kwargs["trace_info"] = trace_info
+
+             runtime_test_flag = bool(runtime_kwargs.get("test", False))
+             if (decorator_test_flag or runtime_test_flag) and "test" not in trace_info.tags:
+                 trace_info.tags.append("test")
+
+             observe_params = trace_info.get_observe_kwargs()
+             observe_params["name"] = observe_name
+
+             # Add the new Laminar parameters
+             if _ignore_input:
+                 observe_params["ignore_input"] = _ignore_input
+             if _ignore_output:
+                 observe_params["ignore_output"] = _ignore_output
+             if _ignore_inputs is not None:
+                 observe_params["ignore_inputs"] = _ignore_inputs
+             if _input_formatter is not None:
+                 observe_params["input_formatter"] = _input_formatter
+             if _output_formatter is not None:
+                 observe_params["output_formatter"] = _output_formatter
+
+             return observe_params
+
+         # --- The actual wrappers ---
+         @wraps(f)
+         def sync_wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
+             observe_params = _prepare_and_get_observe_params(kwargs)
+             observed_func = _observe(**observe_params)(f)
+             return observed_func(*args, **kwargs)
+
+         @wraps(f)
+         async def async_wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
+             observe_params = _prepare_and_get_observe_params(kwargs)
+             observed_func = _observe(**observe_params)(f)
+             return await observed_func(*args, **kwargs)
+
+         wrapper = async_wrapper if is_coroutine else sync_wrapper
+
+         # Preserve the original function signature
+         try:
+             wrapper.__signature__ = sig  # type: ignore[attr-defined]
+         except (AttributeError, ValueError):
+             pass
+
+         return cast(Callable[P, R], wrapper)
+
+     if func:
+         return decorator(func)  # Called as @trace
+     else:
+         return decorator  # Called as @trace(...)
--- /dev/null
+++ ai_pipeline_core-0.1.1.dist-info/METADATA
@@ -0,0 +1,477 @@
+ Metadata-Version: 2.4
+ Name: ai-pipeline-core
+ Version: 0.1.1
+ Summary: Core utilities for AI-powered processing pipelines using prefect
+ Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
+ Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
+ Project-URL: Issues, https://github.com/bbarwik/ai-pipeline-core/issues
+ Author-email: bbarwik <bbarwik@gmail.com>
+ License: MIT
+ License-File: LICENSE
+ Classifier: Development Status :: 4 - Beta
+ Classifier: Intended Audience :: Developers
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
+ Classifier: Typing :: Typed
+ Requires-Python: >=3.12
+ Requires-Dist: httpx>=0.28.1
+ Requires-Dist: jinja2>=3.1.6
+ Requires-Dist: lmnr>=0.7.4
+ Requires-Dist: openai>=1.99.9
+ Requires-Dist: prefect>=3.4.13
+ Requires-Dist: pydantic-settings>=2.10.1
+ Requires-Dist: pydantic>=2.11.7
+ Requires-Dist: python-magic>=0.4.27
+ Requires-Dist: ruamel-yaml>=0.18.14
+ Requires-Dist: tiktoken>=0.11.0
+ Provides-Extra: dev
+ Requires-Dist: basedpyright>=1.31.2; extra == 'dev'
+ Requires-Dist: bump2version>=1.0.1; extra == 'dev'
+ Requires-Dist: pre-commit>=4.3.0; extra == 'dev'
+ Requires-Dist: pytest-asyncio>=1.1.0; extra == 'dev'
+ Requires-Dist: pytest-cov>=5.0.0; extra == 'dev'
+ Requires-Dist: pytest-mock>=3.14.0; extra == 'dev'
+ Requires-Dist: pytest-xdist>=3.8.0; extra == 'dev'
+ Requires-Dist: pytest>=8.4.1; extra == 'dev'
+ Requires-Dist: ruff>=0.12.9; extra == 'dev'
+ Description-Content-Type: text/markdown
+
+ # AI Pipeline Core
+
+ A high-performance, type-safe Python library for building AI-powered data processing pipelines with Prefect orchestration and LMNR observability.
+
+ [![Python Version](https://img.shields.io/badge/python-3.12%2B-blue)](https://www.python.org/downloads/)
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+ [![Code Style: Ruff](https://img.shields.io/badge/code%20style-ruff-000000.svg)](https://github.com/astral-sh/ruff)
+ [![Type Checked: Basedpyright](https://img.shields.io/badge/type%20checked-basedpyright-blue)](https://github.com/DetachHead/basedpyright)
+ [![Test Coverage](https://img.shields.io/badge/coverage-80%25-green)](https://github.com/bbarwik/ai-pipeline-core)
+ [![Status: Beta](https://img.shields.io/badge/status-beta-yellow)](https://github.com/bbarwik/ai-pipeline-core)
+ [![PyPI version](https://img.shields.io/pypi/v/ai-pipeline-core.svg)](https://pypi.org/project/ai-pipeline-core/)
+ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ai-pipeline-core.svg)](https://pypi.org/project/ai-pipeline-core/)
+
+ > [!NOTE]
+ > **Beta Release**
+ >
+ > This library is in beta. While actively used in production systems, the API may still evolve. We follow semantic versioning for releases.
+
+ ## Overview
+
+ AI Pipeline Core provides a robust foundation for building production-grade AI pipelines with a focus on:
+
+ - **100% Async Architecture** - Built for high-throughput, non-blocking operations
+ - **Type Safety** - Comprehensive type hints with Pydantic models throughout
+ - **Minimal Design** - Every line of code justified, no unnecessary abstractions
+ - **Production Ready** - Built-in retry logic, caching, monitoring, and error handling
+ - **LLM Optimization** - Smart context/message splitting for efficient token usage
+
+ ## Key Features
+
+ ### 🚀 Performance First
+ - Fully asynchronous I/O operations
+ - Intelligent caching for LLM context
+ - Streaming support for large documents
+ - Automatic retry with exponential backoff
+
+ ### 🔒 Type Safety
+ - Pydantic models for all data structures
+ - Strict type checking with basedpyright
+ - Runtime validation for all inputs
+ - Immutable configurations by default
+
+ ### 📊 Observability
+ - LMNR (Laminar) tracing integration
+ - Structured logging with Prefect
+ - Cost tracking for LLM operations
+ - Performance metrics out of the box
+
+ ### 🎯 Developer Experience
+ - Self-documenting code for experienced developers
+ - Consistent patterns throughout
+ - Comprehensive error messages
+ - Smart defaults with override capabilities
+
+ ### 🤖 Advanced LLM Features
+ - Search-enabled models (Perplexity Sonar, Gemini Flash Search)
+ - Support for reasoning models (o1 series)
+ - Structured output with Pydantic models
+ - Dynamic model selection based on task
+
+ ## Installation
+
+ ```bash
+ pip install ai-pipeline-core
+ ```
+
+ ### Development Installation
+
+ For contributors and development:
+
+ ```bash
+ git clone https://github.com/bbarwik/ai-pipeline-core.git
+ cd ai-pipeline-core
+ pip install -e ".[dev]"
+ make install-dev  # Installs pre-commit hooks
+ ```
+
+ ### Requirements
+ - Python 3.12 or higher
+ - Linux/macOS (Windows via WSL2)
+
+ ## Quick Start
+
+ ### Basic Document Processing
+ ```python
+ from ai_pipeline_core.documents import Document, FlowDocument
+ from ai_pipeline_core.llm import generate_structured, AIMessages, ModelOptions
+ from pydantic import BaseModel
+
+ class InputDocument(FlowDocument):
+     """Custom document type for your flow"""
+     def get_type(self) -> str:
+         return "input"
+
+ class AnalysisResult(BaseModel):
+     """Example Pydantic model for structured output"""
+     summary: str
+     key_points: list[str]
+
+ async def process_document(doc: Document):
+     # Generate AI response with structured output
+     response = await generate_structured(
+         model="gemini-2.5-pro",  # Model is required first parameter
+         response_format=AnalysisResult,  # Pydantic model class
+         context=AIMessages([doc]),  # Cached context
+         messages=AIMessages(["Analyze this document"]),  # Dynamic messages
+         options=ModelOptions(max_completion_tokens=5000)  # Optional options
+     )
+     return response.parsed
+ ```
+
+ ### Prefect Flow Integration
+ ```python
+ import json
+
+ from prefect import flow, task
+ from ai_pipeline_core.documents import Document, DocumentList, FlowDocument
+ from ai_pipeline_core.flow import FlowConfig
+ from ai_pipeline_core.tracing import trace
+
+ class OutputDocument(FlowDocument):
+     """Custom output document type"""
+     def get_type(self) -> str:
+         return "output"
+
+ class MyFlowConfig(FlowConfig):
+     INPUT_DOCUMENT_TYPES = [InputDocument]  # InputDocument from the previous example
+     OUTPUT_DOCUMENT_TYPE = OutputDocument
+
+ @task
+ @trace
+ async def process_task(doc: Document) -> Document:
+     # Task-level processing with automatic tracing
+     result = await process_document(doc)
+     # Convert result to JSON string for document content
+     return OutputDocument(name="result", content=json.dumps(result.model_dump()).encode())
+
+ @flow
+ async def my_pipeline(documents: DocumentList):
+     config = MyFlowConfig()
+     input_docs = config.get_input_documents(documents)
+
+     results = await process_task.map(input_docs)
+
+     config.validate_output_documents(results)
+     return results
+ ```
+
+ ## Core Modules
+
+ ### Documents System
+ The foundation for all data handling. Documents are immutable, type-safe wrappers around content with automatic MIME type detection.
+
+ ```python
+ from ai_pipeline_core.documents import Document, DocumentList
+
+ # Documents handle encoding/decoding automatically
+ # (MyDocument is a user-defined Document subclass, as in the Quick Start)
+ doc = MyDocument(
+     name="report.pdf",
+     content=pdf_bytes,
+     description="Q3 Financial Report"
+ )
+
+ # Type-safe document collections
+ docs = DocumentList([doc1, doc2])
+ ```
208
+
209
+ ### LLM Module
210
+ Managed AI interactions with built-in retry logic, cost tracking, and structured outputs.
211
+
212
+ **Supported Models** (via LiteLLM proxy):
213
+ - OpenAI: GPT-4, GPT-5 series
214
+ - Anthropic: Claude 3 series
215
+ - Google: Gemini 2.5 series
216
+ - xAI: Grok models
217
+ - Perplexity: Sonar models (with search capabilities)
218
+ - And many more through LiteLLM compatibility
219
+
220
+ ```python
221
+ from ai_pipeline_core.llm import generate_structured, AIMessages, ModelOptions
222
+ from pydantic import BaseModel
223
+
224
+ class YourPydanticModel(BaseModel):
225
+ field1: str
226
+ field2: int
227
+
228
+ # Get structured Pydantic model responses
229
+ result = await generate_structured(
230
+ model="gemini-2.5-pro", # Model is required first parameter
231
+ response_format=YourPydanticModel, # Pydantic model class for structured output
232
+ context=AIMessages(), # Optional context (cached)
233
+ messages=AIMessages(["Your prompt here"]), # Required messages
234
+ options=ModelOptions(
235
+ retries=3,
236
+ timeout=30,
237
+ max_completion_tokens=10000
238
+ )
239
+ )
240
+ # Access the parsed result
241
+ model_instance = result.parsed # Type: YourPydanticModel
242
+ ```
243
+
244
+ ### Prompt Management
245
+ Flexible Jinja2-based prompt system with smart path resolution.
246
+
247
+ ```python
248
+ from ai_pipeline_core import PromptManager
249
+
250
+ pm = PromptManager(__file__)
251
+ prompt = pm.get("analyze_document.jinja2",
252
+ document=doc,
253
+ instructions=instructions)
254
+ ```
255
+
256
+ ### Tracing & Monitoring
257
+ Automatic observability with LMNR integration.
258
+
+ ```python
+ from ai_pipeline_core.tracing import trace
+
+ @trace(name="analyze-data")
+ async def analyze_data(data: InputData) -> OutputData:
+     # Automatic tracing with performance metrics
+     ...
+ ```
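+
+ Per-call context such as session, user, metadata, and tags travels through the `TraceInfo` model rather than decorator arguments. A minimal sketch based on the `tracing` module shipped in this wheel (the function and field values are illustrative):
+
+ ```python
+ from ai_pipeline_core.tracing import trace, TraceInfo
+
+ @trace(name="summarize", debug_only=True)  # traced only when LMNR_DEBUG=true
+ async def summarize(text: str, trace_info: TraceInfo) -> str:
+     # The decorator injects a default TraceInfo when the parameter is declared
+     ...
+
+ # session_id/user_id fall back to LMNR_SESSION_ID/LMNR_USER_ID when unset
+ info = TraceInfo(metadata={"workflow": "analysis"}, tags=["experiment"])
+ summary = await summarize("Q3 report text", trace_info=info)
+ ```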
+
+ ## Architecture Principles
+
+ ### 1. Async-First Design
+ Every I/O operation is asynchronous. No blocking calls, no synchronous fallbacks.
+
+ ### 2. Type Safety Throughout
+ Complete type annotations with runtime validation. If it compiles, it works.
+
+ ### 3. Minimal Surface Area
+ Less code is better code. Every line must justify its existence.
+
+ ### 4. Configuration as Code
+ All configurations are Pydantic models - validated, typed, and immutable.
+
+ ## Project Structure
+
+ ```
+ ai_pipeline_core/
+ ├── documents/            # Document handling system
+ │   ├── document.py       # Base document class
+ │   ├── flow_document.py  # Prefect flow documents
+ │   └── task_document.py  # Prefect task documents
+ ├── llm/                  # LLM interaction layer
+ │   ├── client.py         # Async client implementation
+ │   └── model_options.py  # Configuration models
+ ├── flow/                 # Prefect flow utilities
+ │   └── config.py         # Type-safe flow configuration
+ ├── logging/              # Structured logging
+ ├── tracing.py            # Observability decorators
+ └── settings.py           # Centralized configuration
+ ```
+
+ ## Development
+
+ ### Running Tests
+ ```bash
+ make test      # Run all tests
+ make test-cov  # Run with coverage report
+ pytest tests/test_documents.py::TestDocument::test_creation  # Single test
+ ```
+
+ ### Code Quality
+ ```bash
+ make lint        # Run linting checks
+ make format      # Auto-format code
+ make typecheck   # Run type checking
+ make pre-commit  # Run all pre-commit hooks
+ ```
+
+ ### Development Workflow
+ 1. Create feature branch
+ 2. Write tests first (TDD)
+ 3. Implement minimal solution
+ 4. Run `make format` and `make typecheck`
+ 5. Ensure >80% test coverage
+ 6. Submit PR with clear description
+
+ ## Best Practices
+
+ ### DO ✅
+ - Use async/await for all I/O operations
+ - Define Pydantic models for all data structures
+ - Keep functions under 20 lines
+ - Use type hints for everything
+ - Let Documents handle serialization
+
+ ### DON'T ❌
+ - Import `logging` directly (use the pipeline logger; see the sketch below)
+ - Use raw dictionaries for configuration
+ - Write defensive code for unlikely scenarios
+ - Add comments explaining *what* the code does (the code should make that clear)
+ - Use `requests` or other blocking libraries
+
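+ A minimal sketch of the first point. The accessor name below is hypothetical (this release's documented logging API is `setup_logging`/`LoggingConfig`, shown under Debug Mode); the idea is to route all messages through the package's logging module rather than the stdlib root logger:
+
+ ```python
+ from ai_pipeline_core.logging import get_pipeline_logger  # hypothetical accessor name
+
+ logger = get_pipeline_logger(__name__)  # instead of logging.getLogger(__name__)
+ logger.info("document processed")
+ ```
+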
+ ## Configuration
+
+ ### Environment Variables
+ ```bash
+ # Required for LLM operations
+ OPENAI_API_KEY=sk-...                   # Your OpenAI or LiteLLM proxy key
+ OPENAI_BASE_URL=http://your-proxy:8000  # LiteLLM proxy endpoint
+
+ # Optional - for observability
+ LMNR_PROJECT_API_KEY=lmnr_...           # LMNR tracing
+
+ # Optional - for orchestration
+ PREFECT_API_URL=http://localhost:4200/api
+ AI_PIPELINE_LOG_LEVEL=INFO
+ ```
+
+ ### Settings Management
+ ```python
+ from ai_pipeline_core.settings import settings
+
+ # All settings are validated Pydantic models
+ api_key = settings.openai_api_key
+ base_url = settings.openai_base_url  # LiteLLM proxy endpoint
+ ```
+
+ ## Integration Examples
+
+ ### With Prefect Cloud
+ ```python
+ from prefect import flow
+ from ai_pipeline_core.documents import DocumentList
+ from ai_pipeline_core.flow import FlowConfig
+
+ @flow(name="document-processor")
+ async def process_documents(docs: DocumentList):
+     # Automatic Prefect Cloud integration
+     ...
+ ```
+
+ ### With Custom LLM Providers
+ ```python
+ from ai_pipeline_core.settings import settings
+
+ # Configure LiteLLM proxy endpoint via environment variables
+ # OPENAI_BASE_URL=http://your-litellm-proxy:8000
+ # OPENAI_API_KEY=your-proxy-key
+
+ # Access in code (settings are immutable)
+ base_url = settings.openai_base_url
+ ```
+
+ ## Performance Considerations
+
+ - **Context Caching**: The LLM module automatically caches context to reduce token usage
+ - **Document Streaming**: Large documents are streamed rather than loaded entirely into memory
+ - **Batch Processing**: Use Prefect's `.map()` for parallel task execution
+ - **Connection Pooling**: HTTP clients use connection pooling by default
+
+ ## Troubleshooting
+
+ ### Common Issues
+
+ 1. **Import Errors**: Ensure Python 3.12+ is installed
+ 2. **Async Warnings**: All I/O operations must use `await`
+ 3. **Type Errors**: Run `make typecheck` to identify issues
+ 4. **MIME Detection**: Install the `python-magic` system dependencies (see the note below)
+
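+ On the last point: `python-magic` wraps the native libmagic library, which must be installed separately from the Python package. For example (package names may vary by distribution):
+
+ ```bash
+ sudo apt-get install libmagic1   # Debian/Ubuntu; on macOS: brew install libmagic
+ ```
+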
+ ### Debug Mode
+ ```python
+ from ai_pipeline_core.logging import setup_logging, LoggingConfig
+
+ # Setup logging with DEBUG level
+ setup_logging(LoggingConfig(level="DEBUG"))
+ ```
+
+ ## Release Process
+
+ See [RELEASE.md](RELEASE.md) for detailed release procedures.
+
+ **Important**: All releases require:
+ - ✅ Zero errors from `make typecheck`
+ - ✅ All unit tests passing with >80% coverage
+ - ✅ **Integration tests passing** (with configured API keys)
+
+ ## Contributing
+
+ > [!NOTE]
+ > As this is a preview repository used internally, we are not actively accepting external contributions. The codebase may change significantly without notice.
+ >
+ > **Recommended approach:**
+ > 1. Fork the repository
+ > 2. Make changes in your fork
+ > 3. Share your improvements with the community through your fork
+
+ If you've found a critical security issue, please report it via the GitHub Security tab.
+
+ For learning purposes, see [CLAUDE.md](CLAUDE.md) for our comprehensive coding standards and architecture guide.
+
+ ## Documentation
+
+ - [CLAUDE.md](CLAUDE.md) - Detailed coding standards and architecture guide
+ - [Prefect Integration](docs/prefect.md) - Prefect patterns and best practices
+ - [Deployment Guide](docs/prefect_deployment.md) - Production deployment
+ - [Prefect Logging](docs/prefect_logging.md) - Logging configuration guide
+
+ ## License
+
+ MIT License - see [LICENSE](LICENSE) file for details.
+
+ ## Support
+
+ > [!CAUTION]
+ > This is a preview repository with no guaranteed support. Issues and discussions may not be actively monitored.
+
+ - **For Learning**: Review the code, documentation, and examples
+ - **For Usage**: Fork the repository and maintain your own version
+ - **Security Issues**: Report via GitHub Security tab
+
+ ## Acknowledgments
+
+ Built with:
+ - [Prefect](https://www.prefect.io/) - Workflow orchestration
+ - [LMNR](https://www.lmnr.ai/) - LLM observability
+ - [LiteLLM](https://litellm.ai/) - LLM proxy
+ - [Pydantic](https://pydantic-docs.helpmanual.io/) - Data validation
+
+ ## Stability Notice
+
+ **Current Version**: 0.1.1
+ **Status**: Internal Preview
+ **API Stability**: Unstable - Breaking changes expected
+ **Recommended Use**: Learning and reference only
+
+ For production use, please fork this repository and maintain your own stable version.
+
+ ---
+
+ **Remember**: The best code is no code. The second best is minimal, typed, async code that does exactly what's needed.