ai-pipeline-core 0.1.8__py3-none-any.whl → 0.1.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +86 -4
- ai_pipeline_core/documents/__init__.py +11 -0
- ai_pipeline_core/documents/document.py +1107 -131
- ai_pipeline_core/documents/document_list.py +147 -38
- ai_pipeline_core/documents/flow_document.py +112 -11
- ai_pipeline_core/documents/mime_type.py +173 -15
- ai_pipeline_core/documents/task_document.py +117 -12
- ai_pipeline_core/documents/temporary_document.py +95 -0
- ai_pipeline_core/documents/utils.py +41 -9
- ai_pipeline_core/exceptions.py +47 -11
- ai_pipeline_core/flow/__init__.py +2 -0
- ai_pipeline_core/flow/config.py +250 -23
- ai_pipeline_core/flow/options.py +50 -1
- ai_pipeline_core/llm/__init__.py +6 -0
- ai_pipeline_core/llm/ai_messages.py +125 -27
- ai_pipeline_core/llm/client.py +278 -26
- ai_pipeline_core/llm/model_options.py +130 -1
- ai_pipeline_core/llm/model_response.py +239 -35
- ai_pipeline_core/llm/model_types.py +67 -0
- ai_pipeline_core/logging/__init__.py +13 -0
- ai_pipeline_core/logging/logging_config.py +72 -20
- ai_pipeline_core/logging/logging_mixin.py +38 -32
- ai_pipeline_core/pipeline.py +308 -60
- ai_pipeline_core/prefect.py +48 -1
- ai_pipeline_core/prompt_manager.py +215 -24
- ai_pipeline_core/settings.py +108 -4
- ai_pipeline_core/simple_runner/__init__.py +5 -0
- ai_pipeline_core/simple_runner/cli.py +145 -17
- ai_pipeline_core/simple_runner/simple_runner.py +244 -6
- ai_pipeline_core/tracing.py +232 -30
- ai_pipeline_core-0.1.11.dist-info/METADATA +450 -0
- ai_pipeline_core-0.1.11.dist-info/RECORD +36 -0
- ai_pipeline_core-0.1.8.dist-info/METADATA +0 -558
- ai_pipeline_core-0.1.8.dist-info/RECORD +0 -35
- {ai_pipeline_core-0.1.8.dist-info → ai_pipeline_core-0.1.11.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.1.8.dist-info → ai_pipeline_core-0.1.11.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,558 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: ai-pipeline-core
|
|
3
|
-
Version: 0.1.8
|
|
4
|
-
Summary: Core utilities for AI-powered processing pipelines using prefect
|
|
5
|
-
Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
|
|
6
|
-
Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
|
|
7
|
-
Project-URL: Issues, https://github.com/bbarwik/ai-pipeline-core/issues
|
|
8
|
-
Author-email: bbarwik <bbarwik@gmail.com>
|
|
9
|
-
License: MIT
|
|
10
|
-
License-File: LICENSE
|
|
11
|
-
Classifier: Development Status :: 4 - Beta
|
|
12
|
-
Classifier: Intended Audience :: Developers
|
|
13
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
-
Classifier: Programming Language :: Python :: 3
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
-
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
18
|
-
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
|
-
Classifier: Typing :: Typed
|
|
20
|
-
Requires-Python: >=3.12
|
|
21
|
-
Requires-Dist: httpx>=0.28.1
|
|
22
|
-
Requires-Dist: jinja2>=3.1.6
|
|
23
|
-
Requires-Dist: lmnr>=0.7.6
|
|
24
|
-
Requires-Dist: openai>=1.99.9
|
|
25
|
-
Requires-Dist: prefect>=3.4.13
|
|
26
|
-
Requires-Dist: pydantic-settings>=2.10.1
|
|
27
|
-
Requires-Dist: pydantic>=2.11.7
|
|
28
|
-
Requires-Dist: python-magic>=0.4.27
|
|
29
|
-
Requires-Dist: ruamel-yaml>=0.18.14
|
|
30
|
-
Requires-Dist: tiktoken>=0.11.0
|
|
31
|
-
Provides-Extra: dev
|
|
32
|
-
Requires-Dist: basedpyright>=1.31.2; extra == 'dev'
|
|
33
|
-
Requires-Dist: bump2version>=1.0.1; extra == 'dev'
|
|
34
|
-
Requires-Dist: pre-commit>=4.3.0; extra == 'dev'
|
|
35
|
-
Requires-Dist: pytest-asyncio>=1.1.0; extra == 'dev'
|
|
36
|
-
Requires-Dist: pytest-cov>=5.0.0; extra == 'dev'
|
|
37
|
-
Requires-Dist: pytest-mock>=3.14.0; extra == 'dev'
|
|
38
|
-
Requires-Dist: pytest-xdist>=3.8.0; extra == 'dev'
|
|
39
|
-
Requires-Dist: pytest>=8.4.1; extra == 'dev'
|
|
40
|
-
Requires-Dist: ruff>=0.12.9; extra == 'dev'
|
|
41
|
-
Description-Content-Type: text/markdown
|
|
42
|
-
|
|
43
|
-
# AI Pipeline Core
|
|
44
|
-
|
|
45
|
-
A high-performance, type-safe Python library for building AI-powered data processing pipelines with Prefect orchestration and LMNR observability.
|
|
46
|
-
|
|
47
|
-
[Python 3.12+](https://www.python.org/downloads/)
|
|
48
|
-
[License: MIT](https://opensource.org/licenses/MIT)
|
|
49
|
-
[Code style: Ruff](https://github.com/astral-sh/ruff)
|
|
50
|
-
[Type checked: basedpyright](https://github.com/DetachHead/basedpyright)
|
|
51
|
-
[](https://github.com/bbarwik/ai-pipeline-core)
|
|
52
|
-
[](https://github.com/bbarwik/ai-pipeline-core)
|
|
53
|
-
[](https://pypi.org/project/ai-pipeline-core/)
|
|
54
|
-
[](https://pypi.org/project/ai-pipeline-core/)
|
|
55
|
-
|
|
56
|
-
> [!NOTE]
|
|
57
|
-
> **Beta Release**
|
|
58
|
-
>
|
|
59
|
-
> This library is in beta. While actively used in production systems, the API may still evolve. We follow semantic versioning for releases.
|
|
60
|
-
|
|
61
|
-
## Overview
|
|
62
|
-
|
|
63
|
-
AI Pipeline Core provides a robust foundation for building production-grade AI pipelines with a focus on:
|
|
64
|
-
|
|
65
|
-
- **100% Async Architecture** - Built for high-throughput, non-blocking operations
|
|
66
|
-
- **Type Safety** - Comprehensive type hints with Pydantic models throughout
|
|
67
|
-
- **Minimal Design** - Every line of code justified, no unnecessary abstractions
|
|
68
|
-
- **Production Ready** - Built-in retry logic, caching, monitoring, and error handling
|
|
69
|
-
- **LLM Optimization** - Smart context/message splitting for efficient token usage
|
|
70
|
-
|
|
71
|
-
## Key Features
|
|
72
|
-
|
|
73
|
-
### 🚀 Performance First
|
|
74
|
-
- Fully asynchronous I/O operations
|
|
75
|
-
- Intelligent caching for LLM context
|
|
76
|
-
- Streaming support for large documents
|
|
77
|
-
- Automatic retry with exponential backoff
|
|
78
|
-
|
|
79
|
-
### 🔒 Type Safety
|
|
80
|
-
- Pydantic models for all data structures
|
|
81
|
-
- Strict type checking with basedpyright
|
|
82
|
-
- Runtime validation for all inputs
|
|
83
|
-
- Immutable configurations by default
|
|
84
|
-
|
|
85
|
-
### 📊 Observability
|
|
86
|
-
- LMNR (Laminar) tracing integration
|
|
87
|
-
- Structured logging with Prefect
|
|
88
|
-
- Cost tracking for LLM operations
|
|
89
|
-
- Performance metrics out of the box
|
|
90
|
-
|
|
91
|
-
### 🎯 Developer Experience
|
|
92
|
-
- Self-documenting code for experienced developers
|
|
93
|
-
- Consistent patterns throughout
|
|
94
|
-
- Comprehensive error messages
|
|
95
|
-
- Smart defaults with override capabilities
|
|
96
|
-
|
|
97
|
-
### 🤖 Advanced LLM Features
|
|
98
|
-
- Search-enabled models (Perplexity Sonar, Gemini Flash Search)
|
|
99
|
-
- Reasoning models support (O1 series)
|
|
100
|
-
- Structured output with Pydantic models
|
|
101
|
-
- Dynamic model selection based on task
|
|
102
|
-
|
|
103
|
-
## Installation
|
|
104
|
-
|
|
105
|
-
```bash
|
|
106
|
-
pip install ai-pipeline-core
|
|
107
|
-
```
|
|
108
|
-
|
|
109
|
-
### Development Installation
|
|
110
|
-
|
|
111
|
-
For contributors and development:
|
|
112
|
-
|
|
113
|
-
```bash
|
|
114
|
-
git clone https://github.com/bbarwik/ai-pipeline-core.git
|
|
115
|
-
cd ai-pipeline-core
|
|
116
|
-
pip install -e ".[dev]"
|
|
117
|
-
make install-dev # Installs pre-commit hooks
|
|
118
|
-
```
|
|
119
|
-
|
|
120
|
-
### Requirements
|
|
121
|
-
- Python 3.12 or higher
|
|
122
|
-
- Linux/macOS (Windows via WSL2)
|
|
123
|
-
|
|
124
|
-
## Quick Start
|
|
125
|
-
|
|
126
|
-
### Basic Document Processing
|
|
127
|
-
```python
|
|
128
|
-
from ai_pipeline_core.documents import Document, FlowDocument
|
|
129
|
-
from ai_pipeline_core.llm import generate_structured, AIMessages, ModelOptions
|
|
130
|
-
from pydantic import BaseModel
|
|
131
|
-
|
|
132
|
-
class InputDocument(FlowDocument):
|
|
133
|
-
"""Custom document type for your flow"""
|
|
134
|
-
def get_type(self) -> str:
|
|
135
|
-
return "input"
|
|
136
|
-
|
|
137
|
-
class AnalysisResult(BaseModel):
|
|
138
|
-
"""Example Pydantic model for structured output"""
|
|
139
|
-
summary: str
|
|
140
|
-
key_points: list[str]
|
|
141
|
-
|
|
142
|
-
async def process_document(doc: Document):
|
|
143
|
-
# Generate AI response with structured output
|
|
144
|
-
response = await generate_structured(
|
|
145
|
-
model="gemini-2.5-pro", # Model is required first parameter
|
|
146
|
-
response_format=AnalysisResult, # Pydantic model class
|
|
147
|
-
context=AIMessages([doc]), # Cached context
|
|
148
|
-
messages=AIMessages(["Analyze this document"]), # Dynamic messages
|
|
149
|
-
options=ModelOptions(max_completion_tokens=5000) # Optional options
|
|
150
|
-
)
|
|
151
|
-
return response.parsed
|
|
152
|
-
```
|
|
153
|
-
|
|
154
|
-
### Enhanced Pipeline Decorators
|
|
155
|
-
```python
|
|
156
|
-
from ai_pipeline_core import pipeline_flow, pipeline_task
|
|
157
|
-
from ai_pipeline_core.flow import FlowOptions
|
|
158
|
-
from ai_pipeline_core.documents import DocumentList, FlowDocument
|
|
159
|
-
|
|
160
|
-
class CustomFlowOptions(FlowOptions):
|
|
161
|
-
"""Extend base options with your custom fields"""
|
|
162
|
-
batch_size: int = 100
|
|
163
|
-
temperature: float = 0.7
|
|
164
|
-
|
|
165
|
-
@pipeline_task(trace_level="always", retries=3)
|
|
166
|
-
async def process_task(doc: Document) -> Document:
|
|
167
|
-
# Task with automatic tracing and retries
|
|
168
|
-
result = await process_document(doc)
|
|
169
|
-
return OutputDocument(name="result", content=result.encode())
|
|
170
|
-
|
|
171
|
-
@pipeline_flow(trace_level="always")
|
|
172
|
-
async def my_pipeline(
|
|
173
|
-
project_name: str,
|
|
174
|
-
documents: DocumentList,
|
|
175
|
-
flow_options: CustomFlowOptions # Type-safe custom options
|
|
176
|
-
) -> DocumentList:
|
|
177
|
-
# Pipeline flow with enforced signature and tracing
|
|
178
|
-
results = []
|
|
179
|
-
for doc in documents:
|
|
180
|
-
result = await process_task(doc)
|
|
181
|
-
results.append(result)
|
|
182
|
-
return DocumentList(results)
|
|
183
|
-
```
|
|
184
|
-
|
|
185
|
-
### Simple Runner Utility
|
|
186
|
-
```python
|
|
187
|
-
from ai_pipeline_core.simple_runner import run_cli, run_pipeline
|
|
188
|
-
from ai_pipeline_core.flow import FlowOptions
|
|
189
|
-
|
|
190
|
-
# CLI-based pipeline execution
|
|
191
|
-
if __name__ == "__main__":
|
|
192
|
-
run_cli(
|
|
193
|
-
flows=[my_pipeline],
|
|
194
|
-
flow_configs=[MyFlowConfig],
|
|
195
|
-
options_cls=CustomFlowOptions
|
|
196
|
-
)
|
|
197
|
-
|
|
198
|
-
# Or programmatic execution
|
|
199
|
-
async def main():
|
|
200
|
-
result = await run_pipeline(
|
|
201
|
-
project_name="my-project",
|
|
202
|
-
output_dir=Path("./output"),
|
|
203
|
-
flow=my_pipeline,
|
|
204
|
-
flow_config=MyFlowConfig,
|
|
205
|
-
flow_options=CustomFlowOptions(batch_size=50)
|
|
206
|
-
)
|
|
207
|
-
```
|
|
208
|
-
|
|
209
|
-
### Clean Prefect Decorators
|
|
210
|
-
```python
|
|
211
|
-
# Import clean Prefect decorators without tracing
|
|
212
|
-
from ai_pipeline_core.prefect import flow, task
|
|
213
|
-
|
|
214
|
-
# Or use pipeline decorators with tracing
|
|
215
|
-
from ai_pipeline_core import pipeline_flow, pipeline_task
|
|
216
|
-
|
|
217
|
-
@task # Clean Prefect task (supports both sync and async)
|
|
218
|
-
def compute(x: int) -> int:
|
|
219
|
-
return x * 2
|
|
220
|
-
|
|
221
|
-
@pipeline_task(trace_level="always") # With tracing (async only)
|
|
222
|
-
async def compute_traced(x: int) -> int:
|
|
223
|
-
return x * 2
|
|
224
|
-
```
|
|
225
|
-
|
|
226
|
-
## Core Modules
|
|
227
|
-
|
|
228
|
-
### Documents System
|
|
229
|
-
The foundation for all data handling. Documents are immutable, type-safe wrappers around content with automatic MIME type detection.
|
|
230
|
-
|
|
231
|
-
```python
|
|
232
|
-
from ai_pipeline_core.documents import Document, DocumentList
|
|
233
|
-
|
|
234
|
-
# Documents handle encoding/decoding automatically
|
|
235
|
-
doc = MyDocument(
|
|
236
|
-
name="report.pdf",
|
|
237
|
-
content=pdf_bytes,
|
|
238
|
-
description="Q3 Financial Report"
|
|
239
|
-
)
|
|
240
|
-
|
|
241
|
-
# Type-safe document collections
|
|
242
|
-
docs = DocumentList([doc1, doc2])
|
|
243
|
-
```
|
|
244
|
-
|
|
245
|
-
### LLM Module
|
|
246
|
-
Managed AI interactions with built-in retry logic, cost tracking, and structured outputs.
|
|
247
|
-
|
|
248
|
-
**Supported Models** (via LiteLLM proxy):
|
|
249
|
-
- OpenAI: gpt-5
|
|
250
|
-
- Anthropic: claude-4
|
|
251
|
-
- Google: gemini-2.5
|
|
252
|
-
- xAI: grok-3, grok-4
|
|
253
|
-
- Perplexity: sonar-pro-search
|
|
254
|
-
- And many more through LiteLLM compatibility; any model available via OpenRouter should work.
|
|
255
|
-
|
|
256
|
-
```python
|
|
257
|
-
from ai_pipeline_core.llm import generate_structured, AIMessages, ModelOptions
|
|
258
|
-
from pydantic import BaseModel
|
|
259
|
-
|
|
260
|
-
class YourPydanticModel(BaseModel):
|
|
261
|
-
field1: str
|
|
262
|
-
field2: int
|
|
263
|
-
|
|
264
|
-
# Get structured Pydantic model responses
|
|
265
|
-
result = await generate_structured(
|
|
266
|
-
model="gemini-2.5-pro", # Model is required first parameter
|
|
267
|
-
response_format=YourPydanticModel, # Pydantic model class for structured output
|
|
268
|
-
context=AIMessages(), # Optional context (cached)
|
|
269
|
-
messages=AIMessages(["Your prompt here"]), # Required messages
|
|
270
|
-
options=ModelOptions(
|
|
271
|
-
retries=3,
|
|
272
|
-
timeout=30,
|
|
273
|
-
max_completion_tokens=10000
|
|
274
|
-
)
|
|
275
|
-
)
|
|
276
|
-
# Access the parsed result
|
|
277
|
-
model_instance = result.parsed # Type: YourPydanticModel
|
|
278
|
-
```
|
|
279
|
-
|
|
280
|
-
### Prompt Management
|
|
281
|
-
Flexible Jinja2-based prompt system with smart path resolution.
|
|
282
|
-
|
|
283
|
-
```python
|
|
284
|
-
from ai_pipeline_core import PromptManager
|
|
285
|
-
|
|
286
|
-
pm = PromptManager(__file__)
|
|
287
|
-
prompt = pm.get("analyze_document.jinja2",
|
|
288
|
-
document=doc,
|
|
289
|
-
instructions=instructions)
|
|
290
|
-
```
|
|
291
|
-
|
|
292
|
-
### Tracing & Monitoring
|
|
293
|
-
Automatic observability with LMNR integration.
|
|
294
|
-
|
|
295
|
-
```python
|
|
296
|
-
from ai_pipeline_core.tracing import trace
|
|
297
|
-
|
|
298
|
-
@trace(metadata={"workflow": "analysis"})
|
|
299
|
-
async def analyze_data(data: InputData) -> OutputData:
|
|
300
|
-
# Automatic tracing with performance metrics
|
|
301
|
-
...
|
|
302
|
-
```
|
|
303
|
-
|
|
304
|
-
## Architecture Principles
|
|
305
|
-
|
|
306
|
-
### 1. Async-First Design
|
|
307
|
-
Every I/O operation is asynchronous. No blocking calls, no synchronous fallbacks.
|
|
308
|
-
|
|
309
|
-
### 2. Type Safety Throughout
|
|
310
|
-
Complete type annotations with runtime validation. If it compiles, it works.
|
|
311
|
-
|
|
312
|
-
### 3. Minimal Surface Area
|
|
313
|
-
Less code is better code. Every line must justify its existence.
|
|
314
|
-
|
|
315
|
-
### 4. Configuration as Code
|
|
316
|
-
All configurations are Pydantic models - validated, typed, and immutable.
|
|
317
|
-
|
|
318
|
-
## Project Structure
|
|
319
|
-
|
|
320
|
-
```
|
|
321
|
-
ai_pipeline_core/
|
|
322
|
-
├── documents/ # Document handling system
|
|
323
|
-
│ ├── document.py # Base document class
|
|
324
|
-
│ ├── flow_document.py # Prefect flow documents
|
|
325
|
-
│ └── task_document.py # Prefect task documents
|
|
326
|
-
├── llm/ # LLM interaction layer
|
|
327
|
-
│ ├── client.py # Async client implementation
|
|
328
|
-
│ └── model_options.py # Configuration models
|
|
329
|
-
├── flow/ # Prefect flow utilities
|
|
330
|
-
│ ├── config.py # Type-safe flow configuration
|
|
331
|
-
│ └── options.py # FlowOptions base class
|
|
332
|
-
├── simple_runner/ # Pipeline execution utilities
|
|
333
|
-
│ ├── cli.py # CLI interface
|
|
334
|
-
│ └── simple_runner.py # Core runner logic
|
|
335
|
-
├── logging/ # Structured logging
|
|
336
|
-
├── pipeline.py # Enhanced decorators
|
|
337
|
-
├── prefect.py # Clean Prefect exports
|
|
338
|
-
├── tracing.py # Observability decorators
|
|
339
|
-
└── settings.py # Centralized configuration
|
|
340
|
-
```
|
|
341
|
-
|
|
342
|
-
## Development
|
|
343
|
-
|
|
344
|
-
### Running Tests
|
|
345
|
-
```bash
|
|
346
|
-
make test # Run all tests
|
|
347
|
-
make test-cov # Run with coverage report
|
|
348
|
-
make test-showcase # Test the showcase.py CLI example
|
|
349
|
-
pytest tests/test_documents.py::TestDocument::test_creation # Single test
|
|
350
|
-
```
|
|
351
|
-
|
|
352
|
-
### Code Quality
|
|
353
|
-
```bash
|
|
354
|
-
make lint # Run linting checks
|
|
355
|
-
make format # Auto-format code
|
|
356
|
-
make typecheck # Run type checking
|
|
357
|
-
make pre-commit # Run all pre-commit hooks
|
|
358
|
-
```
|
|
359
|
-
|
|
360
|
-
### Development Workflow
|
|
361
|
-
1. Create feature branch
|
|
362
|
-
2. Write tests first (TDD)
|
|
363
|
-
3. Implement minimal solution
|
|
364
|
-
4. Run `make format` and `make typecheck`
|
|
365
|
-
5. Ensure >80% test coverage
|
|
366
|
-
6. Submit PR with clear description
|
|
367
|
-
|
|
368
|
-
## Best Practices
|
|
369
|
-
|
|
370
|
-
### DO ✅
|
|
371
|
-
- Use async/await for all I/O operations
|
|
372
|
-
- Define Pydantic models for all data structures
|
|
373
|
-
- Keep functions under 20 lines
|
|
374
|
-
- Use type hints for everything
|
|
375
|
-
- Let Documents handle serialization
|
|
376
|
-
|
|
377
|
-
### DON'T ❌
|
|
378
|
-
- Import `logging` directly (use pipeline logger)
|
|
379
|
-
- Use raw dictionaries for configuration
|
|
380
|
-
- Write defensive code for unlikely scenarios
|
|
381
|
-
- Add comments explaining *what* the code does (the code itself should be clear)
|
|
382
|
-
- Use `requests` or other blocking libraries
|
|
383
|
-
|
|
384
|
-
## Configuration
|
|
385
|
-
|
|
386
|
-
### Environment Variables
|
|
387
|
-
```bash
|
|
388
|
-
# Required for LLM operations
|
|
389
|
-
OPENAI_API_KEY=sk-... # Your OpenAI or LiteLLM proxy key
|
|
390
|
-
OPENAI_BASE_URL=http://your-proxy:8000 # LiteLLM proxy endpoint
|
|
391
|
-
|
|
392
|
-
# Optional - for observability
|
|
393
|
-
LMNR_PROJECT_API_KEY=lmnr_... # LMNR tracing
|
|
394
|
-
|
|
395
|
-
# Optional - for orchestration
|
|
396
|
-
PREFECT_API_URL=http://localhost:4200/api
|
|
397
|
-
AI_PIPELINE_LOG_LEVEL=INFO
|
|
398
|
-
```
|
|
399
|
-
|
|
400
|
-
### Settings Management
|
|
401
|
-
```python
|
|
402
|
-
from ai_pipeline_core.settings import settings
|
|
403
|
-
|
|
404
|
-
# All settings are validated Pydantic models
|
|
405
|
-
api_key = settings.openai_api_key
|
|
406
|
-
base_url = settings.openai_base_url # LiteLLM proxy endpoint
|
|
407
|
-
```
|
|
408
|
-
|
|
409
|
-
## Integration Examples
|
|
410
|
-
|
|
411
|
-
### With Prefect Cloud
|
|
412
|
-
```python
|
|
413
|
-
from prefect import flow
|
|
414
|
-
from ai_pipeline_core.flow import FlowConfig
|
|
415
|
-
|
|
416
|
-
@flow(name="document-processor")
|
|
417
|
-
async def process_documents(docs: DocumentList):
|
|
418
|
-
# Automatic Prefect Cloud integration
|
|
419
|
-
...
|
|
420
|
-
```
|
|
421
|
-
|
|
422
|
-
### With Custom LLM Providers
|
|
423
|
-
```python
|
|
424
|
-
from ai_pipeline_core.settings import settings
|
|
425
|
-
|
|
426
|
-
# Configure LiteLLM proxy endpoint via environment variables
|
|
427
|
-
# OPENAI_BASE_URL=http://your-litellm-proxy:8000
|
|
428
|
-
# OPENAI_API_KEY=your-proxy-key
|
|
429
|
-
|
|
430
|
-
# Access in code (settings are immutable)
|
|
431
|
-
base_url = settings.openai_base_url
|
|
432
|
-
```
|
|
433
|
-
|
|
434
|
-
## Performance Considerations
|
|
435
|
-
|
|
436
|
-
- **Context Caching**: The LLM module automatically caches context to reduce token usage
|
|
437
|
-
- **Document Streaming**: Large documents are streamed rather than loaded entirely into memory
|
|
438
|
-
- **Batch Processing**: Use Prefect's `.map()` for parallel task execution
|
|
439
|
-
- **Connection Pooling**: HTTP clients use connection pooling by default
|
|
440
|
-
|
|
441
|
-
## Troubleshooting
|
|
442
|
-
|
|
443
|
-
### Common Issues
|
|
444
|
-
|
|
445
|
-
1. **Import Errors**: Ensure Python 3.12+ is installed
|
|
446
|
-
2. **Async Warnings**: All I/O operations must use `await`
|
|
447
|
-
3. **Type Errors**: Run `make typecheck` to identify issues
|
|
448
|
-
4. **MIME Detection**: Install the system dependencies required by `python-magic` (the libmagic library)
|
|
449
|
-
|
|
450
|
-
### Debug Mode
|
|
451
|
-
```python
|
|
452
|
-
from ai_pipeline_core.logging import setup_logging, LoggingConfig
|
|
453
|
-
|
|
454
|
-
# Setup logging with DEBUG level
|
|
455
|
-
setup_logging(LoggingConfig(level="DEBUG"))
|
|
456
|
-
```
|
|
457
|
-
|
|
458
|
-
## Release Process
|
|
459
|
-
|
|
460
|
-
See [RELEASE.md](RELEASE.md) for detailed release procedures.
|
|
461
|
-
|
|
462
|
-
**Important**: All releases require:
|
|
463
|
-
- ✅ Zero errors from `make typecheck`
|
|
464
|
-
- ✅ All unit tests passing with >80% coverage
|
|
465
|
-
- ✅ **Integration tests passing** (with configured API keys)
|
|
466
|
-
|
|
467
|
-
## Contributing
|
|
468
|
-
|
|
469
|
-
> [!NOTE]
|
|
470
|
-
> As this is a preview repository used internally, we are not actively accepting external contributions. The codebase may change significantly without notice.
|
|
471
|
-
>
|
|
472
|
-
> **Recommended approach:**
|
|
473
|
-
> 1. Fork the repository
|
|
474
|
-
> 2. Make changes in your fork
|
|
475
|
-
> 3. Share your improvements with the community through your fork
|
|
476
|
-
|
|
477
|
-
If you've found a critical security issue, please report it via the GitHub Security tab.
|
|
478
|
-
|
|
479
|
-
For learning purposes, see [CLAUDE.md](CLAUDE.md) for our comprehensive coding standards and architecture guide.
|
|
480
|
-
|
|
481
|
-
## Documentation
|
|
482
|
-
|
|
483
|
-
- [CLAUDE.md](CLAUDE.md) - Detailed coding standards and architecture guide
|
|
484
|
-
|
|
485
|
-
## Examples
|
|
486
|
-
|
|
487
|
-
### In This Repository
|
|
488
|
-
- [showcase.py](examples/showcase.py) - Complete example demonstrating all core features including the CLI runner
|
|
489
|
-
```bash
|
|
490
|
-
# Run the showcase example with CLI
|
|
491
|
-
python examples/showcase.py ./output --temperature 0.7 --batch-size 5
|
|
492
|
-
|
|
493
|
-
# Show help
|
|
494
|
-
python examples/showcase.py --help
|
|
495
|
-
```
|
|
496
|
-
- [showcase.jinja2](examples/showcase.jinja2) - Example Jinja2 prompt template
|
|
497
|
-
|
|
498
|
-
### Real-World Application
|
|
499
|
-
- [AI Documentation Writer](https://github.com/bbarwik/ai-documentation-writer) - Production-ready example showing how to build sophisticated AI pipelines for automated documentation generation. See [examples/ai-documentation-writer.md](examples/ai-documentation-writer.md) for a detailed overview.
|
|
500
|
-
|
|
501
|
-
### dependencies_docs/ Directory
|
|
502
|
-
> [!NOTE]
|
|
503
|
-
> The `dependencies_docs/` directory contains guides for AI assistants (like Claude Code) on how to interact with the project's external dependencies and tooling, NOT user documentation for ai-pipeline-core itself. These files are excluded from repository listings to avoid confusion.
|
|
504
|
-
|
|
505
|
-
**AI Assistant Dependency Guides:**
|
|
506
|
-
- [Prefect Integration](dependencies_docs/prefect.md) - Prefect patterns and best practices for AI assistants
|
|
507
|
-
- [Deployment Guide](dependencies_docs/prefect_deployment.md) - Production deployment guide for AI assistants
|
|
508
|
-
- [Prefect Logging](dependencies_docs/prefect_logging.md) - Logging configuration guide for AI assistants
|
|
509
|
-
|
|
510
|
-
## License
|
|
511
|
-
|
|
512
|
-
MIT License - see [LICENSE](LICENSE) file for details.
|
|
513
|
-
|
|
514
|
-
## Support
|
|
515
|
-
|
|
516
|
-
> [!CAUTION]
|
|
517
|
-
> This is a preview repository with no guaranteed support. Issues and discussions may not be actively monitored.
|
|
518
|
-
|
|
519
|
-
- **For Learning**: Review the code, documentation, and examples
|
|
520
|
-
- **For Usage**: Fork the repository and maintain your own version
|
|
521
|
-
- **Security Issues**: Report via GitHub Security tab
|
|
522
|
-
|
|
523
|
-
## Acknowledgments
|
|
524
|
-
|
|
525
|
-
Built with:
|
|
526
|
-
- [Prefect](https://www.prefect.io/) - Workflow orchestration
|
|
527
|
-
- [LMNR](https://www.lmnr.ai/) - LLM observability
|
|
528
|
-
- [LiteLLM](https://litellm.ai/) - LLM proxy
|
|
529
|
-
- [Pydantic](https://docs.pydantic.dev/) - Data validation
|
|
530
|
-
|
|
531
|
-
## What's New in v0.1.8
|
|
532
|
-
|
|
533
|
-
### Breaking Changes
|
|
534
|
-
- **Async-Only Pipeline Decorators**: `@pipeline_flow` and `@pipeline_task` now require `async def` functions (a `TypeError` is raised for synchronous functions)
|
|
535
|
-
- **Document Class Name Validation**: Document subclass names cannot start with the "Test" prefix (prevents conflicts with pytest test collection)
|
|
536
|
-
- **FlowConfig Validation**: OUTPUT_DOCUMENT_TYPE cannot be in INPUT_DOCUMENT_TYPES (prevents circular dependencies)
|
|
537
|
-
- **Temperature Field**: Added optional `temperature` field to `ModelOptions` for explicit control
|
|
538
|
-
|
|
539
|
-
### Major Improvements
|
|
540
|
-
- **Pipeline Module Refactoring**: Reduced from ~400 to ~150 lines with cleaner Protocol-based typing
|
|
541
|
-
- **Enhanced Validation**: FlowConfig and Document classes now validate at definition time
|
|
542
|
-
- **Better CLI Support**: Auto-displays help when no arguments provided, improved context management
|
|
543
|
-
- **Test Suite Updates**: All tests updated to use async/await consistently
|
|
544
|
-
|
|
545
|
-
### Documentation Updates
|
|
546
|
-
- Added Document naming rules to CLAUDE.md
|
|
547
|
-
- Added FlowConfig validation rules
|
|
548
|
-
- Added code elegance principles section
|
|
549
|
-
- Updated guide_for_ai.md to API reference format
|
|
550
|
-
|
|
551
|
-
## Stability Notice
|
|
552
|
-
|
|
553
|
-
**Current Version**: 0.1.8
|
|
554
|
-
**Status**: Internal Preview
|
|
555
|
-
**API Stability**: Unstable - Breaking changes expected
|
|
556
|
-
**Recommended Use**: Learning and reference only
|
|
557
|
-
|
|
558
|
-
For production use, please fork this repository and maintain your own stable version.
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
ai_pipeline_core/__init__.py,sha256=dWkrDbW3oqrplHH7oBQ59dOc0wtJr0AcKVtQo63C_wM,1662
|
|
2
|
-
ai_pipeline_core/exceptions.py,sha256=_vW0Hbw2LGb5tcVvH0YzTKMff7QOPfCRr3w-w_zPyCE,968
|
|
3
|
-
ai_pipeline_core/pipeline.py,sha256=f-pEDwrEhMLfcSEvPP2b74xb0WzFI05IQcl-NDFzH7w,16565
|
|
4
|
-
ai_pipeline_core/prefect.py,sha256=VHYkkRcUmSpdwyWosOOxuExVCncIQgT6MypqGdjcYnM,241
|
|
5
|
-
ai_pipeline_core/prompt_manager.py,sha256=XmNUdMIC0WrE9fF0LIcfozAKOGrlYwj8AfXvCndIH-o,4693
|
|
6
|
-
ai_pipeline_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
-
ai_pipeline_core/settings.py,sha256=Zl2BPa6IHzh-B5V7cg5mtySr1dhWZQYYKxXz3BwrHlQ,615
|
|
8
|
-
ai_pipeline_core/tracing.py,sha256=T-3fTyA37TejXxotkVzTNqL2a5nOfZ0bcHg9TClLvmg,9471
|
|
9
|
-
ai_pipeline_core/documents/__init__.py,sha256=TLW8eOEmthfDHOTssXjyBlqhgrZe9ZIyxlkd0LBJ3_s,340
|
|
10
|
-
ai_pipeline_core/documents/document.py,sha256=AIdkg2RIyYB5Tje1GmnQqtk8qesOIQwtwzEztypYIhg,13121
|
|
11
|
-
ai_pipeline_core/documents/document_list.py,sha256=HOG_uZDazA9CJB7Lr_tNcDFzb5Ff9RUt0ELWQK_eYNM,4940
|
|
12
|
-
ai_pipeline_core/documents/flow_document.py,sha256=qsV-2JYOMhkvAj7lW54ZNH_4QUclld9h06CoU59tWww,815
|
|
13
|
-
ai_pipeline_core/documents/mime_type.py,sha256=sBhNRoBJQ35JoHWhJzBGpp00WFDfMdEX0JZKKkR7QH0,3371
|
|
14
|
-
ai_pipeline_core/documents/task_document.py,sha256=WjHqtl1d60XFBBqewNRdz1OqBErGI0jRx15oQYCTHo8,907
|
|
15
|
-
ai_pipeline_core/documents/utils.py,sha256=BdE4taSl1vrBhxnFbOP5nDA7lXIcvY__AMRTHoaNb5M,2764
|
|
16
|
-
ai_pipeline_core/flow/__init__.py,sha256=54DRfZnjXQVrimgtKEVEm5u5ErImx31cjK2PpBvHjU4,116
|
|
17
|
-
ai_pipeline_core/flow/config.py,sha256=gRCtiahTA7h6_xVPY3su85pZbu5gu41yXUgGLILey2E,3220
|
|
18
|
-
ai_pipeline_core/flow/options.py,sha256=WygJEwjqOa14l23a_Hp36hJX-WgxHMq-YzSieC31Z4Y,701
|
|
19
|
-
ai_pipeline_core/llm/__init__.py,sha256=3XVK-bSJdOe0s6KmmO7PDbsXHfjlcZEG1MVBmaz3EeU,442
|
|
20
|
-
ai_pipeline_core/llm/ai_messages.py,sha256=DwJJe05BtYdnMZeHbBbyEbDCqrW63SRvprxptoJUCn4,4586
|
|
21
|
-
ai_pipeline_core/llm/client.py,sha256=VMs1nQKCfoxbcvE2mypn5QF19u90Ua87-5IiZxWOj98,7784
|
|
22
|
-
ai_pipeline_core/llm/model_options.py,sha256=7O5y-qtYtmTXzIUS7vxKOQlRAM3TTggqHw2_dOnS_a8,1441
|
|
23
|
-
ai_pipeline_core/llm/model_response.py,sha256=fIWueaemgo0cMruvToMZyKsRPzKwL6IlvUJN7DLG710,5558
|
|
24
|
-
ai_pipeline_core/llm/model_types.py,sha256=rIwY6voT8-xdfsKPDC0Gkdl2iTp9Q2LuvWGSRU9Mp3k,342
|
|
25
|
-
ai_pipeline_core/logging/__init__.py,sha256=DOO6ckgnMVXl29Sy7q6jhO-iW96h54pCHQDzgA2Pu6I,272
|
|
26
|
-
ai_pipeline_core/logging/logging.yml,sha256=YTW48keO_K5bkkb-KXGM7ZuaYKiquLsjsURei8Ql0V4,1353
|
|
27
|
-
ai_pipeline_core/logging/logging_config.py,sha256=6MBz9nnVNvqiLDoyy9-R3sWkn6927Re5hdz4hwTptpI,4903
|
|
28
|
-
ai_pipeline_core/logging/logging_mixin.py,sha256=RDaR2ju2-vKTJRzXGa0DquGPT8_UxahWjvKJnaD0IV8,7810
|
|
29
|
-
ai_pipeline_core/simple_runner/__init__.py,sha256=OPbTCZvqpnYdwi1Knnkj-MpmD0Nvtg5O7UwIdAKz_AY,384
|
|
30
|
-
ai_pipeline_core/simple_runner/cli.py,sha256=1X2kkdsGFIewYMxtoRVDS1RY6cx5wNVEIw-TeShbLCc,4281
|
|
31
|
-
ai_pipeline_core/simple_runner/simple_runner.py,sha256=70BHT1iz-G368H2t4tsWAVni0jw2VkWVdnKICuVtLPw,5009
|
|
32
|
-
ai_pipeline_core-0.1.8.dist-info/METADATA,sha256=RX8VUdE5M5DUE7S4LebGvnJkse87s23SxzxsDC7Rys4,19119
|
|
33
|
-
ai_pipeline_core-0.1.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
34
|
-
ai_pipeline_core-0.1.8.dist-info/licenses/LICENSE,sha256=kKj8mfbdWwkyG3U6n7ztB3bAZlEwShTkAsvaY657i3I,1074
|
|
35
|
-
ai_pipeline_core-0.1.8.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|