ai-pipeline-core 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core-0.1.1/.gitignore +158 -0
- ai_pipeline_core-0.1.1/LICENSE +21 -0
- ai_pipeline_core-0.1.1/PKG-INFO +477 -0
- ai_pipeline_core-0.1.1/README.md +435 -0
- ai_pipeline_core-0.1.1/ai_pipeline_core/__init__.py +36 -0
- ai_pipeline_core-0.1.1/ai_pipeline_core/documents/__init__.py +11 -0
- ai_pipeline_core-0.1.1/ai_pipeline_core/documents/document.py +252 -0
- ai_pipeline_core-0.1.1/ai_pipeline_core/documents/document_list.py +131 -0
- ai_pipeline_core-0.1.1/ai_pipeline_core/documents/flow_document.py +21 -0
- ai_pipeline_core-0.1.1/ai_pipeline_core/documents/mime_type.py +78 -0
- ai_pipeline_core-0.1.1/ai_pipeline_core/documents/task_document.py +22 -0
- ai_pipeline_core-0.1.1/ai_pipeline_core/documents/utils.py +33 -0
- ai_pipeline_core-0.1.1/ai_pipeline_core/exceptions.py +61 -0
- ai_pipeline_core-0.1.1/ai_pipeline_core/flow/__init__.py +3 -0
- ai_pipeline_core-0.1.1/ai_pipeline_core/flow/config.py +66 -0
- ai_pipeline_core-0.1.1/ai_pipeline_core/llm/__init__.py +19 -0
- ai_pipeline_core-0.1.1/ai_pipeline_core/llm/ai_messages.py +129 -0
- ai_pipeline_core-0.1.1/ai_pipeline_core/llm/client.py +218 -0
- ai_pipeline_core-0.1.1/ai_pipeline_core/llm/model_options.py +39 -0
- ai_pipeline_core-0.1.1/ai_pipeline_core/llm/model_response.py +149 -0
- ai_pipeline_core-0.1.1/ai_pipeline_core/llm/model_types.py +17 -0
- ai_pipeline_core-0.1.1/ai_pipeline_core/logging/__init__.py +10 -0
- ai_pipeline_core-0.1.1/ai_pipeline_core/logging/logging.yml +66 -0
- ai_pipeline_core-0.1.1/ai_pipeline_core/logging/logging_config.py +154 -0
- ai_pipeline_core-0.1.1/ai_pipeline_core/logging/logging_mixin.py +223 -0
- ai_pipeline_core-0.1.1/ai_pipeline_core/prompt_manager.py +115 -0
- ai_pipeline_core-0.1.1/ai_pipeline_core/py.typed +0 -0
- ai_pipeline_core-0.1.1/ai_pipeline_core/settings.py +24 -0
- ai_pipeline_core-0.1.1/ai_pipeline_core/tracing.py +205 -0
- ai_pipeline_core-0.1.1/pyproject.toml +177 -0
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
share/python-wheels/
|
|
24
|
+
*.egg-info/
|
|
25
|
+
.installed.cfg
|
|
26
|
+
*.egg
|
|
27
|
+
MANIFEST
|
|
28
|
+
|
|
29
|
+
# PyInstaller
|
|
30
|
+
*.manifest
|
|
31
|
+
*.spec
|
|
32
|
+
|
|
33
|
+
# Installer logs
|
|
34
|
+
pip-log.txt
|
|
35
|
+
pip-delete-this-directory.txt
|
|
36
|
+
|
|
37
|
+
# Unit test / coverage reports
|
|
38
|
+
htmlcov/
|
|
39
|
+
.tox/
|
|
40
|
+
.nox/
|
|
41
|
+
.coverage
|
|
42
|
+
.coverage.*
|
|
43
|
+
.cache
|
|
44
|
+
nosetests.xml
|
|
45
|
+
coverage.xml
|
|
46
|
+
*.cover
|
|
47
|
+
*.py,cover
|
|
48
|
+
.hypothesis/
|
|
49
|
+
.pytest_cache/
|
|
50
|
+
cover/
|
|
51
|
+
|
|
52
|
+
# Translations
|
|
53
|
+
*.mo
|
|
54
|
+
*.pot
|
|
55
|
+
|
|
56
|
+
# Django stuff:
|
|
57
|
+
*.log
|
|
58
|
+
local_settings.py
|
|
59
|
+
db.sqlite3
|
|
60
|
+
db.sqlite3-journal
|
|
61
|
+
|
|
62
|
+
# Flask stuff:
|
|
63
|
+
instance/
|
|
64
|
+
.webassets-cache
|
|
65
|
+
|
|
66
|
+
# Scrapy stuff:
|
|
67
|
+
.scrapy
|
|
68
|
+
|
|
69
|
+
# Sphinx documentation
|
|
70
|
+
docs/_build/
|
|
71
|
+
|
|
72
|
+
# PyBuilder
|
|
73
|
+
.pybuilder/
|
|
74
|
+
target/
|
|
75
|
+
|
|
76
|
+
# Jupyter Notebook
|
|
77
|
+
.ipynb_checkpoints
|
|
78
|
+
|
|
79
|
+
# IPython
|
|
80
|
+
profile_default/
|
|
81
|
+
ipython_config.py
|
|
82
|
+
|
|
83
|
+
# pyenv
|
|
84
|
+
.python-version
|
|
85
|
+
|
|
86
|
+
# pipenv
|
|
87
|
+
Pipfile.lock
|
|
88
|
+
|
|
89
|
+
# poetry
|
|
90
|
+
poetry.lock
|
|
91
|
+
|
|
92
|
+
# pdm
|
|
93
|
+
.pdm.toml
|
|
94
|
+
.pdm-python
|
|
95
|
+
.pdm-build/
|
|
96
|
+
|
|
97
|
+
# PEP 582
|
|
98
|
+
__pypackages__/
|
|
99
|
+
|
|
100
|
+
# Celery stuff
|
|
101
|
+
celerybeat-schedule
|
|
102
|
+
celerybeat.pid
|
|
103
|
+
|
|
104
|
+
# SageMath parsed files
|
|
105
|
+
*.sage.py
|
|
106
|
+
|
|
107
|
+
# Environments
|
|
108
|
+
.env
|
|
109
|
+
.venv
|
|
110
|
+
env/
|
|
111
|
+
venv/
|
|
112
|
+
ENV/
|
|
113
|
+
env.bak/
|
|
114
|
+
venv.bak/
|
|
115
|
+
|
|
116
|
+
# Spyder project settings
|
|
117
|
+
.spyderproject
|
|
118
|
+
.spyproject
|
|
119
|
+
|
|
120
|
+
# Rope project settings
|
|
121
|
+
.ropeproject
|
|
122
|
+
|
|
123
|
+
# mkdocs documentation
|
|
124
|
+
/site
|
|
125
|
+
|
|
126
|
+
# mypy
|
|
127
|
+
.mypy_cache/
|
|
128
|
+
.dmypy.json
|
|
129
|
+
dmypy.json
|
|
130
|
+
|
|
131
|
+
# Pyre type checker
|
|
132
|
+
.pyre/
|
|
133
|
+
|
|
134
|
+
# pytype static type analyzer
|
|
135
|
+
.pytype/
|
|
136
|
+
|
|
137
|
+
# Cython debug symbols
|
|
138
|
+
cython_debug/
|
|
139
|
+
|
|
140
|
+
# PyCharm
|
|
141
|
+
.idea/
|
|
142
|
+
|
|
143
|
+
# VS Code
|
|
144
|
+
.vscode/
|
|
145
|
+
|
|
146
|
+
# macOS
|
|
147
|
+
.DS_Store
|
|
148
|
+
|
|
149
|
+
# Windows
|
|
150
|
+
Thumbs.db
|
|
151
|
+
ehthumbs.db
|
|
152
|
+
|
|
153
|
+
# Prefect
|
|
154
|
+
.prefect/
|
|
155
|
+
|
|
156
|
+
# Test artifacts
|
|
157
|
+
tests/test_data/
|
|
158
|
+
test_output/
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 bbarwik@gmail.com
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,477 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ai-pipeline-core
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Core utilities for AI-powered processing pipelines using prefect
|
|
5
|
+
Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
|
|
6
|
+
Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
|
|
7
|
+
Project-URL: Issues, https://github.com/bbarwik/ai-pipeline-core/issues
|
|
8
|
+
Author-email: bbarwik <bbarwik@gmail.com>
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
|
+
Classifier: Typing :: Typed
|
|
20
|
+
Requires-Python: >=3.12
|
|
21
|
+
Requires-Dist: httpx>=0.28.1
|
|
22
|
+
Requires-Dist: jinja2>=3.1.6
|
|
23
|
+
Requires-Dist: lmnr>=0.7.4
|
|
24
|
+
Requires-Dist: openai>=1.99.9
|
|
25
|
+
Requires-Dist: prefect>=3.4.13
|
|
26
|
+
Requires-Dist: pydantic-settings>=2.10.1
|
|
27
|
+
Requires-Dist: pydantic>=2.11.7
|
|
28
|
+
Requires-Dist: python-magic>=0.4.27
|
|
29
|
+
Requires-Dist: ruamel-yaml>=0.18.14
|
|
30
|
+
Requires-Dist: tiktoken>=0.11.0
|
|
31
|
+
Provides-Extra: dev
|
|
32
|
+
Requires-Dist: basedpyright>=1.31.2; extra == 'dev'
|
|
33
|
+
Requires-Dist: bump2version>=1.0.1; extra == 'dev'
|
|
34
|
+
Requires-Dist: pre-commit>=4.3.0; extra == 'dev'
|
|
35
|
+
Requires-Dist: pytest-asyncio>=1.1.0; extra == 'dev'
|
|
36
|
+
Requires-Dist: pytest-cov>=5.0.0; extra == 'dev'
|
|
37
|
+
Requires-Dist: pytest-mock>=3.14.0; extra == 'dev'
|
|
38
|
+
Requires-Dist: pytest-xdist>=3.8.0; extra == 'dev'
|
|
39
|
+
Requires-Dist: pytest>=8.4.1; extra == 'dev'
|
|
40
|
+
Requires-Dist: ruff>=0.12.9; extra == 'dev'
|
|
41
|
+
Description-Content-Type: text/markdown
|
|
42
|
+
|
|
43
|
+
# AI Pipeline Core
|
|
44
|
+
|
|
45
|
+
A high-performance, type-safe Python library for building AI-powered data processing pipelines with Prefect orchestration and LMNR observability.
|
|
46
|
+
|
|
47
|
+
[](https://www.python.org/downloads/)
|
|
48
|
+
[](https://opensource.org/licenses/MIT)
|
|
49
|
+
[](https://github.com/astral-sh/ruff)
|
|
50
|
+
[](https://github.com/DetachHead/basedpyright)
|
|
51
|
+
[](https://github.com/bbarwik/ai-pipeline-core)
|
|
52
|
+
[](https://github.com/bbarwik/ai-pipeline-core)
|
|
53
|
+
[](https://pypi.org/project/ai-pipeline-core/)
|
|
54
|
+
[](https://pypi.org/project/ai-pipeline-core/)
|
|
55
|
+
|
|
56
|
+
> [!NOTE]
|
|
57
|
+
> **Beta Release**
|
|
58
|
+
>
|
|
59
|
+
> This library is in beta. While actively used in production systems, the API may still evolve. We follow semantic versioning for releases.
|
|
60
|
+
|
|
61
|
+
## Overview
|
|
62
|
+
|
|
63
|
+
AI Pipeline Core provides a robust foundation for building production-grade AI pipelines with a focus on:
|
|
64
|
+
|
|
65
|
+
- **100% Async Architecture** - Built for high-throughput, non-blocking operations
|
|
66
|
+
- **Type Safety** - Comprehensive type hints with Pydantic models throughout
|
|
67
|
+
- **Minimal Design** - Every line of code justified, no unnecessary abstractions
|
|
68
|
+
- **Production Ready** - Built-in retry logic, caching, monitoring, and error handling
|
|
69
|
+
- **LLM Optimization** - Smart context/message splitting for efficient token usage
|
|
70
|
+
|
|
71
|
+
## Key Features
|
|
72
|
+
|
|
73
|
+
### 🚀 Performance First
|
|
74
|
+
- Fully asynchronous I/O operations
|
|
75
|
+
- Intelligent caching for LLM context
|
|
76
|
+
- Streaming support for large documents
|
|
77
|
+
- Automatic retry with exponential backoff
|
|
78
|
+
|
|
79
|
+
### 🔒 Type Safety
|
|
80
|
+
- Pydantic models for all data structures
|
|
81
|
+
- Strict type checking with basedpyright
|
|
82
|
+
- Runtime validation for all inputs
|
|
83
|
+
- Immutable configurations by default
|
|
84
|
+
|
|
85
|
+
### 📊 Observability
|
|
86
|
+
- LMNR (Laminar) tracing integration
|
|
87
|
+
- Structured logging with Prefect
|
|
88
|
+
- Cost tracking for LLM operations
|
|
89
|
+
- Performance metrics out of the box
|
|
90
|
+
|
|
91
|
+
### 🎯 Developer Experience
|
|
92
|
+
- Self-documenting code for experienced developers
|
|
93
|
+
- Consistent patterns throughout
|
|
94
|
+
- Comprehensive error messages
|
|
95
|
+
- Smart defaults with override capabilities
|
|
96
|
+
|
|
97
|
+
### 🤖 Advanced LLM Features
|
|
98
|
+
- Search-enabled models (Perplexity Sonar, Gemini Flash Search)
|
|
99
|
+
- Support for reasoning models (O1 series)
|
|
100
|
+
- Structured output with Pydantic models
|
|
101
|
+
- Dynamic model selection based on task
|
|
102
|
+
|
|
103
|
+
## Installation
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
pip install ai-pipeline-core
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### Development Installation
|
|
110
|
+
|
|
111
|
+
For contributors and development:
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
git clone https://github.com/bbarwik/ai-pipeline-core.git
|
|
115
|
+
cd ai-pipeline-core
|
|
116
|
+
pip install -e ".[dev]"
|
|
117
|
+
make install-dev # Installs pre-commit hooks
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### Requirements
|
|
121
|
+
- Python 3.12 or higher
|
|
122
|
+
- Linux/macOS (Windows via WSL2)
|
|
123
|
+
|
|
124
|
+
## Quick Start
|
|
125
|
+
|
|
126
|
+
### Basic Document Processing
|
|
127
|
+
```python
|
|
128
|
+
from ai_pipeline_core.documents import Document, FlowDocument
|
|
129
|
+
from ai_pipeline_core.llm import generate_structured, AIMessages, ModelOptions
|
|
130
|
+
from pydantic import BaseModel
|
|
131
|
+
|
|
132
|
+
class InputDocument(FlowDocument):
|
|
133
|
+
"""Custom document type for your flow"""
|
|
134
|
+
def get_type(self) -> str:
|
|
135
|
+
return "input"
|
|
136
|
+
|
|
137
|
+
class AnalysisResult(BaseModel):
|
|
138
|
+
"""Example Pydantic model for structured output"""
|
|
139
|
+
summary: str
|
|
140
|
+
key_points: list[str]
|
|
141
|
+
|
|
142
|
+
async def process_document(doc: Document):
|
|
143
|
+
# Generate AI response with structured output
|
|
144
|
+
response = await generate_structured(
|
|
145
|
+
model="gemini-2.5-pro", # Model is required first parameter
|
|
146
|
+
response_format=AnalysisResult, # Pydantic model class
|
|
147
|
+
context=AIMessages([doc]), # Cached context
|
|
148
|
+
messages=AIMessages(["Analyze this document"]), # Dynamic messages
|
|
149
|
+
options=ModelOptions(max_completion_tokens=5000) # Optional options
|
|
150
|
+
)
|
|
151
|
+
return response.parsed
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### Prefect Flow Integration
|
|
155
|
+
```python
|
|
156
|
+
from prefect import flow, task
|
|
157
|
+
from ai_pipeline_core.documents import Document, DocumentList, FlowDocument
|
|
158
|
+
from ai_pipeline_core.flow import FlowConfig
|
|
159
|
+
from ai_pipeline_core.tracing import trace
|
|
160
|
+
|
|
161
|
+
class OutputDocument(FlowDocument):
|
|
162
|
+
"""Custom output document type"""
|
|
163
|
+
def get_type(self) -> str:
|
|
164
|
+
return "output"
|
|
165
|
+
|
|
166
|
+
class MyFlowConfig(FlowConfig):
|
|
167
|
+
INPUT_DOCUMENT_TYPES = [InputDocument]
|
|
168
|
+
OUTPUT_DOCUMENT_TYPE = OutputDocument
|
|
169
|
+
|
|
170
|
+
@task
|
|
171
|
+
@trace
|
|
172
|
+
async def process_task(doc: Document) -> Document:
|
|
173
|
+
# Task-level processing with automatic tracing
|
|
174
|
+
result = await process_document(doc)
|
|
175
|
+
# Convert result to JSON string for document content
|
|
176
|
+
import json
|
|
177
|
+
return OutputDocument(name="result", content=json.dumps(result.model_dump()).encode())
|
|
178
|
+
|
|
179
|
+
@flow
|
|
180
|
+
async def my_pipeline(documents: DocumentList):
|
|
181
|
+
config = MyFlowConfig()
|
|
182
|
+
input_docs = config.get_input_documents(documents)
|
|
183
|
+
|
|
184
|
+
results = await process_task.map(input_docs)
|
|
185
|
+
|
|
186
|
+
config.validate_output_documents(results)
|
|
187
|
+
return results
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
## Core Modules
|
|
191
|
+
|
|
192
|
+
### Documents System
|
|
193
|
+
The foundation for all data handling. Documents are immutable, type-safe wrappers around content with automatic MIME type detection.
|
|
194
|
+
|
|
195
|
+
```python
|
|
196
|
+
from ai_pipeline_core.documents import Document, DocumentList
|
|
197
|
+
|
|
198
|
+
# Documents handle encoding/decoding automatically
|
|
199
|
+
doc = MyDocument(
|
|
200
|
+
name="report.pdf",
|
|
201
|
+
content=pdf_bytes,
|
|
202
|
+
description="Q3 Financial Report"
|
|
203
|
+
)
|
|
204
|
+
|
|
205
|
+
# Type-safe document collections
|
|
206
|
+
docs = DocumentList([doc1, doc2])
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
### LLM Module
|
|
210
|
+
Managed AI interactions with built-in retry logic, cost tracking, and structured outputs.
|
|
211
|
+
|
|
212
|
+
**Supported Models** (via LiteLLM proxy):
|
|
213
|
+
- OpenAI: GPT-4, GPT-5 series
|
|
214
|
+
- Anthropic: Claude 3 series
|
|
215
|
+
- Google: Gemini 2.5 series
|
|
216
|
+
- xAI: Grok models
|
|
217
|
+
- Perplexity: Sonar models (with search capabilities)
|
|
218
|
+
- And many more through LiteLLM compatibility
|
|
219
|
+
|
|
220
|
+
```python
|
|
221
|
+
from ai_pipeline_core.llm import generate_structured, AIMessages, ModelOptions
|
|
222
|
+
from pydantic import BaseModel
|
|
223
|
+
|
|
224
|
+
class YourPydanticModel(BaseModel):
|
|
225
|
+
field1: str
|
|
226
|
+
field2: int
|
|
227
|
+
|
|
228
|
+
# Get structured Pydantic model responses
|
|
229
|
+
result = await generate_structured(
|
|
230
|
+
model="gemini-2.5-pro", # Model is required first parameter
|
|
231
|
+
response_format=YourPydanticModel, # Pydantic model class for structured output
|
|
232
|
+
context=AIMessages(), # Optional context (cached)
|
|
233
|
+
messages=AIMessages(["Your prompt here"]), # Required messages
|
|
234
|
+
options=ModelOptions(
|
|
235
|
+
retries=3,
|
|
236
|
+
timeout=30,
|
|
237
|
+
max_completion_tokens=10000
|
|
238
|
+
)
|
|
239
|
+
)
|
|
240
|
+
# Access the parsed result
|
|
241
|
+
model_instance = result.parsed # Type: YourPydanticModel
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
### Prompt Management
|
|
245
|
+
Flexible Jinja2-based prompt system with smart path resolution.
|
|
246
|
+
|
|
247
|
+
```python
|
|
248
|
+
from ai_pipeline_core import PromptManager
|
|
249
|
+
|
|
250
|
+
pm = PromptManager(__file__)
|
|
251
|
+
prompt = pm.get("analyze_document.jinja2",
|
|
252
|
+
document=doc,
|
|
253
|
+
instructions=instructions)
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
### Tracing & Monitoring
|
|
257
|
+
Automatic observability with LMNR integration.
|
|
258
|
+
|
|
259
|
+
```python
|
|
260
|
+
from ai_pipeline_core.tracing import trace
|
|
261
|
+
|
|
262
|
+
@trace(metadata={"workflow": "analysis"})
|
|
263
|
+
async def analyze_data(data: InputData) -> OutputData:
|
|
264
|
+
# Automatic tracing with performance metrics
|
|
265
|
+
...
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
## Architecture Principles
|
|
269
|
+
|
|
270
|
+
### 1. Async-First Design
|
|
271
|
+
Every I/O operation is asynchronous. No blocking calls, no synchronous fallbacks.
|
|
272
|
+
|
|
273
|
+
### 2. Type Safety Throughout
|
|
274
|
+
Complete type annotations with runtime validation. If it compiles, it works.
|
|
275
|
+
|
|
276
|
+
### 3. Minimal Surface Area
|
|
277
|
+
Less code is better code. Every line must justify its existence.
|
|
278
|
+
|
|
279
|
+
### 4. Configuration as Code
|
|
280
|
+
All configurations are Pydantic models - validated, typed, and immutable.
|
|
281
|
+
|
|
282
|
+
## Project Structure
|
|
283
|
+
|
|
284
|
+
```
|
|
285
|
+
ai_pipeline_core/
|
|
286
|
+
├── documents/ # Document handling system
|
|
287
|
+
│ ├── document.py # Base document class
|
|
288
|
+
│ ├── flow_document.py # Prefect flow documents
|
|
289
|
+
│ └── task_document.py # Prefect task documents
|
|
290
|
+
├── llm/ # LLM interaction layer
|
|
291
|
+
│ ├── client.py # Async client implementation
|
|
292
|
+
│ └── model_options.py # Configuration models
|
|
293
|
+
├── flow/ # Prefect flow utilities
|
|
294
|
+
│ └── config.py # Type-safe flow configuration
|
|
295
|
+
├── logging/ # Structured logging
|
|
296
|
+
├── tracing.py # Observability decorators
|
|
297
|
+
└── settings.py # Centralized configuration
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
## Development
|
|
301
|
+
|
|
302
|
+
### Running Tests
|
|
303
|
+
```bash
|
|
304
|
+
make test # Run all tests
|
|
305
|
+
make test-cov # Run with coverage report
|
|
306
|
+
pytest tests/test_documents.py::TestDocument::test_creation # Single test
|
|
307
|
+
```
|
|
308
|
+
|
|
309
|
+
### Code Quality
|
|
310
|
+
```bash
|
|
311
|
+
make lint # Run linting checks
|
|
312
|
+
make format # Auto-format code
|
|
313
|
+
make typecheck # Run type checking
|
|
314
|
+
make pre-commit # Run all pre-commit hooks
|
|
315
|
+
```
|
|
316
|
+
|
|
317
|
+
### Development Workflow
|
|
318
|
+
1. Create feature branch
|
|
319
|
+
2. Write tests first (TDD)
|
|
320
|
+
3. Implement minimal solution
|
|
321
|
+
4. Run `make format` and `make typecheck`
|
|
322
|
+
5. Ensure >80% test coverage
|
|
323
|
+
6. Submit PR with clear description
|
|
324
|
+
|
|
325
|
+
## Best Practices
|
|
326
|
+
|
|
327
|
+
### DO ✅
|
|
328
|
+
- Use async/await for all I/O operations
|
|
329
|
+
- Define Pydantic models for all data structures
|
|
330
|
+
- Keep functions under 20 lines
|
|
331
|
+
- Use type hints for everything
|
|
332
|
+
- Let Documents handle serialization
|
|
333
|
+
|
|
334
|
+
### DON'T ❌
|
|
335
|
+
- Import `logging` directly (use pipeline logger)
|
|
336
|
+
- Use raw dictionaries for configuration
|
|
337
|
+
- Write defensive code for unlikely scenarios
|
|
338
|
+
- Add comments explaining what (code should be clear)
|
|
339
|
+
- Use `requests` or other blocking libraries
|
|
340
|
+
|
|
341
|
+
## Configuration
|
|
342
|
+
|
|
343
|
+
### Environment Variables
|
|
344
|
+
```bash
|
|
345
|
+
# Required for LLM operations
|
|
346
|
+
OPENAI_API_KEY=sk-... # Your OpenAI or LiteLLM proxy key
|
|
347
|
+
OPENAI_BASE_URL=http://your-proxy:8000 # LiteLLM proxy endpoint
|
|
348
|
+
|
|
349
|
+
# Optional - for observability
|
|
350
|
+
LMNR_PROJECT_API_KEY=lmnr_... # LMNR tracing
|
|
351
|
+
|
|
352
|
+
# Optional - for orchestration
|
|
353
|
+
PREFECT_API_URL=http://localhost:4200/api
|
|
354
|
+
AI_PIPELINE_LOG_LEVEL=INFO
|
|
355
|
+
```
|
|
356
|
+
|
|
357
|
+
### Settings Management
|
|
358
|
+
```python
|
|
359
|
+
from ai_pipeline_core.settings import settings
|
|
360
|
+
|
|
361
|
+
# All settings are validated Pydantic models
|
|
362
|
+
api_key = settings.openai_api_key
|
|
363
|
+
base_url = settings.openai_base_url # LiteLLM proxy endpoint
|
|
364
|
+
```
|
|
365
|
+
|
|
366
|
+
## Integration Examples
|
|
367
|
+
|
|
368
|
+
### With Prefect Cloud
|
|
369
|
+
```python
|
|
370
|
+
from prefect import flow
|
|
371
|
+
from ai_pipeline_core.flow import FlowConfig
|
|
372
|
+
|
|
373
|
+
@flow(name="document-processor")
|
|
374
|
+
async def process_documents(docs: DocumentList):
|
|
375
|
+
# Automatic Prefect Cloud integration
|
|
376
|
+
...
|
|
377
|
+
```
|
|
378
|
+
|
|
379
|
+
### With Custom LLM Providers
|
|
380
|
+
```python
|
|
381
|
+
from ai_pipeline_core.settings import settings
|
|
382
|
+
|
|
383
|
+
# Configure LiteLLM proxy endpoint via environment variables
|
|
384
|
+
# OPENAI_BASE_URL=http://your-litellm-proxy:8000
|
|
385
|
+
# OPENAI_API_KEY=your-proxy-key
|
|
386
|
+
|
|
387
|
+
# Access in code (settings are immutable)
|
|
388
|
+
base_url = settings.openai_base_url
|
|
389
|
+
```
|
|
390
|
+
|
|
391
|
+
## Performance Considerations
|
|
392
|
+
|
|
393
|
+
- **Context Caching**: The LLM module automatically caches context to reduce token usage
|
|
394
|
+
- **Document Streaming**: Large documents are streamed rather than loaded entirely into memory
|
|
395
|
+
- **Batch Processing**: Use Prefect's `.map()` for parallel task execution
|
|
396
|
+
- **Connection Pooling**: HTTP clients use connection pooling by default
|
|
397
|
+
|
|
398
|
+
## Troubleshooting
|
|
399
|
+
|
|
400
|
+
### Common Issues
|
|
401
|
+
|
|
402
|
+
1. **Import Errors**: Ensure Python 3.12+ is installed
|
|
403
|
+
2. **Async Warnings**: All I/O operations must use `await`
|
|
404
|
+
3. **Type Errors**: Run `make typecheck` to identify issues
|
|
405
|
+
4. **MIME Detection**: Install `python-magic` system dependencies
|
|
406
|
+
|
|
407
|
+
### Debug Mode
|
|
408
|
+
```python
|
|
409
|
+
from ai_pipeline_core.logging import setup_logging, LoggingConfig
|
|
410
|
+
|
|
411
|
+
# Setup logging with DEBUG level
|
|
412
|
+
setup_logging(LoggingConfig(level="DEBUG"))
|
|
413
|
+
```
|
|
414
|
+
|
|
415
|
+
## Release Process
|
|
416
|
+
|
|
417
|
+
See [RELEASE.md](RELEASE.md) for detailed release procedures.
|
|
418
|
+
|
|
419
|
+
**Important**: All releases require:
|
|
420
|
+
- ✅ Zero errors from `make typecheck`
|
|
421
|
+
- ✅ All unit tests passing with >80% coverage
|
|
422
|
+
- ✅ **Integration tests passing** (with configured API keys)
|
|
423
|
+
|
|
424
|
+
## Contributing
|
|
425
|
+
|
|
426
|
+
> [!NOTE]
|
|
427
|
+
> As this is a preview repository used internally, we are not actively accepting external contributions. The codebase may change significantly without notice.
|
|
428
|
+
>
|
|
429
|
+
> **Recommended approach:**
|
|
430
|
+
> 1. Fork the repository
|
|
431
|
+
> 2. Make changes in your fork
|
|
432
|
+
> 3. Share your improvements with the community through your fork
|
|
433
|
+
|
|
434
|
+
If you've found a critical security issue, please report it via the GitHub Security tab.
|
|
435
|
+
|
|
436
|
+
For learning purposes, see [CLAUDE.md](CLAUDE.md) for our comprehensive coding standards and architecture guide.
|
|
437
|
+
|
|
438
|
+
## Documentation
|
|
439
|
+
|
|
440
|
+
- [CLAUDE.md](CLAUDE.md) - Detailed coding standards and architecture guide
|
|
441
|
+
- [Prefect Integration](docs/prefect.md) - Prefect patterns and best practices
|
|
442
|
+
- [Deployment Guide](docs/prefect_deployment.md) - Production deployment
|
|
443
|
+
- [Prefect Logging](docs/prefect_logging.md) - Logging configuration guide
|
|
444
|
+
|
|
445
|
+
## License
|
|
446
|
+
|
|
447
|
+
MIT License - see [LICENSE](LICENSE) file for details.
|
|
448
|
+
|
|
449
|
+
## Support
|
|
450
|
+
|
|
451
|
+
> [!CAUTION]
|
|
452
|
+
> This is a preview repository with no guaranteed support. Issues and discussions may not be actively monitored.
|
|
453
|
+
|
|
454
|
+
- **For Learning**: Review the code, documentation, and examples
|
|
455
|
+
- **For Usage**: Fork the repository and maintain your own version
|
|
456
|
+
- **Security Issues**: Report via GitHub Security tab
|
|
457
|
+
|
|
458
|
+
## Acknowledgments
|
|
459
|
+
|
|
460
|
+
Built with:
|
|
461
|
+
- [Prefect](https://www.prefect.io/) - Workflow orchestration
|
|
462
|
+
- [LMNR](https://www.lmnr.ai/) - LLM observability
|
|
463
|
+
- [LiteLLM](https://litellm.ai/) - LLM proxy
|
|
464
|
+
- [Pydantic](https://docs.pydantic.dev/) - Data validation
|
|
465
|
+
|
|
466
|
+
## Stability Notice
|
|
467
|
+
|
|
468
|
+
**Current Version**: 0.1.1
|
|
469
|
+
**Status**: Internal Preview
|
|
470
|
+
**API Stability**: Unstable - Breaking changes expected
|
|
471
|
+
**Recommended Use**: Learning and reference only
|
|
472
|
+
|
|
473
|
+
For production use, please fork this repository and maintain your own stable version.
|
|
474
|
+
|
|
475
|
+
---
|
|
476
|
+
|
|
477
|
+
**Remember**: The best code is no code. The second best is minimal, typed, async code that does exactly what's needed.
|