ai-pipeline-core 0.1.6__tar.gz → 0.1.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/PKG-INFO +115 -36
- {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/README.md +113 -34
- ai_pipeline_core-0.1.8/ai_pipeline_core/__init__.py +77 -0
- {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/documents/__init__.py +3 -0
- {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/documents/document.py +16 -1
- ai_pipeline_core-0.1.8/ai_pipeline_core/flow/__init__.py +7 -0
- {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/flow/config.py +21 -0
- ai_pipeline_core-0.1.8/ai_pipeline_core/flow/options.py +26 -0
- {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/llm/client.py +5 -3
- {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/llm/model_options.py +4 -0
- ai_pipeline_core-0.1.8/ai_pipeline_core/pipeline.py +414 -0
- ai_pipeline_core-0.1.8/ai_pipeline_core/prefect.py +7 -0
- ai_pipeline_core-0.1.8/ai_pipeline_core/simple_runner/__init__.py +19 -0
- ai_pipeline_core-0.1.8/ai_pipeline_core/simple_runner/cli.py +127 -0
- ai_pipeline_core-0.1.8/ai_pipeline_core/simple_runner/simple_runner.py +147 -0
- {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/tracing.py +63 -20
- {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/pyproject.toml +9 -5
- ai_pipeline_core-0.1.6/ai_pipeline_core/__init__.py +0 -36
- ai_pipeline_core-0.1.6/ai_pipeline_core/flow/__init__.py +0 -3
- {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/.gitignore +0 -0
- {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/LICENSE +0 -0
- {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/documents/document_list.py +0 -0
- {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/documents/flow_document.py +0 -0
- {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/documents/mime_type.py +0 -0
- {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/documents/task_document.py +0 -0
- {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/documents/utils.py +0 -0
- {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/exceptions.py +0 -0
- {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/llm/__init__.py +0 -0
- {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/llm/ai_messages.py +0 -0
- {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/llm/model_response.py +0 -0
- {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/llm/model_types.py +0 -0
- {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/logging/__init__.py +0 -0
- {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/logging/logging.yml +0 -0
- {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/logging/logging_config.py +0 -0
- {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/logging/logging_mixin.py +0 -0
- {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/prompt_manager.py +0 -0
- {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/py.typed +0 -0
- {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/settings.py +0 -0
{ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-pipeline-core
-Version: 0.1.6
+Version: 0.1.8
 Summary: Core utilities for AI-powered processing pipelines using prefect
 Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
 Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -20,7 +20,7 @@ Classifier: Typing :: Typed
 Requires-Python: >=3.12
 Requires-Dist: httpx>=0.28.1
 Requires-Dist: jinja2>=3.1.6
-Requires-Dist: lmnr>=0.7.
+Requires-Dist: lmnr>=0.7.6
 Requires-Dist: openai>=1.99.9
 Requires-Dist: prefect>=3.4.13
 Requires-Dist: pydantic-settings>=2.10.1
@@ -151,40 +151,76 @@ async def process_document(doc: Document):
     return response.parsed
 ```
 
-###
+### Enhanced Pipeline Decorators
 ```python
-from
-from ai_pipeline_core.
-from ai_pipeline_core.
-from ai_pipeline_core.tracing import trace
-
-class OutputDocument(FlowDocument):
-    """Custom output document type"""
-    def get_type(self) -> str:
-        return "output"
+from ai_pipeline_core import pipeline_flow, pipeline_task
+from ai_pipeline_core.flow import FlowOptions
+from ai_pipeline_core.documents import DocumentList, FlowDocument
 
-class
-
-
+class CustomFlowOptions(FlowOptions):
+    """Extend base options with your custom fields"""
+    batch_size: int = 100
+    temperature: float = 0.7
 
-@
-@trace
+@pipeline_task(trace_level="always", retries=3)
 async def process_task(doc: Document) -> Document:
-    # Task
+    # Task with automatic tracing and retries
     result = await process_document(doc)
-
-
-
+    return OutputDocument(name="result", content=result.encode())
+
+@pipeline_flow(trace_level="always")
+async def my_pipeline(
+    project_name: str,
+    documents: DocumentList,
+    flow_options: CustomFlowOptions  # Type-safe custom options
+) -> DocumentList:
+    # Pipeline flow with enforced signature and tracing
+    results = []
+    for doc in documents:
+        result = await process_task(doc)
+        results.append(result)
+    return DocumentList(results)
+```
 
-
-
-
-
+### Simple Runner Utility
+```python
+from ai_pipeline_core.simple_runner import run_cli, run_pipeline
+from ai_pipeline_core.flow import FlowOptions
+
+# CLI-based pipeline execution
+if __name__ == "__main__":
+    run_cli(
+        flows=[my_pipeline],
+        flow_configs=[MyFlowConfig],
+        options_cls=CustomFlowOptions
+    )
 
-
+# Or programmatic execution
+async def main():
+    result = await run_pipeline(
+        project_name="my-project",
+        output_dir=Path("./output"),
+        flow=my_pipeline,
+        flow_config=MyFlowConfig,
+        flow_options=CustomFlowOptions(batch_size=50)
+    )
+```
+
+### Clean Prefect Decorators
+```python
+# Import clean Prefect decorators without tracing
+from ai_pipeline_core.prefect import flow, task
 
-
-
+# Or use pipeline decorators with tracing
+from ai_pipeline_core import pipeline_flow, pipeline_task
+
+@task  # Clean Prefect task (supports both sync and async)
+def compute(x: int) -> int:
+    return x * 2
+
+@pipeline_task(trace_level="always")  # With tracing (async only)
+async def compute_traced(x: int) -> int:
+    return x * 2
 ```
 
 ## Core Modules
@@ -210,12 +246,12 @@ docs = DocumentList([doc1, doc2])
 Managed AI interactions with built-in retry logic, cost tracking, and structured outputs.
 
 **Supported Models** (via LiteLLM proxy):
-- OpenAI:
-- Anthropic:
-- Google:
-- xAI:
-- Perplexity:
-- And many more through LiteLLM compatibility
+- OpenAI: gpt-5
+- Anthropic: claude-4
+- Google: gemini-2.5
+- xAI: grok-3, grok-4
+- Perplexity: sonar-pro-search
+- And many more through LiteLLM compatibility. Every model from openrouter should work.
 
 ```python
 from ai_pipeline_core.llm import generate_structured, AIMessages, ModelOptions
@@ -291,8 +327,14 @@ ai_pipeline_core/
 │   ├── client.py        # Async client implementation
 │   └── model_options.py # Configuration models
 ├── flow/                # Prefect flow utilities
-│
+│   ├── config.py        # Type-safe flow configuration
+│   └── options.py       # FlowOptions base class
+├── simple_runner/       # Pipeline execution utilities
+│   ├── cli.py           # CLI interface
+│   └── simple_runner.py # Core runner logic
 ├── logging/             # Structured logging
+├── pipeline.py          # Enhanced decorators
+├── prefect.py           # Clean Prefect exports
 ├── tracing.py           # Observability decorators
 └── settings.py          # Centralized configuration
 ```
@@ -303,6 +345,7 @@ ai_pipeline_core/
 ```bash
 make test              # Run all tests
 make test-cov          # Run with coverage report
+make test-showcase     # Test the showcase.py CLI example
 pytest tests/test_documents.py::TestDocument::test_creation  # Single test
 ```
 
@@ -439,6 +482,22 @@ For learning purposes, see [CLAUDE.md](CLAUDE.md) for our comprehensive coding s
 
 - [CLAUDE.md](CLAUDE.md) - Detailed coding standards and architecture guide
 
+## Examples
+
+### In This Repository
+- [showcase.py](examples/showcase.py) - Complete example demonstrating all core features including the CLI runner
+  ```bash
+  # Run the showcase example with CLI
+  python examples/showcase.py ./output --temperature 0.7 --batch-size 5
+
+  # Show help
+  python examples/showcase.py --help
+  ```
+- [showcase.jinja2](examples/showcase.jinja2) - Example Jinja2 prompt template
+
+### Real-World Application
+- [AI Documentation Writer](https://github.com/bbarwik/ai-documentation-writer) - Production-ready example showing how to build sophisticated AI pipelines for automated documentation generation. See [examples/ai-documentation-writer.md](examples/ai-documentation-writer.md) for a detailed overview.
+
 ### dependencies_docs/ Directory
 > [!NOTE]
 > The `dependencies_docs/` directory contains guides for AI assistants (like Claude Code) on how to interact with the project's external dependencies and tooling, NOT user documentation for ai-pipeline-core itself. These files are excluded from repository listings to avoid confusion.
@@ -469,9 +528,29 @@ Built with:
 - [LiteLLM](https://litellm.ai/) - LLM proxy
 - [Pydantic](https://pydantic-docs.helpmanual.io/) - Data validation
 
+## What's New in v0.1.8
+
+### Breaking Changes
+- **Async-Only Pipeline Decorators**: `@pipeline_flow` and `@pipeline_task` now require `async def` functions (raises TypeError for sync)
+- **Document Class Name Validation**: Document subclasses cannot start with "Test" prefix (pytest conflict prevention)
+- **FlowConfig Validation**: OUTPUT_DOCUMENT_TYPE cannot be in INPUT_DOCUMENT_TYPES (prevents circular dependencies)
+- **Temperature Field**: Added optional `temperature` field to `ModelOptions` for explicit control
+
+### Major Improvements
+- **Pipeline Module Refactoring**: Reduced from ~400 to ~150 lines with cleaner Protocol-based typing
+- **Enhanced Validation**: FlowConfig and Document classes now validate at definition time
+- **Better CLI Support**: Auto-displays help when no arguments provided, improved context management
+- **Test Suite Updates**: All tests updated to use async/await consistently
+
+### Documentation Updates
+- Added Document naming rules to CLAUDE.md
+- Added FlowConfig validation rules
+- Added code elegance principles section
+- Updated guide_for_ai.md to API reference format
+
 ## Stability Notice
 
-**Current Version**: 0.1.6
+**Current Version**: 0.1.8
 **Status**: Internal Preview
 **API Stability**: Unstable - Breaking changes expected
 **Recommended Use**: Learning and reference only
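The "Breaking Changes" list above notes that the pipeline decorators are now async-only. A minimal sketch of the resulting behavior, assuming the TypeError is raised at decoration time as the release note implies; the function names here are illustrative:

```python
from ai_pipeline_core import pipeline_task

@pipeline_task(trace_level="always")
async def double_async(x: int) -> int:
    # Accepted in 0.1.8: pipeline decorators require `async def`.
    return x * 2

try:
    @pipeline_task(trace_level="always")
    def double_sync(x: int) -> int:  # sync function: rejected in 0.1.8
        return x * 2
except TypeError as exc:
    print(f"Rejected as expected: {exc}")
```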

{ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/README.md

@@ -109,40 +109,76 @@ async def process_document(doc: Document):
@@ -168,12 +204,12 @@ docs = DocumentList([doc1, doc2])
@@ -249,8 +285,14 @@ ai_pipeline_core/
@@ -261,6 +303,7 @@ ai_pipeline_core/
@@ -397,6 +440,22 @@ For learning purposes, see [CLAUDE.md](CLAUDE.md) for our comprehensive coding s
@@ -427,9 +486,29 @@ Built with:

The content of these six README.md hunks is identical to the README-derived hunks shown under PKG-INFO above (PKG-INFO embeds README.md after its metadata header); only the line offsets differ.
ai_pipeline_core-0.1.8/ai_pipeline_core/__init__.py

@@ -0,0 +1,77 @@
+"""Pipeline Core - Shared infrastructure for AI pipelines."""
+
+from . import llm
+from .documents import (
+    Document,
+    DocumentList,
+    FlowDocument,
+    TaskDocument,
+    canonical_name_key,
+    sanitize_url,
+)
+from .flow import FlowConfig, FlowOptions
+from .llm import (
+    AIMessages,
+    AIMessageType,
+    ModelName,
+    ModelOptions,
+    ModelResponse,
+    StructuredModelResponse,
+)
+from .logging import (
+    LoggerMixin,
+    LoggingConfig,
+    StructuredLoggerMixin,
+    get_pipeline_logger,
+    setup_logging,
+)
+from .logging import get_pipeline_logger as get_logger
+from .pipeline import pipeline_flow, pipeline_task
+from .prefect import disable_run_logger, prefect_test_harness
+from .prompt_manager import PromptManager
+from .settings import settings
+from .tracing import TraceInfo, TraceLevel, trace
+
+__version__ = "0.1.8"
+
+__all__ = [
+    # Config/Settings
+    "settings",
+    # Logging
+    "get_logger",
+    "get_pipeline_logger",
+    "LoggerMixin",
+    "LoggingConfig",
+    "setup_logging",
+    "StructuredLoggerMixin",
+    # Documents
+    "Document",
+    "DocumentList",
+    "FlowDocument",
+    "TaskDocument",
+    "canonical_name_key",
+    "sanitize_url",
+    # Flow/Task
+    "FlowConfig",
+    "FlowOptions",
+    # Pipeline decorators (with tracing)
+    "pipeline_task",
+    "pipeline_flow",
+    # Prefect decorators (clean, no tracing)
+    "prefect_test_harness",
+    "disable_run_logger",
+    # LLM
+    "llm",
+    "ModelName",
+    "ModelOptions",
+    "ModelResponse",
+    "StructuredModelResponse",
+    "AIMessages",
+    "AIMessageType",
+    # Tracing
+    "trace",
+    "TraceLevel",
+    "TraceInfo",
+    # Utils
+    "PromptManager",
+]
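For reference, a short sketch of the flattened public API this new `__init__.py` exposes; every name below is taken from the `__all__` list above:

```python
import ai_pipeline_core
from ai_pipeline_core import (
    AIMessages,
    DocumentList,
    FlowConfig,
    FlowOptions,
    ModelOptions,
    pipeline_flow,
    pipeline_task,
    trace,
)

print(ai_pipeline_core.__version__)  # "0.1.8"
```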
{ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/documents/__init__.py

@@ -2,10 +2,13 @@ from .document import Document
 from .document_list import DocumentList
 from .flow_document import FlowDocument
 from .task_document import TaskDocument
+from .utils import canonical_name_key, sanitize_url
 
 __all__ = [
     "Document",
     "DocumentList",
     "FlowDocument",
     "TaskDocument",
+    "canonical_name_key",
+    "sanitize_url",
 ]
{ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/documents/document.py

@@ -26,12 +26,27 @@ TModel = TypeVar("TModel", bound=BaseModel)
 
 
 class Document(BaseModel, ABC):
-    """Abstract base class for all documents
+    """Abstract base class for all documents.
+
+    Warning: Document subclasses should NOT start with 'Test' prefix as this
+    causes conflicts with pytest test discovery. Classes with 'Test' prefix
+    will be rejected at definition time.
+    """
 
     MAX_CONTENT_SIZE: ClassVar[int] = 25 * 1024 * 1024  # 25MB default
     DESCRIPTION_EXTENSION: ClassVar[str] = ".description.md"
     MARKDOWN_LIST_SEPARATOR: ClassVar[str] = "\n\n---\n\n"
 
+    def __init_subclass__(cls, **kwargs: Any) -> None:
+        """Validate subclass names to prevent pytest conflicts."""
+        super().__init_subclass__(**kwargs)
+        if cls.__name__.startswith("Test"):
+            raise TypeError(
+                f"Document subclass '{cls.__name__}' cannot start with 'Test' prefix. "
+                "This causes conflicts with pytest test discovery. "
+                "Please use a different name (e.g., 'SampleDocument', 'ExampleDocument')."
+            )
+
     def __init__(self, **data: Any) -> None:
         """Prevent direct instantiation of abstract Document class."""
         if type(self) is Document:
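The new `__init_subclass__` hook above rejects document classes whose names begin with "Test". A minimal sketch of that behavior; the class names are illustrative and only the definition-time check is exercised (no extra class attributes are assumed):

```python
from ai_pipeline_core.documents import FlowDocument

class SampleReport(FlowDocument):
    """Fine: follows the suggested 'Sample*'/'Example*' naming."""

try:
    class TestReport(FlowDocument):  # 'Test' prefix: rejected at class definition time
        """Would otherwise be picked up by pytest test discovery."""
except TypeError as exc:
    print(exc)
```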
{ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/flow/config.py

@@ -14,6 +14,27 @@ class FlowConfig(ABC):
     INPUT_DOCUMENT_TYPES: ClassVar[list[type[FlowDocument]]]
     OUTPUT_DOCUMENT_TYPE: ClassVar[type[FlowDocument]]
 
+    def __init_subclass__(cls, **kwargs):
+        """Validate that OUTPUT_DOCUMENT_TYPE is not in INPUT_DOCUMENT_TYPES."""
+        super().__init_subclass__(**kwargs)
+
+        # Skip validation for the abstract base class itself
+        if cls.__name__ == "FlowConfig":
+            return
+
+        # Ensure required attributes are defined
+        if not hasattr(cls, "INPUT_DOCUMENT_TYPES"):
+            raise TypeError(f"FlowConfig {cls.__name__} must define INPUT_DOCUMENT_TYPES")
+        if not hasattr(cls, "OUTPUT_DOCUMENT_TYPE"):
+            raise TypeError(f"FlowConfig {cls.__name__} must define OUTPUT_DOCUMENT_TYPE")
+
+        # Validate that output type is not in input types
+        if cls.OUTPUT_DOCUMENT_TYPE in cls.INPUT_DOCUMENT_TYPES:
+            raise TypeError(
+                f"FlowConfig {cls.__name__}: OUTPUT_DOCUMENT_TYPE "
+                f"({cls.OUTPUT_DOCUMENT_TYPE.__name__}) cannot be in INPUT_DOCUMENT_TYPES"
+            )
+
     @classmethod
     def get_input_document_types(cls) -> list[type[FlowDocument]]:
         """
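A sketch of how these definition-time checks behave; the document and config classes here are illustrative, not part of the library:

```python
from ai_pipeline_core import FlowConfig
from ai_pipeline_core.documents import FlowDocument

class InputDoc(FlowDocument):
    """Illustrative input document type."""

class OutputDoc(FlowDocument):
    """Illustrative output document type."""

class MyFlowConfig(FlowConfig):
    # Passes validation: the output type is distinct from every input type.
    INPUT_DOCUMENT_TYPES = [InputDoc]
    OUTPUT_DOCUMENT_TYPE = OutputDoc

try:
    class CircularFlowConfig(FlowConfig):
        # Rejected: the output type is also listed as an input type.
        INPUT_DOCUMENT_TYPES = [OutputDoc]
        OUTPUT_DOCUMENT_TYPE = OutputDoc
except TypeError as exc:
    print(exc)
```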
ai_pipeline_core-0.1.8/ai_pipeline_core/flow/options.py

@@ -0,0 +1,26 @@
+from typing import TypeVar
+
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+from ai_pipeline_core.llm import ModelName
+
+T = TypeVar("T", bound="FlowOptions")
+
+
+class FlowOptions(BaseSettings):
+    """Base configuration for AI Pipeline flows."""
+
+    core_model: ModelName | str = Field(
+        default="gpt-5",
+        description="Primary model for complex analysis and generation tasks.",
+    )
+    small_model: ModelName | str = Field(
+        default="gpt-5-mini",
+        description="Fast, cost-effective model for simple tasks and orchestration.",
+    )
+
+    model_config = SettingsConfigDict(frozen=True, extra="ignore")
+
+
+__all__ = ["FlowOptions"]
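This mirrors the `CustomFlowOptions` example in the README diff above. A small sketch of the same pattern, assuming the usual pydantic-settings behavior implied by `frozen=True` and the `ModelName | str` fields:

```python
from ai_pipeline_core.flow import FlowOptions

class CustomFlowOptions(FlowOptions):
    """Project-specific options on top of the shared core_model/small_model fields."""
    batch_size: int = 100
    temperature: float = 0.7

opts = CustomFlowOptions(core_model="gemini-2.5", batch_size=50)
print(opts.core_model, opts.small_model, opts.batch_size)  # gemini-2.5 gpt-5-mini 50
# frozen=True: later assignment such as `opts.batch_size = 10` raises a validation error.
```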
{ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/llm/client.py

@@ -118,11 +118,13 @@ async def _generate_with_retry(
             span.set_attributes(response.get_laminar_metadata())
             Laminar.set_span_output(response.content)
             if not response.content:
-                # disable cache in case of empty response
-                completion_kwargs["extra_body"]["cache"] = {"no-cache": True}
                 raise ValueError(f"Model {model} returned an empty response.")
             return response
         except (asyncio.TimeoutError, ValueError, Exception) as e:
+            if not isinstance(e, asyncio.TimeoutError):
+                # disable cache if it's not a timeout because it may cause an error
+                completion_kwargs["extra_body"]["cache"] = {"no-cache": True}
+
             logger.warning(
                 "LLM generation failed (attempt %d/%d): %s",
                 attempt + 1,
@@ -167,7 +169,7 @@ T = TypeVar("T", bound=BaseModel)
 
 @trace(ignore_inputs=["context"])
 async def generate_structured(
-    model: ModelName,
+    model: ModelName | str,
     response_format: type[T],
     *,
     context: AIMessages = AIMessages(),
{ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/llm/model_options.py

@@ -4,6 +4,7 @@ from pydantic import BaseModel
 
 
 class ModelOptions(BaseModel):
+    temperature: float | None = None
     system_prompt: str | None = None
     search_context_size: Literal["low", "medium", "high"] | None = None
     reasoning_effort: Literal["low", "medium", "high"] | None = None
@@ -21,6 +22,9 @@ class ModelOptions(BaseModel):
             "extra_body": {},
         }
 
+        if self.temperature:
+            kwargs["temperature"] = self.temperature
+
         if self.max_completion_tokens:
             kwargs["max_completion_tokens"] = self.max_completion_tokens
 
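A minimal sketch of the new field; how a `ModelOptions` instance is handed to the generation functions is not shown in this hunk, so only construction is illustrated:

```python
from ai_pipeline_core.llm import ModelOptions

default_opts = ModelOptions()  # temperature stays None
explicit_opts = ModelOptions(temperature=0.2, system_prompt="You are a concise analyst.")

print(default_opts.temperature)   # None -> no temperature key added to the completion kwargs
print(explicit_opts.temperature)  # 0.2 -> forwarded as kwargs["temperature"]
```

Note that the guard in the hunk above is a truthiness check (`if self.temperature:`), so an explicit `temperature=0.0` would currently not be forwarded.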