ai-pipeline-core 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +70 -144
- ai_pipeline_core/deployment/__init__.py +6 -18
- ai_pipeline_core/deployment/base.py +392 -212
- ai_pipeline_core/deployment/contract.py +6 -10
- ai_pipeline_core/{utils → deployment}/deploy.py +50 -69
- ai_pipeline_core/deployment/helpers.py +16 -17
- ai_pipeline_core/{progress.py → deployment/progress.py} +23 -24
- ai_pipeline_core/{utils/remote_deployment.py → deployment/remote.py} +11 -14
- ai_pipeline_core/docs_generator/__init__.py +54 -0
- ai_pipeline_core/docs_generator/__main__.py +5 -0
- ai_pipeline_core/docs_generator/cli.py +196 -0
- ai_pipeline_core/docs_generator/extractor.py +324 -0
- ai_pipeline_core/docs_generator/guide_builder.py +644 -0
- ai_pipeline_core/docs_generator/trimmer.py +35 -0
- ai_pipeline_core/docs_generator/validator.py +114 -0
- ai_pipeline_core/document_store/__init__.py +13 -0
- ai_pipeline_core/document_store/_summary.py +9 -0
- ai_pipeline_core/document_store/_summary_worker.py +170 -0
- ai_pipeline_core/document_store/clickhouse.py +492 -0
- ai_pipeline_core/document_store/factory.py +38 -0
- ai_pipeline_core/document_store/local.py +312 -0
- ai_pipeline_core/document_store/memory.py +85 -0
- ai_pipeline_core/document_store/protocol.py +68 -0
- ai_pipeline_core/documents/__init__.py +12 -14
- ai_pipeline_core/documents/_context_vars.py +85 -0
- ai_pipeline_core/documents/_hashing.py +52 -0
- ai_pipeline_core/documents/attachment.py +85 -0
- ai_pipeline_core/documents/context.py +128 -0
- ai_pipeline_core/documents/document.py +318 -1434
- ai_pipeline_core/documents/mime_type.py +37 -82
- ai_pipeline_core/documents/utils.py +4 -12
- ai_pipeline_core/exceptions.py +10 -62
- ai_pipeline_core/images/__init__.py +32 -85
- ai_pipeline_core/images/_processing.py +5 -11
- ai_pipeline_core/llm/__init__.py +6 -4
- ai_pipeline_core/llm/ai_messages.py +106 -81
- ai_pipeline_core/llm/client.py +267 -158
- ai_pipeline_core/llm/model_options.py +12 -84
- ai_pipeline_core/llm/model_response.py +53 -99
- ai_pipeline_core/llm/model_types.py +8 -23
- ai_pipeline_core/logging/__init__.py +2 -7
- ai_pipeline_core/logging/logging.yml +1 -1
- ai_pipeline_core/logging/logging_config.py +27 -37
- ai_pipeline_core/logging/logging_mixin.py +15 -41
- ai_pipeline_core/observability/__init__.py +32 -0
- ai_pipeline_core/observability/_debug/__init__.py +30 -0
- ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
- ai_pipeline_core/{debug/config.py → observability/_debug/_config.py} +11 -7
- ai_pipeline_core/{debug/content.py → observability/_debug/_content.py} +134 -75
- ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py} +16 -17
- ai_pipeline_core/{debug/summary.py → observability/_debug/_summary.py} +113 -37
- ai_pipeline_core/observability/_debug/_types.py +75 -0
- ai_pipeline_core/{debug/writer.py → observability/_debug/_writer.py} +126 -196
- ai_pipeline_core/observability/_document_tracking.py +146 -0
- ai_pipeline_core/observability/_initialization.py +194 -0
- ai_pipeline_core/observability/_logging_bridge.py +57 -0
- ai_pipeline_core/observability/_summary.py +81 -0
- ai_pipeline_core/observability/_tracking/__init__.py +6 -0
- ai_pipeline_core/observability/_tracking/_client.py +178 -0
- ai_pipeline_core/observability/_tracking/_internal.py +28 -0
- ai_pipeline_core/observability/_tracking/_models.py +138 -0
- ai_pipeline_core/observability/_tracking/_processor.py +158 -0
- ai_pipeline_core/observability/_tracking/_service.py +311 -0
- ai_pipeline_core/observability/_tracking/_writer.py +229 -0
- ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -335
- ai_pipeline_core/pipeline/__init__.py +10 -0
- ai_pipeline_core/pipeline/decorators.py +915 -0
- ai_pipeline_core/pipeline/options.py +16 -0
- ai_pipeline_core/prompt_manager.py +16 -102
- ai_pipeline_core/settings.py +26 -31
- ai_pipeline_core/testing.py +9 -0
- ai_pipeline_core-0.4.0.dist-info/METADATA +807 -0
- ai_pipeline_core-0.4.0.dist-info/RECORD +76 -0
- ai_pipeline_core/debug/__init__.py +0 -26
- ai_pipeline_core/documents/document_list.py +0 -420
- ai_pipeline_core/documents/flow_document.py +0 -112
- ai_pipeline_core/documents/task_document.py +0 -117
- ai_pipeline_core/documents/temporary_document.py +0 -74
- ai_pipeline_core/flow/__init__.py +0 -9
- ai_pipeline_core/flow/config.py +0 -494
- ai_pipeline_core/flow/options.py +0 -75
- ai_pipeline_core/pipeline.py +0 -718
- ai_pipeline_core/prefect.py +0 -63
- ai_pipeline_core/prompt_builder/__init__.py +0 -5
- ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -23
- ai_pipeline_core/prompt_builder/global_cache.py +0 -78
- ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -6
- ai_pipeline_core/prompt_builder/prompt_builder.py +0 -253
- ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -41
- ai_pipeline_core/storage/__init__.py +0 -8
- ai_pipeline_core/storage/storage.py +0 -628
- ai_pipeline_core/utils/__init__.py +0 -8
- ai_pipeline_core-0.3.3.dist-info/METADATA +0 -569
- ai_pipeline_core-0.3.3.dist-info/RECORD +0 -57
- {ai_pipeline_core-0.3.3.dist-info → ai_pipeline_core-0.4.0.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.3.3.dist-info → ai_pipeline_core-0.4.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,569 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: ai-pipeline-core
|
|
3
|
-
Version: 0.3.3
|
|
4
|
-
Summary: Core utilities for AI-powered processing pipelines using prefect
|
|
5
|
-
Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
|
|
6
|
-
Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
|
|
7
|
-
Project-URL: Issues, https://github.com/bbarwik/ai-pipeline-core/issues
|
|
8
|
-
Author-email: bbarwik <bbarwik@gmail.com>
|
|
9
|
-
License: MIT
|
|
10
|
-
License-File: LICENSE
|
|
11
|
-
Classifier: Development Status :: 4 - Beta
|
|
12
|
-
Classifier: Intended Audience :: Developers
|
|
13
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
-
Classifier: Programming Language :: Python :: 3
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
-
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
18
|
-
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
|
-
Classifier: Typing :: Typed
|
|
20
|
-
Requires-Python: >=3.12
|
|
21
|
-
Requires-Dist: httpx>=0.28.1
|
|
22
|
-
Requires-Dist: jinja2>=3.1.6
|
|
23
|
-
Requires-Dist: lmnr>=0.7.18
|
|
24
|
-
Requires-Dist: openai>=1.109.1
|
|
25
|
-
Requires-Dist: pillow>=10.0.0
|
|
26
|
-
Requires-Dist: prefect-gcp[cloud-storage]>=0.6.10
|
|
27
|
-
Requires-Dist: prefect>=3.4.21
|
|
28
|
-
Requires-Dist: pydantic-settings>=2.10.1
|
|
29
|
-
Requires-Dist: pydantic>=2.11.9
|
|
30
|
-
Requires-Dist: python-magic>=0.4.27
|
|
31
|
-
Requires-Dist: ruamel-yaml>=0.18.14
|
|
32
|
-
Requires-Dist: tiktoken>=0.12.0
|
|
33
|
-
Provides-Extra: dev
|
|
34
|
-
Requires-Dist: basedpyright>=1.31.2; extra == 'dev'
|
|
35
|
-
Requires-Dist: bump2version>=1.0.1; extra == 'dev'
|
|
36
|
-
Requires-Dist: interrogate>=1.5.0; extra == 'dev'
|
|
37
|
-
Requires-Dist: pre-commit>=4.3.0; extra == 'dev'
|
|
38
|
-
Requires-Dist: pydoc-markdown[jinja]>=4.8.0; extra == 'dev'
|
|
39
|
-
Requires-Dist: pytest-asyncio>=1.1.0; extra == 'dev'
|
|
40
|
-
Requires-Dist: pytest-cov>=5.0.0; extra == 'dev'
|
|
41
|
-
Requires-Dist: pytest-mock>=3.14.0; extra == 'dev'
|
|
42
|
-
Requires-Dist: pytest-xdist>=3.8.0; extra == 'dev'
|
|
43
|
-
Requires-Dist: pytest>=8.4.1; extra == 'dev'
|
|
44
|
-
Requires-Dist: ruff>=0.14.1; extra == 'dev'
|
|
45
|
-
Description-Content-Type: text/markdown
|
|
46
|
-
|
|
47
|
-
# AI Pipeline Core
|
|
48
|
-
|
|
49
|
-
A high-performance async framework for building type-safe AI pipelines with LLMs, document processing, and workflow orchestration.
|
|
50
|
-
|
|
51
|
-
[Python 3.12+](https://www.python.org/downloads/)
|
|
52
|
-
[License: MIT](https://opensource.org/licenses/MIT)
|
|
53
|
-
[Code style: ruff](https://github.com/astral-sh/ruff)
|
|
54
|
-
[Type checked: basedpyright](https://github.com/DetachHead/basedpyright)
|
|
55
|
-
|
|
56
|
-
## Overview
|
|
57
|
-
|
|
58
|
-
AI Pipeline Core is a production-ready framework that combines document processing, LLM integration, and workflow orchestration into a unified system. Built with strong typing (Pydantic), automatic retries, cost tracking, and distributed tracing, it enforces best practices while maintaining high performance through fully async operations.
|
|
59
|
-
|
|
60
|
-
### Key Features
|
|
61
|
-
|
|
62
|
-
- **Document Processing**: Type-safe handling of text, JSON, YAML, PDFs, and images with automatic MIME type detection and provenance tracking
|
|
63
|
-
- **LLM Integration**: Unified interface to any model via LiteLLM proxy with configurable context caching
|
|
64
|
-
- **Structured Output**: Type-safe generation with Pydantic model validation
|
|
65
|
-
- **Workflow Orchestration**: Prefect-based flows and tasks with automatic retries
|
|
66
|
-
- **Observability**: Built-in distributed tracing via Laminar (LMNR) with cost tracking for debugging and monitoring
|
|
67
|
-
- **Deployment**: Unified pipeline execution for local, CLI, and production environments
|
|
68
|
-
|
|
69
|
-
## Installation
|
|
70
|
-
|
|
71
|
-
```bash
|
|
72
|
-
pip install ai-pipeline-core
|
|
73
|
-
```
|
|
74
|
-
|
|
75
|
-
### Requirements
|
|
76
|
-
|
|
77
|
-
- Python 3.12 or higher
|
|
78
|
-
- Linux/macOS (Windows via WSL2)
|
|
79
|
-
|
|
80
|
-
### Development Installation
|
|
81
|
-
|
|
82
|
-
```bash
|
|
83
|
-
git clone https://github.com/bbarwik/ai-pipeline-core.git
|
|
84
|
-
cd ai-pipeline-core
|
|
85
|
-
pip install -e ".[dev]"
|
|
86
|
-
make install-dev # Installs pre-commit hooks
|
|
87
|
-
```
|
|
88
|
-
|
|
89
|
-
## Quick Start
|
|
90
|
-
|
|
91
|
-
### Basic Pipeline
|
|
92
|
-
|
|
93
|
-
```python
|
|
94
|
-
from ai_pipeline_core import (
|
|
95
|
-
pipeline_flow,
|
|
96
|
-
FlowDocument,
|
|
97
|
-
DocumentList,
|
|
98
|
-
FlowOptions,
|
|
99
|
-
FlowConfig,
|
|
100
|
-
llm,
|
|
101
|
-
AIMessages
|
|
102
|
-
)
|
|
103
|
-
|
|
104
|
-
# Define document types
|
|
105
|
-
class InputDoc(FlowDocument):
|
|
106
|
-
"""Input document for processing."""
|
|
107
|
-
|
|
108
|
-
class OutputDoc(FlowDocument):
|
|
109
|
-
"""Analysis result document."""
|
|
110
|
-
|
|
111
|
-
# Define flow configuration
|
|
112
|
-
class AnalysisConfig(FlowConfig):
|
|
113
|
-
INPUT_DOCUMENT_TYPES = [InputDoc]
|
|
114
|
-
OUTPUT_DOCUMENT_TYPE = OutputDoc
|
|
115
|
-
|
|
116
|
-
# Create pipeline flow with required config
|
|
117
|
-
@pipeline_flow(config=AnalysisConfig)
|
|
118
|
-
async def analyze_flow(
|
|
119
|
-
project_name: str,
|
|
120
|
-
documents: DocumentList,
|
|
121
|
-
flow_options: FlowOptions
|
|
122
|
-
) -> DocumentList:
|
|
123
|
-
# Process documents
|
|
124
|
-
outputs = []
|
|
125
|
-
for doc in documents:
|
|
126
|
-
# Use AIMessages for LLM interaction
|
|
127
|
-
response = await llm.generate(
|
|
128
|
-
model="gpt-5.1",
|
|
129
|
-
messages=AIMessages([doc])
|
|
130
|
-
)
|
|
131
|
-
|
|
132
|
-
output = OutputDoc.create(
|
|
133
|
-
name=f"analysis_{doc.name}",
|
|
134
|
-
content=response.content
|
|
135
|
-
)
|
|
136
|
-
outputs.append(output)
|
|
137
|
-
|
|
138
|
-
# RECOMMENDED: Always validate output
|
|
139
|
-
return AnalysisConfig.create_and_validate_output(outputs)
|
|
140
|
-
```
|
|
141
|
-
|
|
142
|
-
### Structured Output
|
|
143
|
-
|
|
144
|
-
```python
|
|
145
|
-
from pydantic import BaseModel
|
|
146
|
-
from ai_pipeline_core import llm
|
|
147
|
-
|
|
148
|
-
class Analysis(BaseModel):
|
|
149
|
-
summary: str
|
|
150
|
-
sentiment: float
|
|
151
|
-
key_points: list[str]
|
|
152
|
-
|
|
153
|
-
# Generate structured output
|
|
154
|
-
response = await llm.generate_structured(
|
|
155
|
-
model="gpt-5.1",
|
|
156
|
-
response_format=Analysis,
|
|
157
|
-
messages="Analyze this product review: ..."
|
|
158
|
-
)
|
|
159
|
-
|
|
160
|
-
# Access parsed result with type safety
|
|
161
|
-
analysis = response.parsed
|
|
162
|
-
print(f"Sentiment: {analysis.sentiment}")
|
|
163
|
-
for point in analysis.key_points:
|
|
164
|
-
print(f"- {point}")
|
|
165
|
-
```
|
|
166
|
-
|
|
167
|
-
### Document Handling
|
|
168
|
-
|
|
169
|
-
```python
|
|
170
|
-
from ai_pipeline_core import FlowDocument, TemporaryDocument
|
|
171
|
-
|
|
172
|
-
# Create documents with automatic conversion
|
|
173
|
-
doc = MyDocument.create(
|
|
174
|
-
name="data.json",
|
|
175
|
-
content={"key": "value"} # Automatically converted to JSON bytes
|
|
176
|
-
)
|
|
177
|
-
|
|
178
|
-
# Parse back to original type
|
|
179
|
-
data = doc.parse(dict) # Returns {"key": "value"}
|
|
180
|
-
|
|
181
|
-
# Document provenance tracking
|
|
182
|
-
doc_with_sources = MyDocument.create(
|
|
183
|
-
name="derived.json",
|
|
184
|
-
content={"result": "processed"},
|
|
185
|
-
sources=[source_doc.sha256, "https://api.example.com/data"]
|
|
186
|
-
)
|
|
187
|
-
|
|
188
|
-
# Check provenance
|
|
189
|
-
for hash in doc_with_sources.get_source_documents():
|
|
190
|
-
print(f"Derived from document: {hash}")
|
|
191
|
-
for ref in doc_with_sources.get_source_references():
|
|
192
|
-
print(f"External source: {ref}")
|
|
193
|
-
|
|
194
|
-
# Temporary documents (never persisted)
|
|
195
|
-
temp = TemporaryDocument.create(
|
|
196
|
-
name="api_response.json",
|
|
197
|
-
content={"status": "ok"}
|
|
198
|
-
)
|
|
199
|
-
```
|
|
200
|
-
|
|
201
|
-
## Core Concepts
|
|
202
|
-
|
|
203
|
-
### Documents
|
|
204
|
-
|
|
205
|
-
Documents are immutable Pydantic models that wrap binary content with metadata:
|
|
206
|
-
|
|
207
|
-
- **FlowDocument**: Persists across flow runs, saved to filesystem
|
|
208
|
-
- **TaskDocument**: Temporary within task execution, not persisted
|
|
209
|
-
- **TemporaryDocument**: Never persisted, useful for sensitive data
|
|
210
|
-
|
|
211
|
-
```python
|
|
212
|
-
class MyDocument(FlowDocument):
|
|
213
|
-
"""Custom document type."""
|
|
214
|
-
|
|
215
|
-
# Use create() for automatic conversion
|
|
216
|
-
doc = MyDocument.create(
|
|
217
|
-
name="data.json",
|
|
218
|
-
content={"key": "value"} # Auto-converts to JSON
|
|
219
|
-
)
|
|
220
|
-
|
|
221
|
-
# Access content
|
|
222
|
-
if doc.is_text:
|
|
223
|
-
print(doc.text)
|
|
224
|
-
|
|
225
|
-
# Parse structured data
|
|
226
|
-
data = doc.as_json() # or as_yaml(), as_pydantic_model()
|
|
227
|
-
|
|
228
|
-
# Convert between document types
|
|
229
|
-
task_doc = flow_doc.model_convert(TaskDocument) # Convert FlowDocument to TaskDocument
|
|
230
|
-
new_doc = doc.model_convert(OtherDocType, content={"new": "data"}) # With content update
|
|
231
|
-
|
|
232
|
-
# Enhanced filtering
|
|
233
|
-
filtered = documents.filter_by([Doc1, Doc2, Doc3]) # Multiple types
|
|
234
|
-
named = documents.filter_by(["file1.txt", "file2.txt"]) # Multiple names
|
|
235
|
-
|
|
236
|
-
# Immutable collections
|
|
237
|
-
frozen_docs = DocumentList(docs, frozen=True) # Immutable document list
|
|
238
|
-
frozen_msgs = AIMessages(messages, frozen=True) # Immutable message list
|
|
239
|
-
```
|
|
240
|
-
|
|
241
|
-
### LLM Integration
|
|
242
|
-
|
|
243
|
-
The framework provides a unified interface for LLM interactions with smart caching:
|
|
244
|
-
|
|
245
|
-
```python
|
|
246
|
-
from ai_pipeline_core import llm, AIMessages, ModelOptions
|
|
247
|
-
|
|
248
|
-
# Simple generation
|
|
249
|
-
response = await llm.generate(
|
|
250
|
-
model="gpt-5.1",
|
|
251
|
-
messages="Explain quantum computing"
|
|
252
|
-
)
|
|
253
|
-
print(response.content)
|
|
254
|
-
|
|
255
|
-
# With context caching (saves 50-90% tokens)
|
|
256
|
-
static_context = AIMessages([large_document])
|
|
257
|
-
|
|
258
|
-
# First call: caches context
|
|
259
|
-
r1 = await llm.generate(
|
|
260
|
-
model="gpt-5.1",
|
|
261
|
-
context=static_context, # Cached for 120 seconds by default
|
|
262
|
-
messages="Summarize" # Dynamic query
|
|
263
|
-
)
|
|
264
|
-
|
|
265
|
-
# Second call: reuses cache
|
|
266
|
-
r2 = await llm.generate(
|
|
267
|
-
model="gpt-5.1",
|
|
268
|
-
context=static_context, # Reused from cache!
|
|
269
|
-
messages="Key points?" # Different query
|
|
270
|
-
)
|
|
271
|
-
|
|
272
|
-
# Custom cache TTL
|
|
273
|
-
response = await llm.generate(
|
|
274
|
-
model="gpt-5.1",
|
|
275
|
-
context=static_context,
|
|
276
|
-
messages="Analyze",
|
|
277
|
-
options=ModelOptions(cache_ttl="300s") # Cache for 5 minutes
|
|
278
|
-
)
|
|
279
|
-
|
|
280
|
-
# Disable caching for dynamic contexts
|
|
281
|
-
response = await llm.generate(
|
|
282
|
-
model="gpt-5.1",
|
|
283
|
-
context=dynamic_context,
|
|
284
|
-
messages="Process",
|
|
285
|
-
options=ModelOptions(cache_ttl=None) # No caching
|
|
286
|
-
)
|
|
287
|
-
```
|
|
288
|
-
|
|
289
|
-
### Flow Configuration
|
|
290
|
-
|
|
291
|
-
Type-safe flow configuration ensures proper document flow:
|
|
292
|
-
|
|
293
|
-
```python
|
|
294
|
-
from ai_pipeline_core import FlowConfig
|
|
295
|
-
|
|
296
|
-
class ProcessingConfig(FlowConfig):
|
|
297
|
-
INPUT_DOCUMENT_TYPES = [RawDataDocument]
|
|
298
|
-
OUTPUT_DOCUMENT_TYPE = ProcessedDocument # Must be different!
|
|
299
|
-
|
|
300
|
-
# Use in flows for validation
|
|
301
|
-
@pipeline_flow(config=ProcessingConfig)
|
|
302
|
-
async def process(
|
|
303
|
-
project_name: str,
|
|
304
|
-
documents: DocumentList,
|
|
305
|
-
flow_options: FlowOptions
|
|
306
|
-
) -> DocumentList:
|
|
307
|
-
# ... processing logic ...
|
|
308
|
-
return ProcessingConfig.create_and_validate_output(outputs)
|
|
309
|
-
```
|
|
310
|
-
|
|
311
|
-
### Pipeline Decorators
|
|
312
|
-
|
|
313
|
-
Enhanced decorators with built-in tracing and monitoring:
|
|
314
|
-
|
|
315
|
-
```python
|
|
316
|
-
from ai_pipeline_core import pipeline_flow, pipeline_task, set_trace_cost
|
|
317
|
-
|
|
318
|
-
@pipeline_task # Automatic retry, tracing, and monitoring
|
|
319
|
-
async def process_chunk(data: str) -> str:
|
|
320
|
-
result = await transform(data)
|
|
321
|
-
set_trace_cost(0.05) # Track costs
|
|
322
|
-
return result
|
|
323
|
-
|
|
324
|
-
@pipeline_flow(
|
|
325
|
-
config=MyFlowConfig,
|
|
326
|
-
trace_trim_documents=True # Trim large documents in traces
|
|
327
|
-
)
|
|
328
|
-
async def main_flow(
|
|
329
|
-
project_name: str,
|
|
330
|
-
documents: DocumentList,
|
|
331
|
-
flow_options: FlowOptions
|
|
332
|
-
) -> DocumentList:
|
|
333
|
-
# Your pipeline logic
|
|
334
|
-
# Large documents are automatically trimmed to 100 chars in traces
|
|
335
|
-
# for better observability without overwhelming the tracing UI
|
|
336
|
-
return DocumentList(results)
|
|
337
|
-
```
|
|
338
|
-
|
|
339
|
-
### Local Trace Debugging
|
|
340
|
-
|
|
341
|
-
Save all trace spans to the local filesystem for LLM-assisted debugging:
|
|
342
|
-
|
|
343
|
-
```bash
|
|
344
|
-
export TRACE_DEBUG_PATH=/path/to/debug/output
|
|
345
|
-
```
|
|
346
|
-
|
|
347
|
-
This creates a hierarchical directory structure that mirrors the execution flow with automatic deduplication:
|
|
348
|
-
|
|
349
|
-
```
|
|
350
|
-
20260128_152932_abc12345_my_flow/
|
|
351
|
-
├── _trace.yaml # Trace metadata
|
|
352
|
-
├── _index.yaml # Span ID → path mapping
|
|
353
|
-
├── _summary.md # Unified summary for human inspection and LLM debugging
|
|
354
|
-
├── artifacts/ # Deduplicated content storage
|
|
355
|
-
│ └── sha256/
|
|
356
|
-
│ └── ab/cd/ # Sharded by hash prefix
|
|
357
|
-
│ └── abcdef...1234.txt # Large content (>10KB)
|
|
358
|
-
└── 0001_my_flow/ # Root span (numbered for execution order)
|
|
359
|
-
├── _span.yaml # Span metadata (timing, status, I/O refs)
|
|
360
|
-
├── input.yaml # Structured inputs (inline or refs)
|
|
361
|
-
├── output.yaml # Structured outputs (inline or refs)
|
|
362
|
-
├── 0002_task_1/ # Child spans nested inside parent
|
|
363
|
-
│ ├── _span.yaml
|
|
364
|
-
│ ├── input.yaml
|
|
365
|
-
│ ├── output.yaml
|
|
366
|
-
│ └── 0003_llm_call/
|
|
367
|
-
│ ├── _span.yaml
|
|
368
|
-
│ ├── input.yaml # LLM messages with inline/external content
|
|
369
|
-
│ └── output.yaml
|
|
370
|
-
└── 0004_task_2/
|
|
371
|
-
└── ...
|
|
372
|
-
```
|
|
373
|
-
|
|
374
|
-
**Key Features:**
|
|
375
|
-
- **Automatic Deduplication**: Identical content (e.g., system prompts) stored once in `artifacts/`
|
|
376
|
-
- **Smart Externalization**: Large content (>10KB) externalized with 2KB inline previews
|
|
377
|
-
- **AI-Friendly**: Files capped at 50KB for easy LLM processing
|
|
378
|
-
- **Lossless**: Full content reconstruction via `content_ref` pointers
|
|
379
|
-
|
|
380
|
-
Example `input.yaml` with externalization:
|
|
381
|
-
```yaml
|
|
382
|
-
format_version: 3
|
|
383
|
-
type: llm_messages
|
|
384
|
-
messages:
|
|
385
|
-
- role: system
|
|
386
|
-
parts:
|
|
387
|
-
- type: text
|
|
388
|
-
size_bytes: 28500
|
|
389
|
-
content_ref: # Large content → artifact
|
|
390
|
-
hash: sha256:a1b2c3d4...
|
|
391
|
-
path: artifacts/sha256/a1/b2/a1b2c3d4...txt
|
|
392
|
-
excerpt: "You are a helpful assistant...\n[TRUNCATED]"
|
|
393
|
-
- role: user
|
|
394
|
-
parts:
|
|
395
|
-
- type: text
|
|
396
|
-
content: "Hello!" # Small content stays inline
|
|
397
|
-
```
|
|
398
|
-
|
|
399
|
-
Run `tree` on the output directory to visualize the entire execution hierarchy. Feed `_summary.md` to an LLM for debugging assistance: it combines a high-level overview with detailed navigation for comprehensive trace analysis.
|
|
400
|
-
|
|
401
|
-
## Configuration
|
|
402
|
-
|
|
403
|
-
### Environment Variables
|
|
404
|
-
|
|
405
|
-
```bash
|
|
406
|
-
# LLM Configuration (via LiteLLM proxy)
|
|
407
|
-
OPENAI_BASE_URL=http://localhost:4000
|
|
408
|
-
OPENAI_API_KEY=your-api-key
|
|
409
|
-
|
|
410
|
-
# Optional: Observability
|
|
411
|
-
LMNR_PROJECT_API_KEY=your-lmnr-key
|
|
412
|
-
LMNR_DEBUG=true # Enable debug traces
|
|
413
|
-
|
|
414
|
-
# Optional: Local Trace Debugging
|
|
415
|
-
TRACE_DEBUG_PATH=/path/to/trace/output # Save traces locally for LLM-assisted debugging
|
|
416
|
-
|
|
417
|
-
# Optional: Orchestration
|
|
418
|
-
PREFECT_API_URL=http://localhost:4200/api
|
|
419
|
-
PREFECT_API_KEY=your-prefect-key
|
|
420
|
-
|
|
421
|
-
# Optional: Storage (for Google Cloud Storage)
|
|
422
|
-
GCS_SERVICE_ACCOUNT_FILE=/path/to/service-account.json # GCS auth file
|
|
423
|
-
```
|
|
424
|
-
|
|
425
|
-
### Settings Management
|
|
426
|
-
|
|
427
|
-
Create custom settings by inheriting from the base Settings class:
|
|
428
|
-
|
|
429
|
-
```python
|
|
430
|
-
from ai_pipeline_core import Settings
|
|
431
|
-
|
|
432
|
-
class ProjectSettings(Settings):
|
|
433
|
-
"""Project-specific configuration."""
|
|
434
|
-
app_name: str = "my-app"
|
|
435
|
-
max_retries: int = 3
|
|
436
|
-
enable_cache: bool = True
|
|
437
|
-
|
|
438
|
-
# Create singleton instance
|
|
439
|
-
settings = ProjectSettings()
|
|
440
|
-
|
|
441
|
-
# Access configuration
|
|
442
|
-
print(settings.openai_base_url)
|
|
443
|
-
print(settings.app_name)
|
|
444
|
-
```
|
|
445
|
-
|
|
446
|
-
## Best Practices
|
|
447
|
-
|
|
448
|
-
### Framework Rules (90% of Use Cases)
|
|
449
|
-
|
|
450
|
-
1. **Decorators**: Use `@pipeline_task` WITHOUT parameters, `@pipeline_flow` WITH config
|
|
451
|
-
2. **Logging**: Use `get_pipeline_logger(__name__)` - NEVER `print()` or the `logging` module
|
|
452
|
-
3. **LLM calls**: Use `AIMessages` or `str`. Wrap Documents in `AIMessages`
|
|
453
|
-
4. **Options**: Omit `ModelOptions` unless specifically needed (defaults are optimal)
|
|
454
|
-
5. **Documents**: Create with just `name` and `content` - skip `description`
|
|
455
|
-
6. **FlowConfig**: `OUTPUT_DOCUMENT_TYPE` must differ from all `INPUT_DOCUMENT_TYPES`
|
|
456
|
-
7. **Initialization**: `PromptManager` and logger at module scope, not in functions
|
|
457
|
-
8. **DocumentList**: Use default constructor - no validation flags needed
|
|
458
|
-
9. **setup_logging()**: Only in application `main()`, never at import time
|
|
459
|
-
|
|
460
|
-
### Import Convention
|
|
461
|
-
|
|
462
|
-
Always import from the top-level package:
|
|
463
|
-
|
|
464
|
-
```python
|
|
465
|
-
# CORRECT
|
|
466
|
-
from ai_pipeline_core import llm, pipeline_flow, FlowDocument
|
|
467
|
-
|
|
468
|
-
# WRONG - Never import from submodules
|
|
469
|
-
from ai_pipeline_core.llm import generate # NO!
|
|
470
|
-
from ai_pipeline_core.documents import FlowDocument # NO!
|
|
471
|
-
```
|
|
472
|
-
|
|
473
|
-
## Development
|
|
474
|
-
|
|
475
|
-
### Running Tests
|
|
476
|
-
|
|
477
|
-
```bash
|
|
478
|
-
make test # Run all tests
|
|
479
|
-
make test-cov # Run with coverage report
|
|
480
|
-
make test-showcase # Test showcase example
|
|
481
|
-
```
|
|
482
|
-
|
|
483
|
-
### Code Quality
|
|
484
|
-
|
|
485
|
-
```bash
|
|
486
|
-
make lint # Run linting
|
|
487
|
-
make format # Auto-format code
|
|
488
|
-
make typecheck # Type checking with basedpyright
|
|
489
|
-
```
|
|
490
|
-
|
|
491
|
-
### Building Documentation
|
|
492
|
-
|
|
493
|
-
```bash
|
|
494
|
-
make docs-build # Generate API.md
|
|
495
|
-
make docs-check # Verify documentation is up-to-date
|
|
496
|
-
```
|
|
497
|
-
|
|
498
|
-
## Examples
|
|
499
|
-
|
|
500
|
-
The `examples/` directory contains:
|
|
501
|
-
|
|
502
|
-
- `showcase.py` - Comprehensive example demonstrating all major features
|
|
503
|
-
- Run with: `cd examples && python showcase.py /path/to/documents`
|
|
504
|
-
|
|
505
|
-
## API Reference
|
|
506
|
-
|
|
507
|
-
See [API.md](API.md) for complete API documentation.
|
|
508
|
-
|
|
509
|
-
### Navigation Tips
|
|
510
|
-
|
|
511
|
-
For humans:
|
|
512
|
-
```bash
|
|
513
|
-
grep -n '^##' API.md # List all main sections
|
|
514
|
-
grep -n '^###' API.md # List all classes and functions
|
|
515
|
-
```
|
|
516
|
-
|
|
517
|
-
For AI assistants:
|
|
518
|
-
- Use pattern `^##` to find module sections
|
|
519
|
-
- Use pattern `^###` for classes and functions
|
|
520
|
-
- Use pattern `^####` for methods and properties
|
|
521
|
-
|
|
522
|
-
## Project Structure
|
|
523
|
-
|
|
524
|
-
```
|
|
525
|
-
ai-pipeline-core/
|
|
526
|
-
├── ai_pipeline_core/
|
|
527
|
-
│ ├── deployment/ # Pipeline deployment and execution
|
|
528
|
-
│ ├── documents/ # Document abstraction system
|
|
529
|
-
│ ├── flow/ # Flow configuration and options
|
|
530
|
-
│ ├── llm/ # LLM client and response handling
|
|
531
|
-
│ ├── logging/ # Logging infrastructure
|
|
532
|
-
│ ├── prompt_builder/ # Document-aware prompt construction
|
|
533
|
-
│ ├── pipeline.py # Pipeline decorators
|
|
534
|
-
│ ├── progress.py # Intra-flow progress tracking
|
|
535
|
-
│ ├── prompt_manager.py # Jinja2 template management
|
|
536
|
-
│ ├── settings.py # Configuration management
|
|
537
|
-
│ └── tracing.py # Distributed tracing
|
|
538
|
-
├── tests/ # Comprehensive test suite
|
|
539
|
-
├── examples/ # Usage examples
|
|
540
|
-
├── API.md # Complete API reference
|
|
541
|
-
└── pyproject.toml # Project configuration
|
|
542
|
-
```
|
|
543
|
-
|
|
544
|
-
## Contributing
|
|
545
|
-
|
|
546
|
-
1. Fork the repository
|
|
547
|
-
2. Create a feature branch (`git checkout -b feature/amazing-feature`)
|
|
548
|
-
3. Make changes following the project's style guide
|
|
549
|
-
4. Run tests and linting (`make test lint typecheck`)
|
|
550
|
-
5. Commit your changes
|
|
551
|
-
6. Push to the branch (`git push origin feature/amazing-feature`)
|
|
552
|
-
7. Open a Pull Request
|
|
553
|
-
|
|
554
|
-
## License
|
|
555
|
-
|
|
556
|
-
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
557
|
-
|
|
558
|
-
## Support
|
|
559
|
-
|
|
560
|
-
- **Issues**: [GitHub Issues](https://github.com/bbarwik/ai-pipeline-core/issues)
|
|
561
|
-
- **Discussions**: [GitHub Discussions](https://github.com/bbarwik/ai-pipeline-core/discussions)
|
|
562
|
-
- **Documentation**: [API Reference](API.md)
|
|
563
|
-
|
|
564
|
-
## Acknowledgments
|
|
565
|
-
|
|
566
|
-
- Built on [Prefect](https://www.prefect.io/) for workflow orchestration
|
|
567
|
-
- Uses [LiteLLM](https://github.com/BerriAI/litellm) for LLM provider abstraction
|
|
568
|
-
- Integrates [Laminar (LMNR)](https://www.lmnr.ai/) for observability
|
|
569
|
-
- Type checking with [Pydantic](https://pydantic.dev/) and [basedpyright](https://github.com/DetachHead/basedpyright)
|
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
ai_pipeline_core/__init__.py,sha256=2jzEQktQJp-A3bzDU-A7c9xntnY3x9I-1XwYcojYjYE,6452
|
|
2
|
-
ai_pipeline_core/exceptions.py,sha256=vx-XLTw2fJSPs-vwtXVYtqoQUcOc0JeI7UmHqRqQYWU,1569
|
|
3
|
-
ai_pipeline_core/pipeline.py,sha256=t9qH-V6umpKY5MhGuXFgUGfdzGyxzVlS0n9RoKLfnug,28704
|
|
4
|
-
ai_pipeline_core/prefect.py,sha256=91ZgLJHsDsRUW77CpNmkKxYs3RCJuucPM3pjKmNBeDg,2199
|
|
5
|
-
ai_pipeline_core/progress.py,sha256=Ppxk4OOm84Y0x3t-Y3CmHsL4PovQLNUxXMu24zRCD-Q,3621
|
|
6
|
-
ai_pipeline_core/prompt_manager.py,sha256=FAtb1yK7bGuAeuIJ523LOX9bd7TrcHG-TqZ7Lz4RJC0,12087
|
|
7
|
-
ai_pipeline_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
-
ai_pipeline_core/settings.py,sha256=IMrFaX0i-WIlaOA5O53ipNSta6KQVSFHc1aJXmS3nSo,5078
|
|
9
|
-
ai_pipeline_core/tracing.py,sha256=HJ_DJhCEk6W_u3skecjETMQVLyOmbuPcqcOuoMIJlPs,33194
|
|
10
|
-
ai_pipeline_core/debug/__init__.py,sha256=wOc9KotFqGYzBEtZUZ7ATfJf3dXWarYm6PXs6yW9uwE,756
|
|
11
|
-
ai_pipeline_core/debug/config.py,sha256=l5WC2xbd6PgC-CcuioZg696iva_MkqyZj4C9TFdwfMs,3205
|
|
12
|
-
ai_pipeline_core/debug/content.py,sha256=REtA1cJnOJy3OqaGud59B3Bug8cOJszm8w1GCqdAKJs,26696
|
|
13
|
-
ai_pipeline_core/debug/processor.py,sha256=Cvm1HKc6lKRm80Xx7WXi_Z8pWoKH6actVZvntP9Mons,3935
|
|
14
|
-
ai_pipeline_core/debug/summary.py,sha256=pzXC7QoFOBeen_XZ-AMFAVvaOtDuf28YB-WwCbsHYdQ,8017
|
|
15
|
-
ai_pipeline_core/debug/writer.py,sha256=IF5eyML10EBFBqCGqlVwcWKraFvTgfqbU8WJ_XG_RU4,33108
|
|
16
|
-
ai_pipeline_core/deployment/__init__.py,sha256=FN2HVoM80x2GJuNs7o4DnccB8HWWibgM1pJesB942CM,1259
|
|
17
|
-
ai_pipeline_core/deployment/base.py,sha256=JYf8XLFR73c0H24dr6atK7yUcoE0vLxbYZ8EkQpEwN4,24791
|
|
18
|
-
ai_pipeline_core/deployment/contract.py,sha256=0DKt5eqNE-grcITwMNq9CuBdo5WxdopEjDeQFzFZxhU,2225
|
|
19
|
-
ai_pipeline_core/deployment/helpers.py,sha256=3nRuCyABkUEDZiL0q9u19XHpjA4527B6rsxQNOGTohw,3460
|
|
20
|
-
ai_pipeline_core/documents/__init__.py,sha256=WHStvGZiSyybOcMTYxSV24U6MA3Am_0_Az5p-DuMFrk,738
|
|
21
|
-
ai_pipeline_core/documents/document.py,sha256=hdTh36KGEcrDollTnQmTI66DJIqYfe4X42Y0q7Cm4fY,68153
|
|
22
|
-
ai_pipeline_core/documents/document_list.py,sha256=Y_NCjfM_CjkIwHRD2iyGgYBuIykN8lT2IIH_uWOiGis,16254
|
|
23
|
-
ai_pipeline_core/documents/flow_document.py,sha256=QK6RxNQu449IRAosOHSk3G_5yIq5I7yLBOSQPCd3m64,4141
|
|
24
|
-
ai_pipeline_core/documents/mime_type.py,sha256=JFEOq4HwlIW2snobyNfWwySdT7urZSWkobiRMVs2fSE,7959
|
|
25
|
-
ai_pipeline_core/documents/task_document.py,sha256=uASmAaxNkYtuqQrBM57vutFT9DXNTbqv0wbwwF55E3I,4300
|
|
26
|
-
ai_pipeline_core/documents/temporary_document.py,sha256=jaz2ZHC5CmSbVbkXdI7pOB5DGEuhH16C0Yutv-lS_UI,2708
|
|
27
|
-
ai_pipeline_core/documents/utils.py,sha256=ZyJNjFN7ihWno0K7dJZed7twYmmPLA0z40UzFw1A3A8,5465
|
|
28
|
-
ai_pipeline_core/flow/__init__.py,sha256=2BfWYMOPYW5teGzwo-qzpn_bom1lxxry0bPsjVgcsCk,188
|
|
29
|
-
ai_pipeline_core/flow/config.py,sha256=a9FALpgrFsdz-D7HU3diVeUzbaBvLwI8hsPviuj001s,19389
|
|
30
|
-
ai_pipeline_core/flow/options.py,sha256=s5GBTy5lwFa1irf8BKrWO8NMZ5s_f4tqq7Wg9WQ7TTg,2302
|
|
31
|
-
ai_pipeline_core/images/__init__.py,sha256=6R6Ncif6oRyVOH7LsdwNvEuMGHuljo-_gImY8C3Z_ow,9877
|
|
32
|
-
ai_pipeline_core/images/_processing.py,sha256=wKSBAFe5TO-mo64ll20nmN9huazHwvVWFfNJB6g7u2Q,4421
|
|
33
|
-
ai_pipeline_core/llm/__init__.py,sha256=3B_vtEzxrzidP1qOUNQ4RxlUmxZ2MBKQcUhQiTybM9g,661
|
|
34
|
-
ai_pipeline_core/llm/ai_messages.py,sha256=XR2fwzguuh7v-HQ9LEJX_xwNX1D_-6f4T7E8_iNVTS4,15680
|
|
35
|
-
ai_pipeline_core/llm/client.py,sha256=777Zf5BBRA-6g1I4Og8mitpoCPdYMp66WE52wH-1I-o,24910
|
|
36
|
-
ai_pipeline_core/llm/model_options.py,sha256=uRNIHfVeh2sgt1mZBiOUx6hPQ6GKjB8b7TytZJ6afKg,11768
|
|
37
|
-
ai_pipeline_core/llm/model_response.py,sha256=zEANsfuSAYVRKPwKx9gFIqHbdVG_1_JNMRHNoE43_YM,13503
|
|
38
|
-
ai_pipeline_core/llm/model_types.py,sha256=wx-m0up7_NncTmSYmMsL-l-RgydjjJ905u7RMEAg7tI,2710
|
|
39
|
-
ai_pipeline_core/logging/__init__.py,sha256=Nz6-ghAoENsgNmLD2ma9TW9M0U2_QfxuQ5DDW6Vt6M0,651
|
|
40
|
-
ai_pipeline_core/logging/logging.yml,sha256=YTW48keO_K5bkkb-KXGM7ZuaYKiquLsjsURei8Ql0V4,1353
|
|
41
|
-
ai_pipeline_core/logging/logging_config.py,sha256=pV2x6GgMPXrzPH27sicCSXfw56beio4C2JKCJ3NsXrg,6207
|
|
42
|
-
ai_pipeline_core/logging/logging_mixin.py,sha256=OpdR3ASiM2ZwKZYGjZRJFUloGWUCv2Grnr8RqUWlYn8,8094
|
|
43
|
-
ai_pipeline_core/prompt_builder/__init__.py,sha256=-v0SKZlir07xRzxXwv75VP66aINRUiKH0VUgB-PCDmI,195
|
|
44
|
-
ai_pipeline_core/prompt_builder/documents_prompt.jinja2,sha256=LPql5AaFhFWtDfhnBWvi-bWbz5vdgsWqKGzcqxWfLIM,1075
|
|
45
|
-
ai_pipeline_core/prompt_builder/global_cache.py,sha256=9_9zoF6-sr3KBMxF5QLD3vxqXg9B2tT8o9ViplzUCNg,2811
|
|
46
|
-
ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2,sha256=M8uPpwf-uLpsWWJT9DY_DnjrLToGPVnrD-gVhQrQdaQ,229
|
|
47
|
-
ai_pipeline_core/prompt_builder/prompt_builder.py,sha256=4TrDRPiOMFwEfi6QGfriTHfjzj_CtbEjAcgQrVfRqhw,9378
|
|
48
|
-
ai_pipeline_core/prompt_builder/system_prompt.jinja2,sha256=-1jLcfvAG07Zfl-dnYrjfVcAG4PWeeoeWpaKJGY3rKQ,3945
|
|
49
|
-
ai_pipeline_core/storage/__init__.py,sha256=tcIkjJ3zPBLCyetwiJDewBvS2sbRJrDlBh3gEsQm08E,184
|
|
50
|
-
ai_pipeline_core/storage/storage.py,sha256=ClMr419Y-eU2RuOjZYd51dC0stWQk28Vb56PvQaoUwc,20007
|
|
51
|
-
ai_pipeline_core/utils/__init__.py,sha256=TJSmEm1Quf-gKwXrxM96u2IGzVolUyeNNfLMPoLstXI,254
|
|
52
|
-
ai_pipeline_core/utils/deploy.py,sha256=N3i7B97DQJs1lwgYN3sa1UgwCNjseKXfjs50ZJUMCEI,22106
|
|
53
|
-
ai_pipeline_core/utils/remote_deployment.py,sha256=U7MNJ1SU1mg3RrJyLqpuN_4pwqm8LSsFZbypJvjGPoo,4630
|
|
54
|
-
ai_pipeline_core-0.3.3.dist-info/METADATA,sha256=WCRBGS2kO8916jlGc6jY_YuPwzw8diXfeNSrNFKxuvk,17893
|
|
55
|
-
ai_pipeline_core-0.3.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
56
|
-
ai_pipeline_core-0.3.3.dist-info/licenses/LICENSE,sha256=kKj8mfbdWwkyG3U6n7ztB3bAZlEwShTkAsvaY657i3I,1074
|
|
57
|
-
ai_pipeline_core-0.3.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|