ai-pipeline-core 0.1.13__tar.gz → 0.2.0__tar.gz
This diff compares the contents of two publicly released versions of this package, as published to a supported public registry. It is provided for informational purposes only.
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/.gitignore +1 -0
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/PKG-INFO +60 -23
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/README.md +58 -22
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/__init__.py +25 -14
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/documents/__init__.py +2 -1
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/documents/document.py +317 -49
- ai_pipeline_core-0.2.0/ai_pipeline_core/documents/document_list.py +343 -0
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/documents/flow_document.py +8 -29
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/documents/task_document.py +6 -27
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/documents/temporary_document.py +6 -27
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/documents/utils.py +64 -1
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/flow/config.py +174 -5
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/flow/options.py +2 -2
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/llm/__init__.py +6 -1
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/llm/ai_messages.py +14 -7
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/llm/client.py +143 -55
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/llm/model_options.py +20 -5
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/llm/model_response.py +77 -29
- ai_pipeline_core-0.2.0/ai_pipeline_core/llm/model_types.py +82 -0
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/logging/__init__.py +0 -2
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/logging/logging_config.py +0 -6
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/logging/logging_mixin.py +2 -10
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/pipeline.py +68 -65
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/prefect.py +12 -3
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/prompt_manager.py +6 -7
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/settings.py +13 -5
- ai_pipeline_core-0.2.0/ai_pipeline_core/simple_runner/__init__.py +14 -0
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/simple_runner/cli.py +13 -12
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/simple_runner/simple_runner.py +34 -172
- ai_pipeline_core-0.2.0/ai_pipeline_core/storage/__init__.py +8 -0
- ai_pipeline_core-0.2.0/ai_pipeline_core/storage/storage.py +628 -0
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/tracing.py +110 -26
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/pyproject.toml +4 -2
- ai_pipeline_core-0.1.13/ai_pipeline_core/documents/document_list.py +0 -240
- ai_pipeline_core-0.1.13/ai_pipeline_core/llm/model_types.py +0 -84
- ai_pipeline_core-0.1.13/ai_pipeline_core/simple_runner/__init__.py +0 -24
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/LICENSE +0 -0
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/documents/mime_type.py +0 -0
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/exceptions.py +0 -0
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/flow/__init__.py +0 -0
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/logging/logging.yml +0 -0
- {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/py.typed +0 -0
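
The most consequential change in this release is that `@pipeline_flow` now takes the flow's `FlowConfig` as a decorator argument instead of the flow instantiating its config internally (see the README and PKG-INFO hunks below). A minimal before/after sketch, reusing the `InputDoc`/`OutputDoc`/`AnalysisConfig` names from the README example; the loop body is illustrative, not code shipped in the package:

```python
from ai_pipeline_core import (
    DocumentList,
    FlowConfig,
    FlowDocument,
    FlowOptions,
    pipeline_flow,
)


class InputDoc(FlowDocument):
    """Input document (illustrative)."""


class OutputDoc(FlowDocument):
    """Output document (illustrative)."""


class AnalysisConfig(FlowConfig):
    INPUT_DOCUMENT_TYPES = [InputDoc]
    OUTPUT_DOCUMENT_TYPE = OutputDoc  # must differ from every input type


# 0.1.13: `@pipeline_flow` was applied bare and the flow built its own
# `AnalysisConfig()` instance. 0.2.0: the config is passed to the decorator.
@pipeline_flow(config=AnalysisConfig)
async def analyze_flow(
    project_name: str,
    documents: DocumentList,
    flow_options: FlowOptions,
) -> DocumentList:
    outputs = [OutputDoc.create(name=doc.name, content=doc.text) for doc in documents]
    return AnalysisConfig.create_and_validate_output(outputs)
```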
--- ai_pipeline_core-0.1.13/PKG-INFO
+++ ai_pipeline_core-0.2.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-pipeline-core
-Version: 0.1.13
+Version: 0.2.0
 Summary: Core utilities for AI-powered processing pipelines using prefect
 Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
 Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -22,6 +22,7 @@ Requires-Dist: httpx>=0.28.1
 Requires-Dist: jinja2>=3.1.6
 Requires-Dist: lmnr>=0.7.6
 Requires-Dist: openai>=1.99.9
+Requires-Dist: prefect-gcp[cloud-storage]>=0.6.10
 Requires-Dist: prefect>=3.4.13
 Requires-Dist: pydantic-settings>=2.10.1
 Requires-Dist: pydantic>=2.11.7
@@ -57,11 +58,11 @@ AI Pipeline Core is a production-ready framework that combines document processi
 
 ### Key Features
 
-- **Document Processing**: Type-safe handling of text, JSON, YAML, PDFs, and images with automatic MIME type detection
-- **LLM Integration**: Unified interface to any model via LiteLLM proxy with
+- **Document Processing**: Type-safe handling of text, JSON, YAML, PDFs, and images with automatic MIME type detection and provenance tracking
+- **LLM Integration**: Unified interface to any model via LiteLLM proxy with configurable context caching
 - **Structured Output**: Type-safe generation with Pydantic model validation
 - **Workflow Orchestration**: Prefect-based flows and tasks with automatic retries
-- **Observability**: Built-in distributed tracing via Laminar (LMNR) for debugging and monitoring
+- **Observability**: Built-in distributed tracing via Laminar (LMNR) with cost tracking for debugging and monitoring
 - **Local Development**: Simple runner for testing pipelines without infrastructure
 
 ## Installation
@@ -111,15 +112,13 @@ class AnalysisConfig(FlowConfig):
     INPUT_DOCUMENT_TYPES = [InputDoc]
     OUTPUT_DOCUMENT_TYPE = OutputDoc
 
-# Create pipeline flow
-@pipeline_flow
+# Create pipeline flow with required config
+@pipeline_flow(config=AnalysisConfig)
 async def analyze_flow(
     project_name: str,
     documents: DocumentList,
     flow_options: FlowOptions
 ) -> DocumentList:
-    config = AnalysisConfig()
-
     # Process documents
     outputs = []
     for doc in documents:
@@ -136,7 +135,7 @@ async def analyze_flow(
         outputs.append(output)
 
     # RECOMMENDED: Always validate output
-    return
+    return AnalysisConfig.create_and_validate_output(outputs)
 ```
 
 ### Structured Output
@@ -178,6 +177,19 @@ doc = MyDocument.create(
 # Parse back to original type
 data = doc.parse(dict) # Returns {"key": "value"}
 
+# Document provenance tracking (new in v0.1.14)
+doc_with_sources = MyDocument.create(
+    name="derived.json",
+    content={"result": "processed"},
+    sources=[source_doc.sha256, "https://api.example.com/data"]
+)
+
+# Check provenance
+for hash in doc_with_sources.get_source_documents():
+    print(f"Derived from document: {hash}")
+for ref in doc_with_sources.get_source_references():
+    print(f"External source: {ref}")
+
 # Temporary documents (never persisted)
 temp = TemporaryDocument.create(
     name="api_response.json",
@@ -211,6 +223,10 @@ if doc.is_text:
 
 # Parse structured data
 data = doc.as_json() # or as_yaml(), as_pydantic_model()
+
+# Enhanced filtering (new in v0.1.14)
+filtered = documents.filter_by([Doc1, Doc2, Doc3]) # Multiple types
+named = documents.filter_by(["file1.txt", "file2.txt"]) # Multiple names
 ```
 
 ### LLM Integration
@@ -233,7 +249,7 @@ static_context = AIMessages([large_document])
 # First call: caches context
 r1 = await llm.generate(
     model="gpt-5",
-    context=static_context, # Cached for 120 seconds
+    context=static_context, # Cached for 120 seconds by default
     messages="Summarize" # Dynamic query
 )
 
@@ -243,6 +259,22 @@ r2 = await llm.generate(
     context=static_context, # Reused from cache!
     messages="Key points?" # Different query
 )
+
+# Custom cache TTL (new in v0.1.14)
+response = await llm.generate(
+    model="gpt-5",
+    context=static_context,
+    messages="Analyze",
+    options=ModelOptions(cache_ttl="300s") # Cache for 5 minutes
+)
+
+# Disable caching for dynamic contexts
+response = await llm.generate(
+    model="gpt-5",
+    context=dynamic_context,
+    messages="Process",
+    options=ModelOptions(cache_ttl=None) # No caching
+)
 ```
 
 ### Flow Configuration
@@ -256,15 +288,15 @@ class ProcessingConfig(FlowConfig):
     INPUT_DOCUMENT_TYPES = [RawDataDocument]
     OUTPUT_DOCUMENT_TYPE = ProcessedDocument # Must be different!
 
-
-
-
-
-
-
-
-
-
+# Use in flows for validation
+@pipeline_flow(config=ProcessingConfig)
+async def process(
+    project_name: str,
+    documents: DocumentList,
+    flow_options: FlowOptions
+) -> DocumentList:
+    # ... processing logic ...
+    return ProcessingConfig.create_and_validate_output(outputs)
 ```
 
 ### Pipeline Decorators
@@ -272,13 +304,15 @@ class ProcessingConfig(FlowConfig):
 Enhanced decorators with built-in tracing and monitoring:
 
 ```python
-from ai_pipeline_core import pipeline_flow, pipeline_task
+from ai_pipeline_core import pipeline_flow, pipeline_task, set_trace_cost
 
 @pipeline_task # Automatic retry, tracing, and monitoring
 async def process_chunk(data: str) -> str:
-
+    result = await transform(data)
+    set_trace_cost(0.05) # Track costs (new in v0.1.14)
+    return result
 
-@pipeline_flow # Full observability and orchestration
+@pipeline_flow(config=MyFlowConfig) # Full observability and orchestration
 async def main_flow(
     project_name: str,
     documents: DocumentList,
@@ -304,6 +338,9 @@ LMNR_DEBUG=true # Enable debug traces
 # Optional: Orchestration
 PREFECT_API_URL=http://localhost:4200/api
 PREFECT_API_KEY=your-prefect-key
+
+# Optional: Storage (for Google Cloud Storage)
+GCS_SERVICE_ACCOUNT_FILE=/path/to/service-account.json # GCS auth file
 ```
 
 ### Settings Management
@@ -331,7 +368,7 @@ print(settings.app_name)
 
 ### Framework Rules (90% Use Cases)
 
-1. **Decorators**: Use `@
+1. **Decorators**: Use `@pipeline_task` WITHOUT parameters, `@pipeline_flow` WITH config
 2. **Logging**: Use `get_pipeline_logger(__name__)` - NEVER `print()` or `logging` module
 3. **LLM calls**: Use `AIMessages` or `str`. Wrap Documents in `AIMessages`
 4. **Options**: Omit `ModelOptions` unless specifically needed (defaults are optimal)
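
Taken together, the provenance additions above compose as in the following sketch. It uses only calls shown in the diff (`create(..., sources=...)`, the `sha256` property, `get_source_documents()`, `get_source_references()`); `ReportDoc` and the URL are illustrative placeholders:

```python
from ai_pipeline_core import FlowDocument


class ReportDoc(FlowDocument):
    """Derived report document (illustrative)."""


raw = ReportDoc.create(name="raw.json", content={"rows": [1, 2, 3]})

# `sources` may mix sha256 hashes of other documents with external references.
report = ReportDoc.create(
    name="report.json",
    content={"summary": "3 rows"},
    sources=[raw.sha256, "https://api.example.com/data"],
)

assert raw.sha256 in report.get_source_documents()  # document hashes only
assert "https://api.example.com/data" in report.get_source_references()  # external refs
```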
--- ai_pipeline_core-0.1.13/README.md
+++ ai_pipeline_core-0.2.0/README.md
@@ -13,11 +13,11 @@ AI Pipeline Core is a production-ready framework that combines document processi
 
 ### Key Features
 
-- **Document Processing**: Type-safe handling of text, JSON, YAML, PDFs, and images with automatic MIME type detection
-- **LLM Integration**: Unified interface to any model via LiteLLM proxy with
+- **Document Processing**: Type-safe handling of text, JSON, YAML, PDFs, and images with automatic MIME type detection and provenance tracking
+- **LLM Integration**: Unified interface to any model via LiteLLM proxy with configurable context caching
 - **Structured Output**: Type-safe generation with Pydantic model validation
 - **Workflow Orchestration**: Prefect-based flows and tasks with automatic retries
-- **Observability**: Built-in distributed tracing via Laminar (LMNR) for debugging and monitoring
+- **Observability**: Built-in distributed tracing via Laminar (LMNR) with cost tracking for debugging and monitoring
 - **Local Development**: Simple runner for testing pipelines without infrastructure
 
 ## Installation
@@ -67,15 +67,13 @@ class AnalysisConfig(FlowConfig):
     INPUT_DOCUMENT_TYPES = [InputDoc]
     OUTPUT_DOCUMENT_TYPE = OutputDoc
 
-# Create pipeline flow
-@pipeline_flow
+# Create pipeline flow with required config
+@pipeline_flow(config=AnalysisConfig)
 async def analyze_flow(
     project_name: str,
     documents: DocumentList,
     flow_options: FlowOptions
 ) -> DocumentList:
-    config = AnalysisConfig()
-
     # Process documents
     outputs = []
     for doc in documents:
@@ -92,7 +90,7 @@ async def analyze_flow(
         outputs.append(output)
 
     # RECOMMENDED: Always validate output
-    return
+    return AnalysisConfig.create_and_validate_output(outputs)
 ```
 
 ### Structured Output
@@ -134,6 +132,19 @@ doc = MyDocument.create(
 # Parse back to original type
 data = doc.parse(dict) # Returns {"key": "value"}
 
+# Document provenance tracking (new in v0.1.14)
+doc_with_sources = MyDocument.create(
+    name="derived.json",
+    content={"result": "processed"},
+    sources=[source_doc.sha256, "https://api.example.com/data"]
+)
+
+# Check provenance
+for hash in doc_with_sources.get_source_documents():
+    print(f"Derived from document: {hash}")
+for ref in doc_with_sources.get_source_references():
+    print(f"External source: {ref}")
+
 # Temporary documents (never persisted)
 temp = TemporaryDocument.create(
     name="api_response.json",
@@ -167,6 +178,10 @@ if doc.is_text:
 
 # Parse structured data
 data = doc.as_json() # or as_yaml(), as_pydantic_model()
+
+# Enhanced filtering (new in v0.1.14)
+filtered = documents.filter_by([Doc1, Doc2, Doc3]) # Multiple types
+named = documents.filter_by(["file1.txt", "file2.txt"]) # Multiple names
 ```
 
 ### LLM Integration
@@ -189,7 +204,7 @@ static_context = AIMessages([large_document])
 # First call: caches context
 r1 = await llm.generate(
     model="gpt-5",
-    context=static_context, # Cached for 120 seconds
+    context=static_context, # Cached for 120 seconds by default
     messages="Summarize" # Dynamic query
 )
 
@@ -199,6 +214,22 @@ r2 = await llm.generate(
     context=static_context, # Reused from cache!
     messages="Key points?" # Different query
 )
+
+# Custom cache TTL (new in v0.1.14)
+response = await llm.generate(
+    model="gpt-5",
+    context=static_context,
+    messages="Analyze",
+    options=ModelOptions(cache_ttl="300s") # Cache for 5 minutes
+)
+
+# Disable caching for dynamic contexts
+response = await llm.generate(
+    model="gpt-5",
+    context=dynamic_context,
+    messages="Process",
+    options=ModelOptions(cache_ttl=None) # No caching
+)
 ```
 
 ### Flow Configuration
@@ -212,15 +243,15 @@ class ProcessingConfig(FlowConfig):
     INPUT_DOCUMENT_TYPES = [RawDataDocument]
     OUTPUT_DOCUMENT_TYPE = ProcessedDocument # Must be different!
 
-
-
-
-
-
-
-
-
-
+# Use in flows for validation
+@pipeline_flow(config=ProcessingConfig)
+async def process(
+    project_name: str,
+    documents: DocumentList,
+    flow_options: FlowOptions
+) -> DocumentList:
+    # ... processing logic ...
+    return ProcessingConfig.create_and_validate_output(outputs)
 ```
 
 ### Pipeline Decorators
@@ -228,13 +259,15 @@ class ProcessingConfig(FlowConfig):
 Enhanced decorators with built-in tracing and monitoring:
 
 ```python
-from ai_pipeline_core import pipeline_flow, pipeline_task
+from ai_pipeline_core import pipeline_flow, pipeline_task, set_trace_cost
 
 @pipeline_task # Automatic retry, tracing, and monitoring
 async def process_chunk(data: str) -> str:
-
+    result = await transform(data)
+    set_trace_cost(0.05) # Track costs (new in v0.1.14)
+    return result
 
-@pipeline_flow # Full observability and orchestration
+@pipeline_flow(config=MyFlowConfig) # Full observability and orchestration
 async def main_flow(
     project_name: str,
     documents: DocumentList,
@@ -260,6 +293,9 @@ LMNR_DEBUG=true # Enable debug traces
 # Optional: Orchestration
 PREFECT_API_URL=http://localhost:4200/api
 PREFECT_API_KEY=your-prefect-key
+
+# Optional: Storage (for Google Cloud Storage)
+GCS_SERVICE_ACCOUNT_FILE=/path/to/service-account.json # GCS auth file
 ```
 
 ### Settings Management
@@ -287,7 +323,7 @@ print(settings.app_name)
 
 ### Framework Rules (90% Use Cases)
 
-1. **Decorators**: Use `@
+1. **Decorators**: Use `@pipeline_task` WITHOUT parameters, `@pipeline_flow` WITH config
 2. **Logging**: Use `get_pipeline_logger(__name__)` - NEVER `print()` or `logging` module
 3. **LLM calls**: Use `AIMessages` or `str`. Wrap Documents in `AIMessages`
 4. **Options**: Omit `ModelOptions` unless specifically needed (defaults are optimal)
--- ai_pipeline_core-0.1.13/ai_pipeline_core/__init__.py
+++ ai_pipeline_core-0.2.0/ai_pipeline_core/__init__.py
@@ -7,7 +7,7 @@ It combines document processing, LLM integration, and workflow orchestration int
 system designed for production use.
 
 The framework enforces best practices through strong typing (Pydantic), automatic retries,
-cost tracking, and distributed tracing. All I/O operations are async for maximum throughput.
+and cost tracking. All I/O operations are async for maximum throughput.
 
 **CRITICAL IMPORT RULE**:
 Always import from the top-level package:
@@ -18,12 +18,12 @@ cost tracking, and distributed tracing. All I/O operations are async for maximum
     from ai_pipeline_core.llm import generate # NO!
     from ai_pipeline_core.documents import FlowDocument # NO!
 
-FRAMEWORK RULES (
-1. Decorators: Use @
+FRAMEWORK RULES (Use by default, unless instructed otherwise):
+1. Decorators: Use @pipeline_task WITHOUT parameters, @pipeline_flow WITH config
 2. Logging: Use get_pipeline_logger(__name__) - NEVER print() or logging module
 3. LLM calls: Use AIMessages or str. Wrap Documents in AIMessages; do not call .text yourself
-4. Options:
-5. Documents: Create with just name and content - skip description
+4. Options: DO NOT use options parameter - omit it entirely (defaults are optimal)
+5. Documents: Create with just name and content - skip description unless needed
 6. FlowConfig: OUTPUT_DOCUMENT_TYPE must differ from all INPUT_DOCUMENT_TYPES
 7. Initialization: PromptManager and logger at module scope, not in functions
 8. DocumentList: Use default constructor - no validation flags needed
@@ -36,18 +36,22 @@ Core Capabilities:
 - **LLM Integration**: Unified interface to any model via LiteLLM with caching
 - **Structured Output**: Type-safe generation with Pydantic model validation
 - **Workflow Orchestration**: Prefect-based flows and tasks with retries
-- **Observability**:
+- **Observability**: Built-in monitoring and debugging capabilities
 - **Local Development**: Simple runner for testing without infrastructure
 
 Quick Start:
     >>> from ai_pipeline_core import (
-    ...     pipeline_flow, FlowDocument, DocumentList, FlowOptions, llm, AIMessages
+    ...     pipeline_flow, FlowDocument, DocumentList, FlowOptions, FlowConfig, llm, AIMessages
     ... )
     >>>
     >>> class OutputDoc(FlowDocument):
     ...     '''Analysis result document.'''
     >>>
-    >>>
+    >>> class MyFlowConfig(FlowConfig):
+    ...     INPUT_DOCUMENT_TYPES = []
+    ...     OUTPUT_DOCUMENT_TYPE = OutputDoc
+    >>>
+    >>> @pipeline_flow(config=MyFlowConfig)
     >>> async def analyze_flow(
     ...     project_name: str,
     ...     documents: DocumentList,
@@ -55,7 +59,7 @@ Quick Start:
     ... ) -> DocumentList:
     ...     # Messages accept AIMessages or str. Wrap documents: AIMessages([doc])
     ...     response = await llm.generate(
-    ...
+    ...         "gpt-5",
     ...         messages=AIMessages([documents[0]])
     ...     )
     ...     result = OutputDoc.create(
@@ -76,8 +80,6 @@ Optional Environment Variables:
     - PREFECT_API_KEY: Prefect API authentication key
     - LMNR_PROJECT_API_KEY: Laminar (LMNR) API key for tracing
     - LMNR_DEBUG: Set to "true" to enable debug-level traces
-    - LMNR_SESSION_ID: Default session ID for traces
-    - LMNR_USER_ID: Default user ID for traces
 """
 
 from . import llm
@@ -88,6 +90,7 @@ from .documents import (
    TaskDocument,
     TemporaryDocument,
     canonical_name_key,
+    is_document_sha256,
     sanitize_url,
 )
 from .flow import FlowConfig, FlowOptions
@@ -98,6 +101,8 @@ from .llm import (
     ModelOptions,
     ModelResponse,
     StructuredModelResponse,
+    generate,
+    generate_structured,
 )
 from .logging import (
     LoggerMixin,
@@ -111,9 +116,9 @@ from .pipeline import pipeline_flow, pipeline_task
 from .prefect import disable_run_logger, prefect_test_harness
 from .prompt_manager import PromptManager
 from .settings import Settings
-from .tracing import TraceInfo, TraceLevel, trace
+from .tracing import TraceInfo, TraceLevel, set_trace_cost, trace
 
-__version__ = "0.1.13"
+__version__ = "0.2.0"
 
 __all__ = [
     # Config/Settings
@@ -132,6 +137,7 @@ __all__ = [
     "TaskDocument",
     "TemporaryDocument",
     "canonical_name_key",
+    "is_document_sha256",
     "sanitize_url",
     # Flow/Task
     "FlowConfig",
@@ -143,7 +149,9 @@ __all__ = [
     "prefect_test_harness",
     "disable_run_logger",
     # LLM
-    "llm",
+    "llm", # for backward compatibility
+    "generate",
+    "generate_structured",
     "ModelName",
     "ModelOptions",
     "ModelResponse",
@@ -154,6 +162,9 @@ __all__ = [
     "trace",
     "TraceLevel",
     "TraceInfo",
+    "set_trace_cost",
     # Utils
     "PromptManager",
+    "generate",
+    "generate_structured",
 ]
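
With `generate` and `generate_structured` now re-exported from the package root (and the `llm` module kept for backward compatibility), LLM calls can follow the package's own critical import rule without reaching into `ai_pipeline_core.llm`. A sketch, assuming the top-level `generate` shares the `llm.generate` call shape shown in the README:

```python
from ai_pipeline_core import generate


async def summarize(text: str):
    # Same call shape as the README's `llm.generate(...)`; only the import changed.
    return await generate("gpt-5", messages=text)
```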
--- ai_pipeline_core-0.1.13/ai_pipeline_core/documents/__init__.py
+++ ai_pipeline_core-0.2.0/ai_pipeline_core/documents/__init__.py
@@ -12,7 +12,7 @@ from .document_list import DocumentList
 from .flow_document import FlowDocument
 from .task_document import TaskDocument
 from .temporary_document import TemporaryDocument
-from .utils import canonical_name_key, sanitize_url
+from .utils import canonical_name_key, is_document_sha256, sanitize_url
 
 __all__ = [
     "Document",
@@ -21,5 +21,6 @@ __all__ = [
     "TaskDocument",
     "TemporaryDocument",
     "canonical_name_key",
+    "is_document_sha256",
     "sanitize_url",
 ]
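
Finally, the newly exported `is_document_sha256` pairs naturally with the `sources` field: it distinguishes document hashes from external references in a mixed list. The str-to-bool signature here is an assumption inferred from the name and from how `get_source_documents()`/`get_source_references()` split such lists:

```python
from ai_pipeline_core import TemporaryDocument, is_document_sha256

raw = TemporaryDocument.create(name="raw.json", content={"rows": 3})
sources = [raw.sha256, "https://api.example.com/data"]  # mixed, as in the README

doc_hashes = [s for s in sources if is_document_sha256(s)]      # [raw.sha256]
references = [s for s in sources if not is_document_sha256(s)]  # [the URL]
```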