ai-pipeline-core 0.2.9__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +32 -5
- ai_pipeline_core/debug/__init__.py +26 -0
- ai_pipeline_core/debug/config.py +91 -0
- ai_pipeline_core/debug/content.py +705 -0
- ai_pipeline_core/debug/processor.py +99 -0
- ai_pipeline_core/debug/summary.py +236 -0
- ai_pipeline_core/debug/writer.py +913 -0
- ai_pipeline_core/deployment/__init__.py +46 -0
- ai_pipeline_core/deployment/base.py +681 -0
- ai_pipeline_core/deployment/contract.py +84 -0
- ai_pipeline_core/deployment/helpers.py +98 -0
- ai_pipeline_core/documents/flow_document.py +1 -1
- ai_pipeline_core/documents/task_document.py +1 -1
- ai_pipeline_core/documents/temporary_document.py +1 -1
- ai_pipeline_core/flow/config.py +13 -2
- ai_pipeline_core/flow/options.py +4 -4
- ai_pipeline_core/images/__init__.py +362 -0
- ai_pipeline_core/images/_processing.py +157 -0
- ai_pipeline_core/llm/ai_messages.py +25 -4
- ai_pipeline_core/llm/client.py +15 -19
- ai_pipeline_core/llm/model_response.py +5 -5
- ai_pipeline_core/llm/model_types.py +10 -13
- ai_pipeline_core/logging/logging_mixin.py +2 -2
- ai_pipeline_core/pipeline.py +1 -1
- ai_pipeline_core/progress.py +127 -0
- ai_pipeline_core/prompt_builder/__init__.py +5 -0
- ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +23 -0
- ai_pipeline_core/prompt_builder/global_cache.py +78 -0
- ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +6 -0
- ai_pipeline_core/prompt_builder/prompt_builder.py +253 -0
- ai_pipeline_core/prompt_builder/system_prompt.jinja2 +41 -0
- ai_pipeline_core/tracing.py +54 -2
- ai_pipeline_core/utils/deploy.py +214 -6
- ai_pipeline_core/utils/remote_deployment.py +37 -187
- {ai_pipeline_core-0.2.9.dist-info → ai_pipeline_core-0.3.3.dist-info}/METADATA +96 -27
- ai_pipeline_core-0.3.3.dist-info/RECORD +57 -0
- {ai_pipeline_core-0.2.9.dist-info → ai_pipeline_core-0.3.3.dist-info}/WHEEL +1 -1
- ai_pipeline_core/simple_runner/__init__.py +0 -14
- ai_pipeline_core/simple_runner/cli.py +0 -254
- ai_pipeline_core/simple_runner/simple_runner.py +0 -247
- ai_pipeline_core-0.2.9.dist-info/RECORD +0 -41
- {ai_pipeline_core-0.2.9.dist-info → ai_pipeline_core-0.3.3.dist-info}/licenses/LICENSE +0 -0
{ai_pipeline_core-0.2.9.dist-info → ai_pipeline_core-0.3.3.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-pipeline-core
-Version: 0.2.9
+Version: 0.3.3
 Summary: Core utilities for AI-powered processing pipelines using prefect
 Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
 Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -22,6 +22,7 @@ Requires-Dist: httpx>=0.28.1
 Requires-Dist: jinja2>=3.1.6
 Requires-Dist: lmnr>=0.7.18
 Requires-Dist: openai>=1.109.1
+Requires-Dist: pillow>=10.0.0
 Requires-Dist: prefect-gcp[cloud-storage]>=0.6.10
 Requires-Dist: prefect>=3.4.21
 Requires-Dist: pydantic-settings>=2.10.1
@@ -63,7 +64,7 @@ AI Pipeline Core is a production-ready framework that combines document processi
 - **Structured Output**: Type-safe generation with Pydantic model validation
 - **Workflow Orchestration**: Prefect-based flows and tasks with automatic retries
 - **Observability**: Built-in distributed tracing via Laminar (LMNR) with cost tracking for debugging and monitoring
-- **
+- **Deployment**: Unified pipeline execution for local, CLI, and production environments
 
 ## Installation
 
@@ -124,7 +125,7 @@ async def analyze_flow(
     for doc in documents:
         # Use AIMessages for LLM interaction
         response = await llm.generate(
-            model="gpt-5",
+            model="gpt-5.1",
             messages=AIMessages([doc])
        )
 
@@ -151,7 +152,7 @@ class Analysis(BaseModel):
 
 # Generate structured output
 response = await llm.generate_structured(
-    model="gpt-5",
+    model="gpt-5.1",
     response_format=Analysis,
     messages="Analyze this product review: ..."
 )
@@ -177,7 +178,7 @@ doc = MyDocument.create(
 # Parse back to original type
 data = doc.parse(dict)  # Returns {"key": "value"}
 
-# Document provenance tracking
+# Document provenance tracking
 doc_with_sources = MyDocument.create(
     name="derived.json",
     content={"result": "processed"},
@@ -224,15 +225,15 @@ if doc.is_text:
 # Parse structured data
 data = doc.as_json()  # or as_yaml(), as_pydantic_model()
 
-# Convert between document types
+# Convert between document types
 task_doc = flow_doc.model_convert(TaskDocument)  # Convert FlowDocument to TaskDocument
 new_doc = doc.model_convert(OtherDocType, content={"new": "data"})  # With content update
 
-# Enhanced filtering
+# Enhanced filtering
 filtered = documents.filter_by([Doc1, Doc2, Doc3])  # Multiple types
 named = documents.filter_by(["file1.txt", "file2.txt"])  # Multiple names
 
-# Immutable collections
+# Immutable collections
 frozen_docs = DocumentList(docs, frozen=True)  # Immutable document list
 frozen_msgs = AIMessages(messages, frozen=True)  # Immutable message list
 ```
|
@@ -246,7 +247,7 @@ from ai_pipeline_core import llm, AIMessages, ModelOptions
|
|
|
246
247
|
|
|
247
248
|
# Simple generation
|
|
248
249
|
response = await llm.generate(
|
|
249
|
-
model="gpt-5",
|
|
250
|
+
model="gpt-5.1",
|
|
250
251
|
messages="Explain quantum computing"
|
|
251
252
|
)
|
|
252
253
|
print(response.content)
|
|
@@ -256,21 +257,21 @@ static_context = AIMessages([large_document])
 
 # First call: caches context
 r1 = await llm.generate(
-    model="gpt-5",
+    model="gpt-5.1",
     context=static_context,  # Cached for 120 seconds by default
     messages="Summarize"  # Dynamic query
 )
 
 # Second call: reuses cache
 r2 = await llm.generate(
-    model="gpt-5",
+    model="gpt-5.1",
     context=static_context,  # Reused from cache!
     messages="Key points?"  # Different query
 )
 
-# Custom cache TTL
+# Custom cache TTL
 response = await llm.generate(
-    model="gpt-5",
+    model="gpt-5.1",
     context=static_context,
     messages="Analyze",
     options=ModelOptions(cache_ttl="300s")  # Cache for 5 minutes
@@ -278,7 +279,7 @@ response = await llm.generate(
 
 # Disable caching for dynamic contexts
 response = await llm.generate(
-    model="gpt-5",
+    model="gpt-5.1",
     context=dynamic_context,
     messages="Process",
     options=ModelOptions(cache_ttl=None)  # No caching
@@ -317,12 +318,12 @@ from ai_pipeline_core import pipeline_flow, pipeline_task, set_trace_cost
 @pipeline_task  # Automatic retry, tracing, and monitoring
 async def process_chunk(data: str) -> str:
     result = await transform(data)
-    set_trace_cost(0.05)  # Track costs
+    set_trace_cost(0.05)  # Track costs
     return result
 
 @pipeline_flow(
     config=MyFlowConfig,
-    trace_trim_documents=True  # Trim large documents in traces
+    trace_trim_documents=True  # Trim large documents in traces
 )
 async def main_flow(
     project_name: str,
@@ -335,6 +336,68 @@ async def main_flow(
     return DocumentList(results)
 ```
 
+### Local Trace Debugging
+
+Save all trace spans to the local filesystem for LLM-assisted debugging:
+
+```bash
+export TRACE_DEBUG_PATH=/path/to/debug/output
+```
+
+This creates a hierarchical directory structure that mirrors the execution flow with automatic deduplication:
+
+```
+20260128_152932_abc12345_my_flow/
+├── _trace.yaml              # Trace metadata
+├── _index.yaml              # Span ID → path mapping
+├── _summary.md              # Unified summary for human inspection and LLM debugging
+├── artifacts/               # Deduplicated content storage
+│   └── sha256/
+│       └── ab/cd/           # Sharded by hash prefix
+│           └── abcdef...1234.txt  # Large content (>10KB)
+└── 0001_my_flow/            # Root span (numbered for execution order)
+    ├── _span.yaml           # Span metadata (timing, status, I/O refs)
+    ├── input.yaml           # Structured inputs (inline or refs)
+    ├── output.yaml          # Structured outputs (inline or refs)
+    ├── 0002_task_1/         # Child spans nested inside parent
+    │   ├── _span.yaml
+    │   ├── input.yaml
+    │   ├── output.yaml
+    │   └── 0003_llm_call/
+    │       ├── _span.yaml
+    │       ├── input.yaml   # LLM messages with inline/external content
+    │       └── output.yaml
+    └── 0004_task_2/
+        └── ...
+```
+
+**Key Features:**
+- **Automatic Deduplication**: Identical content (e.g., system prompts) stored once in `artifacts/`
+- **Smart Externalization**: Large content (>10KB) externalized with 2KB inline previews
+- **AI-Friendly**: Files capped at 50KB for easy LLM processing
+- **Lossless**: Full content reconstruction via `content_ref` pointers
+
+Example `input.yaml` with externalization:
+```yaml
+format_version: 3
+type: llm_messages
+messages:
+  - role: system
+    parts:
+      - type: text
+        size_bytes: 28500
+        content_ref:           # Large content → artifact
+          hash: sha256:a1b2c3d4...
+          path: artifacts/sha256/a1/b2/a1b2c3d4...txt
+        excerpt: "You are a helpful assistant...\n[TRUNCATED]"
+  - role: user
+    parts:
+      - type: text
+        content: "Hello!"      # Small content stays inline
+```
+
+Run `tree` on the output directory to visualize the entire execution hierarchy. Feed `_summary.md` to an LLM for debugging assistance: it combines a high-level overview with detailed navigation for comprehensive trace analysis.
+
 ## Configuration
 
 ### Environment Variables
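The `content_ref` entries shown above are what make the dump lossless: any externalized part can be rehydrated from `artifacts/`. A minimal sketch under the layout assumptions above; `resolve_parts` is a hypothetical helper for illustration, not part of the ai-pipeline-core API:

```python
# Hypothetical helper, not part of ai-pipeline-core: rebuild full message text
# from an input.yaml that mixes inline content with content_ref pointers.
from pathlib import Path

import yaml  # assumes PyYAML is available


def resolve_parts(input_yaml: Path) -> list[str]:
    # artifacts/ lives at the trace root; walk up from the span directory
    trace_root = input_yaml.parent
    while not (trace_root / "artifacts").is_dir() and trace_root != trace_root.parent:
        trace_root = trace_root.parent
    data = yaml.safe_load(input_yaml.read_text())
    texts: list[str] = []
    for message in data.get("messages", []):
        for part in message.get("parts", []):
            if "content" in part:        # small content stays inline
                texts.append(part["content"])
            elif "content_ref" in part:  # large content -> artifact file
                texts.append((trace_root / part["content_ref"]["path"]).read_text())
    return texts
```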
@@ -348,6 +411,9 @@ OPENAI_API_KEY=your-api-key
 LMNR_PROJECT_API_KEY=your-lmnr-key
 LMNR_DEBUG=true  # Enable debug traces
 
+# Optional: Local Trace Debugging
+TRACE_DEBUG_PATH=/path/to/trace/output  # Save traces locally for LLM-assisted debugging
+
 # Optional: Orchestration
 PREFECT_API_URL=http://localhost:4200/api
 PREFECT_API_KEY=your-prefect-key
@@ -458,18 +524,21 @@ For AI assistants:
 ```
 ai-pipeline-core/
 ├── ai_pipeline_core/
-│   ├──
-│   ├──
-│   ├──
-│   ├──
-│   ├──
-│   ├──
+│   ├── deployment/        # Pipeline deployment and execution
+│   ├── documents/         # Document abstraction system
+│   ├── flow/              # Flow configuration and options
+│   ├── llm/               # LLM client and response handling
+│   ├── logging/           # Logging infrastructure
+│   ├── prompt_builder/    # Document-aware prompt construction
+│   ├── pipeline.py        # Pipeline decorators
+│   ├── progress.py        # Intra-flow progress tracking
 │   ├── prompt_manager.py  # Jinja2 template management
-│
-
-├──
-├──
-
+│   ├── settings.py        # Configuration management
+│   └── tracing.py         # Distributed tracing
+├── tests/                 # Comprehensive test suite
+├── examples/              # Usage examples
+├── API.md                 # Complete API reference
+└── pyproject.toml         # Project configuration
 ```
 
 ## Contributing
ai_pipeline_core-0.3.3.dist-info/RECORD

@@ -0,0 +1,57 @@
+ai_pipeline_core/__init__.py,sha256=2jzEQktQJp-A3bzDU-A7c9xntnY3x9I-1XwYcojYjYE,6452
+ai_pipeline_core/exceptions.py,sha256=vx-XLTw2fJSPs-vwtXVYtqoQUcOc0JeI7UmHqRqQYWU,1569
+ai_pipeline_core/pipeline.py,sha256=t9qH-V6umpKY5MhGuXFgUGfdzGyxzVlS0n9RoKLfnug,28704
+ai_pipeline_core/prefect.py,sha256=91ZgLJHsDsRUW77CpNmkKxYs3RCJuucPM3pjKmNBeDg,2199
+ai_pipeline_core/progress.py,sha256=Ppxk4OOm84Y0x3t-Y3CmHsL4PovQLNUxXMu24zRCD-Q,3621
+ai_pipeline_core/prompt_manager.py,sha256=FAtb1yK7bGuAeuIJ523LOX9bd7TrcHG-TqZ7Lz4RJC0,12087
+ai_pipeline_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ai_pipeline_core/settings.py,sha256=IMrFaX0i-WIlaOA5O53ipNSta6KQVSFHc1aJXmS3nSo,5078
+ai_pipeline_core/tracing.py,sha256=HJ_DJhCEk6W_u3skecjETMQVLyOmbuPcqcOuoMIJlPs,33194
+ai_pipeline_core/debug/__init__.py,sha256=wOc9KotFqGYzBEtZUZ7ATfJf3dXWarYm6PXs6yW9uwE,756
+ai_pipeline_core/debug/config.py,sha256=l5WC2xbd6PgC-CcuioZg696iva_MkqyZj4C9TFdwfMs,3205
+ai_pipeline_core/debug/content.py,sha256=REtA1cJnOJy3OqaGud59B3Bug8cOJszm8w1GCqdAKJs,26696
+ai_pipeline_core/debug/processor.py,sha256=Cvm1HKc6lKRm80Xx7WXi_Z8pWoKH6actVZvntP9Mons,3935
+ai_pipeline_core/debug/summary.py,sha256=pzXC7QoFOBeen_XZ-AMFAVvaOtDuf28YB-WwCbsHYdQ,8017
+ai_pipeline_core/debug/writer.py,sha256=IF5eyML10EBFBqCGqlVwcWKraFvTgfqbU8WJ_XG_RU4,33108
+ai_pipeline_core/deployment/__init__.py,sha256=FN2HVoM80x2GJuNs7o4DnccB8HWWibgM1pJesB942CM,1259
+ai_pipeline_core/deployment/base.py,sha256=JYf8XLFR73c0H24dr6atK7yUcoE0vLxbYZ8EkQpEwN4,24791
+ai_pipeline_core/deployment/contract.py,sha256=0DKt5eqNE-grcITwMNq9CuBdo5WxdopEjDeQFzFZxhU,2225
+ai_pipeline_core/deployment/helpers.py,sha256=3nRuCyABkUEDZiL0q9u19XHpjA4527B6rsxQNOGTohw,3460
+ai_pipeline_core/documents/__init__.py,sha256=WHStvGZiSyybOcMTYxSV24U6MA3Am_0_Az5p-DuMFrk,738
+ai_pipeline_core/documents/document.py,sha256=hdTh36KGEcrDollTnQmTI66DJIqYfe4X42Y0q7Cm4fY,68153
+ai_pipeline_core/documents/document_list.py,sha256=Y_NCjfM_CjkIwHRD2iyGgYBuIykN8lT2IIH_uWOiGis,16254
+ai_pipeline_core/documents/flow_document.py,sha256=QK6RxNQu449IRAosOHSk3G_5yIq5I7yLBOSQPCd3m64,4141
+ai_pipeline_core/documents/mime_type.py,sha256=JFEOq4HwlIW2snobyNfWwySdT7urZSWkobiRMVs2fSE,7959
+ai_pipeline_core/documents/task_document.py,sha256=uASmAaxNkYtuqQrBM57vutFT9DXNTbqv0wbwwF55E3I,4300
+ai_pipeline_core/documents/temporary_document.py,sha256=jaz2ZHC5CmSbVbkXdI7pOB5DGEuhH16C0Yutv-lS_UI,2708
+ai_pipeline_core/documents/utils.py,sha256=ZyJNjFN7ihWno0K7dJZed7twYmmPLA0z40UzFw1A3A8,5465
+ai_pipeline_core/flow/__init__.py,sha256=2BfWYMOPYW5teGzwo-qzpn_bom1lxxry0bPsjVgcsCk,188
+ai_pipeline_core/flow/config.py,sha256=a9FALpgrFsdz-D7HU3diVeUzbaBvLwI8hsPviuj001s,19389
+ai_pipeline_core/flow/options.py,sha256=s5GBTy5lwFa1irf8BKrWO8NMZ5s_f4tqq7Wg9WQ7TTg,2302
+ai_pipeline_core/images/__init__.py,sha256=6R6Ncif6oRyVOH7LsdwNvEuMGHuljo-_gImY8C3Z_ow,9877
+ai_pipeline_core/images/_processing.py,sha256=wKSBAFe5TO-mo64ll20nmN9huazHwvVWFfNJB6g7u2Q,4421
+ai_pipeline_core/llm/__init__.py,sha256=3B_vtEzxrzidP1qOUNQ4RxlUmxZ2MBKQcUhQiTybM9g,661
+ai_pipeline_core/llm/ai_messages.py,sha256=XR2fwzguuh7v-HQ9LEJX_xwNX1D_-6f4T7E8_iNVTS4,15680
+ai_pipeline_core/llm/client.py,sha256=777Zf5BBRA-6g1I4Og8mitpoCPdYMp66WE52wH-1I-o,24910
+ai_pipeline_core/llm/model_options.py,sha256=uRNIHfVeh2sgt1mZBiOUx6hPQ6GKjB8b7TytZJ6afKg,11768
+ai_pipeline_core/llm/model_response.py,sha256=zEANsfuSAYVRKPwKx9gFIqHbdVG_1_JNMRHNoE43_YM,13503
+ai_pipeline_core/llm/model_types.py,sha256=wx-m0up7_NncTmSYmMsL-l-RgydjjJ905u7RMEAg7tI,2710
+ai_pipeline_core/logging/__init__.py,sha256=Nz6-ghAoENsgNmLD2ma9TW9M0U2_QfxuQ5DDW6Vt6M0,651
+ai_pipeline_core/logging/logging.yml,sha256=YTW48keO_K5bkkb-KXGM7ZuaYKiquLsjsURei8Ql0V4,1353
+ai_pipeline_core/logging/logging_config.py,sha256=pV2x6GgMPXrzPH27sicCSXfw56beio4C2JKCJ3NsXrg,6207
+ai_pipeline_core/logging/logging_mixin.py,sha256=OpdR3ASiM2ZwKZYGjZRJFUloGWUCv2Grnr8RqUWlYn8,8094
+ai_pipeline_core/prompt_builder/__init__.py,sha256=-v0SKZlir07xRzxXwv75VP66aINRUiKH0VUgB-PCDmI,195
+ai_pipeline_core/prompt_builder/documents_prompt.jinja2,sha256=LPql5AaFhFWtDfhnBWvi-bWbz5vdgsWqKGzcqxWfLIM,1075
+ai_pipeline_core/prompt_builder/global_cache.py,sha256=9_9zoF6-sr3KBMxF5QLD3vxqXg9B2tT8o9ViplzUCNg,2811
+ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2,sha256=M8uPpwf-uLpsWWJT9DY_DnjrLToGPVnrD-gVhQrQdaQ,229
+ai_pipeline_core/prompt_builder/prompt_builder.py,sha256=4TrDRPiOMFwEfi6QGfriTHfjzj_CtbEjAcgQrVfRqhw,9378
+ai_pipeline_core/prompt_builder/system_prompt.jinja2,sha256=-1jLcfvAG07Zfl-dnYrjfVcAG4PWeeoeWpaKJGY3rKQ,3945
+ai_pipeline_core/storage/__init__.py,sha256=tcIkjJ3zPBLCyetwiJDewBvS2sbRJrDlBh3gEsQm08E,184
+ai_pipeline_core/storage/storage.py,sha256=ClMr419Y-eU2RuOjZYd51dC0stWQk28Vb56PvQaoUwc,20007
+ai_pipeline_core/utils/__init__.py,sha256=TJSmEm1Quf-gKwXrxM96u2IGzVolUyeNNfLMPoLstXI,254
+ai_pipeline_core/utils/deploy.py,sha256=N3i7B97DQJs1lwgYN3sa1UgwCNjseKXfjs50ZJUMCEI,22106
+ai_pipeline_core/utils/remote_deployment.py,sha256=U7MNJ1SU1mg3RrJyLqpuN_4pwqm8LSsFZbypJvjGPoo,4630
+ai_pipeline_core-0.3.3.dist-info/METADATA,sha256=WCRBGS2kO8916jlGc6jY_YuPwzw8diXfeNSrNFKxuvk,17893
+ai_pipeline_core-0.3.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ai_pipeline_core-0.3.3.dist-info/licenses/LICENSE,sha256=kKj8mfbdWwkyG3U6n7ztB3bAZlEwShTkAsvaY657i3I,1074
+ai_pipeline_core-0.3.3.dist-info/RECORD,,
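Each RECORD row is `path,sha256=<digest>,size`, with the digest encoded as unpadded urlsafe base64 per the wheel RECORD convention. A quick sketch for recomputing a row's hash locally (illustrative only, not part of this package):

```python
# Sketch: recompute a wheel RECORD hash for a file and compare to a row above.
import base64
import hashlib
from pathlib import Path


def record_hash(path: Path) -> str:
    digest = hashlib.sha256(path.read_bytes()).digest()
    return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")


# The empty py.typed file hashes to the value recorded above:
# record_hash(Path("ai_pipeline_core/py.typed"))
# == "sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU"
```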
ai_pipeline_core/simple_runner/__init__.py

@@ -1,14 +0,0 @@
-"""Simple pipeline execution for local development.
-
-Utilities for running AI pipelines locally without full Prefect orchestration.
-"""
-
-from .cli import run_cli
-from .simple_runner import FlowSequence, run_pipeline, run_pipelines
-
-__all__ = [
-    "run_cli",
-    "run_pipeline",
-    "run_pipelines",
-    "FlowSequence",
-]
ai_pipeline_core/simple_runner/cli.py

@@ -1,254 +0,0 @@
-"""Command-line interface for simple pipeline execution."""
-
-import asyncio
-import os
-import sys
-from contextlib import ExitStack
-from pathlib import Path
-from typing import Callable, Type, TypeVar, cast
-
-from lmnr import Laminar
-from pydantic import ValidationError
-from pydantic_settings import CliPositionalArg, SettingsConfigDict
-
-from ai_pipeline_core.documents import DocumentList
-from ai_pipeline_core.flow.options import FlowOptions
-from ai_pipeline_core.logging import get_pipeline_logger, setup_logging
-from ai_pipeline_core.prefect import disable_run_logger, prefect_test_harness
-from ai_pipeline_core.settings import settings
-
-from .simple_runner import FlowSequence, run_pipelines
-
-logger = get_pipeline_logger(__name__)
-
-TOptions = TypeVar("TOptions", bound=FlowOptions)
-"""Type variable for FlowOptions subclasses used in CLI."""
-
-InitializerFunc = Callable[[FlowOptions], tuple[str, DocumentList]] | None
-"""Function type for custom pipeline initialization.
-
-Initializers can create initial documents or setup project state
-before flow execution begins.
-
-Args:
-    FlowOptions: Parsed CLI options
-
-Returns:
-    Tuple of (project_name, initial_documents) or None
-"""
-
-
-def _initialize_environment() -> None:
-    """Initialize logging and observability systems.
-
-    Sets up the pipeline logging configuration and attempts to
-    initialize LMNR (Laminar) for distributed tracing. Failures
-    in LMNR initialization are logged but don't stop execution.
-
-    Side effects:
-        - Configures Python logging system
-        - Initializes Laminar SDK if API key is available
-        - Logs initialization status
-
-    Note:
-        Called automatically by run_cli before parsing arguments.
-    """
-    setup_logging()
-    try:
-        Laminar.initialize()
-        logger.info("LMNR tracing initialized.")
-    except Exception as e:
-        logger.warning(f"Failed to initialize LMNR tracing: {e}")
-
-
-def _running_under_pytest() -> bool:
-    """Check if code is running under pytest.
-
-    Detects pytest execution context to determine whether test
-    fixtures will provide necessary contexts (like Prefect test
-    harness). This prevents duplicate context setup.
-
-    Returns:
-        True if running under pytest, False otherwise.
-
-    Detection methods:
-        - PYTEST_CURRENT_TEST environment variable (set by pytest)
-        - 'pytest' module in sys.modules (imported by test runner)
-
-    Note:
-        Used to avoid setting up test harness when pytest fixtures
-        already provide it.
-    """
-    return "PYTEST_CURRENT_TEST" in os.environ or "pytest" in sys.modules
-
-
-def run_cli(
-    *,
-    flows: FlowSequence,
-    options_cls: Type[TOptions],
-    initializer: InitializerFunc = None,
-    trace_name: str | None = None,
-) -> None:
-    """Execute pipeline flows from command-line arguments.
-
-    Environment setup:
-        - Initializes logging system
-        - Sets up LMNR tracing (if API key configured)
-        - Creates Prefect test harness (if no API key and not in pytest)
-        - Manages context stack for proper cleanup
-
-    Raises:
-        ValueError: If project name is empty after initialization.
-
-    Example:
-        >>> # In __main__.py
-        >>> from ai_pipeline_core import simple_runner
-        >>> from .flows import AnalysisFlow, SummaryFlow
-        >>> from .config import AnalysisOptions
-        >>>
-        >>> if __name__ == "__main__":
-        ...     simple_runner.run_cli(
-        ...         flows=[AnalysisFlow, SummaryFlow],
-        ...         options_cls=AnalysisOptions,
-        ...         trace_name="document-analysis"
-        ...     )
-
-    Command line:
-        $ python -m my_module ./output --temperature 0.5 --model gpt-5
-        $ python -m my_module ./output --start 2  # Skip first flow
-
-    Note:
-        - Field names are converted to kebab-case for CLI (max_tokens → --max-tokens)
-        - Boolean fields become flags (--verbose/--no-verbose)
-        - Field descriptions from Pydantic become help text
-        - Type hints are enforced during parsing
-        - Validation errors show helpful messages with field names
-        - Includes hints for common error types (numbers, ranges)
-        - Exits with status 1 on error
-        - Shows --help when no arguments provided
-    """
-    # Check if no arguments provided before initialization
-    if len(sys.argv) == 1:
-        # Add --help to show usage when run without arguments
-        sys.argv.append("--help")
-
-    _initialize_environment()
-
-    class _RunnerOptions(  # type: ignore[reportRedeclaration]
-        options_cls,
-        cli_parse_args=True,
-        cli_kebab_case=True,
-        cli_exit_on_error=True,  # Let it exit normally on error
-        cli_prog_name="ai-pipeline",
-        cli_use_class_docs_for_groups=True,
-    ):
-        """Internal options class combining user options with CLI arguments.
-
-        Dynamically created class that inherits from user's options_cls
-        and adds standard CLI arguments for pipeline execution.
-        """
-
-        working_directory: CliPositionalArg[Path]
-        project_name: str | None = None
-        start: int = 1
-        end: int | None = None
-
-        model_config = SettingsConfigDict(frozen=True, extra="ignore")
-
-    try:
-        opts = cast(FlowOptions, _RunnerOptions())  # type: ignore[reportCallIssue]
-    except ValidationError as e:
-        print("\nError: Invalid command line arguments\n", file=sys.stderr)
-        for error in e.errors():
-            field = " -> ".join(str(loc) for loc in error["loc"])
-            msg = error["msg"]
-            value = error.get("input", "")
-
-            # Format the field name nicely (convert from snake_case to kebab-case for CLI)
-            cli_field = field.replace("_", "-")
-
-            print(f"  --{cli_field}: {msg}", file=sys.stderr)
-            if value:
-                print(f"    Provided value: '{value}'", file=sys.stderr)
-
-            # Add helpful hints for common errors
-            if error["type"] == "float_parsing":
-                print("    Hint: Please provide a valid number (e.g., 0.7)", file=sys.stderr)
-            elif error["type"] == "int_parsing":
-                print("    Hint: Please provide a valid integer (e.g., 10)", file=sys.stderr)
-            elif error["type"] == "literal_error":
-                ctx = error.get("ctx", {})
-                expected = ctx.get("expected", "valid options")
-                print(f"    Hint: Valid options are: {expected}", file=sys.stderr)
-            elif error["type"] in [
-                "less_than_equal",
-                "greater_than_equal",
-                "less_than",
-                "greater_than",
-            ]:
-                ctx = error.get("ctx", {})
-                if "le" in ctx:
-                    print(f"    Hint: Value must be ≤ {ctx['le']}", file=sys.stderr)
-                elif "ge" in ctx:
-                    print(f"    Hint: Value must be ≥ {ctx['ge']}", file=sys.stderr)
-                elif "lt" in ctx:
-                    print(f"    Hint: Value must be < {ctx['lt']}", file=sys.stderr)
-                elif "gt" in ctx:
-                    print(f"    Hint: Value must be > {ctx['gt']}", file=sys.stderr)
-
-        print("\nRun with --help to see all available options\n", file=sys.stderr)
-        sys.exit(1)
-
-    wd: Path = cast(Path, getattr(opts, "working_directory"))
-    wd.mkdir(parents=True, exist_ok=True)
-
-    # Get project name from options or use directory basename
-    project_name = getattr(opts, "project_name", None)
-    if not project_name:  # None or empty string
-        project_name = wd.name
-
-    # Ensure project_name is not empty
-    if not project_name:
-        raise ValueError("Project name cannot be empty")
-
-    # Use initializer if provided, otherwise use defaults
-    initial_documents = DocumentList([])
-    if initializer:
-        init_result = initializer(opts)
-        # Always expect tuple format from initializer
-        _, initial_documents = init_result  # Ignore project name from initializer
-
-    # Save initial documents if starting from first step
-    if getattr(opts, "start", 1) == 1 and initial_documents and flows:
-        # Get config from the first flow
-        first_flow_config = getattr(flows[0], "config", None)
-        if first_flow_config:
-            asyncio.run(
-                first_flow_config.save_documents(
-                    str(wd), initial_documents, validate_output_type=False
-                )
-            )
-
-    # Setup context stack with optional test harness and tracing
-    with ExitStack() as stack:
-        if trace_name:
-            stack.enter_context(
-                Laminar.start_as_current_span(
-                    name=f"{trace_name}-{project_name}", input=[opts.model_dump_json()]
-                )
-            )
-
-        if not settings.prefect_api_key and not _running_under_pytest():
-            stack.enter_context(prefect_test_harness())
-            stack.enter_context(disable_run_logger())
-
-        asyncio.run(
-            run_pipelines(
-                project_name=project_name,
-                output_dir=wd,
-                flows=flows,
-                flow_options=opts,
-                start_step=getattr(opts, "start", 1),
-                end_step=getattr(opts, "end", None),
-            )
-        )