ai-pipeline-core 0.2.9__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. ai_pipeline_core/__init__.py +32 -5
  2. ai_pipeline_core/debug/__init__.py +26 -0
  3. ai_pipeline_core/debug/config.py +91 -0
  4. ai_pipeline_core/debug/content.py +705 -0
  5. ai_pipeline_core/debug/processor.py +99 -0
  6. ai_pipeline_core/debug/summary.py +236 -0
  7. ai_pipeline_core/debug/writer.py +913 -0
  8. ai_pipeline_core/deployment/__init__.py +46 -0
  9. ai_pipeline_core/deployment/base.py +681 -0
  10. ai_pipeline_core/deployment/contract.py +84 -0
  11. ai_pipeline_core/deployment/helpers.py +98 -0
  12. ai_pipeline_core/documents/flow_document.py +1 -1
  13. ai_pipeline_core/documents/task_document.py +1 -1
  14. ai_pipeline_core/documents/temporary_document.py +1 -1
  15. ai_pipeline_core/flow/config.py +13 -2
  16. ai_pipeline_core/flow/options.py +4 -4
  17. ai_pipeline_core/images/__init__.py +362 -0
  18. ai_pipeline_core/images/_processing.py +157 -0
  19. ai_pipeline_core/llm/ai_messages.py +25 -4
  20. ai_pipeline_core/llm/client.py +15 -19
  21. ai_pipeline_core/llm/model_response.py +5 -5
  22. ai_pipeline_core/llm/model_types.py +10 -13
  23. ai_pipeline_core/logging/logging_mixin.py +2 -2
  24. ai_pipeline_core/pipeline.py +1 -1
  25. ai_pipeline_core/progress.py +127 -0
  26. ai_pipeline_core/prompt_builder/__init__.py +5 -0
  27. ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +23 -0
  28. ai_pipeline_core/prompt_builder/global_cache.py +78 -0
  29. ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +6 -0
  30. ai_pipeline_core/prompt_builder/prompt_builder.py +253 -0
  31. ai_pipeline_core/prompt_builder/system_prompt.jinja2 +41 -0
  32. ai_pipeline_core/tracing.py +54 -2
  33. ai_pipeline_core/utils/deploy.py +214 -6
  34. ai_pipeline_core/utils/remote_deployment.py +37 -187
  35. {ai_pipeline_core-0.2.9.dist-info → ai_pipeline_core-0.3.3.dist-info}/METADATA +96 -27
  36. ai_pipeline_core-0.3.3.dist-info/RECORD +57 -0
  37. {ai_pipeline_core-0.2.9.dist-info → ai_pipeline_core-0.3.3.dist-info}/WHEEL +1 -1
  38. ai_pipeline_core/simple_runner/__init__.py +0 -14
  39. ai_pipeline_core/simple_runner/cli.py +0 -254
  40. ai_pipeline_core/simple_runner/simple_runner.py +0 -247
  41. ai_pipeline_core-0.2.9.dist-info/RECORD +0 -41
  42. {ai_pipeline_core-0.2.9.dist-info → ai_pipeline_core-0.3.3.dist-info}/licenses/LICENSE +0 -0
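If you want to reproduce a file-by-file comparison like this locally, a minimal stdlib sketch is below. The wheel filenames are assumptions; fetch both wheels first (for example with `pip download --no-deps "ai-pipeline-core==0.2.9"` and likewise for 0.3.3). The sketch is text-oriented, so binary members will render noisily.

```python
# Sketch: diff two downloaded wheels member-by-member using only the stdlib.
import difflib
import zipfile

def diff_wheels(old_whl: str, new_whl: str) -> None:
    """Print a unified diff of every file present in either wheel."""
    with zipfile.ZipFile(old_whl) as old, zipfile.ZipFile(new_whl) as new:
        old_names, new_names = set(old.namelist()), set(new.namelist())
        for name in sorted(old_names | new_names):
            # Missing-on-one-side members diff against an empty file.
            a = old.read(name).decode(errors="replace").splitlines() if name in old_names else []
            b = new.read(name).decode(errors="replace").splitlines() if name in new_names else []
            for line in difflib.unified_diff(a, b, f"0.2.9/{name}", f"0.3.3/{name}", lineterm=""):
                print(line)

diff_wheels("ai_pipeline_core-0.2.9-py3-none-any.whl",
            "ai_pipeline_core-0.3.3-py3-none-any.whl")
```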
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-pipeline-core
-Version: 0.2.9
+Version: 0.3.3
 Summary: Core utilities for AI-powered processing pipelines using prefect
 Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
 Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -22,6 +22,7 @@ Requires-Dist: httpx>=0.28.1
 Requires-Dist: jinja2>=3.1.6
 Requires-Dist: lmnr>=0.7.18
 Requires-Dist: openai>=1.109.1
+Requires-Dist: pillow>=10.0.0
 Requires-Dist: prefect-gcp[cloud-storage]>=0.6.10
 Requires-Dist: prefect>=3.4.21
 Requires-Dist: pydantic-settings>=2.10.1
@@ -63,7 +64,7 @@ AI Pipeline Core is a production-ready framework that combines document processi
 - **Structured Output**: Type-safe generation with Pydantic model validation
 - **Workflow Orchestration**: Prefect-based flows and tasks with automatic retries
 - **Observability**: Built-in distributed tracing via Laminar (LMNR) with cost tracking for debugging and monitoring
-- **Local Development**: Simple runner for testing pipelines without infrastructure
+- **Deployment**: Unified pipeline execution for local, CLI, and production environments

 ## Installation

@@ -124,7 +125,7 @@ async def analyze_flow(
     for doc in documents:
         # Use AIMessages for LLM interaction
         response = await llm.generate(
-            model="gpt-5",
+            model="gpt-5.1",
             messages=AIMessages([doc])
         )

@@ -151,7 +152,7 @@ class Analysis(BaseModel):

 # Generate structured output
 response = await llm.generate_structured(
-    model="gpt-5",
+    model="gpt-5.1",
     response_format=Analysis,
     messages="Analyze this product review: ..."
 )
@@ -177,7 +178,7 @@ doc = MyDocument.create(
 # Parse back to original type
 data = doc.parse(dict)  # Returns {"key": "value"}

-# Document provenance tracking (new in v0.1.14)
+# Document provenance tracking
 doc_with_sources = MyDocument.create(
     name="derived.json",
     content={"result": "processed"},
@@ -224,15 +225,15 @@ if doc.is_text:
 # Parse structured data
 data = doc.as_json()  # or as_yaml(), as_pydantic_model()

-# Convert between document types (new in v0.2.1)
+# Convert between document types
 task_doc = flow_doc.model_convert(TaskDocument)  # Convert FlowDocument to TaskDocument
 new_doc = doc.model_convert(OtherDocType, content={"new": "data"})  # With content update

-# Enhanced filtering (new in v0.1.14)
+# Enhanced filtering
 filtered = documents.filter_by([Doc1, Doc2, Doc3])  # Multiple types
 named = documents.filter_by(["file1.txt", "file2.txt"])  # Multiple names

-# Immutable collections (new in v0.2.1)
+# Immutable collections
 frozen_docs = DocumentList(docs, frozen=True)  # Immutable document list
 frozen_msgs = AIMessages(messages, frozen=True)  # Immutable message list
 ```
@@ -246,7 +247,7 @@ from ai_pipeline_core import llm, AIMessages, ModelOptions

 # Simple generation
 response = await llm.generate(
-    model="gpt-5",
+    model="gpt-5.1",
     messages="Explain quantum computing"
 )
 print(response.content)
@@ -256,21 +257,21 @@ static_context = AIMessages([large_document])

 # First call: caches context
 r1 = await llm.generate(
-    model="gpt-5",
+    model="gpt-5.1",
     context=static_context,  # Cached for 120 seconds by default
     messages="Summarize"  # Dynamic query
 )

 # Second call: reuses cache
 r2 = await llm.generate(
-    model="gpt-5",
+    model="gpt-5.1",
     context=static_context,  # Reused from cache!
     messages="Key points?"  # Different query
 )

-# Custom cache TTL (new in v0.1.14)
+# Custom cache TTL
 response = await llm.generate(
-    model="gpt-5",
+    model="gpt-5.1",
     context=static_context,
     messages="Analyze",
     options=ModelOptions(cache_ttl="300s")  # Cache for 5 minutes
@@ -278,7 +279,7 @@ response = await llm.generate(

 # Disable caching for dynamic contexts
 response = await llm.generate(
-    model="gpt-5",
+    model="gpt-5.1",
     context=dynamic_context,
     messages="Process",
     options=ModelOptions(cache_ttl=None)  # No caching
@@ -317,12 +318,12 @@ from ai_pipeline_core import pipeline_flow, pipeline_task, set_trace_cost
 @pipeline_task  # Automatic retry, tracing, and monitoring
 async def process_chunk(data: str) -> str:
     result = await transform(data)
-    set_trace_cost(0.05)  # Track costs (new in v0.1.14)
+    set_trace_cost(0.05)  # Track costs
     return result

 @pipeline_flow(
     config=MyFlowConfig,
-    trace_trim_documents=True  # Trim large documents in traces (new in v0.2.1)
+    trace_trim_documents=True  # Trim large documents in traces
 )
 async def main_flow(
     project_name: str,
@@ -335,6 +336,68 @@ async def main_flow(
     return DocumentList(results)
 ```

+### Local Trace Debugging
+
+Save all trace spans to the local filesystem for LLM-assisted debugging:
+
+```bash
+export TRACE_DEBUG_PATH=/path/to/debug/output
+```
+
+This creates a hierarchical directory structure that mirrors the execution flow with automatic deduplication:
+
+```
+20260128_152932_abc12345_my_flow/
+├── _trace.yaml                    # Trace metadata
+├── _index.yaml                    # Span ID → path mapping
+├── _summary.md                    # Unified summary for human inspection and LLM debugging
+├── artifacts/                     # Deduplicated content storage
+│   └── sha256/
+│       └── ab/cd/                 # Sharded by hash prefix
+│           └── abcdef...1234.txt  # Large content (>10KB)
+└── 0001_my_flow/                  # Root span (numbered for execution order)
+    ├── _span.yaml                 # Span metadata (timing, status, I/O refs)
+    ├── input.yaml                 # Structured inputs (inline or refs)
+    ├── output.yaml                # Structured outputs (inline or refs)
+    ├── 0002_task_1/               # Child spans nested inside parent
+    │   ├── _span.yaml
+    │   ├── input.yaml
+    │   ├── output.yaml
+    │   └── 0003_llm_call/
+    │       ├── _span.yaml
+    │       ├── input.yaml         # LLM messages with inline/external content
+    │       └── output.yaml
+    └── 0004_task_2/
+        └── ...
+```
+
+**Key Features:**
+- **Automatic Deduplication**: Identical content (e.g., system prompts) stored once in `artifacts/`
+- **Smart Externalization**: Large content (>10KB) externalized with 2KB inline previews
+- **AI-Friendly**: Files capped at 50KB for easy LLM processing
+- **Lossless**: Full content reconstruction via `content_ref` pointers
+
+Example `input.yaml` with externalization:
+```yaml
+format_version: 3
+type: llm_messages
+messages:
+  - role: system
+    parts:
+      - type: text
+        size_bytes: 28500
+        content_ref:               # Large content → artifact
+          hash: sha256:a1b2c3d4...
+          path: artifacts/sha256/a1/b2/a1b2c3d4...txt
+        excerpt: "You are a helpful assistant...\n[TRUNCATED]"
+  - role: user
+    parts:
+      - type: text
+        content: "Hello!"          # Small content stays inline
+```
+
+Run `tree` on the output directory to visualize the entire execution hierarchy. Feed `_summary.md` to an LLM for debugging assistance - it combines high-level overview with detailed navigation for comprehensive trace analysis.
+
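For reference, the `content_ref` entries shown above can be resolved back to full text outside the library. A minimal sketch, assuming PyYAML and exactly the `format_version: 3` layout in the example; `resolve_parts` is illustrative, not an ai-pipeline-core API:

```python
# Hypothetical helper: inline externalized message parts from an input.yaml.
from pathlib import Path

import yaml  # PyYAML

def resolve_parts(input_yaml: Path, trace_root: Path) -> list[str]:
    """Return the full text of every message part, following content_ref pointers."""
    data = yaml.safe_load(input_yaml.read_text())
    texts: list[str] = []
    for message in data.get("messages", []):
        for part in message.get("parts", []):
            ref = part.get("content_ref")
            if ref:  # externalized: read the deduplicated artifact
                texts.append((trace_root / ref["path"]).read_text())
            else:  # small content stays inline
                texts.append(part.get("content", ""))
    return texts
```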
 ## Configuration

 ### Environment Variables
@@ -348,6 +411,9 @@ OPENAI_API_KEY=your-api-key
 LMNR_PROJECT_API_KEY=your-lmnr-key
 LMNR_DEBUG=true # Enable debug traces

+# Optional: Local Trace Debugging
+TRACE_DEBUG_PATH=/path/to/trace/output # Save traces locally for LLM-assisted debugging
+
 # Optional: Orchestration
 PREFECT_API_URL=http://localhost:4200/api
 PREFECT_API_KEY=your-prefect-key
@@ -458,18 +524,21 @@ For AI assistants:
 ```
 ai-pipeline-core/
 ├── ai_pipeline_core/
-│   ├── documents/        # Document abstraction system
-│   ├── flow/             # Flow configuration and options
-│   ├── llm/              # LLM client and response handling
-│   ├── logging/          # Logging infrastructure
-│   ├── tracing.py        # Distributed tracing
-│   ├── pipeline.py       # Pipeline decorators
+│   ├── deployment/       # Pipeline deployment and execution
+│   ├── documents/        # Document abstraction system
+│   ├── flow/             # Flow configuration and options
+│   ├── llm/              # LLM client and response handling
+│   ├── logging/          # Logging infrastructure
+│   ├── prompt_builder/   # Document-aware prompt construction
+│   ├── pipeline.py       # Pipeline decorators
+│   ├── progress.py       # Intra-flow progress tracking
 │   ├── prompt_manager.py # Jinja2 template management
-│   └── settings.py       # Configuration management
-├── tests/                # Comprehensive test suite
-├── examples/             # Usage examples
-└── API.md                # Complete API reference
-└── pyproject.toml        # Project configuration
+│   ├── settings.py       # Configuration management
+│   └── tracing.py        # Distributed tracing
+├── tests/                # Comprehensive test suite
+├── examples/             # Usage examples
+├── API.md                # Complete API reference
+└── pyproject.toml        # Project configuration
 ```

 ## Contributing
@@ -0,0 +1,57 @@
+ai_pipeline_core/__init__.py,sha256=2jzEQktQJp-A3bzDU-A7c9xntnY3x9I-1XwYcojYjYE,6452
+ai_pipeline_core/exceptions.py,sha256=vx-XLTw2fJSPs-vwtXVYtqoQUcOc0JeI7UmHqRqQYWU,1569
+ai_pipeline_core/pipeline.py,sha256=t9qH-V6umpKY5MhGuXFgUGfdzGyxzVlS0n9RoKLfnug,28704
+ai_pipeline_core/prefect.py,sha256=91ZgLJHsDsRUW77CpNmkKxYs3RCJuucPM3pjKmNBeDg,2199
+ai_pipeline_core/progress.py,sha256=Ppxk4OOm84Y0x3t-Y3CmHsL4PovQLNUxXMu24zRCD-Q,3621
+ai_pipeline_core/prompt_manager.py,sha256=FAtb1yK7bGuAeuIJ523LOX9bd7TrcHG-TqZ7Lz4RJC0,12087
+ai_pipeline_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ai_pipeline_core/settings.py,sha256=IMrFaX0i-WIlaOA5O53ipNSta6KQVSFHc1aJXmS3nSo,5078
+ai_pipeline_core/tracing.py,sha256=HJ_DJhCEk6W_u3skecjETMQVLyOmbuPcqcOuoMIJlPs,33194
+ai_pipeline_core/debug/__init__.py,sha256=wOc9KotFqGYzBEtZUZ7ATfJf3dXWarYm6PXs6yW9uwE,756
+ai_pipeline_core/debug/config.py,sha256=l5WC2xbd6PgC-CcuioZg696iva_MkqyZj4C9TFdwfMs,3205
+ai_pipeline_core/debug/content.py,sha256=REtA1cJnOJy3OqaGud59B3Bug8cOJszm8w1GCqdAKJs,26696
+ai_pipeline_core/debug/processor.py,sha256=Cvm1HKc6lKRm80Xx7WXi_Z8pWoKH6actVZvntP9Mons,3935
+ai_pipeline_core/debug/summary.py,sha256=pzXC7QoFOBeen_XZ-AMFAVvaOtDuf28YB-WwCbsHYdQ,8017
+ai_pipeline_core/debug/writer.py,sha256=IF5eyML10EBFBqCGqlVwcWKraFvTgfqbU8WJ_XG_RU4,33108
+ai_pipeline_core/deployment/__init__.py,sha256=FN2HVoM80x2GJuNs7o4DnccB8HWWibgM1pJesB942CM,1259
+ai_pipeline_core/deployment/base.py,sha256=JYf8XLFR73c0H24dr6atK7yUcoE0vLxbYZ8EkQpEwN4,24791
+ai_pipeline_core/deployment/contract.py,sha256=0DKt5eqNE-grcITwMNq9CuBdo5WxdopEjDeQFzFZxhU,2225
+ai_pipeline_core/deployment/helpers.py,sha256=3nRuCyABkUEDZiL0q9u19XHpjA4527B6rsxQNOGTohw,3460
+ai_pipeline_core/documents/__init__.py,sha256=WHStvGZiSyybOcMTYxSV24U6MA3Am_0_Az5p-DuMFrk,738
+ai_pipeline_core/documents/document.py,sha256=hdTh36KGEcrDollTnQmTI66DJIqYfe4X42Y0q7Cm4fY,68153
+ai_pipeline_core/documents/document_list.py,sha256=Y_NCjfM_CjkIwHRD2iyGgYBuIykN8lT2IIH_uWOiGis,16254
+ai_pipeline_core/documents/flow_document.py,sha256=QK6RxNQu449IRAosOHSk3G_5yIq5I7yLBOSQPCd3m64,4141
+ai_pipeline_core/documents/mime_type.py,sha256=JFEOq4HwlIW2snobyNfWwySdT7urZSWkobiRMVs2fSE,7959
+ai_pipeline_core/documents/task_document.py,sha256=uASmAaxNkYtuqQrBM57vutFT9DXNTbqv0wbwwF55E3I,4300
+ai_pipeline_core/documents/temporary_document.py,sha256=jaz2ZHC5CmSbVbkXdI7pOB5DGEuhH16C0Yutv-lS_UI,2708
+ai_pipeline_core/documents/utils.py,sha256=ZyJNjFN7ihWno0K7dJZed7twYmmPLA0z40UzFw1A3A8,5465
+ai_pipeline_core/flow/__init__.py,sha256=2BfWYMOPYW5teGzwo-qzpn_bom1lxxry0bPsjVgcsCk,188
+ai_pipeline_core/flow/config.py,sha256=a9FALpgrFsdz-D7HU3diVeUzbaBvLwI8hsPviuj001s,19389
+ai_pipeline_core/flow/options.py,sha256=s5GBTy5lwFa1irf8BKrWO8NMZ5s_f4tqq7Wg9WQ7TTg,2302
+ai_pipeline_core/images/__init__.py,sha256=6R6Ncif6oRyVOH7LsdwNvEuMGHuljo-_gImY8C3Z_ow,9877
+ai_pipeline_core/images/_processing.py,sha256=wKSBAFe5TO-mo64ll20nmN9huazHwvVWFfNJB6g7u2Q,4421
+ai_pipeline_core/llm/__init__.py,sha256=3B_vtEzxrzidP1qOUNQ4RxlUmxZ2MBKQcUhQiTybM9g,661
+ai_pipeline_core/llm/ai_messages.py,sha256=XR2fwzguuh7v-HQ9LEJX_xwNX1D_-6f4T7E8_iNVTS4,15680
+ai_pipeline_core/llm/client.py,sha256=777Zf5BBRA-6g1I4Og8mitpoCPdYMp66WE52wH-1I-o,24910
+ai_pipeline_core/llm/model_options.py,sha256=uRNIHfVeh2sgt1mZBiOUx6hPQ6GKjB8b7TytZJ6afKg,11768
+ai_pipeline_core/llm/model_response.py,sha256=zEANsfuSAYVRKPwKx9gFIqHbdVG_1_JNMRHNoE43_YM,13503
+ai_pipeline_core/llm/model_types.py,sha256=wx-m0up7_NncTmSYmMsL-l-RgydjjJ905u7RMEAg7tI,2710
+ai_pipeline_core/logging/__init__.py,sha256=Nz6-ghAoENsgNmLD2ma9TW9M0U2_QfxuQ5DDW6Vt6M0,651
+ai_pipeline_core/logging/logging.yml,sha256=YTW48keO_K5bkkb-KXGM7ZuaYKiquLsjsURei8Ql0V4,1353
+ai_pipeline_core/logging/logging_config.py,sha256=pV2x6GgMPXrzPH27sicCSXfw56beio4C2JKCJ3NsXrg,6207
+ai_pipeline_core/logging/logging_mixin.py,sha256=OpdR3ASiM2ZwKZYGjZRJFUloGWUCv2Grnr8RqUWlYn8,8094
+ai_pipeline_core/prompt_builder/__init__.py,sha256=-v0SKZlir07xRzxXwv75VP66aINRUiKH0VUgB-PCDmI,195
+ai_pipeline_core/prompt_builder/documents_prompt.jinja2,sha256=LPql5AaFhFWtDfhnBWvi-bWbz5vdgsWqKGzcqxWfLIM,1075
+ai_pipeline_core/prompt_builder/global_cache.py,sha256=9_9zoF6-sr3KBMxF5QLD3vxqXg9B2tT8o9ViplzUCNg,2811
+ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2,sha256=M8uPpwf-uLpsWWJT9DY_DnjrLToGPVnrD-gVhQrQdaQ,229
+ai_pipeline_core/prompt_builder/prompt_builder.py,sha256=4TrDRPiOMFwEfi6QGfriTHfjzj_CtbEjAcgQrVfRqhw,9378
+ai_pipeline_core/prompt_builder/system_prompt.jinja2,sha256=-1jLcfvAG07Zfl-dnYrjfVcAG4PWeeoeWpaKJGY3rKQ,3945
+ai_pipeline_core/storage/__init__.py,sha256=tcIkjJ3zPBLCyetwiJDewBvS2sbRJrDlBh3gEsQm08E,184
+ai_pipeline_core/storage/storage.py,sha256=ClMr419Y-eU2RuOjZYd51dC0stWQk28Vb56PvQaoUwc,20007
+ai_pipeline_core/utils/__init__.py,sha256=TJSmEm1Quf-gKwXrxM96u2IGzVolUyeNNfLMPoLstXI,254
+ai_pipeline_core/utils/deploy.py,sha256=N3i7B97DQJs1lwgYN3sa1UgwCNjseKXfjs50ZJUMCEI,22106
+ai_pipeline_core/utils/remote_deployment.py,sha256=U7MNJ1SU1mg3RrJyLqpuN_4pwqm8LSsFZbypJvjGPoo,4630
+ai_pipeline_core-0.3.3.dist-info/METADATA,sha256=WCRBGS2kO8916jlGc6jY_YuPwzw8diXfeNSrNFKxuvk,17893
+ai_pipeline_core-0.3.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ai_pipeline_core-0.3.3.dist-info/licenses/LICENSE,sha256=kKj8mfbdWwkyG3U6n7ztB3bAZlEwShTkAsvaY657i3I,1074
+ai_pipeline_core-0.3.3.dist-info/RECORD,,
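Each RECORD row is `path,sha256=<unpadded urlsafe-base64 digest>,size-in-bytes` (the RECORD file lists itself without a hash). To audit an unpacked wheel against these entries, a minimal sketch; `verify_record` is illustrative, not part of the package:

```python
# Hypothetical check: compare files in an unpacked wheel to their RECORD entries.
import base64
import csv
import hashlib
from pathlib import Path

def verify_record(unpacked_wheel: Path, record: Path) -> list[str]:
    """Return paths whose hash or size does not match their RECORD entry."""
    bad: list[str] = []
    with record.open(newline="") as f:
        for path, hash_spec, size in csv.reader(f):
            if not hash_spec:  # RECORD's own row has empty hash and size
                continue
            data = (unpacked_wheel / path).read_bytes()
            digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
            if hash_spec != f"sha256={digest.decode()}" or int(size) != len(data):
                bad.append(path)
    return bad
```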
@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: hatchling 1.27.0
+Generator: hatchling 1.28.0
 Root-Is-Purelib: true
 Tag: py3-none-any
@@ -1,14 +0,0 @@
-"""Simple pipeline execution for local development.
-
-Utilities for running AI pipelines locally without full Prefect orchestration.
-"""
-
-from .cli import run_cli
-from .simple_runner import FlowSequence, run_pipeline, run_pipelines
-
-__all__ = [
-    "run_cli",
-    "run_pipeline",
-    "run_pipelines",
-    "FlowSequence",
-]
@@ -1,254 +0,0 @@
-"""Command-line interface for simple pipeline execution."""
-
-import asyncio
-import os
-import sys
-from contextlib import ExitStack
-from pathlib import Path
-from typing import Callable, Type, TypeVar, cast
-
-from lmnr import Laminar
-from pydantic import ValidationError
-from pydantic_settings import CliPositionalArg, SettingsConfigDict
-
-from ai_pipeline_core.documents import DocumentList
-from ai_pipeline_core.flow.options import FlowOptions
-from ai_pipeline_core.logging import get_pipeline_logger, setup_logging
-from ai_pipeline_core.prefect import disable_run_logger, prefect_test_harness
-from ai_pipeline_core.settings import settings
-
-from .simple_runner import FlowSequence, run_pipelines
-
-logger = get_pipeline_logger(__name__)
-
-TOptions = TypeVar("TOptions", bound=FlowOptions)
-"""Type variable for FlowOptions subclasses used in CLI."""
-
-InitializerFunc = Callable[[FlowOptions], tuple[str, DocumentList]] | None
-"""Function type for custom pipeline initialization.
-
-Initializers can create initial documents or setup project state
-before flow execution begins.
-
-Args:
-    FlowOptions: Parsed CLI options
-
-Returns:
-    Tuple of (project_name, initial_documents) or None
-"""
-
-
-def _initialize_environment() -> None:
-    """Initialize logging and observability systems.
-
-    Sets up the pipeline logging configuration and attempts to
-    initialize LMNR (Laminar) for distributed tracing. Failures
-    in LMNR initialization are logged but don't stop execution.
-
-    Side effects:
-        - Configures Python logging system
-        - Initializes Laminar SDK if API key is available
-        - Logs initialization status
-
-    Note:
-        Called automatically by run_cli before parsing arguments.
-    """
-    setup_logging()
-    try:
-        Laminar.initialize()
-        logger.info("LMNR tracing initialized.")
-    except Exception as e:
-        logger.warning(f"Failed to initialize LMNR tracing: {e}")
-
-
-def _running_under_pytest() -> bool:
-    """Check if code is running under pytest.
-
-    Detects pytest execution context to determine whether test
-    fixtures will provide necessary contexts (like Prefect test
-    harness). This prevents duplicate context setup.
-
-    Returns:
-        True if running under pytest, False otherwise.
-
-    Detection methods:
-        - PYTEST_CURRENT_TEST environment variable (set by pytest)
-        - 'pytest' module in sys.modules (imported by test runner)
-
-    Note:
-        Used to avoid setting up test harness when pytest fixtures
-        already provide it.
-    """
-    return "PYTEST_CURRENT_TEST" in os.environ or "pytest" in sys.modules
-
-
-def run_cli(
-    *,
-    flows: FlowSequence,
-    options_cls: Type[TOptions],
-    initializer: InitializerFunc = None,
-    trace_name: str | None = None,
-) -> None:
-    """Execute pipeline flows from command-line arguments.
-
-    Environment setup:
-        - Initializes logging system
-        - Sets up LMNR tracing (if API key configured)
-        - Creates Prefect test harness (if no API key and not in pytest)
-        - Manages context stack for proper cleanup
-
-    Raises:
-        ValueError: If project name is empty after initialization.
-
-    Example:
-        >>> # In __main__.py
-        >>> from ai_pipeline_core import simple_runner
-        >>> from .flows import AnalysisFlow, SummaryFlow
-        >>> from .config import AnalysisOptions
-        >>>
-        >>> if __name__ == "__main__":
-        ...     simple_runner.run_cli(
-        ...         flows=[AnalysisFlow, SummaryFlow],
-        ...         options_cls=AnalysisOptions,
-        ...         trace_name="document-analysis"
-        ...     )
-
-    Command line:
-        $ python -m my_module ./output --temperature 0.5 --model gpt-5
-        $ python -m my_module ./output --start 2  # Skip first flow
-
-    Note:
-        - Field names are converted to kebab-case for CLI (max_tokens → --max-tokens)
-        - Boolean fields become flags (--verbose/--no-verbose)
-        - Field descriptions from Pydantic become help text
-        - Type hints are enforced during parsing
-        - Validation errors show helpful messages with field names
-        - Includes hints for common error types (numbers, ranges)
-        - Exits with status 1 on error
-        - Shows --help when no arguments provided
-    """
-    # Check if no arguments provided before initialization
-    if len(sys.argv) == 1:
-        # Add --help to show usage when run without arguments
-        sys.argv.append("--help")
-
-    _initialize_environment()
-
-    class _RunnerOptions(  # type: ignore[reportRedeclaration]
-        options_cls,
-        cli_parse_args=True,
-        cli_kebab_case=True,
-        cli_exit_on_error=True,  # Let it exit normally on error
-        cli_prog_name="ai-pipeline",
-        cli_use_class_docs_for_groups=True,
-    ):
-        """Internal options class combining user options with CLI arguments.
-
-        Dynamically created class that inherits from user's options_cls
-        and adds standard CLI arguments for pipeline execution.
-        """
-
-        working_directory: CliPositionalArg[Path]
-        project_name: str | None = None
-        start: int = 1
-        end: int | None = None
-
-        model_config = SettingsConfigDict(frozen=True, extra="ignore")
-
-    try:
-        opts = cast(FlowOptions, _RunnerOptions())  # type: ignore[reportCallIssue]
-    except ValidationError as e:
-        print("\nError: Invalid command line arguments\n", file=sys.stderr)
-        for error in e.errors():
-            field = " -> ".join(str(loc) for loc in error["loc"])
-            msg = error["msg"]
-            value = error.get("input", "")
-
-            # Format the field name nicely (convert from snake_case to kebab-case for CLI)
-            cli_field = field.replace("_", "-")
-
-            print(f"  --{cli_field}: {msg}", file=sys.stderr)
-            if value:
-                print(f"    Provided value: '{value}'", file=sys.stderr)
-
-            # Add helpful hints for common errors
-            if error["type"] == "float_parsing":
-                print("    Hint: Please provide a valid number (e.g., 0.7)", file=sys.stderr)
-            elif error["type"] == "int_parsing":
-                print("    Hint: Please provide a valid integer (e.g., 10)", file=sys.stderr)
-            elif error["type"] == "literal_error":
-                ctx = error.get("ctx", {})
-                expected = ctx.get("expected", "valid options")
-                print(f"    Hint: Valid options are: {expected}", file=sys.stderr)
-            elif error["type"] in [
-                "less_than_equal",
-                "greater_than_equal",
-                "less_than",
-                "greater_than",
-            ]:
-                ctx = error.get("ctx", {})
-                if "le" in ctx:
-                    print(f"    Hint: Value must be ≤ {ctx['le']}", file=sys.stderr)
-                elif "ge" in ctx:
-                    print(f"    Hint: Value must be ≥ {ctx['ge']}", file=sys.stderr)
-                elif "lt" in ctx:
-                    print(f"    Hint: Value must be < {ctx['lt']}", file=sys.stderr)
-                elif "gt" in ctx:
-                    print(f"    Hint: Value must be > {ctx['gt']}", file=sys.stderr)
-
-        print("\nRun with --help to see all available options\n", file=sys.stderr)
-        sys.exit(1)
-
-    wd: Path = cast(Path, getattr(opts, "working_directory"))
-    wd.mkdir(parents=True, exist_ok=True)
-
-    # Get project name from options or use directory basename
-    project_name = getattr(opts, "project_name", None)
-    if not project_name:  # None or empty string
-        project_name = wd.name
-
-    # Ensure project_name is not empty
-    if not project_name:
-        raise ValueError("Project name cannot be empty")
-
-    # Use initializer if provided, otherwise use defaults
-    initial_documents = DocumentList([])
-    if initializer:
-        init_result = initializer(opts)
-        # Always expect tuple format from initializer
-        _, initial_documents = init_result  # Ignore project name from initializer
-
-    # Save initial documents if starting from first step
-    if getattr(opts, "start", 1) == 1 and initial_documents and flows:
-        # Get config from the first flow
-        first_flow_config = getattr(flows[0], "config", None)
-        if first_flow_config:
-            asyncio.run(
-                first_flow_config.save_documents(
-                    str(wd), initial_documents, validate_output_type=False
-                )
-            )
-
-    # Setup context stack with optional test harness and tracing
-    with ExitStack() as stack:
-        if trace_name:
-            stack.enter_context(
-                Laminar.start_as_current_span(
-                    name=f"{trace_name}-{project_name}", input=[opts.model_dump_json()]
-                )
-            )
-
-        if not settings.prefect_api_key and not _running_under_pytest():
-            stack.enter_context(prefect_test_harness())
-            stack.enter_context(disable_run_logger())
-
-        asyncio.run(
-            run_pipelines(
-                project_name=project_name,
-                output_dir=wd,
-                flows=flows,
-                flow_options=opts,
-                start_step=getattr(opts, "start", 1),
-                end_step=getattr(opts, "end", None),
-            )
-        )