ai-pipeline-core 0.2.9__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +14 -4
- ai_pipeline_core/deployment/__init__.py +46 -0
- ai_pipeline_core/deployment/base.py +681 -0
- ai_pipeline_core/deployment/contract.py +84 -0
- ai_pipeline_core/deployment/helpers.py +98 -0
- ai_pipeline_core/documents/flow_document.py +1 -1
- ai_pipeline_core/documents/task_document.py +1 -1
- ai_pipeline_core/documents/temporary_document.py +1 -1
- ai_pipeline_core/flow/config.py +13 -2
- ai_pipeline_core/flow/options.py +1 -1
- ai_pipeline_core/llm/client.py +1 -3
- ai_pipeline_core/llm/model_types.py +0 -1
- ai_pipeline_core/pipeline.py +1 -1
- ai_pipeline_core/progress.py +127 -0
- ai_pipeline_core/prompt_builder/__init__.py +5 -0
- ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +23 -0
- ai_pipeline_core/prompt_builder/global_cache.py +78 -0
- ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +6 -0
- ai_pipeline_core/prompt_builder/prompt_builder.py +253 -0
- ai_pipeline_core/prompt_builder/system_prompt.jinja2 +41 -0
- ai_pipeline_core/tracing.py +1 -1
- ai_pipeline_core/utils/remote_deployment.py +37 -187
- {ai_pipeline_core-0.2.9.dist-info → ai_pipeline_core-0.3.0.dist-info}/METADATA +23 -20
- ai_pipeline_core-0.3.0.dist-info/RECORD +49 -0
- {ai_pipeline_core-0.2.9.dist-info → ai_pipeline_core-0.3.0.dist-info}/WHEEL +1 -1
- ai_pipeline_core/simple_runner/__init__.py +0 -14
- ai_pipeline_core/simple_runner/cli.py +0 -254
- ai_pipeline_core/simple_runner/simple_runner.py +0 -247
- ai_pipeline_core-0.2.9.dist-info/RECORD +0 -41
- {ai_pipeline_core-0.2.9.dist-info → ai_pipeline_core-0.3.0.dist-info}/licenses/LICENSE +0 -0
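
As the listing shows, 0.3.0 removes the `simple_runner` package (`cli.py` and `simple_runner.py` are deleted in full below) and adds a `deployment` package along with the `prompt_builder` and `progress` modules. Downstream projects that used the 0.2.9 entry point will need to migrate; a minimal sketch of that now-removed usage, reconstructed from the deleted module's own docstring further down this diff (the `AnalysisFlow`, `SummaryFlow`, and `AnalysisOptions` names are hypothetical placeholders from that docstring, not part of ai-pipeline-core), looks like this:

```python
# 0.2.9-style __main__.py entry point, removed in 0.3.0.
from ai_pipeline_core import simple_runner

from .flows import AnalysisFlow, SummaryFlow  # hypothetical user-defined flows
from .config import AnalysisOptions           # hypothetical FlowOptions subclass

if __name__ == "__main__":
    simple_runner.run_cli(
        flows=[AnalysisFlow, SummaryFlow],
        options_cls=AnalysisOptions,
        trace_name="document-analysis",
    )
```

Per the removed docstring, this was invoked as `python -m my_module ./output --temperature 0.5 --model gpt-5`; the new `deployment`-based equivalent is not shown in this diff.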
--- ai_pipeline_core-0.2.9.dist-info/METADATA
+++ ai_pipeline_core-0.3.0.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-pipeline-core
-Version: 0.2.9
+Version: 0.3.0
 Summary: Core utilities for AI-powered processing pipelines using prefect
 Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
 Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -63,7 +63,7 @@ AI Pipeline Core is a production-ready framework that combines document processi
 - **Structured Output**: Type-safe generation with Pydantic model validation
 - **Workflow Orchestration**: Prefect-based flows and tasks with automatic retries
 - **Observability**: Built-in distributed tracing via Laminar (LMNR) with cost tracking for debugging and monitoring
-- **
+- **Deployment**: Unified pipeline execution for local, CLI, and production environments
 
 ## Installation
 
@@ -177,7 +177,7 @@ doc = MyDocument.create(
 # Parse back to original type
 data = doc.parse(dict)  # Returns {"key": "value"}
 
-# Document provenance tracking
+# Document provenance tracking
 doc_with_sources = MyDocument.create(
     name="derived.json",
     content={"result": "processed"},
@@ -224,15 +224,15 @@ if doc.is_text:
 # Parse structured data
 data = doc.as_json()  # or as_yaml(), as_pydantic_model()
 
-# Convert between document types
+# Convert between document types
 task_doc = flow_doc.model_convert(TaskDocument)  # Convert FlowDocument to TaskDocument
 new_doc = doc.model_convert(OtherDocType, content={"new": "data"})  # With content update
 
-# Enhanced filtering
+# Enhanced filtering
 filtered = documents.filter_by([Doc1, Doc2, Doc3])  # Multiple types
 named = documents.filter_by(["file1.txt", "file2.txt"])  # Multiple names
 
-# Immutable collections
+# Immutable collections
 frozen_docs = DocumentList(docs, frozen=True)  # Immutable document list
 frozen_msgs = AIMessages(messages, frozen=True)  # Immutable message list
 ```
@@ -268,7 +268,7 @@ r2 = await llm.generate(
     messages="Key points?"  # Different query
 )
 
-# Custom cache TTL
+# Custom cache TTL
 response = await llm.generate(
     model="gpt-5",
     context=static_context,
@@ -317,12 +317,12 @@ from ai_pipeline_core import pipeline_flow, pipeline_task, set_trace_cost
 @pipeline_task  # Automatic retry, tracing, and monitoring
 async def process_chunk(data: str) -> str:
     result = await transform(data)
-    set_trace_cost(0.05)  # Track costs
+    set_trace_cost(0.05)  # Track costs
     return result
 
 @pipeline_flow(
     config=MyFlowConfig,
-    trace_trim_documents=True  # Trim large documents in traces
+    trace_trim_documents=True  # Trim large documents in traces
 )
 async def main_flow(
     project_name: str,
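
For readability, the README snippet touched by the hunk above assembles to roughly the following self-contained sketch; `transform` and `MyFlowConfig` are placeholders not defined in this diff, and the `main_flow` signature follows the `(project_name, documents, flow_options)` convention documented in the removed simple_runner module below:

```python
from ai_pipeline_core import pipeline_flow, pipeline_task, set_trace_cost


@pipeline_task  # Automatic retry, tracing, and monitoring
async def process_chunk(data: str) -> str:
    result = await transform(data)  # hypothetical async helper
    set_trace_cost(0.05)  # Track costs
    return result


@pipeline_flow(
    config=MyFlowConfig,  # hypothetical FlowConfig subclass
    trace_trim_documents=True,  # Trim large documents in traces
)
async def main_flow(project_name: str, documents, flow_options):
    ...
```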
@@ -458,18 +458,21 @@ For AI assistants:
 ```
 ai-pipeline-core/
 ├── ai_pipeline_core/
-│   ├──
-│   ├──
-│   ├──
-│   ├──
-│   ├──
-│   ├──
+│   ├── deployment/          # Pipeline deployment and execution
+│   ├── documents/           # Document abstraction system
+│   ├── flow/                # Flow configuration and options
+│   ├── llm/                 # LLM client and response handling
+│   ├── logging/             # Logging infrastructure
+│   ├── prompt_builder/      # Document-aware prompt construction
+│   ├── pipeline.py          # Pipeline decorators
+│   ├── progress.py          # Intra-flow progress tracking
 │   ├── prompt_manager.py    # Jinja2 template management
-│
-
-├──
-├──
-
+│   ├── settings.py          # Configuration management
+│   └── tracing.py           # Distributed tracing
+├── tests/                   # Comprehensive test suite
+├── examples/                # Usage examples
+├── API.md                   # Complete API reference
+└── pyproject.toml           # Project configuration
 ```
 
 ## Contributing
--- /dev/null
+++ ai_pipeline_core-0.3.0.dist-info/RECORD
@@ -0,0 +1,49 @@
+ai_pipeline_core/__init__.py,sha256=q8sas8GxIyZf4h0RPqzv06ppo8hy0gl8-GjDEVh71XQ,6087
+ai_pipeline_core/exceptions.py,sha256=vx-XLTw2fJSPs-vwtXVYtqoQUcOc0JeI7UmHqRqQYWU,1569
+ai_pipeline_core/pipeline.py,sha256=t9qH-V6umpKY5MhGuXFgUGfdzGyxzVlS0n9RoKLfnug,28704
+ai_pipeline_core/prefect.py,sha256=91ZgLJHsDsRUW77CpNmkKxYs3RCJuucPM3pjKmNBeDg,2199
+ai_pipeline_core/progress.py,sha256=Ppxk4OOm84Y0x3t-Y3CmHsL4PovQLNUxXMu24zRCD-Q,3621
+ai_pipeline_core/prompt_manager.py,sha256=FAtb1yK7bGuAeuIJ523LOX9bd7TrcHG-TqZ7Lz4RJC0,12087
+ai_pipeline_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ai_pipeline_core/settings.py,sha256=IMrFaX0i-WIlaOA5O53ipNSta6KQVSFHc1aJXmS3nSo,5078
+ai_pipeline_core/tracing.py,sha256=YksAxjSJ7PgmrEQ5ZxfpEACZfD9G6KuV7b0LoGM-ogo,31538
+ai_pipeline_core/deployment/__init__.py,sha256=FN2HVoM80x2GJuNs7o4DnccB8HWWibgM1pJesB942CM,1259
+ai_pipeline_core/deployment/base.py,sha256=JYf8XLFR73c0H24dr6atK7yUcoE0vLxbYZ8EkQpEwN4,24791
+ai_pipeline_core/deployment/contract.py,sha256=0DKt5eqNE-grcITwMNq9CuBdo5WxdopEjDeQFzFZxhU,2225
+ai_pipeline_core/deployment/helpers.py,sha256=3nRuCyABkUEDZiL0q9u19XHpjA4527B6rsxQNOGTohw,3460
+ai_pipeline_core/documents/__init__.py,sha256=WHStvGZiSyybOcMTYxSV24U6MA3Am_0_Az5p-DuMFrk,738
+ai_pipeline_core/documents/document.py,sha256=hdTh36KGEcrDollTnQmTI66DJIqYfe4X42Y0q7Cm4fY,68153
+ai_pipeline_core/documents/document_list.py,sha256=Y_NCjfM_CjkIwHRD2iyGgYBuIykN8lT2IIH_uWOiGis,16254
+ai_pipeline_core/documents/flow_document.py,sha256=QK6RxNQu449IRAosOHSk3G_5yIq5I7yLBOSQPCd3m64,4141
+ai_pipeline_core/documents/mime_type.py,sha256=JFEOq4HwlIW2snobyNfWwySdT7urZSWkobiRMVs2fSE,7959
+ai_pipeline_core/documents/task_document.py,sha256=uASmAaxNkYtuqQrBM57vutFT9DXNTbqv0wbwwF55E3I,4300
+ai_pipeline_core/documents/temporary_document.py,sha256=jaz2ZHC5CmSbVbkXdI7pOB5DGEuhH16C0Yutv-lS_UI,2708
+ai_pipeline_core/documents/utils.py,sha256=ZyJNjFN7ihWno0K7dJZed7twYmmPLA0z40UzFw1A3A8,5465
+ai_pipeline_core/flow/__init__.py,sha256=2BfWYMOPYW5teGzwo-qzpn_bom1lxxry0bPsjVgcsCk,188
+ai_pipeline_core/flow/config.py,sha256=a9FALpgrFsdz-D7HU3diVeUzbaBvLwI8hsPviuj001s,19389
+ai_pipeline_core/flow/options.py,sha256=mhToZ9u18WCMBEYJL1MYKzh8fH9lSsAUqQtU8tNnD18,2304
+ai_pipeline_core/llm/__init__.py,sha256=3B_vtEzxrzidP1qOUNQ4RxlUmxZ2MBKQcUhQiTybM9g,661
+ai_pipeline_core/llm/ai_messages.py,sha256=Onin3UPdbJQNl3WfY3-_jE5KRmF-ciXsa5K6UPOiy5s,14410
+ai_pipeline_core/llm/client.py,sha256=4nCoJOdTtye1novQiUW3AFPjZBF_TfsD7J09sl9kbd4,24973
+ai_pipeline_core/llm/model_options.py,sha256=uRNIHfVeh2sgt1mZBiOUx6hPQ6GKjB8b7TytZJ6afKg,11768
+ai_pipeline_core/llm/model_response.py,sha256=-fKJcblDP_Z6NV9CGp4bm_hitb0Z0jyy0ZndCQUpRkQ,13493
+ai_pipeline_core/llm/model_types.py,sha256=MukKpS7vWeWAfHhKDxRlQFm5jeBloT_o6amO4qUzjWo,2761
+ai_pipeline_core/logging/__init__.py,sha256=Nz6-ghAoENsgNmLD2ma9TW9M0U2_QfxuQ5DDW6Vt6M0,651
+ai_pipeline_core/logging/logging.yml,sha256=YTW48keO_K5bkkb-KXGM7ZuaYKiquLsjsURei8Ql0V4,1353
+ai_pipeline_core/logging/logging_config.py,sha256=pV2x6GgMPXrzPH27sicCSXfw56beio4C2JKCJ3NsXrg,6207
+ai_pipeline_core/logging/logging_mixin.py,sha256=OTye2pbUbG5oYZkI06TNkGCEa4y0ldePz5IAfdmNUPU,8090
+ai_pipeline_core/prompt_builder/__init__.py,sha256=-v0SKZlir07xRzxXwv75VP66aINRUiKH0VUgB-PCDmI,195
+ai_pipeline_core/prompt_builder/documents_prompt.jinja2,sha256=LPql5AaFhFWtDfhnBWvi-bWbz5vdgsWqKGzcqxWfLIM,1075
+ai_pipeline_core/prompt_builder/global_cache.py,sha256=9_9zoF6-sr3KBMxF5QLD3vxqXg9B2tT8o9ViplzUCNg,2811
+ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2,sha256=M8uPpwf-uLpsWWJT9DY_DnjrLToGPVnrD-gVhQrQdaQ,229
+ai_pipeline_core/prompt_builder/prompt_builder.py,sha256=OAu3b8stzmFoAvPD7BDwnk8TkAxG8JDe3kAN7EhGTK0,9365
+ai_pipeline_core/prompt_builder/system_prompt.jinja2,sha256=-1jLcfvAG07Zfl-dnYrjfVcAG4PWeeoeWpaKJGY3rKQ,3945
+ai_pipeline_core/storage/__init__.py,sha256=tcIkjJ3zPBLCyetwiJDewBvS2sbRJrDlBh3gEsQm08E,184
+ai_pipeline_core/storage/storage.py,sha256=ClMr419Y-eU2RuOjZYd51dC0stWQk28Vb56PvQaoUwc,20007
+ai_pipeline_core/utils/__init__.py,sha256=TJSmEm1Quf-gKwXrxM96u2IGzVolUyeNNfLMPoLstXI,254
+ai_pipeline_core/utils/deploy.py,sha256=rAtRuwkmGkc-fqvDMXpt08OzLrD7KTDMAmLDC9wYg7Y,13147
+ai_pipeline_core/utils/remote_deployment.py,sha256=U7MNJ1SU1mg3RrJyLqpuN_4pwqm8LSsFZbypJvjGPoo,4630
+ai_pipeline_core-0.3.0.dist-info/METADATA,sha256=qDOFXeCZIsQj85TBq59eadO_yNQQbHraP9ku3CE-xR0,15264
+ai_pipeline_core-0.3.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ai_pipeline_core-0.3.0.dist-info/licenses/LICENSE,sha256=kKj8mfbdWwkyG3U6n7ztB3bAZlEwShTkAsvaY657i3I,1074
+ai_pipeline_core-0.3.0.dist-info/RECORD,,
--- ai_pipeline_core/simple_runner/__init__.py
+++ /dev/null
@@ -1,14 +0,0 @@
-"""Simple pipeline execution for local development.
-
-Utilities for running AI pipelines locally without full Prefect orchestration.
-"""
-
-from .cli import run_cli
-from .simple_runner import FlowSequence, run_pipeline, run_pipelines
-
-__all__ = [
-    "run_cli",
-    "run_pipeline",
-    "run_pipelines",
-    "FlowSequence",
-]
--- ai_pipeline_core/simple_runner/cli.py
+++ /dev/null
@@ -1,254 +0,0 @@
-"""Command-line interface for simple pipeline execution."""
-
-import asyncio
-import os
-import sys
-from contextlib import ExitStack
-from pathlib import Path
-from typing import Callable, Type, TypeVar, cast
-
-from lmnr import Laminar
-from pydantic import ValidationError
-from pydantic_settings import CliPositionalArg, SettingsConfigDict
-
-from ai_pipeline_core.documents import DocumentList
-from ai_pipeline_core.flow.options import FlowOptions
-from ai_pipeline_core.logging import get_pipeline_logger, setup_logging
-from ai_pipeline_core.prefect import disable_run_logger, prefect_test_harness
-from ai_pipeline_core.settings import settings
-
-from .simple_runner import FlowSequence, run_pipelines
-
-logger = get_pipeline_logger(__name__)
-
-TOptions = TypeVar("TOptions", bound=FlowOptions)
-"""Type variable for FlowOptions subclasses used in CLI."""
-
-InitializerFunc = Callable[[FlowOptions], tuple[str, DocumentList]] | None
-"""Function type for custom pipeline initialization.
-
-Initializers can create initial documents or setup project state
-before flow execution begins.
-
-Args:
-    FlowOptions: Parsed CLI options
-
-Returns:
-    Tuple of (project_name, initial_documents) or None
-"""
-
-
-def _initialize_environment() -> None:
-    """Initialize logging and observability systems.
-
-    Sets up the pipeline logging configuration and attempts to
-    initialize LMNR (Laminar) for distributed tracing. Failures
-    in LMNR initialization are logged but don't stop execution.
-
-    Side effects:
-        - Configures Python logging system
-        - Initializes Laminar SDK if API key is available
-        - Logs initialization status
-
-    Note:
-        Called automatically by run_cli before parsing arguments.
-    """
-    setup_logging()
-    try:
-        Laminar.initialize()
-        logger.info("LMNR tracing initialized.")
-    except Exception as e:
-        logger.warning(f"Failed to initialize LMNR tracing: {e}")
-
-
-def _running_under_pytest() -> bool:
-    """Check if code is running under pytest.
-
-    Detects pytest execution context to determine whether test
-    fixtures will provide necessary contexts (like Prefect test
-    harness). This prevents duplicate context setup.
-
-    Returns:
-        True if running under pytest, False otherwise.
-
-    Detection methods:
-        - PYTEST_CURRENT_TEST environment variable (set by pytest)
-        - 'pytest' module in sys.modules (imported by test runner)
-
-    Note:
-        Used to avoid setting up test harness when pytest fixtures
-        already provide it.
-    """
-    return "PYTEST_CURRENT_TEST" in os.environ or "pytest" in sys.modules
-
-
-def run_cli(
-    *,
-    flows: FlowSequence,
-    options_cls: Type[TOptions],
-    initializer: InitializerFunc = None,
-    trace_name: str | None = None,
-) -> None:
-    """Execute pipeline flows from command-line arguments.
-
-    Environment setup:
-        - Initializes logging system
-        - Sets up LMNR tracing (if API key configured)
-        - Creates Prefect test harness (if no API key and not in pytest)
-        - Manages context stack for proper cleanup
-
-    Raises:
-        ValueError: If project name is empty after initialization.
-
-    Example:
-        >>> # In __main__.py
-        >>> from ai_pipeline_core import simple_runner
-        >>> from .flows import AnalysisFlow, SummaryFlow
-        >>> from .config import AnalysisOptions
-        >>>
-        >>> if __name__ == "__main__":
-        ...     simple_runner.run_cli(
-        ...         flows=[AnalysisFlow, SummaryFlow],
-        ...         options_cls=AnalysisOptions,
-        ...         trace_name="document-analysis"
-        ...     )
-
-    Command line:
-        $ python -m my_module ./output --temperature 0.5 --model gpt-5
-        $ python -m my_module ./output --start 2  # Skip first flow
-
-    Note:
-        - Field names are converted to kebab-case for CLI (max_tokens → --max-tokens)
-        - Boolean fields become flags (--verbose/--no-verbose)
-        - Field descriptions from Pydantic become help text
-        - Type hints are enforced during parsing
-        - Validation errors show helpful messages with field names
-        - Includes hints for common error types (numbers, ranges)
-        - Exits with status 1 on error
-        - Shows --help when no arguments provided
-    """
-    # Check if no arguments provided before initialization
-    if len(sys.argv) == 1:
-        # Add --help to show usage when run without arguments
-        sys.argv.append("--help")
-
-    _initialize_environment()
-
-    class _RunnerOptions(  # type: ignore[reportRedeclaration]
-        options_cls,
-        cli_parse_args=True,
-        cli_kebab_case=True,
-        cli_exit_on_error=True,  # Let it exit normally on error
-        cli_prog_name="ai-pipeline",
-        cli_use_class_docs_for_groups=True,
-    ):
-        """Internal options class combining user options with CLI arguments.
-
-        Dynamically created class that inherits from user's options_cls
-        and adds standard CLI arguments for pipeline execution.
-        """
-
-        working_directory: CliPositionalArg[Path]
-        project_name: str | None = None
-        start: int = 1
-        end: int | None = None
-
-        model_config = SettingsConfigDict(frozen=True, extra="ignore")
-
-    try:
-        opts = cast(FlowOptions, _RunnerOptions())  # type: ignore[reportCallIssue]
-    except ValidationError as e:
-        print("\nError: Invalid command line arguments\n", file=sys.stderr)
-        for error in e.errors():
-            field = " -> ".join(str(loc) for loc in error["loc"])
-            msg = error["msg"]
-            value = error.get("input", "")
-
-            # Format the field name nicely (convert from snake_case to kebab-case for CLI)
-            cli_field = field.replace("_", "-")
-
-            print(f" --{cli_field}: {msg}", file=sys.stderr)
-            if value:
-                print(f" Provided value: '{value}'", file=sys.stderr)
-
-            # Add helpful hints for common errors
-            if error["type"] == "float_parsing":
-                print(" Hint: Please provide a valid number (e.g., 0.7)", file=sys.stderr)
-            elif error["type"] == "int_parsing":
-                print(" Hint: Please provide a valid integer (e.g., 10)", file=sys.stderr)
-            elif error["type"] == "literal_error":
-                ctx = error.get("ctx", {})
-                expected = ctx.get("expected", "valid options")
-                print(f" Hint: Valid options are: {expected}", file=sys.stderr)
-            elif error["type"] in [
-                "less_than_equal",
-                "greater_than_equal",
-                "less_than",
-                "greater_than",
-            ]:
-                ctx = error.get("ctx", {})
-                if "le" in ctx:
-                    print(f" Hint: Value must be ≤ {ctx['le']}", file=sys.stderr)
-                elif "ge" in ctx:
-                    print(f" Hint: Value must be ≥ {ctx['ge']}", file=sys.stderr)
-                elif "lt" in ctx:
-                    print(f" Hint: Value must be < {ctx['lt']}", file=sys.stderr)
-                elif "gt" in ctx:
-                    print(f" Hint: Value must be > {ctx['gt']}", file=sys.stderr)
-
-        print("\nRun with --help to see all available options\n", file=sys.stderr)
-        sys.exit(1)
-
-    wd: Path = cast(Path, getattr(opts, "working_directory"))
-    wd.mkdir(parents=True, exist_ok=True)
-
-    # Get project name from options or use directory basename
-    project_name = getattr(opts, "project_name", None)
-    if not project_name:  # None or empty string
-        project_name = wd.name
-
-    # Ensure project_name is not empty
-    if not project_name:
-        raise ValueError("Project name cannot be empty")
-
-    # Use initializer if provided, otherwise use defaults
-    initial_documents = DocumentList([])
-    if initializer:
-        init_result = initializer(opts)
-        # Always expect tuple format from initializer
-        _, initial_documents = init_result  # Ignore project name from initializer
-
-    # Save initial documents if starting from first step
-    if getattr(opts, "start", 1) == 1 and initial_documents and flows:
-        # Get config from the first flow
-        first_flow_config = getattr(flows[0], "config", None)
-        if first_flow_config:
-            asyncio.run(
-                first_flow_config.save_documents(
-                    str(wd), initial_documents, validate_output_type=False
-                )
-            )
-
-    # Setup context stack with optional test harness and tracing
-    with ExitStack() as stack:
-        if trace_name:
-            stack.enter_context(
-                Laminar.start_as_current_span(
-                    name=f"{trace_name}-{project_name}", input=[opts.model_dump_json()]
-                )
-            )
-
-        if not settings.prefect_api_key and not _running_under_pytest():
-            stack.enter_context(prefect_test_harness())
-            stack.enter_context(disable_run_logger())
-
-        asyncio.run(
-            run_pipelines(
-                project_name=project_name,
-                output_dir=wd,
-                flows=flows,
-                flow_options=opts,
-                start_step=getattr(opts, "start", 1),
-                end_step=getattr(opts, "end", None),
-            )
-        )
--- ai_pipeline_core/simple_runner/simple_runner.py
+++ /dev/null
@@ -1,247 +0,0 @@
-"""Simple pipeline runner for local flow execution.
-
-This module provides the core functionality for running AI pipeline flows
-locally without full Prefect orchestration. It handles document I/O,
-flow sequencing, and error management.
-
-Key components:
-    - Document I/O from/to filesystem directories via FlowConfig
-    - Single and multi-flow execution
-    - Automatic document validation and passing between flows
-    - Step-based execution control (start/end steps)
-
-Directory structure:
-    working_dir/
-    ├── inputdocument/    # Documents of type InputDocument (lowercase)
-    │   ├── file1.txt
-    │   └── file1.txt.description.md  # Optional description
-    └── outputdocument/   # Documents of type OutputDocument (lowercase)
-        └── result.json
-
-Example:
-    >>> from ai_pipeline_core import simple_runner
-    >>>
-    >>> # Run single flow
-    >>> results = await simple_runner.run_pipeline(
-    ...     flow_func=MyFlow,
-    ...     config=MyConfig,
-    ...     project_name="test",
-    ...     output_dir=Path("./output"),
-    ...     flow_options=options
-    ... )
-
-Note:
-    Document directories are organized by document type names (lowercase)
-    for consistent structure and easy access.
-"""
-
-from pathlib import Path
-from typing import Any, Callable, Sequence
-
-from ai_pipeline_core.documents import DocumentList
-from ai_pipeline_core.flow.options import FlowOptions
-from ai_pipeline_core.logging import get_pipeline_logger
-
-logger = get_pipeline_logger(__name__)
-
-FlowSequence = Sequence[Callable[..., Any]]
-"""Type alias for a sequence of flow functions."""
-
-
-async def run_pipeline(
-    flow_func: Callable[..., Any],
-    project_name: str,
-    output_dir: Path,
-    flow_options: FlowOptions,
-    flow_name: str | None = None,
-) -> DocumentList:
-    """Execute a single pipeline flow with document I/O.
-
-    Runs a flow function with automatic document loading, validation,
-    and saving. The flow receives input documents from the filesystem
-    and saves its output for subsequent flows.
-
-    The execution proceeds through these steps:
-    1. Load input documents from output_dir subdirectories
-    2. Validate input documents against flow's config requirements
-    3. Execute flow function with documents and options
-    4. Validate output documents match config.OUTPUT_DOCUMENT_TYPE
-    5. Save output documents to output_dir subdirectories
-
-    Args:
-        flow_func: Async flow function decorated with @pipeline_flow.
-            Must accept (project_name, documents, flow_options).
-            The flow must have a config attribute set by @pipeline_flow.
-
-        project_name: Name of the project/pipeline for logging and tracking.
-
-        output_dir: Directory for loading input and saving output documents.
-            Document subdirectories are created as needed.
-
-        flow_options: Configuration options passed to the flow function.
-            Can be FlowOptions or any subclass.
-
-        flow_name: Optional display name for logging. If None, uses
-            flow_func.name or flow_func.__name__.
-
-    Returns:
-        DocumentList containing the flow's output documents.
-
-    Raises:
-        RuntimeError: If required input documents are missing or if
-            flow doesn't have a config attribute.
-
-    Example:
-        >>> from my_flows import AnalysisFlow
-        >>>
-        >>> results = await run_pipeline(
-        ...     flow_func=AnalysisFlow,
-        ...     project_name="analysis_001",
-        ...     output_dir=Path("./results"),
-        ...     flow_options=FlowOptions(temperature=0.7)
-        ... )
-        >>> print(f"Generated {len(results)} documents")
-
-    Note:
-        - Flow must be async (decorated with @pipeline_flow with config)
-        - Input documents are loaded based on flow's config.INPUT_DOCUMENT_TYPES
-        - Output is validated against config.OUTPUT_DOCUMENT_TYPE
-        - All I/O is logged for debugging
-    """
-    if flow_name is None:
-        # For Prefect Flow objects, use their name attribute
-        # For regular functions, fall back to __name__
-        flow_name = getattr(flow_func, "name", None) or getattr(flow_func, "__name__", "flow")
-
-    logger.info(f"Running Flow: {flow_name}")
-
-    # Get config from the flow function (attached by @pipeline_flow decorator)
-    config = getattr(flow_func, "config", None)
-    if config is None:
-        raise RuntimeError(
-            f"Flow {flow_name} does not have a config attribute. "
-            "Ensure it's decorated with @pipeline_flow(config=YourConfig)"
-        )
-
-    # Load input documents using FlowConfig's new async method
-    input_documents = await config.load_documents(str(output_dir))
-
-    if not config.has_input_documents(input_documents):
-        raise RuntimeError(f"Missing input documents for flow {flow_name}")
-
-    result_documents = await flow_func(project_name, input_documents, flow_options)
-
-    config.validate_output_documents(result_documents)
-
-    # Save output documents using FlowConfig's new async method
-    await config.save_documents(str(output_dir), result_documents)
-
-    logger.info(f"Completed Flow: {flow_name}")
-
-    return result_documents
-
-
-async def run_pipelines(
-    project_name: str,
-    output_dir: Path,
-    flows: FlowSequence,
-    flow_options: FlowOptions,
-    start_step: int = 1,
-    end_step: int | None = None,
-) -> None:
-    """Execute multiple pipeline flows in sequence.
-
-    Runs a series of flows where each flow's output becomes the input
-    for the next flow. Supports partial execution with start/end steps
-    for debugging and resuming failed pipelines.
-
-    Execution proceeds by:
-    1. Validating step indices
-    2. For each flow in range [start_step, end_step]:
-       a. Loading input documents from output_dir
-       b. Executing flow with documents
-       c. Saving output documents to output_dir
-       d. Output becomes input for next flow
-    3. Logging progress and any failures
-
-    Steps are 1-based for user convenience. Step 1 is the first flow,
-    Step N is the Nth flow. Use start_step > 1 to skip initial flows
-    and end_step < N to stop early.
-
-    Args:
-        project_name: Name of the overall pipeline/project.
-        output_dir: Directory for document I/O between flows.
-            Shared by all flows in the sequence.
-        flows: Sequence of flow functions to execute in order.
-            Must all be async functions decorated with @pipeline_flow
-            with a config parameter.
-        flow_options: Options passed to all flows in the sequence.
-            Individual flows can use different fields.
-        start_step: First flow to execute (1-based index).
-            Default 1 starts from the beginning.
-        end_step: Last flow to execute (1-based index).
-            None runs through the last flow.
-
-    Raises:
-        ValueError: If start_step or end_step are out of range.
-        RuntimeError: If any flow doesn't have a config attribute.
-
-    Example:
-        >>> # Run full pipeline
-        >>> await run_pipelines(
-        ...     project_name="analysis",
-        ...     output_dir=Path("./work"),
-        ...     flows=[ExtractFlow, AnalyzeFlow, SummarizeFlow],
-        ...     flow_options=options
-        ... )
-        >>>
-        >>> # Run only steps 2-3 (skip extraction)
-        >>> await run_pipelines(
-        ...     ...,
-        ...     start_step=2,
-        ...     end_step=3
-        ... )
-
-    Note:
-        - Each flow must be decorated with @pipeline_flow(config=...)
-        - Each flow's output must match the next flow's input types
-        - Failed flows stop the entire pipeline
-        - Progress is logged with step numbers for debugging
-        - Documents persist in output_dir between runs
-    """
-    num_steps = len(flows)
-    start_index = start_step - 1
-    end_index = (end_step if end_step is not None else num_steps) - 1
-
-    if (
-        not (0 <= start_index < num_steps)
-        or not (0 <= end_index < num_steps)
-        or start_index > end_index
-    ):
-        raise ValueError("Invalid start/end steps.")
-
-    logger.info(f"Starting pipeline '{project_name}' (Steps {start_step} to {end_index + 1})")
-
-    for i in range(start_index, end_index + 1):
-        flow_func = flows[i]
-        # For Prefect Flow objects, use their name attribute; for functions, use __name__
-        flow_name = getattr(flow_func, "name", None) or getattr(
-            flow_func, "__name__", f"flow_{i + 1}"
-        )
-
-        logger.info(f"--- [Step {i + 1}/{num_steps}] Running Flow: {flow_name} ---")
-
-        try:
-            await run_pipeline(
-                flow_func=flow_func,
-                project_name=project_name,
-                output_dir=output_dir,
-                flow_options=flow_options,
-                flow_name=f"[Step {i + 1}/{num_steps}] {flow_name}",
-            )
-
-        except Exception as e:
-            logger.error(
-                f"--- [Step {i + 1}/{num_steps}] Flow {flow_name} Failed: {e} ---", exc_info=True
-            )
-            raise