ai-pipeline-core 0.2.9__py3-none-any.whl → 0.3.0__py3-none-any.whl

Files changed (30)
  1. ai_pipeline_core/__init__.py +14 -4
  2. ai_pipeline_core/deployment/__init__.py +46 -0
  3. ai_pipeline_core/deployment/base.py +681 -0
  4. ai_pipeline_core/deployment/contract.py +84 -0
  5. ai_pipeline_core/deployment/helpers.py +98 -0
  6. ai_pipeline_core/documents/flow_document.py +1 -1
  7. ai_pipeline_core/documents/task_document.py +1 -1
  8. ai_pipeline_core/documents/temporary_document.py +1 -1
  9. ai_pipeline_core/flow/config.py +13 -2
  10. ai_pipeline_core/flow/options.py +1 -1
  11. ai_pipeline_core/llm/client.py +1 -3
  12. ai_pipeline_core/llm/model_types.py +0 -1
  13. ai_pipeline_core/pipeline.py +1 -1
  14. ai_pipeline_core/progress.py +127 -0
  15. ai_pipeline_core/prompt_builder/__init__.py +5 -0
  16. ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +23 -0
  17. ai_pipeline_core/prompt_builder/global_cache.py +78 -0
  18. ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +6 -0
  19. ai_pipeline_core/prompt_builder/prompt_builder.py +253 -0
  20. ai_pipeline_core/prompt_builder/system_prompt.jinja2 +41 -0
  21. ai_pipeline_core/tracing.py +1 -1
  22. ai_pipeline_core/utils/remote_deployment.py +37 -187
  23. {ai_pipeline_core-0.2.9.dist-info → ai_pipeline_core-0.3.0.dist-info}/METADATA +23 -20
  24. ai_pipeline_core-0.3.0.dist-info/RECORD +49 -0
  25. {ai_pipeline_core-0.2.9.dist-info → ai_pipeline_core-0.3.0.dist-info}/WHEEL +1 -1
  26. ai_pipeline_core/simple_runner/__init__.py +0 -14
  27. ai_pipeline_core/simple_runner/cli.py +0 -254
  28. ai_pipeline_core/simple_runner/simple_runner.py +0 -247
  29. ai_pipeline_core-0.2.9.dist-info/RECORD +0 -41
  30. {ai_pipeline_core-0.2.9.dist-info → ai_pipeline_core-0.3.0.dist-info}/licenses/LICENSE +0 -0
{ai_pipeline_core-0.2.9.dist-info → ai_pipeline_core-0.3.0.dist-info}/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ai-pipeline-core
- Version: 0.2.9
+ Version: 0.3.0
  Summary: Core utilities for AI-powered processing pipelines using prefect
  Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
  Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -63,7 +63,7 @@ AI Pipeline Core is a production-ready framework that combines document processi
  - **Structured Output**: Type-safe generation with Pydantic model validation
  - **Workflow Orchestration**: Prefect-based flows and tasks with automatic retries
  - **Observability**: Built-in distributed tracing via Laminar (LMNR) with cost tracking for debugging and monitoring
- - **Local Development**: Simple runner for testing pipelines without infrastructure
+ - **Deployment**: Unified pipeline execution for local, CLI, and production environments

  ## Installation

@@ -177,7 +177,7 @@ doc = MyDocument.create(
  # Parse back to original type
  data = doc.parse(dict) # Returns {"key": "value"}

- # Document provenance tracking (new in v0.1.14)
+ # Document provenance tracking
  doc_with_sources = MyDocument.create(
      name="derived.json",
      content={"result": "processed"},
@@ -224,15 +224,15 @@ if doc.is_text:
  # Parse structured data
  data = doc.as_json() # or as_yaml(), as_pydantic_model()

- # Convert between document types (new in v0.2.1)
+ # Convert between document types
  task_doc = flow_doc.model_convert(TaskDocument) # Convert FlowDocument to TaskDocument
  new_doc = doc.model_convert(OtherDocType, content={"new": "data"}) # With content update

- # Enhanced filtering (new in v0.1.14)
+ # Enhanced filtering
  filtered = documents.filter_by([Doc1, Doc2, Doc3]) # Multiple types
  named = documents.filter_by(["file1.txt", "file2.txt"]) # Multiple names

- # Immutable collections (new in v0.2.1)
+ # Immutable collections
  frozen_docs = DocumentList(docs, frozen=True) # Immutable document list
  frozen_msgs = AIMessages(messages, frozen=True) # Immutable message list
  ```
@@ -268,7 +268,7 @@ r2 = await llm.generate(
      messages="Key points?" # Different query
  )

- # Custom cache TTL (new in v0.1.14)
+ # Custom cache TTL
  response = await llm.generate(
      model="gpt-5",
      context=static_context,
@@ -317,12 +317,12 @@ from ai_pipeline_core import pipeline_flow, pipeline_task, set_trace_cost
  @pipeline_task # Automatic retry, tracing, and monitoring
  async def process_chunk(data: str) -> str:
      result = await transform(data)
-     set_trace_cost(0.05) # Track costs (new in v0.1.14)
+     set_trace_cost(0.05) # Track costs
      return result

  @pipeline_flow(
      config=MyFlowConfig,
-     trace_trim_documents=True # Trim large documents in traces (new in v0.2.1)
+     trace_trim_documents=True # Trim large documents in traces
  )
  async def main_flow(
      project_name: str,
@@ -458,18 +458,21 @@ For AI assistants:
  ```
  ai-pipeline-core/
  ├── ai_pipeline_core/
- │   ├── documents/        # Document abstraction system
- │   ├── flow/             # Flow configuration and options
- │   ├── llm/              # LLM client and response handling
- │   ├── logging/          # Logging infrastructure
- │   ├── tracing.py        # Distributed tracing
- │   ├── pipeline.py       # Pipeline decorators
+ │   ├── deployment/       # Pipeline deployment and execution
+ │   ├── documents/        # Document abstraction system
+ │   ├── flow/             # Flow configuration and options
+ │   ├── llm/              # LLM client and response handling
+ │   ├── logging/          # Logging infrastructure
+ │   ├── prompt_builder/   # Document-aware prompt construction
+ │   ├── pipeline.py       # Pipeline decorators
+ │   ├── progress.py       # Intra-flow progress tracking
  │   ├── prompt_manager.py # Jinja2 template management
- │   └── settings.py       # Configuration management
- ├── tests/                # Comprehensive test suite
- ├── examples/             # Usage examples
- ├── API.md                # Complete API reference
- └── pyproject.toml        # Project configuration
+ │   ├── settings.py       # Configuration management
+ │   └── tracing.py        # Distributed tracing
+ ├── tests/                # Comprehensive test suite
+ ├── examples/             # Usage examples
+ ├── API.md                # Complete API reference
+ └── pyproject.toml        # Project configuration
  ```

  ## Contributing
ai_pipeline_core-0.3.0.dist-info/RECORD
@@ -0,0 +1,49 @@
+ ai_pipeline_core/__init__.py,sha256=q8sas8GxIyZf4h0RPqzv06ppo8hy0gl8-GjDEVh71XQ,6087
+ ai_pipeline_core/exceptions.py,sha256=vx-XLTw2fJSPs-vwtXVYtqoQUcOc0JeI7UmHqRqQYWU,1569
+ ai_pipeline_core/pipeline.py,sha256=t9qH-V6umpKY5MhGuXFgUGfdzGyxzVlS0n9RoKLfnug,28704
+ ai_pipeline_core/prefect.py,sha256=91ZgLJHsDsRUW77CpNmkKxYs3RCJuucPM3pjKmNBeDg,2199
+ ai_pipeline_core/progress.py,sha256=Ppxk4OOm84Y0x3t-Y3CmHsL4PovQLNUxXMu24zRCD-Q,3621
+ ai_pipeline_core/prompt_manager.py,sha256=FAtb1yK7bGuAeuIJ523LOX9bd7TrcHG-TqZ7Lz4RJC0,12087
+ ai_pipeline_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ ai_pipeline_core/settings.py,sha256=IMrFaX0i-WIlaOA5O53ipNSta6KQVSFHc1aJXmS3nSo,5078
+ ai_pipeline_core/tracing.py,sha256=YksAxjSJ7PgmrEQ5ZxfpEACZfD9G6KuV7b0LoGM-ogo,31538
+ ai_pipeline_core/deployment/__init__.py,sha256=FN2HVoM80x2GJuNs7o4DnccB8HWWibgM1pJesB942CM,1259
+ ai_pipeline_core/deployment/base.py,sha256=JYf8XLFR73c0H24dr6atK7yUcoE0vLxbYZ8EkQpEwN4,24791
+ ai_pipeline_core/deployment/contract.py,sha256=0DKt5eqNE-grcITwMNq9CuBdo5WxdopEjDeQFzFZxhU,2225
+ ai_pipeline_core/deployment/helpers.py,sha256=3nRuCyABkUEDZiL0q9u19XHpjA4527B6rsxQNOGTohw,3460
+ ai_pipeline_core/documents/__init__.py,sha256=WHStvGZiSyybOcMTYxSV24U6MA3Am_0_Az5p-DuMFrk,738
+ ai_pipeline_core/documents/document.py,sha256=hdTh36KGEcrDollTnQmTI66DJIqYfe4X42Y0q7Cm4fY,68153
+ ai_pipeline_core/documents/document_list.py,sha256=Y_NCjfM_CjkIwHRD2iyGgYBuIykN8lT2IIH_uWOiGis,16254
+ ai_pipeline_core/documents/flow_document.py,sha256=QK6RxNQu449IRAosOHSk3G_5yIq5I7yLBOSQPCd3m64,4141
+ ai_pipeline_core/documents/mime_type.py,sha256=JFEOq4HwlIW2snobyNfWwySdT7urZSWkobiRMVs2fSE,7959
+ ai_pipeline_core/documents/task_document.py,sha256=uASmAaxNkYtuqQrBM57vutFT9DXNTbqv0wbwwF55E3I,4300
+ ai_pipeline_core/documents/temporary_document.py,sha256=jaz2ZHC5CmSbVbkXdI7pOB5DGEuhH16C0Yutv-lS_UI,2708
+ ai_pipeline_core/documents/utils.py,sha256=ZyJNjFN7ihWno0K7dJZed7twYmmPLA0z40UzFw1A3A8,5465
+ ai_pipeline_core/flow/__init__.py,sha256=2BfWYMOPYW5teGzwo-qzpn_bom1lxxry0bPsjVgcsCk,188
+ ai_pipeline_core/flow/config.py,sha256=a9FALpgrFsdz-D7HU3diVeUzbaBvLwI8hsPviuj001s,19389
+ ai_pipeline_core/flow/options.py,sha256=mhToZ9u18WCMBEYJL1MYKzh8fH9lSsAUqQtU8tNnD18,2304
+ ai_pipeline_core/llm/__init__.py,sha256=3B_vtEzxrzidP1qOUNQ4RxlUmxZ2MBKQcUhQiTybM9g,661
+ ai_pipeline_core/llm/ai_messages.py,sha256=Onin3UPdbJQNl3WfY3-_jE5KRmF-ciXsa5K6UPOiy5s,14410
+ ai_pipeline_core/llm/client.py,sha256=4nCoJOdTtye1novQiUW3AFPjZBF_TfsD7J09sl9kbd4,24973
+ ai_pipeline_core/llm/model_options.py,sha256=uRNIHfVeh2sgt1mZBiOUx6hPQ6GKjB8b7TytZJ6afKg,11768
+ ai_pipeline_core/llm/model_response.py,sha256=-fKJcblDP_Z6NV9CGp4bm_hitb0Z0jyy0ZndCQUpRkQ,13493
+ ai_pipeline_core/llm/model_types.py,sha256=MukKpS7vWeWAfHhKDxRlQFm5jeBloT_o6amO4qUzjWo,2761
+ ai_pipeline_core/logging/__init__.py,sha256=Nz6-ghAoENsgNmLD2ma9TW9M0U2_QfxuQ5DDW6Vt6M0,651
+ ai_pipeline_core/logging/logging.yml,sha256=YTW48keO_K5bkkb-KXGM7ZuaYKiquLsjsURei8Ql0V4,1353
+ ai_pipeline_core/logging/logging_config.py,sha256=pV2x6GgMPXrzPH27sicCSXfw56beio4C2JKCJ3NsXrg,6207
+ ai_pipeline_core/logging/logging_mixin.py,sha256=OTye2pbUbG5oYZkI06TNkGCEa4y0ldePz5IAfdmNUPU,8090
+ ai_pipeline_core/prompt_builder/__init__.py,sha256=-v0SKZlir07xRzxXwv75VP66aINRUiKH0VUgB-PCDmI,195
+ ai_pipeline_core/prompt_builder/documents_prompt.jinja2,sha256=LPql5AaFhFWtDfhnBWvi-bWbz5vdgsWqKGzcqxWfLIM,1075
+ ai_pipeline_core/prompt_builder/global_cache.py,sha256=9_9zoF6-sr3KBMxF5QLD3vxqXg9B2tT8o9ViplzUCNg,2811
+ ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2,sha256=M8uPpwf-uLpsWWJT9DY_DnjrLToGPVnrD-gVhQrQdaQ,229
+ ai_pipeline_core/prompt_builder/prompt_builder.py,sha256=OAu3b8stzmFoAvPD7BDwnk8TkAxG8JDe3kAN7EhGTK0,9365
+ ai_pipeline_core/prompt_builder/system_prompt.jinja2,sha256=-1jLcfvAG07Zfl-dnYrjfVcAG4PWeeoeWpaKJGY3rKQ,3945
+ ai_pipeline_core/storage/__init__.py,sha256=tcIkjJ3zPBLCyetwiJDewBvS2sbRJrDlBh3gEsQm08E,184
+ ai_pipeline_core/storage/storage.py,sha256=ClMr419Y-eU2RuOjZYd51dC0stWQk28Vb56PvQaoUwc,20007
+ ai_pipeline_core/utils/__init__.py,sha256=TJSmEm1Quf-gKwXrxM96u2IGzVolUyeNNfLMPoLstXI,254
+ ai_pipeline_core/utils/deploy.py,sha256=rAtRuwkmGkc-fqvDMXpt08OzLrD7KTDMAmLDC9wYg7Y,13147
+ ai_pipeline_core/utils/remote_deployment.py,sha256=U7MNJ1SU1mg3RrJyLqpuN_4pwqm8LSsFZbypJvjGPoo,4630
+ ai_pipeline_core-0.3.0.dist-info/METADATA,sha256=qDOFXeCZIsQj85TBq59eadO_yNQQbHraP9ku3CE-xR0,15264
+ ai_pipeline_core-0.3.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ ai_pipeline_core-0.3.0.dist-info/licenses/LICENSE,sha256=kKj8mfbdWwkyG3U6n7ztB3bAZlEwShTkAsvaY657i3I,1074
+ ai_pipeline_core-0.3.0.dist-info/RECORD,,
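
Each RECORD row above follows the standard wheel layout: file path, `sha256=` plus the unpadded URL-safe base64 digest, and the file size in bytes. A minimal sketch of checking one row against an unpacked wheel (paths relative to the unpack root; the helper name is ours, not part of the package):

```python
# Sketch: verify one RECORD row of an unpacked wheel.
# Row format: "<path>,sha256=<urlsafe-b64 digest, no padding>,<size in bytes>".
import base64
import hashlib
from pathlib import Path

def check_record_row(row: str, root: Path = Path(".")) -> bool:
    path, hash_spec, size = row.rsplit(",", 2)
    if not hash_spec:
        return True  # the RECORD file itself is listed with empty hash/size fields
    data = (root / path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=").decode()
    return hash_spec == f"sha256={digest}" and int(size) == len(data)

# e.g. the new progress.py entry listed above:
row = "ai_pipeline_core/progress.py,sha256=Ppxk4OOm84Y0x3t-Y3CmHsL4PovQLNUxXMu24zRCD-Q,3621"
```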
{ai_pipeline_core-0.2.9.dist-info → ai_pipeline_core-0.3.0.dist-info}/WHEEL
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: hatchling 1.27.0
+ Generator: hatchling 1.28.0
  Root-Is-Purelib: true
  Tag: py3-none-any
ai_pipeline_core/simple_runner/__init__.py
@@ -1,14 +0,0 @@
- """Simple pipeline execution for local development.
-
- Utilities for running AI pipelines locally without full Prefect orchestration.
- """
-
- from .cli import run_cli
- from .simple_runner import FlowSequence, run_pipeline, run_pipelines
-
- __all__ = [
-     "run_cli",
-     "run_pipeline",
-     "run_pipelines",
-     "FlowSequence",
- ]
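
The deleted module above was the 0.2.9 public entry point for local runs, so imports of it fail on 0.3.0. A minimal sketch of a guarded import for downstream code (the fallback is illustrative; this diff does not show the new deployment package's API):

```python
# Sketch: code written against 0.2.9's simple_runner breaks on 0.3.0,
# because ai_pipeline_core.simple_runner was removed in this release.
try:
    from ai_pipeline_core.simple_runner import run_cli  # 0.2.9 public API (removed)
except ImportError:
    run_cli = None  # on 0.3.0, port the entry point to ai_pipeline_core.deployment
```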
ai_pipeline_core/simple_runner/cli.py
@@ -1,254 +0,0 @@
- """Command-line interface for simple pipeline execution."""
-
- import asyncio
- import os
- import sys
- from contextlib import ExitStack
- from pathlib import Path
- from typing import Callable, Type, TypeVar, cast
-
- from lmnr import Laminar
- from pydantic import ValidationError
- from pydantic_settings import CliPositionalArg, SettingsConfigDict
-
- from ai_pipeline_core.documents import DocumentList
- from ai_pipeline_core.flow.options import FlowOptions
- from ai_pipeline_core.logging import get_pipeline_logger, setup_logging
- from ai_pipeline_core.prefect import disable_run_logger, prefect_test_harness
- from ai_pipeline_core.settings import settings
-
- from .simple_runner import FlowSequence, run_pipelines
-
- logger = get_pipeline_logger(__name__)
-
- TOptions = TypeVar("TOptions", bound=FlowOptions)
- """Type variable for FlowOptions subclasses used in CLI."""
-
- InitializerFunc = Callable[[FlowOptions], tuple[str, DocumentList]] | None
- """Function type for custom pipeline initialization.
-
- Initializers can create initial documents or setup project state
- before flow execution begins.
-
- Args:
-     FlowOptions: Parsed CLI options
-
- Returns:
-     Tuple of (project_name, initial_documents) or None
- """
-
-
- def _initialize_environment() -> None:
-     """Initialize logging and observability systems.
-
-     Sets up the pipeline logging configuration and attempts to
-     initialize LMNR (Laminar) for distributed tracing. Failures
-     in LMNR initialization are logged but don't stop execution.
-
-     Side effects:
-         - Configures Python logging system
-         - Initializes Laminar SDK if API key is available
-         - Logs initialization status
-
-     Note:
-         Called automatically by run_cli before parsing arguments.
-     """
-     setup_logging()
-     try:
-         Laminar.initialize()
-         logger.info("LMNR tracing initialized.")
-     except Exception as e:
-         logger.warning(f"Failed to initialize LMNR tracing: {e}")
-
-
- def _running_under_pytest() -> bool:
-     """Check if code is running under pytest.
-
-     Detects pytest execution context to determine whether test
-     fixtures will provide necessary contexts (like Prefect test
-     harness). This prevents duplicate context setup.
-
-     Returns:
-         True if running under pytest, False otherwise.
-
-     Detection methods:
-         - PYTEST_CURRENT_TEST environment variable (set by pytest)
-         - 'pytest' module in sys.modules (imported by test runner)
-
-     Note:
-         Used to avoid setting up test harness when pytest fixtures
-         already provide it.
-     """
-     return "PYTEST_CURRENT_TEST" in os.environ or "pytest" in sys.modules
-
-
- def run_cli(
-     *,
-     flows: FlowSequence,
-     options_cls: Type[TOptions],
-     initializer: InitializerFunc = None,
-     trace_name: str | None = None,
- ) -> None:
-     """Execute pipeline flows from command-line arguments.
-
-     Environment setup:
-         - Initializes logging system
-         - Sets up LMNR tracing (if API key configured)
-         - Creates Prefect test harness (if no API key and not in pytest)
-         - Manages context stack for proper cleanup
-
-     Raises:
-         ValueError: If project name is empty after initialization.
-
-     Example:
-         >>> # In __main__.py
-         >>> from ai_pipeline_core import simple_runner
-         >>> from .flows import AnalysisFlow, SummaryFlow
-         >>> from .config import AnalysisOptions
-         >>>
-         >>> if __name__ == "__main__":
-         ...     simple_runner.run_cli(
-         ...         flows=[AnalysisFlow, SummaryFlow],
-         ...         options_cls=AnalysisOptions,
-         ...         trace_name="document-analysis"
-         ...     )
-
-     Command line:
-         $ python -m my_module ./output --temperature 0.5 --model gpt-5
-         $ python -m my_module ./output --start 2 # Skip first flow
-
-     Note:
-         - Field names are converted to kebab-case for CLI (max_tokens → --max-tokens)
-         - Boolean fields become flags (--verbose/--no-verbose)
-         - Field descriptions from Pydantic become help text
-         - Type hints are enforced during parsing
-         - Validation errors show helpful messages with field names
-         - Includes hints for common error types (numbers, ranges)
-         - Exits with status 1 on error
-         - Shows --help when no arguments provided
-     """
-     # Check if no arguments provided before initialization
-     if len(sys.argv) == 1:
-         # Add --help to show usage when run without arguments
-         sys.argv.append("--help")
-
-     _initialize_environment()
-
-     class _RunnerOptions( # type: ignore[reportRedeclaration]
-         options_cls,
-         cli_parse_args=True,
-         cli_kebab_case=True,
-         cli_exit_on_error=True, # Let it exit normally on error
-         cli_prog_name="ai-pipeline",
-         cli_use_class_docs_for_groups=True,
-     ):
-         """Internal options class combining user options with CLI arguments.
-
-         Dynamically created class that inherits from user's options_cls
-         and adds standard CLI arguments for pipeline execution.
-         """
-
-         working_directory: CliPositionalArg[Path]
-         project_name: str | None = None
-         start: int = 1
-         end: int | None = None
-
-         model_config = SettingsConfigDict(frozen=True, extra="ignore")
-
-     try:
-         opts = cast(FlowOptions, _RunnerOptions()) # type: ignore[reportCallIssue]
-     except ValidationError as e:
-         print("\nError: Invalid command line arguments\n", file=sys.stderr)
-         for error in e.errors():
-             field = " -> ".join(str(loc) for loc in error["loc"])
-             msg = error["msg"]
-             value = error.get("input", "")
-
-             # Format the field name nicely (convert from snake_case to kebab-case for CLI)
-             cli_field = field.replace("_", "-")
-
-             print(f" --{cli_field}: {msg}", file=sys.stderr)
-             if value:
-                 print(f" Provided value: '{value}'", file=sys.stderr)
-
-             # Add helpful hints for common errors
-             if error["type"] == "float_parsing":
-                 print(" Hint: Please provide a valid number (e.g., 0.7)", file=sys.stderr)
-             elif error["type"] == "int_parsing":
-                 print(" Hint: Please provide a valid integer (e.g., 10)", file=sys.stderr)
-             elif error["type"] == "literal_error":
-                 ctx = error.get("ctx", {})
-                 expected = ctx.get("expected", "valid options")
-                 print(f" Hint: Valid options are: {expected}", file=sys.stderr)
-             elif error["type"] in [
-                 "less_than_equal",
-                 "greater_than_equal",
-                 "less_than",
-                 "greater_than",
-             ]:
-                 ctx = error.get("ctx", {})
-                 if "le" in ctx:
-                     print(f" Hint: Value must be ≤ {ctx['le']}", file=sys.stderr)
-                 elif "ge" in ctx:
-                     print(f" Hint: Value must be ≥ {ctx['ge']}", file=sys.stderr)
-                 elif "lt" in ctx:
-                     print(f" Hint: Value must be < {ctx['lt']}", file=sys.stderr)
-                 elif "gt" in ctx:
-                     print(f" Hint: Value must be > {ctx['gt']}", file=sys.stderr)
-
-         print("\nRun with --help to see all available options\n", file=sys.stderr)
-         sys.exit(1)
-
-     wd: Path = cast(Path, getattr(opts, "working_directory"))
-     wd.mkdir(parents=True, exist_ok=True)
-
-     # Get project name from options or use directory basename
-     project_name = getattr(opts, "project_name", None)
-     if not project_name: # None or empty string
-         project_name = wd.name
-
-     # Ensure project_name is not empty
-     if not project_name:
-         raise ValueError("Project name cannot be empty")
-
-     # Use initializer if provided, otherwise use defaults
-     initial_documents = DocumentList([])
-     if initializer:
-         init_result = initializer(opts)
-         # Always expect tuple format from initializer
-         _, initial_documents = init_result # Ignore project name from initializer
-
-     # Save initial documents if starting from first step
-     if getattr(opts, "start", 1) == 1 and initial_documents and flows:
-         # Get config from the first flow
-         first_flow_config = getattr(flows[0], "config", None)
-         if first_flow_config:
-             asyncio.run(
-                 first_flow_config.save_documents(
-                     str(wd), initial_documents, validate_output_type=False
-                 )
-             )
-
-     # Setup context stack with optional test harness and tracing
-     with ExitStack() as stack:
-         if trace_name:
-             stack.enter_context(
-                 Laminar.start_as_current_span(
-                     name=f"{trace_name}-{project_name}", input=[opts.model_dump_json()]
-                 )
-             )
-
-         if not settings.prefect_api_key and not _running_under_pytest():
-             stack.enter_context(prefect_test_harness())
-             stack.enter_context(disable_run_logger())
-
-         asyncio.run(
-             run_pipelines(
-                 project_name=project_name,
-                 output_dir=wd,
-                 flows=flows,
-                 flow_options=opts,
-                 start_step=getattr(opts, "start", 1),
-                 end_step=getattr(opts, "end", None),
-             )
-         )
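
For context on the error-formatting loop in the deleted run_cli above: pydantic v2's ValidationError.errors() yields dicts with type, loc, msg, and input keys, and the loop maps loc to a kebab-case CLI flag. A minimal sketch using the runner's own `start` field:

```python
# Illustrative only: one entry of pydantic-v2 ValidationError.errors(),
# shaped the way the deleted run_cli() formatting loop consumes it.
error = {
    "type": "int_parsing",
    "loc": ("start",),
    "msg": "Input should be a valid integer, unable to parse string as an integer",
    "input": "abc",
}
field = " -> ".join(str(loc) for loc in error["loc"])  # "start"
cli_field = field.replace("_", "-")                    # printed as --start
print(f" --{cli_field}: {error['msg']}")
```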
ai_pipeline_core/simple_runner/simple_runner.py
@@ -1,247 +0,0 @@
- """Simple pipeline runner for local flow execution.
-
- This module provides the core functionality for running AI pipeline flows
- locally without full Prefect orchestration. It handles document I/O,
- flow sequencing, and error management.
-
- Key components:
-     - Document I/O from/to filesystem directories via FlowConfig
-     - Single and multi-flow execution
-     - Automatic document validation and passing between flows
-     - Step-based execution control (start/end steps)
-
- Directory structure:
-     working_dir/
-     ├── inputdocument/ # Documents of type InputDocument (lowercase)
-     │   ├── file1.txt
-     │   └── file1.txt.description.md # Optional description
-     └── outputdocument/ # Documents of type OutputDocument (lowercase)
-         └── result.json
-
- Example:
-     >>> from ai_pipeline_core import simple_runner
-     >>>
-     >>> # Run single flow
-     >>> results = await simple_runner.run_pipeline(
-     ...     flow_func=MyFlow,
-     ...     config=MyConfig,
-     ...     project_name="test",
-     ...     output_dir=Path("./output"),
-     ...     flow_options=options
-     ... )
-
- Note:
-     Document directories are organized by document type names (lowercase)
-     for consistent structure and easy access.
- """
-
- from pathlib import Path
- from typing import Any, Callable, Sequence
-
- from ai_pipeline_core.documents import DocumentList
- from ai_pipeline_core.flow.options import FlowOptions
- from ai_pipeline_core.logging import get_pipeline_logger
-
- logger = get_pipeline_logger(__name__)
-
- FlowSequence = Sequence[Callable[..., Any]]
- """Type alias for a sequence of flow functions."""
-
-
- async def run_pipeline(
-     flow_func: Callable[..., Any],
-     project_name: str,
-     output_dir: Path,
-     flow_options: FlowOptions,
-     flow_name: str | None = None,
- ) -> DocumentList:
-     """Execute a single pipeline flow with document I/O.
-
-     Runs a flow function with automatic document loading, validation,
-     and saving. The flow receives input documents from the filesystem
-     and saves its output for subsequent flows.
-
-     The execution proceeds through these steps:
-     1. Load input documents from output_dir subdirectories
-     2. Validate input documents against flow's config requirements
-     3. Execute flow function with documents and options
-     4. Validate output documents match config.OUTPUT_DOCUMENT_TYPE
-     5. Save output documents to output_dir subdirectories
-
-     Args:
-         flow_func: Async flow function decorated with @pipeline_flow.
-             Must accept (project_name, documents, flow_options).
-             The flow must have a config attribute set by @pipeline_flow.
-
-         project_name: Name of the project/pipeline for logging and tracking.
-
-         output_dir: Directory for loading input and saving output documents.
-             Document subdirectories are created as needed.
-
-         flow_options: Configuration options passed to the flow function.
-             Can be FlowOptions or any subclass.
-
-         flow_name: Optional display name for logging. If None, uses
-             flow_func.name or flow_func.__name__.
-
-     Returns:
-         DocumentList containing the flow's output documents.
-
-     Raises:
-         RuntimeError: If required input documents are missing or if
-             flow doesn't have a config attribute.
-
-     Example:
-         >>> from my_flows import AnalysisFlow
-         >>>
-         >>> results = await run_pipeline(
-         ...     flow_func=AnalysisFlow,
-         ...     project_name="analysis_001",
-         ...     output_dir=Path("./results"),
-         ...     flow_options=FlowOptions(temperature=0.7)
-         ... )
-         >>> print(f"Generated {len(results)} documents")
-
-     Note:
-         - Flow must be async (decorated with @pipeline_flow with config)
-         - Input documents are loaded based on flow's config.INPUT_DOCUMENT_TYPES
-         - Output is validated against config.OUTPUT_DOCUMENT_TYPE
-         - All I/O is logged for debugging
-     """
-     if flow_name is None:
-         # For Prefect Flow objects, use their name attribute
-         # For regular functions, fall back to __name__
-         flow_name = getattr(flow_func, "name", None) or getattr(flow_func, "__name__", "flow")
-
-     logger.info(f"Running Flow: {flow_name}")
-
-     # Get config from the flow function (attached by @pipeline_flow decorator)
-     config = getattr(flow_func, "config", None)
-     if config is None:
-         raise RuntimeError(
-             f"Flow {flow_name} does not have a config attribute. "
-             "Ensure it's decorated with @pipeline_flow(config=YourConfig)"
-         )
-
-     # Load input documents using FlowConfig's new async method
-     input_documents = await config.load_documents(str(output_dir))
-
-     if not config.has_input_documents(input_documents):
-         raise RuntimeError(f"Missing input documents for flow {flow_name}")
-
-     result_documents = await flow_func(project_name, input_documents, flow_options)
-
-     config.validate_output_documents(result_documents)
-
-     # Save output documents using FlowConfig's new async method
-     await config.save_documents(str(output_dir), result_documents)
-
-     logger.info(f"Completed Flow: {flow_name}")
-
-     return result_documents
-
-
- async def run_pipelines(
-     project_name: str,
-     output_dir: Path,
-     flows: FlowSequence,
-     flow_options: FlowOptions,
-     start_step: int = 1,
-     end_step: int | None = None,
- ) -> None:
-     """Execute multiple pipeline flows in sequence.
-
-     Runs a series of flows where each flow's output becomes the input
-     for the next flow. Supports partial execution with start/end steps
-     for debugging and resuming failed pipelines.
-
-     Execution proceeds by:
-     1. Validating step indices
-     2. For each flow in range [start_step, end_step]:
-         a. Loading input documents from output_dir
-         b. Executing flow with documents
-         c. Saving output documents to output_dir
-         d. Output becomes input for next flow
-     3. Logging progress and any failures
-
-     Steps are 1-based for user convenience. Step 1 is the first flow,
-     Step N is the Nth flow. Use start_step > 1 to skip initial flows
-     and end_step < N to stop early.
-
-     Args:
-         project_name: Name of the overall pipeline/project.
-         output_dir: Directory for document I/O between flows.
-             Shared by all flows in the sequence.
-         flows: Sequence of flow functions to execute in order.
-             Must all be async functions decorated with @pipeline_flow
-             with a config parameter.
-         flow_options: Options passed to all flows in the sequence.
-             Individual flows can use different fields.
-         start_step: First flow to execute (1-based index).
-             Default 1 starts from the beginning.
-         end_step: Last flow to execute (1-based index).
-             None runs through the last flow.
-
-     Raises:
-         ValueError: If start_step or end_step are out of range.
-         RuntimeError: If any flow doesn't have a config attribute.
-
-     Example:
-         >>> # Run full pipeline
-         >>> await run_pipelines(
-         ...     project_name="analysis",
-         ...     output_dir=Path("./work"),
-         ...     flows=[ExtractFlow, AnalyzeFlow, SummarizeFlow],
-         ...     flow_options=options
-         ... )
-         >>>
-         >>> # Run only steps 2-3 (skip extraction)
-         >>> await run_pipelines(
-         ...     ...,
-         ...     start_step=2,
-         ...     end_step=3
-         ... )
-
-     Note:
-         - Each flow must be decorated with @pipeline_flow(config=...)
-         - Each flow's output must match the next flow's input types
-         - Failed flows stop the entire pipeline
-         - Progress is logged with step numbers for debugging
-         - Documents persist in output_dir between runs
-     """
-     num_steps = len(flows)
-     start_index = start_step - 1
-     end_index = (end_step if end_step is not None else num_steps) - 1
-
-     if (
-         not (0 <= start_index < num_steps)
-         or not (0 <= end_index < num_steps)
-         or start_index > end_index
-     ):
-         raise ValueError("Invalid start/end steps.")
-
-     logger.info(f"Starting pipeline '{project_name}' (Steps {start_step} to {end_index + 1})")
-
-     for i in range(start_index, end_index + 1):
-         flow_func = flows[i]
-         # For Prefect Flow objects, use their name attribute; for functions, use __name__
-         flow_name = getattr(flow_func, "name", None) or getattr(
-             flow_func, "__name__", f"flow_{i + 1}"
-         )
-
-         logger.info(f"--- [Step {i + 1}/{num_steps}] Running Flow: {flow_name} ---")
-
-         try:
-             await run_pipeline(
-                 flow_func=flow_func,
-                 project_name=project_name,
-                 output_dir=output_dir,
-                 flow_options=flow_options,
-                 flow_name=f"[Step {i + 1}/{num_steps}] {flow_name}",
-             )
-
-         except Exception as e:
-             logger.error(
-                 f"--- [Step {i + 1}/{num_steps}] Flow {flow_name} Failed: {e} ---", exc_info=True
-             )
-             raise