ai-pipeline-core 0.2.9__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. ai_pipeline_core/__init__.py +32 -5
  2. ai_pipeline_core/debug/__init__.py +26 -0
  3. ai_pipeline_core/debug/config.py +91 -0
  4. ai_pipeline_core/debug/content.py +705 -0
  5. ai_pipeline_core/debug/processor.py +99 -0
  6. ai_pipeline_core/debug/summary.py +236 -0
  7. ai_pipeline_core/debug/writer.py +913 -0
  8. ai_pipeline_core/deployment/__init__.py +46 -0
  9. ai_pipeline_core/deployment/base.py +681 -0
  10. ai_pipeline_core/deployment/contract.py +84 -0
  11. ai_pipeline_core/deployment/helpers.py +98 -0
  12. ai_pipeline_core/documents/flow_document.py +1 -1
  13. ai_pipeline_core/documents/task_document.py +1 -1
  14. ai_pipeline_core/documents/temporary_document.py +1 -1
  15. ai_pipeline_core/flow/config.py +13 -2
  16. ai_pipeline_core/flow/options.py +4 -4
  17. ai_pipeline_core/images/__init__.py +362 -0
  18. ai_pipeline_core/images/_processing.py +157 -0
  19. ai_pipeline_core/llm/ai_messages.py +25 -4
  20. ai_pipeline_core/llm/client.py +15 -19
  21. ai_pipeline_core/llm/model_response.py +5 -5
  22. ai_pipeline_core/llm/model_types.py +10 -13
  23. ai_pipeline_core/logging/logging_mixin.py +2 -2
  24. ai_pipeline_core/pipeline.py +1 -1
  25. ai_pipeline_core/progress.py +127 -0
  26. ai_pipeline_core/prompt_builder/__init__.py +5 -0
  27. ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +23 -0
  28. ai_pipeline_core/prompt_builder/global_cache.py +78 -0
  29. ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +6 -0
  30. ai_pipeline_core/prompt_builder/prompt_builder.py +253 -0
  31. ai_pipeline_core/prompt_builder/system_prompt.jinja2 +41 -0
  32. ai_pipeline_core/tracing.py +54 -2
  33. ai_pipeline_core/utils/deploy.py +214 -6
  34. ai_pipeline_core/utils/remote_deployment.py +37 -187
  35. {ai_pipeline_core-0.2.9.dist-info → ai_pipeline_core-0.3.3.dist-info}/METADATA +96 -27
  36. ai_pipeline_core-0.3.3.dist-info/RECORD +57 -0
  37. {ai_pipeline_core-0.2.9.dist-info → ai_pipeline_core-0.3.3.dist-info}/WHEEL +1 -1
  38. ai_pipeline_core/simple_runner/__init__.py +0 -14
  39. ai_pipeline_core/simple_runner/cli.py +0 -254
  40. ai_pipeline_core/simple_runner/simple_runner.py +0 -247
  41. ai_pipeline_core-0.2.9.dist-info/RECORD +0 -41
  42. {ai_pipeline_core-0.2.9.dist-info → ai_pipeline_core-0.3.3.dist-info}/licenses/LICENSE +0 -0
@@ -1,247 +0,0 @@
1
- """Simple pipeline runner for local flow execution.
2
-
3
- This module provides the core functionality for running AI pipeline flows
4
- locally without full Prefect orchestration. It handles document I/O,
5
- flow sequencing, and error management.
6
-
7
- Key components:
8
- - Document I/O from/to filesystem directories via FlowConfig
9
- - Single and multi-flow execution
10
- - Automatic document validation and passing between flows
11
- - Step-based execution control (start/end steps)
12
-
13
- Directory structure:
14
- working_dir/
15
- ├── inputdocument/ # Documents of type InputDocument (lowercase)
16
- │ ├── file1.txt
17
- │ └── file1.txt.description.md # Optional description
18
- └── outputdocument/ # Documents of type OutputDocument (lowercase)
19
- └── result.json
20
-
21
- Example:
22
- >>> from ai_pipeline_core import simple_runner
23
- >>>
24
- >>> # Run single flow
25
- >>> results = await simple_runner.run_pipeline(
26
- ... flow_func=MyFlow,
27
- ... config=MyConfig,
28
- ... project_name="test",
29
- ... output_dir=Path("./output"),
30
- ... flow_options=options
31
- ... )
32
-
33
- Note:
34
- Document directories are organized by document type names (lowercase)
35
- for consistent structure and easy access.
36
- """
37
-
38
- from pathlib import Path
39
- from typing import Any, Callable, Sequence
40
-
41
- from ai_pipeline_core.documents import DocumentList
42
- from ai_pipeline_core.flow.options import FlowOptions
43
- from ai_pipeline_core.logging import get_pipeline_logger
44
-
45
- logger = get_pipeline_logger(__name__)
46
-
47
- FlowSequence = Sequence[Callable[..., Any]]
48
- """Type alias for a sequence of flow functions."""
49
-
50
-
51
- async def run_pipeline(
52
- flow_func: Callable[..., Any],
53
- project_name: str,
54
- output_dir: Path,
55
- flow_options: FlowOptions,
56
- flow_name: str | None = None,
57
- ) -> DocumentList:
58
- """Execute a single pipeline flow with document I/O.
59
-
60
- Runs a flow function with automatic document loading, validation,
61
- and saving. The flow receives input documents from the filesystem
62
- and saves its output for subsequent flows.
63
-
64
- The execution proceeds through these steps:
65
- 1. Load input documents from output_dir subdirectories
66
- 2. Validate input documents against flow's config requirements
67
- 3. Execute flow function with documents and options
68
- 4. Validate output documents match config.OUTPUT_DOCUMENT_TYPE
69
- 5. Save output documents to output_dir subdirectories
70
-
71
- Args:
72
- flow_func: Async flow function decorated with @pipeline_flow.
73
- Must accept (project_name, documents, flow_options).
74
- The flow must have a config attribute set by @pipeline_flow.
75
-
76
- project_name: Name of the project/pipeline for logging and tracking.
77
-
78
- output_dir: Directory for loading input and saving output documents.
79
- Document subdirectories are created as needed.
80
-
81
- flow_options: Configuration options passed to the flow function.
82
- Can be FlowOptions or any subclass.
83
-
84
- flow_name: Optional display name for logging. If None, uses
85
- flow_func.name or flow_func.__name__.
86
-
87
- Returns:
88
- DocumentList containing the flow's output documents.
89
-
90
- Raises:
91
- RuntimeError: If required input documents are missing or if
92
- flow doesn't have a config attribute.
93
-
94
- Example:
95
- >>> from my_flows import AnalysisFlow
96
- >>>
97
- >>> results = await run_pipeline(
98
- ... flow_func=AnalysisFlow,
99
- ... project_name="analysis_001",
100
- ... output_dir=Path("./results"),
101
- ... flow_options=FlowOptions(temperature=0.7)
102
- ... )
103
- >>> print(f"Generated {len(results)} documents")
104
-
105
- Note:
106
- - Flow must be async (decorated with @pipeline_flow with config)
107
- - Input documents are loaded based on flow's config.INPUT_DOCUMENT_TYPES
108
- - Output is validated against config.OUTPUT_DOCUMENT_TYPE
109
- - All I/O is logged for debugging
110
- """
111
- if flow_name is None:
112
- # For Prefect Flow objects, use their name attribute
113
- # For regular functions, fall back to __name__
114
- flow_name = getattr(flow_func, "name", None) or getattr(flow_func, "__name__", "flow")
115
-
116
- logger.info(f"Running Flow: {flow_name}")
117
-
118
- # Get config from the flow function (attached by @pipeline_flow decorator)
119
- config = getattr(flow_func, "config", None)
120
- if config is None:
121
- raise RuntimeError(
122
- f"Flow {flow_name} does not have a config attribute. "
123
- "Ensure it's decorated with @pipeline_flow(config=YourConfig)"
124
- )
125
-
126
- # Load input documents using FlowConfig's new async method
127
- input_documents = await config.load_documents(str(output_dir))
128
-
129
- if not config.has_input_documents(input_documents):
130
- raise RuntimeError(f"Missing input documents for flow {flow_name}")
131
-
132
- result_documents = await flow_func(project_name, input_documents, flow_options)
133
-
134
- config.validate_output_documents(result_documents)
135
-
136
- # Save output documents using FlowConfig's new async method
137
- await config.save_documents(str(output_dir), result_documents)
138
-
139
- logger.info(f"Completed Flow: {flow_name}")
140
-
141
- return result_documents
142
-
143
-
144
- async def run_pipelines(
145
- project_name: str,
146
- output_dir: Path,
147
- flows: FlowSequence,
148
- flow_options: FlowOptions,
149
- start_step: int = 1,
150
- end_step: int | None = None,
151
- ) -> None:
152
- """Execute multiple pipeline flows in sequence.
153
-
154
- Runs a series of flows where each flow's output becomes the input
155
- for the next flow. Supports partial execution with start/end steps
156
- for debugging and resuming failed pipelines.
157
-
158
- Execution proceeds by:
159
- 1. Validating step indices
160
- 2. For each flow in range [start_step, end_step]:
161
- a. Loading input documents from output_dir
162
- b. Executing flow with documents
163
- c. Saving output documents to output_dir
164
- d. Output becomes input for next flow
165
- 3. Logging progress and any failures
166
-
167
- Steps are 1-based for user convenience. Step 1 is the first flow,
168
- Step N is the Nth flow. Use start_step > 1 to skip initial flows
169
- and end_step < N to stop early.
170
-
171
- Args:
172
- project_name: Name of the overall pipeline/project.
173
- output_dir: Directory for document I/O between flows.
174
- Shared by all flows in the sequence.
175
- flows: Sequence of flow functions to execute in order.
176
- Must all be async functions decorated with @pipeline_flow
177
- with a config parameter.
178
- flow_options: Options passed to all flows in the sequence.
179
- Individual flows can use different fields.
180
- start_step: First flow to execute (1-based index).
181
- Default 1 starts from the beginning.
182
- end_step: Last flow to execute (1-based index).
183
- None runs through the last flow.
184
-
185
- Raises:
186
- ValueError: If start_step or end_step are out of range.
187
- RuntimeError: If any flow doesn't have a config attribute.
188
-
189
- Example:
190
- >>> # Run full pipeline
191
- >>> await run_pipelines(
192
- ... project_name="analysis",
193
- ... output_dir=Path("./work"),
194
- ... flows=[ExtractFlow, AnalyzeFlow, SummarizeFlow],
195
- ... flow_options=options
196
- ... )
197
- >>>
198
- >>> # Run only steps 2-3 (skip extraction)
199
- >>> await run_pipelines(
200
- ... ...,
201
- ... start_step=2,
202
- ... end_step=3
203
- ... )
204
-
205
- Note:
206
- - Each flow must be decorated with @pipeline_flow(config=...)
207
- - Each flow's output must match the next flow's input types
208
- - Failed flows stop the entire pipeline
209
- - Progress is logged with step numbers for debugging
210
- - Documents persist in output_dir between runs
211
- """
212
- num_steps = len(flows)
213
- start_index = start_step - 1
214
- end_index = (end_step if end_step is not None else num_steps) - 1
215
-
216
- if (
217
- not (0 <= start_index < num_steps)
218
- or not (0 <= end_index < num_steps)
219
- or start_index > end_index
220
- ):
221
- raise ValueError("Invalid start/end steps.")
222
-
223
- logger.info(f"Starting pipeline '{project_name}' (Steps {start_step} to {end_index + 1})")
224
-
225
- for i in range(start_index, end_index + 1):
226
- flow_func = flows[i]
227
- # For Prefect Flow objects, use their name attribute; for functions, use __name__
228
- flow_name = getattr(flow_func, "name", None) or getattr(
229
- flow_func, "__name__", f"flow_{i + 1}"
230
- )
231
-
232
- logger.info(f"--- [Step {i + 1}/{num_steps}] Running Flow: {flow_name} ---")
233
-
234
- try:
235
- await run_pipeline(
236
- flow_func=flow_func,
237
- project_name=project_name,
238
- output_dir=output_dir,
239
- flow_options=flow_options,
240
- flow_name=f"[Step {i + 1}/{num_steps}] {flow_name}",
241
- )
242
-
243
- except Exception as e:
244
- logger.error(
245
- f"--- [Step {i + 1}/{num_steps}] Flow {flow_name} Failed: {e} ---", exc_info=True
246
- )
247
- raise
@@ -1,41 +0,0 @@
1
- ai_pipeline_core/__init__.py,sha256=o0J4DdxbwUbZ6I07G0OVuCnc28NsywvNqEeiYuCmYI0,5720
2
- ai_pipeline_core/exceptions.py,sha256=vx-XLTw2fJSPs-vwtXVYtqoQUcOc0JeI7UmHqRqQYWU,1569
3
- ai_pipeline_core/pipeline.py,sha256=fWTVmrnOEIFge6o2NUYW2ndGef5UurpL8_fK5tkXbzI,28700
4
- ai_pipeline_core/prefect.py,sha256=91ZgLJHsDsRUW77CpNmkKxYs3RCJuucPM3pjKmNBeDg,2199
5
- ai_pipeline_core/prompt_manager.py,sha256=FAtb1yK7bGuAeuIJ523LOX9bd7TrcHG-TqZ7Lz4RJC0,12087
6
- ai_pipeline_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- ai_pipeline_core/settings.py,sha256=IMrFaX0i-WIlaOA5O53ipNSta6KQVSFHc1aJXmS3nSo,5078
8
- ai_pipeline_core/tracing.py,sha256=HT8heSwsVot6D6u8dPi-BHVlaemkPsPs5aXtG-iIzNk,31494
9
- ai_pipeline_core/documents/__init__.py,sha256=WHStvGZiSyybOcMTYxSV24U6MA3Am_0_Az5p-DuMFrk,738
10
- ai_pipeline_core/documents/document.py,sha256=hdTh36KGEcrDollTnQmTI66DJIqYfe4X42Y0q7Cm4fY,68153
11
- ai_pipeline_core/documents/document_list.py,sha256=Y_NCjfM_CjkIwHRD2iyGgYBuIykN8lT2IIH_uWOiGis,16254
12
- ai_pipeline_core/documents/flow_document.py,sha256=vSPzE4kGuDjGUfFykfpPaSfMuIO9_kDfTvdc8kZaE8U,4144
13
- ai_pipeline_core/documents/mime_type.py,sha256=JFEOq4HwlIW2snobyNfWwySdT7urZSWkobiRMVs2fSE,7959
14
- ai_pipeline_core/documents/task_document.py,sha256=4j94N-hkqXVmzjyUjbA9YW2oR4dqnOhqA3D5OWrmGkw,4303
15
- ai_pipeline_core/documents/temporary_document.py,sha256=Sam344Mm5AlZTm3_l01YdDWeF26F6pR2tytGRL1doQY,2711
16
- ai_pipeline_core/documents/utils.py,sha256=ZyJNjFN7ihWno0K7dJZed7twYmmPLA0z40UzFw1A3A8,5465
17
- ai_pipeline_core/flow/__init__.py,sha256=2BfWYMOPYW5teGzwo-qzpn_bom1lxxry0bPsjVgcsCk,188
18
- ai_pipeline_core/flow/config.py,sha256=3PCDph2n8dj-txqAvd9Wflbi_6lmfXFR9rUhM-szGSQ,18887
19
- ai_pipeline_core/flow/options.py,sha256=zn3N5DgYtlxLq0AvXfana3UOhym7A3XCheQSBIIarZE,2295
20
- ai_pipeline_core/llm/__init__.py,sha256=3B_vtEzxrzidP1qOUNQ4RxlUmxZ2MBKQcUhQiTybM9g,661
21
- ai_pipeline_core/llm/ai_messages.py,sha256=Onin3UPdbJQNl3WfY3-_jE5KRmF-ciXsa5K6UPOiy5s,14410
22
- ai_pipeline_core/llm/client.py,sha256=VQOUxGT8bUoKiD3-XX0VY5OHUX80Xdz6esrN9j2KUZ4,25013
23
- ai_pipeline_core/llm/model_options.py,sha256=uRNIHfVeh2sgt1mZBiOUx6hPQ6GKjB8b7TytZJ6afKg,11768
24
- ai_pipeline_core/llm/model_response.py,sha256=-fKJcblDP_Z6NV9CGp4bm_hitb0Z0jyy0ZndCQUpRkQ,13493
25
- ai_pipeline_core/llm/model_types.py,sha256=2J4Qsb1x21I4eo_VPeaMMOW8shOGPqzJuoGjTLcBFPM,2791
26
- ai_pipeline_core/logging/__init__.py,sha256=Nz6-ghAoENsgNmLD2ma9TW9M0U2_QfxuQ5DDW6Vt6M0,651
27
- ai_pipeline_core/logging/logging.yml,sha256=YTW48keO_K5bkkb-KXGM7ZuaYKiquLsjsURei8Ql0V4,1353
28
- ai_pipeline_core/logging/logging_config.py,sha256=pV2x6GgMPXrzPH27sicCSXfw56beio4C2JKCJ3NsXrg,6207
29
- ai_pipeline_core/logging/logging_mixin.py,sha256=OTye2pbUbG5oYZkI06TNkGCEa4y0ldePz5IAfdmNUPU,8090
30
- ai_pipeline_core/simple_runner/__init__.py,sha256=9krT-CcDAZ0jB2MjWqFYhaK5qtUDMpB5qWzjRLa4Zhk,322
31
- ai_pipeline_core/simple_runner/cli.py,sha256=p9Z1jtRMH10T5Bl3QfHPxyW6LL4qYvvXeOXbPGeeXeE,9308
32
- ai_pipeline_core/simple_runner/simple_runner.py,sha256=f6cIodYkul-Apu1d63T6kR5DZpiaCWpphUcEPp5XjFo,9102
33
- ai_pipeline_core/storage/__init__.py,sha256=tcIkjJ3zPBLCyetwiJDewBvS2sbRJrDlBh3gEsQm08E,184
34
- ai_pipeline_core/storage/storage.py,sha256=ClMr419Y-eU2RuOjZYd51dC0stWQk28Vb56PvQaoUwc,20007
35
- ai_pipeline_core/utils/__init__.py,sha256=TJSmEm1Quf-gKwXrxM96u2IGzVolUyeNNfLMPoLstXI,254
36
- ai_pipeline_core/utils/deploy.py,sha256=rAtRuwkmGkc-fqvDMXpt08OzLrD7KTDMAmLDC9wYg7Y,13147
37
- ai_pipeline_core/utils/remote_deployment.py,sha256=cPTgnS5InK08qiWnuPz3e8YKjoT3sPBloSaDfNTzghs,10137
38
- ai_pipeline_core-0.2.9.dist-info/METADATA,sha256=8p4PXSJqP5j4XV4cxjuncSN3i8914ZMupaU3EKs6Qpk,15159
39
- ai_pipeline_core-0.2.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
40
- ai_pipeline_core-0.2.9.dist-info/licenses/LICENSE,sha256=kKj8mfbdWwkyG3U6n7ztB3bAZlEwShTkAsvaY657i3I,1074
41
- ai_pipeline_core-0.2.9.dist-info/RECORD,,