ai-pipeline-core 0.1.5__tar.gz → 0.1.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/PKG-INFO +92 -30
- {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/README.md +90 -28
- ai_pipeline_core-0.1.7/ai_pipeline_core/__init__.py +77 -0
- {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/ai_pipeline_core/documents/__init__.py +3 -0
- {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/ai_pipeline_core/documents/document.py +57 -3
- ai_pipeline_core-0.1.7/ai_pipeline_core/documents/mime_type.py +110 -0
- ai_pipeline_core-0.1.7/ai_pipeline_core/flow/__init__.py +7 -0
- ai_pipeline_core-0.1.7/ai_pipeline_core/flow/options.py +26 -0
- {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/ai_pipeline_core/llm/client.py +5 -3
- ai_pipeline_core-0.1.7/ai_pipeline_core/pipeline.py +418 -0
- ai_pipeline_core-0.1.7/ai_pipeline_core/prefect.py +7 -0
- ai_pipeline_core-0.1.7/ai_pipeline_core/simple_runner/__init__.py +19 -0
- ai_pipeline_core-0.1.7/ai_pipeline_core/simple_runner/cli.py +95 -0
- ai_pipeline_core-0.1.7/ai_pipeline_core/simple_runner/simple_runner.py +147 -0
- {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/ai_pipeline_core/tracing.py +63 -20
- {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/pyproject.toml +3 -3
- ai_pipeline_core-0.1.5/ai_pipeline_core/__init__.py +0 -36
- ai_pipeline_core-0.1.5/ai_pipeline_core/documents/mime_type.py +0 -78
- ai_pipeline_core-0.1.5/ai_pipeline_core/flow/__init__.py +0 -3
- {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/.gitignore +0 -0
- {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/LICENSE +0 -0
- {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/ai_pipeline_core/documents/document_list.py +0 -0
- {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/ai_pipeline_core/documents/flow_document.py +0 -0
- {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/ai_pipeline_core/documents/task_document.py +0 -0
- {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/ai_pipeline_core/documents/utils.py +0 -0
- {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/ai_pipeline_core/exceptions.py +0 -0
- {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/ai_pipeline_core/flow/config.py +0 -0
- {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/ai_pipeline_core/llm/__init__.py +0 -0
- {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/ai_pipeline_core/llm/ai_messages.py +0 -0
- {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/ai_pipeline_core/llm/model_options.py +0 -0
- {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/ai_pipeline_core/llm/model_response.py +0 -0
- {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/ai_pipeline_core/llm/model_types.py +0 -0
- {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/ai_pipeline_core/logging/__init__.py +0 -0
- {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/ai_pipeline_core/logging/logging.yml +0 -0
- {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/ai_pipeline_core/logging/logging_config.py +0 -0
- {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/ai_pipeline_core/logging/logging_mixin.py +0 -0
- {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/ai_pipeline_core/prompt_manager.py +0 -0
- {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/ai_pipeline_core/py.typed +0 -0
- {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/ai_pipeline_core/settings.py +0 -0
{ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-pipeline-core
-Version: 0.1.5
+Version: 0.1.7
 Summary: Core utilities for AI-powered processing pipelines using prefect
 Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
 Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core

@@ -20,7 +20,7 @@ Classifier: Typing :: Typed
 Requires-Python: >=3.12
 Requires-Dist: httpx>=0.28.1
 Requires-Dist: jinja2>=3.1.6
-Requires-Dist: lmnr>=0.7.
+Requires-Dist: lmnr>=0.7.6
 Requires-Dist: openai>=1.99.9
 Requires-Dist: prefect>=3.4.13
 Requires-Dist: pydantic-settings>=2.10.1
@@ -151,40 +151,76 @@ async def process_document(doc: Document):
     return response.parsed
 ```
 
-###
+### Enhanced Pipeline Decorators (New in v0.1.7)
 ```python
-from
-from ai_pipeline_core.
-from ai_pipeline_core.
-from ai_pipeline_core.tracing import trace
+from ai_pipeline_core import pipeline_flow, pipeline_task
+from ai_pipeline_core.flow import FlowOptions
+from ai_pipeline_core.documents import DocumentList, FlowDocument
 
-class
-    """
-
-
+class CustomFlowOptions(FlowOptions):
+    """Extend base options with your custom fields"""
+    batch_size: int = 100
+    temperature: float = 0.7
 
-
-INPUT_DOCUMENT_TYPES = [InputDocument]
-OUTPUT_DOCUMENT_TYPE = OutputDocument
-
-@task
-@trace
+@pipeline_task(trace_level="always", retries=3)
 async def process_task(doc: Document) -> Document:
-    # Task
+    # Task with automatic tracing and retries
     result = await process_document(doc)
-
-
-
+    return OutputDocument(name="result", content=result.encode())
+
+@pipeline_flow(trace_level="always")
+async def my_pipeline(
+    project_name: str,
+    documents: DocumentList,
+    flow_options: CustomFlowOptions  # Type-safe custom options
+) -> DocumentList:
+    # Pipeline flow with enforced signature and tracing
+    results = []
+    for doc in documents:
+        result = await process_task(doc)
+        results.append(result)
+    return DocumentList(results)
+```
 
-
-
-
-
+### Simple Runner Utility (New in v0.1.7)
+```python
+from ai_pipeline_core.simple_runner import run_cli, run_pipeline
+from ai_pipeline_core.flow import FlowOptions
+
+# CLI-based pipeline execution
+if __name__ == "__main__":
+    run_cli(
+        flows=[my_pipeline],
+        flow_configs=[MyFlowConfig],
+        options_cls=CustomFlowOptions
+    )
 
-
+# Or programmatic execution
+async def main():
+    result = await run_pipeline(
+        project_name="my-project",
+        output_dir=Path("./output"),
+        flow=my_pipeline,
+        flow_config=MyFlowConfig,
+        flow_options=CustomFlowOptions(batch_size=50)
+    )
+```
 
-
-
+### Clean Prefect Decorators (New in v0.1.7)
+```python
+# Import clean Prefect decorators without tracing
+from ai_pipeline_core.prefect import flow, task
+
+# Or use pipeline decorators with tracing
+from ai_pipeline_core import pipeline_flow, pipeline_task
+
+@task  # Clean Prefect task
+def compute(x: int) -> int:
+    return x * 2
+
+@pipeline_task(trace_level="always")  # With tracing
+def compute_traced(x: int) -> int:
+    return x * 2
 ```
 
 ## Core Modules
@@ -291,8 +327,14 @@ ai_pipeline_core/
 │   ├── client.py          # Async client implementation
 │   └── model_options.py   # Configuration models
 ├── flow/                  # Prefect flow utilities
-│
+│   ├── config.py          # Type-safe flow configuration
+│   └── options.py         # FlowOptions base class (v0.1.7)
+├── simple_runner/         # Pipeline execution utilities (v0.1.7)
+│   ├── cli.py             # CLI interface
+│   └── simple_runner.py   # Core runner logic
 ├── logging/               # Structured logging
+├── pipeline.py            # Enhanced decorators (v0.1.7)
+├── prefect.py             # Clean Prefect exports (v0.1.7)
 ├── tracing.py             # Observability decorators
 └── settings.py            # Centralized configuration
 ```
@@ -469,9 +511,29 @@ Built with:
 - [LiteLLM](https://litellm.ai/) - LLM proxy
 - [Pydantic](https://pydantic-docs.helpmanual.io/) - Data validation
 
+## What's New in v0.1.7
+
+### Major Additions
+- **Enhanced Pipeline Decorators**: New `pipeline_flow` and `pipeline_task` decorators combining Prefect functionality with automatic LMNR tracing
+- **FlowOptions Base Class**: Extensible configuration system for flows with type-safe inheritance
+- **Simple Runner Module**: CLI and programmatic utilities for easy pipeline execution
+- **Clean Prefect Exports**: Separate imports for Prefect decorators with and without tracing
+- **Expanded Exports**: All major components now accessible from top-level package import
+
+### API Improvements
+- Better type inference for document flows with custom options
+- Support for custom FlowOptions inheritance in pipeline flows
+- Improved error messages for invalid flow signatures
+- Enhanced document utility functions (`canonical_name_key`, `sanitize_url`)
+
+### Developer Experience
+- Simplified imports - most components available from `ai_pipeline_core` directly
+- Better separation of concerns between clean Prefect and traced pipeline decorators
+- More intuitive flow configuration with `FlowOptions` inheritance
+
 ## Stability Notice
 
-**Current Version**: 0.1.5
+**Current Version**: 0.1.7
 **Status**: Internal Preview
 **API Stability**: Unstable - Breaking changes expected
 **Recommended Use**: Learning and reference only
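Read together, the removed and added README example lines above also double as a migration recipe: 0.1.5 code that stacked Prefect's `@task` with the separate `@trace` decorator maps onto a single `@pipeline_task` call in 0.1.7. A minimal before/after sketch, assuming `pipeline_task` accepts the keyword arguments shown in the README example (`trace_level`, `retries`); the `summarize` function is a placeholder, not part of the package:

```python
# 0.1.5 style (from the removed lines above; the old example's imports are
# truncated in this diff):
#
#     @task
#     @trace
#     async def summarize(text: str) -> str:
#         ...

# 0.1.7 style: one decorator wires up Prefect execution and LMNR tracing together.
from ai_pipeline_core import pipeline_task


@pipeline_task(trace_level="always", retries=3)
async def summarize(text: str) -> str:
    # Placeholder body; `retries` is assumed to be forwarded to Prefect's task().
    return text[:200]
```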
{ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/README.md

@@ -109,40 +109,76 @@ async def process_document(doc: Document):
     return response.parsed
 ```
 
-###
+### Enhanced Pipeline Decorators (New in v0.1.7)
 ```python
-from
-from ai_pipeline_core.
-from ai_pipeline_core.
-from ai_pipeline_core.tracing import trace
+from ai_pipeline_core import pipeline_flow, pipeline_task
+from ai_pipeline_core.flow import FlowOptions
+from ai_pipeline_core.documents import DocumentList, FlowDocument
 
-class
-    """
-
-
+class CustomFlowOptions(FlowOptions):
+    """Extend base options with your custom fields"""
+    batch_size: int = 100
+    temperature: float = 0.7
 
-
-INPUT_DOCUMENT_TYPES = [InputDocument]
-OUTPUT_DOCUMENT_TYPE = OutputDocument
-
-@task
-@trace
+@pipeline_task(trace_level="always", retries=3)
 async def process_task(doc: Document) -> Document:
-    # Task
+    # Task with automatic tracing and retries
    result = await process_document(doc)
-
-
-
+    return OutputDocument(name="result", content=result.encode())
+
+@pipeline_flow(trace_level="always")
+async def my_pipeline(
+    project_name: str,
+    documents: DocumentList,
+    flow_options: CustomFlowOptions  # Type-safe custom options
+) -> DocumentList:
+    # Pipeline flow with enforced signature and tracing
+    results = []
+    for doc in documents:
+        result = await process_task(doc)
+        results.append(result)
+    return DocumentList(results)
+```
 
-
-
-
-
+### Simple Runner Utility (New in v0.1.7)
+```python
+from ai_pipeline_core.simple_runner import run_cli, run_pipeline
+from ai_pipeline_core.flow import FlowOptions
+
+# CLI-based pipeline execution
+if __name__ == "__main__":
+    run_cli(
+        flows=[my_pipeline],
+        flow_configs=[MyFlowConfig],
+        options_cls=CustomFlowOptions
+    )
 
-
+# Or programmatic execution
+async def main():
+    result = await run_pipeline(
+        project_name="my-project",
+        output_dir=Path("./output"),
+        flow=my_pipeline,
+        flow_config=MyFlowConfig,
+        flow_options=CustomFlowOptions(batch_size=50)
+    )
+```
 
-
-
+### Clean Prefect Decorators (New in v0.1.7)
+```python
+# Import clean Prefect decorators without tracing
+from ai_pipeline_core.prefect import flow, task
+
+# Or use pipeline decorators with tracing
+from ai_pipeline_core import pipeline_flow, pipeline_task
+
+@task  # Clean Prefect task
+def compute(x: int) -> int:
+    return x * 2
+
+@pipeline_task(trace_level="always")  # With tracing
+def compute_traced(x: int) -> int:
+    return x * 2
 ```
 
 ## Core Modules

@@ -249,8 +285,14 @@ ai_pipeline_core/
 │   ├── client.py          # Async client implementation
 │   └── model_options.py   # Configuration models
 ├── flow/                  # Prefect flow utilities
-│
+│   ├── config.py          # Type-safe flow configuration
+│   └── options.py         # FlowOptions base class (v0.1.7)
+├── simple_runner/         # Pipeline execution utilities (v0.1.7)
+│   ├── cli.py             # CLI interface
+│   └── simple_runner.py   # Core runner logic
 ├── logging/               # Structured logging
+├── pipeline.py            # Enhanced decorators (v0.1.7)
+├── prefect.py             # Clean Prefect exports (v0.1.7)
 ├── tracing.py             # Observability decorators
 └── settings.py            # Centralized configuration
 ```

@@ -427,9 +469,29 @@ Built with:
 - [LiteLLM](https://litellm.ai/) - LLM proxy
 - [Pydantic](https://pydantic-docs.helpmanual.io/) - Data validation
 
+## What's New in v0.1.7
+
+### Major Additions
+- **Enhanced Pipeline Decorators**: New `pipeline_flow` and `pipeline_task` decorators combining Prefect functionality with automatic LMNR tracing
+- **FlowOptions Base Class**: Extensible configuration system for flows with type-safe inheritance
+- **Simple Runner Module**: CLI and programmatic utilities for easy pipeline execution
+- **Clean Prefect Exports**: Separate imports for Prefect decorators with and without tracing
+- **Expanded Exports**: All major components now accessible from top-level package import
+
+### API Improvements
+- Better type inference for document flows with custom options
+- Support for custom FlowOptions inheritance in pipeline flows
+- Improved error messages for invalid flow signatures
+- Enhanced document utility functions (`canonical_name_key`, `sanitize_url`)
+
+### Developer Experience
+- Simplified imports - most components available from `ai_pipeline_core` directly
+- Better separation of concerns between clean Prefect and traced pipeline decorators
+- More intuitive flow configuration with `FlowOptions` inheritance
+
 ## Stability Notice
 
-**Current Version**: 0.1.5
+**Current Version**: 0.1.7
 **Status**: Internal Preview
 **API Stability**: Unstable - Breaking changes expected
 **Recommended Use**: Learning and reference only
ai_pipeline_core-0.1.7/ai_pipeline_core/__init__.py

@@ -0,0 +1,77 @@
+"""Pipeline Core - Shared infrastructure for AI pipelines."""
+
+from . import llm
+from .documents import (
+    Document,
+    DocumentList,
+    FlowDocument,
+    TaskDocument,
+    canonical_name_key,
+    sanitize_url,
+)
+from .flow import FlowConfig, FlowOptions
+from .llm import (
+    AIMessages,
+    AIMessageType,
+    ModelName,
+    ModelOptions,
+    ModelResponse,
+    StructuredModelResponse,
+)
+from .logging import (
+    LoggerMixin,
+    LoggingConfig,
+    StructuredLoggerMixin,
+    get_pipeline_logger,
+    setup_logging,
+)
+from .logging import get_pipeline_logger as get_logger
+from .pipeline import pipeline_flow, pipeline_task
+from .prefect import flow, task
+from .prompt_manager import PromptManager
+from .settings import settings
+from .tracing import TraceInfo, TraceLevel, trace
+
+__version__ = "0.1.7"
+
+__all__ = [
+    # Config/Settings
+    "settings",
+    # Logging
+    "get_logger",
+    "get_pipeline_logger",
+    "LoggerMixin",
+    "LoggingConfig",
+    "setup_logging",
+    "StructuredLoggerMixin",
+    # Documents
+    "Document",
+    "DocumentList",
+    "FlowDocument",
+    "TaskDocument",
+    "canonical_name_key",
+    "sanitize_url",
+    # Flow/Task
+    "FlowConfig",
+    "FlowOptions",
+    # Prefect decorators (clean, no tracing)
+    "task",
+    "flow",
+    # Pipeline decorators (with tracing)
+    "pipeline_task",
+    "pipeline_flow",
+    # LLM
+    "llm",
+    "ModelName",
+    "ModelOptions",
+    "ModelResponse",
+    "StructuredModelResponse",
+    "AIMessages",
+    "AIMessageType",
+    # Tracing
+    "trace",
+    "TraceLevel",
+    "TraceInfo",
+    # Utils
+    "PromptManager",
+]
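Since the new `__init__.py` re-exports most of the package, a flat import style becomes possible in 0.1.7. A small sketch using only names re-exported above; the flow body itself is a placeholder:

```python
from ai_pipeline_core import (
    DocumentList,
    FlowOptions,
    get_logger,
    pipeline_flow,
)

logger = get_logger(__name__)


@pipeline_flow(trace_level="always")
async def passthrough_flow(
    project_name: str,
    documents: DocumentList,
    flow_options: FlowOptions,
) -> DocumentList:
    # Placeholder flow; the (project_name, documents, flow_options) signature
    # follows the README example earlier in this diff.
    logger.info("running %s", project_name)
    return documents
```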
{ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/ai_pipeline_core/documents/__init__.py

@@ -2,10 +2,13 @@ from .document import Document
 from .document_list import DocumentList
 from .flow_document import FlowDocument
 from .task_document import TaskDocument
+from .utils import canonical_name_key, sanitize_url
 
 __all__ = [
     "Document",
     "DocumentList",
     "FlowDocument",
     "TaskDocument",
+    "canonical_name_key",
+    "sanitize_url",
 ]
{ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/ai_pipeline_core/documents/document.py

@@ -6,7 +6,7 @@ from abc import ABC, abstractmethod
 from base64 import b32encode
 from enum import StrEnum
 from functools import cached_property
-from typing import Any, ClassVar, Literal, Self
+from typing import Any, ClassVar, Literal, Self, TypeVar
 
 from pydantic import BaseModel, ConfigDict, field_serializer, field_validator
 from ruamel.yaml import YAML

@@ -19,8 +19,11 @@ from .mime_type import (
     is_image_mime_type,
     is_pdf_mime_type,
     is_text_mime_type,
+    is_yaml_mime_type,
 )
 
+TModel = TypeVar("TModel", bound=BaseModel)
+
 
 class Document(BaseModel, ABC):
     """Abstract base class for all documents"""

@@ -207,15 +210,40 @@
         """Parse document as JSON"""
         return json.loads(self.as_text())
 
+    def as_pydantic_model(self, model_type: type[TModel]) -> TModel:
+        """Parse document as a pydantic model and return the validated instance"""
+        data = self.as_yaml() if is_yaml_mime_type(self.mime_type) else self.as_json()
+        return model_type.model_validate(data)
+
     def as_markdown_list(self) -> list[str]:
         """Parse document as a markdown list"""
         return self.as_text().split(self.MARKDOWN_LIST_SEPARATOR)
 
     @classmethod
-    def create(
+    def create(
+        cls,
+        name: str,
+        description: str | None,
+        content: bytes | str | BaseModel | list[str] | Any,
+    ) -> Self:
         """Create a document from a name, description, and content"""
-
+        is_yaml_extension = name.endswith(".yaml") or name.endswith(".yml")
+        is_json_extension = name.endswith(".json")
+        is_markdown_extension = name.endswith(".md")
+        is_str_list = isinstance(content, list) and all(isinstance(item, str) for item in content)
+        if isinstance(content, bytes):
+            pass
+        elif isinstance(content, str):
             content = content.encode("utf-8")
+        elif is_str_list and is_markdown_extension:
+            return cls.create_as_markdown_list(name, description, content)  # type: ignore[arg-type]
+        elif is_yaml_extension:
+            return cls.create_as_yaml(name, description, content)
+        elif is_json_extension:
+            return cls.create_as_json(name, description, content)
+        else:
+            raise ValueError(f"Unsupported content type: {type(content)} for {name}")
+
         return cls(name=name, description=description, content=content)
 
     @classmethod

@@ -230,6 +258,32 @@
         content = Document.MARKDOWN_LIST_SEPARATOR.join(cleaned_items)
         return cls.create(name, description, content)
 
+    @classmethod
+    def create_as_json(cls, name: str, description: str | None, data: Any) -> Self:
+        """Create a document from a name, description, and JSON data"""
+        assert name.endswith(".json"), f"Document name must end with .json: {name}"
+        if isinstance(data, BaseModel):
+            data = data.model_dump(mode="json")
+        content = json.dumps(data, indent=2).encode("utf-8")
+        return cls.create(name, description, content)
+
+    @classmethod
+    def create_as_yaml(cls, name: str, description: str | None, data: Any) -> Self:
+        """Create a document from a name, description, and YAML data"""
+        assert name.endswith(".yaml") or name.endswith(".yml"), (
+            f"Document name must end with .yaml or .yml: {name}"
+        )
+        if isinstance(data, BaseModel):
+            data = data.model_dump()
+        yaml = YAML()
+        yaml.indent(mapping=2, sequence=4, offset=2)
+        from io import BytesIO
+
+        stream = BytesIO()
+        yaml.dump(data, stream)
+        content = stream.getvalue()
+        return cls.create(name, description, content)
+
     def serialize_model(self) -> dict[str, Any]:
         """Serialize document to a dictionary with proper encoding."""
         result = {
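The `create` dispatch and `as_pydantic_model` added above suggest a round trip between Pydantic models and documents. A sketch, assuming a plain `FlowDocument` subclass is concrete enough to instantiate; the `ConfigDocument` and `Budget` names are invented for illustration:

```python
from pydantic import BaseModel

from ai_pipeline_core import FlowDocument


class Budget(BaseModel):
    limit_usd: float
    currency: str = "USD"


class ConfigDocument(FlowDocument):
    """Hypothetical concrete document type for this example."""


# create() routes on the file extension: .yaml/.yml -> create_as_yaml,
# .json -> create_as_json, str -> UTF-8 bytes, bytes -> stored as-is.
doc = ConfigDocument.create("budget.yaml", None, Budget(limit_usd=125.0))

# as_pydantic_model() loads YAML or JSON (depending on the detected MIME type)
# and validates it against the given model.
budget = doc.as_pydantic_model(Budget)
assert budget.limit_usd == 125.0
```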
ai_pipeline_core-0.1.7/ai_pipeline_core/documents/mime_type.py

@@ -0,0 +1,110 @@
+"""MIME type detection utilities for documents"""
+
+import magic
+
+from ai_pipeline_core.logging import get_pipeline_logger
+
+logger = get_pipeline_logger(__name__)
+
+# Extension to MIME type mapping for common formats
+# These are formats where extension-based detection is more reliable
+EXTENSION_MIME_MAP = {
+    "md": "text/markdown",
+    "txt": "text/plain",
+    "pdf": "application/pdf",
+    "png": "image/png",
+    "jpg": "image/jpeg",
+    "jpeg": "image/jpeg",
+    "gif": "image/gif",
+    "bmp": "image/bmp",
+    "webp": "image/webp",
+    "json": "application/json",
+    "yaml": "application/yaml",
+    "yml": "application/yaml",
+    "xml": "text/xml",
+    "html": "text/html",
+    "htm": "text/html",
+    "py": "text/x-python",
+    "css": "text/css",
+    "js": "application/javascript",
+    "ts": "application/typescript",
+    "tsx": "application/typescript",
+    "jsx": "application/javascript",
+}
+
+
+def detect_mime_type(content: bytes, name: str) -> str:
+    """Detect MIME type from content and filename
+
+    Uses a hybrid approach:
+    1. Check for empty content
+    2. Try extension-based detection for known formats
+    3. Fall back to magic content detection
+    4. Final fallback to application/octet-stream
+    """
+
+    # Check for empty content
+    if len(content) == 0:
+        return "application/x-empty"
+
+    # Try extension-based detection first for known formats
+    # This is more reliable for text formats that magic might misidentify
+    ext = name.lower().split(".")[-1] if "." in name else ""
+    if ext in EXTENSION_MIME_MAP:
+        return EXTENSION_MIME_MAP[ext]
+
+    # Try content-based detection with magic
+    try:
+        mime = magic.from_buffer(content[:1024], mime=True)
+        # If magic returns a valid mime type, use it
+        if mime and mime != "application/octet-stream":
+            return mime
+    except (AttributeError, OSError, magic.MagicException) as e:
+        logger.warning(f"MIME detection failed for {name}: {e}")
+    except Exception as e:
+        logger.error(f"Unexpected error in MIME detection for {name}: {e}")
+
+    # Final fallback based on extension or default
+    return EXTENSION_MIME_MAP.get(ext, "application/octet-stream")
+
+
+def mime_type_from_extension(name: str) -> str:
+    """Get MIME type based on file extension
+
+    Legacy function kept for compatibility
+    """
+    ext = name.lower().split(".")[-1] if "." in name else ""
+    return EXTENSION_MIME_MAP.get(ext, "application/octet-stream")
+
+
+def is_text_mime_type(mime_type: str) -> bool:
+    """Check if MIME type represents text content"""
+    text_types = [
+        "text/",
+        "application/json",
+        "application/xml",
+        "application/javascript",
+        "application/yaml",
+        "application/x-yaml",
+    ]
+    return any(mime_type.startswith(t) for t in text_types)
+
+
+def is_json_mime_type(mime_type: str) -> bool:
+    """Check if MIME type is JSON"""
+    return mime_type == "application/json"
+
+
+def is_yaml_mime_type(mime_type: str) -> bool:
+    """Check if MIME type is YAML"""
+    return mime_type == "application/yaml" or mime_type == "application/x-yaml"
+
+
+def is_pdf_mime_type(mime_type: str) -> bool:
+    """Check if MIME type is PDF"""
+    return mime_type == "application/pdf"
+
+
+def is_image_mime_type(mime_type: str) -> bool:
+    """Check if MIME type is an image"""
+    return mime_type.startswith("image/")
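Since `detect_mime_type` is new in this release, a short sketch of the precedence it implements (empty check, then the extension map, then libmagic on the first KiB, then the fallback); the filenames are arbitrary:

```python
from ai_pipeline_core.documents.mime_type import detect_mime_type

# Known extensions return from EXTENSION_MIME_MAP before libmagic is consulted.
assert detect_mime_type(b"# Title\n", "notes.md") == "text/markdown"

# Empty payloads short-circuit to a dedicated type.
assert detect_mime_type(b"", "anything.bin") == "application/x-empty"

# Unknown extension: libmagic inspects the bytes; if it reports nothing more
# specific than application/octet-stream, the generic fallback is returned.
print(detect_mime_type(b"\x00\x01\x02\x03", "blob.xyz"))
```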
ai_pipeline_core-0.1.7/ai_pipeline_core/flow/options.py

@@ -0,0 +1,26 @@
+from typing import TypeVar
+
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+from ai_pipeline_core.llm import ModelName
+
+T = TypeVar("T", bound="FlowOptions")
+
+
+class FlowOptions(BaseSettings):
+    """Base configuration for AI Pipeline flows."""
+
+    core_model: ModelName | str = Field(
+        default="gpt-5",
+        description="Primary model for complex analysis and generation tasks.",
+    )
+    small_model: ModelName | str = Field(
+        default="gpt-5-mini",
+        description="Fast, cost-effective model for simple tasks and orchestration.",
+    )
+
+    model_config = SettingsConfigDict(frozen=True, extra="ignore")
+
+
+__all__ = ["FlowOptions"]
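Because `FlowOptions` subclasses pydantic-settings' `BaseSettings` and is frozen, custom options classes get validated, immutable fields (and, presumably, the usual pydantic-settings environment sources). A sketch; the extra fields are invented for illustration:

```python
from pydantic import Field

from ai_pipeline_core import FlowOptions


class ResearchFlowOptions(FlowOptions):
    """Hypothetical subclass with project-specific knobs."""

    max_sources: int = Field(default=10, description="How many sources to pull per query.")
    language: str = Field(default="en", description="Output language code.")


opts = ResearchFlowOptions(core_model="gpt-5", max_sources=25)
print(opts.small_model)  # inherited default: "gpt-5-mini"
print(opts.max_sources)  # 25

# model_config freezes the settings, so `opts.language = "de"` would raise;
# unknown keyword arguments are ignored rather than rejected (extra="ignore").
```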
{ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.7}/ai_pipeline_core/llm/client.py

@@ -118,11 +118,13 @@ async def _generate_with_retry(
             span.set_attributes(response.get_laminar_metadata())
             Laminar.set_span_output(response.content)
             if not response.content:
-                # disable cache in case of empty response
-                completion_kwargs["extra_body"]["cache"] = {"no-cache": True}
                 raise ValueError(f"Model {model} returned an empty response.")
             return response
         except (asyncio.TimeoutError, ValueError, Exception) as e:
+            if not isinstance(e, asyncio.TimeoutError):
+                # disable cache if it's not a timeout because it may cause an error
+                completion_kwargs["extra_body"]["cache"] = {"no-cache": True}
+
             logger.warning(
                 "LLM generation failed (attempt %d/%d): %s",
                 attempt + 1,

@@ -167,7 +169,7 @@ T = TypeVar("T", bound=BaseModel)
 
 @trace(ignore_inputs=["context"])
 async def generate_structured(
-    model: ModelName,
+    model: ModelName | str,
     response_format: type[T],
     *,
     context: AIMessages = AIMessages(),