ai-pipeline-core 0.1.7__tar.gz → 0.1.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/PKG-INFO +35 -38
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/README.md +34 -37
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/__init__.py +7 -5
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/documents/__init__.py +2 -0
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/documents/document.py +131 -23
- ai_pipeline_core-0.1.10/ai_pipeline_core/documents/temporary_document.py +16 -0
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/flow/config.py +40 -1
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/llm/model_options.py +4 -0
- ai_pipeline_core-0.1.10/ai_pipeline_core/pipeline.py +414 -0
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/prompt_manager.py +7 -1
- ai_pipeline_core-0.1.10/ai_pipeline_core/simple_runner/cli.py +170 -0
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/simple_runner/simple_runner.py +7 -2
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/pyproject.toml +11 -4
- ai_pipeline_core-0.1.7/ai_pipeline_core/pipeline.py +0 -418
- ai_pipeline_core-0.1.7/ai_pipeline_core/simple_runner/cli.py +0 -95
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/.gitignore +0 -0
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/LICENSE +0 -0
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/documents/document_list.py +0 -0
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/documents/flow_document.py +0 -0
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/documents/mime_type.py +0 -0
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/documents/task_document.py +0 -0
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/documents/utils.py +0 -0
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/exceptions.py +0 -0
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/flow/__init__.py +0 -0
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/flow/options.py +0 -0
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/llm/__init__.py +0 -0
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/llm/ai_messages.py +0 -0
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/llm/client.py +0 -0
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/llm/model_response.py +0 -0
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/llm/model_types.py +0 -0
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/logging/__init__.py +0 -0
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/logging/logging.yml +0 -0
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/logging/logging_config.py +0 -0
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/logging/logging_mixin.py +0 -0
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/prefect.py +0 -0
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/py.typed +0 -0
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/settings.py +0 -0
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/simple_runner/__init__.py +0 -0
- {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/tracing.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ai-pipeline-core
|
|
3
|
-
Version: 0.1.7
|
|
3
|
+
Version: 0.1.10
|
|
4
4
|
Summary: Core utilities for AI-powered processing pipelines using prefect
|
|
5
5
|
Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
|
|
6
6
|
Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
|
|
@@ -151,7 +151,7 @@ async def process_document(doc: Document):
|
|
|
151
151
|
return response.parsed
|
|
152
152
|
```
|
|
153
153
|
|
|
154
|
-
### Enhanced Pipeline Decorators
|
|
154
|
+
### Enhanced Pipeline Decorators
|
|
155
155
|
```python
|
|
156
156
|
from ai_pipeline_core import pipeline_flow, pipeline_task
|
|
157
157
|
from ai_pipeline_core.flow import FlowOptions
|
|
@@ -182,7 +182,7 @@ async def my_pipeline(
|
|
|
182
182
|
return DocumentList(results)
|
|
183
183
|
```
|
|
184
184
|
|
|
185
|
-
### Simple Runner Utility
|
|
185
|
+
### Simple Runner Utility
|
|
186
186
|
```python
|
|
187
187
|
from ai_pipeline_core.simple_runner import run_cli, run_pipeline
|
|
188
188
|
from ai_pipeline_core.flow import FlowOptions
|
|
@@ -206,7 +206,7 @@ async def main():
|
|
|
206
206
|
)
|
|
207
207
|
```
|
|
208
208
|
|
|
209
|
-
### Clean Prefect Decorators
|
|
209
|
+
### Clean Prefect Decorators
|
|
210
210
|
```python
|
|
211
211
|
# Import clean Prefect decorators without tracing
|
|
212
212
|
from ai_pipeline_core.prefect import flow, task
|
|
@@ -214,12 +214,12 @@ from ai_pipeline_core.prefect import flow, task
|
|
|
214
214
|
# Or use pipeline decorators with tracing
|
|
215
215
|
from ai_pipeline_core import pipeline_flow, pipeline_task
|
|
216
216
|
|
|
217
|
-
@task # Clean Prefect task
|
|
217
|
+
@task # Clean Prefect task (supports both sync and async)
|
|
218
218
|
def compute(x: int) -> int:
|
|
219
219
|
return x * 2
|
|
220
220
|
|
|
221
|
-
@pipeline_task(trace_level="always") # With tracing
|
|
222
|
-
def compute_traced(x: int) -> int:
|
|
221
|
+
@pipeline_task(trace_level="always") # With tracing (async only)
|
|
222
|
+
async def compute_traced(x: int) -> int:
|
|
223
223
|
return x * 2
|
|
224
224
|
```
|
|
225
225
|
|
|
@@ -246,12 +246,12 @@ docs = DocumentList([doc1, doc2])
|
|
|
246
246
|
Managed AI interactions with built-in retry logic, cost tracking, and structured outputs.
|
|
247
247
|
|
|
248
248
|
**Supported Models** (via LiteLLM proxy):
|
|
249
|
-
- OpenAI:
|
|
250
|
-
- Anthropic:
|
|
251
|
-
- Google:
|
|
252
|
-
- xAI:
|
|
253
|
-
- Perplexity:
|
|
254
|
-
- And many more through LiteLLM compatibility
|
|
249
|
+
- OpenAI: gpt-5
|
|
250
|
+
- Anthropic: claude-4
|
|
251
|
+
- Google: gemini-2.5
|
|
252
|
+
- xAI: grok-3, grok-4
|
|
253
|
+
- Perplexity: sonar-pro-search
|
|
254
|
+
- And many more through LiteLLM compatibility. Every model from openrouter should work.
|
|
255
255
|
|
|
256
256
|
```python
|
|
257
257
|
from ai_pipeline_core.llm import generate_structured, AIMessages, ModelOptions
|
|
@@ -328,13 +328,13 @@ ai_pipeline_core/
|
|
|
328
328
|
│ └── model_options.py # Configuration models
|
|
329
329
|
├── flow/ # Prefect flow utilities
|
|
330
330
|
│ ├── config.py # Type-safe flow configuration
|
|
331
|
-
│ └── options.py # FlowOptions base class
|
|
332
|
-
├── simple_runner/ # Pipeline execution utilities
|
|
331
|
+
│ └── options.py # FlowOptions base class
|
|
332
|
+
├── simple_runner/ # Pipeline execution utilities
|
|
333
333
|
│ ├── cli.py # CLI interface
|
|
334
334
|
│ └── simple_runner.py # Core runner logic
|
|
335
335
|
├── logging/ # Structured logging
|
|
336
|
-
├── pipeline.py # Enhanced decorators
|
|
337
|
-
├── prefect.py # Clean Prefect exports
|
|
336
|
+
├── pipeline.py # Enhanced decorators
|
|
337
|
+
├── prefect.py # Clean Prefect exports
|
|
338
338
|
├── tracing.py # Observability decorators
|
|
339
339
|
└── settings.py # Centralized configuration
|
|
340
340
|
```
|
|
@@ -345,6 +345,7 @@ ai_pipeline_core/
|
|
|
345
345
|
```bash
|
|
346
346
|
make test # Run all tests
|
|
347
347
|
make test-cov # Run with coverage report
|
|
348
|
+
make test-showcase # Test the showcase.py CLI example
|
|
348
349
|
pytest tests/test_documents.py::TestDocument::test_creation # Single test
|
|
349
350
|
```
|
|
350
351
|
|
|
@@ -481,6 +482,22 @@ For learning purposes, see [CLAUDE.md](CLAUDE.md) for our comprehensive coding s
|
|
|
481
482
|
|
|
482
483
|
- [CLAUDE.md](CLAUDE.md) - Detailed coding standards and architecture guide
|
|
483
484
|
|
|
485
|
+
## Examples
|
|
486
|
+
|
|
487
|
+
### In This Repository
|
|
488
|
+
- [showcase.py](examples/showcase.py) - Complete example demonstrating all core features including the CLI runner
|
|
489
|
+
```bash
|
|
490
|
+
# Run the showcase example with CLI
|
|
491
|
+
python examples/showcase.py ./output --temperature 0.7 --batch-size 5
|
|
492
|
+
|
|
493
|
+
# Show help
|
|
494
|
+
python examples/showcase.py --help
|
|
495
|
+
```
|
|
496
|
+
- [showcase.jinja2](examples/showcase.jinja2) - Example Jinja2 prompt template
|
|
497
|
+
|
|
498
|
+
### Real-World Application
|
|
499
|
+
- [AI Documentation Writer](https://github.com/bbarwik/ai-documentation-writer) - Production-ready example showing how to build sophisticated AI pipelines for automated documentation generation. See [examples/ai-documentation-writer.md](examples/ai-documentation-writer.md) for a detailed overview.
|
|
500
|
+
|
|
484
501
|
### dependencies_docs/ Directory
|
|
485
502
|
> [!NOTE]
|
|
486
503
|
> The `dependencies_docs/` directory contains guides for AI assistants (like Claude Code) on how to interact with the project's external dependencies and tooling, NOT user documentation for ai-pipeline-core itself. These files are excluded from repository listings to avoid confusion.
|
|
@@ -511,29 +528,9 @@ Built with:
|
|
|
511
528
|
- [LiteLLM](https://litellm.ai/) - LLM proxy
|
|
512
529
|
- [Pydantic](https://pydantic-docs.helpmanual.io/) - Data validation
|
|
513
530
|
|
|
514
|
-
## What's New in v0.1.7
|
|
515
|
-
|
|
516
|
-
### Major Additions
|
|
517
|
-
- **Enhanced Pipeline Decorators**: New `pipeline_flow` and `pipeline_task` decorators combining Prefect functionality with automatic LMNR tracing
|
|
518
|
-
- **FlowOptions Base Class**: Extensible configuration system for flows with type-safe inheritance
|
|
519
|
-
- **Simple Runner Module**: CLI and programmatic utilities for easy pipeline execution
|
|
520
|
-
- **Clean Prefect Exports**: Separate imports for Prefect decorators with and without tracing
|
|
521
|
-
- **Expanded Exports**: All major components now accessible from top-level package import
|
|
522
|
-
|
|
523
|
-
### API Improvements
|
|
524
|
-
- Better type inference for document flows with custom options
|
|
525
|
-
- Support for custom FlowOptions inheritance in pipeline flows
|
|
526
|
-
- Improved error messages for invalid flow signatures
|
|
527
|
-
- Enhanced document utility functions (`canonical_name_key`, `sanitize_url`)
|
|
528
|
-
|
|
529
|
-
### Developer Experience
|
|
530
|
-
- Simplified imports - most components available from `ai_pipeline_core` directly
|
|
531
|
-
- Better separation of concerns between clean Prefect and traced pipeline decorators
|
|
532
|
-
- More intuitive flow configuration with `FlowOptions` inheritance
|
|
533
|
-
|
|
534
531
|
## Stability Notice
|
|
535
532
|
|
|
536
|
-
**Current Version**: 0.1.7
|
|
533
|
+
**Current Version**: 0.1.10
|
|
537
534
|
**Status**: Internal Preview
|
|
538
535
|
**API Stability**: Unstable - Breaking changes expected
|
|
539
536
|
**Recommended Use**: Learning and reference only
|
|
@@ -109,7 +109,7 @@ async def process_document(doc: Document):
|
|
|
109
109
|
return response.parsed
|
|
110
110
|
```
|
|
111
111
|
|
|
112
|
-
### Enhanced Pipeline Decorators
|
|
112
|
+
### Enhanced Pipeline Decorators
|
|
113
113
|
```python
|
|
114
114
|
from ai_pipeline_core import pipeline_flow, pipeline_task
|
|
115
115
|
from ai_pipeline_core.flow import FlowOptions
|
|
@@ -140,7 +140,7 @@ async def my_pipeline(
|
|
|
140
140
|
return DocumentList(results)
|
|
141
141
|
```
|
|
142
142
|
|
|
143
|
-
### Simple Runner Utility
|
|
143
|
+
### Simple Runner Utility
|
|
144
144
|
```python
|
|
145
145
|
from ai_pipeline_core.simple_runner import run_cli, run_pipeline
|
|
146
146
|
from ai_pipeline_core.flow import FlowOptions
|
|
@@ -164,7 +164,7 @@ async def main():
|
|
|
164
164
|
)
|
|
165
165
|
```
|
|
166
166
|
|
|
167
|
-
### Clean Prefect Decorators
|
|
167
|
+
### Clean Prefect Decorators
|
|
168
168
|
```python
|
|
169
169
|
# Import clean Prefect decorators without tracing
|
|
170
170
|
from ai_pipeline_core.prefect import flow, task
|
|
@@ -172,12 +172,12 @@ from ai_pipeline_core.prefect import flow, task
|
|
|
172
172
|
# Or use pipeline decorators with tracing
|
|
173
173
|
from ai_pipeline_core import pipeline_flow, pipeline_task
|
|
174
174
|
|
|
175
|
-
@task # Clean Prefect task
|
|
175
|
+
@task # Clean Prefect task (supports both sync and async)
|
|
176
176
|
def compute(x: int) -> int:
|
|
177
177
|
return x * 2
|
|
178
178
|
|
|
179
|
-
@pipeline_task(trace_level="always") # With tracing
|
|
180
|
-
def compute_traced(x: int) -> int:
|
|
179
|
+
@pipeline_task(trace_level="always") # With tracing (async only)
|
|
180
|
+
async def compute_traced(x: int) -> int:
|
|
181
181
|
return x * 2
|
|
182
182
|
```
|
|
183
183
|
|
|
@@ -204,12 +204,12 @@ docs = DocumentList([doc1, doc2])
|
|
|
204
204
|
Managed AI interactions with built-in retry logic, cost tracking, and structured outputs.
|
|
205
205
|
|
|
206
206
|
**Supported Models** (via LiteLLM proxy):
|
|
207
|
-
- OpenAI:
|
|
208
|
-
- Anthropic:
|
|
209
|
-
- Google:
|
|
210
|
-
- xAI:
|
|
211
|
-
- Perplexity:
|
|
212
|
-
- And many more through LiteLLM compatibility
|
|
207
|
+
- OpenAI: gpt-5
|
|
208
|
+
- Anthropic: claude-4
|
|
209
|
+
- Google: gemini-2.5
|
|
210
|
+
- xAI: grok-3, grok-4
|
|
211
|
+
- Perplexity: sonar-pro-search
|
|
212
|
+
- And many more through LiteLLM compatibility. Every model from openrouter should work.
|
|
213
213
|
|
|
214
214
|
```python
|
|
215
215
|
from ai_pipeline_core.llm import generate_structured, AIMessages, ModelOptions
|
|
@@ -286,13 +286,13 @@ ai_pipeline_core/
|
|
|
286
286
|
│ └── model_options.py # Configuration models
|
|
287
287
|
├── flow/ # Prefect flow utilities
|
|
288
288
|
│ ├── config.py # Type-safe flow configuration
|
|
289
|
-
│ └── options.py # FlowOptions base class
|
|
290
|
-
├── simple_runner/ # Pipeline execution utilities
|
|
289
|
+
│ └── options.py # FlowOptions base class
|
|
290
|
+
├── simple_runner/ # Pipeline execution utilities
|
|
291
291
|
│ ├── cli.py # CLI interface
|
|
292
292
|
│ └── simple_runner.py # Core runner logic
|
|
293
293
|
├── logging/ # Structured logging
|
|
294
|
-
├── pipeline.py # Enhanced decorators
|
|
295
|
-
├── prefect.py # Clean Prefect exports
|
|
294
|
+
├── pipeline.py # Enhanced decorators
|
|
295
|
+
├── prefect.py # Clean Prefect exports
|
|
296
296
|
├── tracing.py # Observability decorators
|
|
297
297
|
└── settings.py # Centralized configuration
|
|
298
298
|
```
|
|
@@ -303,6 +303,7 @@ ai_pipeline_core/
|
|
|
303
303
|
```bash
|
|
304
304
|
make test # Run all tests
|
|
305
305
|
make test-cov # Run with coverage report
|
|
306
|
+
make test-showcase # Test the showcase.py CLI example
|
|
306
307
|
pytest tests/test_documents.py::TestDocument::test_creation # Single test
|
|
307
308
|
```
|
|
308
309
|
|
|
@@ -439,6 +440,22 @@ For learning purposes, see [CLAUDE.md](CLAUDE.md) for our comprehensive coding s
|
|
|
439
440
|
|
|
440
441
|
- [CLAUDE.md](CLAUDE.md) - Detailed coding standards and architecture guide
|
|
441
442
|
|
|
443
|
+
## Examples
|
|
444
|
+
|
|
445
|
+
### In This Repository
|
|
446
|
+
- [showcase.py](examples/showcase.py) - Complete example demonstrating all core features including the CLI runner
|
|
447
|
+
```bash
|
|
448
|
+
# Run the showcase example with CLI
|
|
449
|
+
python examples/showcase.py ./output --temperature 0.7 --batch-size 5
|
|
450
|
+
|
|
451
|
+
# Show help
|
|
452
|
+
python examples/showcase.py --help
|
|
453
|
+
```
|
|
454
|
+
- [showcase.jinja2](examples/showcase.jinja2) - Example Jinja2 prompt template
|
|
455
|
+
|
|
456
|
+
### Real-World Application
|
|
457
|
+
- [AI Documentation Writer](https://github.com/bbarwik/ai-documentation-writer) - Production-ready example showing how to build sophisticated AI pipelines for automated documentation generation. See [examples/ai-documentation-writer.md](examples/ai-documentation-writer.md) for a detailed overview.
|
|
458
|
+
|
|
442
459
|
### dependencies_docs/ Directory
|
|
443
460
|
> [!NOTE]
|
|
444
461
|
> The `dependencies_docs/` directory contains guides for AI assistants (like Claude Code) on how to interact with the project's external dependencies and tooling, NOT user documentation for ai-pipeline-core itself. These files are excluded from repository listings to avoid confusion.
|
|
@@ -469,29 +486,9 @@ Built with:
|
|
|
469
486
|
- [LiteLLM](https://litellm.ai/) - LLM proxy
|
|
470
487
|
- [Pydantic](https://pydantic-docs.helpmanual.io/) - Data validation
|
|
471
488
|
|
|
472
|
-
## What's New in v0.1.7
|
|
473
|
-
|
|
474
|
-
### Major Additions
|
|
475
|
-
- **Enhanced Pipeline Decorators**: New `pipeline_flow` and `pipeline_task` decorators combining Prefect functionality with automatic LMNR tracing
|
|
476
|
-
- **FlowOptions Base Class**: Extensible configuration system for flows with type-safe inheritance
|
|
477
|
-
- **Simple Runner Module**: CLI and programmatic utilities for easy pipeline execution
|
|
478
|
-
- **Clean Prefect Exports**: Separate imports for Prefect decorators with and without tracing
|
|
479
|
-
- **Expanded Exports**: All major components now accessible from top-level package import
|
|
480
|
-
|
|
481
|
-
### API Improvements
|
|
482
|
-
- Better type inference for document flows with custom options
|
|
483
|
-
- Support for custom FlowOptions inheritance in pipeline flows
|
|
484
|
-
- Improved error messages for invalid flow signatures
|
|
485
|
-
- Enhanced document utility functions (`canonical_name_key`, `sanitize_url`)
|
|
486
|
-
|
|
487
|
-
### Developer Experience
|
|
488
|
-
- Simplified imports - most components available from `ai_pipeline_core` directly
|
|
489
|
-
- Better separation of concerns between clean Prefect and traced pipeline decorators
|
|
490
|
-
- More intuitive flow configuration with `FlowOptions` inheritance
|
|
491
|
-
|
|
492
489
|
## Stability Notice
|
|
493
490
|
|
|
494
|
-
**Current Version**: 0.1.7
|
|
491
|
+
**Current Version**: 0.1.10
|
|
495
492
|
**Status**: Internal Preview
|
|
496
493
|
**API Stability**: Unstable - Breaking changes expected
|
|
497
494
|
**Recommended Use**: Learning and reference only
|
|
@@ -6,6 +6,7 @@ from .documents import (
|
|
|
6
6
|
DocumentList,
|
|
7
7
|
FlowDocument,
|
|
8
8
|
TaskDocument,
|
|
9
|
+
TemporaryDocument,
|
|
9
10
|
canonical_name_key,
|
|
10
11
|
sanitize_url,
|
|
11
12
|
)
|
|
@@ -27,12 +28,12 @@ from .logging import (
|
|
|
27
28
|
)
|
|
28
29
|
from .logging import get_pipeline_logger as get_logger
|
|
29
30
|
from .pipeline import pipeline_flow, pipeline_task
|
|
30
|
-
from .prefect import flow, task
|
|
31
|
+
from .prefect import disable_run_logger, prefect_test_harness
|
|
31
32
|
from .prompt_manager import PromptManager
|
|
32
33
|
from .settings import settings
|
|
33
34
|
from .tracing import TraceInfo, TraceLevel, trace
|
|
34
35
|
|
|
35
|
-
__version__ = "0.1.7"
|
|
36
|
+
__version__ = "0.1.10"
|
|
36
37
|
|
|
37
38
|
__all__ = [
|
|
38
39
|
# Config/Settings
|
|
@@ -49,17 +50,18 @@ __all__ = [
|
|
|
49
50
|
"DocumentList",
|
|
50
51
|
"FlowDocument",
|
|
51
52
|
"TaskDocument",
|
|
53
|
+
"TemporaryDocument",
|
|
52
54
|
"canonical_name_key",
|
|
53
55
|
"sanitize_url",
|
|
54
56
|
# Flow/Task
|
|
55
57
|
"FlowConfig",
|
|
56
58
|
"FlowOptions",
|
|
57
|
-
# Prefect decorators (clean, no tracing)
|
|
58
|
-
"task",
|
|
59
|
-
"flow",
|
|
60
59
|
# Pipeline decorators (with tracing)
|
|
61
60
|
"pipeline_task",
|
|
62
61
|
"pipeline_flow",
|
|
62
|
+
# Prefect decorators (clean, no tracing)
|
|
63
|
+
"prefect_test_harness",
|
|
64
|
+
"disable_run_logger",
|
|
63
65
|
# LLM
|
|
64
66
|
"llm",
|
|
65
67
|
"ModelName",
|
|
@@ -2,6 +2,7 @@ from .document import Document
|
|
|
2
2
|
from .document_list import DocumentList
|
|
3
3
|
from .flow_document import FlowDocument
|
|
4
4
|
from .task_document import TaskDocument
|
|
5
|
+
from .temporary_document import TemporaryDocument
|
|
5
6
|
from .utils import canonical_name_key, sanitize_url
|
|
6
7
|
|
|
7
8
|
__all__ = [
|
|
@@ -9,6 +10,7 @@ __all__ = [
|
|
|
9
10
|
"DocumentList",
|
|
10
11
|
"FlowDocument",
|
|
11
12
|
"TaskDocument",
|
|
13
|
+
"TemporaryDocument",
|
|
12
14
|
"canonical_name_key",
|
|
13
15
|
"sanitize_url",
|
|
14
16
|
]
|
|
@@ -6,7 +6,19 @@ from abc import ABC, abstractmethod
|
|
|
6
6
|
from base64 import b32encode
|
|
7
7
|
from enum import StrEnum
|
|
8
8
|
from functools import cached_property
|
|
9
|
-
from
|
|
9
|
+
from io import BytesIO
|
|
10
|
+
from typing import (
|
|
11
|
+
Any,
|
|
12
|
+
ClassVar,
|
|
13
|
+
Literal,
|
|
14
|
+
Self,
|
|
15
|
+
TypeVar,
|
|
16
|
+
cast,
|
|
17
|
+
final,
|
|
18
|
+
get_args,
|
|
19
|
+
get_origin,
|
|
20
|
+
overload,
|
|
21
|
+
)
|
|
10
22
|
|
|
11
23
|
from pydantic import BaseModel, ConfigDict, field_serializer, field_validator
|
|
12
24
|
from ruamel.yaml import YAML
|
|
@@ -23,64 +35,107 @@ from .mime_type import (
|
|
|
23
35
|
)
|
|
24
36
|
|
|
25
37
|
TModel = TypeVar("TModel", bound=BaseModel)
|
|
38
|
+
ContentInput = bytes | str | BaseModel | list[str] | Any
|
|
26
39
|
|
|
27
40
|
|
|
28
41
|
class Document(BaseModel, ABC):
|
|
29
|
-
"""Abstract base class for all documents
|
|
42
|
+
"""Abstract base class for all documents.
|
|
43
|
+
|
|
44
|
+
Warning: Document subclasses should NOT start with 'Test' prefix as this
|
|
45
|
+
causes conflicts with pytest test discovery. Classes with 'Test' prefix
|
|
46
|
+
will be rejected at definition time.
|
|
47
|
+
"""
|
|
30
48
|
|
|
31
49
|
MAX_CONTENT_SIZE: ClassVar[int] = 25 * 1024 * 1024 # 25MB default
|
|
32
50
|
DESCRIPTION_EXTENSION: ClassVar[str] = ".description.md"
|
|
33
51
|
MARKDOWN_LIST_SEPARATOR: ClassVar[str] = "\n\n---\n\n"
|
|
34
52
|
|
|
53
|
+
def __init_subclass__(cls, **kwargs: Any) -> None:
|
|
54
|
+
"""Validate subclass names to prevent pytest conflicts."""
|
|
55
|
+
super().__init_subclass__(**kwargs)
|
|
56
|
+
if cls.__name__.startswith("Test"):
|
|
57
|
+
raise TypeError(
|
|
58
|
+
f"Document subclass '{cls.__name__}' cannot start with 'Test' prefix. "
|
|
59
|
+
"This causes conflicts with pytest test discovery. "
|
|
60
|
+
"Please use a different name (e.g., 'SampleDocument', 'ExampleDocument')."
|
|
61
|
+
)
|
|
62
|
+
if hasattr(cls, "FILES"):
|
|
63
|
+
files = getattr(cls, "FILES")
|
|
64
|
+
if not issubclass(files, StrEnum):
|
|
65
|
+
raise TypeError(
|
|
66
|
+
f"Document subclass '{cls.__name__}'.FILES must be an Enum of string values"
|
|
67
|
+
)
|
|
68
|
+
# Check that the Document's model_fields only contain the allowed fields
|
|
69
|
+
# It prevents AI models from adding additional fields to documents
|
|
70
|
+
allowed = {"name", "description", "content"}
|
|
71
|
+
current = set(getattr(cls, "model_fields", {}).keys())
|
|
72
|
+
extras = current - allowed
|
|
73
|
+
if extras:
|
|
74
|
+
raise TypeError(
|
|
75
|
+
f"Document subclass '{cls.__name__}' cannot declare additional fields: "
|
|
76
|
+
f"{', '.join(sorted(extras))}. Only {', '.join(sorted(allowed))} are allowed."
|
|
77
|
+
)
|
|
78
|
+
|
|
35
79
|
def __init__(self, **data: Any) -> None:
|
|
36
80
|
"""Prevent direct instantiation of abstract Document class."""
|
|
37
81
|
if type(self) is Document:
|
|
38
82
|
raise TypeError("Cannot instantiate abstract Document class directly")
|
|
39
83
|
super().__init__(**data)
|
|
40
84
|
|
|
41
|
-
# Optional enum of allowed file names. Subclasses may set this.
|
|
42
|
-
# This is used to validate the document name.
|
|
43
|
-
FILES: ClassVar[type[StrEnum] | None] = None
|
|
44
|
-
|
|
45
85
|
name: str
|
|
46
86
|
description: str | None = None
|
|
47
87
|
content: bytes
|
|
48
88
|
|
|
49
89
|
# Pydantic configuration
|
|
50
90
|
model_config = ConfigDict(
|
|
51
|
-
frozen=True,
|
|
91
|
+
frozen=True,
|
|
52
92
|
arbitrary_types_allowed=True,
|
|
93
|
+
extra="forbid",
|
|
53
94
|
)
|
|
54
95
|
|
|
55
96
|
@abstractmethod
|
|
56
|
-
def get_base_type(self) -> Literal["flow", "task"]:
|
|
97
|
+
def get_base_type(self) -> Literal["flow", "task", "temporary"]:
|
|
57
98
|
"""Get the type of the document - must be implemented by subclasses"""
|
|
58
99
|
raise NotImplementedError("Subclasses must implement this method")
|
|
59
100
|
|
|
101
|
+
@final
|
|
60
102
|
@property
|
|
61
|
-
def base_type(self) -> Literal["flow", "task"]:
|
|
103
|
+
def base_type(self) -> Literal["flow", "task", "temporary"]:
|
|
62
104
|
"""Alias for document_type for backward compatibility"""
|
|
63
105
|
return self.get_base_type()
|
|
64
106
|
|
|
107
|
+
@final
|
|
65
108
|
@property
|
|
66
109
|
def is_flow(self) -> bool:
|
|
67
110
|
"""Check if document is a flow document"""
|
|
68
111
|
return self.get_base_type() == "flow"
|
|
69
112
|
|
|
113
|
+
@final
|
|
70
114
|
@property
|
|
71
115
|
def is_task(self) -> bool:
|
|
72
116
|
"""Check if document is a task document"""
|
|
73
117
|
return self.get_base_type() == "task"
|
|
74
118
|
|
|
119
|
+
@final
|
|
120
|
+
@property
|
|
121
|
+
def is_temporary(self) -> bool:
|
|
122
|
+
"""Check if document is a temporary document"""
|
|
123
|
+
return self.get_base_type() == "temporary"
|
|
124
|
+
|
|
125
|
+
@final
|
|
75
126
|
@classmethod
|
|
76
127
|
def get_expected_files(cls) -> list[str] | None:
|
|
77
128
|
"""
|
|
78
129
|
Return the list of allowed file names for this document class, or None if unrestricted.
|
|
79
130
|
"""
|
|
80
|
-
if cls
|
|
131
|
+
if not hasattr(cls, "FILES"):
|
|
132
|
+
return None
|
|
133
|
+
files = getattr(cls, "FILES")
|
|
134
|
+
if not files:
|
|
81
135
|
return None
|
|
136
|
+
assert issubclass(files, StrEnum)
|
|
82
137
|
try:
|
|
83
|
-
values = [member.value for member in
|
|
138
|
+
values = [member.value for member in files]
|
|
84
139
|
except TypeError:
|
|
85
140
|
raise DocumentNameError(f"{cls.__name__}.FILES must be an Enum of string values")
|
|
86
141
|
if len(values) == 0:
|
|
@@ -100,14 +155,10 @@ class Document(BaseModel, ABC):
|
|
|
100
155
|
Override this method in subclasses for custom conventions (regex, prefixes, etc.).
|
|
101
156
|
Raise DocumentNameError when invalid.
|
|
102
157
|
"""
|
|
103
|
-
|
|
158
|
+
allowed = cls.get_expected_files()
|
|
159
|
+
if not allowed:
|
|
104
160
|
return
|
|
105
161
|
|
|
106
|
-
try:
|
|
107
|
-
allowed = {str(member.value) for member in cls.FILES} # type: ignore[arg-type]
|
|
108
|
-
except TypeError:
|
|
109
|
-
raise DocumentNameError(f"{cls.__name__}.FILES must be an Enum of string values")
|
|
110
|
-
|
|
111
162
|
if len(allowed) > 0 and name not in allowed:
|
|
112
163
|
allowed_str = ", ".join(sorted(allowed))
|
|
113
164
|
raise DocumentNameError(f"Invalid filename '{name}'. Allowed names: {allowed_str}")
|
|
@@ -151,16 +202,19 @@ class Document(BaseModel, ABC):
|
|
|
151
202
|
# Fall back to base64 for binary content
|
|
152
203
|
return base64.b64encode(v).decode("ascii")
|
|
153
204
|
|
|
205
|
+
@final
|
|
154
206
|
@property
|
|
155
207
|
def id(self) -> str:
|
|
156
208
|
"""Return the first 6 characters of the SHA256 hash of the content, encoded in base32"""
|
|
157
209
|
return self.sha256[:6]
|
|
158
210
|
|
|
211
|
+
@final
|
|
159
212
|
@cached_property
|
|
160
213
|
def sha256(self) -> str:
|
|
161
214
|
"""Full SHA256 hash of content, encoded in base32"""
|
|
162
215
|
return b32encode(hashlib.sha256(self.content).digest()).decode("ascii").upper()
|
|
163
216
|
|
|
217
|
+
@final
|
|
164
218
|
@property
|
|
165
219
|
def size(self) -> int:
|
|
166
220
|
"""Size of content in bytes"""
|
|
@@ -210,23 +264,61 @@ class Document(BaseModel, ABC):
|
|
|
210
264
|
"""Parse document as JSON"""
|
|
211
265
|
return json.loads(self.as_text())
|
|
212
266
|
|
|
213
|
-
|
|
267
|
+
@overload
|
|
268
|
+
def as_pydantic_model(self, model_type: type[TModel]) -> TModel: ...
|
|
269
|
+
|
|
270
|
+
@overload
|
|
271
|
+
def as_pydantic_model(self, model_type: type[list[TModel]]) -> list[TModel]: ...
|
|
272
|
+
|
|
273
|
+
def as_pydantic_model(
|
|
274
|
+
self, model_type: type[TModel] | type[list[TModel]]
|
|
275
|
+
) -> TModel | list[TModel]:
|
|
214
276
|
"""Parse document as a pydantic model and return the validated instance"""
|
|
215
277
|
data = self.as_yaml() if is_yaml_mime_type(self.mime_type) else self.as_json()
|
|
216
|
-
|
|
278
|
+
|
|
279
|
+
if get_origin(model_type) is list:
|
|
280
|
+
if not isinstance(data, list):
|
|
281
|
+
raise ValueError(f"Expected list data for {model_type}, got {type(data)}")
|
|
282
|
+
item_type = get_args(model_type)[0]
|
|
283
|
+
return [item_type.model_validate(item) for item in data]
|
|
284
|
+
|
|
285
|
+
# At this point model_type must be type[TModel], not type[list[TModel]]
|
|
286
|
+
single_model = cast(type[TModel], model_type)
|
|
287
|
+
return single_model.model_validate(data)
|
|
217
288
|
|
|
218
289
|
def as_markdown_list(self) -> list[str]:
|
|
219
290
|
"""Parse document as a markdown list"""
|
|
220
291
|
return self.as_text().split(self.MARKDOWN_LIST_SEPARATOR)
|
|
221
292
|
|
|
293
|
+
@overload
|
|
294
|
+
@classmethod
|
|
295
|
+
def create(cls, name: str, content: ContentInput, /) -> Self: ...
|
|
296
|
+
@overload
|
|
297
|
+
@classmethod
|
|
298
|
+
def create(cls, name: str, *, content: ContentInput) -> Self: ...
|
|
299
|
+
@overload
|
|
300
|
+
@classmethod
|
|
301
|
+
def create(cls, name: str, description: str | None, content: ContentInput, /) -> Self: ...
|
|
302
|
+
@overload
|
|
303
|
+
@classmethod
|
|
304
|
+
def create(cls, name: str, description: str | None, *, content: ContentInput) -> Self: ...
|
|
305
|
+
|
|
222
306
|
@classmethod
|
|
223
307
|
def create(
|
|
224
308
|
cls,
|
|
225
309
|
name: str,
|
|
226
|
-
description:
|
|
227
|
-
content:
|
|
310
|
+
description: ContentInput = None,
|
|
311
|
+
content: ContentInput = None,
|
|
228
312
|
) -> Self:
|
|
229
313
|
"""Create a document from a name, description, and content"""
|
|
314
|
+
if content is None:
|
|
315
|
+
if description is None:
|
|
316
|
+
raise ValueError(f"Unsupported content type: {type(content)} for {name}")
|
|
317
|
+
content = description
|
|
318
|
+
description = None
|
|
319
|
+
else:
|
|
320
|
+
assert description is None or isinstance(description, str)
|
|
321
|
+
|
|
230
322
|
is_yaml_extension = name.endswith(".yaml") or name.endswith(".yml")
|
|
231
323
|
is_json_extension = name.endswith(".json")
|
|
232
324
|
is_markdown_extension = name.endswith(".md")
|
|
@@ -237,6 +329,14 @@ class Document(BaseModel, ABC):
|
|
|
237
329
|
content = content.encode("utf-8")
|
|
238
330
|
elif is_str_list and is_markdown_extension:
|
|
239
331
|
return cls.create_as_markdown_list(name, description, content) # type: ignore[arg-type]
|
|
332
|
+
elif isinstance(content, list) and all(isinstance(item, BaseModel) for item in content):
|
|
333
|
+
# Handle list[BaseModel] for JSON/YAML files
|
|
334
|
+
if is_yaml_extension:
|
|
335
|
+
return cls.create_as_yaml(name, description, content)
|
|
336
|
+
elif is_json_extension:
|
|
337
|
+
return cls.create_as_json(name, description, content)
|
|
338
|
+
else:
|
|
339
|
+
raise ValueError(f"list[BaseModel] requires .json or .yaml extension, got {name}")
|
|
240
340
|
elif is_yaml_extension:
|
|
241
341
|
return cls.create_as_yaml(name, description, content)
|
|
242
342
|
elif is_json_extension:
|
|
@@ -246,6 +346,7 @@ class Document(BaseModel, ABC):
|
|
|
246
346
|
|
|
247
347
|
return cls(name=name, description=description, content=content)
|
|
248
348
|
|
|
349
|
+
@final
|
|
249
350
|
@classmethod
|
|
250
351
|
def create_as_markdown_list(cls, name: str, description: str | None, items: list[str]) -> Self:
|
|
251
352
|
"""Create a document from a name, description, and list of strings"""
|
|
@@ -258,15 +359,19 @@ class Document(BaseModel, ABC):
|
|
|
258
359
|
content = Document.MARKDOWN_LIST_SEPARATOR.join(cleaned_items)
|
|
259
360
|
return cls.create(name, description, content)
|
|
260
361
|
|
|
362
|
+
@final
|
|
261
363
|
@classmethod
|
|
262
364
|
def create_as_json(cls, name: str, description: str | None, data: Any) -> Self:
|
|
263
365
|
"""Create a document from a name, description, and JSON data"""
|
|
264
366
|
assert name.endswith(".json"), f"Document name must end with .json: {name}"
|
|
265
367
|
if isinstance(data, BaseModel):
|
|
266
368
|
data = data.model_dump(mode="json")
|
|
369
|
+
elif isinstance(data, list) and all(isinstance(item, BaseModel) for item in data):
|
|
370
|
+
data = [item.model_dump(mode="json") for item in data]
|
|
267
371
|
content = json.dumps(data, indent=2).encode("utf-8")
|
|
268
372
|
return cls.create(name, description, content)
|
|
269
373
|
|
|
374
|
+
@final
|
|
270
375
|
@classmethod
|
|
271
376
|
def create_as_yaml(cls, name: str, description: str | None, data: Any) -> Self:
|
|
272
377
|
"""Create a document from a name, description, and YAML data"""
|
|
@@ -274,16 +379,18 @@ class Document(BaseModel, ABC):
|
|
|
274
379
|
f"Document name must end with .yaml or .yml: {name}"
|
|
275
380
|
)
|
|
276
381
|
if isinstance(data, BaseModel):
|
|
277
|
-
data = data.model_dump()
|
|
382
|
+
data = data.model_dump(mode="json")
|
|
383
|
+
elif isinstance(data, list) and all(isinstance(item, BaseModel) for item in data):
|
|
384
|
+
data = [item.model_dump(mode="json") for item in data]
|
|
278
385
|
yaml = YAML()
|
|
279
386
|
yaml.indent(mapping=2, sequence=4, offset=2)
|
|
280
|
-
from io import BytesIO
|
|
281
387
|
|
|
282
388
|
stream = BytesIO()
|
|
283
389
|
yaml.dump(data, stream)
|
|
284
390
|
content = stream.getvalue()
|
|
285
391
|
return cls.create(name, description, content)
|
|
286
392
|
|
|
393
|
+
@final
|
|
287
394
|
def serialize_model(self) -> dict[str, Any]:
|
|
288
395
|
"""Serialize document to a dictionary with proper encoding."""
|
|
289
396
|
result = {
|
|
@@ -312,6 +419,7 @@ class Document(BaseModel, ABC):
|
|
|
312
419
|
|
|
313
420
|
return result
|
|
314
421
|
|
|
422
|
+
@final
|
|
315
423
|
@classmethod
|
|
316
424
|
def from_dict(cls, data: dict[str, Any]) -> Self:
|
|
317
425
|
"""Deserialize document from dictionary."""
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""Task-specific document base class."""
|
|
2
|
+
|
|
3
|
+
from typing import Literal, final
|
|
4
|
+
|
|
5
|
+
from .document import Document
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@final
|
|
9
|
+
class TemporaryDocument(Document):
|
|
10
|
+
"""
|
|
11
|
+
Temporary document is a document that is not persisted in any case.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
def get_base_type(self) -> Literal["temporary"]:
|
|
15
|
+
"""Get the document type."""
|
|
16
|
+
return "temporary"
|