ai-pipeline-core 0.1.6__tar.gz → 0.1.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/PKG-INFO +115 -36
  2. {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/README.md +113 -34
  3. ai_pipeline_core-0.1.8/ai_pipeline_core/__init__.py +77 -0
  4. {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/documents/__init__.py +3 -0
  5. {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/documents/document.py +16 -1
  6. ai_pipeline_core-0.1.8/ai_pipeline_core/flow/__init__.py +7 -0
  7. {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/flow/config.py +21 -0
  8. ai_pipeline_core-0.1.8/ai_pipeline_core/flow/options.py +26 -0
  9. {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/llm/client.py +5 -3
  10. {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/llm/model_options.py +4 -0
  11. ai_pipeline_core-0.1.8/ai_pipeline_core/pipeline.py +414 -0
  12. ai_pipeline_core-0.1.8/ai_pipeline_core/prefect.py +7 -0
  13. ai_pipeline_core-0.1.8/ai_pipeline_core/simple_runner/__init__.py +19 -0
  14. ai_pipeline_core-0.1.8/ai_pipeline_core/simple_runner/cli.py +127 -0
  15. ai_pipeline_core-0.1.8/ai_pipeline_core/simple_runner/simple_runner.py +147 -0
  16. {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/tracing.py +63 -20
  17. {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/pyproject.toml +9 -5
  18. ai_pipeline_core-0.1.6/ai_pipeline_core/__init__.py +0 -36
  19. ai_pipeline_core-0.1.6/ai_pipeline_core/flow/__init__.py +0 -3
  20. {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/.gitignore +0 -0
  21. {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/LICENSE +0 -0
  22. {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/documents/document_list.py +0 -0
  23. {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/documents/flow_document.py +0 -0
  24. {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/documents/mime_type.py +0 -0
  25. {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/documents/task_document.py +0 -0
  26. {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/documents/utils.py +0 -0
  27. {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/exceptions.py +0 -0
  28. {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/llm/__init__.py +0 -0
  29. {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/llm/ai_messages.py +0 -0
  30. {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/llm/model_response.py +0 -0
  31. {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/llm/model_types.py +0 -0
  32. {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/logging/__init__.py +0 -0
  33. {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/logging/logging.yml +0 -0
  34. {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/logging/logging_config.py +0 -0
  35. {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/logging/logging_mixin.py +0 -0
  36. {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/prompt_manager.py +0 -0
  37. {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/py.typed +0 -0
  38. {ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/settings.py +0 -0

{ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ai-pipeline-core
- Version: 0.1.6
+ Version: 0.1.8
  Summary: Core utilities for AI-powered processing pipelines using prefect
  Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
  Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -20,7 +20,7 @@ Classifier: Typing :: Typed
  Requires-Python: >=3.12
  Requires-Dist: httpx>=0.28.1
  Requires-Dist: jinja2>=3.1.6
- Requires-Dist: lmnr>=0.7.5
+ Requires-Dist: lmnr>=0.7.6
  Requires-Dist: openai>=1.99.9
  Requires-Dist: prefect>=3.4.13
  Requires-Dist: pydantic-settings>=2.10.1
@@ -151,40 +151,76 @@ async def process_document(doc: Document):
      return response.parsed
  ```

- ### Prefect Flow Integration
+ ### Enhanced Pipeline Decorators
  ```python
- from prefect import flow, task
- from ai_pipeline_core.documents import Document, DocumentList, FlowDocument
- from ai_pipeline_core.flow import FlowConfig
- from ai_pipeline_core.tracing import trace
-
- class OutputDocument(FlowDocument):
-     """Custom output document type"""
-     def get_type(self) -> str:
-         return "output"
+ from ai_pipeline_core import pipeline_flow, pipeline_task
+ from ai_pipeline_core.flow import FlowOptions
+ from ai_pipeline_core.documents import DocumentList, FlowDocument

- class MyFlowConfig(FlowConfig):
-     INPUT_DOCUMENT_TYPES = [InputDocument]
-     OUTPUT_DOCUMENT_TYPE = OutputDocument
+ class CustomFlowOptions(FlowOptions):
+     """Extend base options with your custom fields"""
+     batch_size: int = 100
+     temperature: float = 0.7

- @task
- @trace
+ @pipeline_task(trace_level="always", retries=3)
  async def process_task(doc: Document) -> Document:
-     # Task-level processing with automatic tracing
+     # Task with automatic tracing and retries
      result = await process_document(doc)
-     # Convert result to JSON string for document content
-     import json
-     return OutputDocument(name="result", content=json.dumps(result.model_dump()).encode())
+     return OutputDocument(name="result", content=result.encode())
+
+ @pipeline_flow(trace_level="always")
+ async def my_pipeline(
+     project_name: str,
+     documents: DocumentList,
+     flow_options: CustomFlowOptions  # Type-safe custom options
+ ) -> DocumentList:
+     # Pipeline flow with enforced signature and tracing
+     results = []
+     for doc in documents:
+         result = await process_task(doc)
+         results.append(result)
+     return DocumentList(results)
+ ```

- @flow
- async def my_pipeline(documents: DocumentList):
-     config = MyFlowConfig()
-     input_docs = config.get_input_documents(documents)
+ ### Simple Runner Utility
+ ```python
+ from ai_pipeline_core.simple_runner import run_cli, run_pipeline
+ from ai_pipeline_core.flow import FlowOptions
+
+ # CLI-based pipeline execution
+ if __name__ == "__main__":
+     run_cli(
+         flows=[my_pipeline],
+         flow_configs=[MyFlowConfig],
+         options_cls=CustomFlowOptions
+     )

-     results = await process_task.map(input_docs)
+ # Or programmatic execution
+ async def main():
+     result = await run_pipeline(
+         project_name="my-project",
+         output_dir=Path("./output"),
+         flow=my_pipeline,
+         flow_config=MyFlowConfig,
+         flow_options=CustomFlowOptions(batch_size=50)
+     )
+ ```
+
+ ### Clean Prefect Decorators
+ ```python
+ # Import clean Prefect decorators without tracing
+ from ai_pipeline_core.prefect import flow, task

-     config.validate_output_documents(results)
-     return results
+ # Or use pipeline decorators with tracing
+ from ai_pipeline_core import pipeline_flow, pipeline_task
+
+ @task  # Clean Prefect task (supports both sync and async)
+ def compute(x: int) -> int:
+     return x * 2
+
+ @pipeline_task(trace_level="always")  # With tracing (async only)
+ async def compute_traced(x: int) -> int:
+     return x * 2
  ```

  ## Core Modules
@@ -210,12 +246,12 @@ docs = DocumentList([doc1, doc2])
  Managed AI interactions with built-in retry logic, cost tracking, and structured outputs.

  **Supported Models** (via LiteLLM proxy):
- - OpenAI: GPT-4, GPT-5 series
- - Anthropic: Claude 3 series
- - Google: Gemini 2.5 series
- - xAI: Grok models
- - Perplexity: Sonar models (with search capabilities)
- - And many more through LiteLLM compatibility
+ - OpenAI: gpt-5
+ - Anthropic: claude-4
+ - Google: gemini-2.5
+ - xAI: grok-3, grok-4
+ - Perplexity: sonar-pro-search
+ - And many more through LiteLLM compatibility. Every model from openrouter should work.

  ```python
  from ai_pipeline_core.llm import generate_structured, AIMessages, ModelOptions
@@ -291,8 +327,14 @@ ai_pipeline_core/
  │   ├── client.py # Async client implementation
  │   └── model_options.py # Configuration models
  ├── flow/ # Prefect flow utilities
- │   └── config.py # Type-safe flow configuration
+ │   ├── config.py # Type-safe flow configuration
+ │   └── options.py # FlowOptions base class
+ ├── simple_runner/ # Pipeline execution utilities
+ │   ├── cli.py # CLI interface
+ │   └── simple_runner.py # Core runner logic
  ├── logging/ # Structured logging
+ ├── pipeline.py # Enhanced decorators
+ ├── prefect.py # Clean Prefect exports
  ├── tracing.py # Observability decorators
  └── settings.py # Centralized configuration
  ```
@@ -303,6 +345,7 @@ ai_pipeline_core/
  ```bash
  make test # Run all tests
  make test-cov # Run with coverage report
+ make test-showcase # Test the showcase.py CLI example
  pytest tests/test_documents.py::TestDocument::test_creation # Single test
  ```

@@ -439,6 +482,22 @@ For learning purposes, see [CLAUDE.md](CLAUDE.md) for our comprehensive coding s

  - [CLAUDE.md](CLAUDE.md) - Detailed coding standards and architecture guide

+ ## Examples
+
+ ### In This Repository
+ - [showcase.py](examples/showcase.py) - Complete example demonstrating all core features including the CLI runner
+ ```bash
+ # Run the showcase example with CLI
+ python examples/showcase.py ./output --temperature 0.7 --batch-size 5
+
+ # Show help
+ python examples/showcase.py --help
+ ```
+ - [showcase.jinja2](examples/showcase.jinja2) - Example Jinja2 prompt template
+
+ ### Real-World Application
+ - [AI Documentation Writer](https://github.com/bbarwik/ai-documentation-writer) - Production-ready example showing how to build sophisticated AI pipelines for automated documentation generation. See [examples/ai-documentation-writer.md](examples/ai-documentation-writer.md) for a detailed overview.
+
  ### dependencies_docs/ Directory
  > [!NOTE]
  > The `dependencies_docs/` directory contains guides for AI assistants (like Claude Code) on how to interact with the project's external dependencies and tooling, NOT user documentation for ai-pipeline-core itself. These files are excluded from repository listings to avoid confusion.
@@ -469,9 +528,29 @@ Built with:
  - [LiteLLM](https://litellm.ai/) - LLM proxy
  - [Pydantic](https://pydantic-docs.helpmanual.io/) - Data validation

+ ## What's New in v0.1.8
+
+ ### Breaking Changes
+ - **Async-Only Pipeline Decorators**: `@pipeline_flow` and `@pipeline_task` now require `async def` functions (raises TypeError for sync)
+ - **Document Class Name Validation**: Document subclasses cannot start with "Test" prefix (pytest conflict prevention)
+ - **FlowConfig Validation**: OUTPUT_DOCUMENT_TYPE cannot be in INPUT_DOCUMENT_TYPES (prevents circular dependencies)
+ - **Temperature Field**: Added optional `temperature` field to `ModelOptions` for explicit control
+
+ ### Major Improvements
+ - **Pipeline Module Refactoring**: Reduced from ~400 to ~150 lines with cleaner Protocol-based typing
+ - **Enhanced Validation**: FlowConfig and Document classes now validate at definition time
+ - **Better CLI Support**: Auto-displays help when no arguments provided, improved context management
+ - **Test Suite Updates**: All tests updated to use async/await consistently
+
+ ### Documentation Updates
+ - Added Document naming rules to CLAUDE.md
+ - Added FlowConfig validation rules
+ - Added code elegance principles section
+ - Updated guide_for_ai.md to API reference format
+
  ## Stability Notice

- **Current Version**: 0.1.6
+ **Current Version**: 0.1.8
  **Status**: Internal Preview
  **API Stability**: Unstable - Breaking changes expected
  **Recommended Use**: Learning and reference only

{ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/README.md

@@ -109,40 +109,76 @@ async def process_document(doc: Document):
      return response.parsed
  ```

- ### Prefect Flow Integration
+ ### Enhanced Pipeline Decorators
  ```python
- from prefect import flow, task
- from ai_pipeline_core.documents import Document, DocumentList, FlowDocument
- from ai_pipeline_core.flow import FlowConfig
- from ai_pipeline_core.tracing import trace
-
- class OutputDocument(FlowDocument):
-     """Custom output document type"""
-     def get_type(self) -> str:
-         return "output"
+ from ai_pipeline_core import pipeline_flow, pipeline_task
+ from ai_pipeline_core.flow import FlowOptions
+ from ai_pipeline_core.documents import DocumentList, FlowDocument

- class MyFlowConfig(FlowConfig):
-     INPUT_DOCUMENT_TYPES = [InputDocument]
-     OUTPUT_DOCUMENT_TYPE = OutputDocument
+ class CustomFlowOptions(FlowOptions):
+     """Extend base options with your custom fields"""
+     batch_size: int = 100
+     temperature: float = 0.7

- @task
- @trace
+ @pipeline_task(trace_level="always", retries=3)
  async def process_task(doc: Document) -> Document:
-     # Task-level processing with automatic tracing
+     # Task with automatic tracing and retries
      result = await process_document(doc)
-     # Convert result to JSON string for document content
-     import json
-     return OutputDocument(name="result", content=json.dumps(result.model_dump()).encode())
+     return OutputDocument(name="result", content=result.encode())
+
+ @pipeline_flow(trace_level="always")
+ async def my_pipeline(
+     project_name: str,
+     documents: DocumentList,
+     flow_options: CustomFlowOptions  # Type-safe custom options
+ ) -> DocumentList:
+     # Pipeline flow with enforced signature and tracing
+     results = []
+     for doc in documents:
+         result = await process_task(doc)
+         results.append(result)
+     return DocumentList(results)
+ ```

- @flow
- async def my_pipeline(documents: DocumentList):
-     config = MyFlowConfig()
-     input_docs = config.get_input_documents(documents)
+ ### Simple Runner Utility
+ ```python
+ from ai_pipeline_core.simple_runner import run_cli, run_pipeline
+ from ai_pipeline_core.flow import FlowOptions
+
+ # CLI-based pipeline execution
+ if __name__ == "__main__":
+     run_cli(
+         flows=[my_pipeline],
+         flow_configs=[MyFlowConfig],
+         options_cls=CustomFlowOptions
+     )

-     results = await process_task.map(input_docs)
+ # Or programmatic execution
+ async def main():
+     result = await run_pipeline(
+         project_name="my-project",
+         output_dir=Path("./output"),
+         flow=my_pipeline,
+         flow_config=MyFlowConfig,
+         flow_options=CustomFlowOptions(batch_size=50)
+     )
+ ```
+
+ ### Clean Prefect Decorators
+ ```python
+ # Import clean Prefect decorators without tracing
+ from ai_pipeline_core.prefect import flow, task

-     config.validate_output_documents(results)
-     return results
+ # Or use pipeline decorators with tracing
+ from ai_pipeline_core import pipeline_flow, pipeline_task
+
+ @task  # Clean Prefect task (supports both sync and async)
+ def compute(x: int) -> int:
+     return x * 2
+
+ @pipeline_task(trace_level="always")  # With tracing (async only)
+ async def compute_traced(x: int) -> int:
+     return x * 2
  ```

  ## Core Modules
@@ -168,12 +204,12 @@ docs = DocumentList([doc1, doc2])
  Managed AI interactions with built-in retry logic, cost tracking, and structured outputs.

  **Supported Models** (via LiteLLM proxy):
- - OpenAI: GPT-4, GPT-5 series
- - Anthropic: Claude 3 series
- - Google: Gemini 2.5 series
- - xAI: Grok models
- - Perplexity: Sonar models (with search capabilities)
- - And many more through LiteLLM compatibility
+ - OpenAI: gpt-5
+ - Anthropic: claude-4
+ - Google: gemini-2.5
+ - xAI: grok-3, grok-4
+ - Perplexity: sonar-pro-search
+ - And many more through LiteLLM compatibility. Every model from openrouter should work.

  ```python
  from ai_pipeline_core.llm import generate_structured, AIMessages, ModelOptions
@@ -249,8 +285,14 @@ ai_pipeline_core/
  │   ├── client.py # Async client implementation
  │   └── model_options.py # Configuration models
  ├── flow/ # Prefect flow utilities
- │   └── config.py # Type-safe flow configuration
+ │   ├── config.py # Type-safe flow configuration
+ │   └── options.py # FlowOptions base class
+ ├── simple_runner/ # Pipeline execution utilities
+ │   ├── cli.py # CLI interface
+ │   └── simple_runner.py # Core runner logic
  ├── logging/ # Structured logging
+ ├── pipeline.py # Enhanced decorators
+ ├── prefect.py # Clean Prefect exports
  ├── tracing.py # Observability decorators
  └── settings.py # Centralized configuration
  ```
@@ -261,6 +303,7 @@ ai_pipeline_core/
  ```bash
  make test # Run all tests
  make test-cov # Run with coverage report
+ make test-showcase # Test the showcase.py CLI example
  pytest tests/test_documents.py::TestDocument::test_creation # Single test
  ```

@@ -397,6 +440,22 @@ For learning purposes, see [CLAUDE.md](CLAUDE.md) for our comprehensive coding s

  - [CLAUDE.md](CLAUDE.md) - Detailed coding standards and architecture guide

+ ## Examples
+
+ ### In This Repository
+ - [showcase.py](examples/showcase.py) - Complete example demonstrating all core features including the CLI runner
+ ```bash
+ # Run the showcase example with CLI
+ python examples/showcase.py ./output --temperature 0.7 --batch-size 5
+
+ # Show help
+ python examples/showcase.py --help
+ ```
+ - [showcase.jinja2](examples/showcase.jinja2) - Example Jinja2 prompt template
+
+ ### Real-World Application
+ - [AI Documentation Writer](https://github.com/bbarwik/ai-documentation-writer) - Production-ready example showing how to build sophisticated AI pipelines for automated documentation generation. See [examples/ai-documentation-writer.md](examples/ai-documentation-writer.md) for a detailed overview.
+
  ### dependencies_docs/ Directory
  > [!NOTE]
  > The `dependencies_docs/` directory contains guides for AI assistants (like Claude Code) on how to interact with the project's external dependencies and tooling, NOT user documentation for ai-pipeline-core itself. These files are excluded from repository listings to avoid confusion.
@@ -427,9 +486,29 @@ Built with:
  - [LiteLLM](https://litellm.ai/) - LLM proxy
  - [Pydantic](https://pydantic-docs.helpmanual.io/) - Data validation

+ ## What's New in v0.1.8
+
+ ### Breaking Changes
+ - **Async-Only Pipeline Decorators**: `@pipeline_flow` and `@pipeline_task` now require `async def` functions (raises TypeError for sync)
+ - **Document Class Name Validation**: Document subclasses cannot start with "Test" prefix (pytest conflict prevention)
+ - **FlowConfig Validation**: OUTPUT_DOCUMENT_TYPE cannot be in INPUT_DOCUMENT_TYPES (prevents circular dependencies)
+ - **Temperature Field**: Added optional `temperature` field to `ModelOptions` for explicit control
+
+ ### Major Improvements
+ - **Pipeline Module Refactoring**: Reduced from ~400 to ~150 lines with cleaner Protocol-based typing
+ - **Enhanced Validation**: FlowConfig and Document classes now validate at definition time
+ - **Better CLI Support**: Auto-displays help when no arguments provided, improved context management
+ - **Test Suite Updates**: All tests updated to use async/await consistently
+
+ ### Documentation Updates
+ - Added Document naming rules to CLAUDE.md
+ - Added FlowConfig validation rules
+ - Added code elegance principles section
+ - Updated guide_for_ai.md to API reference format
+
  ## Stability Notice

- **Current Version**: 0.1.6
+ **Current Version**: 0.1.8
  **Status**: Internal Preview
  **API Stability**: Unstable - Breaking changes expected
  **Recommended Use**: Learning and reference only
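
Per the Breaking Changes above, the pipeline decorators now accept only coroutine functions. A minimal illustration (not part of the diff; the decorator arguments mirror the README example):

```python
from ai_pipeline_core import pipeline_task

# Allowed in 0.1.8: the decorated function is async.
@pipeline_task(trace_level="always")
async def double(x: int) -> int:
    return x * 2

# No longer allowed (per the changelog): decorating a plain `def compute(x): ...`
# with @pipeline_task or @pipeline_flow raises TypeError.
```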

ai_pipeline_core-0.1.8/ai_pipeline_core/__init__.py (new file)

@@ -0,0 +1,77 @@
+ """Pipeline Core - Shared infrastructure for AI pipelines."""
+
+ from . import llm
+ from .documents import (
+     Document,
+     DocumentList,
+     FlowDocument,
+     TaskDocument,
+     canonical_name_key,
+     sanitize_url,
+ )
+ from .flow import FlowConfig, FlowOptions
+ from .llm import (
+     AIMessages,
+     AIMessageType,
+     ModelName,
+     ModelOptions,
+     ModelResponse,
+     StructuredModelResponse,
+ )
+ from .logging import (
+     LoggerMixin,
+     LoggingConfig,
+     StructuredLoggerMixin,
+     get_pipeline_logger,
+     setup_logging,
+ )
+ from .logging import get_pipeline_logger as get_logger
+ from .pipeline import pipeline_flow, pipeline_task
+ from .prefect import disable_run_logger, prefect_test_harness
+ from .prompt_manager import PromptManager
+ from .settings import settings
+ from .tracing import TraceInfo, TraceLevel, trace
+
+ __version__ = "0.1.8"
+
+ __all__ = [
+     # Config/Settings
+     "settings",
+     # Logging
+     "get_logger",
+     "get_pipeline_logger",
+     "LoggerMixin",
+     "LoggingConfig",
+     "setup_logging",
+     "StructuredLoggerMixin",
+     # Documents
+     "Document",
+     "DocumentList",
+     "FlowDocument",
+     "TaskDocument",
+     "canonical_name_key",
+     "sanitize_url",
+     # Flow/Task
+     "FlowConfig",
+     "FlowOptions",
+     # Pipeline decorators (with tracing)
+     "pipeline_task",
+     "pipeline_flow",
+     # Prefect decorators (clean, no tracing)
+     "prefect_test_harness",
+     "disable_run_logger",
+     # LLM
+     "llm",
+     "ModelName",
+     "ModelOptions",
+     "ModelResponse",
+     "StructuredModelResponse",
+     "AIMessages",
+     "AIMessageType",
+     # Tracing
+     "trace",
+     "TraceLevel",
+     "TraceInfo",
+     # Utils
+     "PromptManager",
+ ]
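
Since the new `__init__.py` re-exports the public API at the package root, downstream code can import directly from `ai_pipeline_core`; a short sketch using only names listed in `__all__` above:

```python
# All of these names come from __all__ in the new top-level __init__.py above.
from ai_pipeline_core import (
    ModelOptions,
    PromptManager,
    get_logger,  # alias for get_pipeline_logger
    pipeline_flow,
    pipeline_task,
    settings,
)
```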

{ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/documents/__init__.py

@@ -2,10 +2,13 @@ from .document import Document
  from .document_list import DocumentList
  from .flow_document import FlowDocument
  from .task_document import TaskDocument
+ from .utils import canonical_name_key, sanitize_url

  __all__ = [
      "Document",
      "DocumentList",
      "FlowDocument",
      "TaskDocument",
+     "canonical_name_key",
+     "sanitize_url",
  ]

{ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/documents/document.py

@@ -26,12 +26,27 @@ TModel = TypeVar("TModel", bound=BaseModel)


  class Document(BaseModel, ABC):
-     """Abstract base class for all documents"""
+     """Abstract base class for all documents.
+
+     Warning: Document subclasses should NOT start with 'Test' prefix as this
+     causes conflicts with pytest test discovery. Classes with 'Test' prefix
+     will be rejected at definition time.
+     """

      MAX_CONTENT_SIZE: ClassVar[int] = 25 * 1024 * 1024  # 25MB default
      DESCRIPTION_EXTENSION: ClassVar[str] = ".description.md"
      MARKDOWN_LIST_SEPARATOR: ClassVar[str] = "\n\n---\n\n"

+     def __init_subclass__(cls, **kwargs: Any) -> None:
+         """Validate subclass names to prevent pytest conflicts."""
+         super().__init_subclass__(**kwargs)
+         if cls.__name__.startswith("Test"):
+             raise TypeError(
+                 f"Document subclass '{cls.__name__}' cannot start with 'Test' prefix. "
+                 "This causes conflicts with pytest test discovery. "
+                 "Please use a different name (e.g., 'SampleDocument', 'ExampleDocument')."
+             )
+
      def __init__(self, **data: Any) -> None:
          """Prevent direct instantiation of abstract Document class."""
          if type(self) is Document:
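
A sketch of what the new name check means in practice; `ReportDocument` and `TestDocument` are made-up names for illustration only:

```python
from ai_pipeline_core.documents import FlowDocument

class ReportDocument(FlowDocument):  # fine: the name does not start with "Test"
    pass

try:
    class TestDocument(FlowDocument):  # rejected at class-definition time
        pass
except TypeError as exc:
    print(exc)  # "... cannot start with 'Test' prefix ..."
```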

ai_pipeline_core-0.1.8/ai_pipeline_core/flow/__init__.py (new file)

@@ -0,0 +1,7 @@
+ from .config import FlowConfig
+ from .options import FlowOptions
+
+ __all__ = [
+     "FlowConfig",
+     "FlowOptions",
+ ]

{ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/flow/config.py

@@ -14,6 +14,27 @@ class FlowConfig(ABC):
      INPUT_DOCUMENT_TYPES: ClassVar[list[type[FlowDocument]]]
      OUTPUT_DOCUMENT_TYPE: ClassVar[type[FlowDocument]]

+     def __init_subclass__(cls, **kwargs):
+         """Validate that OUTPUT_DOCUMENT_TYPE is not in INPUT_DOCUMENT_TYPES."""
+         super().__init_subclass__(**kwargs)
+
+         # Skip validation for the abstract base class itself
+         if cls.__name__ == "FlowConfig":
+             return
+
+         # Ensure required attributes are defined
+         if not hasattr(cls, "INPUT_DOCUMENT_TYPES"):
+             raise TypeError(f"FlowConfig {cls.__name__} must define INPUT_DOCUMENT_TYPES")
+         if not hasattr(cls, "OUTPUT_DOCUMENT_TYPE"):
+             raise TypeError(f"FlowConfig {cls.__name__} must define OUTPUT_DOCUMENT_TYPE")
+
+         # Validate that output type is not in input types
+         if cls.OUTPUT_DOCUMENT_TYPE in cls.INPUT_DOCUMENT_TYPES:
+             raise TypeError(
+                 f"FlowConfig {cls.__name__}: OUTPUT_DOCUMENT_TYPE "
+                 f"({cls.OUTPUT_DOCUMENT_TYPE.__name__}) cannot be in INPUT_DOCUMENT_TYPES"
+             )
+
      @classmethod
      def get_input_document_types(cls) -> list[type[FlowDocument]]:
          """

ai_pipeline_core-0.1.8/ai_pipeline_core/flow/options.py (new file)

@@ -0,0 +1,26 @@
+ from typing import TypeVar
+
+ from pydantic import Field
+ from pydantic_settings import BaseSettings, SettingsConfigDict
+
+ from ai_pipeline_core.llm import ModelName
+
+ T = TypeVar("T", bound="FlowOptions")
+
+
+ class FlowOptions(BaseSettings):
+     """Base configuration for AI Pipeline flows."""
+
+     core_model: ModelName | str = Field(
+         default="gpt-5",
+         description="Primary model for complex analysis and generation tasks.",
+     )
+     small_model: ModelName | str = Field(
+         default="gpt-5-mini",
+         description="Fast, cost-effective model for simple tasks and orchestration.",
+     )
+
+     model_config = SettingsConfigDict(frozen=True, extra="ignore")
+
+
+ __all__ = ["FlowOptions"]
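
Because `FlowOptions` is a frozen `pydantic-settings` model (`frozen=True, extra="ignore"`), subclasses get immutable, validated options objects. A small sketch with a hypothetical `MyOptions` subclass:

```python
from pydantic import ValidationError
from ai_pipeline_core.flow import FlowOptions

class MyOptions(FlowOptions):
    batch_size: int = 10

opts = MyOptions(core_model="gpt-5", batch_size=25, unknown=1)  # extra="ignore" drops `unknown`
print(opts.core_model, opts.small_model, opts.batch_size)  # gpt-5 gpt-5-mini 25

try:
    opts.batch_size = 50  # frozen=True rejects mutation after creation
except ValidationError:
    print("FlowOptions instances are immutable")
```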

{ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/llm/client.py

@@ -118,11 +118,13 @@ async def _generate_with_retry(
              span.set_attributes(response.get_laminar_metadata())
              Laminar.set_span_output(response.content)
              if not response.content:
-                 # disable cache in case of empty response
-                 completion_kwargs["extra_body"]["cache"] = {"no-cache": True}
                  raise ValueError(f"Model {model} returned an empty response.")
              return response
          except (asyncio.TimeoutError, ValueError, Exception) as e:
+             if not isinstance(e, asyncio.TimeoutError):
+                 # disable cache if it's not a timeout because it may cause an error
+                 completion_kwargs["extra_body"]["cache"] = {"no-cache": True}
+
              logger.warning(
                  "LLM generation failed (attempt %d/%d): %s",
                  attempt + 1,
@@ -167,7 +169,7 @@ T = TypeVar("T", bound=BaseModel)

  @trace(ignore_inputs=["context"])
  async def generate_structured(
-     model: ModelName,
+     model: ModelName | str,
      response_format: type[T],
      *,
      context: AIMessages = AIMessages(),
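
The first hunk above moves the LiteLLM cache bypass out of the empty-response branch and into the exception handler, so any non-timeout failure retries with the cache disabled while timeouts keep it enabled. A simplified sketch of that pattern; `call_model` and the loop shape are placeholders, not the library's actual code:

```python
import asyncio

async def generate_with_retry(call_model, completion_kwargs: dict, retries: int = 3):
    """Simplified retry loop mirroring the pattern shown in client.py above."""
    for attempt in range(retries):
        try:
            response = await call_model(**completion_kwargs)
            if not response:
                raise ValueError("model returned an empty response")
            return response
        except Exception as e:
            if not isinstance(e, asyncio.TimeoutError):
                # A cached bad or empty result would just be replayed on retry,
                # so bypass the cache; timeouts are transient and keep the cache.
                completion_kwargs.setdefault("extra_body", {})["cache"] = {"no-cache": True}
            if attempt == retries - 1:
                raise
```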

{ai_pipeline_core-0.1.6 → ai_pipeline_core-0.1.8}/ai_pipeline_core/llm/model_options.py

@@ -4,6 +4,7 @@ from pydantic import BaseModel


  class ModelOptions(BaseModel):
+     temperature: float | None = None
      system_prompt: str | None = None
      search_context_size: Literal["low", "medium", "high"] | None = None
      reasoning_effort: Literal["low", "medium", "high"] | None = None
@@ -21,6 +22,9 @@ class ModelOptions(BaseModel):
              "extra_body": {},
          }

+         if self.temperature:
+             kwargs["temperature"] = self.temperature
+
          if self.max_completion_tokens:
              kwargs["max_completion_tokens"] = self.max_completion_tokens
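
With the new field, callers can set the sampling temperature explicitly through `ModelOptions`; a short usage sketch based only on the fields visible in this diff:

```python
from ai_pipeline_core.llm import ModelOptions

opts = ModelOptions(temperature=0.2, reasoning_effort="low")  # forwarded as temperature=0.2

# Note: the kwargs builder above uses a truthiness check (`if self.temperature:`),
# so an explicit temperature=0.0 behaves like the default None and is not forwarded.
opts_zero = ModelOptions(temperature=0.0)
```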