ai-pipeline-core 0.1.7__tar.gz → 0.1.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39):
  1. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/PKG-INFO +35 -38
  2. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/README.md +34 -37
  3. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/__init__.py +7 -5
  4. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/documents/__init__.py +2 -0
  5. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/documents/document.py +131 -23
  6. ai_pipeline_core-0.1.10/ai_pipeline_core/documents/temporary_document.py +16 -0
  7. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/flow/config.py +40 -1
  8. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/llm/model_options.py +4 -0
  9. ai_pipeline_core-0.1.10/ai_pipeline_core/pipeline.py +414 -0
  10. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/prompt_manager.py +7 -1
  11. ai_pipeline_core-0.1.10/ai_pipeline_core/simple_runner/cli.py +170 -0
  12. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/simple_runner/simple_runner.py +7 -2
  13. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/pyproject.toml +11 -4
  14. ai_pipeline_core-0.1.7/ai_pipeline_core/pipeline.py +0 -418
  15. ai_pipeline_core-0.1.7/ai_pipeline_core/simple_runner/cli.py +0 -95
  16. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/.gitignore +0 -0
  17. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/LICENSE +0 -0
  18. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/documents/document_list.py +0 -0
  19. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/documents/flow_document.py +0 -0
  20. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/documents/mime_type.py +0 -0
  21. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/documents/task_document.py +0 -0
  22. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/documents/utils.py +0 -0
  23. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/exceptions.py +0 -0
  24. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/flow/__init__.py +0 -0
  25. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/flow/options.py +0 -0
  26. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/llm/__init__.py +0 -0
  27. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/llm/ai_messages.py +0 -0
  28. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/llm/client.py +0 -0
  29. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/llm/model_response.py +0 -0
  30. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/llm/model_types.py +0 -0
  31. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/logging/__init__.py +0 -0
  32. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/logging/logging.yml +0 -0
  33. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/logging/logging_config.py +0 -0
  34. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/logging/logging_mixin.py +0 -0
  35. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/prefect.py +0 -0
  36. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/py.typed +0 -0
  37. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/settings.py +0 -0
  38. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/simple_runner/__init__.py +0 -0
  39. {ai_pipeline_core-0.1.7 → ai_pipeline_core-0.1.10}/ai_pipeline_core/tracing.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ai-pipeline-core
3
- Version: 0.1.7
3
+ Version: 0.1.10
4
4
  Summary: Core utilities for AI-powered processing pipelines using prefect
5
5
  Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
6
6
  Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -151,7 +151,7 @@ async def process_document(doc: Document):
151
151
  return response.parsed
152
152
  ```
153
153
 
154
- ### Enhanced Pipeline Decorators (New in v0.1.7)
154
+ ### Enhanced Pipeline Decorators
155
155
  ```python
156
156
  from ai_pipeline_core import pipeline_flow, pipeline_task
157
157
  from ai_pipeline_core.flow import FlowOptions
@@ -182,7 +182,7 @@ async def my_pipeline(
182
182
  return DocumentList(results)
183
183
  ```
184
184
 
185
- ### Simple Runner Utility (New in v0.1.7)
185
+ ### Simple Runner Utility
186
186
  ```python
187
187
  from ai_pipeline_core.simple_runner import run_cli, run_pipeline
188
188
  from ai_pipeline_core.flow import FlowOptions
@@ -206,7 +206,7 @@ async def main():
206
206
  )
207
207
  ```
208
208
 
209
- ### Clean Prefect Decorators (New in v0.1.7)
209
+ ### Clean Prefect Decorators
210
210
  ```python
211
211
  # Import clean Prefect decorators without tracing
212
212
  from ai_pipeline_core.prefect import flow, task
@@ -214,12 +214,12 @@ from ai_pipeline_core.prefect import flow, task
214
214
  # Or use pipeline decorators with tracing
215
215
  from ai_pipeline_core import pipeline_flow, pipeline_task
216
216
 
217
- @task # Clean Prefect task
217
+ @task # Clean Prefect task (supports both sync and async)
218
218
  def compute(x: int) -> int:
219
219
  return x * 2
220
220
 
221
- @pipeline_task(trace_level="always") # With tracing
222
- def compute_traced(x: int) -> int:
221
+ @pipeline_task(trace_level="always") # With tracing (async only)
222
+ async def compute_traced(x: int) -> int:
223
223
  return x * 2
224
224
  ```
225
225
 
@@ -246,12 +246,12 @@ docs = DocumentList([doc1, doc2])
246
246
  Managed AI interactions with built-in retry logic, cost tracking, and structured outputs.
247
247
 
248
248
  **Supported Models** (via LiteLLM proxy):
249
- - OpenAI: GPT-4, GPT-5 series
250
- - Anthropic: Claude 3 series
251
- - Google: Gemini 2.5 series
252
- - xAI: Grok models
253
- - Perplexity: Sonar models (with search capabilities)
254
- - And many more through LiteLLM compatibility
249
+ - OpenAI: gpt-5
250
+ - Anthropic: claude-4
251
+ - Google: gemini-2.5
252
+ - xAI: grok-3, grok-4
253
+ - Perplexity: sonar-pro-search
254
+ - And many more through LiteLLM compatibility. Every model from openrouter should work.
255
255
 
256
256
  ```python
257
257
  from ai_pipeline_core.llm import generate_structured, AIMessages, ModelOptions
@@ -328,13 +328,13 @@ ai_pipeline_core/
328
328
  │ └── model_options.py # Configuration models
329
329
  ├── flow/ # Prefect flow utilities
330
330
  │ ├── config.py # Type-safe flow configuration
331
- │ └── options.py # FlowOptions base class (v0.1.7)
332
- ├── simple_runner/ # Pipeline execution utilities (v0.1.7)
331
+ │ └── options.py # FlowOptions base class
332
+ ├── simple_runner/ # Pipeline execution utilities
333
333
  │ ├── cli.py # CLI interface
334
334
  │ └── simple_runner.py # Core runner logic
335
335
  ├── logging/ # Structured logging
336
- ├── pipeline.py # Enhanced decorators (v0.1.7)
337
- ├── prefect.py # Clean Prefect exports (v0.1.7)
336
+ ├── pipeline.py # Enhanced decorators
337
+ ├── prefect.py # Clean Prefect exports
338
338
  ├── tracing.py # Observability decorators
339
339
  └── settings.py # Centralized configuration
340
340
  ```
@@ -345,6 +345,7 @@ ai_pipeline_core/
345
345
  ```bash
346
346
  make test # Run all tests
347
347
  make test-cov # Run with coverage report
348
+ make test-showcase # Test the showcase.py CLI example
348
349
  pytest tests/test_documents.py::TestDocument::test_creation # Single test
349
350
  ```
350
351
 
@@ -481,6 +482,22 @@ For learning purposes, see [CLAUDE.md](CLAUDE.md) for our comprehensive coding s
481
482
 
482
483
  - [CLAUDE.md](CLAUDE.md) - Detailed coding standards and architecture guide
483
484
 
485
+ ## Examples
486
+
487
+ ### In This Repository
488
+ - [showcase.py](examples/showcase.py) - Complete example demonstrating all core features including the CLI runner
489
+ ```bash
490
+ # Run the showcase example with CLI
491
+ python examples/showcase.py ./output --temperature 0.7 --batch-size 5
492
+
493
+ # Show help
494
+ python examples/showcase.py --help
495
+ ```
496
+ - [showcase.jinja2](examples/showcase.jinja2) - Example Jinja2 prompt template
497
+
498
+ ### Real-World Application
499
+ - [AI Documentation Writer](https://github.com/bbarwik/ai-documentation-writer) - Production-ready example showing how to build sophisticated AI pipelines for automated documentation generation. See [examples/ai-documentation-writer.md](examples/ai-documentation-writer.md) for a detailed overview.
500
+
484
501
  ### dependencies_docs/ Directory
485
502
  > [!NOTE]
486
503
  > The `dependencies_docs/` directory contains guides for AI assistants (like Claude Code) on how to interact with the project's external dependencies and tooling, NOT user documentation for ai-pipeline-core itself. These files are excluded from repository listings to avoid confusion.
@@ -511,29 +528,9 @@ Built with:
511
528
  - [LiteLLM](https://litellm.ai/) - LLM proxy
512
529
  - [Pydantic](https://pydantic-docs.helpmanual.io/) - Data validation
513
530
 
514
- ## What's New in v0.1.7
515
-
516
- ### Major Additions
517
- - **Enhanced Pipeline Decorators**: New `pipeline_flow` and `pipeline_task` decorators combining Prefect functionality with automatic LMNR tracing
518
- - **FlowOptions Base Class**: Extensible configuration system for flows with type-safe inheritance
519
- - **Simple Runner Module**: CLI and programmatic utilities for easy pipeline execution
520
- - **Clean Prefect Exports**: Separate imports for Prefect decorators with and without tracing
521
- - **Expanded Exports**: All major components now accessible from top-level package import
522
-
523
- ### API Improvements
524
- - Better type inference for document flows with custom options
525
- - Support for custom FlowOptions inheritance in pipeline flows
526
- - Improved error messages for invalid flow signatures
527
- - Enhanced document utility functions (`canonical_name_key`, `sanitize_url`)
528
-
529
- ### Developer Experience
530
- - Simplified imports - most components available from `ai_pipeline_core` directly
531
- - Better separation of concerns between clean Prefect and traced pipeline decorators
532
- - More intuitive flow configuration with `FlowOptions` inheritance
533
-
534
531
  ## Stability Notice
535
532
 
536
- **Current Version**: 0.1.7
533
+ **Current Version**: 0.1.10
537
534
  **Status**: Internal Preview
538
535
  **API Stability**: Unstable - Breaking changes expected
539
536
  **Recommended Use**: Learning and reference only
@@ -109,7 +109,7 @@ async def process_document(doc: Document):
109
109
  return response.parsed
110
110
  ```
111
111
 
112
- ### Enhanced Pipeline Decorators (New in v0.1.7)
112
+ ### Enhanced Pipeline Decorators
113
113
  ```python
114
114
  from ai_pipeline_core import pipeline_flow, pipeline_task
115
115
  from ai_pipeline_core.flow import FlowOptions
@@ -140,7 +140,7 @@ async def my_pipeline(
140
140
  return DocumentList(results)
141
141
  ```
142
142
 
143
- ### Simple Runner Utility (New in v0.1.7)
143
+ ### Simple Runner Utility
144
144
  ```python
145
145
  from ai_pipeline_core.simple_runner import run_cli, run_pipeline
146
146
  from ai_pipeline_core.flow import FlowOptions
@@ -164,7 +164,7 @@ async def main():
164
164
  )
165
165
  ```
166
166
 
167
- ### Clean Prefect Decorators (New in v0.1.7)
167
+ ### Clean Prefect Decorators
168
168
  ```python
169
169
  # Import clean Prefect decorators without tracing
170
170
  from ai_pipeline_core.prefect import flow, task
@@ -172,12 +172,12 @@ from ai_pipeline_core.prefect import flow, task
172
172
  # Or use pipeline decorators with tracing
173
173
  from ai_pipeline_core import pipeline_flow, pipeline_task
174
174
 
175
- @task # Clean Prefect task
175
+ @task # Clean Prefect task (supports both sync and async)
176
176
  def compute(x: int) -> int:
177
177
  return x * 2
178
178
 
179
- @pipeline_task(trace_level="always") # With tracing
180
- def compute_traced(x: int) -> int:
179
+ @pipeline_task(trace_level="always") # With tracing (async only)
180
+ async def compute_traced(x: int) -> int:
181
181
  return x * 2
182
182
  ```
183
183
 
@@ -204,12 +204,12 @@ docs = DocumentList([doc1, doc2])
204
204
  Managed AI interactions with built-in retry logic, cost tracking, and structured outputs.
205
205
 
206
206
  **Supported Models** (via LiteLLM proxy):
207
- - OpenAI: GPT-4, GPT-5 series
208
- - Anthropic: Claude 3 series
209
- - Google: Gemini 2.5 series
210
- - xAI: Grok models
211
- - Perplexity: Sonar models (with search capabilities)
212
- - And many more through LiteLLM compatibility
207
+ - OpenAI: gpt-5
208
+ - Anthropic: claude-4
209
+ - Google: gemini-2.5
210
+ - xAI: grok-3, grok-4
211
+ - Perplexity: sonar-pro-search
212
+ - And many more through LiteLLM compatibility. Every model from openrouter should work.
213
213
 
214
214
  ```python
215
215
  from ai_pipeline_core.llm import generate_structured, AIMessages, ModelOptions
@@ -286,13 +286,13 @@ ai_pipeline_core/
286
286
  │ └── model_options.py # Configuration models
287
287
  ├── flow/ # Prefect flow utilities
288
288
  │ ├── config.py # Type-safe flow configuration
289
- │ └── options.py # FlowOptions base class (v0.1.7)
290
- ├── simple_runner/ # Pipeline execution utilities (v0.1.7)
289
+ │ └── options.py # FlowOptions base class
290
+ ├── simple_runner/ # Pipeline execution utilities
291
291
  │ ├── cli.py # CLI interface
292
292
  │ └── simple_runner.py # Core runner logic
293
293
  ├── logging/ # Structured logging
294
- ├── pipeline.py # Enhanced decorators (v0.1.7)
295
- ├── prefect.py # Clean Prefect exports (v0.1.7)
294
+ ├── pipeline.py # Enhanced decorators
295
+ ├── prefect.py # Clean Prefect exports
296
296
  ├── tracing.py # Observability decorators
297
297
  └── settings.py # Centralized configuration
298
298
  ```
@@ -303,6 +303,7 @@ ai_pipeline_core/
303
303
  ```bash
304
304
  make test # Run all tests
305
305
  make test-cov # Run with coverage report
306
+ make test-showcase # Test the showcase.py CLI example
306
307
  pytest tests/test_documents.py::TestDocument::test_creation # Single test
307
308
  ```
308
309
 
@@ -439,6 +440,22 @@ For learning purposes, see [CLAUDE.md](CLAUDE.md) for our comprehensive coding s
439
440
 
440
441
  - [CLAUDE.md](CLAUDE.md) - Detailed coding standards and architecture guide
441
442
 
443
+ ## Examples
444
+
445
+ ### In This Repository
446
+ - [showcase.py](examples/showcase.py) - Complete example demonstrating all core features including the CLI runner
447
+ ```bash
448
+ # Run the showcase example with CLI
449
+ python examples/showcase.py ./output --temperature 0.7 --batch-size 5
450
+
451
+ # Show help
452
+ python examples/showcase.py --help
453
+ ```
454
+ - [showcase.jinja2](examples/showcase.jinja2) - Example Jinja2 prompt template
455
+
456
+ ### Real-World Application
457
+ - [AI Documentation Writer](https://github.com/bbarwik/ai-documentation-writer) - Production-ready example showing how to build sophisticated AI pipelines for automated documentation generation. See [examples/ai-documentation-writer.md](examples/ai-documentation-writer.md) for a detailed overview.
458
+
442
459
  ### dependencies_docs/ Directory
443
460
  > [!NOTE]
444
461
  > The `dependencies_docs/` directory contains guides for AI assistants (like Claude Code) on how to interact with the project's external dependencies and tooling, NOT user documentation for ai-pipeline-core itself. These files are excluded from repository listings to avoid confusion.
@@ -469,29 +486,9 @@ Built with:
469
486
  - [LiteLLM](https://litellm.ai/) - LLM proxy
470
487
  - [Pydantic](https://pydantic-docs.helpmanual.io/) - Data validation
471
488
 
472
- ## What's New in v0.1.7
473
-
474
- ### Major Additions
475
- - **Enhanced Pipeline Decorators**: New `pipeline_flow` and `pipeline_task` decorators combining Prefect functionality with automatic LMNR tracing
476
- - **FlowOptions Base Class**: Extensible configuration system for flows with type-safe inheritance
477
- - **Simple Runner Module**: CLI and programmatic utilities for easy pipeline execution
478
- - **Clean Prefect Exports**: Separate imports for Prefect decorators with and without tracing
479
- - **Expanded Exports**: All major components now accessible from top-level package import
480
-
481
- ### API Improvements
482
- - Better type inference for document flows with custom options
483
- - Support for custom FlowOptions inheritance in pipeline flows
484
- - Improved error messages for invalid flow signatures
485
- - Enhanced document utility functions (`canonical_name_key`, `sanitize_url`)
486
-
487
- ### Developer Experience
488
- - Simplified imports - most components available from `ai_pipeline_core` directly
489
- - Better separation of concerns between clean Prefect and traced pipeline decorators
490
- - More intuitive flow configuration with `FlowOptions` inheritance
491
-
492
489
  ## Stability Notice
493
490
 
494
- **Current Version**: 0.1.7
491
+ **Current Version**: 0.1.10
495
492
  **Status**: Internal Preview
496
493
  **API Stability**: Unstable - Breaking changes expected
497
494
  **Recommended Use**: Learning and reference only
@@ -6,6 +6,7 @@ from .documents import (
6
6
  DocumentList,
7
7
  FlowDocument,
8
8
  TaskDocument,
9
+ TemporaryDocument,
9
10
  canonical_name_key,
10
11
  sanitize_url,
11
12
  )
@@ -27,12 +28,12 @@ from .logging import (
27
28
  )
28
29
  from .logging import get_pipeline_logger as get_logger
29
30
  from .pipeline import pipeline_flow, pipeline_task
30
- from .prefect import flow, task
31
+ from .prefect import disable_run_logger, prefect_test_harness
31
32
  from .prompt_manager import PromptManager
32
33
  from .settings import settings
33
34
  from .tracing import TraceInfo, TraceLevel, trace
34
35
 
35
- __version__ = "0.1.7"
36
+ __version__ = "0.1.10"
36
37
 
37
38
  __all__ = [
38
39
  # Config/Settings
@@ -49,17 +50,18 @@ __all__ = [
49
50
  "DocumentList",
50
51
  "FlowDocument",
51
52
  "TaskDocument",
53
+ "TemporaryDocument",
52
54
  "canonical_name_key",
53
55
  "sanitize_url",
54
56
  # Flow/Task
55
57
  "FlowConfig",
56
58
  "FlowOptions",
57
- # Prefect decorators (clean, no tracing)
58
- "task",
59
- "flow",
60
59
  # Pipeline decorators (with tracing)
61
60
  "pipeline_task",
62
61
  "pipeline_flow",
62
+ # Prefect decorators (clean, no tracing)
63
+ "prefect_test_harness",
64
+ "disable_run_logger",
63
65
  # LLM
64
66
  "llm",
65
67
  "ModelName",
@@ -2,6 +2,7 @@ from .document import Document
2
2
  from .document_list import DocumentList
3
3
  from .flow_document import FlowDocument
4
4
  from .task_document import TaskDocument
5
+ from .temporary_document import TemporaryDocument
5
6
  from .utils import canonical_name_key, sanitize_url
6
7
 
7
8
  __all__ = [
@@ -9,6 +10,7 @@ __all__ = [
9
10
  "DocumentList",
10
11
  "FlowDocument",
11
12
  "TaskDocument",
13
+ "TemporaryDocument",
12
14
  "canonical_name_key",
13
15
  "sanitize_url",
14
16
  ]
@@ -6,7 +6,19 @@ from abc import ABC, abstractmethod
6
6
  from base64 import b32encode
7
7
  from enum import StrEnum
8
8
  from functools import cached_property
9
- from typing import Any, ClassVar, Literal, Self, TypeVar
9
+ from io import BytesIO
10
+ from typing import (
11
+ Any,
12
+ ClassVar,
13
+ Literal,
14
+ Self,
15
+ TypeVar,
16
+ cast,
17
+ final,
18
+ get_args,
19
+ get_origin,
20
+ overload,
21
+ )
10
22
 
11
23
  from pydantic import BaseModel, ConfigDict, field_serializer, field_validator
12
24
  from ruamel.yaml import YAML
@@ -23,64 +35,107 @@ from .mime_type import (
23
35
  )
24
36
 
25
37
  TModel = TypeVar("TModel", bound=BaseModel)
38
+ ContentInput = bytes | str | BaseModel | list[str] | Any
26
39
 
27
40
 
28
41
  class Document(BaseModel, ABC):
29
- """Abstract base class for all documents"""
42
+ """Abstract base class for all documents.
43
+
44
+ Warning: Document subclasses should NOT start with 'Test' prefix as this
45
+ causes conflicts with pytest test discovery. Classes with 'Test' prefix
46
+ will be rejected at definition time.
47
+ """
30
48
 
31
49
  MAX_CONTENT_SIZE: ClassVar[int] = 25 * 1024 * 1024 # 25MB default
32
50
  DESCRIPTION_EXTENSION: ClassVar[str] = ".description.md"
33
51
  MARKDOWN_LIST_SEPARATOR: ClassVar[str] = "\n\n---\n\n"
34
52
 
53
+ def __init_subclass__(cls, **kwargs: Any) -> None:
54
+ """Validate subclass names to prevent pytest conflicts."""
55
+ super().__init_subclass__(**kwargs)
56
+ if cls.__name__.startswith("Test"):
57
+ raise TypeError(
58
+ f"Document subclass '{cls.__name__}' cannot start with 'Test' prefix. "
59
+ "This causes conflicts with pytest test discovery. "
60
+ "Please use a different name (e.g., 'SampleDocument', 'ExampleDocument')."
61
+ )
62
+ if hasattr(cls, "FILES"):
63
+ files = getattr(cls, "FILES")
64
+ if not issubclass(files, StrEnum):
65
+ raise TypeError(
66
+ f"Document subclass '{cls.__name__}'.FILES must be an Enum of string values"
67
+ )
68
+ # Check that the Document's model_fields only contain the allowed fields
69
+ # It prevents AI models from adding additional fields to documents
70
+ allowed = {"name", "description", "content"}
71
+ current = set(getattr(cls, "model_fields", {}).keys())
72
+ extras = current - allowed
73
+ if extras:
74
+ raise TypeError(
75
+ f"Document subclass '{cls.__name__}' cannot declare additional fields: "
76
+ f"{', '.join(sorted(extras))}. Only {', '.join(sorted(allowed))} are allowed."
77
+ )
78
+
35
79
  def __init__(self, **data: Any) -> None:
36
80
  """Prevent direct instantiation of abstract Document class."""
37
81
  if type(self) is Document:
38
82
  raise TypeError("Cannot instantiate abstract Document class directly")
39
83
  super().__init__(**data)
40
84
 
41
- # Optional enum of allowed file names. Subclasses may set this.
42
- # This is used to validate the document name.
43
- FILES: ClassVar[type[StrEnum] | None] = None
44
-
45
85
  name: str
46
86
  description: str | None = None
47
87
  content: bytes
48
88
 
49
89
  # Pydantic configuration
50
90
  model_config = ConfigDict(
51
- frozen=True, # Make documents immutable
91
+ frozen=True,
52
92
  arbitrary_types_allowed=True,
93
+ extra="forbid",
53
94
  )
54
95
 
55
96
  @abstractmethod
56
- def get_base_type(self) -> Literal["flow", "task"]:
97
+ def get_base_type(self) -> Literal["flow", "task", "temporary"]:
57
98
  """Get the type of the document - must be implemented by subclasses"""
58
99
  raise NotImplementedError("Subclasses must implement this method")
59
100
 
101
+ @final
60
102
  @property
61
- def base_type(self) -> Literal["flow", "task"]:
103
+ def base_type(self) -> Literal["flow", "task", "temporary"]:
62
104
  """Alias for document_type for backward compatibility"""
63
105
  return self.get_base_type()
64
106
 
107
+ @final
65
108
  @property
66
109
  def is_flow(self) -> bool:
67
110
  """Check if document is a flow document"""
68
111
  return self.get_base_type() == "flow"
69
112
 
113
+ @final
70
114
  @property
71
115
  def is_task(self) -> bool:
72
116
  """Check if document is a task document"""
73
117
  return self.get_base_type() == "task"
74
118
 
119
+ @final
120
+ @property
121
+ def is_temporary(self) -> bool:
122
+ """Check if document is a temporary document"""
123
+ return self.get_base_type() == "temporary"
124
+
125
+ @final
75
126
  @classmethod
76
127
  def get_expected_files(cls) -> list[str] | None:
77
128
  """
78
129
  Return the list of allowed file names for this document class, or None if unrestricted.
79
130
  """
80
- if cls.FILES is None:
131
+ if not hasattr(cls, "FILES"):
132
+ return None
133
+ files = getattr(cls, "FILES")
134
+ if not files:
81
135
  return None
136
+ assert issubclass(files, StrEnum)
82
137
  try:
83
- values = [member.value for member in cls.FILES]
138
+ values = [member.value for member in files]
84
139
  except TypeError:
85
140
  raise DocumentNameError(f"{cls.__name__}.FILES must be an Enum of string values")
86
141
  if len(values) == 0:
@@ -100,14 +155,10 @@ class Document(BaseModel, ABC):
100
155
  Override this method in subclasses for custom conventions (regex, prefixes, etc.).
101
156
  Raise DocumentNameError when invalid.
102
157
  """
103
- if cls.FILES is None:
158
+ allowed = cls.get_expected_files()
159
+ if not allowed:
104
160
  return
105
161
 
106
- try:
107
- allowed = {str(member.value) for member in cls.FILES} # type: ignore[arg-type]
108
- except TypeError:
109
- raise DocumentNameError(f"{cls.__name__}.FILES must be an Enum of string values")
110
-
111
162
  if len(allowed) > 0 and name not in allowed:
112
163
  allowed_str = ", ".join(sorted(allowed))
113
164
  raise DocumentNameError(f"Invalid filename '{name}'. Allowed names: {allowed_str}")
@@ -151,16 +202,19 @@ class Document(BaseModel, ABC):
151
202
  # Fall back to base64 for binary content
152
203
  return base64.b64encode(v).decode("ascii")
153
204
 
205
+ @final
154
206
  @property
155
207
  def id(self) -> str:
156
208
  """Return the first 6 characters of the SHA256 hash of the content, encoded in base32"""
157
209
  return self.sha256[:6]
158
210
 
211
+ @final
159
212
  @cached_property
160
213
  def sha256(self) -> str:
161
214
  """Full SHA256 hash of content, encoded in base32"""
162
215
  return b32encode(hashlib.sha256(self.content).digest()).decode("ascii").upper()
163
216
 
217
+ @final
164
218
  @property
165
219
  def size(self) -> int:
166
220
  """Size of content in bytes"""
@@ -210,23 +264,61 @@ class Document(BaseModel, ABC):
210
264
  """Parse document as JSON"""
211
265
  return json.loads(self.as_text())
212
266
 
213
- def as_pydantic_model(self, model_type: type[TModel]) -> TModel:
267
+ @overload
268
+ def as_pydantic_model(self, model_type: type[TModel]) -> TModel: ...
269
+
270
+ @overload
271
+ def as_pydantic_model(self, model_type: type[list[TModel]]) -> list[TModel]: ...
272
+
273
+ def as_pydantic_model(
274
+ self, model_type: type[TModel] | type[list[TModel]]
275
+ ) -> TModel | list[TModel]:
214
276
  """Parse document as a pydantic model and return the validated instance"""
215
277
  data = self.as_yaml() if is_yaml_mime_type(self.mime_type) else self.as_json()
216
- return model_type.model_validate(data)
278
+
279
+ if get_origin(model_type) is list:
280
+ if not isinstance(data, list):
281
+ raise ValueError(f"Expected list data for {model_type}, got {type(data)}")
282
+ item_type = get_args(model_type)[0]
283
+ return [item_type.model_validate(item) for item in data]
284
+
285
+ # At this point model_type must be type[TModel], not type[list[TModel]]
286
+ single_model = cast(type[TModel], model_type)
287
+ return single_model.model_validate(data)
217
288
 
218
289
  def as_markdown_list(self) -> list[str]:
219
290
  """Parse document as a markdown list"""
220
291
  return self.as_text().split(self.MARKDOWN_LIST_SEPARATOR)
221
292
 
293
+ @overload
294
+ @classmethod
295
+ def create(cls, name: str, content: ContentInput, /) -> Self: ...
296
+ @overload
297
+ @classmethod
298
+ def create(cls, name: str, *, content: ContentInput) -> Self: ...
299
+ @overload
300
+ @classmethod
301
+ def create(cls, name: str, description: str | None, content: ContentInput, /) -> Self: ...
302
+ @overload
303
+ @classmethod
304
+ def create(cls, name: str, description: str | None, *, content: ContentInput) -> Self: ...
305
+
222
306
  @classmethod
223
307
  def create(
224
308
  cls,
225
309
  name: str,
226
- description: str | None,
227
- content: bytes | str | BaseModel | list[str] | Any,
310
+ description: ContentInput = None,
311
+ content: ContentInput = None,
228
312
  ) -> Self:
229
313
  """Create a document from a name, description, and content"""
314
+ if content is None:
315
+ if description is None:
316
+ raise ValueError(f"Unsupported content type: {type(content)} for {name}")
317
+ content = description
318
+ description = None
319
+ else:
320
+ assert description is None or isinstance(description, str)
321
+
230
322
  is_yaml_extension = name.endswith(".yaml") or name.endswith(".yml")
231
323
  is_json_extension = name.endswith(".json")
232
324
  is_markdown_extension = name.endswith(".md")
@@ -237,6 +329,14 @@ class Document(BaseModel, ABC):
237
329
  content = content.encode("utf-8")
238
330
  elif is_str_list and is_markdown_extension:
239
331
  return cls.create_as_markdown_list(name, description, content) # type: ignore[arg-type]
332
+ elif isinstance(content, list) and all(isinstance(item, BaseModel) for item in content):
333
+ # Handle list[BaseModel] for JSON/YAML files
334
+ if is_yaml_extension:
335
+ return cls.create_as_yaml(name, description, content)
336
+ elif is_json_extension:
337
+ return cls.create_as_json(name, description, content)
338
+ else:
339
+ raise ValueError(f"list[BaseModel] requires .json or .yaml extension, got {name}")
240
340
  elif is_yaml_extension:
241
341
  return cls.create_as_yaml(name, description, content)
242
342
  elif is_json_extension:
@@ -246,6 +346,7 @@ class Document(BaseModel, ABC):
246
346
 
247
347
  return cls(name=name, description=description, content=content)
248
348
 
349
+ @final
249
350
  @classmethod
250
351
  def create_as_markdown_list(cls, name: str, description: str | None, items: list[str]) -> Self:
251
352
  """Create a document from a name, description, and list of strings"""
@@ -258,15 +359,19 @@ class Document(BaseModel, ABC):
258
359
  content = Document.MARKDOWN_LIST_SEPARATOR.join(cleaned_items)
259
360
  return cls.create(name, description, content)
260
361
 
362
+ @final
261
363
  @classmethod
262
364
  def create_as_json(cls, name: str, description: str | None, data: Any) -> Self:
263
365
  """Create a document from a name, description, and JSON data"""
264
366
  assert name.endswith(".json"), f"Document name must end with .json: {name}"
265
367
  if isinstance(data, BaseModel):
266
368
  data = data.model_dump(mode="json")
369
+ elif isinstance(data, list) and all(isinstance(item, BaseModel) for item in data):
370
+ data = [item.model_dump(mode="json") for item in data]
267
371
  content = json.dumps(data, indent=2).encode("utf-8")
268
372
  return cls.create(name, description, content)
269
373
 
374
+ @final
270
375
  @classmethod
271
376
  def create_as_yaml(cls, name: str, description: str | None, data: Any) -> Self:
272
377
  """Create a document from a name, description, and YAML data"""
@@ -274,16 +379,18 @@ class Document(BaseModel, ABC):
274
379
  f"Document name must end with .yaml or .yml: {name}"
275
380
  )
276
381
  if isinstance(data, BaseModel):
277
- data = data.model_dump()
382
+ data = data.model_dump(mode="json")
383
+ elif isinstance(data, list) and all(isinstance(item, BaseModel) for item in data):
384
+ data = [item.model_dump(mode="json") for item in data]
278
385
  yaml = YAML()
279
386
  yaml.indent(mapping=2, sequence=4, offset=2)
280
- from io import BytesIO
281
387
 
282
388
  stream = BytesIO()
283
389
  yaml.dump(data, stream)
284
390
  content = stream.getvalue()
285
391
  return cls.create(name, description, content)
286
392
 
393
+ @final
287
394
  def serialize_model(self) -> dict[str, Any]:
288
395
  """Serialize document to a dictionary with proper encoding."""
289
396
  result = {
@@ -312,6 +419,7 @@ class Document(BaseModel, ABC):
312
419
 
313
420
  return result
314
421
 
422
+ @final
315
423
  @classmethod
316
424
  def from_dict(cls, data: dict[str, Any]) -> Self:
317
425
  """Deserialize document from dictionary."""
@@ -0,0 +1,16 @@
1
+ """Task-specific document base class."""
2
+
3
+ from typing import Literal, final
4
+
5
+ from .document import Document
6
+
7
+
8
+ @final
9
+ class TemporaryDocument(Document):
10
+ """
11
+ Temporary document is a document that is not persisted in any case.
12
+ """
13
+
14
+ def get_base_type(self) -> Literal["temporary"]:
15
+ """Get the document type."""
16
+ return "temporary"