ai-pipeline-core 0.1.8__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. ai_pipeline_core/__init__.py +86 -4
  2. ai_pipeline_core/documents/__init__.py +11 -0
  3. ai_pipeline_core/documents/document.py +1107 -131
  4. ai_pipeline_core/documents/document_list.py +147 -38
  5. ai_pipeline_core/documents/flow_document.py +112 -11
  6. ai_pipeline_core/documents/mime_type.py +173 -15
  7. ai_pipeline_core/documents/task_document.py +117 -12
  8. ai_pipeline_core/documents/temporary_document.py +95 -0
  9. ai_pipeline_core/documents/utils.py +41 -9
  10. ai_pipeline_core/exceptions.py +47 -11
  11. ai_pipeline_core/flow/__init__.py +2 -0
  12. ai_pipeline_core/flow/config.py +250 -23
  13. ai_pipeline_core/flow/options.py +50 -1
  14. ai_pipeline_core/llm/__init__.py +6 -0
  15. ai_pipeline_core/llm/ai_messages.py +125 -27
  16. ai_pipeline_core/llm/client.py +278 -26
  17. ai_pipeline_core/llm/model_options.py +130 -1
  18. ai_pipeline_core/llm/model_response.py +239 -35
  19. ai_pipeline_core/llm/model_types.py +67 -0
  20. ai_pipeline_core/logging/__init__.py +13 -0
  21. ai_pipeline_core/logging/logging_config.py +72 -20
  22. ai_pipeline_core/logging/logging_mixin.py +38 -32
  23. ai_pipeline_core/pipeline.py +308 -60
  24. ai_pipeline_core/prefect.py +48 -1
  25. ai_pipeline_core/prompt_manager.py +215 -24
  26. ai_pipeline_core/settings.py +108 -4
  27. ai_pipeline_core/simple_runner/__init__.py +5 -0
  28. ai_pipeline_core/simple_runner/cli.py +145 -17
  29. ai_pipeline_core/simple_runner/simple_runner.py +244 -6
  30. ai_pipeline_core/tracing.py +232 -30
  31. ai_pipeline_core-0.1.11.dist-info/METADATA +450 -0
  32. ai_pipeline_core-0.1.11.dist-info/RECORD +36 -0
  33. ai_pipeline_core-0.1.8.dist-info/METADATA +0 -558
  34. ai_pipeline_core-0.1.8.dist-info/RECORD +0 -35
  35. {ai_pipeline_core-0.1.8.dist-info → ai_pipeline_core-0.1.11.dist-info}/WHEEL +0 -0
  36. {ai_pipeline_core-0.1.8.dist-info → ai_pipeline_core-0.1.11.dist-info}/licenses/LICENSE +0 -0
@@ -1,3 +1,8 @@
1
+ """Type-safe list container for Document objects.
2
+
3
+ @public
4
+ """
5
+
1
6
  from typing import Any, Iterable, SupportsIndex, Union, overload
2
7
 
3
8
  from typing_extensions import Self
@@ -6,14 +11,25 @@ from .document import Document
6
11
 
7
12
 
8
13
  class DocumentList(list[Document]):
9
- """
10
- A specialized list for Document objects with built-in validation.
14
+ """Type-safe container for Document objects.
15
+
16
+ @public
17
+
18
+ Specialized list with validation and filtering for documents.
19
+
20
+ Best Practice: Use default constructor in 90% of cases. Only enable
21
+ validate_same_type or validate_duplicates when you explicitly need them.
11
22
 
12
- Features:
13
- - Optionally ensures no duplicate filenames within the list
14
- - Optionally validates that all documents have the same type (for flow outputs)
15
- - Provides convenience methods for document operations
16
- - Works with both FlowDocument and TaskDocument classes
23
+ Example:
24
+ >>> # RECOMMENDED - default constructor for most cases
25
+ >>> docs = DocumentList([doc1, doc2])
26
+ >>> # Or empty initialization
27
+ >>> docs = DocumentList()
28
+ >>> docs.append(MyDocument(name="file.txt", content=b"data"))
29
+ >>>
30
+ >>> # Only use validation flags when specifically needed:
31
+ >>> docs = DocumentList(validate_same_type=True) # Rare use case
32
+ >>> doc = docs.get_by("file.txt", required=False) # Get by name; returns None if absent
17
33
  """
18
34
 
19
35
  def __init__(
@@ -22,13 +38,14 @@ class DocumentList(list[Document]):
22
38
  validate_same_type: bool = False,
23
39
  validate_duplicates: bool = False,
24
40
  ) -> None:
25
- """
26
- Initialize DocumentList with optional initial documents.
41
+ """Initialize DocumentList.
42
+
43
+ @public
27
44
 
28
45
  Args:
29
- documents: Initial list of documents
30
- validate_same_type: If True, validates that all documents have the same type.
31
- Should be True for flow outputs, False for inputs.
46
+ documents: Initial list of documents.
47
+ validate_same_type: Enforce same document type.
48
+ validate_duplicates: Prevent duplicate filenames.
32
49
  """
33
50
  super().__init__()
34
51
  self._validate_same_type = validate_same_type
@@ -37,7 +54,11 @@ class DocumentList(list[Document]):
37
54
  self.extend(documents)
38
55
 
39
56
  def _validate_no_duplicates(self) -> None:
40
- """Validate that there are no duplicate filenames."""
57
+ """Check for duplicate document names.
58
+
59
+ Raises:
60
+ ValueError: If duplicate document names are found.
61
+ """
41
62
  if not self._validate_duplicates:
42
63
  return
43
64
 
@@ -53,7 +74,11 @@ class DocumentList(list[Document]):
53
74
  raise ValueError(f"Duplicate document names found: {unique_duplicates}")
54
75
 
55
76
  def _validate_no_description_files(self) -> None:
56
- """Validate that no documents have DESCRIPTION_EXTENSION suffix."""
77
+ """Ensure no documents use reserved description file extension.
78
+
79
+ Raises:
80
+ ValueError: If any document uses the reserved description file extension.
81
+ """
57
82
  description_files = [
58
83
  doc.name for doc in self if doc.name.endswith(Document.DESCRIPTION_EXTENSION)
59
84
  ]
@@ -64,7 +89,11 @@ class DocumentList(list[Document]):
64
89
  )
65
90
 
66
91
  def _validate_types(self) -> None:
67
- """Validate that all documents have the same class type if required."""
92
+ """Ensure all documents are of the same class type.
93
+
94
+ Raises:
95
+ ValueError: If documents have different class types.
96
+ """
68
97
  if not self._validate_same_type or not self:
69
98
  return
70
99
 
@@ -75,23 +104,23 @@ class DocumentList(list[Document]):
75
104
  raise ValueError(f"All documents must have the same type. Found types: {types}")
76
105
 
77
106
  def _validate(self) -> None:
78
- """Run all validations."""
107
+ """Run all configured validation checks."""
79
108
  self._validate_no_duplicates()
80
109
  self._validate_no_description_files()
81
110
  self._validate_types()
82
111
 
83
112
  def append(self, document: Document) -> None:
84
- """Add a document to the list with validation."""
113
+ """Add a document to the end of the list."""
85
114
  super().append(document)
86
115
  self._validate()
87
116
 
88
117
  def extend(self, documents: Iterable[Document]) -> None:
89
- """Extend the list with multiple documents with validation."""
118
+ """Add multiple documents to the list."""
90
119
  super().extend(documents)
91
120
  self._validate()
92
121
 
93
122
  def insert(self, index: SupportsIndex, document: Document) -> None:
94
- """Insert a document at the specified index with validation."""
123
+ """Insert a document at the specified position."""
95
124
  super().insert(index, document)
96
125
  self._validate()
97
126
 
@@ -102,30 +131,110 @@ class DocumentList(list[Document]):
102
131
  def __setitem__(self, index: slice, value: Iterable[Document]) -> None: ...
103
132
 
104
133
  def __setitem__(self, index: Union[SupportsIndex, slice], value: Any) -> None:
105
- """Set item with validation."""
134
+ """Set item or slice with validation."""
106
135
  super().__setitem__(index, value)
107
136
  self._validate()
108
137
 
109
138
  def __iadd__(self, other: Any) -> "Self":
110
- """In-place addition with validation."""
139
+ """In-place addition (+=) with validation.
140
+
141
+ Returns:
142
+ Self: This DocumentList after modification.
143
+ """
111
144
  result = super().__iadd__(other)
112
145
  self._validate()
113
146
  return result
114
147
 
115
- def filter_by_type(self, document_type: type[Document]) -> "DocumentList":
116
- """Return a new DocumentList containing only instances of the specified document class."""
117
- return DocumentList([doc for doc in self if type(doc) is document_type])
118
-
119
- def filter_by_types(self, document_types: list[type[Document]]) -> "DocumentList":
120
- """Return a new DocumentList containing only instances of the specified document classes."""
121
- documents = DocumentList()
122
- for document_type in document_types:
123
- documents.extend(self.filter_by_type(document_type))
124
- return documents
125
-
126
- def get_by_name(self, name: str) -> Document | None:
127
- """Get a document by its name."""
128
- for doc in self:
129
- if doc.name == name:
130
- return doc
131
- return None
148
+ @overload
149
+ def filter_by(self, arg: str) -> "DocumentList": ...
150
+
151
+ @overload
152
+ def filter_by(self, arg: type[Document]) -> "DocumentList": ...
153
+
154
+ @overload
155
+ def filter_by(self, arg: list[type[Document]]) -> "DocumentList": ...
156
+
157
+ def filter_by(self, arg: str | type[Document] | list[type[Document]]) -> "DocumentList":
158
+ """Filter documents by name or type(s).
159
+
160
+ @public
161
+
162
+ Args:
163
+ arg: Document name (str), single document type, or list of document types.
164
+
165
+ Returns:
166
+ New DocumentList with filtered documents.
167
+
168
+ Raises:
169
+ TypeError: If arg is not a valid type (str, Document type, or list of Document types).
170
+
171
+ Example:
172
+ >>> docs.filter_by("file.txt") # Filter by name
173
+ >>> docs.filter_by(MyDocument) # Filter by type
174
+ >>> docs.filter_by([Doc1, Doc2]) # Filter by multiple types
175
+ """
176
+ if isinstance(arg, str):
177
+ # Filter by name
178
+ return DocumentList([doc for doc in self if doc.name == arg])
179
+ elif isinstance(arg, type):
180
+ # Filter by single type (including subclasses)
181
+ return DocumentList([doc for doc in self if isinstance(doc, arg)])
182
+ elif isinstance(arg, list): # type: ignore[reportUnnecessaryIsInstance]
183
+ # Filter by multiple types
184
+ documents = DocumentList()
185
+ for document_type in arg:
186
+ documents.extend([doc for doc in self if isinstance(doc, document_type)])
187
+ return documents
188
+ else:
189
+ raise TypeError(f"Invalid argument type for filter_by: {type(arg)}")
190
+
191
+ @overload
192
+ def get_by(self, arg: str) -> Document: ...
193
+
194
+ @overload
195
+ def get_by(self, arg: type[Document]) -> Document: ...
196
+
197
+ @overload
198
+ def get_by(self, arg: str, required: bool = True) -> Document | None: ...
199
+
200
+ @overload
201
+ def get_by(self, arg: type[Document], required: bool = True) -> Document | None: ...
202
+
203
+ def get_by(self, arg: str | type[Document], required: bool = True) -> Document | None:
204
+ """Get a single document by name or type.
205
+
206
+ @public
207
+
208
+ Args:
209
+ arg: Document name (str) or document type.
210
+ required: If True, raises ValueError when not found. If False, returns None.
211
+
212
+ Returns:
213
+ The first matching document, or None if not found and required=False.
214
+
215
+ Raises:
216
+ ValueError: If required=True and document not found.
217
+ TypeError: If arg is not a string or Document type.
218
+
219
+ Example:
220
+ >>> doc = docs.get_by("file.txt") # Get by name, raises if not found
221
+ >>> doc = docs.get_by(MyDocument, required=False) # Returns None if not found
222
+ """
223
+ if isinstance(arg, str):
224
+ # Get by name
225
+ for doc in self:
226
+ if doc.name == arg:
227
+ return doc
228
+ if required:
229
+ raise ValueError(f"Document with name '{arg}' not found")
230
+ return None
231
+ elif isinstance(arg, type): # type: ignore[reportUnnecessaryIsInstance]
232
+ # Get by type (including subclasses)
233
+ for doc in self:
234
+ if isinstance(doc, arg):
235
+ return doc
236
+ if required:
237
+ raise ValueError(f"Document of type '{arg.__name__}' not found")
238
+ return None
239
+ else:
240
+ raise TypeError(f"Invalid argument type for get_by: {type(arg)}")
@@ -1,27 +1,128 @@
1
- """Flow-specific document base class."""
1
+ """Flow-specific document base class for persistent pipeline data.
2
2
 
3
- from typing import Any, Literal, final
3
+ @public
4
+
5
+ This module provides the FlowDocument abstract base class for documents
6
+ that need to persist across Prefect flow runs and between pipeline steps.
7
+ """
8
+
9
+ from typing import Literal, final
4
10
 
5
11
  from .document import Document
6
12
 
7
13
 
8
14
  class FlowDocument(Document):
9
- """
10
- Abstract base class for flow-specific documents.
15
+ """Abstract base class for documents that persist across flow runs.
16
+
17
+ @public
18
+
19
+ FlowDocument is used for data that needs to be saved between pipeline
20
+ steps and across multiple flow executions. These documents are typically
21
+ written to the file system using the simple_runner utilities.
22
+
23
+ Key characteristics:
24
+ - Persisted to file system between pipeline steps
25
+ - Survives across multiple flow runs
26
+ - Used for flow inputs and outputs
27
+ - Saved in directories named after the document's canonical name
28
+
29
+ Creating FlowDocuments:
30
+ **Use the `create` classmethod** for most use cases. It handles automatic
31
+ conversion of various content types. Only use __init__ when you have bytes.
32
+
33
+ >>> from enum import StrEnum
34
+ >>>
35
+ >>> # Minimal subclass (an empty body is enough):
36
+ >>> class MyDoc(FlowDocument):
37
+ ... pass
38
+ >>>
39
+ >>> # Document with restricted file names:
40
+ >>> class ConfigDoc(FlowDocument):
41
+ ... class FILES(StrEnum):
42
+ ... CONFIG = "config.yaml"
43
+ ... SETTINGS = "settings.json"
44
+ >>>
45
+ >>> # RECOMMENDED - automatic conversion:
46
+ >>> doc = MyDoc.create(name="data.json", content={"key": "value"})
47
+ >>> doc = ConfigDoc.create(name="config.yaml", content={"host": "localhost"})
48
+
49
+ Persistence:
50
+ Documents are saved to: {output_dir}/{canonical_name}/{filename}
51
+ For example: output/my_doc/data.json
11
52
 
12
- Flow documents represent inputs, outputs, and intermediate results
13
- within a Prefect flow execution context.
53
+ Note:
54
+ - Cannot instantiate FlowDocument directly - must subclass
55
+ - Used with FlowConfig to define flow input/output types
56
+ - No additional abstract methods to implement
14
57
 
15
- Compared to TaskDocument, FlowDocument are persistent across Prefect flow runs.
58
+ See Also:
59
+ TaskDocument: For temporary documents within task execution
60
+ TemporaryDocument: For documents that are never persisted
16
61
  """
17
62
 
18
- def __init__(self, **data: Any) -> None:
19
- """Prevent direct instantiation of abstract FlowDocument class."""
63
+ def __init__(
64
+ self,
65
+ *,
66
+ name: str,
67
+ content: bytes,
68
+ description: str | None = None,
69
+ ) -> None:
70
+ """Initialize a FlowDocument with raw bytes content.
71
+
72
+ Important:
73
+ **Most users should use the `create` classmethod instead of __init__.**
74
+ The create method provides automatic content conversion for various types
75
+ (str, dict, list, Pydantic models) while __init__ only accepts bytes.
76
+
77
+ Prevents direct instantiation of the abstract FlowDocument class.
78
+ FlowDocument must be subclassed for specific document types.
79
+
80
+ Args:
81
+ name: Document filename (required, keyword-only)
82
+ content: Document content as raw bytes (required, keyword-only)
83
+ description: Optional human-readable description (keyword-only)
84
+
85
+ Raises:
86
+ TypeError: If attempting to instantiate FlowDocument directly
87
+ instead of using a concrete subclass.
88
+
89
+ Example:
90
+ >>> from enum import StrEnum
91
+ >>>
92
+ >>> # Simple subclass:
93
+ >>> class MyFlowDoc(FlowDocument):
94
+ ... pass
95
+ >>>
96
+ >>> # With FILES restriction:
97
+ >>> class RestrictedDoc(FlowDocument):
98
+ ... class FILES(StrEnum):
99
+ ... DATA = "data.json"
100
+ ... METADATA = "metadata.yaml"
101
+ >>>
102
+ >>> # Direct constructor - only for bytes:
103
+ >>> doc = MyFlowDoc(name="test.bin", content=b"raw data")
104
+ >>>
105
+ >>> # RECOMMENDED - use create for automatic conversion:
106
+ >>> doc = RestrictedDoc.create(name="data.json", content={"key": "value"})
107
+ >>> # This would raise DocumentNameError:
108
+ >>> # doc = RestrictedDoc.create(name="other.json", content={})
109
+ """
20
110
  if type(self) is FlowDocument:
21
111
  raise TypeError("Cannot instantiate abstract FlowDocument class directly")
22
- super().__init__(**data)
112
+ super().__init__(name=name, content=content, description=description)
23
113
 
24
114
  @final
25
115
  def get_base_type(self) -> Literal["flow"]:
26
- """Get the document type."""
116
+ """Return the base type identifier for flow documents.
117
+
118
+ This method is final and cannot be overridden by subclasses.
119
+ It identifies this document as a flow-persistent document.
120
+
121
+ Returns:
122
+ "flow" - Indicates this document persists across flow runs.
123
+
124
+ Note:
125
+ This determines the document's lifecycle and persistence behavior
126
+ in the pipeline system.
127
+ """
27
128
  return "flow"
@@ -1,4 +1,10 @@
1
- """MIME type detection utilities for documents"""
1
+ """@internal MIME type detection utilities for documents.
2
+
3
+ This module provides functions for detecting and validating MIME types
4
+ from document content and filenames. It uses a hybrid approach combining
5
+ extension-based detection for known formats and content analysis via
6
+ python-magic for unknown files.
7
+ """
2
8
 
3
9
  import magic
4
10
 
@@ -34,15 +40,45 @@ EXTENSION_MIME_MAP = {
34
40
 
35
41
 
36
42
  def detect_mime_type(content: bytes, name: str) -> str:
37
- """Detect MIME type from content and filename
43
+ r"""Detect MIME type from document content and filename.
38
44
 
39
- Uses a hybrid approach:
40
- 1. Check for empty content
41
- 2. Try extension-based detection for known formats
42
- 3. Fall back to magic content detection
43
- 4. Final fallback to application/octet-stream
44
- """
45
+ Uses a multi-stage detection strategy for maximum accuracy:
46
+ 1. Returns 'application/x-empty' for empty content
47
+ 2. Uses extension-based detection for known formats (most reliable)
48
+ 3. Falls back to python-magic content analysis
49
+ 4. Final fallback to extension or 'application/octet-stream'
50
+
51
+ Args:
52
+ content: Document content as bytes.
53
+ name: Filename with extension.
54
+
55
+ Returns:
56
+ MIME type string (e.g., 'text/plain', 'application/json').
57
+ Never returns None or empty string.
58
+
59
+ Fallback behavior:
60
+ - Empty content: 'application/x-empty'
61
+ - Unknown extension with binary content: 'application/octet-stream'
62
+ - Magic library failure: Falls back to extension or 'application/octet-stream'
45
63
 
64
+ Performance:
65
+ Only the first 1024 bytes are analyzed for content detection.
66
+ Extension-based detection is O(1) lookup.
67
+
68
+ Note:
69
+ Extension-based detection is preferred for text formats as
70
+ content analysis can sometimes misidentify structured text.
71
+
72
+ Example:
73
+ >>> detect_mime_type(b'{"key": "value"}', "data.json")
74
+ 'application/json'
75
+ >>> detect_mime_type(b'Hello World', "text.txt")
76
+ 'text/plain'
77
+ >>> detect_mime_type(b'', "empty.txt")
78
+ 'application/x-empty'
79
+ >>> detect_mime_type(b'\x89PNG\r\n\x1a\n', "image.xyz")
80
+ 'image/png' # Magic detects PNG despite wrong extension
81
+ """
46
82
  # Check for empty content
47
83
  if len(content) == 0:
48
84
  return "application/x-empty"
@@ -69,16 +105,60 @@ def detect_mime_type(content: bytes, name: str) -> str:
69
105
 
70
106
 
71
107
  def mime_type_from_extension(name: str) -> str:
72
- """Get MIME type based on file extension
108
+ """Get MIME type based solely on file extension.
109
+
110
+ Simple extension-based MIME type detection without content analysis.
111
+ This is a legacy function maintained for backward compatibility.
112
+
113
+ Args:
114
+ name: Filename with extension.
73
115
 
74
- Legacy function kept for compatibility
116
+ Returns:
117
+ MIME type based on extension, or 'application/octet-stream'
118
+ if extension is unknown.
119
+
120
+ Note:
121
+ Prefer detect_mime_type() for more accurate detection.
122
+ This function only checks the file extension.
123
+
124
+ Example:
125
+ >>> mime_type_from_extension("document.pdf")
126
+ 'application/pdf'
127
+ >>> mime_type_from_extension("unknown.xyz")
128
+ 'application/octet-stream'
75
129
  """
76
130
  ext = name.lower().split(".")[-1] if "." in name else ""
77
131
  return EXTENSION_MIME_MAP.get(ext, "application/octet-stream")
78
132
 
79
133
 
80
134
  def is_text_mime_type(mime_type: str) -> bool:
81
- """Check if MIME type represents text content"""
135
+ """Check if MIME type represents text-based content.
136
+
137
+ Indicates whether content of this MIME type is expected to be text; only the MIME type string is inspected, not the content itself.
138
+ Includes common text formats and structured text like JSON/YAML.
139
+
140
+ Args:
141
+ mime_type: MIME type string to check.
142
+
143
+ Returns:
144
+ True if MIME type indicates text content, False otherwise.
145
+
146
+ Recognized as text:
147
+ - Any type starting with 'text/'
148
+ - application/json
149
+ - application/xml
150
+ - application/javascript
151
+ - application/yaml
152
+ - application/x-yaml
153
+
154
+ Example:
155
+ >>> is_text_mime_type('text/plain')
156
+ True
157
+ >>> is_text_mime_type('application/json')
158
+ True
159
+ >>> is_text_mime_type('image/png')
160
+ False
161
+ """
82
162
  text_types = [
83
163
  "text/",
84
164
  "application/json",
@@ -91,20 +171,98 @@ def is_text_mime_type(mime_type: str) -> bool:
91
171
 
92
172
 
93
173
  def is_json_mime_type(mime_type: str) -> bool:
94
- """Check if MIME type is JSON"""
174
+ """Check if MIME type is JSON.
175
+
176
+ Args:
177
+ mime_type: MIME type string to check.
178
+
179
+ Returns:
180
+ True if MIME type is 'application/json', False otherwise.
181
+
182
+ Note:
183
+ Only matches exact 'application/json', not variants like
184
+ 'application/ld+json' or 'application/vnd.api+json'.
185
+
186
+ Example:
187
+ >>> is_json_mime_type('application/json')
188
+ True
189
+ >>> is_json_mime_type('text/json') # Not standard JSON MIME
190
+ False
191
+ """
95
192
  return mime_type == "application/json"
96
193
 
97
194
 
98
195
  def is_yaml_mime_type(mime_type: str) -> bool:
99
- """Check if MIME type is YAML"""
196
+ """Check if MIME type is YAML.
197
+
198
+ Recognizes both the standard and the legacy YAML MIME types.
199
+
200
+ Args:
201
+ mime_type: MIME type string to check.
202
+
203
+ Returns:
204
+ True if MIME type is YAML, False otherwise.
205
+
206
+ Recognized types:
207
+ - application/yaml (standard)
208
+ - application/x-yaml (legacy)
209
+
210
+ Example:
211
+ >>> is_yaml_mime_type('application/yaml')
212
+ True
213
+ >>> is_yaml_mime_type('application/x-yaml')
214
+ True
215
+ """
100
216
  return mime_type == "application/yaml" or mime_type == "application/x-yaml"
101
217
 
102
218
 
103
219
  def is_pdf_mime_type(mime_type: str) -> bool:
104
- """Check if MIME type is PDF"""
220
+ """Check if MIME type is PDF.
221
+
222
+ Args:
223
+ mime_type: MIME type string to check.
224
+
225
+ Returns:
226
+ True if MIME type is 'application/pdf', False otherwise.
227
+
228
+ Note:
229
+ PDF documents require special handling in the LLM module
230
+ and are supported by certain vision-capable models.
231
+
232
+ Example:
233
+ >>> is_pdf_mime_type('application/pdf')
234
+ True
235
+ >>> is_pdf_mime_type('text/plain')
236
+ False
237
+ """
105
238
  return mime_type == "application/pdf"
106
239
 
107
240
 
108
241
  def is_image_mime_type(mime_type: str) -> bool:
109
- """Check if MIME type is an image"""
242
+ """Check if MIME type represents an image.
243
+
244
+ Args:
245
+ mime_type: MIME type string to check.
246
+
247
+ Returns:
248
+ True if MIME type starts with 'image/', False otherwise.
249
+
250
+ Recognized formats:
251
+ Any MIME type starting with 'image/' including:
252
+ - image/png
253
+ - image/jpeg
254
+ - image/gif
255
+ - image/webp
256
+ - image/svg+xml
257
+
258
+ Note:
259
+ Image documents are automatically encoded for vision-capable
260
+ LLM models in the AIMessages.document_to_prompt() method.
261
+
262
+ Example:
263
+ >>> is_image_mime_type('image/png')
264
+ True
265
+ >>> is_image_mime_type('application/pdf')
266
+ False
267
+ """
110
268
  return mime_type.startswith("image/")