ai-pipeline-core 0.2.6__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +78 -125
- ai_pipeline_core/deployment/__init__.py +34 -0
- ai_pipeline_core/deployment/base.py +861 -0
- ai_pipeline_core/deployment/contract.py +80 -0
- ai_pipeline_core/deployment/deploy.py +561 -0
- ai_pipeline_core/deployment/helpers.py +97 -0
- ai_pipeline_core/deployment/progress.py +126 -0
- ai_pipeline_core/deployment/remote.py +116 -0
- ai_pipeline_core/docs_generator/__init__.py +54 -0
- ai_pipeline_core/docs_generator/__main__.py +5 -0
- ai_pipeline_core/docs_generator/cli.py +196 -0
- ai_pipeline_core/docs_generator/extractor.py +324 -0
- ai_pipeline_core/docs_generator/guide_builder.py +644 -0
- ai_pipeline_core/docs_generator/trimmer.py +35 -0
- ai_pipeline_core/docs_generator/validator.py +114 -0
- ai_pipeline_core/document_store/__init__.py +13 -0
- ai_pipeline_core/document_store/_summary.py +9 -0
- ai_pipeline_core/document_store/_summary_worker.py +170 -0
- ai_pipeline_core/document_store/clickhouse.py +492 -0
- ai_pipeline_core/document_store/factory.py +38 -0
- ai_pipeline_core/document_store/local.py +312 -0
- ai_pipeline_core/document_store/memory.py +85 -0
- ai_pipeline_core/document_store/protocol.py +68 -0
- ai_pipeline_core/documents/__init__.py +12 -14
- ai_pipeline_core/documents/_context_vars.py +85 -0
- ai_pipeline_core/documents/_hashing.py +52 -0
- ai_pipeline_core/documents/attachment.py +85 -0
- ai_pipeline_core/documents/context.py +128 -0
- ai_pipeline_core/documents/document.py +318 -1434
- ai_pipeline_core/documents/mime_type.py +37 -82
- ai_pipeline_core/documents/utils.py +4 -12
- ai_pipeline_core/exceptions.py +10 -62
- ai_pipeline_core/images/__init__.py +309 -0
- ai_pipeline_core/images/_processing.py +151 -0
- ai_pipeline_core/llm/__init__.py +6 -4
- ai_pipeline_core/llm/ai_messages.py +130 -81
- ai_pipeline_core/llm/client.py +327 -193
- ai_pipeline_core/llm/model_options.py +14 -86
- ai_pipeline_core/llm/model_response.py +60 -103
- ai_pipeline_core/llm/model_types.py +16 -34
- ai_pipeline_core/logging/__init__.py +2 -7
- ai_pipeline_core/logging/logging.yml +1 -1
- ai_pipeline_core/logging/logging_config.py +27 -37
- ai_pipeline_core/logging/logging_mixin.py +15 -41
- ai_pipeline_core/observability/__init__.py +32 -0
- ai_pipeline_core/observability/_debug/__init__.py +30 -0
- ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
- ai_pipeline_core/observability/_debug/_config.py +95 -0
- ai_pipeline_core/observability/_debug/_content.py +764 -0
- ai_pipeline_core/observability/_debug/_processor.py +98 -0
- ai_pipeline_core/observability/_debug/_summary.py +312 -0
- ai_pipeline_core/observability/_debug/_types.py +75 -0
- ai_pipeline_core/observability/_debug/_writer.py +843 -0
- ai_pipeline_core/observability/_document_tracking.py +146 -0
- ai_pipeline_core/observability/_initialization.py +194 -0
- ai_pipeline_core/observability/_logging_bridge.py +57 -0
- ai_pipeline_core/observability/_summary.py +81 -0
- ai_pipeline_core/observability/_tracking/__init__.py +6 -0
- ai_pipeline_core/observability/_tracking/_client.py +178 -0
- ai_pipeline_core/observability/_tracking/_internal.py +28 -0
- ai_pipeline_core/observability/_tracking/_models.py +138 -0
- ai_pipeline_core/observability/_tracking/_processor.py +158 -0
- ai_pipeline_core/observability/_tracking/_service.py +311 -0
- ai_pipeline_core/observability/_tracking/_writer.py +229 -0
- ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -283
- ai_pipeline_core/pipeline/__init__.py +10 -0
- ai_pipeline_core/pipeline/decorators.py +915 -0
- ai_pipeline_core/pipeline/options.py +16 -0
- ai_pipeline_core/prompt_manager.py +16 -102
- ai_pipeline_core/settings.py +26 -31
- ai_pipeline_core/testing.py +9 -0
- ai_pipeline_core-0.4.1.dist-info/METADATA +807 -0
- ai_pipeline_core-0.4.1.dist-info/RECORD +76 -0
- {ai_pipeline_core-0.2.6.dist-info → ai_pipeline_core-0.4.1.dist-info}/WHEEL +1 -1
- ai_pipeline_core/documents/document_list.py +0 -420
- ai_pipeline_core/documents/flow_document.py +0 -112
- ai_pipeline_core/documents/task_document.py +0 -117
- ai_pipeline_core/documents/temporary_document.py +0 -74
- ai_pipeline_core/flow/__init__.py +0 -9
- ai_pipeline_core/flow/config.py +0 -483
- ai_pipeline_core/flow/options.py +0 -75
- ai_pipeline_core/pipeline.py +0 -718
- ai_pipeline_core/prefect.py +0 -63
- ai_pipeline_core/simple_runner/__init__.py +0 -14
- ai_pipeline_core/simple_runner/cli.py +0 -254
- ai_pipeline_core/simple_runner/simple_runner.py +0 -247
- ai_pipeline_core/storage/__init__.py +0 -8
- ai_pipeline_core/storage/storage.py +0 -628
- ai_pipeline_core/utils/__init__.py +0 -8
- ai_pipeline_core/utils/deploy.py +0 -373
- ai_pipeline_core/utils/remote_deployment.py +0 -269
- ai_pipeline_core-0.2.6.dist-info/METADATA +0 -500
- ai_pipeline_core-0.2.6.dist-info/RECORD +0 -41
- {ai_pipeline_core-0.2.6.dist-info → ai_pipeline_core-0.4.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -24,6 +24,8 @@ EXTENSION_MIME_MAP = {
|
|
|
24
24
|
"gif": "image/gif",
|
|
25
25
|
"bmp": "image/bmp",
|
|
26
26
|
"webp": "image/webp",
|
|
27
|
+
"heic": "image/heic",
|
|
28
|
+
"heif": "image/heif",
|
|
27
29
|
"json": "application/json",
|
|
28
30
|
"yaml": "application/yaml",
|
|
29
31
|
"yml": "application/yaml",
|
|
@@ -65,19 +67,8 @@ def detect_mime_type(content: bytes, name: str) -> str:
|
|
|
65
67
|
Only the first 1024 bytes are analyzed for content detection.
|
|
66
68
|
Extension-based detection is O(1) lookup.
|
|
67
69
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
content analysis can sometimes misidentify structured text.
|
|
71
|
-
|
|
72
|
-
Example:
|
|
73
|
-
>>> detect_mime_type(b'{"key": "value"}', "data.json")
|
|
74
|
-
'application/json'
|
|
75
|
-
>>> detect_mime_type(b'Hello World', "text.txt")
|
|
76
|
-
'text/plain'
|
|
77
|
-
>>> detect_mime_type(b'', "empty.txt")
|
|
78
|
-
'text/plain'
|
|
79
|
-
>>> detect_mime_type(b'\\x89PNG', "image.xyz")
|
|
80
|
-
'image/png' # Magic detects PNG despite wrong extension
|
|
70
|
+
Extension-based detection is preferred for text formats as
|
|
71
|
+
content analysis can sometimes misidentify structured text.
|
|
81
72
|
"""
|
|
82
73
|
# Check for empty content
|
|
83
74
|
if len(content) == 0:
|
|
@@ -97,40 +88,13 @@ def detect_mime_type(content: bytes, name: str) -> str:
|
|
|
97
88
|
return mime
|
|
98
89
|
except (AttributeError, OSError, magic.MagicException) as e:
|
|
99
90
|
logger.warning(f"MIME detection failed for {name}: {e}")
|
|
100
|
-
except Exception
|
|
101
|
-
logger.
|
|
91
|
+
except Exception:
|
|
92
|
+
logger.exception(f"Unexpected error in MIME detection for {name}")
|
|
102
93
|
|
|
103
94
|
# Final fallback based on extension or default
|
|
104
95
|
return EXTENSION_MIME_MAP.get(ext, "application/octet-stream")
|
|
105
96
|
|
|
106
97
|
|
|
107
|
-
def mime_type_from_extension(name: str) -> str:
|
|
108
|
-
"""Get MIME type based solely on file extension.
|
|
109
|
-
|
|
110
|
-
Simple extension-based MIME type detection without content analysis.
|
|
111
|
-
This is a legacy function maintained for backward compatibility.
|
|
112
|
-
|
|
113
|
-
Args:
|
|
114
|
-
name: Filename with extension.
|
|
115
|
-
|
|
116
|
-
Returns:
|
|
117
|
-
MIME type based on extension, or 'application/octet-stream'
|
|
118
|
-
if extension is unknown.
|
|
119
|
-
|
|
120
|
-
Note:
|
|
121
|
-
Prefer detect_mime_type() for more accurate detection.
|
|
122
|
-
This function only checks the file extension.
|
|
123
|
-
|
|
124
|
-
Example:
|
|
125
|
-
>>> mime_type_from_extension("document.pdf")
|
|
126
|
-
'application/pdf'
|
|
127
|
-
>>> mime_type_from_extension("unknown.xyz")
|
|
128
|
-
'application/octet-stream'
|
|
129
|
-
"""
|
|
130
|
-
ext = name.lower().split(".")[-1] if "." in name else ""
|
|
131
|
-
return EXTENSION_MIME_MAP.get(ext, "application/octet-stream")
|
|
132
|
-
|
|
133
|
-
|
|
134
98
|
def is_text_mime_type(mime_type: str) -> bool:
|
|
135
99
|
"""Check if MIME type represents text-based content.
|
|
136
100
|
|
|
@@ -151,13 +115,6 @@ def is_text_mime_type(mime_type: str) -> bool:
|
|
|
151
115
|
- application/yaml
|
|
152
116
|
- application/x-yaml
|
|
153
117
|
|
|
154
|
-
Example:
|
|
155
|
-
>>> is_text_mime_type('text/plain')
|
|
156
|
-
True
|
|
157
|
-
>>> is_text_mime_type('application/json')
|
|
158
|
-
True
|
|
159
|
-
>>> is_text_mime_type('image/png')
|
|
160
|
-
False
|
|
161
118
|
"""
|
|
162
119
|
text_types = [
|
|
163
120
|
"text/",
|
|
@@ -179,15 +136,8 @@ def is_json_mime_type(mime_type: str) -> bool:
|
|
|
179
136
|
Returns:
|
|
180
137
|
True if MIME type is 'application/json', False otherwise.
|
|
181
138
|
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
'application/ld+json' or 'application/vnd.api+json'.
|
|
185
|
-
|
|
186
|
-
Example:
|
|
187
|
-
>>> is_json_mime_type('application/json')
|
|
188
|
-
True
|
|
189
|
-
>>> is_json_mime_type('text/json') # Not standard JSON MIME
|
|
190
|
-
False
|
|
139
|
+
Only matches exact 'application/json', not variants like
|
|
140
|
+
'application/ld+json' or 'application/vnd.api+json'.
|
|
191
141
|
"""
|
|
192
142
|
return mime_type == "application/json"
|
|
193
143
|
|
|
@@ -207,13 +157,8 @@ def is_yaml_mime_type(mime_type: str) -> bool:
|
|
|
207
157
|
- application/yaml (standard)
|
|
208
158
|
- application/x-yaml (legacy)
|
|
209
159
|
|
|
210
|
-
Example:
|
|
211
|
-
>>> is_yaml_mime_type('application/yaml')
|
|
212
|
-
True
|
|
213
|
-
>>> is_yaml_mime_type('application/x-yaml')
|
|
214
|
-
True
|
|
215
160
|
"""
|
|
216
|
-
return mime_type
|
|
161
|
+
return mime_type in {"application/yaml", "application/x-yaml"}
|
|
217
162
|
|
|
218
163
|
|
|
219
164
|
def is_pdf_mime_type(mime_type: str) -> bool:
|
|
@@ -225,15 +170,8 @@ def is_pdf_mime_type(mime_type: str) -> bool:
|
|
|
225
170
|
Returns:
|
|
226
171
|
True if MIME type is 'application/pdf', False otherwise.
|
|
227
172
|
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
and are supported by certain vision-capable models.
|
|
231
|
-
|
|
232
|
-
Example:
|
|
233
|
-
>>> is_pdf_mime_type('application/pdf')
|
|
234
|
-
True
|
|
235
|
-
>>> is_pdf_mime_type('text/plain')
|
|
236
|
-
False
|
|
173
|
+
PDF documents require special handling in the LLM module
|
|
174
|
+
and are supported by certain vision-capable models.
|
|
237
175
|
"""
|
|
238
176
|
return mime_type == "application/pdf"
|
|
239
177
|
|
|
@@ -255,14 +193,31 @@ def is_image_mime_type(mime_type: str) -> bool:
|
|
|
255
193
|
- image/webp
|
|
256
194
|
- image/svg+xml
|
|
257
195
|
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
LLM models in the AIMessages.document_to_prompt() method.
|
|
261
|
-
|
|
262
|
-
Example:
|
|
263
|
-
>>> is_image_mime_type('image/png')
|
|
264
|
-
True
|
|
265
|
-
>>> is_image_mime_type('application/pdf')
|
|
266
|
-
False
|
|
196
|
+
Image documents are automatically encoded for vision-capable
|
|
197
|
+
LLM models in the AIMessages.document_to_prompt() method.
|
|
267
198
|
"""
|
|
268
199
|
return mime_type.startswith("image/")
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
# Image MIME types that can be handed to an LLM as-is.
LLM_SUPPORTED_IMAGE_MIME_TYPES: frozenset[str] = frozenset(
    (
        "image/png",
        "image/jpeg",
        "image/webp",
        "image/heic",
        "image/heif",
    )
)


def is_llm_supported_image(mime_type: str) -> bool:
    """Report whether an image MIME type is accepted by LLMs without conversion.

    Formats outside the supported set (gif, bmp, tiff, svg, etc.) have to be
    converted to PNG before they are sent to the LLM.

    Args:
        mime_type: MIME type string to check. The comparison is an exact,
            case-sensitive match.

    Returns:
        True if the image format is natively supported by LLMs.
    """
    natively_supported = mime_type in LLM_SUPPORTED_IMAGE_MIME_TYPES
    return natively_supported
|
|
@@ -5,15 +5,14 @@ canonical key generation, and hash validation used throughout the document syste
|
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
7
|
import re
|
|
8
|
-
from
|
|
8
|
+
from collections.abc import Iterable
|
|
9
|
+
from typing import Any
|
|
9
10
|
from urllib.parse import urlparse
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
def sanitize_url(url: str) -> str:
|
|
13
14
|
"""Sanitize URL or query string for use in filenames.
|
|
14
15
|
|
|
15
|
-
@public
|
|
16
|
-
|
|
17
16
|
Removes or replaces characters that are invalid in filenames.
|
|
18
17
|
|
|
19
18
|
Args:
|
|
@@ -63,15 +62,13 @@ def camel_to_snake(name: str) -> str:
|
|
|
63
62
|
|
|
64
63
|
|
|
65
64
|
def canonical_name_key(
|
|
66
|
-
obj_or_name:
|
|
65
|
+
obj_or_name: type[Any] | str,
|
|
67
66
|
*,
|
|
68
67
|
max_parent_suffixes: int = 3,
|
|
69
68
|
extra_suffixes: Iterable[str] = (),
|
|
70
69
|
) -> str:
|
|
71
70
|
"""Produce a canonical snake_case key from a class or name.
|
|
72
71
|
|
|
73
|
-
@public
|
|
74
|
-
|
|
75
72
|
Process:
|
|
76
73
|
1) Starting with the class name (or given string),
|
|
77
74
|
2) Stripping any trailing parent class names (up to `max_parent_suffixes` from the MRO),
|
|
@@ -120,8 +117,6 @@ def canonical_name_key(
|
|
|
120
117
|
def is_document_sha256(value: str) -> bool:
|
|
121
118
|
"""Check if a string is a valid base32-encoded SHA256 hash with proper entropy.
|
|
122
119
|
|
|
123
|
-
@public
|
|
124
|
-
|
|
125
120
|
This function validates that a string is not just formatted like a SHA256 hash,
|
|
126
121
|
but actually has the entropy characteristics of a real hash. It checks:
|
|
127
122
|
1. Correct length (52 characters without padding)
|
|
@@ -174,7 +169,4 @@ def is_document_sha256(value: str) -> bool:
|
|
|
174
169
|
# Require at least 8 unique characters (out of 32 possible in base32)
|
|
175
170
|
# This prevents patterns like "AAAAAAA..." from being identified as real hashes
|
|
176
171
|
unique_chars = len(set(value))
|
|
177
|
-
|
|
178
|
-
return False
|
|
179
|
-
|
|
180
|
-
return True
|
|
172
|
+
return unique_chars >= 8
|
ai_pipeline_core/exceptions.py
CHANGED
|
@@ -1,97 +1,45 @@
|
|
|
1
1
|
"""Exception hierarchy for AI Pipeline Core.
|
|
2
2
|
|
|
3
|
-
@public
|
|
4
|
-
|
|
5
3
|
This module defines the exception hierarchy used throughout the AI Pipeline Core library.
|
|
6
4
|
All exceptions inherit from PipelineCoreError, providing a consistent error handling interface.
|
|
7
5
|
"""
|
|
8
6
|
|
|
9
7
|
|
|
10
8
|
class PipelineCoreError(Exception):
|
|
11
|
-
"""Base exception for all AI Pipeline Core errors.
|
|
12
|
-
|
|
13
|
-
@public
|
|
14
|
-
"""
|
|
15
|
-
|
|
16
|
-
pass
|
|
9
|
+
"""Base exception for all AI Pipeline Core errors."""
|
|
17
10
|
|
|
18
11
|
|
|
19
12
|
class DocumentError(PipelineCoreError):
|
|
20
|
-
"""Base exception for document-related errors.
|
|
21
|
-
|
|
22
|
-
@public
|
|
23
|
-
"""
|
|
24
|
-
|
|
25
|
-
pass
|
|
13
|
+
"""Base exception for document-related errors."""
|
|
26
14
|
|
|
27
15
|
|
|
28
16
|
class DocumentValidationError(DocumentError):
|
|
29
|
-
"""Raised when document validation fails.
|
|
30
|
-
|
|
31
|
-
@public
|
|
32
|
-
"""
|
|
33
|
-
|
|
34
|
-
pass
|
|
17
|
+
"""Raised when document validation fails."""
|
|
35
18
|
|
|
36
19
|
|
|
37
20
|
class DocumentSizeError(DocumentValidationError):
|
|
38
|
-
"""Raised when document content exceeds MAX_CONTENT_SIZE limit.
|
|
39
|
-
|
|
40
|
-
@public
|
|
41
|
-
"""
|
|
42
|
-
|
|
43
|
-
pass
|
|
21
|
+
"""Raised when document content exceeds MAX_CONTENT_SIZE limit."""
|
|
44
22
|
|
|
45
23
|
|
|
46
24
|
class DocumentNameError(DocumentValidationError):
|
|
47
|
-
"""Raised when document name contains invalid characters or patterns.
|
|
48
|
-
|
|
49
|
-
@public
|
|
50
|
-
"""
|
|
51
|
-
|
|
52
|
-
pass
|
|
25
|
+
"""Raised when document name contains invalid characters or patterns."""
|
|
53
26
|
|
|
54
27
|
|
|
55
28
|
class LLMError(PipelineCoreError):
|
|
56
|
-
"""Raised when LLM generation fails after all retries.
|
|
57
|
-
|
|
58
|
-
@public
|
|
59
|
-
"""
|
|
60
|
-
|
|
61
|
-
pass
|
|
29
|
+
"""Raised when LLM generation fails after all retries."""
|
|
62
30
|
|
|
63
31
|
|
|
64
32
|
class PromptError(PipelineCoreError):
|
|
65
|
-
"""Base exception for prompt template errors.
|
|
66
|
-
|
|
67
|
-
@public
|
|
68
|
-
"""
|
|
69
|
-
|
|
70
|
-
pass
|
|
33
|
+
"""Base exception for prompt template errors."""
|
|
71
34
|
|
|
72
35
|
|
|
73
36
|
class PromptRenderError(PromptError):
|
|
74
|
-
"""Raised when Jinja2 template rendering fails.
|
|
75
|
-
|
|
76
|
-
@public
|
|
77
|
-
"""
|
|
78
|
-
|
|
79
|
-
pass
|
|
37
|
+
"""Raised when Jinja2 template rendering fails."""
|
|
80
38
|
|
|
81
39
|
|
|
82
40
|
class PromptNotFoundError(PromptError):
|
|
83
|
-
"""Raised when prompt template file is not found in search paths.
|
|
84
|
-
|
|
85
|
-
@public
|
|
86
|
-
"""
|
|
87
|
-
|
|
88
|
-
pass
|
|
41
|
+
"""Raised when prompt template file is not found in search paths."""
|
|
89
42
|
|
|
90
43
|
|
|
91
44
|
class MimeTypeError(DocumentError):
|
|
92
|
-
"""Raised when MIME type detection or validation fails.
|
|
93
|
-
|
|
94
|
-
@public
|
|
95
|
-
"""
|
|
96
|
-
|
|
97
|
-
pass
|
|
45
|
+
"""Raised when MIME type detection or validation fails."""
|
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
"""Image processing utilities for LLM vision models.
|
|
2
|
+
|
|
3
|
+
Splits large images, compresses to JPEG, and respects model-specific constraints.
|
|
4
|
+
Designed for website screenshots, document pages, and other visual content
|
|
5
|
+
sent to vision-capable LLMs.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from enum import StrEnum
|
|
9
|
+
|
|
10
|
+
from pydantic import BaseModel, Field
|
|
11
|
+
|
|
12
|
+
from ai_pipeline_core.documents import Document
|
|
13
|
+
|
|
14
|
+
from ._processing import execute_split, load_and_normalize, plan_split
|
|
15
|
+
|
|
16
|
+
# Names exported by ``from <this package> import *``; all of them are defined
# further down in this module.
__all__ = [
    "ImageDocument",
    "ImagePart",
    "ImagePreset",
    "ImageProcessingConfig",
    "ImageProcessingError",
    "ProcessedImage",
    "process_image",
    "process_image_to_documents",
]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class ImageDocument(Document):  # noqa: RUF067
    """Concrete document for processed image parts.

    Declares no fields or methods of its own. It is the element type of the
    list returned by ``process_image_to_documents``.
    """
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# ---------------------------------------------------------------------------
|
|
33
|
+
# Configuration
|
|
34
|
+
# ---------------------------------------------------------------------------
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class ImagePreset(StrEnum):  # noqa: RUF067
    """Presets for LLM vision model constraints.

    Each member is a key of the module-level ``_PRESETS`` table, which maps it
    to a concrete ``ImageProcessingConfig``.
    """

    GEMINI = "gemini"  # default preset of process_image / process_image_to_documents
    CLAUDE = "claude"
    GPT4V = "gpt4v"
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class ImageProcessingConfig(BaseModel):  # noqa: RUF067
    """Immutable set of limits applied when preparing an image.

    Fetch a stock configuration with ``for_preset`` or instantiate the model
    directly when custom constraints are needed.
    """

    # Instances are frozen: fields cannot be reassigned after construction.
    model_config = {"frozen": True}

    max_dimension: int = Field(
        default=3000, ge=100, le=8192, description="Maximum width AND height in pixels"
    )
    max_pixels: int = Field(
        default=9_000_000, ge=10_000, description="Maximum total pixels per output image part"
    )
    overlap_fraction: float = Field(
        default=0.20, ge=0.0, le=0.5, description="Overlap between adjacent vertical parts (0.0-0.5)"
    )
    max_parts: int = Field(
        default=20, ge=1, le=100, description="Maximum number of output image parts"
    )
    jpeg_quality: int = Field(
        default=60, ge=10, le=95, description="JPEG compression quality (10-95)"
    )

    @classmethod
    def for_preset(cls, preset: ImagePreset) -> "ImageProcessingConfig":
        """Return the configuration registered for *preset* in ``_PRESETS``.

        The same instance is returned on every call.
        """
        return _PRESETS[preset]
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# Lookup table behind ImageProcessingConfig.for_preset. Only max_dimension,
# max_pixels and jpeg_quality are set per preset; overlap_fraction and
# max_parts keep the field defaults declared on ImageProcessingConfig.
_PRESETS: dict[ImagePreset, ImageProcessingConfig] = {  # noqa: RUF067
    ImagePreset.GEMINI: ImageProcessingConfig(
        max_dimension=3000,
        max_pixels=9_000_000,
        jpeg_quality=75,
    ),
    ImagePreset.CLAUDE: ImageProcessingConfig(
        max_dimension=1568,
        max_pixels=1_150_000,
        jpeg_quality=60,
    ),
    ImagePreset.GPT4V: ImageProcessingConfig(
        max_dimension=2048,
        max_pixels=4_000_000,
        jpeg_quality=70,
    ),
}
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
# ---------------------------------------------------------------------------
|
|
111
|
+
# Result models
|
|
112
|
+
# ---------------------------------------------------------------------------
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class ImagePart(BaseModel):  # noqa: RUF067
    """One output slice of a processed image plus its position metadata."""

    model_config = {"frozen": True}

    # Encoded image bytes; kept out of repr() so logs stay readable.
    data: bytes = Field(repr=False)
    width: int
    height: int
    index: int = Field(ge=0, description="0-indexed position")
    total: int = Field(ge=1, description="Total number of parts")
    source_y: int = Field(ge=0, description="Y offset in original image")
    source_height: int = Field(ge=1, description="Height of region in original")

    @property
    def label(self) -> str:
        """Return a 1-indexed, human-readable label for LLM context."""
        return "Full image" if self.total == 1 else f"Part {self.index + 1}/{self.total}"
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class ProcessedImage(BaseModel):  # noqa: RUF067
    """Outcome of ``process_image``: the parts plus size statistics.

    Behaves like a sequence of parts: ``len(result)``, ``result[i]`` and
    ``for part in result`` all operate on ``parts``.
    """

    model_config = {"frozen": True}

    parts: list[ImagePart]
    original_width: int
    original_height: int
    original_bytes: int
    output_bytes: int
    was_trimmed: bool = Field(description="True if width was trimmed to fit")
    warnings: list[str] = Field(default_factory=list)

    @property
    def compression_ratio(self) -> float:
        """Output size / input size (lower means more compression).

        Falls back to 1.0 when the recorded input size is not positive.
        """
        return self.output_bytes / self.original_bytes if self.original_bytes > 0 else 1.0

    def __len__(self) -> int:
        """Return the number of parts."""
        return len(self.parts)

    def __iter__(self):  # type: ignore[override]
        """Iterate over the parts (replaces BaseModel's own iteration)."""
        return iter(self.parts)

    def __getitem__(self, idx: int) -> ImagePart:
        """Return the part at position *idx*."""
        return self.parts[idx]
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
# ---------------------------------------------------------------------------
|
|
170
|
+
# Exceptions
|
|
171
|
+
# ---------------------------------------------------------------------------
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
class ImageProcessingError(Exception):  # noqa: RUF067
    """Image processing failed.

    Raised by ``process_image``, e.g. for an unsupported input type, empty
    image data, or image data that cannot be loaded.
    """

    # NOTE(review): derives from Exception directly, not from the package's
    # PipelineCoreError base — confirm this is intentional.
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
# ---------------------------------------------------------------------------
|
|
179
|
+
# Public API
|
|
180
|
+
# ---------------------------------------------------------------------------
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def process_image(  # noqa: RUF067
    image: bytes | Document,
    preset: ImagePreset = ImagePreset.GEMINI,
    config: ImageProcessingConfig | None = None,
) -> ProcessedImage:
    """Process an image for LLM vision models.

    Splits tall images vertically with overlap, trims width if needed, and
    compresses to JPEG. The default preset is **GEMINI** (3 000 px, 9 M pixels).

    Args:
        image: Raw image bytes or a Document whose content is an image.
        preset: Model preset (ignored when *config* is provided).
        config: Custom configuration that overrides the preset.

    Returns:
        A ``ProcessedImage`` containing one or more ``ImagePart`` objects.

    Raises:
        ImageProcessingError: If the input type is unsupported, the image data
            is empty, or the image cannot be decoded, split or encoded. The
            underlying exception, when there is one, is chained as
            ``__cause__``.

    """
    effective = config if config is not None else ImageProcessingConfig.for_preset(preset)

    # Resolve input bytes
    raw: bytes
    if isinstance(image, Document):
        raw = image.content
    elif isinstance(image, bytes):  # type: ignore[reportUnnecessaryIsInstance]
        raw = image
    else:
        raise ImageProcessingError(f"Unsupported image input type: {type(image)}")  # pyright: ignore[reportUnreachable]

    if not raw:
        raise ImageProcessingError("Empty image data")

    original_bytes = len(raw)

    # Load & normalise
    try:
        img = load_and_normalize(raw)
    except Exception as exc:
        raise ImageProcessingError(f"Failed to decode image: {exc}") from exc

    original_width, original_height = img.size

    # Plan & execute. Failures in this stage are wrapped too, so callers only
    # ever need to catch ImageProcessingError, as the Raises section promises.
    try:
        plan = plan_split(
            width=original_width,
            height=original_height,
            max_dimension=effective.max_dimension,
            max_pixels=effective.max_pixels,
            overlap_fraction=effective.overlap_fraction,
            max_parts=effective.max_parts,
        )
        raw_parts = execute_split(img, plan, effective.jpeg_quality)
    except ImageProcessingError:
        # Already the documented type; do not double-wrap.
        raise
    except Exception as exc:
        raise ImageProcessingError(f"Failed to process image: {exc}") from exc

    # Build result
    total = len(raw_parts)
    parts = [
        ImagePart(
            data=data,
            width=w,
            height=h,
            index=idx,
            total=total,
            source_y=sy,
            source_height=sh,
        )
        for idx, (data, w, h, sy, sh) in enumerate(raw_parts)
    ]

    return ProcessedImage(
        parts=parts,
        original_width=original_width,
        original_height=original_height,
        original_bytes=original_bytes,
        output_bytes=sum(len(data) for data, *_ in raw_parts),
        was_trimmed=plan.trim_width is not None,
        warnings=plan.warnings,
    )
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def process_image_to_documents(  # noqa: RUF067
    image: bytes | Document,
    preset: ImagePreset = ImagePreset.GEMINI,
    config: ImageProcessingConfig | None = None,
    name_prefix: str = "image",
    sources: tuple[str, ...] | None = None,
) -> list[ImageDocument]:
    """Run ``process_image`` and wrap every resulting part in an ImageDocument.

    Convenience wrapper for direct integration with ``AIMessages``.

    Args:
        image: Raw image bytes or a Document whose content is an image.
        preset: Model preset (ignored when *config* is provided).
        config: Custom configuration that overrides the preset.
        name_prefix: Stem of the generated document names. A single part is
            named ``<prefix>.jpg``; multiple parts are named
            ``<prefix>_NN_of_MM.jpg`` (1-indexed, zero-padded to two digits).
        sources: Source references attached to every generated document. When
            *image* is a Document its ``sha256`` is appended to them.

    Returns:
        One ``ImageDocument`` per part, in part order. Multi-part documents
        carry the part label as description; a single part has none.
    """
    processed = process_image(image, preset=preset, config=config)

    provenance: list[str] = [*(sources or ())]
    if isinstance(image, Document):
        provenance.append(image.sha256)
    # An empty provenance list is passed on as None rather than ().
    doc_sources = tuple(provenance) or None

    is_single = len(processed.parts) == 1

    return [
        ImageDocument.create(
            name=(
                f"{name_prefix}.jpg"
                if is_single
                else f"{name_prefix}_{part.index + 1:02d}_of_{part.total:02d}.jpg"
            ),
            content=part.data,
            description=None if is_single else part.label,
            sources=doc_sources,
        )
        for part in processed.parts
    ]
|