abstractcore 2.4.3__py3-none-any.whl → 2.4.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/cli/__init__.py +9 -0
- abstractcore/cli/main.py +759 -0
- abstractcore/cli/vision_config.py +491 -0
- abstractcore/media/handlers/__init__.py +16 -0
- abstractcore/media/handlers/anthropic_handler.py +326 -0
- abstractcore/media/handlers/local_handler.py +541 -0
- abstractcore/media/handlers/openai_handler.py +281 -0
- abstractcore/media/processors/__init__.py +13 -0
- abstractcore/media/processors/image_processor.py +610 -0
- abstractcore/media/processors/office_processor.py +490 -0
- abstractcore/media/processors/pdf_processor.py +485 -0
- abstractcore/media/processors/text_processor.py +557 -0
- abstractcore/media/utils/__init__.py +22 -0
- abstractcore/media/utils/image_scaler.py +306 -0
- abstractcore/providers/base.py +97 -0
- abstractcore/providers/huggingface_provider.py +17 -8
- abstractcore/utils/version.py +1 -1
- {abstractcore-2.4.3.dist-info → abstractcore-2.4.5.dist-info}/METADATA +1 -1
- {abstractcore-2.4.3.dist-info → abstractcore-2.4.5.dist-info}/RECORD +23 -9
- {abstractcore-2.4.3.dist-info → abstractcore-2.4.5.dist-info}/entry_points.txt +2 -0
- {abstractcore-2.4.3.dist-info → abstractcore-2.4.5.dist-info}/WHEEL +0 -0
- {abstractcore-2.4.3.dist-info → abstractcore-2.4.5.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.4.3.dist-info → abstractcore-2.4.5.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,610 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Image processor for vision model support.
|
|
3
|
+
|
|
4
|
+
This module provides comprehensive image processing capabilities using PIL,
|
|
5
|
+
optimized for vision model inputs across different providers.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import base64
|
|
9
|
+
import io
|
|
10
|
+
import mimetypes
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Optional, Tuple, Dict, Any, Union
|
|
13
|
+
|
|
14
|
+
try:
|
|
15
|
+
from PIL import Image, ImageOps, ExifTags
|
|
16
|
+
PIL_AVAILABLE = True
|
|
17
|
+
except ImportError:
|
|
18
|
+
PIL_AVAILABLE = False
|
|
19
|
+
Image = None
|
|
20
|
+
ImageOps = None
|
|
21
|
+
ExifTags = None
|
|
22
|
+
|
|
23
|
+
from ..base import BaseMediaHandler, MediaProcessingError
|
|
24
|
+
from ..types import MediaContent, MediaType, ContentFormat
|
|
25
|
+
from ..utils.image_scaler import get_scaler, ScalingMode
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class ImageProcessor(BaseMediaHandler):
|
|
29
|
+
"""
|
|
30
|
+
Image processor using PIL for vision model support.
|
|
31
|
+
|
|
32
|
+
Handles image loading, preprocessing, format conversion, and optimization
|
|
33
|
+
for various vision models across different providers.
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
def __init__(self, **kwargs):
    """
    Configure the image processor from keyword options.

    Args:
        **kwargs: Configuration parameters. They are forwarded unchanged to
            the base handler and the following keys are also read here:
            - max_resolution: Maximum image resolution (width, height),
              default (4096, 4096)
            - quality: JPEG quality (1-100), default 90
            - auto_rotate: Whether to auto-rotate based on EXIF, default True
            - resize_mode: How to resize ('fit', 'crop', 'stretch'),
              default 'fit'
            - prefer_max_resolution: Whether a model-specific maximum
              resolution is preferred when a model name is supplied,
              default True

    Raises:
        ImportError: If PIL/Pillow could not be imported.
    """
    # Refuse to construct a processor that could not open any image.
    if not PIL_AVAILABLE:
        raise ImportError(
            'PIL/Pillow is required for image processing. Install with: pip install "abstractcore[media]"'
        )

    super().__init__(**kwargs)

    # Option name -> default used when the caller did not supply the option.
    option_defaults = (
        ('max_resolution', (4096, 4096)),
        ('quality', 90),
        ('auto_rotate', True),
        ('resize_mode', 'fit'),
        ('prefer_max_resolution', True),
    )
    for option, fallback in option_defaults:
        setattr(self, option, kwargs.get(option, fallback))

    # Advertise what this handler can process: images only.
    from ..types import MediaCapabilities
    image_formats = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tif', 'tiff', 'webp']
    self.capabilities = MediaCapabilities(
        vision_support=True,
        audio_support=False,
        video_support=False,
        document_support=False,
        supported_image_formats=image_formats,
        max_file_size=self.max_file_size,
    )

    summary = (
        f"Initialized ImageProcessor with max_resolution={self.max_resolution}, quality={self.quality}, "
        f"auto_rotate={self.auto_rotate}, prefer_max_resolution={self.prefer_max_resolution}"
    )
    self.logger.debug(summary)
|
|
77
|
+
|
|
78
|
+
def _process_internal(self, file_path: Path, media_type: MediaType, **kwargs) -> MediaContent:
    """
    Process an image file and return optimized content for vision models.

    Args:
        file_path: Path to the image file
        media_type: Detected media type (must be MediaType.IMAGE)
        **kwargs: Additional processing parameters:
            - target_format: Target format ('png', 'jpeg', 'webp'), default 'jpeg'
            - model_name: When given and prefer_max_resolution is enabled,
              the model-specific maximum resolution is used and any
              max_resolution override is ignored
            - max_resolution: Override default max resolution
            - quality: Override default quality
            - auto_rotate: Override default auto rotation

    Returns:
        MediaContent with base64-encoded optimized image

    Raises:
        MediaProcessingError: If media_type is not an image or processing fails
    """
    if media_type != MediaType.IMAGE:
        raise MediaProcessingError(f"ImageProcessor only handles images, got {media_type}")

    try:
        # Override defaults with kwargs
        target_format = kwargs.get('target_format', 'jpeg')
        model_name = kwargs.get('model_name', None)
        wants_jpeg = target_format.lower() in ('jpeg', 'jpg')

        # Use model-specific maximum resolution if available
        if model_name and self.prefer_max_resolution:
            max_resolution = self._get_model_max_resolution(model_name)
            self.logger.debug(f"Using model-specific max resolution for {model_name}: {max_resolution}")
        else:
            max_resolution = kwargs.get('max_resolution', self.max_resolution)

        quality = kwargs.get('quality', self.quality)
        auto_rotate = kwargs.get('auto_rotate', self.auto_rotate)

        with Image.open(file_path) as source:
            # Record the dimensions of the file as stored, before any
            # rotation or resizing changes them.
            original_size = source.size

            # Read EXIF from the freshly opened image: the copies produced
            # by rotation/flattening below are plain images that no longer
            # expose _getexif, so probing later would never find EXIF.
            raw_exif = None
            exif_data = None
            exif_reader = getattr(source, '_getexif', None)
            if callable(exif_reader):
                try:
                    raw_exif = exif_reader()
                except Exception:
                    # EXIF is best-effort metadata; a corrupt block must
                    # not abort processing of the pixels.
                    raw_exif = None
            if raw_exif:
                exif_data = self._extract_useful_exif(source)

            img = source

            # Auto-rotate based on EXIF data
            if auto_rotate:
                img = self._auto_rotate_image(img)

            # JPEG has no alpha channel: composite transparent images onto
            # a white background. 'P' and 'LA' are first expanded to 'RGBA'
            # so their transparency is honoured by the paste mask as well.
            if wants_jpeg and img.mode in ('RGBA', 'P', 'LA'):
                if img.mode != 'RGBA':
                    img = img.convert('RGBA')
                background = Image.new('RGB', img.size, (255, 255, 255))
                background.paste(img, mask=img.split()[-1])
                img = background

            # Resize if needed
            if max_resolution and self._needs_resize(img.size, max_resolution):
                img = self._resize_image(img, max_resolution)

            # Optimize the image
            img = self._optimize_image(img)

            # Convert to base64
            base64_content = self._image_to_base64(img, target_format, quality)

            # Determine MIME type
            mime_type = self._get_mime_type(target_format)

            # Create metadata
            metadata = {
                'original_format': file_path.suffix.lower().lstrip('.'),
                'target_format': target_format,
                'original_size': original_size,
                'final_size': img.size,
                'color_mode': img.mode,
                'quality': quality if wants_jpeg else None,
                'auto_rotated': auto_rotate,
                'optimized': True
            }

            # Add EXIF data if available
            if raw_exif:
                metadata['has_exif'] = True
                if exif_data:
                    metadata['exif'] = exif_data

            return self._create_media_content(
                content=base64_content,
                file_path=file_path,
                media_type=MediaType.IMAGE,
                content_format=ContentFormat.BASE64,
                mime_type=mime_type,
                **metadata
            )

    except Exception as e:
        raise MediaProcessingError(f"Failed to process image {file_path}: {str(e)}") from e
|
|
174
|
+
|
|
175
|
+
def _auto_rotate_image(self, img: Image.Image) -> Image.Image:
|
|
176
|
+
"""
|
|
177
|
+
Auto-rotate image based on EXIF orientation data.
|
|
178
|
+
|
|
179
|
+
Args:
|
|
180
|
+
img: PIL Image object
|
|
181
|
+
|
|
182
|
+
Returns:
|
|
183
|
+
Rotated image
|
|
184
|
+
"""
|
|
185
|
+
try:
|
|
186
|
+
return ImageOps.exif_transpose(img)
|
|
187
|
+
except Exception:
|
|
188
|
+
# If auto-rotation fails, return original image
|
|
189
|
+
return img
|
|
190
|
+
|
|
191
|
+
def _get_model_max_resolution(self, model_name: Optional[str] = None) -> Tuple[int, int]:
|
|
192
|
+
"""
|
|
193
|
+
Get maximum resolution for a specific model or return default high resolution.
|
|
194
|
+
|
|
195
|
+
Args:
|
|
196
|
+
model_name: Name of the model to check capabilities for
|
|
197
|
+
|
|
198
|
+
Returns:
|
|
199
|
+
Maximum resolution tuple (width, height)
|
|
200
|
+
"""
|
|
201
|
+
if not model_name or not self.prefer_max_resolution:
|
|
202
|
+
return self.max_resolution
|
|
203
|
+
|
|
204
|
+
try:
|
|
205
|
+
from ..capabilities import get_media_capabilities
|
|
206
|
+
caps = get_media_capabilities(model_name)
|
|
207
|
+
|
|
208
|
+
if hasattr(caps, 'image_resolutions') and caps.image_resolutions:
|
|
209
|
+
resolution_str = caps.image_resolutions
|
|
210
|
+
if isinstance(resolution_str, list) and resolution_str:
|
|
211
|
+
resolution_str = resolution_str[0]
|
|
212
|
+
else:
|
|
213
|
+
resolution_str = str(resolution_str)
|
|
214
|
+
|
|
215
|
+
# Parse resolution strings like "3584x3584", "56x56 to 3584x3584", "variable"
|
|
216
|
+
if "to" in resolution_str:
|
|
217
|
+
# Extract maximum from range like "56x56 to 3584x3584"
|
|
218
|
+
max_part = resolution_str.split("to")[-1].strip()
|
|
219
|
+
if "x" in max_part:
|
|
220
|
+
width, height = map(int, max_part.split("x"))
|
|
221
|
+
return (width, height)
|
|
222
|
+
elif "x" in resolution_str and "variable" not in resolution_str.lower():
|
|
223
|
+
# Parse direct resolution like "896x896"
|
|
224
|
+
width, height = map(int, resolution_str.split("x"))
|
|
225
|
+
return (width, height)
|
|
226
|
+
elif "variable" in resolution_str.lower():
|
|
227
|
+
# For variable resolution models, use a high default
|
|
228
|
+
return (4096, 4096)
|
|
229
|
+
|
|
230
|
+
except Exception as e:
|
|
231
|
+
self.logger.debug(f"Could not get model-specific resolution for {model_name}: {e}")
|
|
232
|
+
|
|
233
|
+
# Fallback to default high resolution
|
|
234
|
+
return self.max_resolution
|
|
235
|
+
|
|
236
|
+
def _needs_resize(self, current_size: Tuple[int, int], max_resolution: Tuple[int, int]) -> bool:
|
|
237
|
+
"""
|
|
238
|
+
Check if image needs resizing.
|
|
239
|
+
|
|
240
|
+
Args:
|
|
241
|
+
current_size: Current image size (width, height)
|
|
242
|
+
max_resolution: Maximum allowed resolution (width, height)
|
|
243
|
+
|
|
244
|
+
Returns:
|
|
245
|
+
True if resizing is needed
|
|
246
|
+
"""
|
|
247
|
+
return current_size[0] > max_resolution[0] or current_size[1] > max_resolution[1]
|
|
248
|
+
|
|
249
|
+
def _resize_image(self, img: "Image.Image", max_resolution: Tuple[int, int]) -> "Image.Image":
    """
    Resize image according to ``self.resize_mode``.

    The annotations are written as strings so that defining this method
    does not dereference ``Image`` when Pillow failed to import (the module
    then binds ``Image = None``).

    Args:
        img: PIL Image object
        max_resolution: Maximum allowed resolution (width, height)

    Returns:
        Resized image. In 'fit' mode (also used for any unrecognised mode)
        the input image is shrunk in place and returned; 'crop' and
        'stretch' return the image produced by the resize call.
    """
    resample = Image.Resampling.LANCZOS

    if self.resize_mode == 'crop':
        # Maintain aspect ratio, crop to exact size
        return ImageOps.fit(img, max_resolution, resample)

    if self.resize_mode == 'stretch':
        # Stretch to exact size (may distort)
        return img.resize(max_resolution, resample)

    # 'fit' and every unknown mode: maintain aspect ratio, fit within bounds
    img.thumbnail(max_resolution, resample)
    return img
|
|
274
|
+
|
|
275
|
+
def _optimize_image(self, img: Image.Image) -> Image.Image:
|
|
276
|
+
"""
|
|
277
|
+
Apply optimization to the image.
|
|
278
|
+
|
|
279
|
+
Args:
|
|
280
|
+
img: PIL Image object
|
|
281
|
+
|
|
282
|
+
Returns:
|
|
283
|
+
Optimized image
|
|
284
|
+
"""
|
|
285
|
+
# For now, just return the image as-is
|
|
286
|
+
# Future optimizations could include:
|
|
287
|
+
# - Color palette optimization
|
|
288
|
+
# - Compression-specific optimizations
|
|
289
|
+
# - Noise reduction
|
|
290
|
+
return img
|
|
291
|
+
|
|
292
|
+
def _image_to_base64(self, img: Image.Image, format: str, quality: int) -> str:
|
|
293
|
+
"""
|
|
294
|
+
Convert PIL Image to base64 string.
|
|
295
|
+
|
|
296
|
+
Args:
|
|
297
|
+
img: PIL Image object
|
|
298
|
+
format: Target format ('jpeg', 'png', 'webp')
|
|
299
|
+
quality: Quality setting (for JPEG/WebP)
|
|
300
|
+
|
|
301
|
+
Returns:
|
|
302
|
+
Base64-encoded image string
|
|
303
|
+
"""
|
|
304
|
+
buffer = io.BytesIO()
|
|
305
|
+
|
|
306
|
+
# Set format-specific options
|
|
307
|
+
save_kwargs = {}
|
|
308
|
+
if format.lower() in ['jpeg', 'jpg']:
|
|
309
|
+
format = 'JPEG'
|
|
310
|
+
save_kwargs['quality'] = quality
|
|
311
|
+
save_kwargs['optimize'] = True
|
|
312
|
+
elif format.lower() == 'png':
|
|
313
|
+
format = 'PNG'
|
|
314
|
+
save_kwargs['optimize'] = True
|
|
315
|
+
elif format.lower() == 'webp':
|
|
316
|
+
format = 'WebP'
|
|
317
|
+
save_kwargs['quality'] = quality
|
|
318
|
+
save_kwargs['optimize'] = True
|
|
319
|
+
|
|
320
|
+
# Save image to buffer
|
|
321
|
+
img.save(buffer, format=format, **save_kwargs)
|
|
322
|
+
buffer.seek(0)
|
|
323
|
+
|
|
324
|
+
# Encode to base64
|
|
325
|
+
return base64.b64encode(buffer.getvalue()).decode('utf-8')
|
|
326
|
+
|
|
327
|
+
def _get_mime_type(self, format: str) -> str:
|
|
328
|
+
"""
|
|
329
|
+
Get MIME type for the given format.
|
|
330
|
+
|
|
331
|
+
Args:
|
|
332
|
+
format: Image format
|
|
333
|
+
|
|
334
|
+
Returns:
|
|
335
|
+
MIME type string
|
|
336
|
+
"""
|
|
337
|
+
mime_map = {
|
|
338
|
+
'jpeg': 'image/jpeg',
|
|
339
|
+
'jpg': 'image/jpeg',
|
|
340
|
+
'png': 'image/png',
|
|
341
|
+
'gif': 'image/gif',
|
|
342
|
+
'bmp': 'image/bmp',
|
|
343
|
+
'webp': 'image/webp',
|
|
344
|
+
'tiff': 'image/tiff',
|
|
345
|
+
'tif': 'image/tiff'
|
|
346
|
+
}
|
|
347
|
+
return mime_map.get(format.lower(), 'image/jpeg')
|
|
348
|
+
|
|
349
|
+
def _extract_useful_exif(self, img: Image.Image) -> Optional[Dict[str, Any]]:
|
|
350
|
+
"""
|
|
351
|
+
Extract useful EXIF data from image.
|
|
352
|
+
|
|
353
|
+
Args:
|
|
354
|
+
img: PIL Image object
|
|
355
|
+
|
|
356
|
+
Returns:
|
|
357
|
+
Dictionary of useful EXIF data or None
|
|
358
|
+
"""
|
|
359
|
+
try:
|
|
360
|
+
exif = img._getexif()
|
|
361
|
+
if not exif:
|
|
362
|
+
return None
|
|
363
|
+
|
|
364
|
+
useful_data = {}
|
|
365
|
+
|
|
366
|
+
# Map of useful EXIF tags
|
|
367
|
+
useful_tags = {
|
|
368
|
+
'DateTime': 'datetime',
|
|
369
|
+
'DateTimeOriginal': 'datetime_original',
|
|
370
|
+
'Make': 'camera_make',
|
|
371
|
+
'Model': 'camera_model',
|
|
372
|
+
'Software': 'software',
|
|
373
|
+
'Orientation': 'orientation',
|
|
374
|
+
'XResolution': 'x_resolution',
|
|
375
|
+
'YResolution': 'y_resolution',
|
|
376
|
+
'ResolutionUnit': 'resolution_unit'
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
for tag_id, value in exif.items():
|
|
380
|
+
tag = ExifTags.TAGS.get(tag_id, tag_id)
|
|
381
|
+
if tag in useful_tags:
|
|
382
|
+
useful_data[useful_tags[tag]] = value
|
|
383
|
+
|
|
384
|
+
return useful_data if useful_data else None
|
|
385
|
+
|
|
386
|
+
except Exception:
|
|
387
|
+
return None
|
|
388
|
+
|
|
389
|
+
def get_image_info(self, file_path: Union[str, Path]) -> Dict[str, Any]:
    """
    Get comprehensive information about an image without full processing.

    Args:
        file_path: Path to the image file

    Returns:
        Dictionary with 'filename', 'format', 'mode', 'size', 'width',
        'height', 'file_size', 'has_transparency' and, when available,
        'exif'. If the image cannot be read, a dictionary with 'filename',
        'error' and 'file_size' (0 when the file does not exist) is
        returned instead of raising.
    """
    file_path = Path(file_path)

    try:
        with Image.open(file_path) as img:
            # Modes with an alpha band are transparent-capable by
            # definition; other modes (e.g. palette images) only when the
            # file carries a 'transparency' entry.
            has_alpha_band = img.mode in ('RGBA', 'LA', 'PA')
            info = {
                'filename': file_path.name,
                'format': img.format,
                'mode': img.mode,
                'size': img.size,
                'width': img.size[0],
                'height': img.size[1],
                'file_size': file_path.stat().st_size,
                'has_transparency': has_alpha_band or 'transparency' in img.info
            }

            # Add EXIF info if available
            exif_data = self._extract_useful_exif(img)
            if exif_data:
                info['exif'] = exif_data

            return info

    except Exception as e:
        # Inspection is best-effort: report the failure in the result.
        return {
            'filename': file_path.name,
            'error': str(e),
            'file_size': file_path.stat().st_size if file_path.exists() else 0
        }
|
|
427
|
+
|
|
428
|
+
def create_thumbnail(self, file_path: Union[str, Path], size: Tuple[int, int] = (128, 128)) -> str:
    """
    Create a JPEG thumbnail of the image.

    Args:
        file_path: Path to the image file
        size: Maximum thumbnail size (width, height); aspect ratio is kept

    Returns:
        Base64-encoded JPEG thumbnail (quality 75)

    Raises:
        MediaProcessingError: If the thumbnail cannot be created
    """
    file_path = Path(file_path)

    try:
        with Image.open(file_path) as img:
            # Auto-rotate if needed
            if self.auto_rotate:
                img = self._auto_rotate_image(img)

            # The thumbnail is always saved as JPEG, which cannot store
            # alpha or palette data: composite transparent images onto
            # white and bring every other non-JPEG mode to RGB first.
            if img.mode in ('RGBA', 'P', 'LA'):
                if img.mode != 'RGBA':
                    img = img.convert('RGBA')
                background = Image.new('RGB', img.size, (255, 255, 255))
                background.paste(img, mask=img.split()[-1])
                img = background
            elif img.mode not in ('RGB', 'L'):
                img = img.convert('RGB')

            # Create thumbnail (shrinks the image in place)
            img.thumbnail(size, Image.Resampling.LANCZOS)

            # Convert to base64
            return self._image_to_base64(img, 'jpeg', 75)

    except Exception as e:
        raise MediaProcessingError(f"Failed to create thumbnail for {file_path}: {str(e)}") from e
|
|
455
|
+
|
|
456
|
+
def get_processing_info(self) -> Dict[str, Any]:
    """
    Get information about the image processor capabilities.

    Returns:
        Dictionary with the processor type, the supported file formats,
        the current configuration/capability flags and the availability of
        the PIL dependency
    """
    return {
        'processor_type': 'ImageProcessor',
        # Same list as the capabilities declared in __init__, including the
        # 'tif' spelling that was previously missing here.
        'supported_formats': ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tif', 'tiff', 'webp'],
        'capabilities': {
            'max_resolution': self.max_resolution,
            'quality': self.quality,
            'auto_rotate': self.auto_rotate,
            'resize_mode': self.resize_mode,
            'base64_output': True,
            'exif_handling': True,
            'thumbnail_creation': True,
            'model_optimized_scaling': True
        },
        'dependencies': {
            'PIL': PIL_AVAILABLE
        }
    }
|
|
480
|
+
|
|
481
|
+
def process_for_model(self, file_path: Union[str, Path], model_name: str, **kwargs) -> MediaContent:
    """
    Process image optimally for a specific vision model.

    Args:
        file_path: Path to the image file
        model_name: Name of the target vision model
        **kwargs: Additional processing parameters:
            - scaling_mode: ScalingMode (or its string value) for image
              scaling, default ScalingMode.FIT
            - target_format: Target format ('png', 'jpeg', 'webp'), default 'jpeg'
            - quality: Image quality (1-100)
            - auto_rotate: Whether to auto-rotate based on EXIF

    Returns:
        MediaContent optimized for the specified model

    Raises:
        MediaProcessingError: If processing fails
    """
    file_path = Path(file_path)

    try:
        # Get scaling mode from kwargs or use default
        scaling_mode = kwargs.get('scaling_mode', ScalingMode.FIT)
        if isinstance(scaling_mode, str):
            scaling_mode = ScalingMode(scaling_mode)

        # Override other defaults with kwargs
        target_format = kwargs.get('target_format', 'jpeg')
        quality = kwargs.get('quality', self.quality)
        auto_rotate = kwargs.get('auto_rotate', self.auto_rotate)
        wants_jpeg = target_format.lower() in ('jpeg', 'jpg')

        with Image.open(file_path) as source:
            # Record the dimensions of the file as stored, before any
            # rotation or scaling changes them.
            original_size = source.size

            # Read EXIF from the freshly opened image: the images produced
            # by rotation/scaling/flattening below are plain images that no
            # longer expose _getexif, so probing later would never find EXIF.
            raw_exif = None
            exif_data = None
            exif_reader = getattr(source, '_getexif', None)
            if callable(exif_reader):
                try:
                    raw_exif = exif_reader()
                except Exception:
                    # EXIF is best-effort metadata; a corrupt block must
                    # not abort processing of the pixels.
                    raw_exif = None
            if raw_exif:
                exif_data = self._extract_useful_exif(source)

            img = source

            # Auto-rotate based on EXIF data
            if auto_rotate:
                img = self._auto_rotate_image(img)

            # Get model-optimized scaler
            scaler = get_scaler()

            # Scale image for the specific model
            img = scaler.scale_for_model(img, model_name, scaling_mode)

            # JPEG has no alpha channel: composite transparent images onto
            # a white background. 'P' and 'LA' are first expanded to 'RGBA'
            # so their transparency is honoured by the paste mask as well.
            if wants_jpeg and img.mode in ('RGBA', 'P', 'LA'):
                if img.mode != 'RGBA':
                    img = img.convert('RGBA')
                background = Image.new('RGB', img.size, (255, 255, 255))
                background.paste(img, mask=img.split()[-1])
                img = background

            # Optimize the image
            img = self._optimize_image(img)

            # Convert to base64
            base64_content = self._image_to_base64(img, target_format, quality)

            # Determine MIME type
            mime_type = self._get_mime_type(target_format)

            # Get optimal resolution for metadata.
            # NOTE(review): this is computed from the already-scaled size,
            # as before — confirm whether the scaler expects the original.
            optimal_size = scaler.get_optimal_resolution(model_name, img.size)

            # Create metadata
            metadata = {
                'original_format': file_path.suffix.lower().lstrip('.'),
                'target_format': target_format,
                'original_size': original_size,
                'final_size': img.size,
                'optimal_size_for_model': optimal_size,
                'target_model': model_name,
                'scaling_mode': scaling_mode.value,
                'color_mode': img.mode,
                'quality': quality if wants_jpeg else None,
                'auto_rotated': auto_rotate,
                'model_optimized': True
            }

            # Add EXIF data if available
            if raw_exif:
                metadata['has_exif'] = True
                if exif_data:
                    metadata['exif'] = exif_data

            return self._create_media_content(
                content=base64_content,
                file_path=file_path,
                media_type=MediaType.IMAGE,
                content_format=ContentFormat.BASE64,
                mime_type=mime_type,
                **metadata
            )

    except Exception as e:
        raise MediaProcessingError(f"Failed to process image {file_path} for model {model_name}: {str(e)}") from e
|
|
579
|
+
|
|
580
|
+
def get_optimal_size_for_model(self, model_name: str, original_size: Tuple[int, int]) -> Tuple[int, int]:
    """
    Ask the shared scaler for the best target size for a model.

    No image is opened or modified; the answer is delegated entirely to
    the scaler's ``get_optimal_resolution``.

    Args:
        model_name: Name of the target vision model
        original_size: Original image size (width, height)

    Returns:
        Optimal target size (width, height) for the model
    """
    return get_scaler().get_optimal_resolution(model_name, original_size)
|
|
593
|
+
|
|
594
|
+
def supports_model(self, model_name: str) -> bool:
    """
    Report whether model-specific optimizations can be resolved for a model.

    Args:
        model_name: Name of the model

    Returns:
        True if the scaler's capability lookup for the model completes
        without raising, False if it raises any exception
    """
    try:
        # The lookup result itself is not needed; only whether it succeeds.
        get_scaler()._get_model_capabilities(model_name)
    except Exception:
        return False
    else:
        return True
|