abstractcore 2.4.3__py3-none-any.whl → 2.4.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,610 @@
1
+ """
2
+ Image processor for vision model support.
3
+
4
+ This module provides comprehensive image processing capabilities using PIL,
5
+ optimized for vision model inputs across different providers.
6
+ """
7
+
8
+ import base64
9
+ import io
10
+ import mimetypes
11
+ from pathlib import Path
12
+ from typing import Optional, Tuple, Dict, Any, Union
13
+
14
+ try:
15
+ from PIL import Image, ImageOps, ExifTags
16
+ PIL_AVAILABLE = True
17
+ except ImportError:
18
+ PIL_AVAILABLE = False
19
+ Image = None
20
+ ImageOps = None
21
+ ExifTags = None
22
+
23
+ from ..base import BaseMediaHandler, MediaProcessingError
24
+ from ..types import MediaContent, MediaType, ContentFormat
25
+ from ..utils.image_scaler import get_scaler, ScalingMode
26
+
27
+
28
+ class ImageProcessor(BaseMediaHandler):
29
+ """
30
+ Image processor using PIL for vision model support.
31
+
32
+ Handles image loading, preprocessing, format conversion, and optimization
33
+ for various vision models across different providers.
34
+ """
35
+
36
def __init__(self, **kwargs):
    """
    Build an image processor, refusing to start when Pillow is missing.

    Args:
        **kwargs: Configuration parameters, forwarded unchanged to the
            base handler. Keys read here:
            - max_resolution: Maximum image resolution (width, height),
              default (4096, 4096)
            - quality: JPEG quality (1-100), default 90
            - auto_rotate: Whether to auto-rotate based on EXIF, default True
            - resize_mode: How to resize ('fit', 'crop', 'stretch'),
              default 'fit'
            - prefer_max_resolution: Whether a model-specific maximum
              resolution is preferred when a model name is supplied,
              default True

    Raises:
        ImportError: If PIL/Pillow could not be imported.
    """
    if not PIL_AVAILABLE:
        raise ImportError(
            "PIL/Pillow is required for image processing. "
            "Install with: pip install \"abstractcore[media]\""
        )

    super().__init__(**kwargs)

    # Each option falls back to its quality-oriented default when the
    # caller did not supply it.
    option_defaults = (
        ('max_resolution', (4096, 4096)),
        ('quality', 90),
        ('auto_rotate', True),
        ('resize_mode', 'fit'),
        ('prefer_max_resolution', True),
    )
    for option, fallback in option_defaults:
        setattr(self, option, kwargs.get(option, fallback))

    # Advertise what this handler can do: images only.
    from ..types import MediaCapabilities
    image_formats = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tif', 'tiff', 'webp']
    self.capabilities = MediaCapabilities(
        vision_support=True,
        audio_support=False,
        video_support=False,
        document_support=False,
        supported_image_formats=image_formats,
        max_file_size=self.max_file_size,
    )

    summary = (
        f"Initialized ImageProcessor with max_resolution={self.max_resolution}, "
        f"quality={self.quality}, auto_rotate={self.auto_rotate}, prefer_max_resolution={self.prefer_max_resolution}"
    )
    self.logger.debug(summary)
77
+
78
def _process_internal(self, file_path: Path, media_type: MediaType, **kwargs) -> MediaContent:
    """
    Process an image file and return optimized content for vision models.

    Args:
        file_path: Path to the image file
        media_type: Detected media type (must be MediaType.IMAGE)
        **kwargs: Additional processing parameters:
            - target_format: Target format ('png', 'jpeg', 'webp');
              defaults to 'jpeg'
            - model_name: When given and prefer_max_resolution is enabled,
              the model-specific maximum resolution is used and any
              max_resolution override is ignored
            - max_resolution: Override default max resolution
            - quality: Override default quality
            - auto_rotate: Override default auto rotation

    Returns:
        MediaContent with base64-encoded optimized image

    Raises:
        MediaProcessingError: If the media type is not an image or image
            processing fails
    """
    if media_type != MediaType.IMAGE:
        raise MediaProcessingError(f"ImageProcessor only handles images, got {media_type}")

    try:
        # Override defaults with kwargs (an explicit None falls back to JPEG)
        target_format = kwargs.get('target_format') or 'jpeg'
        jpeg_output = target_format.lower() in ('jpeg', 'jpg')
        model_name = kwargs.get('model_name', None)

        # Use model-specific maximum resolution if available
        if model_name and self.prefer_max_resolution:
            max_resolution = self._get_model_max_resolution(model_name)
            self.logger.debug(f"Using model-specific max resolution for {model_name}: {max_resolution}")
        else:
            max_resolution = kwargs.get('max_resolution', self.max_resolution)

        quality = kwargs.get('quality', self.quality)
        auto_rotate = kwargs.get('auto_rotate', self.auto_rotate)

        with Image.open(file_path) as source:
            # Record the size as stored in the file, before any transform.
            original_size = source.size

            # Read EXIF from the freshly opened image: the copies produced
            # by rotation/flattening below no longer expose _getexif.
            has_exif = False
            exif_data = None
            exif_reader = getattr(source, '_getexif', None)
            if callable(exif_reader):
                try:
                    has_exif = bool(exif_reader())
                except Exception:
                    # Unreadable EXIF must not fail the whole conversion.
                    has_exif = False
            if has_exif:
                exif_data = self._extract_useful_exif(source)

            # Auto-rotate based on EXIF data
            img = self._auto_rotate_image(source) if auto_rotate else source

            if jpeg_output:
                if img.mode in ('RGBA', 'LA', 'PA', 'P'):
                    # JPEG has no alpha channel: flatten onto white, using
                    # the alpha band as mask for every alpha/palette mode.
                    rgba = img.convert('RGBA')
                    background = Image.new('RGB', rgba.size, (255, 255, 255))
                    background.paste(rgba, mask=rgba.split()[-1])
                    img = background
                elif img.mode not in ('1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr'):
                    # Remaining modes (e.g. 'I', 'F') cannot be written as JPEG.
                    img = img.convert('RGB')

            # Resize if needed
            if max_resolution and self._needs_resize(img.size, max_resolution):
                img = self._resize_image(img, max_resolution)

            # Optimize the image
            img = self._optimize_image(img)

            # Convert to base64
            base64_content = self._image_to_base64(img, target_format, quality)

            # Determine MIME type
            mime_type = self._get_mime_type(target_format)

            metadata = {
                'original_format': file_path.suffix.lower().lstrip('.'),
                'target_format': target_format,
                'original_size': original_size,
                'final_size': img.size,
                'color_mode': img.mode,
                'quality': quality if jpeg_output else None,
                'auto_rotated': auto_rotate,
                'optimized': True
            }

            if has_exif:
                metadata['has_exif'] = True
                if exif_data:
                    metadata['exif'] = exif_data

            return self._create_media_content(
                content=base64_content,
                file_path=file_path,
                media_type=MediaType.IMAGE,
                content_format=ContentFormat.BASE64,
                mime_type=mime_type,
                **metadata
            )

    except Exception as e:
        raise MediaProcessingError(f"Failed to process image {file_path}: {str(e)}") from e
174
+
175
def _auto_rotate_image(self, img: "Image.Image") -> "Image.Image":
    """
    Auto-rotate image based on EXIF orientation data.

    Rotation is best-effort: any failure is logged at debug level and the
    input image is returned unchanged.

    Args:
        img: PIL Image object

    Returns:
        Rotated image, or the original image when rotation fails
    """
    # Annotations are strings on purpose: when Pillow is missing, Image is
    # None and an eagerly evaluated "Image.Image" would break the import.
    try:
        rotated = ImageOps.exif_transpose(img)
    except Exception as exc:
        self.logger.debug(f"EXIF auto-rotation skipped: {exc}")
        return img
    return img if rotated is None else rotated
190
+
191
def _get_model_max_resolution(self, model_name: Optional[str] = None) -> Tuple[int, int]:
    """
    Get maximum resolution for a specific model or return the configured default.

    The model's capabilities are looked up with get_media_capabilities and
    their image_resolutions entry is parsed. Recognised shapes:
        "896x896"             -> (896, 896)
        "56x56 to 3584x3584"  -> (3584, 3584)  (last size of the range)
        "variable"            -> (4096, 4096)
    When image_resolutions is a list or tuple, only its first entry is read.

    Args:
        model_name: Name of the model to check capabilities for

    Returns:
        Maximum resolution tuple (width, height); self.max_resolution when
        no model name is given, prefer_max_resolution is disabled, the
        lookup fails, or nothing usable can be parsed
    """
    if not model_name or not self.prefer_max_resolution:
        return self.max_resolution

    # Function-scope import keeps this method self-contained.
    import re

    try:
        from ..capabilities import get_media_capabilities
        caps = get_media_capabilities(model_name)

        declared = getattr(caps, 'image_resolutions', None)
        if declared:
            if isinstance(declared, (list, tuple)):
                # NOTE(review): only the first entry is considered; confirm
                # that capability lists are ordered with that in mind.
                declared = declared[0]
            text = str(declared).lower()

            # Pull out every "<width>x<height>" pair, tolerating spaces and
            # surrounding words, and ignore degenerate zero sizes.
            sizes = [
                (int(width), int(height))
                for width, height in re.findall(r'(\d+)\s*x\s*(\d+)', text)
            ]
            sizes = [size for size in sizes if min(size) > 0]
            is_range = re.search(r'\bto\b', text) is not None

            if sizes and (is_range or 'variable' not in text):
                # For a range the upper bound is written last.
                return sizes[-1]
            if 'variable' in text:
                # For variable resolution models, use a high default
                return (4096, 4096)

    except Exception as e:
        self.logger.debug(f"Could not get model-specific resolution for {model_name}: {e}")

    # Fallback to the configured default resolution
    return self.max_resolution
235
+
236
def _needs_resize(self, current_size: Tuple[int, int], max_resolution: Tuple[int, int]) -> bool:
    """
    Tell whether an image exceeds the allowed resolution on either axis.

    Args:
        current_size: Current image size (width, height)
        max_resolution: Maximum allowed resolution (width, height)

    Returns:
        True if the width or the height is larger than its limit
    """
    too_wide = current_size[0] > max_resolution[0]
    if too_wide:
        return True
    return current_size[1] > max_resolution[1]
248
+
249
def _resize_image(self, img: "Image.Image", max_resolution: Tuple[int, int]) -> "Image.Image":
    """
    Resize image according to self.resize_mode.

    Modes:
        'crop'    - keep aspect ratio and crop to exactly max_resolution
        'stretch' - resize to exactly max_resolution (may distort)
        'fit'     - keep aspect ratio and fit within max_resolution; this
                    is also used for any unrecognised mode

    Args:
        img: PIL Image object; in 'fit' mode it is resized in place
        max_resolution: Maximum allowed resolution (width, height)

    Returns:
        Resized image (the same object as img in 'fit' mode)
    """
    # Annotations are strings on purpose: when Pillow is missing, Image is
    # None and an eagerly evaluated "Image.Image" would break the import.

    # Pillow >= 9.1 groups filters under Image.Resampling; older releases
    # only provide the module-level constant.
    lanczos = getattr(Image, 'Resampling', Image).LANCZOS

    if self.resize_mode == 'crop':
        return ImageOps.fit(img, max_resolution, lanczos)
    if self.resize_mode == 'stretch':
        return img.resize(max_resolution, lanczos)

    # 'fit' and the default for unknown modes share one implementation.
    img.thumbnail(max_resolution, lanczos)
    return img
274
+
275
def _optimize_image(self, img: "Image.Image") -> "Image.Image":
    """
    Apply optimization to the image.

    Currently a pass-through hook: the image is returned untouched.
    Possible future optimizations include color palette optimization,
    compression-specific tuning and noise reduction.

    Args:
        img: PIL Image object

    Returns:
        The same image object that was passed in
    """
    # Annotations are strings on purpose: when Pillow is missing, Image is
    # None and an eagerly evaluated "Image.Image" would break the import.
    return img
291
+
292
def _image_to_base64(self, img: "Image.Image", format: str, quality: int) -> str:
    """
    Convert PIL Image to base64 string.

    Args:
        img: PIL Image object
        format: Target format ('jpeg'/'jpg', 'png', 'webp', 'tif'/'tiff');
            any other value is handed to Pillow unchanged
        quality: Quality setting (used for JPEG and WebP only)

    Returns:
        Base64-encoded image string
    """
    # Annotations are strings on purpose: when Pillow is missing, Image is
    # None and an eagerly evaluated "Image.Image" would break the import.
    requested = format.lower()

    # Map user-facing names to Pillow format names and encoder options.
    if requested in ('jpeg', 'jpg'):
        pil_format = 'JPEG'
        save_kwargs = {'quality': quality, 'optimize': True}
    elif requested == 'png':
        pil_format = 'PNG'
        save_kwargs = {'optimize': True}
    elif requested == 'webp':
        pil_format = 'WebP'
        save_kwargs = {'quality': quality, 'optimize': True}
    elif requested in ('tif', 'tiff'):
        # Pillow registers the writer as 'TIFF'; the 'tif' alias is not a
        # format name and would fail to save.
        pil_format = 'TIFF'
        save_kwargs = {}
    else:
        pil_format = format
        save_kwargs = {}

    buffer = io.BytesIO()
    img.save(buffer, format=pil_format, **save_kwargs)

    # getvalue() returns the whole buffer regardless of stream position.
    return base64.b64encode(buffer.getvalue()).decode('utf-8')
326
+
327
def _get_mime_type(self, format: str) -> str:
    """
    Translate an image format name into its MIME type.

    Args:
        format: Image format name, matched case-insensitively

    Returns:
        MIME type string; 'image/jpeg' for unrecognised formats
    """
    wanted = format.lower()
    known_types = (
        (('jpeg', 'jpg'), 'image/jpeg'),
        (('png',), 'image/png'),
        (('gif',), 'image/gif'),
        (('bmp',), 'image/bmp'),
        (('webp',), 'image/webp'),
        (('tiff', 'tif'), 'image/tiff'),
    )
    for names, mime in known_types:
        if wanted in names:
            return mime
    return 'image/jpeg'
348
+
349
def _extract_useful_exif(self, img: "Image.Image") -> Optional[Dict[str, Any]]:
    """
    Extract useful EXIF data from image.

    Extraction is best-effort: any error while reading or decoding EXIF
    yields None instead of raising.

    Args:
        img: PIL Image object

    Returns:
        Dictionary of useful EXIF data (keys such as 'datetime',
        'camera_make', 'orientation') or None when nothing usable is found
    """
    # Annotations are strings on purpose: when Pillow is missing, Image is
    # None and an eagerly evaluated "Image.Image" would break the import.
    try:
        legacy_reader = getattr(img, '_getexif', None)
        if callable(legacy_reader):
            exif = legacy_reader()
        else:
            # Images without the private reader still offer the public
            # getexif() API.
            public_exif = img.getexif()
            exif = dict(public_exif)
            read_ifd = getattr(public_exif, 'get_ifd', None)
            if callable(read_ifd):
                # 0x8769 is the Exif sub-IFD pointer tag; merge that IFD
                # so its entries are visible alongside the base tags.
                exif.update(read_ifd(0x8769))

        if not exif:
            return None

        # EXIF tag name -> key used in the returned dictionary
        useful_tags = {
            'DateTime': 'datetime',
            'DateTimeOriginal': 'datetime_original',
            'Make': 'camera_make',
            'Model': 'camera_model',
            'Software': 'software',
            'Orientation': 'orientation',
            'XResolution': 'x_resolution',
            'YResolution': 'y_resolution',
            'ResolutionUnit': 'resolution_unit'
        }

        useful_data = {}
        for tag_id, value in exif.items():
            # Unknown numeric ids stay numeric and never match a name.
            tag = ExifTags.TAGS.get(tag_id, tag_id)
            if tag in useful_tags:
                useful_data[useful_tags[tag]] = value

        return useful_data if useful_data else None

    except Exception:
        # Deliberate best-effort: malformed EXIF must not break processing.
        return None
388
+
389
def get_image_info(self, file_path: Union[str, Path]) -> Dict[str, Any]:
    """
    Get comprehensive information about an image without full processing.

    This method never raises: when the image cannot be opened or read, a
    dictionary describing the error is returned instead.

    Args:
        file_path: Path to the image file

    Returns:
        Dictionary with image information ('filename', 'format', 'mode',
        'size', 'width', 'height', 'file_size', 'has_transparency' and,
        when present, 'exif'); on failure a dictionary with 'filename',
        'error' and 'file_size' (0 when the file cannot be stat'ed)
    """
    file_path = Path(file_path)

    try:
        with Image.open(file_path) as img:
            width, height = img.size
            info = {
                'filename': file_path.name,
                'format': img.format,
                'mode': img.mode,
                'size': img.size,
                'width': width,
                'height': height,
                'file_size': file_path.stat().st_size,
                # Alpha-channel modes are transparent-capable on their
                # own; other modes signal it via the 'transparency' key.
                'has_transparency': (
                    img.mode in ('RGBA', 'LA', 'PA') or 'transparency' in img.info
                )
            }

            # Add EXIF info if available
            exif_data = self._extract_useful_exif(img)
            if exif_data:
                info['exif'] = exif_data

            return info

    except Exception as e:
        # stat() may fail too (missing file, permissions); report 0 then.
        try:
            file_size = file_path.stat().st_size
        except OSError:
            file_size = 0
        return {
            'filename': file_path.name,
            'error': str(e),
            'file_size': file_size
        }
427
+
428
def create_thumbnail(self, file_path: Union[str, Path], size: Tuple[int, int] = (128, 128)) -> str:
    """
    Create a JPEG thumbnail of the image.

    Images with transparency or a palette are flattened onto a white
    background first, because JPEG cannot store those modes.

    Args:
        file_path: Path to the image file
        size: Maximum thumbnail size (width, height); aspect ratio is kept

    Returns:
        Base64-encoded JPEG thumbnail (quality 75)

    Raises:
        MediaProcessingError: If the thumbnail cannot be created
    """
    file_path = Path(file_path)

    try:
        with Image.open(file_path) as img:
            # Auto-rotate if needed
            if self.auto_rotate:
                img = self._auto_rotate_image(img)

            # Pillow >= 9.1 groups filters under Image.Resampling; older
            # releases only provide the module-level constant.
            lanczos = getattr(Image, 'Resampling', Image).LANCZOS
            img.thumbnail(size, lanczos)

            if img.mode in ('RGBA', 'LA', 'PA', 'P'):
                # Flatten alpha/palette images onto white for JPEG output.
                rgba = img.convert('RGBA')
                background = Image.new('RGB', rgba.size, (255, 255, 255))
                background.paste(rgba, mask=rgba.split()[-1])
                img = background
            elif img.mode not in ('1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr'):
                # Remaining modes (e.g. 'I', 'F') cannot be written as JPEG.
                img = img.convert('RGB')

            # Convert to base64
            return self._image_to_base64(img, 'jpeg', 75)

    except Exception as e:
        raise MediaProcessingError(f"Failed to create thumbnail for {file_path}: {str(e)}") from e
455
+
456
def get_processing_info(self) -> Dict[str, Any]:
    """
    Get information about the image processor capabilities.

    Returns:
        Dictionary with 'processor_type', 'supported_formats', the current
        configuration under 'capabilities', and dependency availability
        under 'dependencies'
    """
    return {
        'processor_type': 'ImageProcessor',
        # Same list the processor advertises in its capabilities,
        # including the 'tif' alias.
        'supported_formats': ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tif', 'tiff', 'webp'],
        'capabilities': {
            'max_resolution': self.max_resolution,
            'quality': self.quality,
            'auto_rotate': self.auto_rotate,
            'resize_mode': self.resize_mode,
            'base64_output': True,
            'exif_handling': True,
            'thumbnail_creation': True,
            'model_optimized_scaling': True
        },
        'dependencies': {
            'PIL': PIL_AVAILABLE
        }
    }
480
+
481
def process_for_model(self, file_path: Union[str, Path], model_name: str, **kwargs) -> MediaContent:
    """
    Process image optimally for a specific vision model.

    Args:
        file_path: Path to the image file
        model_name: Name of the target vision model
        **kwargs: Additional processing parameters:
            - scaling_mode: ScalingMode (or its string value) for image
              scaling; defaults to ScalingMode.FIT
            - target_format: Target format ('png', 'jpeg', 'webp');
              defaults to 'jpeg'
            - quality: Image quality (1-100)
            - auto_rotate: Whether to auto-rotate based on EXIF

    Returns:
        MediaContent optimized for the specified model

    Raises:
        MediaProcessingError: If processing fails
    """
    file_path = Path(file_path)

    try:
        # Get scaling mode from kwargs or use default
        scaling_mode = kwargs.get('scaling_mode', ScalingMode.FIT)
        if isinstance(scaling_mode, str):
            scaling_mode = ScalingMode(scaling_mode)

        # Override other defaults with kwargs (explicit None means JPEG)
        target_format = kwargs.get('target_format') or 'jpeg'
        jpeg_output = target_format.lower() in ('jpeg', 'jpg')
        quality = kwargs.get('quality', self.quality)
        auto_rotate = kwargs.get('auto_rotate', self.auto_rotate)

        with Image.open(file_path) as source:
            # Record the size as stored in the file, before any transform.
            original_size = source.size

            # Read EXIF from the freshly opened image: the copies produced
            # by rotation/scaling below no longer expose _getexif.
            has_exif = False
            exif_data = None
            exif_reader = getattr(source, '_getexif', None)
            if callable(exif_reader):
                try:
                    has_exif = bool(exif_reader())
                except Exception:
                    # Unreadable EXIF must not fail the whole conversion.
                    has_exif = False
            if has_exif:
                exif_data = self._extract_useful_exif(source)

            # Auto-rotate based on EXIF data
            img = self._auto_rotate_image(source) if auto_rotate else source

            # Scale image for the specific model, remembering the size the
            # scaler was given so the metadata describes the same input.
            scaler = get_scaler()
            pre_scale_size = img.size
            img = scaler.scale_for_model(img, model_name, scaling_mode)

            if jpeg_output:
                if img.mode in ('RGBA', 'LA', 'PA', 'P'):
                    # JPEG has no alpha channel: flatten onto white, using
                    # the alpha band as mask for every alpha/palette mode.
                    rgba = img.convert('RGBA')
                    background = Image.new('RGB', rgba.size, (255, 255, 255))
                    background.paste(rgba, mask=rgba.split()[-1])
                    img = background
                elif img.mode not in ('1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr'):
                    # Remaining modes (e.g. 'I', 'F') cannot be written as JPEG.
                    img = img.convert('RGB')

            # Optimize the image
            img = self._optimize_image(img)

            # Convert to base64
            base64_content = self._image_to_base64(img, target_format, quality)

            # Determine MIME type
            mime_type = self._get_mime_type(target_format)

            # Optimal resolution is reported for the pre-scale size, not
            # for the already-scaled result.
            optimal_size = scaler.get_optimal_resolution(model_name, pre_scale_size)

            metadata = {
                'original_format': file_path.suffix.lower().lstrip('.'),
                'target_format': target_format,
                'original_size': original_size,
                'final_size': img.size,
                'optimal_size_for_model': optimal_size,
                'target_model': model_name,
                'scaling_mode': scaling_mode.value,
                'color_mode': img.mode,
                'quality': quality if jpeg_output else None,
                'auto_rotated': auto_rotate,
                'model_optimized': True
            }

            if has_exif:
                metadata['has_exif'] = True
                if exif_data:
                    metadata['exif'] = exif_data

            return self._create_media_content(
                content=base64_content,
                file_path=file_path,
                media_type=MediaType.IMAGE,
                content_format=ContentFormat.BASE64,
                mime_type=mime_type,
                **metadata
            )

    except Exception as e:
        raise MediaProcessingError(f"Failed to process image {file_path} for model {model_name}: {str(e)}") from e
579
+
580
def get_optimal_size_for_model(self, model_name: str, original_size: Tuple[int, int]) -> Tuple[int, int]:
    """
    Ask the shared scaler which size suits a model, without touching any image.

    Args:
        model_name: Name of the target vision model
        original_size: Original image size (width, height)

    Returns:
        Optimal target size (width, height) for the model, as computed by
        the scaler's get_optimal_resolution
    """
    return get_scaler().get_optimal_resolution(model_name, original_size)
593
+
594
def supports_model(self, model_name: str) -> bool:
    """
    Report whether model-specific optimizations exist for a model.

    The check simply asks the scaler for the model's capabilities; any
    exception raised along the way counts as "not supported".

    Args:
        model_name: Name of the model

    Returns:
        True if the capability lookup succeeds, False otherwise
    """
    try:
        get_scaler()._get_model_capabilities(model_name)
    except Exception:
        return False
    else:
        return True