caption-flow 0.2.4__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,6 +10,7 @@ from typing import List, Any, Optional, Tuple, Union
10
10
  import numpy as np
11
11
  import requests
12
12
  from PIL import Image
13
+ from ..models import ProcessingItem
13
14
 
14
15
 
15
16
  logger = logging.getLogger(__name__)
@@ -21,104 +22,10 @@ class ImageProcessor:
21
22
  def __init__(self, num_workers: int = 4):
22
23
  self.executor = ProcessPoolExecutor(max_workers=num_workers)
23
24
 
24
- async def process_batch(self, image_paths: List[Path]) -> List[np.ndarray]:
25
- """Process a batch of images in parallel."""
26
- loop = asyncio.get_event_loop()
27
-
28
- tasks = []
29
- for path in image_paths:
30
- task = loop.run_in_executor(self.executor, self._process_image, path)
31
- tasks.append(task)
32
-
33
- return await asyncio.gather(*tasks)
34
-
35
- @staticmethod
36
- def _process_image(path: Path) -> np.ndarray:
37
- """Process a single image."""
38
- img = Image.open(path)
39
-
40
- # Resize to standard size
41
- img = img.resize((224, 224), Image.Resampling.LANCZOS)
42
-
43
- # Convert to RGB if needed
44
- if img.mode != "RGB":
45
- img = img.convert("RGB")
46
-
47
- # Convert to numpy array
48
- arr = np.array(img, dtype=np.float32)
49
-
50
- # Normalize
51
- arr = arr / 255.0
52
-
53
- return arr
54
-
55
- @staticmethod
56
- def process_image_data(img_data: Union[str, bytes, Image.Image]) -> Optional[bytes]:
57
- """
58
- Process various types of image data into bytes.
59
-
60
- Args:
61
- img_data: Can be a URL string, bytes, or PIL Image
62
-
63
- Returns:
64
- Image data as bytes, or None if processing failed
65
- """
66
- try:
67
- if isinstance(img_data, str):
68
- # It's a URL - download the image
69
- try:
70
- # Download with timeout
71
- response = requests.get(
72
- img_data,
73
- timeout=30,
74
- headers={"User-Agent": "Mozilla/5.0 (captionflow-dataset-loader)"},
75
- )
76
- response.raise_for_status()
77
- image_data = response.content
78
-
79
- # Verify it's an image by trying to open it
80
- img = Image.open(BytesIO(image_data))
81
- img.verify() # Verify it's a valid image
82
-
83
- return image_data
84
-
85
- except Exception as e:
86
- logger.error(f"Failed to download image from {img_data}: {e}")
87
- return None
88
-
89
- elif hasattr(img_data, "__class__") and "Image" in str(img_data.__class__):
90
- # It's a PIL Image object
91
- import io
92
-
93
- # Save as PNG bytes
94
- img_bytes = io.BytesIO()
95
- # Convert to RGB
96
- img_data = img_data.convert("RGB")
97
- img_data.save(img_bytes, format="PNG")
98
- return img_bytes.getvalue()
99
-
100
- elif isinstance(img_data, bytes):
101
- # Already bytes - validate it's an image
102
- try:
103
- img = Image.open(BytesIO(img_data))
104
- img.verify()
105
- return img_data
106
- except Exception as e:
107
- logger.error(f"Invalid image data: {e}")
108
- return None
109
-
110
- else:
111
- logger.warning(f"Unknown image data type: {type(img_data)}")
112
- return None
113
-
114
- except Exception as e:
115
- logger.error(f"Error processing image data: {e}", exc_info=True)
116
- return None
117
-
118
25
  @staticmethod
119
- def prepare_for_inference(image: Image.Image) -> Image.Image:
26
+ def prepare_for_inference(item: ProcessingItem) -> Image.Image:
120
27
  """
121
- Prepare image for inference, handling transparency and mostly black/white images.
28
+ Prepare image for inference.
122
29
 
123
30
  Args:
124
31
  image: PIL Image to prepare
@@ -126,42 +33,22 @@ class ImageProcessor:
126
33
  Returns:
127
34
  Prepared PIL Image
128
35
  """
129
- # Convert to RGBA to handle transparency
130
- img_rgba = image.convert("RGBA")
131
- rgb_img = img_rgba.convert("RGB")
132
- np_img = np.array(rgb_img)
133
-
134
- # Calculate percentage of pixels that are (0,0,0) or (255,255,255)
135
- total_pixels = np_img.shape[0] * np_img.shape[1]
136
- black_pixels = np.all(np_img == [0, 0, 0], axis=-1).sum()
137
- white_pixels = np.all(np_img == [255, 255, 255], axis=-1).sum()
138
- black_pct = black_pixels / total_pixels
139
- white_pct = white_pixels / total_pixels
140
-
141
- threshold = 0.90 # 90% threshold
142
-
143
- is_mostly_black = black_pct >= threshold
144
- is_mostly_white = white_pct >= threshold
145
-
146
- if is_mostly_black or is_mostly_white:
147
- # Replace background with opposite color for better contrast
148
- bg_color = (255, 255, 255) if is_mostly_black else (0, 0, 0)
149
- background = Image.new("RGB", img_rgba.size, bg_color)
150
- # Use alpha channel as mask if present
151
- if img_rgba.mode == "RGBA":
152
- background.paste(img_rgba.convert("RGB"), mask=img_rgba.split()[3])
153
- else:
154
- background.paste(img_rgba.convert("RGB"))
155
-
156
- color_type = "black" if is_mostly_black else "white"
157
- pct = black_pct if is_mostly_black else white_pct
158
- logger.debug(
159
- f"Image is {pct*100:.1f}% {color_type}; background replaced with {bg_color}"
160
- )
161
-
162
- return background
163
- else:
164
- return rgb_img
36
+ # We used to do a lot more hand-holding here with transparency, but oh well.
37
+
38
+ if item.image is not None:
39
+ image = item.image
40
+ item.metadata["image_width"], item.metadata["image_height"] = image.size
41
+ item.metadata["image_format"] = image.format or "unknown"
42
+ item.image = None
43
+ return image
44
+
45
+ item.image = None
46
+ image = Image.open(BytesIO(item.image_data))
47
+ item.image_data = b""
48
+ item.metadata["image_format"] = image.format or "unknown"
49
+ item.metadata["image_width"], item.metadata["image_height"] = image.size
50
+
51
+ return image
165
52
 
166
53
  def shutdown(self):
167
54
  """Shutdown the executor."""