PyPI - caption-flow - Versions diffs - 0.2.4__py3-none-any.whl → 0.3.2__py3-none-any.whl - Mend

caption-flow 0.2.4__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

caption_flow/__init__.py +1 -1
caption_flow/orchestrator.py +9 -9
caption_flow/processors/base.py +3 -0
caption_flow/processors/huggingface.py +637 -464
caption_flow/processors/local_filesystem.py +2 -0
caption_flow/processors/webdataset.py +438 -538
caption_flow/storage/manager.py +328 -305
caption_flow/utils/__init__.py +0 -2
caption_flow/utils/chunk_tracker.py +197 -164
caption_flow/utils/image_processor.py +19 -132
caption_flow/workers/caption.py +191 -138
{caption_flow-0.2.4.dist-info → caption_flow-0.3.2.dist-info}/METADATA +2 -1
caption_flow-0.3.2.dist-info/RECORD +33 -0
caption_flow/utils/dataset_loader.py +0 -222
caption_flow/utils/dataset_metadata_cache.py +0 -67
caption_flow/utils/job_queue.py +0 -41
caption_flow/utils/shard_processor.py +0 -119
caption_flow/utils/shard_tracker.py +0 -83
caption_flow-0.2.4.dist-info/RECORD +0 -38
{caption_flow-0.2.4.dist-info → caption_flow-0.3.2.dist-info}/WHEEL +0 -0
{caption_flow-0.2.4.dist-info → caption_flow-0.3.2.dist-info}/entry_points.txt +0 -0
{caption_flow-0.2.4.dist-info → caption_flow-0.3.2.dist-info}/licenses/LICENSE +0 -0
{caption_flow-0.2.4.dist-info → caption_flow-0.3.2.dist-info}/top_level.txt +0 -0

caption_flow/utils/image_processor.py CHANGED

@@ -10,6 +10,7 @@ from typing import List, Any, Optional, Tuple, Union
 import numpy as np
 import requests
 from PIL import Image
+from ..models import ProcessingItem
 logger = logging.getLogger(__name__)
@@ -21,104 +22,10 @@ class ImageProcessor:
     def __init__(self, num_workers: int = 4):
         self.executor = ProcessPoolExecutor(max_workers=num_workers)
-    async def process_batch(self, image_paths: List[Path]) -> List[np.ndarray]:
-        """Process a batch of images in parallel."""
-        loop = asyncio.get_event_loop()
-        tasks = []
-        for path in image_paths:
-            task = loop.run_in_executor(self.executor, self._process_image, path)
-            tasks.append(task)
-        return await asyncio.gather(*tasks)
-    @staticmethod
-    def _process_image(path: Path) -> np.ndarray:
-        """Process a single image."""
-        img = Image.open(path)
-        # Resize to standard size
-        img = img.resize((224, 224), Image.Resampling.LANCZOS)
-        # Convert to RGB if needed
-        if img.mode != "RGB":
-            img = img.convert("RGB")
-        # Convert to numpy array
-        arr = np.array(img, dtype=np.float32)
-        # Normalize
-        arr = arr / 255.0
-        return arr
-    @staticmethod
-    def process_image_data(img_data: Union[str, bytes, Image.Image]) -> Optional[bytes]:
-        """
-        Process various types of image data into bytes.
-        Args:
-            img_data: Can be a URL string, bytes, or PIL Image
-        Returns:
-            Image data as bytes, or None if processing failed
-        """
-        try:
-            if isinstance(img_data, str):
-                # It's a URL - download the image
-                try:
-                    # Download with timeout
-                    response = requests.get(
-                        img_data,
-                        timeout=30,
-                        headers={"User-Agent": "Mozilla/5.0 (captionflow-dataset-loader)"},
-                    )
-                    response.raise_for_status()
-                    image_data = response.content
-                    # Verify it's an image by trying to open it
-                    img = Image.open(BytesIO(image_data))
-                    img.verify()  # Verify it's a valid image
-                    return image_data
-                except Exception as e:
-                    logger.error(f"Failed to download image from {img_data}: {e}")
-                    return None
-            elif hasattr(img_data, "__class__") and "Image" in str(img_data.__class__):
-                # It's a PIL Image object
-                import io
-                # Save as PNG bytes
-                img_bytes = io.BytesIO()
-                # Convert to RGB
-                img_data = img_data.convert("RGB")
-                img_data.save(img_bytes, format="PNG")
-                return img_bytes.getvalue()
-            elif isinstance(img_data, bytes):
-                # Already bytes - validate it's an image
-                try:
-                    img = Image.open(BytesIO(img_data))
-                    img.verify()
-                    return img_data
-                except Exception as e:
-                    logger.error(f"Invalid image data: {e}")
-                    return None
-            else:
-                logger.warning(f"Unknown image data type: {type(img_data)}")
-                return None
-        except Exception as e:
-            logger.error(f"Error processing image data: {e}", exc_info=True)
-            return None
     @staticmethod
-    def prepare_for_inference(image: Image.Image) -> Image.Image:
+    def prepare_for_inference(item: ProcessingItem) -> Image.Image:
         """
-        Prepare image for inference, handling transparency and mostly black/white images.
+        Prepare image for inference.
         Args:
             image: PIL Image to prepare
@@ -126,42 +33,22 @@ class ImageProcessor:
         Returns:
             Prepared PIL Image
         """
-        # Convert to RGBA to handle transparency
-        img_rgba = image.convert("RGBA")
-        rgb_img = img_rgba.convert("RGB")
-        np_img = np.array(rgb_img)
-        # Calculate percentage of pixels that are (0,0,0) or (255,255,255)
-        total_pixels = np_img.shape[0] * np_img.shape[1]
-        black_pixels = np.all(np_img == [0, 0, 0], axis=-1).sum()
-        white_pixels = np.all(np_img == [255, 255, 255], axis=-1).sum()
-        black_pct = black_pixels / total_pixels
-        white_pct = white_pixels / total_pixels
-        threshold = 0.90  # 90% threshold
-        is_mostly_black = black_pct >= threshold
-        is_mostly_white = white_pct >= threshold
-        if is_mostly_black or is_mostly_white:
-            # Replace background with opposite color for better contrast
-            bg_color = (255, 255, 255) if is_mostly_black else (0, 0, 0)
-            background = Image.new("RGB", img_rgba.size, bg_color)
-            # Use alpha channel as mask if present
-            if img_rgba.mode == "RGBA":
-                background.paste(img_rgba.convert("RGB"), mask=img_rgba.split()[3])
-            else:
-                background.paste(img_rgba.convert("RGB"))
-            color_type = "black" if is_mostly_black else "white"
-            pct = black_pct if is_mostly_black else white_pct
-            logger.debug(
-                f"Image is {pct*100:.1f}% {color_type}; background replaced with {bg_color}"
-            )
-            return background
-        else:
-            return rgb_img
+        # We used to do a lot more hand-holding here with transparency, but oh well.
+        if item.image is not None:
+            image = item.image
+            item.metadata["image_width"], item.metadata["image_height"] = image.size
+            item.metadata["image_format"] = image.format or "unknown"
+            item.image = None
+            return image
+        item.image = None
+        image = Image.open(BytesIO(item.image_data))
+        item.image_data = b""
+        item.metadata["image_format"] = image.format or "unknown"
+        item.metadata["image_width"], item.metadata["image_height"] = image.size
+        return image
     def shutdown(self):
         """Shutdown the executor."""

caption-flow 0.2.4__py3-none-any.whl → 0.3.2__py3-none-any.whl

caption-flow 0.2.4__py3-none-any.whl → 0.3.2__py3-none-any.whl