imagebaker 0.0.49__tar.gz → 0.0.50__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {imagebaker-0.0.49 → imagebaker-0.0.50}/PKG-INFO +2 -2
- {imagebaker-0.0.49 → imagebaker-0.0.50}/README.md +1 -1
- {imagebaker-0.0.49 → imagebaker-0.0.50}/examples/loaded_models.py +20 -5
- {imagebaker-0.0.49 → imagebaker-0.0.50}/examples/rtdetr_v2.py +18 -112
- {imagebaker-0.0.49 → imagebaker-0.0.50}/examples/sam_model.py +14 -172
- imagebaker-0.0.50/examples/segmentation.py +152 -0
- imagebaker-0.0.50/imagebaker/__init__.py +9 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/core/configs/configs.py +1 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/core/defs/defs.py +4 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/layers/annotable_layer.py +32 -20
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/layers/base_layer.py +132 -1
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/layers/canvas_layer.py +52 -10
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/list_views/layer_settings.py +31 -2
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/models/base_model.py +1 -1
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/tabs/baker_tab.py +2 -9
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/tabs/layerify_tab.py +1 -0
- imagebaker-0.0.50/imagebaker/utils/__init__.py +3 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/utils/state_utils.py +5 -1
- imagebaker-0.0.50/imagebaker/utils/utils.py +26 -0
- imagebaker-0.0.50/imagebaker/utils/vis.py +174 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/workers/baker_worker.py +13 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker.egg-info/PKG-INFO +2 -2
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker.egg-info/SOURCES.txt +2 -0
- imagebaker-0.0.49/examples/segmentation.py +0 -288
- imagebaker-0.0.49/imagebaker/__init__.py +0 -5
- imagebaker-0.0.49/imagebaker/utils/__init__.py +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/.github/workflows/black-formatter.yml +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/.github/workflows/pypi.yml +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/.gitignore +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/CHANGELOG.md +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/LICENSE +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/TODO.md +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/assets/demo/annotated_veg_smiley.png +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/assets/demo/annotation_page.png +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/assets/demo/baker_page.png +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/assets/demo/drawing.png +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/assets/demo/options.png +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/assets/demo.gif +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/assets/desk.png +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/assets/favicon_io/README.md +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/assets/favicon_io/android-chrome-192x192.png +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/assets/favicon_io/android-chrome-512x512.png +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/assets/favicon_io/apple-touch-icon.png +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/assets/favicon_io/favicon-16x16.png +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/assets/favicon_io/favicon-32x32.png +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/assets/favicon_io/favicon.ico +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/assets/favicon_io/site.webmanifest +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/assets/me.jpg +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/assets/pen.png +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/assets/veg_smiley.jpg +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/docs/api-reference.md +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/docs/index.md +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/examples/app.py +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/examples/example_config.py +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/experiments/expt.ipynb +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/core/__init__.py +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/core/configs/__init__.py +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/core/defs/__init__.py +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/core/plugins/__init__.py +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/core/plugins/base_plugin.py +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/core/plugins/cosine_plugin.py +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/layers/__init__.py +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/list_views/__init__.py +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/list_views/annotation_list.py +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/list_views/canvas_list.py +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/list_views/image_list.py +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/list_views/layer_list.py +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/models/__init__.py +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/tabs/__init__.py +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/utils/image.py +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/utils/transform_mask.py +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/window/__init__.py +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/window/app.py +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/window/main_window.py +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/workers/__init__.py +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/workers/layerify_worker.py +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/workers/model_worker.py +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker.egg-info/dependency_links.txt +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker.egg-info/entry_points.txt +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker.egg-info/requires.txt +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker.egg-info/top_level.txt +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/mkdocs.yml +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/requirements.txt +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/setup.cfg +0 -0
- {imagebaker-0.0.49 → imagebaker-0.0.50}/setup.py +0 -0
{imagebaker-0.0.49 → imagebaker-0.0.50}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: imagebaker
-Version: 0.0.49
+Version: 0.0.50
 Summary: A package for baking images.
 Home-page: https://github.com/q-viper/Image-Baker
 Author: Ramkrishna Acharya

@@ -21,7 +21,7 @@ License-File: LICENSE
 
 <!--  -->
 
-
+[](https://pypi.org/imagebaker/)
 
 <p align="center">
 <img src="assets/demo.gif" alt="Centered Demo" />
{imagebaker-0.0.49 → imagebaker-0.0.50}/README.md

@@ -5,7 +5,7 @@
 
 <!--  -->
 
-
+[](https://pypi.org/imagebaker/)
 
 <p align="center">
 <img src="assets/demo.gif" alt="Centered Demo" />
{imagebaker-0.0.49 → imagebaker-0.0.50}/examples/loaded_models.py

@@ -19,6 +19,7 @@ from imagebaker import logger
 # YoloSegmentationModel,
 # YoloSegmentationModelConfig,
 # )
+
 # from examples.sam_model import SegmentAnythingModel, SAMModelConfig
 
 

@@ -38,14 +39,28 @@ class DetectionModel(BaseDetectionModel):
         return [get_dummy_prediction_result(self.config.model_type)]
 
 
-
+return_annotated_image = True
+# detector = RTDetrDetectionModel(
+# RTDetrModelConfig(return_annotated_image=return_annotated_image)
+# )
 
 # classification = ClassificationModel(
-# DefaultModelConfig(
+# DefaultModelConfig(
+# model_type=ModelType.CLASSIFICATION,
+# return_annotated_image=return_annotated_image,
+# )
+# )
+# segmentation = YoloSegmentationModel(
+# YoloSegmentationModelConfig(return_annotated_image=return_annotated_image)
 # )
-#
-#
-
+# prompt = SegmentAnythingModel(
+# SAMModelConfig(return_annotated_image=return_annotated_image)
+# )
+dummy_detector = DetectionModel(
+    DefaultModelConfig(
+        model_type=ModelType.DETECTION, return_annotated_image=return_annotated_image
+    )
+)
 
 
 LOADED_MODELS = {
{imagebaker-0.0.49 → imagebaker-0.0.50}/examples/rtdetr_v2.py

@@ -7,6 +7,8 @@ from transformers import RTDetrV2ForObjectDetection, RTDetrImageProcessor
 
 from loguru import logger
 import time
+from imagebaker.utils.vis import annotate_detection
+from imagebaker.utils import generate_color_map
 
 # Import your base classes
 from imagebaker.models.base_model import (

@@ -15,6 +17,7 @@ from imagebaker.models.base_model import (
     ModelType,
     PredictionResult,
 )
+from imagebaker.utils import generate_color_map
 
 
 class RTDetrModelConfig(DefaultModelConfig):
@@ -61,39 +64,15 @@ class RTDetrDetectionModel(BaseDetectionModel):
 
         # Generate color map for annotations if not provided
         if not self.config.color_map:
-            self.generate_color_map()
+            color_map = generate_color_map(len(self.config.class_names))
+            self.config.color_map = {
+                class_name: color_map[i]
+                for i, class_name in enumerate(self.config.class_names)
+            }
 
         logger.info(f"Loaded model with {len(self.config.class_names)} classes")
         logger.info(f"Model running on {self.config.device}")
 
-    def generate_color_map(self):
-        """Generate a color map for the classes"""
-        num_classes = len(self.config.class_names)
-        np.random.seed(42) # For reproducible colors
-
-        colors = {}
-        for i, class_name in enumerate(self.config.class_names):
-            # Generate distinct colors with good visibility
-            # Using HSV color space for better distribution
-            hue = i / num_classes
-            saturation = 0.8 + np.random.random() * 0.2
-            value = 0.8 + np.random.random() * 0.2
-
-            # Convert HSV to BGR (OpenCV uses BGR)
-            hsv_color = np.array(
-                [[[hue * 180, saturation * 255, value * 255]]], dtype=np.uint8
-            )
-            bgr_color = cv2.cvtColor(hsv_color, cv2.COLOR_HSV2BGR)[0][0]
-
-            # Store as (B, G, R) tuple
-            colors[class_name] = (
-                int(bgr_color[0]),
-                int(bgr_color[1]),
-                int(bgr_color[2]),
-            )
-
-        self.config.color_map = colors
-
     def preprocess(self, image: np.ndarray):
         """Convert numpy array to PIL Image for the RTDetr processor"""
         self._original_image = (
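The shared helper now imported from imagebaker.utils lives in the new imagebaker/utils/utils.py (+26 lines), whose contents are not included in this part of the diff. Judging from the per-model methods removed above and the two call sites (generate_color_map(len(self.config.class_names)) here, and generate_color_map() with no argument in sam_model.py), it plausibly looks like the hedged sketch below; the signature, the default count, and the integer-keyed return type are assumptions, not the verified 0.0.50 code.

```python
# Hypothetical sketch of imagebaker.utils.generate_color_map, reconstructed
# from the per-model methods removed in this diff; the real utils.py may differ.
import cv2
import numpy as np


def generate_color_map(num_colors: int = 20) -> dict:
    """Return a reproducible {index: (B, G, R)} color map spread over HSV hues."""
    np.random.seed(42)  # for reproducible colors
    colors = {}
    for i in range(num_colors):
        hue = i / num_colors
        saturation = 0.8 + np.random.random() * 0.2
        value = 0.8 + np.random.random() * 0.2
        # Convert HSV to BGR (OpenCV uses BGR ordering)
        hsv = np.array([[[hue * 180, saturation * 255, value * 255]]], dtype=np.uint8)
        bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)[0][0]
        colors[i] = (int(bgr[0]), int(bgr[1]), int(bgr[2]))
    return colors
```

At the RT-DETR call site above, the integer-keyed map is then re-keyed by class name.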
@@ -136,77 +115,6 @@ class RTDetrDetectionModel(BaseDetectionModel):
 
         return results[0] # Return the first (and only) result
 
-    def annotate_image(
-        self, image: np.ndarray, results: List[PredictionResult]
-    ) -> np.ndarray:
-        """
-        Draw bounding boxes and labels on the image
-
-        Args:
-            image: The original image as a numpy array
-            results: List of PredictionResult objects
-
-        Returns:
-            Annotated image as a numpy array
-        """
-        annotated_image = image.copy()
-
-        for result in results:
-            # Extract data from result
-            box = result.rectangle # [x1, y1, x2, y2]
-            score = result.score
-            class_name = result.class_name
-
-            if not box:
-                continue
-
-            # Get color for this class
-            color = self.config.color_map.get(
-                class_name, (0, 255, 0)
-            ) # Default to green if not found
-
-            # Draw bounding box
-            cv2.rectangle(
-                annotated_image,
-                (box[0], box[1]),
-                (box[2], box[3]),
-                color,
-                self.config.box_thickness,
-            )
-
-            # Prepare label text with class name and score
-            label_text = f"{class_name}: {score:.2f}"
-
-            # Calculate text size to create background rectangle
-            (text_width, text_height), baseline = cv2.getTextSize(
-                label_text,
-                self.config.font_face,
-                self.config.text_scale,
-                self.config.text_thickness,
-            )
-
-            # Draw text background
-            cv2.rectangle(
-                annotated_image,
-                (box[0], box[1] - text_height - 5),
-                (box[0] + text_width, box[1]),
-                color,
-                -1, # Fill the rectangle
-            )
-
-            # Draw text
-            cv2.putText(
-                annotated_image,
-                label_text,
-                (box[0], box[1] - 5),
-                self.config.font_face,
-                self.config.text_scale,
-                (255, 255, 255), # White text
-                self.config.text_thickness,
-            )
-
-        return annotated_image
-
     def postprocess(self, output) -> List[PredictionResult]:
         """Convert model output to PredictionResult objects"""
         results = []
@@ -236,19 +144,17 @@ class RTDetrDetectionModel(BaseDetectionModel):
                rectangle=[x, y, w, h],
                annotation_time=f"{annotation_time:.6f}",
            )
+            if self.config.return_annotated_image:
+                result.annotated_image = annotate_detection(
+                    self._original_image,
+                    [result],
+                    box_thickness=self.config.box_thickness,
+                    text_thickness=self.config.text_thickness,
+                    text_scale=self.config.text_scale,
+                    font_face=self.config.font_face,
+                    color_map=self.config.color_map,
+                )
 
             results.append(result)
 
-        # If needed, add annotated image
-        if (
-            self.config.return_annotated_image
-            and len(results) > 0
-            and hasattr(self, "_original_image")
-        ):
-            annotated_image = self.annotate_image(self._original_image, results)
-
-            # Update all results with the same annotated image
-            for result in results:
-                result.annotated_image = annotated_image
-
         return results
{imagebaker-0.0.49 → imagebaker-0.0.50}/examples/sam_model.py

@@ -16,6 +16,7 @@ from imagebaker.models.base_model import (
     ModelType,
     PredictionResult,
 )
+from imagebaker.utils import generate_color_map, mask_to_polygons, annotate_segmentation
 
 
 class SAMModelConfig(DefaultModelConfig):
@@ -30,7 +31,7 @@ class SAMModelConfig(DefaultModelConfig):
     )
     confidence_threshold: float = 0.5
     device: str = "cuda" if torch.cuda.is_available() else "cpu"
-    return_annotated_image: bool = True
+    return_annotated_image: bool = False
 
     # Segmentation specific settings
     points_per_side: int = 32 # Grid size for automatic point generation
@@ -78,33 +79,10 @@ class SegmentAnythingModel(BasePromptModel):
 
         # Generate color map for annotations if not provided
         if not self.config.color_map:
-            self.generate_color_map()
+            self.config.color_map = generate_color_map()
 
         logger.info(f"Model running on {self.config.device}")
 
-    def generate_color_map(self, num_colors: int = 20):
-        """Generate a color map for the segmentation masks"""
-        np.random.seed(42) # For reproducible colors
-
-        colors = {}
-        for i in range(num_colors):
-            # Generate distinct colors with good visibility
-            # Using HSV color space for better distribution
-            hue = i / num_colors
-            saturation = 0.8 + np.random.random() * 0.2
-            value = 0.8 + np.random.random() * 0.2
-
-            # Convert HSV to BGR (OpenCV uses BGR)
-            hsv_color = np.array(
-                [[[hue * 180, saturation * 255, value * 255]]], dtype=np.uint8
-            )
-            bgr_color = cv2.cvtColor(hsv_color, cv2.COLOR_HSV2BGR)[0][0]
-
-            # Store as (B, G, R) tuple
-            colors[i] = (int(bgr_color[0]), int(bgr_color[1]), int(bgr_color[2]))
-
-        self.config.color_map = colors
-
     def preprocess(self, image: np.ndarray):
         """Preprocess the image for SAM model"""
         self._original_image = (
@@ -168,142 +146,6 @@ class SegmentAnythingModel(BasePromptModel):
             "scores": scores,
         }
 
-    def mask_to_polygons(self, mask: np.ndarray) -> List[List[List[int]]]:
-        """
-        Convert a binary mask to a list of polygons.
-        Each polygon is a list of [x, y] coordinates.
-
-        Args:
-            mask: Binary mask as numpy array
-
-        Returns:
-            List of polygons, where each polygon is a list of [x, y] coordinates
-        """
-        # Find contours in the mask
-        contours = measure.find_contours(mask, 0.5)
-
-        # Convert to polygon format and simplify
-        polygons = []
-        for contour in contours:
-            # Skimage find_contours returns points in (row, col) format, convert to (x, y)
-            contour = np.fliplr(contour)
-
-            # Convert to integer coordinates
-            contour = contour.astype(np.int32)
-
-            # Simplify polygon with Douglas-Peucker algorithm
-            epsilon = self.config.polygon_epsilon
-            approx = cv2.approxPolyDP(contour.reshape(-1, 1, 2), epsilon, True)
-            approx = approx.reshape(-1, 2)
-
-            # Calculate polygon area
-            area = cv2.contourArea(approx.reshape(-1, 1, 2))
-
-            # Filter out small polygons
-            if area >= self.config.min_polygon_area:
-                # Convert to list format
-                poly = approx.tolist()
-                polygons.append(poly)
-
-        # Limit number of polygons
-        polygons = sorted(
-            polygons,
-            key=lambda p: cv2.contourArea(np.array(p).reshape(-1, 1, 2)),
-            reverse=True,
-        )
-        return polygons[: self.config.max_polygons_per_mask]
-
-    def annotate_image(
-        self, image: np.ndarray, results: List[PredictionResult]
-    ) -> np.ndarray:
-        """
-        Draw segmentation masks and contours on the image
-
-        Args:
-            image: The original image as a numpy array
-            results: List of PredictionResult objects
-
-        Returns:
-            Annotated image as a numpy array
-        """
-        annotated_image = image.copy()
-        mask_overlay = np.zeros_like(image)
-
-        for i, result in enumerate(results):
-            if (result.polygon is not None) or not result.mask:
-                continue
-
-            # Get color for this mask
-            color_idx = i % len(self.config.color_map)
-            color = self.config.color_map[color_idx]
-
-            # Create mask from polygons
-            mask = np.zeros((image.shape[0], image.shape[1]), dtype=np.uint8)
-            for poly in result.polygon:
-                # Convert polygon to numpy array
-                poly_np = np.array(poly, dtype=np.int32).reshape((-1, 1, 2))
-                # Fill polygon
-                cv2.fillPoly(mask, [poly_np], 1)
-
-            # Apply color to mask overlay
-            color_mask = np.zeros_like(image)
-            color_mask[mask == 1] = color
-            mask_overlay = cv2.addWeighted(mask_overlay, 1.0, color_mask, 1.0, 0)
-
-            # Draw contours
-            for poly in result.polygon:
-                poly_np = np.array(poly, dtype=np.int32).reshape((-1, 1, 2))
-                cv2.polylines(
-                    annotated_image,
-                    [poly_np],
-                    True,
-                    color,
-                    self.config.contour_thickness,
-                )
-
-            # Add label text
-            label_position = (
-                result.polygon[0][0]
-                if result.polygon and result.polygon[0]
-                else [10, 10]
-            )
-            label_text = f"{result.class_id}: {result.score:.2f}"
-
-            # Draw text background
-            (text_width, text_height), baseline = cv2.getTextSize(
-                label_text,
-                self.config.font_face,
-                self.config.text_scale,
-                self.config.text_thickness,
-            )
-
-            # Draw text background
-            cv2.rectangle(
-                annotated_image,
-                (label_position[0], label_position[1] - text_height - 5),
-                (label_position[0] + text_width, label_position[1]),
-                color,
-                -1, # Fill the rectangle
-            )
-
-            # Draw text
-            cv2.putText(
-                annotated_image,
-                label_text,
-                (label_position[0], label_position[1] - 5),
-                self.config.font_face,
-                self.config.text_scale,
-                (255, 255, 255), # White text
-                self.config.text_thickness,
-            )
-
-        # Blend mask overlay with original image
-        annotated_image = cv2.addWeighted(
-            annotated_image, 1.0, mask_overlay, self.config.mask_opacity, 0
-        )
-
-        return annotated_image
-
     def postprocess(self, outputs) -> List[PredictionResult]:
         """Convert model outputs to PredictionResult objects"""
         results = []
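The mask_to_polygons helper removed here is now imported from imagebaker.utils. Since the new call site passes only the mask, a reasonable guess is that the former config lookups became keyword arguments with defaults, roughly as sketched below; the signature and default values are assumptions (the defaults are taken from the config classes shown elsewhere in this diff), not the shipped utils code.

```python
# Hypothetical sketch of imagebaker.utils.mask_to_polygons, reconstructed from
# the instance method removed above; signature and defaults are assumptions.
from typing import List

import cv2
import numpy as np
from skimage import measure


def mask_to_polygons(
    mask: np.ndarray,
    epsilon: float = 1.0,
    min_polygon_area: int = 100,
    max_polygons_per_mask: int = 5,
) -> List[List[List[int]]]:
    """Convert a binary mask into simplified [x, y] polygons, largest first."""
    polygons = []
    for contour in measure.find_contours(mask, 0.5):
        # find_contours returns (row, col); flip to (x, y) and round to ints
        contour = np.fliplr(contour).astype(np.int32)
        # Simplify with Douglas-Peucker and drop small fragments
        approx = cv2.approxPolyDP(contour.reshape(-1, 1, 2), epsilon, True).reshape(-1, 2)
        if cv2.contourArea(approx.reshape(-1, 1, 2)) >= min_polygon_area:
            polygons.append(approx.tolist())
    polygons.sort(
        key=lambda p: cv2.contourArea(np.array(p).reshape(-1, 1, 2)), reverse=True
    )
    return polygons[:max_polygons_per_mask]
```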
@@ -328,12 +170,21 @@ class SegmentAnythingModel(BasePromptModel):
 
             # Convert mask to polygons
             mask_np = mask.cpu().numpy()
-            polygons = self.mask_to_polygons(mask_np)
+            polygons = mask_to_polygons(mask_np)
             # polygons = np.array(polygons)
 
             if not polygons:
                 continue
             for p, polygon in enumerate(polygons):
+                annotated_image = (
+                    annotate_segmentation(
+                        self._original_image,
+                        results,
+                        self.config.color_map,
+                    )
+                    if self.config.return_annotated_image
+                    else None
+                )
 
                 # Create result
                 results.append(
@@ -344,17 +195,8 @@ class SegmentAnythingModel(BasePromptModel):
                        mask=np.argwhere(mask_np > 0.5),
                        polygon=np.array(polygon).astype(np.int32),
                        annotation_time=f"{annotation_time:.6f}",
+                        annotated_image=annotated_image,
                    )
                )
 
-        # Add annotated image if requested
-        if (
-            self.config.return_annotated_image
-            and results
-            and hasattr(self, "_original_image")
-        ):
-            annotated = self.annotate_image(self._original_image, results)
-            for r in results:
-                r.annotated_image = annotated
-
         return results
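annotate_segmentation in imagebaker/utils/vis.py is called with (image, results, color_map) both here and in the new examples/segmentation.py, but its body is not shown in this section. The condensed sketch below is consistent with the SAM annotate_image method removed above; it keeps only the polygon fill, contour, and blend steps and drops the label drawing for brevity, so it is an approximation rather than the shipped code, and the extra keyword defaults are assumptions.

```python
# Hypothetical, condensed sketch of imagebaker.utils.vis.annotate_segmentation;
# reconstructed from the removed SAM annotate_image method (labels omitted).
from typing import List

import cv2
import numpy as np


def annotate_segmentation(
    image: np.ndarray,
    results: List,  # PredictionResult objects carrying .polygon
    color_map: dict,
    mask_opacity: float = 0.5,
    contour_thickness: int = 2,
) -> np.ndarray:
    annotated = image.copy()
    overlay = np.zeros_like(image)
    for i, result in enumerate(results):
        if result.polygon is None:
            continue
        color = color_map[i % len(color_map)]
        poly_np = np.array(result.polygon, dtype=np.int32).reshape((-1, 1, 2))
        # Fill the polygon on the overlay and outline it on the image
        cv2.fillPoly(overlay, [poly_np], color)
        cv2.polylines(annotated, [poly_np], True, color, contour_thickness)
    # Blend the colored overlay onto the annotated image
    return cv2.addWeighted(annotated, 1.0, overlay, mask_opacity, 0)
```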
imagebaker-0.0.50/examples/segmentation.py (new file)

@@ -0,0 +1,152 @@
+# based on https://docs.ultralytics.com/tasks/segment/#how-do-i-load-and-validate-a-pretrained-yolo-segmentation-model
+
+from typing import List, Tuple
+import numpy as np
+import cv2
+from loguru import logger
+import time
+from ultralytics import YOLO
+import torch
+
+
+from imagebaker.models.base_model import (
+    BaseSegmentationModel,
+    DefaultModelConfig,
+    ModelType,
+    PredictionResult,
+)
+from imagebaker.utils import mask_to_polygons, annotate_segmentation, generate_color_map
+
+
+class YoloSegmentationModelConfig(DefaultModelConfig):
+    model_type: ModelType = ModelType.SEGMENTATION
+    model_name: str = "YOLOv8-Segmentation"
+    model_description: str = "YOLOv8 model for instance segmentation"
+    model_version: str = "yolov8n-seg"
+    model_author: str = "Ultralytics"
+    model_license: str = "AGPL-3.0"
+    pretrained_model_name: str = "yolo11n-seg.pt"
+    confidence_threshold: float = 0.5
+    device: str = "cuda" if torch.cuda.is_available() else "cpu"
+    return_annotated_image: bool = False
+
+    # Segmentation specific settings
+    polygon_epsilon: float = 1.0 # Douglas-Peucker algorithm epsilon
+    min_polygon_area: int = 100 # Minimum area for a polygon to be considered
+    max_polygons_per_mask: int = 5 # Maximum number of polygons per mask
+
+    # Annotation parameters
+    mask_opacity: float = 0.5 # Opacity of mask overlay
+    contour_thickness: int = 2 # Thickness of contour lines
+    text_thickness: int = 1 # Thickness of text
+    text_scale: float = 0.5 # Text scale
+    font_face: int = cv2.FONT_HERSHEY_SIMPLEX
+    color_map: dict = {} # Will be auto-generated
+
+
+class YoloSegmentationModel(BaseSegmentationModel):
+    def __init__(
+        self, config: YoloSegmentationModelConfig = YoloSegmentationModelConfig()
+    ):
+        super().__init__(config)
+
+    def setup(self):
+        """Initialize the YOLO model"""
+        logger.info(f"Loading YOLO model from {self.config.pretrained_model_name}")
+
+        # Load the YOLO model
+        self.model = YOLO(self.config.pretrained_model_name)
+
+        # Generate color map for annotations if not provided
+        if not self.config.color_map:
+            self.config.color_map = generate_color_map()
+
+        logger.info(f"Model running on {self.config.device}")
+
+    def preprocess(self, image: np.ndarray):
+        """Preprocess the image for the model"""
+        self._original_image = (
+            image.copy() if isinstance(image, np.ndarray) else np.array(image)
+        )
+        return image
+
+    def predict_mask(self, image):
+        """Run segmentation on the input image using YOLO"""
+        # Run inference
+        results = self.model(image)
+
+        # Extract masks, polygons, and scores
+        masks = []
+        polygons = []
+        scores = []
+        class_ids = []
+        for result in results:
+            if result.masks is not None:
+                for mask in result.masks.data:
+                    masks.append(mask.cpu().numpy())
+                for polygon in result.masks.xy:
+                    polygons.append(polygon.tolist())
+                scores.extend(result.boxes.conf.cpu().numpy().tolist())
+
+                class_ids.extend(result.boxes.cls.cpu().numpy().tolist())
+
+        return {
+            "masks": masks,
+            "polygons": polygons,
+            "scores": scores,
+            "class_ids": class_ids,
+        }
+
+    def postprocess(self, outputs) -> List[PredictionResult]:
+        """Convert model outputs to PredictionResult objects with polygons"""
+        results: list[PredictionResult] = []
+        masks = outputs["masks"]
+        polygons = outputs["polygons"]
+        scores = outputs["scores"]
+        class_ids = outputs["class_ids"]
+
+        annotation_time = time.time()
+
+        for i, (mask, polygon, score) in enumerate(zip(masks, polygons, scores)):
+            # Skip masks with low scores
+            if score < self.config.confidence_threshold:
+                continue
+
+            # Convert mask to polygons (if not already provided)
+            if not polygon:
+                polygon = mask_to_polygons(mask)
+
+            if not polygon: # Skip if no valid polygons found
+                continue
+
+            # Create a flattened mask for the result
+            mask_coords = np.argwhere(mask > 0.5)
+            mask_coords = mask_coords.tolist() if len(mask_coords) > 0 else None
+            polygon = np.array(polygon).astype(np.int32)
+
+            # Create a PredictionResult
+            result = PredictionResult(
+                class_name=self.model.names[class_ids[i]],
+                class_id=i,
+                score=float(score),
+                polygon=polygon,
+                annotation_time=f"{annotation_time:.6f}",
+            )
+
+            results.append(result)
+
+        # If needed, add annotated image
+        if (
+            self.config.return_annotated_image
+            and len(results) > 0
+            and hasattr(self, "_original_image")
+        ):
+            annotated_image = annotate_segmentation(
+                self._original_image, results, self.config.color_map
+            )
+
+            # Update all results with the same annotated image
+            for result in results:
+                result.annotated_image = annotated_image
+
+        return results
{imagebaker-0.0.49 → imagebaker-0.0.50}/imagebaker/core/defs/defs.py

@@ -76,6 +76,8 @@ class LayerState:
     is_annotable: bool = True
     status: str = "Ready"
     drawing_states: list[DrawingState] = field(default_factory=list)
+    edge_opacity: int = 100
+    edge_width: int = 10
 
     def copy(self):
         return LayerState(

@@ -105,6 +107,8 @@ class LayerState:
                )
                for d in self.drawing_states
            ],
+            edge_opacity=self.edge_opacity,
+            edge_width=self.edge_width,
        )
 
 