matrice-analytics 0.1.60__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- matrice_analytics/__init__.py +28 -0
- matrice_analytics/boundary_drawing_internal/README.md +305 -0
- matrice_analytics/boundary_drawing_internal/__init__.py +45 -0
- matrice_analytics/boundary_drawing_internal/boundary_drawing_internal.py +1207 -0
- matrice_analytics/boundary_drawing_internal/boundary_drawing_tool.py +429 -0
- matrice_analytics/boundary_drawing_internal/boundary_tool_template.html +1036 -0
- matrice_analytics/boundary_drawing_internal/data/.gitignore +12 -0
- matrice_analytics/boundary_drawing_internal/example_usage.py +206 -0
- matrice_analytics/boundary_drawing_internal/usage/README.md +110 -0
- matrice_analytics/boundary_drawing_internal/usage/boundary_drawer_launcher.py +102 -0
- matrice_analytics/boundary_drawing_internal/usage/simple_boundary_launcher.py +107 -0
- matrice_analytics/post_processing/README.md +455 -0
- matrice_analytics/post_processing/__init__.py +732 -0
- matrice_analytics/post_processing/advanced_tracker/README.md +650 -0
- matrice_analytics/post_processing/advanced_tracker/__init__.py +17 -0
- matrice_analytics/post_processing/advanced_tracker/base.py +99 -0
- matrice_analytics/post_processing/advanced_tracker/config.py +77 -0
- matrice_analytics/post_processing/advanced_tracker/kalman_filter.py +370 -0
- matrice_analytics/post_processing/advanced_tracker/matching.py +195 -0
- matrice_analytics/post_processing/advanced_tracker/strack.py +230 -0
- matrice_analytics/post_processing/advanced_tracker/tracker.py +367 -0
- matrice_analytics/post_processing/config.py +146 -0
- matrice_analytics/post_processing/core/__init__.py +63 -0
- matrice_analytics/post_processing/core/base.py +704 -0
- matrice_analytics/post_processing/core/config.py +3291 -0
- matrice_analytics/post_processing/core/config_utils.py +925 -0
- matrice_analytics/post_processing/face_reg/__init__.py +43 -0
- matrice_analytics/post_processing/face_reg/compare_similarity.py +556 -0
- matrice_analytics/post_processing/face_reg/embedding_manager.py +950 -0
- matrice_analytics/post_processing/face_reg/face_recognition.py +2234 -0
- matrice_analytics/post_processing/face_reg/face_recognition_client.py +606 -0
- matrice_analytics/post_processing/face_reg/people_activity_logging.py +321 -0
- matrice_analytics/post_processing/ocr/__init__.py +0 -0
- matrice_analytics/post_processing/ocr/easyocr_extractor.py +250 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/__init__.py +9 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/cli/__init__.py +4 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/cli/cli.py +33 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/cli/dataset_stats.py +139 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/cli/export.py +398 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/cli/train.py +447 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/cli/utils.py +129 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/cli/valid.py +93 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/cli/validate_dataset.py +240 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/cli/visualize_augmentation.py +176 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/cli/visualize_predictions.py +96 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/core/__init__.py +3 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/core/process.py +246 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/core/types.py +60 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/core/utils.py +87 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/inference/__init__.py +3 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/inference/config.py +82 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/inference/hub.py +141 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/inference/plate_recognizer.py +323 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/py.typed +0 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/train/__init__.py +0 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/train/data/__init__.py +0 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/train/data/augmentation.py +101 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/train/data/dataset.py +97 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/train/model/__init__.py +0 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/train/model/config.py +114 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/train/model/layers.py +553 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/train/model/loss.py +55 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/train/model/metric.py +86 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/train/model/model_builders.py +95 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/train/model/model_schema.py +395 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/train/utilities/__init__.py +0 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/train/utilities/backend_utils.py +38 -0
- matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/train/utilities/utils.py +214 -0
- matrice_analytics/post_processing/ocr/postprocessing.py +270 -0
- matrice_analytics/post_processing/ocr/preprocessing.py +52 -0
- matrice_analytics/post_processing/post_processor.py +1175 -0
- matrice_analytics/post_processing/test_cases/__init__.py +1 -0
- matrice_analytics/post_processing/test_cases/run_tests.py +143 -0
- matrice_analytics/post_processing/test_cases/test_advanced_customer_service.py +841 -0
- matrice_analytics/post_processing/test_cases/test_basic_counting_tracking.py +523 -0
- matrice_analytics/post_processing/test_cases/test_comprehensive.py +531 -0
- matrice_analytics/post_processing/test_cases/test_config.py +852 -0
- matrice_analytics/post_processing/test_cases/test_customer_service.py +585 -0
- matrice_analytics/post_processing/test_cases/test_data_generators.py +583 -0
- matrice_analytics/post_processing/test_cases/test_people_counting.py +510 -0
- matrice_analytics/post_processing/test_cases/test_processor.py +524 -0
- matrice_analytics/post_processing/test_cases/test_usecases.py +165 -0
- matrice_analytics/post_processing/test_cases/test_utilities.py +356 -0
- matrice_analytics/post_processing/test_cases/test_utils.py +743 -0
- matrice_analytics/post_processing/usecases/Histopathological_Cancer_Detection_img.py +604 -0
- matrice_analytics/post_processing/usecases/__init__.py +267 -0
- matrice_analytics/post_processing/usecases/abandoned_object_detection.py +797 -0
- matrice_analytics/post_processing/usecases/advanced_customer_service.py +1601 -0
- matrice_analytics/post_processing/usecases/age_detection.py +842 -0
- matrice_analytics/post_processing/usecases/age_gender_detection.py +1085 -0
- matrice_analytics/post_processing/usecases/anti_spoofing_detection.py +656 -0
- matrice_analytics/post_processing/usecases/assembly_line_detection.py +841 -0
- matrice_analytics/post_processing/usecases/banana_defect_detection.py +624 -0
- matrice_analytics/post_processing/usecases/basic_counting_tracking.py +667 -0
- matrice_analytics/post_processing/usecases/blood_cancer_detection_img.py +881 -0
- matrice_analytics/post_processing/usecases/car_damage_detection.py +834 -0
- matrice_analytics/post_processing/usecases/car_part_segmentation.py +946 -0
- matrice_analytics/post_processing/usecases/car_service.py +1601 -0
- matrice_analytics/post_processing/usecases/cardiomegaly_classification.py +864 -0
- matrice_analytics/post_processing/usecases/cell_microscopy_segmentation.py +897 -0
- matrice_analytics/post_processing/usecases/chicken_pose_detection.py +648 -0
- matrice_analytics/post_processing/usecases/child_monitoring.py +814 -0
- matrice_analytics/post_processing/usecases/color/clip.py +660 -0
- matrice_analytics/post_processing/usecases/color/clip_processor/merges.txt +48895 -0
- matrice_analytics/post_processing/usecases/color/clip_processor/preprocessor_config.json +28 -0
- matrice_analytics/post_processing/usecases/color/clip_processor/special_tokens_map.json +30 -0
- matrice_analytics/post_processing/usecases/color/clip_processor/tokenizer.json +245079 -0
- matrice_analytics/post_processing/usecases/color/clip_processor/tokenizer_config.json +32 -0
- matrice_analytics/post_processing/usecases/color/clip_processor/vocab.json +1 -0
- matrice_analytics/post_processing/usecases/color/color_map_utils.py +70 -0
- matrice_analytics/post_processing/usecases/color/color_mapper.py +468 -0
- matrice_analytics/post_processing/usecases/color_detection.py +1936 -0
- matrice_analytics/post_processing/usecases/color_map_utils.py +70 -0
- matrice_analytics/post_processing/usecases/concrete_crack_detection.py +827 -0
- matrice_analytics/post_processing/usecases/crop_weed_detection.py +781 -0
- matrice_analytics/post_processing/usecases/customer_service.py +1008 -0
- matrice_analytics/post_processing/usecases/defect_detection_products.py +936 -0
- matrice_analytics/post_processing/usecases/distracted_driver_detection.py +822 -0
- matrice_analytics/post_processing/usecases/drone_traffic_monitoring.py +585 -0
- matrice_analytics/post_processing/usecases/drowsy_driver_detection.py +829 -0
- matrice_analytics/post_processing/usecases/dwell_detection.py +829 -0
- matrice_analytics/post_processing/usecases/emergency_vehicle_detection.py +827 -0
- matrice_analytics/post_processing/usecases/face_emotion.py +813 -0
- matrice_analytics/post_processing/usecases/face_recognition.py +827 -0
- matrice_analytics/post_processing/usecases/fashion_detection.py +835 -0
- matrice_analytics/post_processing/usecases/field_mapping.py +902 -0
- matrice_analytics/post_processing/usecases/fire_detection.py +1146 -0
- matrice_analytics/post_processing/usecases/flare_analysis.py +836 -0
- matrice_analytics/post_processing/usecases/flower_segmentation.py +1006 -0
- matrice_analytics/post_processing/usecases/gas_leak_detection.py +837 -0
- matrice_analytics/post_processing/usecases/gender_detection.py +832 -0
- matrice_analytics/post_processing/usecases/human_activity_recognition.py +871 -0
- matrice_analytics/post_processing/usecases/intrusion_detection.py +1672 -0
- matrice_analytics/post_processing/usecases/leaf.py +821 -0
- matrice_analytics/post_processing/usecases/leaf_disease.py +840 -0
- matrice_analytics/post_processing/usecases/leak_detection.py +837 -0
- matrice_analytics/post_processing/usecases/license_plate_detection.py +1188 -0
- matrice_analytics/post_processing/usecases/license_plate_monitoring.py +1781 -0
- matrice_analytics/post_processing/usecases/litter_monitoring.py +717 -0
- matrice_analytics/post_processing/usecases/mask_detection.py +869 -0
- matrice_analytics/post_processing/usecases/natural_disaster.py +907 -0
- matrice_analytics/post_processing/usecases/parking.py +787 -0
- matrice_analytics/post_processing/usecases/parking_space_detection.py +822 -0
- matrice_analytics/post_processing/usecases/pcb_defect_detection.py +888 -0
- matrice_analytics/post_processing/usecases/pedestrian_detection.py +808 -0
- matrice_analytics/post_processing/usecases/people_counting.py +706 -0
- matrice_analytics/post_processing/usecases/people_counting_bckp.py +1683 -0
- matrice_analytics/post_processing/usecases/people_tracking.py +1842 -0
- matrice_analytics/post_processing/usecases/pipeline_detection.py +605 -0
- matrice_analytics/post_processing/usecases/plaque_segmentation_img.py +874 -0
- matrice_analytics/post_processing/usecases/pothole_segmentation.py +915 -0
- matrice_analytics/post_processing/usecases/ppe_compliance.py +645 -0
- matrice_analytics/post_processing/usecases/price_tag_detection.py +822 -0
- matrice_analytics/post_processing/usecases/proximity_detection.py +1901 -0
- matrice_analytics/post_processing/usecases/road_lane_detection.py +623 -0
- matrice_analytics/post_processing/usecases/road_traffic_density.py +832 -0
- matrice_analytics/post_processing/usecases/road_view_segmentation.py +915 -0
- matrice_analytics/post_processing/usecases/shelf_inventory_detection.py +583 -0
- matrice_analytics/post_processing/usecases/shoplifting_detection.py +822 -0
- matrice_analytics/post_processing/usecases/shopping_cart_analysis.py +899 -0
- matrice_analytics/post_processing/usecases/skin_cancer_classification_img.py +864 -0
- matrice_analytics/post_processing/usecases/smoker_detection.py +833 -0
- matrice_analytics/post_processing/usecases/solar_panel.py +810 -0
- matrice_analytics/post_processing/usecases/suspicious_activity_detection.py +1030 -0
- matrice_analytics/post_processing/usecases/template_usecase.py +380 -0
- matrice_analytics/post_processing/usecases/theft_detection.py +648 -0
- matrice_analytics/post_processing/usecases/traffic_sign_monitoring.py +724 -0
- matrice_analytics/post_processing/usecases/underground_pipeline_defect_detection.py +775 -0
- matrice_analytics/post_processing/usecases/underwater_pollution_detection.py +842 -0
- matrice_analytics/post_processing/usecases/vehicle_monitoring.py +1029 -0
- matrice_analytics/post_processing/usecases/warehouse_object_segmentation.py +899 -0
- matrice_analytics/post_processing/usecases/waterbody_segmentation.py +923 -0
- matrice_analytics/post_processing/usecases/weapon_detection.py +771 -0
- matrice_analytics/post_processing/usecases/weld_defect_detection.py +615 -0
- matrice_analytics/post_processing/usecases/wildlife_monitoring.py +898 -0
- matrice_analytics/post_processing/usecases/windmill_maintenance.py +834 -0
- matrice_analytics/post_processing/usecases/wound_segmentation.py +856 -0
- matrice_analytics/post_processing/utils/__init__.py +150 -0
- matrice_analytics/post_processing/utils/advanced_counting_utils.py +400 -0
- matrice_analytics/post_processing/utils/advanced_helper_utils.py +317 -0
- matrice_analytics/post_processing/utils/advanced_tracking_utils.py +461 -0
- matrice_analytics/post_processing/utils/alerting_utils.py +213 -0
- matrice_analytics/post_processing/utils/category_mapping_utils.py +94 -0
- matrice_analytics/post_processing/utils/color_utils.py +592 -0
- matrice_analytics/post_processing/utils/counting_utils.py +182 -0
- matrice_analytics/post_processing/utils/filter_utils.py +261 -0
- matrice_analytics/post_processing/utils/format_utils.py +293 -0
- matrice_analytics/post_processing/utils/geometry_utils.py +300 -0
- matrice_analytics/post_processing/utils/smoothing_utils.py +358 -0
- matrice_analytics/post_processing/utils/tracking_utils.py +234 -0
- matrice_analytics/py.typed +0 -0
- matrice_analytics-0.1.60.dist-info/METADATA +481 -0
- matrice_analytics-0.1.60.dist-info/RECORD +196 -0
- matrice_analytics-0.1.60.dist-info/WHEEL +5 -0
- matrice_analytics-0.1.60.dist-info/licenses/LICENSE.txt +21 -0
- matrice_analytics-0.1.60.dist-info/top_level.txt +1 -0
matrice_analytics/post_processing/utils/format_utils.py
@@ -0,0 +1,293 @@
"""
Format conversion utilities for post-processing operations.
"""

from typing import Any, Dict, List, Optional
from ..core.base import ResultFormat


def match_results_structure(results):
    """
    Match the results structure to the expected structure based on actual output formats.

    Based on eg_output.json:
    - Classification: {"category": str, "confidence": float}
    - Detection: [{"bounding_box": {...}, "category": str, "confidence": float}, ...]
    - Instance Segmentation: Same as detection, but with a "masks" field
    - Object Tracking: {"frame_id": [{"track_id": int, "category": str, "confidence": float, "bounding_box": {...}}, ...]}
    - Activity Recognition: {"frame_id": [{"category": str, "confidence": float, "bounding_box": {...}}, ...]} (no track_id)

    Args:
        results: Raw model output to analyze

    Returns:
        ResultFormat: Detected format type
    """
    if isinstance(results, list):
        # Array format - detection, instance segmentation, or face recognition
        if len(results) > 0 and isinstance(results[0], dict):
            if results[0].get("masks"):
                return ResultFormat.INSTANCE_SEGMENTATION
            elif results[0].get("embedding") or results[0].get("landmarks"):
                return ResultFormat.FACE_RECOGNITION
            elif "bounding_box" in results[0] and "category" in results[0] and "confidence" in results[0]:
                return ResultFormat.DETECTION
        return ResultFormat.DETECTION  # Default for list format

    elif isinstance(results, dict):
        # Check if it's a simple classification result
        if "category" in results and "confidence" in results and len(results) == 2:
            return ResultFormat.CLASSIFICATION

        # Check if it's frame-based (tracking or activity recognition)
        # Keys should be frame numbers or frame identifiers
        frame_keys = list(results.keys())
        if frame_keys and all(isinstance(k, (str, int)) for k in frame_keys):
            # Check the first frame's content to determine type
            first_frame_data = list(results.values())[0]
            if isinstance(first_frame_data, list) and len(first_frame_data) > 0:
                first_detection = first_frame_data[0]
                if isinstance(first_detection, dict):
                    # Check for face recognition format first (has embedding or landmarks)
                    if first_detection.get("embedding") or first_detection.get("landmarks"):
                        return ResultFormat.FACE_RECOGNITION
                    # Check if it has track_id (object tracking) or not (activity recognition)
                    elif "track_id" in first_detection:
                        return ResultFormat.OBJECT_TRACKING
                    elif "category" in first_detection and "confidence" in first_detection:
                        return ResultFormat.ACTIVITY_RECOGNITION

        # If we can't determine the type, check for typical classification structure
        if "category" in results and "confidence" in results:
            return ResultFormat.CLASSIFICATION

    return ResultFormat.UNKNOWN
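For reference, a minimal doctest-style sketch of how this detector routes the documented payload shapes. The import paths are inferred from the package layout in the file list above, and ResultFormat members are the ones referenced in the code:

>>> from matrice_analytics.post_processing.utils.format_utils import match_results_structure
>>> from matrice_analytics.post_processing.core.base import ResultFormat
>>> match_results_structure({"category": "cat", "confidence": 0.98}) == ResultFormat.CLASSIFICATION
True
>>> det = [{"bounding_box": {"xmin": 0, "ymin": 0, "xmax": 10, "ymax": 10},
...         "category": "person", "confidence": 0.9}]
>>> match_results_structure(det) == ResultFormat.DETECTION
True
>>> tracked = {"0": [dict(det[0], track_id=7)]}   # frame-keyed dict with track_id
>>> match_results_structure(tracked) == ResultFormat.OBJECT_TRACKING
True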
def convert_to_coco_format(results: Any) -> List[Dict]:
    """
    Convert results to COCO format.

    Args:
        results: Input results in any supported format

    Returns:
        List[Dict]: Results in COCO format
    """
    if isinstance(results, list):
        # Already in detection format, convert to COCO
        coco_results = []
        for i, detection in enumerate(results):
            bbox = detection.get("bounding_box", detection.get("bbox", {}))

            # Convert to COCO bbox format [x, y, width, height]
            if "xmin" in bbox:
                coco_bbox = [
                    bbox["xmin"],
                    bbox["ymin"],
                    bbox["xmax"] - bbox["xmin"],
                    bbox["ymax"] - bbox["ymin"]
                ]
            elif "x1" in bbox:
                coco_bbox = [
                    bbox["x1"],
                    bbox["y1"],
                    bbox["x2"] - bbox["x1"],
                    bbox["y2"] - bbox["y1"]
                ]
            else:
                # Assume generic format
                values = list(bbox.values())
                coco_bbox = [values[0], values[1], values[2] - values[0], values[3] - values[1]]

            coco_result = {
                "id": i,
                "category_id": detection.get("category_id", 0),
                "category": detection.get("category", "unknown"),
                "bbox": coco_bbox,
                "score": detection.get("confidence", 0.0),
                "area": coco_bbox[2] * coco_bbox[3]
            }

            if "masks" in detection:
                coco_result["segmentation"] = detection["masks"]

            # Add face recognition specific fields if present
            if "embedding" in detection:
                coco_result["embedding"] = detection["embedding"]
            if "landmarks" in detection:
                coco_result["landmarks"] = detection["landmarks"]

            coco_results.append(coco_result)

        return coco_results

    elif isinstance(results, dict):
        # Handle frame-based results
        coco_results = []
        result_id = 0

        for frame_id, detections in results.items():
            if isinstance(detections, list):
                for detection in detections:
                    bbox = detection.get("bounding_box", detection.get("bbox", {}))

                    # Convert to COCO bbox format
                    if "xmin" in bbox:
                        coco_bbox = [
                            bbox["xmin"],
                            bbox["ymin"],
                            bbox["xmax"] - bbox["xmin"],
                            bbox["ymax"] - bbox["ymin"]
                        ]
                    else:
                        values = list(bbox.values())
                        coco_bbox = [values[0], values[1], values[2] - values[0], values[3] - values[1]]

                    coco_result = {
                        "id": result_id,
                        "frame_id": frame_id,
                        "category_id": detection.get("category_id", 0),
                        "category": detection.get("category", "unknown"),
                        "bbox": coco_bbox,
                        "score": detection.get("confidence", 0.0),
                        "area": coco_bbox[2] * coco_bbox[3]
                    }

                    if "track_id" in detection:
                        coco_result["track_id"] = detection["track_id"]

                    # Add face recognition specific fields if present
                    if "embedding" in detection:
                        coco_result["embedding"] = detection["embedding"]
                    if "landmarks" in detection:
                        coco_result["landmarks"] = detection["landmarks"]

                    coco_results.append(coco_result)
                    result_id += 1

        return coco_results

    return []
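A worked example of the corner-to-COCO conversion above, hand-traced from the code (not captured from package output): a box with corners (10, 20) and (50, 80) becomes [x, y, width, height] = [10, 20, 40, 60] with area 40 * 60 = 2400.

>>> from matrice_analytics.post_processing.utils.format_utils import convert_to_coco_format
>>> convert_to_coco_format([{"bounding_box": {"xmin": 10, "ymin": 20, "xmax": 50, "ymax": 80},
...                          "category": "person", "confidence": 0.9}])
[{'id': 0, 'category_id': 0, 'category': 'person', 'bbox': [10, 20, 40, 60], 'score': 0.9, 'area': 2400}]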
def convert_to_yolo_format(results: Any) -> List[List[float]]:
    """
    Convert results to YOLO format (normalized coordinates).

    Note: this converts corner coordinates to center/width/height; the output
    is only in the [0, 1] range if the input boxes already are, since no image
    dimensions are available here.

    Args:
        results: Input results in any supported format

    Returns:
        List[List[float]]: Results in YOLO format [class_id, x_center, y_center, width, height, confidence]
    """
    yolo_results = []

    if isinstance(results, list):
        for detection in results:
            bbox = detection.get("bounding_box", detection.get("bbox", {}))

            # Convert to center coordinates
            if "xmin" in bbox:
                x_center = (bbox["xmin"] + bbox["xmax"]) / 2
                y_center = (bbox["ymin"] + bbox["ymax"]) / 2
                width = bbox["xmax"] - bbox["xmin"]
                height = bbox["ymax"] - bbox["ymin"]
            else:
                values = list(bbox.values())
                x_center = (values[0] + values[2]) / 2
                y_center = (values[1] + values[3]) / 2
                width = values[2] - values[0]
                height = values[3] - values[1]

            yolo_result = [
                detection.get("category_id", 0),
                x_center,
                y_center,
                width,
                height,
                detection.get("confidence", 0.0)
            ]
            yolo_results.append(yolo_result)

    return yolo_results
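Hand-computed behaviour of the converter above. Note the outputs stay in the input's units (pixels here), so callers needing true YOLO normalization must divide by image width and height themselves:

>>> from matrice_analytics.post_processing.utils.format_utils import convert_to_yolo_format
>>> convert_to_yolo_format([{"bounding_box": {"xmin": 10, "ymin": 20, "xmax": 50, "ymax": 80},
...                          "category_id": 3, "confidence": 0.9}])
[[3, 30.0, 50.0, 40, 60, 0.9]]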
def convert_to_tracking_format(detections: List[Dict], frame_id: str = "0") -> Dict:
    """
    Convert detection format to tracking format.

    Args:
        detections: List of detection dictionaries
        frame_id: Frame identifier

    Returns:
        Dict: Results in tracking format
    """
    tracking_results = {frame_id: []}

    for detection in detections:
        tracking_detection = {
            "track_id": detection.get("track_id", 0),
            "category": detection.get("category", "unknown"),
            "confidence": detection.get("confidence", 0.0),
            "bounding_box": detection.get("bounding_box", detection.get("bbox", {}))
        }

        # Add face recognition specific fields if present
        if "embedding" in detection:
            tracking_detection["embedding"] = detection["embedding"]
        if "landmarks" in detection:
            tracking_detection["landmarks"] = detection["landmarks"]

        tracking_results[frame_id].append(tracking_detection)

    return tracking_results


def convert_detection_to_tracking_format(detections: List[Dict], frame_id: str = "0") -> Dict:
    """
    Convert detection format to tracking format.

    Thin wrapper around convert_to_tracking_format.

    Args:
        detections: List of detection dictionaries
        frame_id: Frame identifier

    Returns:
        Dict: Results in tracking format
    """
    return convert_to_tracking_format(detections, frame_id)


def convert_tracking_to_detection_format(tracking_results: Dict) -> List[Dict]:
    """
    Convert tracking format to detection format.

    Args:
        tracking_results: Tracking results dictionary

    Returns:
        List[Dict]: Results in detection format
    """
    detections = []

    for frame_id, frame_detections in tracking_results.items():
        if isinstance(frame_detections, list):
            for detection in frame_detections:
                detection_item = {
                    "category": detection.get("category", "unknown"),
                    "confidence": detection.get("confidence", 0.0),
                    "bounding_box": detection.get("bounding_box", detection.get("bbox", {}))
                }
                if "track_id" in detection:
                    detection_item["track_id"] = detection["track_id"]

                # Add face recognition specific fields if present
                if "embedding" in detection:
                    detection_item["embedding"] = detection["embedding"]
                if "landmarks" in detection:
                    detection_item["landmarks"] = detection["landmarks"]

                detections.append(detection_item)

    return detections
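The two converters above are near-inverses: a round trip preserves category, confidence, bounding box, and track_id, but flattens frame grouping into a single list. A sketch hand-traced from the code:

>>> from matrice_analytics.post_processing.utils.format_utils import (
...     convert_to_tracking_format, convert_tracking_to_detection_format)
>>> dets = [{"category": "car", "confidence": 0.8,
...          "bounding_box": {"xmin": 0, "ymin": 0, "xmax": 4, "ymax": 4}, "track_id": 1}]
>>> tracked = convert_to_tracking_format(dets, frame_id="42")
>>> sorted(tracked.keys())
['42']
>>> convert_tracking_to_detection_format(tracked) == dets
True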
matrice_analytics/post_processing/utils/geometry_utils.py
@@ -0,0 +1,300 @@
"""
Geometry utility functions for post-processing operations.
"""

import math
from typing import List, Dict, Tuple, Union


def point_in_polygon(point: Tuple[float, float], polygon: List[Tuple[float, float]]) -> bool:
    """
    Check if a point is inside a polygon using the ray casting algorithm.

    Args:
        point: (x, y) coordinate tuple
        polygon: List of (x, y) coordinate tuples defining the polygon

    Returns:
        bool: True if point is inside polygon
    """
    x, y = point
    n = len(polygon)
    inside = False

    p1x, p1y = polygon[0]
    for i in range(1, n + 1):
        p2x, p2y = polygon[i % n]
        if y > min(p1y, p2y):
            if y <= max(p1y, p2y):
                if x <= max(p1x, p2x):
                    if p1y != p2y:
                        xinters = (y - p1y) * (p2x - p1x) / (p2y - p1y) + p1x
                    if p1x == p2x or x <= xinters:
                        inside = not inside
        p1x, p1y = p2x, p2y

    return inside
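A quick check of the ray-casting routine on a square, hand-traced from the code (the algorithm's boundary handling is asymmetric, so exact-edge points should not be relied upon):

>>> from matrice_analytics.post_processing.utils.geometry_utils import point_in_polygon
>>> square = [(0, 0), (10, 0), (10, 10), (0, 10)]
>>> point_in_polygon((5, 5), square)
True
>>> point_in_polygon((15, 5), square)
False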
def get_bbox_center(bbox: Union[Dict[str, float], List[float]]) -> Tuple[float, float]:
    """
    Get center point of bounding box.

    Args:
        bbox: Bounding box dict with coordinates or list [x1, y1, x2, y2]

    Returns:
        Tuple[float, float]: (x, y) center coordinates
    """
    if isinstance(bbox, list):
        # Handle list format [x1, y1, x2, y2]
        if len(bbox) >= 4:
            return ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)
        return (0, 0)

    elif isinstance(bbox, dict):
        # Handle dict formats
        if "xmin" in bbox and "xmax" in bbox and "ymin" in bbox and "ymax" in bbox:
            return ((bbox["xmin"] + bbox["xmax"]) / 2, (bbox["ymin"] + bbox["ymax"]) / 2)
        elif "x1" in bbox and "x2" in bbox and "y1" in bbox and "y2" in bbox:
            return ((bbox["x1"] + bbox["x2"]) / 2, (bbox["y1"] + bbox["y2"]) / 2)
        else:
            # Handle different bbox formats
            keys = list(bbox.keys())
            if len(keys) >= 4:
                values = list(bbox.values())
                return ((values[0] + values[2]) / 2, (values[1] + values[3]) / 2)

    return (0, 0)


def get_bbox_bottom25_center(bbox: Union[Dict[str, float], List[float]]) -> Tuple[float, float]:
    """
    Get the bottom-25% anchor point of a bounding box: the horizontal center,
    25% of the box height above the bottom edge.

    Args:
        bbox: Bounding box dict with coordinates or list [x1, y1, x2, y2]

    Returns:
        Tuple[float, float]: (x, y) anchor coordinates
    """
    if isinstance(bbox, list):
        # Handle list format [x1, y1, x2, y2]
        if len(bbox) >= 4:
            x_center = (bbox[0] + bbox[2]) / 2
            height = bbox[3] - bbox[1]
            y_target = bbox[3] - 0.25 * height
            return (x_center, y_target)
        return (0, 0)

    elif isinstance(bbox, dict):
        # Handle dict formats
        if "xmin" in bbox and "xmax" in bbox and "ymin" in bbox and "ymax" in bbox:
            x_center = (bbox["xmin"] + bbox["xmax"]) / 2
            height = bbox["ymax"] - bbox["ymin"]
            y_target = bbox["ymax"] - 0.25 * height
            return (x_center, y_target)
        elif "x1" in bbox and "x2" in bbox and "y1" in bbox and "y2" in bbox:
            x_center = (bbox["x1"] + bbox["x2"]) / 2
            height = bbox["y2"] - bbox["y1"]
            y_target = bbox["y2"] - 0.25 * height
            return (x_center, y_target)
        else:
            # Handle different bbox formats
            keys = list(bbox.keys())
            if len(keys) >= 4:
                values = list(bbox.values())
                x_center = (values[0] + values[2]) / 2
                height = values[3] - values[1]
                y_target = values[3] - 0.25 * height
                return (x_center, y_target)

    return (0, 0)


def calculate_distance(point1: Tuple[float, float], point2: Tuple[float, float]) -> float:
    """
    Calculate Euclidean distance between two points.

    Args:
        point1: First point (x, y)
        point2: Second point (x, y)

    Returns:
        float: Euclidean distance
    """
    return math.sqrt((point1[0] - point2[0])**2 + (point1[1] - point2[1])**2)
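The two anchor helpers differ only in the vertical reference; the bottom-25% point is commonly used as a stand-in for where an object meets the ground plane. Hand-computed sketch:

>>> from matrice_analytics.post_processing.utils.geometry_utils import (
...     get_bbox_center, get_bbox_bottom25_center, calculate_distance)
>>> box = {"xmin": 0, "ymin": 0, "xmax": 10, "ymax": 20}
>>> get_bbox_center(box)
(5.0, 10.0)
>>> get_bbox_bottom25_center(box)   # 25% of the height up from ymax
(5.0, 15.0)
>>> calculate_distance((0, 0), (3, 4))
5.0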
def calculate_bbox_overlap(bbox1: Dict[str, float], bbox2: Dict[str, float]) -> float:
    """
    Calculate IoU (Intersection over Union) between two bounding boxes.

    Thin wrapper around calculate_iou.

    Args:
        bbox1: First bounding box
        bbox2: Second bounding box

    Returns:
        float: IoU value between 0 and 1
    """
    return calculate_iou(bbox1, bbox2)


def calculate_iou(bbox1: Dict[str, float], bbox2: Dict[str, float]) -> float:
    """
    Calculate IoU (Intersection over Union) between two bounding boxes.

    Args:
        bbox1: First bounding box
        bbox2: Second bounding box

    Returns:
        float: IoU value between 0 and 1
    """
    # Normalize bbox format to [x1, y1, x2, y2]
    def normalize_bbox_coords(bbox):
        if "xmin" in bbox:
            return [bbox["xmin"], bbox["ymin"], bbox["xmax"], bbox["ymax"]]
        elif "x1" in bbox:
            return [bbox["x1"], bbox["y1"], bbox["x2"], bbox["y2"]]
        else:
            values = list(bbox.values())
            return values[:4]

    box1 = normalize_bbox_coords(bbox1)
    box2 = normalize_bbox_coords(bbox2)

    # Calculate intersection
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])

    if x2 < x1 or y2 < y1:
        return 0.0

    intersection = (x2 - x1) * (y2 - y1)

    # Calculate union
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = area1 + area2 - intersection

    return intersection / union if union > 0 else 0.0
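Worked arithmetic for the IoU above: two 10x10 boxes offset by (5, 5) overlap in a 5x5 patch, so IoU = 25 / (100 + 100 - 25) = 25/175, roughly 0.143.

>>> from matrice_analytics.post_processing.utils.geometry_utils import calculate_iou
>>> a = {"xmin": 0, "ymin": 0, "xmax": 10, "ymax": 10}
>>> b = {"xmin": 5, "ymin": 5, "xmax": 15, "ymax": 15}
>>> round(calculate_iou(a, b), 3)
0.143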
def get_bbox_area(bbox: Dict[str, float]) -> float:
    """
    Calculate area of bounding box.

    Args:
        bbox: Bounding box dict

    Returns:
        float: Area of the bounding box
    """
    if "xmin" in bbox and "xmax" in bbox and "ymin" in bbox and "ymax" in bbox:
        return (bbox["xmax"] - bbox["xmin"]) * (bbox["ymax"] - bbox["ymin"])
    elif "x1" in bbox and "x2" in bbox and "y1" in bbox and "y2" in bbox:
        return (bbox["x2"] - bbox["x1"]) * (bbox["y2"] - bbox["y1"])
    else:
        values = list(bbox.values())
        if len(values) >= 4:
            return (values[2] - values[0]) * (values[3] - values[1])
    return 0.0


def normalize_bbox(bbox: Dict[str, float], image_width: float, image_height: float) -> Dict[str, float]:
    """
    Normalize bounding box coordinates to [0, 1] range.

    Args:
        bbox: Bounding box dict
        image_width: Image width
        image_height: Image height

    Returns:
        Dict[str, float]: Normalized bounding box
    """
    if "xmin" in bbox:
        return {
            "xmin": bbox["xmin"] / image_width,
            "ymin": bbox["ymin"] / image_height,
            "xmax": bbox["xmax"] / image_width,
            "ymax": bbox["ymax"] / image_height
        }
    elif "x1" in bbox:
        return {
            "x1": bbox["x1"] / image_width,
            "y1": bbox["y1"] / image_height,
            "x2": bbox["x2"] / image_width,
            "y2": bbox["y2"] / image_height
        }
    else:
        # Handle generic format
        keys = list(bbox.keys())
        values = list(bbox.values())
        normalized_values = [
            values[0] / image_width,
            values[1] / image_height,
            values[2] / image_width,
            values[3] / image_height
        ]
        return dict(zip(keys, normalized_values))


def denormalize_bbox(bbox: Dict[str, float], image_width: float, image_height: float) -> Dict[str, float]:
    """
    Denormalize bounding box coordinates from [0, 1] range to pixel coordinates.

    Args:
        bbox: Normalized bounding box dict
        image_width: Image width
        image_height: Image height

    Returns:
        Dict[str, float]: Denormalized bounding box
    """
    if "xmin" in bbox:
        return {
            "xmin": bbox["xmin"] * image_width,
            "ymin": bbox["ymin"] * image_height,
            "xmax": bbox["xmax"] * image_width,
            "ymax": bbox["ymax"] * image_height
        }
    elif "x1" in bbox:
        return {
            "x1": bbox["x1"] * image_width,
            "y1": bbox["y1"] * image_height,
            "x2": bbox["x2"] * image_width,
            "y2": bbox["y2"] * image_height
        }
    else:
        # Handle generic format
        keys = list(bbox.keys())
        values = list(bbox.values())
        denormalized_values = [
            values[0] * image_width,
            values[1] * image_height,
            values[2] * image_width,
            values[3] * image_height
        ]
        return dict(zip(keys, denormalized_values))
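normalize_bbox and denormalize_bbox are inverses for a fixed image size (up to float rounding), which is handy when zone configs are stored in relative coordinates. A sketch hand-traced from the code:

>>> from matrice_analytics.post_processing.utils.geometry_utils import (
...     normalize_bbox, denormalize_bbox)
>>> box = {"xmin": 320, "ymin": 180, "xmax": 640, "ymax": 360}
>>> norm = normalize_bbox(box, image_width=1280, image_height=720)
>>> norm
{'xmin': 0.25, 'ymin': 0.25, 'xmax': 0.5, 'ymax': 0.5}
>>> denormalize_bbox(norm, 1280, 720) == box
True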
def line_segments_intersect(p1: Tuple[float, float], p2: Tuple[float, float],
                            p3: Tuple[float, float], p4: Tuple[float, float]) -> bool:
    """
    Check if two line segments intersect.

    Args:
        p1: First point of first line segment
        p2: Second point of first line segment
        p3: First point of second line segment
        p4: Second point of second line segment

    Returns:
        bool: True if line segments intersect
    """
    def ccw(A, B, C):
        # True if A, B, C are in counter-clockwise order
        return (C[1] - A[1]) * (B[0] - A[0]) > (B[1] - A[1]) * (C[0] - A[0])

    return ccw(p1, p3, p4) != ccw(p2, p3, p4) and ccw(p1, p2, p3) != ccw(p1, p2, p4)