matrice_analytics-0.1.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of matrice-analytics might be problematic.
- matrice_analytics/__init__.py +28 -0
- matrice_analytics/boundary_drawing_internal/README.md +305 -0
- matrice_analytics/boundary_drawing_internal/__init__.py +45 -0
- matrice_analytics/boundary_drawing_internal/boundary_drawing_internal.py +1207 -0
- matrice_analytics/boundary_drawing_internal/boundary_drawing_tool.py +429 -0
- matrice_analytics/boundary_drawing_internal/boundary_tool_template.html +1036 -0
- matrice_analytics/boundary_drawing_internal/data/.gitignore +12 -0
- matrice_analytics/boundary_drawing_internal/example_usage.py +206 -0
- matrice_analytics/boundary_drawing_internal/usage/README.md +110 -0
- matrice_analytics/boundary_drawing_internal/usage/boundary_drawer_launcher.py +102 -0
- matrice_analytics/boundary_drawing_internal/usage/simple_boundary_launcher.py +107 -0
- matrice_analytics/post_processing/README.md +455 -0
- matrice_analytics/post_processing/__init__.py +732 -0
- matrice_analytics/post_processing/advanced_tracker/README.md +650 -0
- matrice_analytics/post_processing/advanced_tracker/__init__.py +17 -0
- matrice_analytics/post_processing/advanced_tracker/base.py +99 -0
- matrice_analytics/post_processing/advanced_tracker/config.py +77 -0
- matrice_analytics/post_processing/advanced_tracker/kalman_filter.py +370 -0
- matrice_analytics/post_processing/advanced_tracker/matching.py +195 -0
- matrice_analytics/post_processing/advanced_tracker/strack.py +230 -0
- matrice_analytics/post_processing/advanced_tracker/tracker.py +367 -0
- matrice_analytics/post_processing/config.py +142 -0
- matrice_analytics/post_processing/core/__init__.py +63 -0
- matrice_analytics/post_processing/core/base.py +704 -0
- matrice_analytics/post_processing/core/config.py +3188 -0
- matrice_analytics/post_processing/core/config_utils.py +925 -0
- matrice_analytics/post_processing/face_reg/__init__.py +43 -0
- matrice_analytics/post_processing/face_reg/compare_similarity.py +556 -0
- matrice_analytics/post_processing/face_reg/embedding_manager.py +681 -0
- matrice_analytics/post_processing/face_reg/face_recognition.py +1870 -0
- matrice_analytics/post_processing/face_reg/face_recognition_client.py +339 -0
- matrice_analytics/post_processing/face_reg/people_activity_logging.py +283 -0
- matrice_analytics/post_processing/ocr/__init__.py +0 -0
- matrice_analytics/post_processing/ocr/easyocr_extractor.py +248 -0
- matrice_analytics/post_processing/ocr/postprocessing.py +271 -0
- matrice_analytics/post_processing/ocr/preprocessing.py +52 -0
- matrice_analytics/post_processing/post_processor.py +1153 -0
- matrice_analytics/post_processing/test_cases/__init__.py +1 -0
- matrice_analytics/post_processing/test_cases/run_tests.py +143 -0
- matrice_analytics/post_processing/test_cases/test_advanced_customer_service.py +841 -0
- matrice_analytics/post_processing/test_cases/test_basic_counting_tracking.py +523 -0
- matrice_analytics/post_processing/test_cases/test_comprehensive.py +531 -0
- matrice_analytics/post_processing/test_cases/test_config.py +852 -0
- matrice_analytics/post_processing/test_cases/test_customer_service.py +585 -0
- matrice_analytics/post_processing/test_cases/test_data_generators.py +583 -0
- matrice_analytics/post_processing/test_cases/test_people_counting.py +510 -0
- matrice_analytics/post_processing/test_cases/test_processor.py +524 -0
- matrice_analytics/post_processing/test_cases/test_utilities.py +356 -0
- matrice_analytics/post_processing/test_cases/test_utils.py +743 -0
- matrice_analytics/post_processing/usecases/Histopathological_Cancer_Detection_img.py +604 -0
- matrice_analytics/post_processing/usecases/__init__.py +267 -0
- matrice_analytics/post_processing/usecases/abandoned_object_detection.py +797 -0
- matrice_analytics/post_processing/usecases/advanced_customer_service.py +1601 -0
- matrice_analytics/post_processing/usecases/age_detection.py +842 -0
- matrice_analytics/post_processing/usecases/age_gender_detection.py +1043 -0
- matrice_analytics/post_processing/usecases/anti_spoofing_detection.py +656 -0
- matrice_analytics/post_processing/usecases/assembly_line_detection.py +841 -0
- matrice_analytics/post_processing/usecases/banana_defect_detection.py +624 -0
- matrice_analytics/post_processing/usecases/basic_counting_tracking.py +667 -0
- matrice_analytics/post_processing/usecases/blood_cancer_detection_img.py +881 -0
- matrice_analytics/post_processing/usecases/car_damage_detection.py +834 -0
- matrice_analytics/post_processing/usecases/car_part_segmentation.py +946 -0
- matrice_analytics/post_processing/usecases/car_service.py +1601 -0
- matrice_analytics/post_processing/usecases/cardiomegaly_classification.py +864 -0
- matrice_analytics/post_processing/usecases/cell_microscopy_segmentation.py +897 -0
- matrice_analytics/post_processing/usecases/chicken_pose_detection.py +648 -0
- matrice_analytics/post_processing/usecases/child_monitoring.py +814 -0
- matrice_analytics/post_processing/usecases/color/clip.py +232 -0
- matrice_analytics/post_processing/usecases/color/clip_processor/merges.txt +48895 -0
- matrice_analytics/post_processing/usecases/color/clip_processor/preprocessor_config.json +28 -0
- matrice_analytics/post_processing/usecases/color/clip_processor/special_tokens_map.json +30 -0
- matrice_analytics/post_processing/usecases/color/clip_processor/tokenizer.json +245079 -0
- matrice_analytics/post_processing/usecases/color/clip_processor/tokenizer_config.json +32 -0
- matrice_analytics/post_processing/usecases/color/clip_processor/vocab.json +1 -0
- matrice_analytics/post_processing/usecases/color/color_map_utils.py +70 -0
- matrice_analytics/post_processing/usecases/color/color_mapper.py +468 -0
- matrice_analytics/post_processing/usecases/color_detection.py +1835 -0
- matrice_analytics/post_processing/usecases/color_map_utils.py +70 -0
- matrice_analytics/post_processing/usecases/concrete_crack_detection.py +827 -0
- matrice_analytics/post_processing/usecases/crop_weed_detection.py +781 -0
- matrice_analytics/post_processing/usecases/customer_service.py +1008 -0
- matrice_analytics/post_processing/usecases/defect_detection_products.py +936 -0
- matrice_analytics/post_processing/usecases/distracted_driver_detection.py +822 -0
- matrice_analytics/post_processing/usecases/drone_traffic_monitoring.py +930 -0
- matrice_analytics/post_processing/usecases/drowsy_driver_detection.py +829 -0
- matrice_analytics/post_processing/usecases/dwell_detection.py +829 -0
- matrice_analytics/post_processing/usecases/emergency_vehicle_detection.py +827 -0
- matrice_analytics/post_processing/usecases/face_emotion.py +813 -0
- matrice_analytics/post_processing/usecases/face_recognition.py +827 -0
- matrice_analytics/post_processing/usecases/fashion_detection.py +835 -0
- matrice_analytics/post_processing/usecases/field_mapping.py +902 -0
- matrice_analytics/post_processing/usecases/fire_detection.py +1112 -0
- matrice_analytics/post_processing/usecases/flare_analysis.py +891 -0
- matrice_analytics/post_processing/usecases/flower_segmentation.py +1006 -0
- matrice_analytics/post_processing/usecases/gas_leak_detection.py +837 -0
- matrice_analytics/post_processing/usecases/gender_detection.py +832 -0
- matrice_analytics/post_processing/usecases/human_activity_recognition.py +871 -0
- matrice_analytics/post_processing/usecases/intrusion_detection.py +1672 -0
- matrice_analytics/post_processing/usecases/leaf.py +821 -0
- matrice_analytics/post_processing/usecases/leaf_disease.py +840 -0
- matrice_analytics/post_processing/usecases/leak_detection.py +837 -0
- matrice_analytics/post_processing/usecases/license_plate_detection.py +914 -0
- matrice_analytics/post_processing/usecases/license_plate_monitoring.py +1194 -0
- matrice_analytics/post_processing/usecases/litter_monitoring.py +717 -0
- matrice_analytics/post_processing/usecases/mask_detection.py +869 -0
- matrice_analytics/post_processing/usecases/natural_disaster.py +907 -0
- matrice_analytics/post_processing/usecases/parking.py +787 -0
- matrice_analytics/post_processing/usecases/parking_space_detection.py +822 -0
- matrice_analytics/post_processing/usecases/pcb_defect_detection.py +888 -0
- matrice_analytics/post_processing/usecases/pedestrian_detection.py +808 -0
- matrice_analytics/post_processing/usecases/people_counting.py +1728 -0
- matrice_analytics/post_processing/usecases/people_tracking.py +1842 -0
- matrice_analytics/post_processing/usecases/pipeline_detection.py +605 -0
- matrice_analytics/post_processing/usecases/plaque_segmentation_img.py +874 -0
- matrice_analytics/post_processing/usecases/pothole_segmentation.py +915 -0
- matrice_analytics/post_processing/usecases/ppe_compliance.py +645 -0
- matrice_analytics/post_processing/usecases/price_tag_detection.py +822 -0
- matrice_analytics/post_processing/usecases/proximity_detection.py +1901 -0
- matrice_analytics/post_processing/usecases/road_lane_detection.py +623 -0
- matrice_analytics/post_processing/usecases/road_traffic_density.py +832 -0
- matrice_analytics/post_processing/usecases/road_view_segmentation.py +915 -0
- matrice_analytics/post_processing/usecases/shelf_inventory_detection.py +583 -0
- matrice_analytics/post_processing/usecases/shoplifting_detection.py +822 -0
- matrice_analytics/post_processing/usecases/shopping_cart_analysis.py +899 -0
- matrice_analytics/post_processing/usecases/skin_cancer_classification_img.py +864 -0
- matrice_analytics/post_processing/usecases/smoker_detection.py +833 -0
- matrice_analytics/post_processing/usecases/solar_panel.py +810 -0
- matrice_analytics/post_processing/usecases/suspicious_activity_detection.py +1030 -0
- matrice_analytics/post_processing/usecases/template_usecase.py +380 -0
- matrice_analytics/post_processing/usecases/theft_detection.py +648 -0
- matrice_analytics/post_processing/usecases/traffic_sign_monitoring.py +724 -0
- matrice_analytics/post_processing/usecases/underground_pipeline_defect_detection.py +775 -0
- matrice_analytics/post_processing/usecases/underwater_pollution_detection.py +842 -0
- matrice_analytics/post_processing/usecases/vehicle_monitoring.py +950 -0
- matrice_analytics/post_processing/usecases/warehouse_object_segmentation.py +899 -0
- matrice_analytics/post_processing/usecases/waterbody_segmentation.py +923 -0
- matrice_analytics/post_processing/usecases/weapon_detection.py +771 -0
- matrice_analytics/post_processing/usecases/weld_defect_detection.py +615 -0
- matrice_analytics/post_processing/usecases/wildlife_monitoring.py +898 -0
- matrice_analytics/post_processing/usecases/windmill_maintenance.py +834 -0
- matrice_analytics/post_processing/usecases/wound_segmentation.py +856 -0
- matrice_analytics/post_processing/utils/__init__.py +150 -0
- matrice_analytics/post_processing/utils/advanced_counting_utils.py +400 -0
- matrice_analytics/post_processing/utils/advanced_helper_utils.py +317 -0
- matrice_analytics/post_processing/utils/advanced_tracking_utils.py +461 -0
- matrice_analytics/post_processing/utils/alerting_utils.py +213 -0
- matrice_analytics/post_processing/utils/category_mapping_utils.py +94 -0
- matrice_analytics/post_processing/utils/color_utils.py +592 -0
- matrice_analytics/post_processing/utils/counting_utils.py +182 -0
- matrice_analytics/post_processing/utils/filter_utils.py +261 -0
- matrice_analytics/post_processing/utils/format_utils.py +293 -0
- matrice_analytics/post_processing/utils/geometry_utils.py +300 -0
- matrice_analytics/post_processing/utils/smoothing_utils.py +358 -0
- matrice_analytics/post_processing/utils/tracking_utils.py +234 -0
- matrice_analytics/py.typed +0 -0
- matrice_analytics-0.1.2.dist-info/METADATA +481 -0
- matrice_analytics-0.1.2.dist-info/RECORD +160 -0
- matrice_analytics-0.1.2.dist-info/WHEEL +5 -0
- matrice_analytics-0.1.2.dist-info/licenses/LICENSE.txt +21 -0
- matrice_analytics-0.1.2.dist-info/top_level.txt +1 -0

matrice_analytics/post_processing/ocr/easyocr_extractor.py
@@ -0,0 +1,248 @@
import easyocr
import numpy as np
import torch

class EasyOCRExtractor:
    def __init__(self, lang=['en', 'hi', 'ar'], gpu=False, model_storage_directory=None,
                 download_enabled=True, detector=True, recognizer=True, verbose=False):
        """
        Initializes the EasyOCR text extractor with optimized parameters.

        Args:
            lang (str or list): Language(s) to be used by EasyOCR. Default is ['en', 'hi', 'ar'].
            gpu (bool): Request GPU acceleration. Default is False; the constructor
                overrides this based on torch.cuda.is_available().
            model_storage_directory (str): Custom path to store models. Default is None.
            download_enabled (bool): Allow downloading models if not found. Default is True.
            detector (bool): Load text detection model. Default is True.
            recognizer (bool): Load text recognition model. Default is True.
            verbose (bool): Enable verbose output (e.g., progress bars). Default is False.
        """
        self.lang = lang
        self.gpu = gpu
        # Override the requested setting based on actual GPU availability
        if torch.cuda.is_available():
            self.gpu = True
        else:
            self.gpu = False
        self.model_storage_directory = model_storage_directory
        self.download_enabled = download_enabled
        self.detector = detector
        self.recognizer = recognizer
        self.verbose = verbose
        self.reader = None

    def setup(self):
        """
        Initializes the EasyOCR reader if not already initialized.
        """
        if self.reader is None:
            lang_list = [self.lang] if isinstance(self.lang, str) else self.lang
            self.reader = easyocr.Reader(
                lang_list=lang_list,
                gpu=self.gpu,
                model_storage_directory=self.model_storage_directory,
                download_enabled=self.download_enabled,
                detector=self.detector,
                recognizer=self.recognizer,
                verbose=self.verbose
            )

    def extract(self, image_np, bboxes=None, detail=1, paragraph=False,
                decoder='greedy', beam_width=5, batch_size=1, workers=0,
                allowlist=None, blocklist=None, min_size=10, rotation_info=None,
                contrast_ths=0.1, adjust_contrast=0.5, text_threshold=0.7,
                low_text=0.4, link_threshold=0.4, canvas_size=2560, mag_ratio=1.0,
                slope_ths=0.1, ycenter_ths=0.5, height_ths=0.5, width_ths=0.5,
                add_margin=0.1):
        """
        Extracts text from the given image or specific regions within the bounding boxes
        with configurable parameters for optimal performance.

        Args:
            image_np (np.ndarray): Input image as a numpy array.
            bboxes (list): List of bounding boxes. Each box is a list of [xmin, ymin, xmax, ymax].
                If None, OCR is performed on the entire image.
            detail (int): Set to 0 for simple output, 1 for detailed output.
            paragraph (bool): Combine results into paragraphs.
            decoder (str): Decoding method ('greedy', 'beamsearch', 'wordbeamsearch').
            beam_width (int): How many beams to keep when using beam search decoders.
            batch_size (int): Number of images to process in a batch.
            workers (int): Number of worker threads for data loading.
            allowlist (str): Force recognition of only specific characters.
            blocklist (str): Block specific characters from recognition.
            min_size (int): Filter text boxes smaller than this pixel size.
            rotation_info (list): List of rotation angles to try (e.g., [90, 180, 270]).
            contrast_ths (float): Threshold for contrast adjustment.
            adjust_contrast (float): Target contrast level for low-contrast text.
            text_threshold (float): Text confidence threshold.
            low_text (float): Text low-bound score.
            link_threshold (float): Link confidence threshold.
            canvas_size (int): Maximum image size before resizing.
            mag_ratio (float): Image magnification ratio.
            slope_ths (float): Maximum slope for merging boxes.
            ycenter_ths (float): Maximum y-center shift for merging boxes.
            height_ths (float): Maximum height difference for merging boxes.
            width_ths (float): Maximum width for horizontal merging.
            add_margin (float): Margin to add around text boxes.

        Returns:
            list: OCR results containing text, confidence, and bounding boxes.
        """
        # Make sure the reader is initialized
        self.setup()

        ocr_results = []

        # Dictionary of readtext parameters
        readtext_params = {
            'decoder': decoder,
            'beamWidth': beam_width,
            'batch_size': batch_size,
            'workers': workers,
            'allowlist': allowlist,
            'blocklist': blocklist,
            'detail': detail,
            'paragraph': paragraph,
            'min_size': min_size,
            'rotation_info': rotation_info,
            'contrast_ths': contrast_ths,
            'adjust_contrast': adjust_contrast,
            'text_threshold': text_threshold,
            'low_text': low_text,
            'link_threshold': link_threshold,
            'canvas_size': canvas_size,
            'mag_ratio': mag_ratio,
            'slope_ths': slope_ths,
            'ycenter_ths': ycenter_ths,
            'height_ths': height_ths,
            'width_ths': width_ths,
            'add_margin': add_margin
        }

        # If no bounding boxes, perform OCR on the entire image
        if bboxes is None:
            text_data = self.reader.readtext(image_np, **readtext_params)
            if detail == 0:
                return text_data  # Simple output format for detail=0

            for bbox, text, conf in text_data:
                ocr_results.append({
                    "text": text,
                    "confidence": conf,
                    "bounding_box": bbox
                })
        else:
            # Perform OCR on each bounding box
            for box in bboxes:
                xmin, ymin, xmax, ymax = map(int, box)
                cropped_img = image_np[ymin:ymax, xmin:xmax]

                # Skip empty crops
                if cropped_img.size == 0 or cropped_img.shape[0] == 0 or cropped_img.shape[1] == 0:
                    continue

                text_data = self.reader.readtext(cropped_img, **readtext_params)

                if detail == 0:
                    # detail=0 normally yields plain strings, which are kept as-is;
                    # tuple results still get their boxes shifted to full-image coordinates.
                    adjusted_data = []
                    for result in text_data:
                        if isinstance(result, tuple) and len(result) >= 1:
                            # Adjust coordinates based on crop position
                            adjusted_bbox = [[pt[0] + xmin, pt[1] + ymin] for pt in result[0]]
                            if len(result) == 3:  # (bbox, text, confidence)
                                adjusted_data.append((adjusted_bbox, result[1], result[2]))
                            elif len(result) == 2:  # (bbox, text)
                                adjusted_data.append((adjusted_bbox, result[1]))
                        else:
                            adjusted_data.append(result)
                    ocr_results.extend(adjusted_data)
                else:
                    for bbox, text, conf in text_data:
                        # Adjust bounding box coordinates relative to the original image
                        adjusted_bbox = [
                            [pt[0] + xmin, pt[1] + ymin] for pt in bbox
                        ]

                        ocr_results.append({
                            "text": text,
                            "confidence": conf,
                            "bounding_box": adjusted_bbox
                        })

        return ocr_results

    def detect_text_regions(self, image_np, min_size=10, text_threshold=0.7,
                            low_text=0.4, link_threshold=0.4, canvas_size=2560,
                            mag_ratio=1.0, slope_ths=0.1, ycenter_ths=0.5,
                            height_ths=0.5, width_ths=0.5, add_margin=0.1,
                            optimal_num_chars=None):
        """
        Detects text regions in the image without performing recognition.

        Args:
            image_np (np.ndarray): Input image as a numpy array.
            min_size (int): Filter text boxes smaller than this pixel size.
            text_threshold (float): Text confidence threshold.
            low_text (float): Text low-bound score.
            link_threshold (float): Link confidence threshold.
            canvas_size (int): Maximum image size before resizing.
            mag_ratio (float): Image magnification ratio.
            slope_ths (float): Maximum slope for merging boxes.
            ycenter_ths (float): Maximum y-center shift for merging boxes.
            height_ths (float): Maximum height difference for merging boxes.
            width_ths (float): Maximum width for horizontal merging.
            add_margin (float): Margin to add around text boxes.
            optimal_num_chars (int): Prioritize boxes with this estimated character count.

        Returns:
            tuple: (horizontal_list, free_list) containing text regions
        """
        self.setup()
        return self.reader.detect(
            image_np,
            min_size=min_size,
            text_threshold=text_threshold,
            low_text=low_text,
            link_threshold=link_threshold,
            canvas_size=canvas_size,
            mag_ratio=mag_ratio,
            slope_ths=slope_ths,
            ycenter_ths=ycenter_ths,
            height_ths=height_ths,
            width_ths=width_ths,
            add_margin=add_margin,
            optimal_num_chars=optimal_num_chars
        )

    def recognize_from_regions(self, image_np, horizontal_list=None, free_list=None,
                               decoder='greedy', beam_width=5, batch_size=1,
                               workers=0, allowlist=None, blocklist=None,
                               detail=1, paragraph=False, contrast_ths=0.1,
                               adjust_contrast=0.5):
        """
        Recognizes text from previously detected regions.

        Args:
            image_np (np.ndarray): Input image as a numpy array.
            horizontal_list (list): List of rectangular regions [x_min, x_max, y_min, y_max].
            free_list (list): List of free-form regions [[x1,y1],[x2,y2],[x3,y3],[x4,y4]].
            Other parameters: Same as extract method.

        Returns:
            list: OCR results for the specified regions
        """
        self.setup()
        return self.reader.recognize(
            image_np,
            horizontal_list=horizontal_list,
            free_list=free_list,
            decoder=decoder,
            beamWidth=beam_width,
            batch_size=batch_size,
            workers=workers,
            allowlist=allowlist,
            blocklist=blocklist,
            detail=detail,
            paragraph=paragraph,
            contrast_ths=contrast_ths,
            adjust_contrast=adjust_contrast
        )
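
A minimal usage sketch for the extractor above. The frame path and plate box are invented, the import path follows the wheel layout listed at the top, and opencv-python is assumed to be installed alongside easyocr:

    import cv2
    from matrice_analytics.post_processing.ocr.easyocr_extractor import EasyOCRExtractor

    # Hypothetical frame and detector output (not part of the package).
    image = cv2.cvtColor(cv2.imread("frame.jpg"), cv2.COLOR_BGR2RGB)
    plate_boxes = [[120, 340, 260, 390]]  # [xmin, ymin, xmax, ymax]

    extractor = EasyOCRExtractor(lang=['en'])
    extractor.setup()  # optional: extract() initializes the reader lazily

    # OCR restricted to the detected region; returned boxes are in full-image coordinates.
    results = extractor.extract(
        image,
        bboxes=plate_boxes,
        allowlist="ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789",
    )
    for item in results:
        print(item["text"], item["confidence"], item["bounding_box"])

On the first run EasyOCR downloads its detection and recognition models unless model_storage_directory already points at a local copy.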
matrice_analytics/post_processing/ocr/postprocessing.py
@@ -0,0 +1,271 @@
import re
import logging

class TextPostprocessor:
    def __init__(self, logging_level=logging.INFO):
        """
        Initialize the text postprocessor with optional logging configuration.

        Args:
            logging_level: The level of logging detail. Default is INFO.
        """
        logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging_level)
        self.logger = logging.getLogger('TextPostprocessor')

        # Fallback region used by the generic license plate processor when no
        # explicit region is passed to postprocess().
        self.region = None

        self.task_processors = {
            "license_plate": self._process_license_plate,
            "license_plate_india": self._process_license_plate_india,
            "license_plate_us": self._process_license_plate_us,
            "license_plate_eu": self._process_license_plate_eu,
            "license_plate_qatar": self._process_license_plate_qatar,
        }

        # Common OCR confusions mapped to the characters they usually stand for.
        self.char_substitutions = {
            'O': '0',
            'o': '0',
            'I': '1',
            'Z': '2',
            'A': '4',
            'L': '1',
            'AV': 'AV',
            'S': '5',
            'B': '8',
            'D': '0',
            'Q': '0',
            'G': '6',
            'T': '7'
        }

    def postprocess(self, texts, confidences, task=None, confidence_threshold=0.25, cleanup=True, region=None):
        """
        Postprocesses the extracted text by cleaning and filtering low-confidence results.
        Applies task-specific processing if a task is specified.

        Args:
            texts (list): List of extracted text strings.
            confidences (list): List of confidence scores corresponding to each text.
            task (str): Specific task for customized postprocessing. Default is None.
            confidence_threshold (float): Minimum confidence required to keep the text. Default is 0.25.
            cleanup (bool): Whether to perform text cleanup.
            region (str): Specific region for license plate processing ('india', 'us', 'eu', 'qatar'). Default is None.

        Returns:
            list: List of processed texts with corresponding confidence scores and validity flags.
        """
        results = []

        for text, confidence in zip(texts, confidences):
            if confidence < confidence_threshold:
                self.logger.debug(f"Text '{text}' rejected: confidence {confidence} below threshold {confidence_threshold}")
                results.append((None, confidence, False))
                continue

            if cleanup:
                processed_text = self._clean_text(text)
            else:
                processed_text = text

            if task and processed_text:
                if task == "license_plate" and region:
                    region_task = f"license_plate_{region.lower()}"
                    if region_task in self.task_processors:
                        processed_text = self.task_processors[region_task](processed_text)
                    else:
                        processed_text = self.task_processors["license_plate"](processed_text)
                        self.logger.warning(f"Region '{region}' not supported, using generic license plate processor")
                elif task in self.task_processors:
                    processed_text = self.task_processors[task](processed_text)
                else:
                    self.logger.warning(f"Task '{task}' not supported, skipping task-specific processing")

            if processed_text:
                self.logger.debug(f"Text processed successfully: '{text}' -> '{processed_text}'")
                results.append((processed_text, confidence, True))
            else:
                self.logger.debug(f"Text '{text}' rejected during processing")
                results.append((None, confidence, False))

        return results

    def _clean_text(self, text):
        """
        Basic text cleaning operations.

        Args:
            text (str): Text to clean.

        Returns:
            str: Cleaned text.
        """
        clean_text = text.strip()
        clean_text = ''.join(char for char in clean_text if char.isprintable())
        clean_text = ' '.join(clean_text.split())

        return clean_text

    def _process_license_plate(self, text):
        """
        Generic license plate processor that uses self.region when set and
        otherwise guesses the format heuristically.

        Args:
            text (str): License plate text to process.

        Returns:
            str: Processed license plate text or None if invalid.
        """
        plate_text = text.upper()
        plate_text = ''.join(plate_text.split())

        if self.region and self.region.lower() == 'qatar':
            return self._process_license_plate_qatar(plate_text)
        elif self.region and self.region.lower() == 'india':
            return self._process_license_plate_india(plate_text)
        elif self.region and self.region.lower() == 'us':
            return self._process_license_plate_us(plate_text)
        elif self.region and self.region.lower() == 'eu':
            return self._process_license_plate_eu(plate_text)
        else:
            if re.match(r'^[A-Z]{2}\d{1,2}[A-Z]{1,2}\d{4}$', plate_text):
                return self._process_license_plate_india(plate_text)
            elif re.match(r'^[A-Z0-9]{1,8}$', plate_text) and len(plate_text) <= 8:
                return self._process_license_plate_us(plate_text)
            elif re.match(r'^[A-Z]{1,3}[-\s]?[A-Z0-9]{1,4}[-\s]?[A-Z0-9]{1,3}$', plate_text):
                return self._process_license_plate_eu(plate_text)
            elif re.match(r'^\d{1,6}\s*[A-Z]+?$', plate_text):
                return self._process_license_plate_qatar(plate_text)
            else:
                plate_text = ''.join(char for char in plate_text if char.isalnum())
                if 4 <= len(plate_text) <= 10:
                    return plate_text

        self.logger.warning(f"Could not identify license plate format: '{text}'")
        return None

    def _process_license_plate_india(self, text):
        plate_text = text.upper().replace(" ", "")
        plate_text = ''.join(char for char in plate_text if char.isalnum())
        for old, new in self.char_substitutions.items():
            plate_text = plate_text.replace(old, new)

        if len(plate_text) >= 7:
            state_code = plate_text[:2]
            rest = plate_text[2:]
            match = re.match(r'^(\d{1,2})[ -]?([A-Z]{1,2})[ -]?(\d{4})$', rest)
            if match and state_code in ['AN', 'AP', 'AR', 'AS', 'BR', 'CH', 'CG', 'DD', 'DL', 'GA', 'GJ', 'HP', 'HR', 'JH', 'JK', 'KA', 'KL', 'LA', 'LD', 'MH', 'ML', 'MN', 'MP', 'MZ', 'NL', 'OD', 'PB', 'PY', 'RJ', 'SK', 'TN', 'TR', 'TG', 'TS', 'UK', 'UP', 'WB']:
                district, series, number = match.groups()
                formatted_plate = f"{state_code}{district}{series}{number}"
                self.logger.info(f"Processed Indian license plate: '{text}' -> '{formatted_plate}'")
                return formatted_plate
        self.logger.warning(f"Invalid Indian license plate format: '{text}'")
        return None

    def _process_license_plate_us(self, text):
        plate_text = text.upper()
        plate_text = ''.join(char for char in plate_text if char.isalnum())

        for old, new in self.char_substitutions.items():
            plate_text = plate_text.replace(old, new)

        if re.match(r'^[A-Z]{3}\d{4}$', plate_text) or re.match(r'^\d{3}[A-Z]{4}$', plate_text):
            self.logger.info(f"Processed US license plate (standard format): '{text}' -> '{plate_text}'")
            return plate_text
        if 2 <= len(plate_text) <= 8 and re.match(r'^[A-Z0-9]+$', plate_text):
            self.logger.info(f"Processed US license plate (vanity/other format): '{text}' -> '{plate_text}'")
            return plate_text

        self.logger.warning(f"Invalid US license plate format: '{text}'")
        return None

    def _process_license_plate_eu(self, text):
        plate_text = text.upper()
        plate_text = ''.join(char for char in plate_text if char.isalnum() or char == '-')

        if '-' not in plate_text and len(plate_text) > 3:
            for i in range(1, 4):
                if i < len(plate_text) and plate_text[i].isdigit() and plate_text[i-1].isalpha():
                    plate_text = plate_text[:i] + '-' + plate_text[i:]
                    break

        for old, new in self.char_substitutions.items():
            plate_text = plate_text.replace(old, new)

        if re.match(r'^[A-Z]{1,3}-[A-Z]{1,2}\d{1,4}$', plate_text):
            self.logger.info(f"Processed German license plate: '{text}' -> '{plate_text}'")
            return plate_text
        if re.match(r'^[A-Z]{2}\d{2}[A-Z]{3}$', plate_text):
            self.logger.info(f"Processed UK license plate: '{text}' -> '{plate_text}'")
            return plate_text
        if re.match(r'^[A-Z]{2}-\d{3}-[A-Z]{2}$', plate_text) or re.match(r'^\d{4}[A-Z]{3}$', plate_text):
            self.logger.info(f"Processed French license plate: '{text}' -> '{plate_text}'")
            return plate_text
        if re.match(r'^[A-Z]{2}\d{3}[A-Z]{2}$', plate_text):
            self.logger.info(f"Processed Italian license plate: '{text}' -> '{plate_text}'")
            return plate_text
        if re.match(r'^\d{4}[BCDFGHJKLMNPRSTVWXYZ]{3}$', plate_text):
            self.logger.info(f"Processed Spanish license plate: '{text}' -> '{plate_text}'")
            return plate_text
        if re.search(r'[A-Z]', plate_text) and re.search(r'\d', plate_text) and 4 <= len(plate_text) <= 10:
            self.logger.info(f"Processed generic European license plate: '{text}' -> '{plate_text}'")
            return plate_text

        self.logger.warning(f"Invalid European license plate format: '{text}'")
        return None

    def _process_license_plate_qatar(self, text):
        """
        Process Qatar license plate text by converting Arabic numerals to Latin and keeping only digits.

        Args:
            text (str): License plate text to process.

        Returns:
            str: Processed license plate text or None if invalid.
        """
        # Check for Unicode escape sequences (e.g., \u0664)
        if r'\u' in str(text):
            self.logger.warning(f"Invalid Qatar license plate format: '{text}' contains Unicode escape sequence")
            return None

        # Define Arabic to Latin numeral mapping
        arabic_to_latin = str.maketrans('٠١٢٣٤٥٦٧٨٩', '0123456789')

        # Convert Arabic numerals to Latin and keep only alphanumeric characters
        plate_text = text.translate(arabic_to_latin)
        plate_text = ''.join(char for char in plate_text if char.isalnum())

        # Apply character substitutions for common OCR errors
        for old, new in self.char_substitutions.items():
            plate_text = plate_text.replace(old, new)

        # Keep only digits for Qatar license plates
        plate_text = ''.join(char for char in plate_text if char.isdigit())

        # Validate: Ensure the text is 1 to 6 digits
        if re.match(r'^\d{1,6}$', plate_text):
            self.logger.info(f"Processed Qatar license plate: '{text}' -> '{plate_text}'")
            return plate_text

        self.logger.warning(f"Invalid Qatar license plate format: '{text}'")
        return None

    def _string_similarity(self, s1, s2):
        # Levenshtein-based similarity in [0, 1]; 1.0 means identical strings.
        if len(s1) > len(s2):
            s1, s2 = s2, s1

        distances = range(len(s1) + 1)
        for i2, c2 in enumerate(s2):
            distances_ = [i2 + 1]
            for i1, c1 in enumerate(s1):
                if c1 == c2:
                    distances_.append(distances[i1])
                else:
                    distances_.append(1 + min((distances[i1], distances[i1 + 1], distances_[-1])))
            distances = distances_

        max_len = max(len(s1), len(s2))
        similarity = 1 - (distances[-1] / max_len if max_len > 0 else 0)
        return similarity

    def add_task_processor(self, task_name, processor_function):
        # Register a custom processor so postprocess() can route new task names to it.
        self.task_processors[task_name] = processor_function
        self.logger.info(f"Added new task processor: {task_name}")
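
A short sketch of how the postprocessor above could be wired to OCR output; the strings and confidence scores below are invented, and the expected tuples follow directly from the code in this hunk:

    from matrice_analytics.post_processing.ocr.postprocessing import TextPostprocessor

    post = TextPostprocessor()

    # Invented OCR output: raw strings with their confidence scores.
    texts = ["٢٣٤٥٦", "B1234", "garbled"]
    confidences = [0.93, 0.77, 0.12]

    # Region routing: 'qatar' selects _process_license_plate_qatar, which maps Arabic
    # numerals to digits and keeps digits only; entries below the default 0.25
    # confidence threshold come back as (None, confidence, False).
    results = post.postprocess(texts, confidences, task="license_plate", region="qatar")
    for processed, confidence, valid in results:
        print(processed, confidence, valid)
    # ('23456', 0.93, True), ('81234', 0.77, True), (None, 0.12, False)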
matrice_analytics/post_processing/ocr/preprocessing.py
@@ -0,0 +1,52 @@
import cv2
import numpy as np

class ImagePreprocessor:
    def __init__(self):
        """Initialize the image preprocessor"""
        pass

    def preprocess(self, image_np, resize_dim=None, grayscale=True):
        """
        Preprocesses the image with various operations.

        Args:
            image_np (np.ndarray): Input image as a numpy array.
            resize_dim (tuple): Desired dimensions (width, height). If None, no resizing is done.
            grayscale (bool): Whether to convert the image to grayscale.

        Returns:
            np.ndarray: Preprocessed image.
        """
        processed_image = image_np.copy()

        # Convert to grayscale if requested
        if grayscale:
            if len(processed_image.shape) == 3:  # Only convert if the image still has colour channels
                processed_image = cv2.cvtColor(processed_image, cv2.COLOR_RGB2GRAY)

        # Resize image if dimensions are provided
        if resize_dim:
            processed_image = cv2.resize(processed_image, resize_dim, interpolation=cv2.INTER_LINEAR)

        return processed_image

    def crop_to_bboxes(self, image_np, bboxes):
        """
        Crops the image to the specified bounding boxes.

        Args:
            image_np (np.ndarray): Input image as a numpy array.
            bboxes (list): List of bounding boxes. Each box is a list of [xmin, ymin, xmax, ymax].

        Returns:
            list: List of cropped images.
        """
        cropped_images = []

        for box in bboxes:
            xmin, ymin, xmax, ymax = map(int, box)
            cropped_img = image_np[ymin:ymax, xmin:xmax]
            cropped_images.append(cropped_img)

        return cropped_images
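
Finally, a sketch chaining this preprocessor with the extractor from the first hunk; the file name and box coordinates are again illustrative:

    import cv2
    from matrice_analytics.post_processing.ocr.preprocessing import ImagePreprocessor
    from matrice_analytics.post_processing.ocr.easyocr_extractor import EasyOCRExtractor

    image = cv2.cvtColor(cv2.imread("receipt.jpg"), cv2.COLOR_BGR2RGB)

    pre = ImagePreprocessor()
    # Grayscale plus a bounded resolution can reduce OCR latency on large frames.
    prepared = pre.preprocess(image, resize_dim=(1280, 720), grayscale=True)

    # Crop a single region of interest (coordinates are in the resized image).
    crops = pre.crop_to_bboxes(prepared, [[50, 60, 400, 120]])

    extractor = EasyOCRExtractor(lang=['en'])
    for crop in crops:
        # detail=0 returns plain recognized strings for each crop.
        print(extractor.extract(crop, detail=0))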