PyPI - Semapp - Versions diffs - 1.0.5__py3-none-any.whl - Mend

Semapp 1.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

semapp/Layout/__init__.py +26 -0
semapp/Layout/create_button.py +1248 -0
semapp/Layout/main_window_att.py +54 -0
semapp/Layout/settings.py +170 -0
semapp/Layout/styles.py +152 -0
semapp/Layout/toast.py +157 -0
semapp/Plot/__init__.py +8 -0
semapp/Plot/frame_attributes.py +690 -0
semapp/Plot/overview_window.py +355 -0
semapp/Plot/styles.py +55 -0
semapp/Plot/utils.py +295 -0
semapp/Processing/__init__.py +4 -0
semapp/Processing/detection.py +513 -0
semapp/Processing/klarf_reader.py +461 -0
semapp/Processing/processing.py +686 -0
semapp/Processing/rename_tif.py +498 -0
semapp/Processing/split_tif.py +323 -0
semapp/Processing/threshold.py +777 -0
semapp/__init__.py +10 -0
semapp/asset/icon.png +0 -0
semapp/main.py +103 -0
semapp-1.0.5.dist-info/METADATA +300 -0
semapp-1.0.5.dist-info/RECORD +27 -0
semapp-1.0.5.dist-info/WHEEL +5 -0
semapp-1.0.5.dist-info/entry_points.txt +2 -0
semapp-1.0.5.dist-info/licenses/LICENSE +674 -0
semapp-1.0.5.dist-info/top_level.txt +1 -0

semapp/Processing/detection.py ADDED Viewed

@@ -0,0 +1,513 @@
+"""
+Module for detecting numbers in TIFF images using OCR (Tesseract).
+"""
+import cv2
+import pytesseract
+from PIL import Image
+import os
+import numpy as np
+import pandas as pd
+import time
+from multiprocessing import Pool, cpu_count
+from functools import partial
+# Configuration: upper bound on the number of worker processes used for multiprocessing
+# Set to None to use cpu_count() workers, or specify a number (e.g., 4 for 4 workers)
+N_CPU = 8  # Effective worker count is min(N_CPU, number of pages) - see detect_numbers_in_tiff
+class Detection:
+    """
+    A class to handle number detection in TIFF images using OCR.
+    """
+    def __init__(self, dirname, roi=None):
+        """
+        Store the working directory and OCR region, then configure Tesseract.
+        Args:
+            dirname (str): Base directory that holds the files to analyse.
+            roi (tuple): Optional (x, y, w, h) region of interest. When omitted,
+                        (1100, 0, 250, 35) is used.
+        """
+        default_roi = (1100, 0, 250, 35)
+        self.dirname = dirname
+        self.roi = default_roi if roi is None else roi
+        # May raise RuntimeError on Windows when no Tesseract executable is found
+        self._setup_tesseract()
+    def _setup_tesseract(self):
+        """
+        Make sure pytesseract can find the Tesseract executable on Windows.
+        On other platforms nothing is checked and True is returned. On Windows an
+        executable that pytesseract can already run is preferred; otherwise a few
+        common installation paths are probed and the first existing one is assigned
+        to pytesseract.pytesseract.tesseract_cmd.
+        Returns:
+            bool: True if Tesseract is configured successfully
+        Raises:
+            RuntimeError: On Windows, when no Tesseract executable is found.
+        """
+        import sys
+        if sys.platform != 'win32':
+            return True
+        # Check if Tesseract is already usable (e.g. found through PATH)
+        try:
+            pytesseract.get_tesseract_version()
+            return True
+        except Exception:
+            # Not usable yet: fall through to the path probing below.
+            # (A bare "except:" would also swallow KeyboardInterrupt/SystemExit.)
+            pass
+        # Common paths where Tesseract might be installed
+        possible_paths = [
+            r"C:\Program Files\Tesseract-OCR\tesseract.exe",
+            r"C:\Program Files (x86)\Tesseract-OCR\tesseract.exe",
+            r"C:\Users\{}\AppData\Local\Programs\Tesseract-OCR\tesseract.exe".format(os.getenv('USERNAME', '')),
+        ]
+        for path in possible_paths:
+            # isfile (not exists) so a directory with that name is never selected
+            if os.path.isfile(path):
+                pytesseract.pytesseract.tesseract_cmd = path
+                return True
+        raise RuntimeError(
+            "Tesseract not found. Please install Tesseract OCR:\n"
+            "1. Download from: https://github.com/UB-Mannheim/tesseract/wiki\n"
+            "2. Install in default path: C:\\Program Files\\Tesseract-OCR\\"
+        )
+    def detect_number_on_image(self, img_array, page_num=None, show_detection=False, resize_factor=1.0):
+        """
+        Detect numbers on an image (numpy array).
+        The region of interest (self.roi, clamped to the image bounds) is optionally
+        downscaled, converted to grayscale, median-filtered and Otsu-binarized, then
+        read by Tesseract with a digits-only whitelist.
+        Args:
+            img_array: Image as numpy array (2-D grayscale or color)
+            page_num: Page number (optional; not used by the detection itself)
+            show_detection: If True, also returns annotated image with detected zones
+            resize_factor: Factor to resize the ROI before OCR (1.0 = no resize, 0.5 = half size
+                           for speed). Values outside (0, 1) leave the ROI unchanged.
+        Returns:
+            If show_detection=False: Detected digits as a string, or None
+            If show_detection=True: Tuple (detected_digits_or_None, annotated_image)
+        Raises:
+            RuntimeError: If Tesseract cannot be executed.
+        """
+        try:
+            # Verify Tesseract is available
+            pytesseract.get_tesseract_version()
+        except Exception as e:
+            # RuntimeError is still an Exception, so existing "except Exception" handlers keep working
+            raise RuntimeError(f"Tesseract not available: {e}") from e
+        # Extract ROI if specified
+        if self.roi is not None:
+            x_roi, y_roi, w_roi, h_roi = self.roi
+            # Ensure ROI is within image bounds
+            height, width = img_array.shape[:2]
+            x_roi = max(0, min(x_roi, width - 1))
+            y_roi = max(0, min(y_roi, height - 1))
+            w_roi = min(w_roi, width - x_roi)
+            h_roi = min(h_roi, height - y_roi)
+            roi_img = img_array[y_roi:y_roi+h_roi, x_roi:x_roi+w_roi]
+        else:
+            roi_img = img_array
+            x_roi, y_roi = 0, 0
+        # Resize for faster processing (only when 0 < resize_factor < 1.0)
+        if 0 < resize_factor < 1.0:
+            # Keep at least 1 px per side: a tiny factor would otherwise request an empty image
+            new_w = max(1, int(roi_img.shape[1] * resize_factor))
+            new_h = max(1, int(roi_img.shape[0] * resize_factor))
+            roi_img = cv2.resize(roi_img, (new_w, new_h), interpolation=cv2.INTER_AREA)
+            # Factors that map OCR box coordinates back to full-resolution pixels
+            scale_x = 1.0 / resize_factor
+            scale_y = 1.0 / resize_factor
+        else:
+            scale_x = 1.0
+            scale_y = 1.0
+        # Convert to grayscale if necessary
+        if len(roi_img.shape) == 3:
+            # NOTE(review): pages loaded by detect_numbers_in_tiff come from PIL and look to be
+            # in RGB order, while this conversion assumes BGR - confirm whether that matters
+            gray = cv2.cvtColor(roi_img, cv2.COLOR_BGR2GRAY)
+            # Keep color copy of the full image for display
+            display_img = img_array.copy() if show_detection else None
+        else:
+            gray = roi_img
+            display_img = cv2.cvtColor(img_array, cv2.COLOR_GRAY2BGR) if show_detection else None
+        # Filter to reduce noise (small kernel for speed)
+        gray = cv2.medianBlur(gray, 3)
+        # Binarization (black and white) to improve text
+        _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+        # Tesseract configuration: PSM 8 (single word), digits-only whitelist
+        custom_config = r'--oem 3 --psm 8 -c tessedit_char_whitelist=0123456789'
+        # Convert to PIL Image for pytesseract
+        pil_image = Image.fromarray(thresh)
+        # Read text and keep only digits
+        text = pytesseract.image_to_string(pil_image, config=custom_config)
+        digits_only = "".join(ch for ch in text if ch.isdigit())
+        detected = digits_only if digits_only else None
+        if not show_detection:
+            # Bounding boxes are only needed for the annotated image, so the second
+            # (image_to_data) OCR pass is skipped entirely on this path
+            return detected
+        # Annotation is best-effort: the digits read above are returned even if it fails
+        try:
+            # Get detailed data (bounding boxes)
+            data = pytesseract.image_to_data(pil_image, config=custom_config, output_type=pytesseract.Output.DICT)
+            boxes = zip(data['conf'], data['left'], data['top'], data['width'], data['height'], data['text'])
+            for raw_conf, x, y, w, h, raw_text in boxes:
+                # NOTE(review): depending on the pytesseract/Tesseract versions the confidence
+                # may be an int, '' or a float-like string, so it is parsed defensively
+                try:
+                    conf = float(raw_conf)
+                except (TypeError, ValueError):
+                    conf = -1.0
+                detected_text = str(raw_text).strip()
+                # Only label boxes with positive confidence, a real size and digit-only text
+                if conf <= 0 or w <= 0 or h <= 0 or not detected_text.isdigit():
+                    continue
+                # Map the box from (possibly resized) ROI coordinates to full-image coordinates
+                x_abs = int(x * scale_x) + x_roi
+                y_abs = int(y * scale_y) + y_roi
+                w_scaled = int(w * scale_x)
+                h_scaled = int(h * scale_y)
+                # Green rectangle and label for detected digits (on full image)
+                cv2.rectangle(display_img, (x_abs, y_abs), (x_abs + w_scaled, y_abs + h_scaled), (0, 255, 0), 3)
+                cv2.putText(display_img, detected_text, (x_abs, max(y_abs - 5, 10)),
+                          cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+            # Draw ROI rectangle for visualization
+            if self.roi is not None:
+                cv2.rectangle(display_img, (x_roi, y_roi), (x_roi + w_roi, y_roi + h_roi), (255, 255, 0), 2)
+                # Label above the ROI when there is room, otherwise just below it, so it
+                # stays visible for an ROI that touches the top edge (the default one does)
+                label_y = y_roi - 10 if y_roi >= 20 else y_roi + h_roi + 20
+                cv2.putText(display_img, "ROI", (x_roi, label_y),
+                          cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 0), 2)
+        except Exception:
+            # Deliberate fallback: return the image without (complete) annotations
+            pass
+        return detected, display_img
+    @staticmethod
+    def _process_single_page_static(args):
+        """
+        Worker entry point that runs OCR on one page (for multiprocessing).
+        Args:
+            args: Tuple of (page_num, img_array, roi, resize_factor, dirname)
+        Returns:
+            Tuple (page_num, detected_number). detected_number is None when no
+            number was detected or when any error occurred for this page.
+        """
+        page_num, img_array, roi, resize_factor, dirname = args
+        detected_number = None
+        try:
+            # Each call builds its own detector instead of sharing one between workers
+            worker = Detection(dirname=dirname, roi=roi)
+            detected_number = worker.detect_number_on_image(
+                img_array,
+                page_num,
+                show_detection=False,
+                resize_factor=resize_factor,
+            )
+        except Exception:
+            # Any failure is reported as "no number" for this page
+            detected_number = None
+        return (page_num, detected_number)
+    def detect_numbers_in_tiff(self, tiff_path, verbose=True, use_multiprocessing=True, resize_factor=1.0):
+        """
+        Process a multi-page TIFF file and detect numbers on each page.
+        All pages are first loaded into memory, then processed either in a
+        multiprocessing Pool or sequentially.
+        Args:
+            tiff_path: Path to the TIFF file
+            verbose: If True, print progress messages
+            use_multiprocessing: If True, use parallel processing when there is more than one page
+            resize_factor: Factor to resize ROI before OCR (1.0 = no resize, 0.5 = half size for speed)
+        Returns:
+            List of tuples (page_num, detected_number) sorted by page number (1-based);
+            an empty list if tiff_path does not exist.
+        Raises:
+            RuntimeError: If the file cannot be opened/read or the processing fails.
+        """
+        if not os.path.exists(tiff_path):
+            return []
+        results = []
+        try:
+            # Load all pages into memory
+            pages_data = []
+            # The context manager closes the file even when reading a page raises
+            with Image.open(tiff_path) as img:
+                # Count number of pages (single-frame images may not expose n_frames)
+                num_pages = getattr(img, 'n_frames', 1)
+                if verbose:
+                    print(f"Found {num_pages} page(s) in TIFF file")
+                    if resize_factor < 1.0:
+                        print(f"Resizing ROI by factor {resize_factor} for speed")
+                    print("Processing pages...")
+                for page_num in range(num_pages):
+                    try:
+                        img.seek(page_num)
+                        img_array = np.array(img)
+                        # Convert RGBA to RGB if necessary
+                        if len(img_array.shape) == 3 and img_array.shape[2] == 4:
+                            img_array = cv2.cvtColor(img_array, cv2.COLOR_RGBA2RGB)
+                        pages_data.append((page_num + 1, img_array))
+                    except EOFError:
+                        break
+            start_time = time.time()
+            if use_multiprocessing and len(pages_data) > 1:
+                # Use multiprocessing for parallel processing
+                max_workers = cpu_count() if N_CPU is None else N_CPU
+                # At most one worker per page, and never fewer than one (Pool rejects values < 1)
+                num_workers = max(1, min(max_workers, len(pages_data)))
+                if verbose:
+                    print(f"Using {num_workers} CPU core(s) for parallel processing")
+                process_args = [(page_num, img_array, self.roi, resize_factor, self.dirname)
+                              for page_num, img_array in pages_data]
+                with Pool(processes=num_workers) as pool:
+                    results = pool.map(Detection._process_single_page_static, process_args)
+            else:
+                # Sequential processing
+                for page_num, img_array in pages_data:
+                    if verbose and page_num % 10 == 1:
+                        print(f"  Processing page {page_num}/{num_pages}...", end='\r')
+                    detected_number = self.detect_number_on_image(
+                        img_array, page_num, show_detection=False, resize_factor=resize_factor
+                    )
+                    results.append((page_num, detected_number))
+                if verbose:
+                    # Finish the carriage-return progress line
+                    print()
+            processing_time = time.time() - start_time
+            if verbose:
+                # Guard the average: no page may have been loaded at all
+                per_page = processing_time / len(results) if results else 0.0
+                print(f"Processing time: {processing_time:.2f} seconds ({per_page:.3f} sec/page)")
+            # Sort results by page number (important for multiprocessing)
+            results.sort(key=lambda x: x[0])
+            if verbose:
+                print(f"\nCompleted processing {len(results)} page(s)")
+        except Exception as e:
+            # Message kept as-is for callers that store or display it; the cause is chained
+            raise RuntimeError(f"Error opening TIFF file: {e}") from e
+        return results
+    def detect_numbers_in_directory(self, wafer_number=None, skip_if_csv_exists=True):
+        """
+        Detect numbers in all TIFF files in the directory or specific wafer folder.
+        Args:
+            wafer_number (str or int, optional): Specific wafer number to process; the folder
+                                         dirname/<wafer_number> is searched. If None or "",
+                                         processes all TIFF files under dirname.
+            skip_if_csv_exists (bool): If True, skip directories that already have detection_results.csv
+        Returns:
+            Dictionary mapping file paths to detection results. Each value is the list
+            returned by detect_numbers_in_tiff, or an "Error: ..." string if that file failed.
+            Empty when the folder does not exist, was skipped, or holds no TIFF file.
+        """
+        results = {}
+        # Compare against None/"" explicitly so that wafer number 0 still selects folder "0"
+        if wafer_number is None or wafer_number == "":
+            search_dir = self.dirname
+        else:
+            search_dir = os.path.join(self.dirname, str(wafer_number))
+        if not os.path.exists(search_dir):
+            return results
+        # Check if CSV already exists and skip if requested
+        if skip_if_csv_exists and os.path.exists(os.path.join(search_dir, "detection_results.csv")):
+            return results  # Already processed, skip
+        # Find all TIFF files (recursively)
+        tiff_files = []
+        for root, _dirs, files in os.walk(search_dir):
+            tiff_files.extend(
+                os.path.join(root, file_name)
+                for file_name in files
+                if file_name.lower().endswith(('.tif', '.tiff'))
+            )
+        # Process each TIFF file; one failing file must not abort the others
+        for tiff_path in tiff_files:
+            try:
+                results[tiff_path] = self.detect_numbers_in_tiff(tiff_path, verbose=False)
+            except Exception as e:
+                results[tiff_path] = f"Error: {e}"
+        return results
+    def save_results_to_csv(self, results, output_path=None):
+        """
+        Save detection results to a CSV file.
+        The file always starts with the header File,Path,Page,Detected_Number, even
+        when there is nothing to write.
+        Args:
+            results: Dictionary from detect_numbers_in_directory, or list/tuple of
+                     (page_num, detected_number) pairs from detect_numbers_in_tiff.
+                     Any other type produces a header-only file.
+            output_path: Path to output CSV file. If None, saves in dirname.
+        Returns:
+            Path to saved CSV file
+        """
+        # NOTE(review): detect_numbers_in_directory returns {} when it skips an already
+        # processed folder; saving that result to the same path replaces the existing CSV
+        # with a header-only file - callers may want to check for an empty result first
+        if output_path is None:
+            output_path = os.path.join(self.dirname, "detection_results.csv")
+        columns = ['File', 'Path', 'Page', 'Detected_Number']
+        rows = []
+        # Handle different result formats
+        if isinstance(results, dict):
+            # Results from detect_numbers_in_directory
+            for file_path, file_results in results.items():
+                file_name = os.path.basename(file_path)
+                if isinstance(file_results, list):
+                    for page_num, detected_number in file_results:
+                        rows.append({
+                            'File': file_name,
+                            'Path': file_path,
+                            'Page': page_num,
+                            'Detected_Number': detected_number if detected_number else 'None'
+                        })
+                else:
+                    # Not a page list: detect_numbers_in_directory stores an error message here
+                    rows.append({
+                        'File': file_name,
+                        'Path': file_path,
+                        'Page': 'N/A',
+                        'Detected_Number': str(file_results)
+                    })
+        elif isinstance(results, (list, tuple)):
+            # Results from detect_numbers_in_tiff
+            for page_num, detected_number in results:
+                rows.append({
+                    'File': 'N/A',
+                    'Path': 'N/A',
+                    'Page': page_num,
+                    'Detected_Number': detected_number if detected_number else 'None'
+                })
+        # Fixed columns keep the header row even when rows is empty
+        df = pd.DataFrame(rows, columns=columns)
+        df.to_csv(output_path, index=False)
+        return output_path
+if __name__ == "__main__":
+    # Manual smoke test for the detection pipeline.
+    # Usage: python detection.py [path/to/file.tif]
+    import sys
+    # Example 1: Test on a single TIFF file
+    # Pass the TIFF path as the first command-line argument; the developer
+    # default below is only used when no argument is given
+    default_tiff_file = r"C:\Users\TM273821\Desktop\SEM\Detection\1\AsGa_FAV_2X_WIW_200_2X_REVIEW_03151011.tif"
+    tiff_file = sys.argv[1] if len(sys.argv) > 1 else default_tiff_file
+    print("=" * 60)
+    print("DETECTION TEST")
+    print("=" * 60)
+    print(f"File: {tiff_file}")
+    print(f"File exists: {os.path.exists(tiff_file)}")
+    if not os.path.exists(tiff_file):
+        print(f"ERROR: File not found: {tiff_file}")
+        # sys.exit instead of the site-provided exit(), which is not always available
+        sys.exit(1)
+    try:
+        print("\nInitializing detector...")
+        detector = Detection(dirname=os.path.dirname(tiff_file))
+        print(f"ROI: {detector.roi}")
+        print(f"CPU cores available: {cpu_count()}")
+        print(f"CPU cores to use: {N_CPU if N_CPU is not None else 'All (' + str(cpu_count()) + ')'}")
+        print("\nProcessing TIFF file...")
+        start_total = time.time()
+        # Use multiprocessing; lower resize_factor to trade accuracy for speed
+        results = detector.detect_numbers_in_tiff(
+            tiff_file,
+            use_multiprocessing=True,
+            resize_factor=1.0  # Set to 0.5 for even faster processing (may reduce accuracy)
+        )
+        total_time = time.time() - start_total
+        print(f"\n{'='*60}")
+        print(f"TOTAL TIME: {total_time:.2f} seconds")
+        if len(results) > 0:
+            print(f"Time per page: {total_time/len(results):.3f} seconds")
+        print(f"{'='*60}")
+        print(f"\nTotal pages processed: {len(results)}")
+        print("\nResults:")
+        print("-" * 60)
+        for page_num, detected_number in results:
+            if detected_number:
+                print(f"Page {page_num}: {detected_number}")
+            else:
+                print(f"Page {page_num}: No number detected")
+        # Save results next to the TIFF file
+        output_csv = os.path.join(os.path.dirname(tiff_file), "detection_results.csv")
+        print(f"\nSaving results to: {output_csv}")
+        detector.save_results_to_csv(results, output_csv)
+        print("Results saved successfully!")
+    except Exception as e:
+        print(f"\nERROR: {e}")
+        import traceback
+        traceback.print_exc()
+    # Example 2: Test on a directory
+    # Uncomment and modify the path to test
+    # dirname = r"C:\Users\TM273821\Desktop\SEM\Detection"
+    # detector = Detection(dirname=dirname)
+    # results = detector.detect_numbers_in_directory()
+    # detector.save_results_to_csv(results)
+    # Example 3: Test on a specific wafer folder
+    # Uncomment and modify the paths to test
+    # dirname = r"C:\Users\TM273821\Desktop\SEM\Detection"
+    # wafer_number = "1"
+    # detector = Detection(dirname=dirname)
+    # results = detector.detect_numbers_in_directory(wafer_number=wafer_number)
+    # output_path = os.path.join(dirname, wafer_number, "detection_results.csv")
+    # detector.save_results_to_csv(results, output_path)
+    # Example 4: Test with custom ROI
+    # Uncomment and modify the path to test
+    # tiff_file = r"C:\Users\TM273821\Desktop\SEM\Detection\AsGa_FAV_2X_WIW_200_2X_REVIEW_03151011.tif"
+    # custom_roi = (1100, 0, 250, 35)  # x, y, width, height
+    # detector = Detection(dirname=os.path.dirname(tiff_file), roi=custom_roi)
+    # results = detector.detect_numbers_in_tiff(tiff_file)
+    # print("\nResults with custom ROI:")
+    # for page_num, detected_number in results:
+    #     print(f"Page {page_num}: {detected_number}")
+    # print("Detection module loaded. Uncomment examples in __main__ to test.")