natural-pdf 0.2.12__py3-none-any.whl → 0.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,8 @@ import numpy as np
 from PIL import Image
 from tqdm.auto import tqdm
 
+from .template_matching import TemplateMatcher
+
 
 @dataclass
 class MatchCandidate:
@@ -17,7 +19,12 @@ class MatchCandidate:
     confidence: float
 
 
-def compute_phash(image: Image.Image, hash_size: int = 8, blur_radius: float = 0) -> int:
+def compute_phash(
+    image: Image.Image,
+    hash_size: int = 8,
+    blur_radius: float = 0,
+    mask_threshold: Optional[float] = None,
+) -> int:
     """
     Compute perceptual hash of an image using DCT.
 
@@ -25,6 +32,8 @@ def compute_phash(image: Image.Image, hash_size: int = 8, blur_radius: float = 0
         image: PIL Image to hash
         hash_size: Size of the hash (8 = 64-bit hash)
         blur_radius: Optional blur to apply before hashing (makes matching more tolerant)
+        mask_threshold: If provided, pixels >= this value (0-255 scale) are treated as
+            background and normalized out before hashing. Useful for ignoring white backgrounds.
 
     Returns:
         Integer hash value
@@ -39,6 +48,25 @@ def compute_phash(image: Image.Image, hash_size: int = 8, blur_radius: float = 0
 
         image = image.filter(ImageFilter.GaussianBlur(radius=blur_radius))
 
+    # Apply masking if threshold provided
+    if mask_threshold is not None:
+        # For phash, masking works by normalizing the background so the hash
+        # reflects relative differences rather than absolute pixel values
+        img_array = np.array(image, dtype=np.float32)
+
+        # Normalize by subtracting a representative background value,
+        # taken from the bright (background) pixels
+        bright_pixels = img_array[img_array >= mask_threshold]
+        if len(bright_pixels) > 0:
+            # Use the median of the bright pixels as the background
+            background_val = np.median(bright_pixels)
+            # Re-center the image so the background sits at mid-gray;
+            # different backgrounds then produce similar hashes
+            img_array = np.clip(img_array - background_val + 128, 0, 255)
+
+        # Convert back to PIL Image
+        image = Image.fromarray(img_array.astype(np.uint8))
+
     # Resize to 32x32 (4x the hash size for DCT)
     highfreq_factor = 4
     img_size = hash_size * highfreq_factor
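A minimal sketch of what the new mask_threshold parameter buys (the images and threshold below are invented for illustration): the same dark mark on two different light backgrounds should hash nearly identically once each background is re-centered to mid-gray.

    from PIL import Image, ImageDraw

    # Same dark square on two different light backgrounds (hypothetical values)
    img_a = Image.new("L", (64, 64), 250)   # near-white background
    img_b = Image.new("L", (64, 64), 210)   # light-gray background
    for img in (img_a, img_b):
        ImageDraw.Draw(img).rectangle([16, 16, 48, 48], fill=30)

    # With mask_threshold=180 (0-255 scale), both backgrounds collapse to
    # mid-gray before hashing, so the two 64-bit hashes should agree closely
    h_a = compute_phash(img_a, mask_threshold=180)
    h_b = compute_phash(img_b, mask_threshold=180)
    print(hash_similarity(h_a, h_b))   # expected to be near 1.0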
@@ -80,12 +108,13 @@ def hash_similarity(hash1: int, hash2: int, hash_size: int = 64) -> float:
 
 
 class VisualMatcher:
-    """Handles visual similarity matching using perceptual hashing"""
+    """Handles visual similarity matching using perceptual hashing or template matching"""
 
     def __init__(self, hash_size: int = 12):
         self.hash_size = hash_size
         self.hash_bits = hash_size * hash_size
         self._cache = {}
+        self.template_matcher = TemplateMatcher()  # Default: "zncc"
 
     def _get_search_scales(self, sizes: Optional[Union[float, Tuple, List]]) -> List[float]:
         """
@@ -172,20 +201,22 @@
         target: Image.Image,
         template_hash: Optional[int] = None,
         confidence_threshold: float = 0.6,
-        step_factor: float = 0.1,
+        step: Optional[int] = None,
         sizes: Optional[Union[float, Tuple, List]] = None,
         show_progress: bool = True,
         progress_callback: Optional[Callable[[], None]] = None,
+        method: str = "phash",
+        mask_threshold: Optional[float] = None,
     ) -> List[MatchCandidate]:
         """
-        Find all matches of template in target image using sliding window.
+        Find all matches of template in target image.
 
         Args:
             template: Template image to search for
             target: Target image to search in
-            template_hash: Pre-computed hash of template (optional)
+            template_hash: Pre-computed hash of template (optional, only for phash)
             confidence_threshold: Minimum similarity score (0-1)
-            step_factor: Step size as fraction of template size
+            step: Step size in pixels for sliding window
             sizes: Size variations to search. Can be:
                 - float: ±percentage (e.g., 0.2 = 80%-120%)
                 - tuple(min, max): search range with smart logarithmic steps
@@ -193,15 +224,153 @@
                 - list: exact sizes to try (e.g., [0.8, 1.0, 1.2])
             show_progress: Show progress bar for sliding window search
             progress_callback: Optional callback function to call for each window checked
+            method: "phash" (default) or "template" for template matching
+            mask_threshold: Pixels >= this value (0-1 scale) are treated as background.
+                - For template matching: masked pixels are ignored in the correlation
+                - For phash: the background is normalized away before hashing
+                Useful for logos/text on varying backgrounds (e.g., 0.95)
 
         Returns:
             List of MatchCandidate objects
         """
+        if method == "template":
+            # Use template matching
+            return self._template_match(
+                template,
+                target,
+                confidence_threshold,
+                step,
+                sizes,
+                show_progress,
+                progress_callback,
+                mask_threshold,
+            )
+        else:
+            # Use the existing perceptual hash matching
+            return self._phash_match(
+                template,
+                target,
+                template_hash,
+                confidence_threshold,
+                step,
+                sizes,
+                show_progress,
+                progress_callback,
+                mask_threshold,
+            )
+
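A usage sketch of the new dispatch (hedged: the enclosing method's name is not visible in this hunk, so find_matches_in_image, the file paths, and the parameter values are assumptions for illustration):

    from PIL import Image

    matcher = VisualMatcher()
    template = Image.open("logo.png")   # placeholder paths
    target = Image.open("page.png")

    # Pixel-level template matching, ignoring near-white template pixels
    hits = matcher.find_matches_in_image(
        template, target,
        confidence_threshold=0.8,
        method="template",
        mask_threshold=0.95,
    )

    # Scale-tolerant perceptual-hash search with a 4 px stride
    hits = matcher.find_matches_in_image(
        template, target,
        step=4,
        sizes=(0.8, 1.2),
        method="phash",
    )
    for m in hits:
        print(m.bbox, round(m.confidence, 3))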
+    def _template_match(
+        self, template, target, threshold, step, sizes, show_progress, callback, mask_threshold
+    ):
+        """Template matching implementation"""
+        matches = []
+
+        template_w, template_h = template.size
+        target_w, target_h = target.size
+
+        # Convert to grayscale numpy arrays
+        target_gray = np.array(target.convert("L"), dtype=np.float32) / 255.0
+
+        # Determine scales to search
+        scales = self._get_search_scales(sizes)
+
+        # Default step size if not provided
+        if step is None:
+            step = 1
+
+        # Calculate total operations for progress bar
+        total_operations = 0
+        if show_progress and not callback:
+            for scale in scales:
+                scaled_w = int(template_w * scale)
+                scaled_h = int(template_h * scale)
+
+                if scaled_w <= target_w and scaled_h <= target_h:
+                    # Compute score map size
+                    out_h = (target_h - scaled_h) // step + 1
+                    out_w = (target_w - scaled_w) // step + 1
+                    total_operations += out_h * out_w
+
+        # Setup progress bar
+        progress_bar = None
+        if show_progress and not callback and total_operations > 0:
+            progress_bar = tqdm(
+                total=total_operations, desc="Template matching", unit="position", leave=False
+            )
+
+        # Search at each scale
+        for scale in scales:
+            # Resize template
+            scaled_w = int(template_w * scale)
+            scaled_h = int(template_h * scale)
+
+            if scaled_w > target_w or scaled_h > target_h:
+                continue
+
+            scaled_template = template.resize((scaled_w, scaled_h), Image.Resampling.LANCZOS)
+            template_gray = np.array(scaled_template.convert("L"), dtype=np.float32) / 255.0
+
+            # Run template matching
+            scores = self.template_matcher.match_template(
+                target_gray, template_gray, step, mask_threshold
+            )
+
+            # Find peaks above threshold
+            y_indices, x_indices = np.where(scores >= threshold)
+
+            # Update progress
+            if progress_bar:
+                progress_bar.update(scores.size)
+            elif callback:
+                for _ in range(scores.size):
+                    callback()
+
+            for i in range(len(y_indices)):
+                y_idx = y_indices[i]
+                x_idx = x_indices[i]
+                score = scores[y_idx, x_idx]
+
+                # Convert back to image coordinates
+                x = x_idx * step
+                y = y_idx * step
+
+                matches.append(
+                    MatchCandidate(
+                        bbox=(x, y, x + scaled_w, y + scaled_h),
+                        hash_value=0,  # Not used for template matching
+                        confidence=float(score),
+                    )
+                )
+
+        # Close progress bar
+        if progress_bar:
+            progress_bar.close()
+
+        # Remove overlapping matches
+        return self._filter_overlapping_matches(matches)
+
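_filter_overlapping_matches is called above but defined outside this diff; as an assumption about its behavior, a greedy IoU-based non-maximum suppression along these lines would do the job (the function name and threshold here are hypothetical):

    def filter_overlapping_matches_sketch(matches, iou_threshold=0.3):
        """Keep the highest-confidence match from each overlapping cluster."""
        def iou(a, b):
            ax0, ay0, ax1, ay1 = a
            bx0, by0, bx1, by1 = b
            ix0, iy0 = max(ax0, bx0), max(ay0, by0)
            ix1, iy1 = min(ax1, bx1), min(ay1, by1)
            inter = max(0, ix1 - ix0) * max(0, iy1 - iy0)
            union = (ax1 - ax0) * (ay1 - ay0) + (bx1 - bx0) * (by1 - by0) - inter
            return inter / union if union else 0.0

        kept = []
        # Visit candidates from most to least confident; keep those that do
        # not overlap an already-kept match beyond the IoU threshold
        for m in sorted(matches, key=lambda m: m.confidence, reverse=True):
            if all(iou(m.bbox, k.bbox) < iou_threshold for k in kept):
                kept.append(m)
        return kept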
+    def _phash_match(
+        self,
+        template,
+        target,
+        template_hash,
+        threshold,
+        step,
+        sizes,
+        show_progress,
+        callback,
+        mask_threshold=None,
+    ):
+        """Original perceptual hash matching"""
         matches = []
 
         # Compute template hash if not provided
         if template_hash is None:
-            template_hash = compute_phash(template, self.hash_size)
+            # Convert mask threshold from 0-1 to 0-255 for PIL Image
+            mask_threshold_255 = int(mask_threshold * 255) if mask_threshold is not None else None
+            template_hash = compute_phash(
+                template, self.hash_size, mask_threshold=mask_threshold_255
+            )
 
         template_w, template_h = template.size
         target_w, target_h = target.size
@@ -209,22 +378,24 @@
         # Determine scales to search
         scales = self._get_search_scales(sizes)
 
+        # Default step size if not provided (10% of template size)
+        if step is None:
+            step = max(1, int(min(template_w, template_h) * 0.1))
+
         # Calculate total iterations for progress bar
         total_iterations = 0
-        if show_progress and not progress_callback:
+        if show_progress and not callback:
             for scale in scales:
                 scaled_w = int(template_w * scale)
                 scaled_h = int(template_h * scale)
                 if scaled_w <= target_w and scaled_h <= target_h:
-                    step_x = max(1, int(scaled_w * step_factor))
-                    step_y = max(1, int(scaled_h * step_factor))
-                    x_steps = len(range(0, target_w - scaled_w + 1, step_x))
-                    y_steps = len(range(0, target_h - scaled_h + 1, step_y))
+                    x_steps = len(range(0, target_w - scaled_w + 1, step))
+                    y_steps = len(range(0, target_h - scaled_h + 1, step))
                     total_iterations += x_steps * y_steps
 
         # Setup progress bar if needed (only if no callback provided)
         progress_bar = None
-        if show_progress and not progress_callback and total_iterations > 0:
+        if show_progress and not callback and total_iterations > 0:
             progress_bar = tqdm(total=total_iterations, desc="Scanning", unit="window", leave=False)
 
         # Search at each scale
@@ -236,13 +407,9 @@
             if scaled_w > target_w or scaled_h > target_h:
                 continue
 
-            # Calculate step size
-            step_x = max(1, int(scaled_w * step_factor))
-            step_y = max(1, int(scaled_h * step_factor))
-
             # Sliding window search
-            for y in range(0, target_h - scaled_h + 1, step_y):
-                for x in range(0, target_w - scaled_w + 1, step_x):
+            for y in range(0, target_h - scaled_h + 1, step):
+                for x in range(0, target_w - scaled_w + 1, step):
                     # Extract window
                     window = target.crop((x, y, x + scaled_w, y + scaled_h))
 
@@ -251,10 +418,15 @@
                         window = window.resize((template_w, template_h), Image.Resampling.LANCZOS)
 
                     # Compute hash and similarity
-                    window_hash = compute_phash(window, self.hash_size)
+                    mask_threshold_255 = (
+                        int(mask_threshold * 255) if mask_threshold is not None else None
+                    )
+                    window_hash = compute_phash(
+                        window, self.hash_size, mask_threshold=mask_threshold_255
+                    )
                     similarity = hash_similarity(template_hash, window_hash, self.hash_bits)
 
-                    if similarity >= confidence_threshold:
+                    if similarity >= threshold:
                         # Convert back to target image coordinates
                         bbox = (x, y, x + scaled_w, y + scaled_h)
                         matches.append(MatchCandidate(bbox, window_hash, similarity))
@@ -262,8 +434,8 @@
                     # Update progress
                     if progress_bar:
                         progress_bar.update(1)
-                    elif progress_callback:
-                        progress_callback()
+                    elif callback:
+                        callback()
 
         # Close progress bar
         if progress_bar:
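Note the semantics change across this file: step_factor (a per-axis fraction of the scaled template) is replaced by a single absolute pixel stride, step, and the two methods default differently when step is None. Worked numbers for a 200×100 template at scale 1.0, taken directly from the expressions above:

    # 0.2.12: step_factor=0.1 -> step_x = max(1, int(200 * 0.1)) = 20 px,
    #                            step_y = max(1, int(100 * 0.1)) = 10 px
    # 0.2.13: step=None -> phash:    max(1, int(min(200, 100) * 0.1)) = 10 px (both axes)
    #                      template: step = 1 px (exhaustive, pixel-accurate)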
@@ -0,0 +1,209 @@
+"""Pure NumPy template matching implementation"""
+
+from dataclasses import dataclass
+from typing import List, Optional, Tuple
+
+import numpy as np
+
+
+@dataclass
+class TemplateMatch:
+    """Result of template matching"""
+
+    bbox: Tuple[int, int, int, int]  # x0, y0, x1, y1
+    score: float  # 0-1, higher is better
+
+
+class TemplateMatcher:
+    """Pure NumPy template matching implementation"""
+
+    def __init__(self, method: str = "zncc"):
+        """
+        Args:
+            method: Matching method
+                - "zncc": Zero-mean Normalized Cross-Correlation (default, recommended)
+                - "ncc": Normalized Cross-Correlation
+                - "ssd": Sum of Squared Differences
+        """
+        self.method = method
+
+    def match_template(
+        self,
+        image: np.ndarray,
+        template: np.ndarray,
+        step: int = 1,
+        mask_threshold: Optional[float] = None,
+    ) -> np.ndarray:
+        """
+        Compute similarity map between image and template.
+
+        Args:
+            image: Target image (grayscale, normalized 0-1)
+            template: Template to search for (grayscale, normalized 0-1)
+            step: Step size for sliding window (1 = pixel perfect, >1 = faster)
+            mask_threshold: If provided, pixels >= this value in template are masked (ignored).
+                Useful for ignoring white backgrounds (e.g., 0.95 for near-white)
+
+        Returns:
+            2D array of match scores
+        """
+        if self.method == "zncc":
+            return self._zncc(image, template, step, mask_threshold)
+        elif self.method == "ncc":
+            return self._ncc(image, template, step, mask_threshold)
+        elif self.method == "ssd":
+            return self._ssd(image, template, step, mask_threshold)
+        else:
+            # Default to zncc
+            return self._zncc(image, template, step, mask_threshold)
+
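For reference, the ZNCC score implemented below is the cosine similarity of the mean-centered window and template, restricted to the unmasked pixel set M (this restates the code, not a separate definition):

    \mathrm{ZNCC}(W, T) =
      \frac{\sum_{p \in M} (W_p - \mu_W)(T_p - \mu_T)}
           {\sqrt{\sum_{p \in M} (W_p - \mu_W)^2}\;\sqrt{\sum_{p \in M} (T_p - \mu_T)^2}}
      \in [-1, 1]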
+    def _zncc(
+        self,
+        image: np.ndarray,
+        template: np.ndarray,
+        step: int = 1,
+        mask_threshold: Optional[float] = None,
+    ) -> np.ndarray:
+        """Zero-mean Normalized Cross-Correlation - most robust"""
+        h, w = template.shape
+        img_h, img_w = image.shape
+
+        out_h = (img_h - h) // step + 1
+        out_w = (img_w - w) // step + 1
+        result = np.zeros((out_h, out_w))
+
+        # Create mask if threshold provided
+        if mask_threshold is not None:
+            mask = template < mask_threshold  # True for pixels to keep
+            if np.sum(mask) == 0:
+                # All pixels are masked - return zeros
+                return result
+        else:
+            mask = np.ones_like(template, dtype=bool)
+
+        # Precompute template statistics on non-masked pixels
+        masked_template = template[mask]
+        if len(masked_template) == 0:
+            return result
+
+        template_mean = np.mean(masked_template)
+        template_centered = np.zeros_like(template)
+        template_centered[mask] = template[mask] - template_mean
+        template_std = np.sqrt(np.sum(template_centered[mask] ** 2))
+
+        # Handle uniform template case
+        if template_std == 0:
+            # Template has no variation - fall back to checking if means match
+            for i in range(out_h):
+                for j in range(out_w):
+                    y = i * step
+                    x = j * step
+                    window = image[y : y + h, x : x + w]
+                    window_masked = window[mask]
+                    window_mean = np.mean(window_masked)
+                    window_std = np.std(window_masked)
+
+                    # Perfect match if window also has same mean and no variation
+                    if abs(window_mean - template_mean) < 0.01 and window_std < 0.01:
+                        result[i, j] = 1.0
+            return result
+
+        for i in range(out_h):
+            for j in range(out_w):
+                y = i * step
+                x = j * step
+                window = image[y : y + h, x : x + w]
+
+                # Apply mask to window
+                window_masked = window[mask]
+                window_mean = np.mean(window_masked)
+                window_centered = np.zeros_like(window)
+                window_centered[mask] = window[mask] - window_mean
+                window_std = np.sqrt(np.sum(window_centered[mask] ** 2))
+
+                if window_std > 0:
+                    correlation = np.sum(window_centered[mask] * template_centered[mask])
+                    result[i, j] = correlation / (template_std * window_std)
+
+        return np.clip(result, -1, 1)
+
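The mask convention used by all three methods keeps template pixels below the threshold, so near-white background pixels drop out of every sum; a tiny worked example:

    import numpy as np

    template = np.array([[0.10, 0.98],
                         [0.20, 0.97]], dtype=np.float32)
    mask = template < 0.95
    # mask == [[ True, False],
    #          [ True, False]]  -> the near-white column is ignored
    print(template[mask])        # [0.1 0.2]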
+    def _ncc(
+        self,
+        image: np.ndarray,
+        template: np.ndarray,
+        step: int = 1,
+        mask_threshold: Optional[float] = None,
+    ) -> np.ndarray:
+        """Normalized Cross-Correlation"""
+        h, w = template.shape
+        img_h, img_w = image.shape
+
+        out_h = (img_h - h) // step + 1
+        out_w = (img_w - w) // step + 1
+        result = np.zeros((out_h, out_w))
+
+        # Create mask if threshold provided
+        if mask_threshold is not None:
+            mask = template < mask_threshold  # True for pixels to keep
+            if np.sum(mask) == 0:
+                return result
+        else:
+            mask = np.ones_like(template, dtype=bool)
+
+        template_norm = np.sqrt(np.sum(template[mask] ** 2))
+        if template_norm == 0:
+            return result
+
+        for i in range(out_h):
+            for j in range(out_w):
+                y = i * step
+                x = j * step
+                window = image[y : y + h, x : x + w]
+
+                window_norm = np.sqrt(np.sum(window[mask] ** 2))
+                if window_norm > 0:
+                    correlation = np.sum(window[mask] * template[mask])
+                    result[i, j] = correlation / (template_norm * window_norm)
+
+        return result
+
+    def _ssd(
+        self,
+        image: np.ndarray,
+        template: np.ndarray,
+        step: int = 1,
+        mask_threshold: Optional[float] = None,
+    ) -> np.ndarray:
+        """Sum of Squared Differences - converted to similarity score"""
+        h, w = template.shape
+        img_h, img_w = image.shape
+
+        out_h = (img_h - h) // step + 1
+        out_w = (img_w - w) // step + 1
+        result = np.zeros((out_h, out_w))
+
+        # Create mask if threshold provided
+        if mask_threshold is not None:
+            mask = template < mask_threshold  # True for pixels to keep
+            if np.sum(mask) == 0:
+                return result
+        else:
+            mask = np.ones_like(template, dtype=bool)
+
+        # Number of valid pixels for normalization
+        n_valid = np.sum(mask)
+        if n_valid == 0:
+            return result
+
+        for i in range(out_h):
+            for j in range(out_w):
+                y = i * step
+                x = j * step
+                window = image[y : y + h, x : x + w]
+
+                # Only compute SSD on non-masked pixels
+                diff = window - template
+                ssd = np.sum((diff[mask]) ** 2) / n_valid
+                result[i, j] = 1.0 / (1.0 + ssd)  # Convert to similarity
+
+        return result
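A self-contained smoke test for the class above (the synthetic arrays are invented for the example; note that _ssd maps per-pixel error to a similarity via 1 / (1 + ssd), so all three methods score "higher is better"):

    import numpy as np

    matcher = TemplateMatcher(method="zncc")

    rng = np.random.default_rng(0)
    image = rng.random((100, 100)).astype(np.float32)
    template = image[20:40, 30:60]   # a patch cut straight out of the image

    scores = matcher.match_template(image, template, step=1)
    y, x = np.unravel_index(np.argmax(scores), scores.shape)
    print(y, x, float(scores[y, x]))  # expect (20, 30) with a score near 1.0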
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: natural-pdf
-Version: 0.2.12
+Version: 0.2.13
 Summary: A more intuitive interface for working with PDFs
 Author-email: Jonathan Soma <jonathan.soma@gmail.com>
 License-Expression: MIT
@@ -26,7 +26,7 @@ natural_pdf/classification/results.py,sha256=5ha77CxK0GYwkBMJbvUBZkBjsL5GpOveIZD
 natural_pdf/collections/mixins.py,sha256=Se2C5AcpP9B5E0d0pIrey6-f_P32tAXTK4M7666MNj0,5688
 natural_pdf/core/__init__.py,sha256=QC8H4M3KbXwMFiQORZ0pdPlzx1Ix6oKKQSS7Ib2KEaA,38
 natural_pdf/core/element_manager.py,sha256=KPuKM7SstfErTkRnGq4vrgE0Tv8iazN13Jp7yAXGKso,55575
-natural_pdf/core/highlighting_service.py,sha256=7on8nErhi50CEH2L4XzGIZ6tIqZtMzmmFlp-2lmwnYE,68856
+natural_pdf/core/highlighting_service.py,sha256=wEV-koqHoHf7S3wZ3j8D2L-ucGp3Nd0YhhStz9yqeLc,70406
 natural_pdf/core/page.py,sha256=Pid5hqVjcyX-gcCzxCJ62k6AQhNbUMNM_5QmEcylIjM,155264
 natural_pdf/core/page_collection.py,sha256=IjdFq9q0D0P6ZKWInf0H25rLzxfMb7RsUXucogkhNkU,63169
 natural_pdf/core/page_groupby.py,sha256=V2e_RNlHaasUzYm2h2vNJI7_aV_fl3_pg7kU3F2j0z8,8218
@@ -39,12 +39,12 @@ natural_pdf/describe/elements.py,sha256=3Y541z5TQ2obrfZFiFi1YQMsCt3oYrhMHpD5j1tu
 natural_pdf/describe/mixin.py,sha256=rkX14aGrSz7Jvxx8Rbxv3eSfbO-_29DipwpstrV2pDQ,3109
 natural_pdf/describe/summary.py,sha256=cfT4ZQkeatCDAOwWPwhtEVXisNgk6E57fAXAnoRysSU,7645
 natural_pdf/elements/__init__.py,sha256=ICNikmLeIEuSYypz-KnkBn8xR1hR7rge4hsa1KLkyWY,42
-natural_pdf/elements/base.py,sha256=92ukTtRCQFsa5KvKflChCt4mt0ZGS4ecGYCQTNMO4zU,58907
+natural_pdf/elements/base.py,sha256=DozTl9IS3DtSqBNArUEtHeuIiDcNWUW_gFKoUebmC4M,59573
 natural_pdf/elements/element_collection.py,sha256=idM_BUWEfbCJ5Sq0Ae_KfbVHy8TdkNfzs7iWkFe_j2I,130707
 natural_pdf/elements/image.py,sha256=zu-P2Y8fRoEXf6IeZU0EYRWsgZ6I_a5vy1FA3VXTGkQ,1424
 natural_pdf/elements/line.py,sha256=TFn7KXjPT_jUQyQyabU0F7XYU4dC-qadwodJMZF4DCU,3844
 natural_pdf/elements/rect.py,sha256=0lNkVkPkvbRbrFED856RXoUcTcDkeeOIs5xldKGAQT8,3324
-natural_pdf/elements/region.py,sha256=HF6KzeuudO9upVLIrPsp3omcziLcILE3nnzl1a-LvK0,165400
+natural_pdf/elements/region.py,sha256=Lf2wZgZn-C7g__eK6adgkKPjFoWbjj6A6GLnz0pn5_w,166733
 natural_pdf/elements/text.py,sha256=829uSJv9E-8cC6T6iR_Va7Xtv54pJoyRN78fq4NN1d4,20687
 natural_pdf/export/mixin.py,sha256=L1q3MIEFWuvie4j4_EmW7GT3NerbZ1as0XMUoqTS7gM,5083
 natural_pdf/exporters/__init__.py,sha256=QffoARekR6WzXEd05oxOytly4qPdBizuIF-SUkeFpig,643
@@ -101,38 +101,39 @@ natural_pdf/utils/packaging.py,sha256=TM0jafwS5yVbTGC-RMi4TyWunf9cUUo9h5J6rMzkT-
 natural_pdf/utils/reading_order.py,sha256=u7XyVZdKMPMK0CL1C7xFogKnZ92b0JKT068KFjQWe18,7437
 natural_pdf/utils/text_extraction.py,sha256=CCwPTmMoTgtQt2P00X_ADIf6ZGNfxvjCO9FO0_HqG40,13900
 natural_pdf/utils/visualization.py,sha256=zhZEHgYnZFuX7YxTHXF8Y3D97uHp2beTKMaC-JkCFwk,22364
-natural_pdf/vision/__init__.py,sha256=RymMY-3WLQBlOZ4Dx4MmL9UH6I65hNjkwUJ7ymO5JfM,287
-natural_pdf/vision/mixin.py,sha256=OJwBABr74TWxP5seTKUmGj5zE9mWsBP_UKWU-Pr8V9A,8720
-natural_pdf/vision/results.py,sha256=F2zXG3MVZIpOUvPkJHotOq6-9rFz68BaO_8pnSndlOs,5119
-natural_pdf/vision/similarity.py,sha256=YH8legN-t9uf1b_XULi4JLNDaRfPNKQwU1FZ4Qu08jY,11740
+natural_pdf/vision/__init__.py,sha256=TkoQtdODlh0n_99dsjLIWKE9dgK0m4jfrui_cQ3gTwU,221
+natural_pdf/vision/mixin.py,sha256=wlsX42cFUnUepZHsEfKBqXiDEPUwBG6-KN2Cx5qz_lw,10812
+natural_pdf/vision/results.py,sha256=_NBRCKtDd1M3sWK7zHSym7-jpQqW4kR_iFFL4PvnBNo,6649
+natural_pdf/vision/similarity.py,sha256=HWmXDBNLSOlRWH-_1K3FVR7tSsRuMFqXZwrVhhg2ZzU,17925
+natural_pdf/vision/template_matching.py,sha256=91XQt5tp-vmcMX_4b2Bz-YwIAlb-hc8E5ih_qAHQuCk,7145
 natural_pdf/widgets/__init__.py,sha256=QTVaUmsw__FCweFYZebwPssQxxUFUMd0wpm_cUbGZJY,181
 natural_pdf/widgets/viewer.py,sha256=KW3JogdR2TMg2ECUMYp8hwd060hfg8EsYBWxb5IEzBY,24942
-natural_pdf-0.2.12.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
+natural_pdf-0.2.13.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
 optimization/memory_comparison.py,sha256=0i_foFSRmppj-fY069qjwH36s_zkx-1L2ASAAlepWzA,6541
 optimization/pdf_analyzer.py,sha256=HjrmTgu2qchxPeDckc5kjgxppGwd40UESrYS9Myj7pY,19352
 optimization/performance_analysis.py,sha256=JBXnR9hc7Ix7YCnt3EJPSpsyqIUgKsc7GEffQ_TDCBk,13033
 optimization/test_cleanup_methods.py,sha256=PmLOL4MRgvV0j_DW9W1TS8MsGGgu57QCuq6_5y7zK3s,6209
 optimization/test_memory_fix.py,sha256=A3knK74fNhvHknDbLhbTmA276x1ifl-3ivJ_7BhVSTI,6170
-temp/debug_cell_extraction.py,sha256=nE0Z470P40v8xZfWO1V3qgNaejs_pernEQaUOFeOJ1U,1527
-temp/debug_exclusion_overlap.py,sha256=RptJXwqBXy5gsvMF037KEx1o2QgjwEDkMB6TD5aJdqA,1644
-temp/debug_exclusions_guides.py,sha256=s8siep9te1KRJ2j0vH1tvDQnBlz7PKbHeCiYMrZL8jE,2096
-temp/debug_extra_guide.py,sha256=95Tim-YnmAR4kICw2XDKVDvlW5WsjK_51cv5-EV11rc,1236
-temp/debug_outer_boundaries.py,sha256=uJUJwojTxOU4VtbGUouuhV65IYzS6NDIVKxnS7o64nU,1456
-temp/debug_st_search.py,sha256=F4c_mUVi_d5AKaKIpQ0AnW1amDqAwALoQQj7wZj--J0,1021
 temp/fix_page_exclusions.py,sha256=YIj62zF38TdoBARAuSIvEbetl_JfXG-mp4v9p355qmo,1358
+temp/test_draw_guides.py,sha256=_eSSBElGHQkd2QD_KA_Okw70v0dlY5m-1-C5SQwKAJw,642
+temp/test_draw_guides_interactive.py,sha256=FsH-2ZQGsGx_8QfVCWUAkLbOcJz-VfiwROzQD4AD7kQ,926
 temp/test_exclusion_with_debug.py,sha256=CScxHvb43KrB5dzXuTOhuzjcBXZBdfYB5ygiKkEW26g,1393
 temp/test_find_exclusions_fix.py,sha256=1l5aEqnElcl3kiykdtmJFlVxQ1xMKGm1UckGYEQg--c,2103
 temp/test_find_exclusions_fix_no_recursion.py,sha256=qZspTBwxunRM93N_-fZ2fR5Lodj0ArQX3h10HlTXhfc,3592
 temp/test_fix_real_pdf.py,sha256=uuylxmpeAEbIix9wjl0Gri1sZlN61dBWTq6ZCyfvzF8,1454
 temp/test_fix_working.py,sha256=-Ryre1rXYA2EG_lmPZGYEGi8yz0slhHEXPJMYexZW84,1750
 temp/test_fixed_pdf_exclusions.py,sha256=Q5zxooKDvtTXo-dDsx3nsQw1ZVHX3TW47iZ_dXpFdrY,2168
+temp/test_guide_draw_notebook.py,sha256=9yYRV5mfmVHiL1lnwNj-vksw45d1oWbAZpDGA7yZf-M,1583
 temp/test_horizontal_top_bottom.py,sha256=Mb3tjt9Z3wOTpzFOgK7i0K-j-_ynNh4vDu2x1L3nu-s,2163
+temp/test_inline_js.py,sha256=xuQH8VQn7L4sogv6wd_Rwudx5p_Lt6we1h7U1LPTH-g,646
 temp/test_marker_order.py,sha256=TFZkMxRiNoZGVcdDivYnkIDNvwHaiyKUdYoy2rTTIiI,1417
 temp/test_original_exclusions_now_work.py,sha256=G6LmaF-P9Qhj0j4lT_4ncfCddllfP6L8F_x2prUBr9w,1904
 temp/test_pdf_exclusions_with_guides.py,sha256=QaMl0frgKC8kCPQ2BUI8kqyvqsIjQPXKV_St1rK3zxg,2754
 temp/test_region_exclusions_detailed.py,sha256=EftdW3JY3JH_LX5QlWKt-4drM-joPggK2fKUZRXVTMA,814
 temp/test_stripes_real_pdf.py,sha256=FIvDoJrnuioOMw1A0aTCCfZLeg99lusfe0Fb0MiqnhQ,2618
 temp/test_vertical_stripes.py,sha256=Yf3TJfb_faqAFzlgb7i5u6dDHjF4UMSHIGM99vangRk,1877
+temp/test_widget_functionality.py,sha256=jsEGHYK1dWWa8uEcfGRRj1ReHRMzNoIaMZU4d-o-Djs,2448
+temp/test_widget_simple.py,sha256=Vy_DKgPhPhUQ8nKw_KnhGTpwtmh5EEic0avEyW9hbOQ,1398
 tools/bad_pdf_eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tools/bad_pdf_eval/analyser.py,sha256=oqSTo3NLyignp_XdCO9_SRCUUXMU8lfgDavKYZYNxws,13690
 tools/bad_pdf_eval/collate_summaries.py,sha256=L_YsdiqmwGIHYWTVJqo6gyazyn3GIQgpfGGKk8uwckk,5159
@@ -144,8 +145,8 @@ tools/bad_pdf_eval/llm_enrich.py,sha256=mCh4KGi1HmIkzGjj5rrHz1Osd7sEX1IZ_FW08H1t
 tools/bad_pdf_eval/llm_enrich_with_retry.py,sha256=XUtPF1hUvqd3frDXT0wDTXoonuAivhjM5vgFdZ-tm0A,9373
 tools/bad_pdf_eval/reporter.py,sha256=e1g__mkSB4q02p3mGWOwMhvFs7F2HJosNBxup0-LkyU,400
 tools/bad_pdf_eval/utils.py,sha256=hR95XQ7qf7Cu6BdyX0L7ggGVx-ah5sK0jHWblTJUUic,4896
-natural_pdf-0.2.12.dist-info/METADATA,sha256=jRNM0JxYvPDuqzD63earjbaUwQgXCjPYPLC5pLl49Uk,6960
-natural_pdf-0.2.12.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-natural_pdf-0.2.12.dist-info/entry_points.txt,sha256=1R_KMv7g60UBBpRqGfw7bppsMNGdayR-iJlb9ohEk_8,81
-natural_pdf-0.2.12.dist-info/top_level.txt,sha256=ZDKhxE_tg508o9BpagsjCGcI8GY4cF_8bg0e0IaLsPI,41
-natural_pdf-0.2.12.dist-info/RECORD,,
+natural_pdf-0.2.13.dist-info/METADATA,sha256=k3WrL3HrPJRbK8Bu5PVIkNlJImAh5N8KC1M_7rZc2WM,6960
+natural_pdf-0.2.13.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+natural_pdf-0.2.13.dist-info/entry_points.txt,sha256=1R_KMv7g60UBBpRqGfw7bppsMNGdayR-iJlb9ohEk_8,81
+natural_pdf-0.2.13.dist-info/top_level.txt,sha256=ZDKhxE_tg508o9BpagsjCGcI8GY4cF_8bg0e0IaLsPI,41
+natural_pdf-0.2.13.dist-info/RECORD,,
@@ -0,0 +1,25 @@
+"""Example usage of the interactive guide drawing feature"""
+
+# In a Jupyter notebook:
+from natural_pdf import NaturalPDF
+
+# Load a PDF
+pdf = NaturalPDF.from_file("your_pdf.pdf")
+page = pdf[0]
+
+# Create guides
+guides = page.guides()
+
+# Detect some initial guides (optional)
+guides.vertical.from_lines(n=5)
+guides.horizontal.from_lines(n=5)
+
+# Open interactive editor for vertical guides
+guides.vertical.draw()
+
+# Open interactive editor for horizontal guides
+guides.horizontal.draw(width=600)  # Smaller widget
+
+# After editing, the guides are automatically updated
+# You can now use them to extract tables:
+table = page.extract_table(guides)