PyPI - natural-pdf - Versions diffs - 0.2.12__py3-none-any.whl → 0.2.13__py3-none-any.whl - Mend

natural-pdf 0.2.12__py3-none-any.whl → 0.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

natural_pdf/core/highlighting_service.py +40 -10
natural_pdf/elements/base.py +15 -1
natural_pdf/elements/region.py +32 -2
natural_pdf/vision/__init__.py +1 -2
natural_pdf/vision/mixin.py +67 -27
natural_pdf/vision/results.py +49 -5
natural_pdf/vision/similarity.py +195 -23
natural_pdf/vision/template_matching.py +209 -0
{natural_pdf-0.2.12.dist-info → natural_pdf-0.2.13.dist-info}/METADATA +1 -1
{natural_pdf-0.2.12.dist-info → natural_pdf-0.2.13.dist-info}/RECORD +20 -19
temp/test_draw_guides.py +25 -0
temp/test_draw_guides_interactive.py +30 -0
temp/test_guide_draw_notebook.py +47 -0
temp/test_inline_js.py +22 -0
temp/test_widget_functionality.py +68 -0
temp/test_widget_simple.py +41 -0
temp/debug_cell_extraction.py +0 -42
temp/debug_exclusion_overlap.py +0 -43
temp/debug_exclusions_guides.py +0 -67
temp/debug_extra_guide.py +0 -41
temp/debug_outer_boundaries.py +0 -46
temp/debug_st_search.py +0 -33
{natural_pdf-0.2.12.dist-info → natural_pdf-0.2.13.dist-info}/WHEEL +0 -0
{natural_pdf-0.2.12.dist-info → natural_pdf-0.2.13.dist-info}/entry_points.txt +0 -0
{natural_pdf-0.2.12.dist-info → natural_pdf-0.2.13.dist-info}/licenses/LICENSE +0 -0
{natural_pdf-0.2.12.dist-info → natural_pdf-0.2.13.dist-info}/top_level.txt +0 -0

natural_pdf/core/highlighting_service.py CHANGED Viewed

@@ -92,6 +92,16 @@ class HighlightRenderer:
     def _draw_highlights(self):
         """Draws all highlight shapes, borders, vertices, and attributes."""
+        # Get the pdfplumber page offset for coordinate translation
+        page_offset_x = 0
+        page_offset_y = 0
+        if hasattr(self.page, "_page") and hasattr(self.page._page, "bbox"):
+            # PDFPlumber page bbox might have negative offsets
+            page_offset_x = -self.page._page.bbox[0]
+            page_offset_y = -self.page._page.bbox[1]
+            logger.debug(f"Applying highlight offset: x={page_offset_x}, y={page_offset_y}")
         for highlight in self.highlights:
             # Create a transparent overlay for this single highlight
             overlay = Image.new("RGBA", self.base_image.size, (0, 0, 0, 0))
@@ -101,7 +111,11 @@ class HighlightRenderer:
             if highlight.is_polygon:
                 scaled_polygon = [
-                    (p[0] * self.scale_factor, p[1] * self.scale_factor) for p in highlight.polygon
+                    (
+                        (p[0] + page_offset_x) * self.scale_factor,
+                        (p[1] + page_offset_y) * self.scale_factor,
+                    )
+                    for p in highlight.polygon
                 ]
                 # Draw polygon fill and border
                 draw.polygon(
@@ -117,11 +131,16 @@ class HighlightRenderer:
             else:  # Rectangle
                 x0, top, x1, bottom = highlight.bbox
                 x0_s, top_s, x1_s, bottom_s = (
-                    x0 * self.scale_factor,
-                    top * self.scale_factor,
-                    x1 * self.scale_factor,
-                    bottom * self.scale_factor,
+                    (x0 + page_offset_x) * self.scale_factor,
+                    (top + page_offset_y) * self.scale_factor,
+                    (x1 + page_offset_x) * self.scale_factor,
+                    (bottom + page_offset_y) * self.scale_factor,
                 )
+                logger.debug(f"Original bbox: ({x0}, {top}, {x1}, {bottom})")
+                logger.debug(
+                    f"Offset bbox: ({x0 + page_offset_x}, {top + page_offset_y}, {x1 + page_offset_x}, {bottom + page_offset_y})"
+                )
+                logger.debug(f"Scaled bbox: ({x0_s}, {top_s}, {x1_s}, {bottom_s})")
                 scaled_bbox = [x0_s, top_s, x1_s, bottom_s]
                 # Draw rectangle fill and border
                 draw.rectangle(
@@ -1482,11 +1501,22 @@ class HighlightingService:
                 offset_x = crop_offset[0] * scale_factor
                 offset_y = crop_offset[1] * scale_factor
+            # Add pdfplumber page offset for coordinate translation
+            page_offset_x = 0
+            page_offset_y = 0
+            if hasattr(page, "_page") and hasattr(page._page, "bbox"):
+                # PDFPlumber page bbox might have negative offsets
+                page_offset_x = -page._page.bbox[0]
+                page_offset_y = -page._page.bbox[1]
             # Draw the highlight
             if polygon:
                 # Scale polygon points and apply offset
                 scaled_polygon = [
-                    (p[0] * scale_factor - offset_x, p[1] * scale_factor - offset_y)
+                    (
+                        (p[0] + page_offset_x) * scale_factor - offset_x,
+                        (p[1] + page_offset_y) * scale_factor - offset_y,
+                    )
                     for p in polygon
                 ]
                 draw.polygon(
@@ -1496,10 +1526,10 @@ class HighlightingService:
                 # Scale bbox and apply offset
                 x0, y0, x1, y1 = bbox
                 scaled_bbox = [
-                    x0 * scale_factor - offset_x,
-                    y0 * scale_factor - offset_y,
-                    x1 * scale_factor - offset_x,
-                    y1 * scale_factor - offset_y,
+                    (x0 + page_offset_x) * scale_factor - offset_x,
+                    (y0 + page_offset_y) * scale_factor - offset_y,
+                    (x1 + page_offset_x) * scale_factor - offset_x,
+                    (y1 + page_offset_y) * scale_factor - offset_y,
                 ]
                 draw.rectangle(
                     scaled_bbox, fill=color, outline=(color[0], color[1], color[2], BORDER_ALPHA)

natural_pdf/elements/base.py CHANGED Viewed

@@ -106,6 +106,7 @@ class DirectionalMixin:
         include_source: bool = False,
         until: Optional[str] = None,
         include_endpoint: bool = True,
+        offset: float = 0.1,
         **kwargs,
     ) -> "Region":
         """
@@ -118,6 +119,7 @@ class DirectionalMixin:
             include_source: Whether to include this element/region's area in the result
             until: Optional selector string to specify a boundary element
             include_endpoint: Whether to include the boundary element found by 'until'
+            offset: Pixel offset when excluding source/endpoint (default: 0.1)
             **kwargs: Additional parameters for the 'until' selector search
         Returns:
@@ -127,7 +129,7 @@ class DirectionalMixin:
         is_horizontal = direction in ("left", "right")
         is_positive = direction in ("right", "below")  # right/below are positive directions
-        pixel_offset = 1  # Offset for excluding elements/endpoints
+        pixel_offset = offset  # Use provided offset for excluding elements/endpoints
         # 1. Determine initial boundaries based on direction and include_source
         if is_horizontal:
@@ -260,6 +262,7 @@ class DirectionalMixin:
         include_source: bool = False,
         until: Optional[str] = None,
         include_endpoint: bool = True,
+        offset: float = 0.1,
         **kwargs,
     ) -> "Region":
         """
@@ -271,6 +274,7 @@ class DirectionalMixin:
             include_source: Whether to include this element/region in the result (default: False)
             until: Optional selector string to specify an upper boundary element
             include_endpoint: Whether to include the boundary element in the region (default: True)
+            offset: Pixel offset when excluding source/endpoint (default: 0.1)
             **kwargs: Additional parameters
         Returns:
@@ -295,6 +299,7 @@ class DirectionalMixin:
             include_source=include_source,
             until=until,
             include_endpoint=include_endpoint,
+            offset=offset,
             **kwargs,
         )
@@ -305,6 +310,7 @@ class DirectionalMixin:
         include_source: bool = False,
         until: Optional[str] = None,
         include_endpoint: bool = True,
+        offset: float = 0.1,
         **kwargs,
     ) -> "Region":
         """
@@ -316,6 +322,7 @@ class DirectionalMixin:
             include_source: Whether to include this element/region in the result (default: False)
             until: Optional selector string to specify a lower boundary element
             include_endpoint: Whether to include the boundary element in the region (default: True)
+            offset: Pixel offset when excluding source/endpoint (default: 0.1)
             **kwargs: Additional parameters
         Returns:
@@ -340,6 +347,7 @@ class DirectionalMixin:
             include_source=include_source,
             until=until,
             include_endpoint=include_endpoint,
+            offset=offset,
             **kwargs,
         )
@@ -350,6 +358,7 @@ class DirectionalMixin:
         include_source: bool = False,
         until: Optional[str] = None,
         include_endpoint: bool = True,
+        offset: float = 0.1,
         **kwargs,
     ) -> "Region":
         """
@@ -361,6 +370,7 @@ class DirectionalMixin:
             include_source: Whether to include this element/region in the result (default: False)
             until: Optional selector string to specify a left boundary element
             include_endpoint: Whether to include the boundary element in the region (default: True)
+            offset: Pixel offset when excluding source/endpoint (default: 0.1)
             **kwargs: Additional parameters
         Returns:
@@ -385,6 +395,7 @@ class DirectionalMixin:
             include_source=include_source,
             until=until,
             include_endpoint=include_endpoint,
+            offset=offset,
             **kwargs,
         )
@@ -395,6 +406,7 @@ class DirectionalMixin:
         include_source: bool = False,
         until: Optional[str] = None,
         include_endpoint: bool = True,
+        offset: float = 0.1,
         **kwargs,
     ) -> "Region":
         """
@@ -406,6 +418,7 @@ class DirectionalMixin:
             include_source: Whether to include this element/region in the result (default: False)
             until: Optional selector string to specify a right boundary element
             include_endpoint: Whether to include the boundary element in the region (default: True)
+            offset: Pixel offset when excluding source/endpoint (default: 0.1)
             **kwargs: Additional parameters
         Returns:
@@ -430,6 +443,7 @@ class DirectionalMixin:
             include_source=include_source,
             until=until,
             include_endpoint=include_endpoint,
+            offset=offset,
             **kwargs,
         )

natural_pdf/elements/region.py CHANGED Viewed

@@ -45,6 +45,7 @@ from natural_pdf.utils.locks import pdf_render_lock  # Import the lock
 # Import new utils
 from natural_pdf.utils.text_extraction import filter_chars_spatially, generate_text_layout
+from natural_pdf.vision.mixin import VisualSearchMixin
 # Import viewer widget support
 from natural_pdf.widgets.viewer import _IPYWIDGETS_AVAILABLE, InteractiveViewerWidget
@@ -80,6 +81,7 @@ class Region(
     ExtractionMixin,
     ShapeDetectionMixin,
     DescribeMixin,
+    VisualSearchMixin,
     Visualizable,
 ):
     """Represents a rectangular region on a page.
@@ -1692,7 +1694,21 @@ class Region(
         else:
             filtered_page = base_plumber_page
-        cropped = filtered_page.crop(self.bbox)
+        # Ensure bbox is within pdfplumber page bounds
+        page_bbox = filtered_page.bbox
+        clipped_bbox = (
+            max(self.bbox[0], page_bbox[0]),  # x0
+            max(self.bbox[1], page_bbox[1]),  # y0
+            min(self.bbox[2], page_bbox[2]),  # x1
+            min(self.bbox[3], page_bbox[3]),  # y1
+        )
+        # Only crop if the clipped bbox is valid (has positive width and height)
+        if clipped_bbox[2] > clipped_bbox[0] and clipped_bbox[3] > clipped_bbox[1]:
+            cropped = filtered_page.crop(clipped_bbox)
+        else:
+            # If the region is completely outside the page bounds, return empty list
+            return []
         # Extract all tables from the cropped area
         tables = cropped.extract_tables(table_settings)
@@ -1786,7 +1802,21 @@ class Region(
             filtered_page = base_plumber_page
         # Now crop the (possibly filtered) page to the region bbox
-        cropped = filtered_page.crop(self.bbox)
+        # Ensure bbox is within pdfplumber page bounds
+        page_bbox = filtered_page.bbox
+        clipped_bbox = (
+            max(self.bbox[0], page_bbox[0]),  # x0
+            max(self.bbox[1], page_bbox[1]),  # y0
+            min(self.bbox[2], page_bbox[2]),  # x1
+            min(self.bbox[3], page_bbox[3]),  # y1
+        )
+        # Only crop if the clipped bbox is valid (has positive width and height)
+        if clipped_bbox[2] > clipped_bbox[0] and clipped_bbox[3] > clipped_bbox[1]:
+            cropped = filtered_page.crop(clipped_bbox)
+        else:
+            # If the region is completely outside the page bounds, return empty table
+            return []
         # Extract the single largest table from the cropped area
         table = cropped.extract_table(table_settings)

natural_pdf/vision/__init__.py CHANGED Viewed

@@ -1,7 +1,6 @@
 """Vision module for visual similarity and pattern matching"""
 from .mixin import VisualSearchMixin
-from .results import Match, MatchResults
 from .similarity import VisualMatcher, compute_phash
-__all__ = ["VisualMatcher", "compute_phash", "Match", "MatchResults", "VisualSearchMixin"]
+__all__ = ["VisualMatcher", "compute_phash", "VisualSearchMixin"]

natural_pdf/vision/mixin.py CHANGED Viewed

@@ -6,9 +6,6 @@ import numpy as np
 from PIL import Image
 from tqdm.auto import tqdm
-from .results import Match, MatchResults
-from .similarity import VisualMatcher, compute_phash
 class VisualSearchMixin:
     """Add find_similar method to classes that include this mixin"""
@@ -21,11 +18,12 @@ class VisualSearchMixin:
         sizes: Optional[Union[float, Tuple, List]] = (0.8, 1.2),
         resolution: int = 72,
         hash_size: int = 20,
-        step_factor: float = 0.1,
+        step: Optional[int] = None,
+        method: str = "phash",
         max_per_page: Optional[int] = None,
         show_progress: bool = True,
         **kwargs,
-    ) -> MatchResults:
+    ) -> "MatchResults":
         """
         Find regions visually similar to the given example(s).
@@ -35,15 +33,19 @@ class VisualSearchMixin:
             confidence: Minimum similarity score (0-1)
             sizes: Size variations to search. Can be:
                    - float: ±percentage (e.g., 0.2 = 80%-120%)
-                   - tuple(min, max): search range with smart logarithmic steps (default: (0.8, 1.0))
+                   - tuple(min, max): search range with smart logarithmic steps (default: (0.8, 1.2))
                    - tuple(min, max, step): explicit step size
                    - list: exact sizes to try (e.g., [0.8, 1.0, 1.2])
             resolution: Resolution for image comparison (DPI) (default: 72)
-            hash_size: Size of perceptual hash grid (default: 12)
-            step_factor: Step size as fraction of template size (default: 0.1)
+            hash_size: Size of perceptual hash grid (default: 20)
+            step: Step size in pixels for sliding window
+            method: Matching algorithm - "phash" (default) or "template"
             max_per_page: Maximum matches to return per page
             show_progress: Show progress bar for multi-page searches (default: True)
-            **kwargs: Additional options
+            **kwargs: Additional options including:
+                mask_threshold: For both template and phash methods, pixels >= this value are masked.
+                               For template matching: pixels are ignored in matching (e.g., 0.95)
+                               For phash: pixels are replaced with median before hashing (e.g., 0.95)
         Returns:
             MatchResults collection
@@ -55,15 +57,25 @@ class VisualSearchMixin:
         if not isinstance(examples, list):
             examples = [examples]
+        from .similarity import VisualMatcher, compute_phash
         # Initialize matcher with specified hash size
         matcher = VisualMatcher(hash_size=hash_size)
         # Prepare templates
         templates = []
+        # Extract mask_threshold from kwargs for phash
+        mask_threshold = kwargs.get("mask_threshold")
+        mask_threshold_255 = (
+            int(mask_threshold * 255) if mask_threshold is not None and method == "phash" else None
+        )
         for example in examples:
             # Render the example region/element
             example_image = example.render(resolution=resolution, crop=True)
-            template_hash = compute_phash(example_image, hash_size=hash_size)
+            template_hash = compute_phash(
+                example_image, hash_size=hash_size, mask_threshold=mask_threshold_255
+            )
             templates.append({"image": example_image, "hash": template_hash, "source": example})
         # Get pages to search based on the object type
@@ -76,6 +88,8 @@ class VisualSearchMixin:
             pages_to_search = self.pages
         elif hasattr(self, "number"):  # Single page
             pages_to_search = [self]
+        elif hasattr(self, "page") and hasattr(self, "bbox"):  # Region
+            pages_to_search = [self]
         else:
             raise TypeError(f"Cannot search in {type(self)}")
@@ -86,10 +100,16 @@ class VisualSearchMixin:
             scales = matcher._get_search_scales(sizes)
             # Pre-calculate for all pages and templates
-            for page in pages_to_search:
-                # Estimate page image size
-                page_w = int(page.width * resolution / 72.0)
-                page_h = int(page.height * resolution / 72.0)
+            for search_obj in pages_to_search:
+                # Estimate image size based on object type
+                if hasattr(search_obj, "page") and hasattr(search_obj, "bbox"):
+                    # Region
+                    page_w = int(search_obj.width * resolution / 72.0)
+                    page_h = int(search_obj.height * resolution / 72.0)
+                else:
+                    # Page
+                    page_w = int(search_obj.width * resolution / 72.0)
+                    page_h = int(search_obj.height * resolution / 72.0)
                 for template_data in templates:
                     template_w, template_h = template_data["image"].size
@@ -99,11 +119,15 @@ class VisualSearchMixin:
                         scaled_h = int(template_h * scale)
                         if scaled_w <= page_w and scaled_h <= page_h:
-                            step_x = max(1, int(scaled_w * step_factor))
-                            step_y = max(1, int(scaled_h * step_factor))
-                            x_windows = len(range(0, page_w - scaled_w + 1, step_x))
-                            y_windows = len(range(0, page_h - scaled_h + 1, step_y))
+                            # Determine step size
+                            if step is not None:
+                                actual_step = step
+                            else:
+                                # Default to 10% of template size
+                                actual_step = max(1, int(min(scaled_w, scaled_h) * 0.1))
+                            x_windows = len(range(0, page_w - scaled_w + 1, actual_step))
+                            y_windows = len(range(0, page_h - scaled_h + 1, actual_step))
                             total_operations += x_windows * y_windows
         # Search each page
@@ -124,9 +148,20 @@ class VisualSearchMixin:
                 mininterval=0.1,  # Minimum time between updates (seconds)
             )
-        for page_idx, page in enumerate(pages_to_search):
-            # Render the full page once
-            page_image = page.render(resolution=resolution)
+        for page_idx, search_obj in enumerate(pages_to_search):
+            # Determine if we're searching in a page or a region
+            if hasattr(search_obj, "page") and hasattr(search_obj, "bbox"):
+                # This is a Region - render only the region area
+                region = search_obj
+                page = region.page
+                page_image = region.render(resolution=resolution, crop=True)
+                # Region offset for coordinate conversion
+                region_x0, region_y0 = region.x0, region.top
+            else:
+                # This is a Page - render the full page
+                page = search_obj
+                page_image = page.render(resolution=resolution)
+                region_x0, region_y0 = 0, 0
             # Convert page coordinates to image coordinates
             scale = resolution / 72.0  # PDF is 72 DPI
@@ -168,7 +203,8 @@ class VisualSearchMixin:
                     template_hash=template_hash,
                     confidence_threshold=confidence,
                     sizes=sizes,
-                    step_factor=step_factor,
+                    step=step,
+                    method=method,
                     show_progress=False,  # We handle progress ourselves
                     progress_callback=update_progress if progress_bar else None,
                     **kwargs,
@@ -180,10 +216,12 @@ class VisualSearchMixin:
                     # Convert from image pixels to PDF points
                     # No flipping needed! PDF coordinates map directly to PIL coordinates
-                    pdf_x0 = img_x0 / scale
-                    pdf_y0 = img_y0 / scale
-                    pdf_x1 = img_x1 / scale
-                    pdf_y1 = img_y1 / scale
+                    pdf_x0 = img_x0 / scale + region_x0
+                    pdf_y0 = img_y0 / scale + region_y0
+                    pdf_x1 = img_x1 / scale + region_x0
+                    pdf_y1 = img_y1 / scale + region_y0
+                    from .results import Match
                     # Create Match object
                     match = Match(
@@ -206,4 +244,6 @@ class VisualSearchMixin:
         if progress_bar:
             progress_bar.close()
+        from .results import MatchResults
         return MatchResults(all_matches)

natural_pdf/vision/results.py CHANGED Viewed

@@ -2,7 +2,6 @@
 from typing import TYPE_CHECKING, Any, Iterator, List, Optional, Tuple
-# Import Region directly as it's a base class
 from natural_pdf.elements.region import Region
 if TYPE_CHECKING:
@@ -39,16 +38,41 @@ class Match(Region):
 class MatchResults:
-    """Collection of Match objects with transformation methods"""
+    """
+    Collection of Match objects with transformation methods.
+    Matches are automatically sorted by confidence (highest first), so:
+    - matches[0] is the best match
+    - Iteration yields matches from best to worst
+    - The .top(n) method returns the n best matches
+    Example:
+        >>> matches = page.find_similar(logo_region)
+        >>> print(f"Found {len(matches)} matches")
+        >>>
+        >>> # Best match
+        >>> best = matches[0]
+        >>> print(f"Best match confidence: {best.confidence:.3f}")
+        >>>
+        >>> # Top 5 matches
+        >>> for match in matches.top(5):
+        ...     print(f"Confidence: {match.confidence:.3f} at page {match.page.number}")
+        >>>
+        >>> # All matches above 90% confidence
+        >>> high_conf = matches.filter_by_confidence(0.9)
+    """
     def __init__(self, matches: List[Match]):
-        """Initialize with list of Match objects"""
+        """Initialize with list of Match objects, automatically sorted by confidence"""
         # Import here to avoid circular import
         from natural_pdf.elements.element_collection import ElementCollection
+        # Sort matches by confidence (highest first)
+        sorted_matches = sorted(matches, key=lambda m: m.confidence, reverse=True)
         # Create a base ElementCollection
-        self._collection = ElementCollection(matches)
-        self._matches = matches
+        self._collection = ElementCollection(sorted_matches)
+        self._matches = sorted_matches
     def __len__(self):
         return len(self._matches)
@@ -68,6 +92,26 @@ class MatchResults:
         """Filter matches by minimum confidence"""
         return self.filter(lambda m: m.confidence >= min_confidence)
+    def top(self, n: int) -> "MatchResults":
+        """
+        Get the top N matches with highest confidence.
+        Args:
+            n: Number of top matches to return
+        Returns:
+            New MatchResults with only the top N matches
+        Example:
+            >>> matches = page.find_similar(logo)
+            >>> best_5 = matches.top(5)
+            >>> for match in best_5:
+            ...     print(f"Confidence: {match.confidence:.3f}")
+        """
+        # Since matches are already sorted by confidence, just take first n
+        top_matches = self._matches[:n]
+        return MatchResults(top_matches)
     def pages(self):
         """Get unique pages containing matches"""
         # Import here to avoid circular import

natural-pdf 0.2.12__py3-none-any.whl → 0.2.13__py3-none-any.whl

natural-pdf 0.2.12__py3-none-any.whl → 0.2.13__py3-none-any.whl