PyPI - natural-pdf - Versions diffs - 0.2.12__py3-none-any.whl → 0.2.15__py3-none-any.whl - Mend

natural-pdf 0.2.12__py3-none-any.whl → 0.2.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30)

natural_pdf/core/highlighting_service.py +40 -10
natural_pdf/elements/base.py +18 -1
natural_pdf/elements/element_collection.py +153 -15
natural_pdf/elements/rect.py +34 -0
natural_pdf/elements/region.py +55 -3
natural_pdf/elements/text.py +20 -2
natural_pdf/selectors/parser.py +28 -1
natural_pdf/vision/__init__.py +1 -2
natural_pdf/vision/mixin.py +67 -27
natural_pdf/vision/results.py +49 -5
natural_pdf/vision/similarity.py +195 -23
natural_pdf/vision/template_matching.py +209 -0
{natural_pdf-0.2.12.dist-info → natural_pdf-0.2.15.dist-info}/METADATA +1 -1
{natural_pdf-0.2.12.dist-info → natural_pdf-0.2.15.dist-info}/RECORD +24 -23
temp/test_draw_guides.py +25 -0
temp/test_draw_guides_interactive.py +30 -0
temp/test_guide_draw_notebook.py +47 -0
temp/test_inline_js.py +22 -0
temp/test_widget_functionality.py +68 -0
temp/test_widget_simple.py +41 -0
temp/debug_cell_extraction.py +0 -42
temp/debug_exclusion_overlap.py +0 -43
temp/debug_exclusions_guides.py +0 -67
temp/debug_extra_guide.py +0 -41
temp/debug_outer_boundaries.py +0 -46
temp/debug_st_search.py +0 -33
{natural_pdf-0.2.12.dist-info → natural_pdf-0.2.15.dist-info}/WHEEL +0 -0
{natural_pdf-0.2.12.dist-info → natural_pdf-0.2.15.dist-info}/entry_points.txt +0 -0
{natural_pdf-0.2.12.dist-info → natural_pdf-0.2.15.dist-info}/licenses/LICENSE +0 -0
{natural_pdf-0.2.12.dist-info → natural_pdf-0.2.15.dist-info}/top_level.txt +0 -0

natural_pdf/selectors/parser.py CHANGED Viewed

@@ -423,7 +423,33 @@ def parse_selector(selector: str) -> Dict[str, Any]:
         # Check for other pseudo-class blocks `:name` or `:name(...)`
         pseudo_match = pseudo_pattern.match(selector)
         if pseudo_match:
+            # --- NEW: robustly capture arguments that may contain nested parentheses --- #
             name, args_str = pseudo_match.groups()
+            match_end_idx = pseudo_match.end()
+            # If the args_str contains unmatched opening parens, continue scanning the
+            # selector until parentheses are balanced. This allows patterns like
+            # :contains((Tre) Ofertu) or complex regex with grouping.
+            if args_str is not None and args_str.count("(") > args_str.count(")"):
+                balance = args_str.count("(") - args_str.count(")")
+                i = match_end_idx
+                while i < len(selector) and balance > 0:
+                    char = selector[i]
+                    # Append char to args_str as we extend the capture
+                    args_str += char
+                    if char == "(":
+                        balance += 1
+                    elif char == ")":
+                        balance -= 1
+                    i += 1
+                # After loop, ensure parentheses are balanced; otherwise raise error
+                if balance != 0:
+                    raise ValueError(
+                        f"Mismatched parentheses in pseudo-class :{name}(). Full selector: '{original_selector_for_error}'"
+                    )
+                # Update where the selector should be sliced off from
+                match_end_idx = i
             name = name.lower()  # Normalize pseudo-class name
             processed_args = args_str  # Keep as string initially, or None
@@ -436,7 +462,8 @@ def parse_selector(selector: str) -> Dict[str, Any]:
             # else: args remain None
             result["pseudo_classes"].append({"name": name, "args": processed_args})
-            selector = selector[pseudo_match.end() :].strip()
+            # IMPORTANT: use match_end_idx (may have been extended)
+            selector = selector[match_end_idx:].strip()
             processed_chunk = True
             continue

natural_pdf/vision/__init__.py CHANGED Viewed

@@ -1,7 +1,6 @@
 """Vision module for visual similarity and pattern matching"""
 from .mixin import VisualSearchMixin
-from .results import Match, MatchResults
 from .similarity import VisualMatcher, compute_phash
-__all__ = ["VisualMatcher", "compute_phash", "Match", "MatchResults", "VisualSearchMixin"]
+__all__ = ["VisualMatcher", "compute_phash", "VisualSearchMixin"]

natural_pdf/vision/mixin.py CHANGED Viewed

@@ -6,9 +6,6 @@ import numpy as np
 from PIL import Image
 from tqdm.auto import tqdm
-from .results import Match, MatchResults
-from .similarity import VisualMatcher, compute_phash
 class VisualSearchMixin:
     """Add find_similar method to classes that include this mixin"""
@@ -21,11 +18,12 @@ class VisualSearchMixin:
         sizes: Optional[Union[float, Tuple, List]] = (0.8, 1.2),
         resolution: int = 72,
         hash_size: int = 20,
-        step_factor: float = 0.1,
+        step: Optional[int] = None,
+        method: str = "phash",
         max_per_page: Optional[int] = None,
         show_progress: bool = True,
         **kwargs,
-    ) -> MatchResults:
+    ) -> "MatchResults":
         """
         Find regions visually similar to the given example(s).
@@ -35,15 +33,19 @@ class VisualSearchMixin:
             confidence: Minimum similarity score (0-1)
             sizes: Size variations to search. Can be:
                    - float: ±percentage (e.g., 0.2 = 80%-120%)
-                   - tuple(min, max): search range with smart logarithmic steps (default: (0.8, 1.0))
+                   - tuple(min, max): search range with smart logarithmic steps (default: (0.8, 1.2))
                    - tuple(min, max, step): explicit step size
                    - list: exact sizes to try (e.g., [0.8, 1.0, 1.2])
             resolution: Resolution for image comparison (DPI) (default: 72)
-            hash_size: Size of perceptual hash grid (default: 12)
-            step_factor: Step size as fraction of template size (default: 0.1)
+            hash_size: Size of perceptual hash grid (default: 20)
+            step: Step size in pixels for sliding window
+            method: Matching algorithm - "phash" (default) or "template"
             max_per_page: Maximum matches to return per page
             show_progress: Show progress bar for multi-page searches (default: True)
-            **kwargs: Additional options
+            **kwargs: Additional options including:
+                mask_threshold: For both template and phash methods, pixels >= this value are masked.
+                               For template matching: pixels are ignored in matching (e.g., 0.95)
+                               For phash: pixels are replaced with median before hashing (e.g., 0.95)
         Returns:
             MatchResults collection
@@ -55,15 +57,25 @@ class VisualSearchMixin:
         if not isinstance(examples, list):
             examples = [examples]
+        from .similarity import VisualMatcher, compute_phash
         # Initialize matcher with specified hash size
         matcher = VisualMatcher(hash_size=hash_size)
         # Prepare templates
         templates = []
+        # Extract mask_threshold from kwargs for phash
+        mask_threshold = kwargs.get("mask_threshold")
+        mask_threshold_255 = (
+            int(mask_threshold * 255) if mask_threshold is not None and method == "phash" else None
+        )
         for example in examples:
             # Render the example region/element
             example_image = example.render(resolution=resolution, crop=True)
-            template_hash = compute_phash(example_image, hash_size=hash_size)
+            template_hash = compute_phash(
+                example_image, hash_size=hash_size, mask_threshold=mask_threshold_255
+            )
             templates.append({"image": example_image, "hash": template_hash, "source": example})
         # Get pages to search based on the object type
@@ -76,6 +88,8 @@ class VisualSearchMixin:
             pages_to_search = self.pages
         elif hasattr(self, "number"):  # Single page
             pages_to_search = [self]
+        elif hasattr(self, "page") and hasattr(self, "bbox"):  # Region
+            pages_to_search = [self]
         else:
             raise TypeError(f"Cannot search in {type(self)}")
@@ -86,10 +100,16 @@ class VisualSearchMixin:
             scales = matcher._get_search_scales(sizes)
             # Pre-calculate for all pages and templates
-            for page in pages_to_search:
-                # Estimate page image size
-                page_w = int(page.width * resolution / 72.0)
-                page_h = int(page.height * resolution / 72.0)
+            for search_obj in pages_to_search:
+                # Estimate image size based on object type
+                if hasattr(search_obj, "page") and hasattr(search_obj, "bbox"):
+                    # Region
+                    page_w = int(search_obj.width * resolution / 72.0)
+                    page_h = int(search_obj.height * resolution / 72.0)
+                else:
+                    # Page
+                    page_w = int(search_obj.width * resolution / 72.0)
+                    page_h = int(search_obj.height * resolution / 72.0)
                 for template_data in templates:
                     template_w, template_h = template_data["image"].size
@@ -99,11 +119,15 @@ class VisualSearchMixin:
                         scaled_h = int(template_h * scale)
                         if scaled_w <= page_w and scaled_h <= page_h:
-                            step_x = max(1, int(scaled_w * step_factor))
-                            step_y = max(1, int(scaled_h * step_factor))
-                            x_windows = len(range(0, page_w - scaled_w + 1, step_x))
-                            y_windows = len(range(0, page_h - scaled_h + 1, step_y))
+                            # Determine step size
+                            if step is not None:
+                                actual_step = step
+                            else:
+                                # Default to 10% of template size
+                                actual_step = max(1, int(min(scaled_w, scaled_h) * 0.1))
+                            x_windows = len(range(0, page_w - scaled_w + 1, actual_step))
+                            y_windows = len(range(0, page_h - scaled_h + 1, actual_step))
                             total_operations += x_windows * y_windows
         # Search each page
@@ -124,9 +148,20 @@ class VisualSearchMixin:
                 mininterval=0.1,  # Minimum time between updates (seconds)
             )
-        for page_idx, page in enumerate(pages_to_search):
-            # Render the full page once
-            page_image = page.render(resolution=resolution)
+        for page_idx, search_obj in enumerate(pages_to_search):
+            # Determine if we're searching in a page or a region
+            if hasattr(search_obj, "page") and hasattr(search_obj, "bbox"):
+                # This is a Region - render only the region area
+                region = search_obj
+                page = region.page
+                page_image = region.render(resolution=resolution, crop=True)
+                # Region offset for coordinate conversion
+                region_x0, region_y0 = region.x0, region.top
+            else:
+                # This is a Page - render the full page
+                page = search_obj
+                page_image = page.render(resolution=resolution)
+                region_x0, region_y0 = 0, 0
             # Convert page coordinates to image coordinates
             scale = resolution / 72.0  # PDF is 72 DPI
@@ -168,7 +203,8 @@ class VisualSearchMixin:
                     template_hash=template_hash,
                     confidence_threshold=confidence,
                     sizes=sizes,
-                    step_factor=step_factor,
+                    step=step,
+                    method=method,
                     show_progress=False,  # We handle progress ourselves
                     progress_callback=update_progress if progress_bar else None,
                     **kwargs,
@@ -180,10 +216,12 @@ class VisualSearchMixin:
                     # Convert from image pixels to PDF points
                     # No flipping needed! PDF coordinates map directly to PIL coordinates
-                    pdf_x0 = img_x0 / scale
-                    pdf_y0 = img_y0 / scale
-                    pdf_x1 = img_x1 / scale
-                    pdf_y1 = img_y1 / scale
+                    pdf_x0 = img_x0 / scale + region_x0
+                    pdf_y0 = img_y0 / scale + region_y0
+                    pdf_x1 = img_x1 / scale + region_x0
+                    pdf_y1 = img_y1 / scale + region_y0
+                    from .results import Match
                     # Create Match object
                     match = Match(
@@ -206,4 +244,6 @@ class VisualSearchMixin:
         if progress_bar:
             progress_bar.close()
+        from .results import MatchResults
         return MatchResults(all_matches)

natural_pdf/vision/results.py CHANGED Viewed

@@ -2,7 +2,6 @@
 from typing import TYPE_CHECKING, Any, Iterator, List, Optional, Tuple
-# Import Region directly as it's a base class
 from natural_pdf.elements.region import Region
 if TYPE_CHECKING:
@@ -39,16 +38,41 @@ class Match(Region):
 class MatchResults:
-    """Collection of Match objects with transformation methods"""
+    """
+    Collection of Match objects with transformation methods.
+    Matches are automatically sorted by confidence (highest first), so:
+    - matches[0] is the best match
+    - Iteration yields matches from best to worst
+    - The .top(n) method returns the n best matches
+    Example:
+        >>> matches = page.find_similar(logo_region)
+        >>> print(f"Found {len(matches)} matches")
+        >>>
+        >>> # Best match
+        >>> best = matches[0]
+        >>> print(f"Best match confidence: {best.confidence:.3f}")
+        >>>
+        >>> # Top 5 matches
+        >>> for match in matches.top(5):
+        ...     print(f"Confidence: {match.confidence:.3f} at page {match.page.number}")
+        >>>
+        >>> # All matches above 90% confidence
+        >>> high_conf = matches.filter_by_confidence(0.9)
+    """
     def __init__(self, matches: List[Match]):
-        """Initialize with list of Match objects"""
+        """Initialize with list of Match objects, automatically sorted by confidence"""
         # Import here to avoid circular import
         from natural_pdf.elements.element_collection import ElementCollection
+        # Sort matches by confidence (highest first)
+        sorted_matches = sorted(matches, key=lambda m: m.confidence, reverse=True)
         # Create a base ElementCollection
-        self._collection = ElementCollection(matches)
-        self._matches = matches
+        self._collection = ElementCollection(sorted_matches)
+        self._matches = sorted_matches
     def __len__(self):
         return len(self._matches)
@@ -68,6 +92,26 @@ class MatchResults:
         """Filter matches by minimum confidence"""
         return self.filter(lambda m: m.confidence >= min_confidence)
+    def top(self, n: int) -> "MatchResults":
+        """
+        Get the top N matches with highest confidence.
+        Args:
+            n: Number of top matches to return
+        Returns:
+            New MatchResults with only the top N matches
+        Example:
+            >>> matches = page.find_similar(logo)
+            >>> best_5 = matches.top(5)
+            >>> for match in best_5:
+            ...     print(f"Confidence: {match.confidence:.3f}")
+        """
+        # Since matches are already sorted by confidence, just take first n
+        top_matches = self._matches[:n]
+        return MatchResults(top_matches)
     def pages(self):
         """Get unique pages containing matches"""
         # Import here to avoid circular import

natural_pdf/vision/similarity.py CHANGED Viewed

@@ -7,6 +7,8 @@ import numpy as np
 from PIL import Image
 from tqdm.auto import tqdm
+from .template_matching import TemplateMatcher
 @dataclass
 class MatchCandidate:
@@ -17,7 +19,12 @@ class MatchCandidate:
     confidence: float
-def compute_phash(image: Image.Image, hash_size: int = 8, blur_radius: float = 0) -> int:
+def compute_phash(
+    image: Image.Image,
+    hash_size: int = 8,
+    blur_radius: float = 0,
+    mask_threshold: Optional[float] = None,
+) -> int:
     """
     Compute perceptual hash of an image using DCT.
@@ -25,6 +32,8 @@ def compute_phash(image: Image.Image, hash_size: int = 8, blur_radius: float = 0
         image: PIL Image to hash
         hash_size: Size of the hash (8 = 64 bit hash)
         blur_radius: Optional blur to apply before hashing (makes more tolerant)
+        mask_threshold: If provided, pixels >= this value (0-255 scale) are replaced with median
+                       before hashing. Useful for ignoring white backgrounds.
     Returns:
         Integer hash value
@@ -39,6 +48,25 @@ def compute_phash(image: Image.Image, hash_size: int = 8, blur_radius: float = 0
         image = image.filter(ImageFilter.GaussianBlur(radius=blur_radius))
+    # Apply masking if threshold provided
+    if mask_threshold is not None:
+        # For phash, masking works by normalizing the background
+        # This makes the hash focus on relative differences rather than absolute values
+        img_array = np.array(image, dtype=np.float32)
+        # Normalize by subtracting a representative background value
+        # Use the most common bright value as the background
+        bright_pixels = img_array[img_array >= mask_threshold]
+        if len(bright_pixels) > 0:
+            # Use the mode of bright pixels as background
+            background_val = np.median(bright_pixels)
+            # Normalize the image by subtracting background
+            # This makes different backgrounds appear similar
+            img_array = np.clip(img_array - background_val + 128, 0, 255)
+        # Convert back to PIL Image
+        image = Image.fromarray(img_array.astype(np.uint8))
     # Resize to 32x32 (4x the hash size for DCT)
     highfreq_factor = 4
     img_size = hash_size * highfreq_factor
@@ -80,12 +108,13 @@ def hash_similarity(hash1: int, hash2: int, hash_size: int = 64) -> float:
 class VisualMatcher:
-    """Handles visual similarity matching using perceptual hashing"""
+    """Handles visual similarity matching using perceptual hashing or template matching"""
     def __init__(self, hash_size: int = 12):
         self.hash_size = hash_size
         self.hash_bits = hash_size * hash_size
         self._cache = {}
+        self.template_matcher = TemplateMatcher()  # Default zncc
     def _get_search_scales(self, sizes: Optional[Union[float, Tuple, List]]) -> List[float]:
         """
@@ -172,20 +201,22 @@ class VisualMatcher:
         target: Image.Image,
         template_hash: Optional[int] = None,
         confidence_threshold: float = 0.6,
-        step_factor: float = 0.1,
+        step: Optional[int] = None,
         sizes: Optional[Union[float, Tuple, List]] = None,
         show_progress: bool = True,
         progress_callback: Optional[Callable[[], None]] = None,
+        method: str = "phash",
+        mask_threshold: Optional[float] = None,
     ) -> List[MatchCandidate]:
         """
-        Find all matches of template in target image using sliding window.
+        Find all matches of template in target image.
         Args:
             template: Template image to search for
             target: Target image to search in
-            template_hash: Pre-computed hash of template (optional)
+            template_hash: Pre-computed hash of template (optional, only for phash)
             confidence_threshold: Minimum similarity score (0-1)
-            step_factor: Step size as fraction of template size
+            step: Step size in pixels for sliding window
             sizes: Size variations to search. Can be:
                    - float: ±percentage (e.g., 0.2 = 80%-120%)
                    - tuple(min, max): search range with smart logarithmic steps
@@ -193,15 +224,153 @@ class VisualMatcher:
                    - list: exact sizes to try (e.g., [0.8, 1.0, 1.2])
             show_progress: Show progress bar for sliding window search
             progress_callback: Optional callback function to call for each window checked
+            method: "phash" (default) or "template" for template matching
+            mask_threshold: Pixels >= this value (0-1 scale) are treated as background.
+                           - For template matching: pixels are ignored in correlation
+                           - For phash: background is normalized before hashing
+                           Useful for logos/text on varying backgrounds (e.g., 0.95)
         Returns:
             List of MatchCandidate objects
         """
+        if method == "template":
+            # Use template matching
+            return self._template_match(
+                template,
+                target,
+                confidence_threshold,
+                step,
+                sizes,
+                show_progress,
+                progress_callback,
+                mask_threshold,
+            )
+        else:
+            # Use existing perceptual hash matching
+            return self._phash_match(
+                template,
+                target,
+                template_hash,
+                confidence_threshold,
+                step,
+                sizes,
+                show_progress,
+                progress_callback,
+                mask_threshold,
+            )
+    def _template_match(
+        self, template, target, threshold, step, sizes, show_progress, callback, mask_threshold
+    ):
+        """Template matching implementation"""
+        matches = []
+        template_w, template_h = template.size
+        target_w, target_h = target.size
+        # Convert to grayscale numpy arrays
+        target_gray = np.array(target.convert("L"), dtype=np.float32) / 255.0
+        # Determine scales to search
+        scales = self._get_search_scales(sizes)
+        # Default step size if not provided
+        if step is None:
+            step = 1
+        # Calculate total operations for progress bar
+        total_operations = 0
+        if show_progress and not callback:
+            for scale in scales:
+                scaled_w = int(template_w * scale)
+                scaled_h = int(template_h * scale)
+                if scaled_w <= target_w and scaled_h <= target_h:
+                    # Compute score map size
+                    out_h = (target_h - scaled_h) // step + 1
+                    out_w = (target_w - scaled_w) // step + 1
+                    total_operations += out_h * out_w
+        # Setup progress bar
+        progress_bar = None
+        if show_progress and not callback and total_operations > 0:
+            progress_bar = tqdm(
+                total=total_operations, desc="Template matching", unit="position", leave=False
+            )
+        # Search at each scale
+        for scale in scales:
+            # Resize template
+            scaled_w = int(template_w * scale)
+            scaled_h = int(template_h * scale)
+            if scaled_w > target_w or scaled_h > target_h:
+                continue
+            scaled_template = template.resize((scaled_w, scaled_h), Image.Resampling.LANCZOS)
+            template_gray = np.array(scaled_template.convert("L"), dtype=np.float32) / 255.0
+            # Run template matching
+            scores = self.template_matcher.match_template(
+                target_gray, template_gray, step, mask_threshold
+            )
+            # Find peaks above threshold
+            y_indices, x_indices = np.where(scores >= threshold)
+            # Update progress
+            if progress_bar:
+                progress_bar.update(scores.size)
+            elif callback:
+                for _ in range(scores.size):
+                    callback()
+            for i in range(len(y_indices)):
+                y_idx = y_indices[i]
+                x_idx = x_indices[i]
+                score = scores[y_idx, x_idx]
+                # Convert back to image coordinates
+                x = x_idx * step
+                y = y_idx * step
+                matches.append(
+                    MatchCandidate(
+                        bbox=(x, y, x + scaled_w, y + scaled_h),
+                        hash_value=0,  # Not used for template matching
+                        confidence=float(score),
+                    )
+                )
+        # Close progress bar
+        if progress_bar:
+            progress_bar.close()
+        # Remove overlapping matches
+        return self._filter_overlapping_matches(matches)
+    def _phash_match(
+        self,
+        template,
+        target,
+        template_hash,
+        threshold,
+        step,
+        sizes,
+        show_progress,
+        callback,
+        mask_threshold=None,
+    ):
+        """Original perceptual hash matching"""
         matches = []
         # Compute template hash if not provided
         if template_hash is None:
-            template_hash = compute_phash(template, self.hash_size)
+            # Convert mask threshold from 0-1 to 0-255 for PIL Image
+            mask_threshold_255 = int(mask_threshold * 255) if mask_threshold is not None else None
+            template_hash = compute_phash(
+                template, self.hash_size, mask_threshold=mask_threshold_255
+            )
         template_w, template_h = template.size
         target_w, target_h = target.size
@@ -209,22 +378,24 @@ class VisualMatcher:
         # Determine scales to search
         scales = self._get_search_scales(sizes)
+        # Default step size if not provided (10% of template size)
+        if step is None:
+            step = max(1, int(min(template_w, template_h) * 0.1))
         # Calculate total iterations for progress bar
         total_iterations = 0
-        if show_progress and not progress_callback:
+        if show_progress and not callback:
             for scale in scales:
                 scaled_w = int(template_w * scale)
                 scaled_h = int(template_h * scale)
                 if scaled_w <= target_w and scaled_h <= target_h:
-                    step_x = max(1, int(scaled_w * step_factor))
-                    step_y = max(1, int(scaled_h * step_factor))
-                    x_steps = len(range(0, target_w - scaled_w + 1, step_x))
-                    y_steps = len(range(0, target_h - scaled_h + 1, step_y))
+                    x_steps = len(range(0, target_w - scaled_w + 1, step))
+                    y_steps = len(range(0, target_h - scaled_h + 1, step))
                     total_iterations += x_steps * y_steps
         # Setup progress bar if needed (only if no callback provided)
         progress_bar = None
-        if show_progress and not progress_callback and total_iterations > 0:
+        if show_progress and not callback and total_iterations > 0:
             progress_bar = tqdm(total=total_iterations, desc="Scanning", unit="window", leave=False)
         # Search at each scale
@@ -236,13 +407,9 @@ class VisualMatcher:
             if scaled_w > target_w or scaled_h > target_h:
                 continue
-            # Calculate step size
-            step_x = max(1, int(scaled_w * step_factor))
-            step_y = max(1, int(scaled_h * step_factor))
             # Sliding window search
-            for y in range(0, target_h - scaled_h + 1, step_y):
-                for x in range(0, target_w - scaled_w + 1, step_x):
+            for y in range(0, target_h - scaled_h + 1, step):
+                for x in range(0, target_w - scaled_w + 1, step):
                     # Extract window
                     window = target.crop((x, y, x + scaled_w, y + scaled_h))
@@ -251,10 +418,15 @@ class VisualMatcher:
                         window = window.resize((template_w, template_h), Image.Resampling.LANCZOS)
                     # Compute hash and similarity
-                    window_hash = compute_phash(window, self.hash_size)
+                    mask_threshold_255 = (
+                        int(mask_threshold * 255) if mask_threshold is not None else None
+                    )
+                    window_hash = compute_phash(
+                        window, self.hash_size, mask_threshold=mask_threshold_255
+                    )
                     similarity = hash_similarity(template_hash, window_hash, self.hash_bits)
-                    if similarity >= confidence_threshold:
+                    if similarity >= threshold:
                         # Convert back to target image coordinates
                         bbox = (x, y, x + scaled_w, y + scaled_h)
                         matches.append(MatchCandidate(bbox, window_hash, similarity))
@@ -262,8 +434,8 @@ class VisualMatcher:
                     # Update progress
                     if progress_bar:
                         progress_bar.update(1)
-                    elif progress_callback:
-                        progress_callback()
+                    elif callback:
+                        callback()
         # Close progress bar
         if progress_bar:

natural-pdf 0.2.12__py3-none-any.whl → 0.2.15__py3-none-any.whl

natural-pdf 0.2.12__py3-none-any.whl → 0.2.15__py3-none-any.whl