natural-pdf 0.2.12__py3-none-any.whl → 0.2.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -423,7 +423,33 @@ def parse_selector(selector: str) -> Dict[str, Any]:
         # Check for other pseudo-class blocks `:name` or `:name(...)`
         pseudo_match = pseudo_pattern.match(selector)
         if pseudo_match:
+            # --- NEW: robustly capture arguments that may contain nested parentheses --- #
             name, args_str = pseudo_match.groups()
+            match_end_idx = pseudo_match.end()
+
+            # If the args_str contains unmatched opening parens, continue scanning the
+            # selector until parentheses are balanced. This allows patterns like
+            # :contains((Tre) Ofertu) or complex regex with grouping.
+            if args_str is not None and args_str.count("(") > args_str.count(")"):
+                balance = args_str.count("(") - args_str.count(")")
+                i = match_end_idx
+                while i < len(selector) and balance > 0:
+                    char = selector[i]
+                    # Append char to args_str as we extend the capture
+                    args_str += char
+                    if char == "(":
+                        balance += 1
+                    elif char == ")":
+                        balance -= 1
+                    i += 1
+                # After loop, ensure parentheses are balanced; otherwise raise error
+                if balance != 0:
+                    raise ValueError(
+                        f"Mismatched parentheses in pseudo-class :{name}(). Full selector: '{original_selector_for_error}'"
+                    )
+                # Update where the selector should be sliced off from
+                match_end_idx = i
+
             name = name.lower()  # Normalize pseudo-class name
             processed_args = args_str  # Keep as string initially, or None
 
@@ -436,7 +462,8 @@ def parse_selector(selector: str) -> Dict[str, Any]:
             # else: args remain None
 
             result["pseudo_classes"].append({"name": name, "args": processed_args})
-            selector = selector[pseudo_match.end() :].strip()
+            # IMPORTANT: use match_end_idx (may have been extended)
+            selector = selector[match_end_idx:].strip()
             processed_chunk = True
             continue
 
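The scanning loop above extends the regex match until parentheses balance instead of trusting the first `)`. A minimal standalone sketch of the same balance-counting technique; the helper name is hypothetical, not part of natural-pdf:

```python
def capture_balanced_args(selector: str, start: int) -> tuple:
    """Capture from the '(' at `start` through its matching ')'.

    Hypothetical helper mirroring the balance-counting loop in the diff;
    returns the argument text (outer parens stripped) and the end index.
    """
    balance = 0
    i = start
    while i < len(selector):
        char = selector[i]
        if char == "(":
            balance += 1
        elif char == ")":
            balance -= 1
            if balance == 0:
                return selector[start + 1 : i], i + 1
        i += 1
    raise ValueError(f"Mismatched parentheses in selector: '{selector}'")

sel = "text:contains((Tre) Ofertu)"
args, end = capture_balanced_args(sel, sel.index("("))
print(args)  # (Tre) Ofertu
```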
@@ -1,7 +1,6 @@
 """Vision module for visual similarity and pattern matching"""
 
 from .mixin import VisualSearchMixin
-from .results import Match, MatchResults
 from .similarity import VisualMatcher, compute_phash
 
-__all__ = ["VisualMatcher", "compute_phash", "Match", "MatchResults", "VisualSearchMixin"]
+__all__ = ["VisualMatcher", "compute_phash", "VisualSearchMixin"]
@@ -6,9 +6,6 @@ import numpy as np
 from PIL import Image
 from tqdm.auto import tqdm
 
-from .results import Match, MatchResults
-from .similarity import VisualMatcher, compute_phash
-
 
 class VisualSearchMixin:
     """Add find_similar method to classes that include this mixin"""
@@ -21,11 +18,12 @@ class VisualSearchMixin:
         sizes: Optional[Union[float, Tuple, List]] = (0.8, 1.2),
         resolution: int = 72,
         hash_size: int = 20,
-        step_factor: float = 0.1,
+        step: Optional[int] = None,
+        method: str = "phash",
         max_per_page: Optional[int] = None,
         show_progress: bool = True,
         **kwargs,
-    ) -> MatchResults:
+    ) -> "MatchResults":
         """
         Find regions visually similar to the given example(s).
 
@@ -35,15 +33,19 @@ class VisualSearchMixin:
             confidence: Minimum similarity score (0-1)
             sizes: Size variations to search. Can be:
                 - float: ±percentage (e.g., 0.2 = 80%-120%)
-                - tuple(min, max): search range with smart logarithmic steps (default: (0.8, 1.0))
+                - tuple(min, max): search range with smart logarithmic steps (default: (0.8, 1.2))
                 - tuple(min, max, step): explicit step size
                 - list: exact sizes to try (e.g., [0.8, 1.0, 1.2])
             resolution: Resolution for image comparison (DPI) (default: 72)
-            hash_size: Size of perceptual hash grid (default: 12)
-            step_factor: Step size as fraction of template size (default: 0.1)
+            hash_size: Size of perceptual hash grid (default: 20)
+            step: Step size in pixels for sliding window
+            method: Matching algorithm - "phash" (default) or "template"
             max_per_page: Maximum matches to return per page
             show_progress: Show progress bar for multi-page searches (default: True)
-            **kwargs: Additional options
+            **kwargs: Additional options including:
+                mask_threshold: For both template and phash methods, pixels >= this value are masked.
+                    For template matching: pixels are ignored in matching (e.g., 0.95)
+                    For phash: pixels are replaced with median before hashing (e.g., 0.95)
 
         Returns:
             MatchResults collection
@@ -55,15 +57,25 @@ class VisualSearchMixin:
         if not isinstance(examples, list):
             examples = [examples]
 
+        from .similarity import VisualMatcher, compute_phash
+
         # Initialize matcher with specified hash size
         matcher = VisualMatcher(hash_size=hash_size)
 
         # Prepare templates
         templates = []
+        # Extract mask_threshold from kwargs for phash
+        mask_threshold = kwargs.get("mask_threshold")
+        mask_threshold_255 = (
+            int(mask_threshold * 255) if mask_threshold is not None and method == "phash" else None
+        )
+
        for example in examples:
             # Render the example region/element
             example_image = example.render(resolution=resolution, crop=True)
-            template_hash = compute_phash(example_image, hash_size=hash_size)
+            template_hash = compute_phash(
+                example_image, hash_size=hash_size, mask_threshold=mask_threshold_255
+            )
             templates.append({"image": example_image, "hash": template_hash, "source": example})
 
         # Get pages to search based on the object type
@@ -76,6 +88,8 @@ class VisualSearchMixin:
             pages_to_search = self.pages
         elif hasattr(self, "number"):  # Single page
             pages_to_search = [self]
+        elif hasattr(self, "page") and hasattr(self, "bbox"):  # Region
+            pages_to_search = [self]
         else:
             raise TypeError(f"Cannot search in {type(self)}")
 
@@ -86,10 +100,16 @@ class VisualSearchMixin:
         scales = matcher._get_search_scales(sizes)
 
         # Pre-calculate for all pages and templates
-        for page in pages_to_search:
-            # Estimate page image size
-            page_w = int(page.width * resolution / 72.0)
-            page_h = int(page.height * resolution / 72.0)
+        for search_obj in pages_to_search:
+            # Estimate image size based on object type
+            if hasattr(search_obj, "page") and hasattr(search_obj, "bbox"):
+                # Region
+                page_w = int(search_obj.width * resolution / 72.0)
+                page_h = int(search_obj.height * resolution / 72.0)
+            else:
+                # Page
+                page_w = int(search_obj.width * resolution / 72.0)
+                page_h = int(search_obj.height * resolution / 72.0)
 
             for template_data in templates:
                 template_w, template_h = template_data["image"].size
@@ -99,11 +119,15 @@ class VisualSearchMixin:
                     scaled_h = int(template_h * scale)
 
                     if scaled_w <= page_w and scaled_h <= page_h:
-                        step_x = max(1, int(scaled_w * step_factor))
-                        step_y = max(1, int(scaled_h * step_factor))
-
-                        x_windows = len(range(0, page_w - scaled_w + 1, step_x))
-                        y_windows = len(range(0, page_h - scaled_h + 1, step_y))
+                        # Determine step size
+                        if step is not None:
+                            actual_step = step
+                        else:
+                            # Default to 10% of template size
+                            actual_step = max(1, int(min(scaled_w, scaled_h) * 0.1))
+
+                        x_windows = len(range(0, page_w - scaled_w + 1, actual_step))
+                        y_windows = len(range(0, page_h - scaled_h + 1, actual_step))
                         total_operations += x_windows * y_windows
 
         # Search each page
@@ -124,9 +148,20 @@ class VisualSearchMixin:
                 mininterval=0.1,  # Minimum time between updates (seconds)
             )
 
-        for page_idx, page in enumerate(pages_to_search):
-            # Render the full page once
-            page_image = page.render(resolution=resolution)
+        for page_idx, search_obj in enumerate(pages_to_search):
+            # Determine if we're searching in a page or a region
+            if hasattr(search_obj, "page") and hasattr(search_obj, "bbox"):
+                # This is a Region - render only the region area
+                region = search_obj
+                page = region.page
+                page_image = region.render(resolution=resolution, crop=True)
+                # Region offset for coordinate conversion
+                region_x0, region_y0 = region.x0, region.top
+            else:
+                # This is a Page - render the full page
+                page = search_obj
+                page_image = page.render(resolution=resolution)
+                region_x0, region_y0 = 0, 0
 
             # Convert page coordinates to image coordinates
             scale = resolution / 72.0  # PDF is 72 DPI
@@ -168,7 +203,8 @@ class VisualSearchMixin:
                 template_hash=template_hash,
                 confidence_threshold=confidence,
                 sizes=sizes,
-                step_factor=step_factor,
+                step=step,
+                method=method,
                 show_progress=False,  # We handle progress ourselves
                 progress_callback=update_progress if progress_bar else None,
                 **kwargs,
@@ -180,10 +216,12 @@ class VisualSearchMixin:
 
                 # Convert from image pixels to PDF points
                 # No flipping needed! PDF coordinates map directly to PIL coordinates
-                pdf_x0 = img_x0 / scale
-                pdf_y0 = img_y0 / scale
-                pdf_x1 = img_x1 / scale
-                pdf_y1 = img_y1 / scale
+                pdf_x0 = img_x0 / scale + region_x0
+                pdf_y0 = img_y0 / scale + region_y0
+                pdf_x1 = img_x1 / scale + region_x0
+                pdf_y1 = img_y1 / scale + region_y0
+
+                from .results import Match
 
                 # Create Match object
                 match = Match(
@@ -206,4 +244,6 @@ class VisualSearchMixin:
         if progress_bar:
             progress_bar.close()
 
+        from .results import MatchResults
+
         return MatchResults(all_matches)
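A hedged usage sketch of the updated `find_similar` signature; the PDF path, the region coordinates, and the `PDF`/`region` helpers are illustrative assumptions based on natural-pdf's public API, not part of this diff:

```python
from natural_pdf import PDF

pdf = PDF("annual-report.pdf")  # hypothetical file
logo = pdf.pages[0].region(left=40, top=40, right=140, bottom=90)  # region around a logo

# Template matching with a near-white mask and a fixed 2-pixel stride
# (the new `step` replaces the old `step_factor` fraction):
matches = pdf.find_similar(
    logo,
    method="template",
    step=2,
    mask_threshold=0.95,  # treat pixels >= 95% brightness as background
    confidence=0.7,
)

# MatchResults now arrive sorted by confidence, best first
print(f"{len(matches)} matches; best = {matches[0].confidence:.3f}")
for m in matches.top(3):
    print(m.page.number, m.bbox)
```

Because the mixin now accepts Regions (the `hasattr(self, "page") and hasattr(self, "bbox")` branches above), the same call also works on a single region, with match coordinates shifted back into page space via `region_x0`/`region_y0`.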
@@ -2,7 +2,6 @@
 
 from typing import TYPE_CHECKING, Any, Iterator, List, Optional, Tuple
 
-# Import Region directly as it's a base class
 from natural_pdf.elements.region import Region
 
 if TYPE_CHECKING:
@@ -39,16 +38,41 @@ class Match(Region):
 
 
 class MatchResults:
-    """Collection of Match objects with transformation methods"""
+    """
+    Collection of Match objects with transformation methods.
+
+    Matches are automatically sorted by confidence (highest first), so:
+    - matches[0] is the best match
+    - Iteration yields matches from best to worst
+    - The .top(n) method returns the n best matches
+
+    Example:
+        >>> matches = page.find_similar(logo_region)
+        >>> print(f"Found {len(matches)} matches")
+        >>>
+        >>> # Best match
+        >>> best = matches[0]
+        >>> print(f"Best match confidence: {best.confidence:.3f}")
+        >>>
+        >>> # Top 5 matches
+        >>> for match in matches.top(5):
+        ...     print(f"Confidence: {match.confidence:.3f} at page {match.page.number}")
+        >>>
+        >>> # All matches above 90% confidence
+        >>> high_conf = matches.filter_by_confidence(0.9)
+    """
 
     def __init__(self, matches: List[Match]):
-        """Initialize with list of Match objects"""
+        """Initialize with list of Match objects, automatically sorted by confidence"""
         # Import here to avoid circular import
         from natural_pdf.elements.element_collection import ElementCollection
 
+        # Sort matches by confidence (highest first)
+        sorted_matches = sorted(matches, key=lambda m: m.confidence, reverse=True)
+
         # Create a base ElementCollection
-        self._collection = ElementCollection(matches)
-        self._matches = matches
+        self._collection = ElementCollection(sorted_matches)
+        self._matches = sorted_matches
 
     def __len__(self):
         return len(self._matches)
@@ -68,6 +92,26 @@ class MatchResults:
         """Filter matches by minimum confidence"""
         return self.filter(lambda m: m.confidence >= min_confidence)
 
+    def top(self, n: int) -> "MatchResults":
+        """
+        Get the top N matches with highest confidence.
+
+        Args:
+            n: Number of top matches to return
+
+        Returns:
+            New MatchResults with only the top N matches
+
+        Example:
+            >>> matches = page.find_similar(logo)
+            >>> best_5 = matches.top(5)
+            >>> for match in best_5:
+            ...     print(f"Confidence: {match.confidence:.3f}")
+        """
+        # Since matches are already sorted by confidence, just take first n
+        top_matches = self._matches[:n]
+        return MatchResults(top_matches)
+
     def pages(self):
         """Get unique pages containing matches"""
         # Import here to avoid circular import
@@ -7,6 +7,8 @@ import numpy as np
 from PIL import Image
 from tqdm.auto import tqdm
 
+from .template_matching import TemplateMatcher
+
 
 @dataclass
 class MatchCandidate:
@@ -17,7 +19,12 @@ class MatchCandidate:
     confidence: float
 
 
-def compute_phash(image: Image.Image, hash_size: int = 8, blur_radius: float = 0) -> int:
+def compute_phash(
+    image: Image.Image,
+    hash_size: int = 8,
+    blur_radius: float = 0,
+    mask_threshold: Optional[float] = None,
+) -> int:
     """
     Compute perceptual hash of an image using DCT.
 
@@ -25,6 +32,8 @@ def compute_phash(image: Image.Image, hash_size: int = 8, blur_radius: float = 0
         image: PIL Image to hash
         hash_size: Size of the hash (8 = 64 bit hash)
         blur_radius: Optional blur to apply before hashing (makes more tolerant)
+        mask_threshold: If provided, pixels >= this value (0-255 scale) are replaced with median
+            before hashing. Useful for ignoring white backgrounds.
 
     Returns:
         Integer hash value
@@ -39,6 +48,25 @@ def compute_phash(image: Image.Image, hash_size: int = 8, blur_radius: float = 0
 
         image = image.filter(ImageFilter.GaussianBlur(radius=blur_radius))
 
+    # Apply masking if threshold provided
+    if mask_threshold is not None:
+        # For phash, masking works by normalizing the background
+        # This makes the hash focus on relative differences rather than absolute values
+        img_array = np.array(image, dtype=np.float32)
+
+        # Normalize by subtracting a representative background value
+        # Use the median bright value as the background
+        bright_pixels = img_array[img_array >= mask_threshold]
+        if len(bright_pixels) > 0:
+            # Median of the bright pixels serves as the background estimate
+            background_val = np.median(bright_pixels)
+            # Normalize the image by subtracting background
+            # This makes different backgrounds appear similar
+            img_array = np.clip(img_array - background_val + 128, 0, 255)
+
+        # Convert back to PIL Image
+        image = Image.fromarray(img_array.astype(np.uint8))
+
     # Resize to 32x32 (4x the hash size for DCT)
     highfreq_factor = 4
     img_size = hash_size * highfreq_factor
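A small sketch of the new `mask_threshold` behavior in `compute_phash`; the import path `natural_pdf.vision.similarity` is inferred from the relative imports in this diff, and note the threshold here is on the 0-255 scale (the mixin converts from 0-1):

```python
from PIL import Image, ImageDraw
from natural_pdf.vision.similarity import compute_phash, hash_similarity

def glyph_on_background(bg: int) -> Image.Image:
    """Draw the same dark rectangle on differently shaded backgrounds."""
    img = Image.new("L", (64, 64), color=bg)
    ImageDraw.Draw(img).rectangle([16, 24, 48, 40], fill=0)
    return img

white = glyph_on_background(255)
gray = glyph_on_background(230)

plain = hash_similarity(compute_phash(white, 8), compute_phash(gray, 8))
masked = hash_similarity(
    compute_phash(white, 8, mask_threshold=200),
    compute_phash(gray, 8, mask_threshold=200),
)
# Masking normalizes both backgrounds toward the same value, so the
# masked similarity should be at least as high as the plain one.
print(f"without mask: {plain:.3f}, with mask: {masked:.3f}")
```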
@@ -80,12 +108,13 @@ def hash_similarity(hash1: int, hash2: int, hash_size: int = 64) -> float:
 
 
 class VisualMatcher:
-    """Handles visual similarity matching using perceptual hashing"""
+    """Handles visual similarity matching using perceptual hashing or template matching"""
 
     def __init__(self, hash_size: int = 12):
         self.hash_size = hash_size
         self.hash_bits = hash_size * hash_size
         self._cache = {}
+        self.template_matcher = TemplateMatcher()  # Default zncc
 
     def _get_search_scales(self, sizes: Optional[Union[float, Tuple, List]]) -> List[float]:
         """
@@ -172,20 +201,22 @@ class VisualMatcher:
         target: Image.Image,
         template_hash: Optional[int] = None,
         confidence_threshold: float = 0.6,
-        step_factor: float = 0.1,
+        step: Optional[int] = None,
         sizes: Optional[Union[float, Tuple, List]] = None,
         show_progress: bool = True,
         progress_callback: Optional[Callable[[], None]] = None,
+        method: str = "phash",
+        mask_threshold: Optional[float] = None,
     ) -> List[MatchCandidate]:
         """
-        Find all matches of template in target image using sliding window.
+        Find all matches of template in target image.
 
         Args:
             template: Template image to search for
             target: Target image to search in
-            template_hash: Pre-computed hash of template (optional)
+            template_hash: Pre-computed hash of template (optional, only for phash)
             confidence_threshold: Minimum similarity score (0-1)
-            step_factor: Step size as fraction of template size
+            step: Step size in pixels for sliding window
             sizes: Size variations to search. Can be:
                 - float: ±percentage (e.g., 0.2 = 80%-120%)
                 - tuple(min, max): search range with smart logarithmic steps
@@ -193,15 +224,153 @@ class VisualMatcher:
                 - list: exact sizes to try (e.g., [0.8, 1.0, 1.2])
             show_progress: Show progress bar for sliding window search
             progress_callback: Optional callback function to call for each window checked
+            method: "phash" (default) or "template" for template matching
+            mask_threshold: Pixels >= this value (0-1 scale) are treated as background.
+                - For template matching: pixels are ignored in correlation
+                - For phash: background is normalized before hashing
+                Useful for logos/text on varying backgrounds (e.g., 0.95)
 
         Returns:
             List of MatchCandidate objects
         """
+        if method == "template":
+            # Use template matching
+            return self._template_match(
+                template,
+                target,
+                confidence_threshold,
+                step,
+                sizes,
+                show_progress,
+                progress_callback,
+                mask_threshold,
+            )
+        else:
+            # Use existing perceptual hash matching
+            return self._phash_match(
+                template,
+                target,
+                template_hash,
+                confidence_threshold,
+                step,
+                sizes,
+                show_progress,
+                progress_callback,
+                mask_threshold,
+            )
+
+    def _template_match(
+        self, template, target, threshold, step, sizes, show_progress, callback, mask_threshold
+    ):
+        """Template matching implementation"""
+        matches = []
+
+        template_w, template_h = template.size
+        target_w, target_h = target.size
+
+        # Convert to grayscale numpy arrays
+        target_gray = np.array(target.convert("L"), dtype=np.float32) / 255.0
+
+        # Determine scales to search
+        scales = self._get_search_scales(sizes)
+
+        # Default step size if not provided
+        if step is None:
+            step = 1
+
+        # Calculate total operations for progress bar
+        total_operations = 0
+        if show_progress and not callback:
+            for scale in scales:
+                scaled_w = int(template_w * scale)
+                scaled_h = int(template_h * scale)
+
+                if scaled_w <= target_w and scaled_h <= target_h:
+                    # Compute score map size
+                    out_h = (target_h - scaled_h) // step + 1
+                    out_w = (target_w - scaled_w) // step + 1
+                    total_operations += out_h * out_w
+
+        # Setup progress bar
+        progress_bar = None
+        if show_progress and not callback and total_operations > 0:
+            progress_bar = tqdm(
+                total=total_operations, desc="Template matching", unit="position", leave=False
+            )
+
+        # Search at each scale
+        for scale in scales:
+            # Resize template
+            scaled_w = int(template_w * scale)
+            scaled_h = int(template_h * scale)
+
+            if scaled_w > target_w or scaled_h > target_h:
+                continue
+
+            scaled_template = template.resize((scaled_w, scaled_h), Image.Resampling.LANCZOS)
+            template_gray = np.array(scaled_template.convert("L"), dtype=np.float32) / 255.0
+
+            # Run template matching
+            scores = self.template_matcher.match_template(
+                target_gray, template_gray, step, mask_threshold
+            )
+
+            # Find peaks above threshold
+            y_indices, x_indices = np.where(scores >= threshold)
+
+            # Update progress
+            if progress_bar:
+                progress_bar.update(scores.size)
+            elif callback:
+                for _ in range(scores.size):
+                    callback()
+
+            for i in range(len(y_indices)):
+                y_idx = y_indices[i]
+                x_idx = x_indices[i]
+                score = scores[y_idx, x_idx]
+
+                # Convert back to image coordinates
+                x = x_idx * step
+                y = y_idx * step
+
+                matches.append(
+                    MatchCandidate(
+                        bbox=(x, y, x + scaled_w, y + scaled_h),
+                        hash_value=0,  # Not used for template matching
+                        confidence=float(score),
+                    )
+                )
+
+        # Close progress bar
+        if progress_bar:
+            progress_bar.close()
+
+        # Remove overlapping matches
+        return self._filter_overlapping_matches(matches)
+
+    def _phash_match(
+        self,
+        template,
+        target,
+        template_hash,
+        threshold,
+        step,
+        sizes,
+        show_progress,
+        callback,
+        mask_threshold=None,
+    ):
+        """Original perceptual hash matching"""
         matches = []
 
         # Compute template hash if not provided
         if template_hash is None:
-            template_hash = compute_phash(template, self.hash_size)
+            # Convert mask threshold from 0-1 to 0-255 for PIL Image
+            mask_threshold_255 = int(mask_threshold * 255) if mask_threshold is not None else None
+            template_hash = compute_phash(
+                template, self.hash_size, mask_threshold=mask_threshold_255
+            )
 
         template_w, template_h = template.size
         target_w, target_h = target.size
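The `TemplateMatcher` used above comes from a new `template_matching` module whose source is not shown in this diff; the `# Default zncc` comment suggests zero-mean normalized cross-correlation. A standalone sketch of ZNCC scoring with the same `(target, template, step, mask_threshold)` call shape, offered as an assumption about the behavior rather than the module's actual code:

```python
from typing import Optional
import numpy as np

def zncc_score_map(
    target: np.ndarray,      # grayscale target, values in 0-1
    template: np.ndarray,    # grayscale template, values in 0-1
    step: int = 1,
    mask_threshold: Optional[float] = None,
) -> np.ndarray:
    """ZNCC of the template against every window of the target."""
    th, tw = template.shape
    H, W = target.shape
    # Optionally ignore near-white template pixels, per the docstring above
    mask = np.ones_like(template, dtype=bool)
    if mask_threshold is not None:
        mask = template < mask_threshold
        if not mask.any():
            mask = np.ones_like(template, dtype=bool)  # fall back if all masked
    t = template[mask]
    t = t - t.mean()
    out_h = (H - th) // step + 1
    out_w = (W - tw) // step + 1
    scores = np.zeros((out_h, out_w), dtype=np.float32)
    for yi in range(out_h):
        for xi in range(out_w):
            y, x = yi * step, xi * step
            w = target[y : y + th, x : x + tw][mask]
            w = w - w.mean()
            denom = np.sqrt((t * t).sum() * (w * w).sum())
            scores[yi, xi] = (t * w).sum() / denom if denom > 0 else 0.0
    return scores
```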
@@ -209,22 +378,24 @@ class VisualMatcher:
         # Determine scales to search
         scales = self._get_search_scales(sizes)
 
+        # Default step size if not provided (10% of template size)
+        if step is None:
+            step = max(1, int(min(template_w, template_h) * 0.1))
+
         # Calculate total iterations for progress bar
         total_iterations = 0
-        if show_progress and not progress_callback:
+        if show_progress and not callback:
             for scale in scales:
                 scaled_w = int(template_w * scale)
                 scaled_h = int(template_h * scale)
                 if scaled_w <= target_w and scaled_h <= target_h:
-                    step_x = max(1, int(scaled_w * step_factor))
-                    step_y = max(1, int(scaled_h * step_factor))
-                    x_steps = len(range(0, target_w - scaled_w + 1, step_x))
-                    y_steps = len(range(0, target_h - scaled_h + 1, step_y))
+                    x_steps = len(range(0, target_w - scaled_w + 1, step))
+                    y_steps = len(range(0, target_h - scaled_h + 1, step))
                     total_iterations += x_steps * y_steps
 
         # Setup progress bar if needed (only if no callback provided)
         progress_bar = None
-        if show_progress and not progress_callback and total_iterations > 0:
+        if show_progress and not callback and total_iterations > 0:
             progress_bar = tqdm(total=total_iterations, desc="Scanning", unit="window", leave=False)
 
         # Search at each scale
@@ -236,13 +407,9 @@ class VisualMatcher:
             if scaled_w > target_w or scaled_h > target_h:
                 continue
 
-            # Calculate step size
-            step_x = max(1, int(scaled_w * step_factor))
-            step_y = max(1, int(scaled_h * step_factor))
-
             # Sliding window search
-            for y in range(0, target_h - scaled_h + 1, step_y):
-                for x in range(0, target_w - scaled_w + 1, step_x):
+            for y in range(0, target_h - scaled_h + 1, step):
+                for x in range(0, target_w - scaled_w + 1, step):
                     # Extract window
                     window = target.crop((x, y, x + scaled_w, y + scaled_h))
 
@@ -251,10 +418,15 @@ class VisualMatcher:
                         window = window.resize((template_w, template_h), Image.Resampling.LANCZOS)
 
                     # Compute hash and similarity
-                    window_hash = compute_phash(window, self.hash_size)
+                    mask_threshold_255 = (
+                        int(mask_threshold * 255) if mask_threshold is not None else None
+                    )
+                    window_hash = compute_phash(
+                        window, self.hash_size, mask_threshold=mask_threshold_255
+                    )
                     similarity = hash_similarity(template_hash, window_hash, self.hash_bits)
 
-                    if similarity >= confidence_threshold:
+                    if similarity >= threshold:
                         # Convert back to target image coordinates
                         bbox = (x, y, x + scaled_w, y + scaled_h)
                         matches.append(MatchCandidate(bbox, window_hash, similarity))
@@ -262,8 +434,8 @@ class VisualMatcher:
                     # Update progress
                     if progress_bar:
                         progress_bar.update(1)
-                    elif progress_callback:
-                        progress_callback()
+                    elif callback:
+                        callback()
 
         # Close progress bar
         if progress_bar:
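An end-to-end sketch of the reworked `find_matches` dispatch; the images are synthetic stand-ins, and since the `template_matching` module is not shown in this diff, exact scores from `method="template"` are not guaranteed:

```python
from PIL import Image, ImageDraw
from natural_pdf.vision.similarity import VisualMatcher

# Build a target containing two copies of a small box, plus a template of one box
target = Image.new("L", (200, 100), color=255)
draw = ImageDraw.Draw(target)
for left in (20, 130):
    draw.rectangle([left, 30, left + 30, 60], fill=0)
template = target.crop((20, 30, 51, 61))

matcher = VisualMatcher()
candidates = matcher.find_matches(
    template,
    target,
    method="template",        # routes to _template_match / TemplateMatcher
    step=2,                   # pixel stride replaces the old step_factor fraction
    confidence_threshold=0.9,
    show_progress=False,
)
for c in candidates:
    print(c.bbox, round(c.confidence, 3))
```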