natural-pdf 0.2.12__py3-none-any.whl → 0.2.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- natural_pdf/core/highlighting_service.py +40 -10
- natural_pdf/elements/base.py +18 -1
- natural_pdf/elements/element_collection.py +153 -15
- natural_pdf/elements/rect.py +34 -0
- natural_pdf/elements/region.py +55 -3
- natural_pdf/elements/text.py +20 -2
- natural_pdf/selectors/parser.py +28 -1
- natural_pdf/vision/__init__.py +1 -2
- natural_pdf/vision/mixin.py +67 -27
- natural_pdf/vision/results.py +49 -5
- natural_pdf/vision/similarity.py +195 -23
- natural_pdf/vision/template_matching.py +209 -0
- {natural_pdf-0.2.12.dist-info → natural_pdf-0.2.15.dist-info}/METADATA +1 -1
- {natural_pdf-0.2.12.dist-info → natural_pdf-0.2.15.dist-info}/RECORD +24 -23
- temp/test_draw_guides.py +25 -0
- temp/test_draw_guides_interactive.py +30 -0
- temp/test_guide_draw_notebook.py +47 -0
- temp/test_inline_js.py +22 -0
- temp/test_widget_functionality.py +68 -0
- temp/test_widget_simple.py +41 -0
- temp/debug_cell_extraction.py +0 -42
- temp/debug_exclusion_overlap.py +0 -43
- temp/debug_exclusions_guides.py +0 -67
- temp/debug_extra_guide.py +0 -41
- temp/debug_outer_boundaries.py +0 -46
- temp/debug_st_search.py +0 -33
- {natural_pdf-0.2.12.dist-info → natural_pdf-0.2.15.dist-info}/WHEEL +0 -0
- {natural_pdf-0.2.12.dist-info → natural_pdf-0.2.15.dist-info}/entry_points.txt +0 -0
- {natural_pdf-0.2.12.dist-info → natural_pdf-0.2.15.dist-info}/licenses/LICENSE +0 -0
- {natural_pdf-0.2.12.dist-info → natural_pdf-0.2.15.dist-info}/top_level.txt +0 -0
natural_pdf/selectors/parser.py
CHANGED
@@ -423,7 +423,33 @@ def parse_selector(selector: str) -> Dict[str, Any]:
|
|
423
423
|
# Check for other pseudo-class blocks `:name` or `:name(...)`
|
424
424
|
pseudo_match = pseudo_pattern.match(selector)
|
425
425
|
if pseudo_match:
|
426
|
+
# --- NEW: robustly capture arguments that may contain nested parentheses --- #
|
426
427
|
name, args_str = pseudo_match.groups()
|
428
|
+
match_end_idx = pseudo_match.end()
|
429
|
+
|
430
|
+
# If the args_str contains unmatched opening parens, continue scanning the
|
431
|
+
# selector until parentheses are balanced. This allows patterns like
|
432
|
+
# :contains((Tre) Ofertu) or complex regex with grouping.
|
433
|
+
if args_str is not None and args_str.count("(") > args_str.count(")"):
|
434
|
+
balance = args_str.count("(") - args_str.count(")")
|
435
|
+
i = match_end_idx
|
436
|
+
while i < len(selector) and balance > 0:
|
437
|
+
char = selector[i]
|
438
|
+
# Append char to args_str as we extend the capture
|
439
|
+
args_str += char
|
440
|
+
if char == "(":
|
441
|
+
balance += 1
|
442
|
+
elif char == ")":
|
443
|
+
balance -= 1
|
444
|
+
i += 1
|
445
|
+
# After loop, ensure parentheses are balanced; otherwise raise error
|
446
|
+
if balance != 0:
|
447
|
+
raise ValueError(
|
448
|
+
f"Mismatched parentheses in pseudo-class :{name}(). Full selector: '{original_selector_for_error}'"
|
449
|
+
)
|
450
|
+
# Update where the selector should be sliced off from
|
451
|
+
match_end_idx = i
|
452
|
+
|
427
453
|
name = name.lower() # Normalize pseudo-class name
|
428
454
|
processed_args = args_str # Keep as string initially, or None
|
429
455
|
|
@@ -436,7 +462,8 @@ def parse_selector(selector: str) -> Dict[str, Any]:
|
|
436
462
|
# else: args remain None
|
437
463
|
|
438
464
|
result["pseudo_classes"].append({"name": name, "args": processed_args})
|
439
|
-
|
465
|
+
# IMPORTANT: use match_end_idx (may have been extended)
|
466
|
+
selector = selector[match_end_idx:].strip()
|
440
467
|
processed_chunk = True
|
441
468
|
continue
|
442
469
|
|
natural_pdf/vision/__init__.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
"""Vision module for visual similarity and pattern matching"""
|
2
2
|
|
3
3
|
from .mixin import VisualSearchMixin
|
4
|
-
from .results import Match, MatchResults
|
5
4
|
from .similarity import VisualMatcher, compute_phash
|
6
5
|
|
7
|
-
__all__ = ["VisualMatcher", "compute_phash", "
|
6
|
+
__all__ = ["VisualMatcher", "compute_phash", "VisualSearchMixin"]
|
natural_pdf/vision/mixin.py
CHANGED
@@ -6,9 +6,6 @@ import numpy as np
|
|
6
6
|
from PIL import Image
|
7
7
|
from tqdm.auto import tqdm
|
8
8
|
|
9
|
-
from .results import Match, MatchResults
|
10
|
-
from .similarity import VisualMatcher, compute_phash
|
11
|
-
|
12
9
|
|
13
10
|
class VisualSearchMixin:
|
14
11
|
"""Add find_similar method to classes that include this mixin"""
|
@@ -21,11 +18,12 @@ class VisualSearchMixin:
|
|
21
18
|
sizes: Optional[Union[float, Tuple, List]] = (0.8, 1.2),
|
22
19
|
resolution: int = 72,
|
23
20
|
hash_size: int = 20,
|
24
|
-
|
21
|
+
step: Optional[int] = None,
|
22
|
+
method: str = "phash",
|
25
23
|
max_per_page: Optional[int] = None,
|
26
24
|
show_progress: bool = True,
|
27
25
|
**kwargs,
|
28
|
-
) -> MatchResults:
|
26
|
+
) -> "MatchResults":
|
29
27
|
"""
|
30
28
|
Find regions visually similar to the given example(s).
|
31
29
|
|
@@ -35,15 +33,19 @@ class VisualSearchMixin:
|
|
35
33
|
confidence: Minimum similarity score (0-1)
|
36
34
|
sizes: Size variations to search. Can be:
|
37
35
|
- float: ±percentage (e.g., 0.2 = 80%-120%)
|
38
|
-
- tuple(min, max): search range with smart logarithmic steps (default: (0.8, 1.
|
36
|
+
- tuple(min, max): search range with smart logarithmic steps (default: (0.8, 1.2))
|
39
37
|
- tuple(min, max, step): explicit step size
|
40
38
|
- list: exact sizes to try (e.g., [0.8, 1.0, 1.2])
|
41
39
|
resolution: Resolution for image comparison (DPI) (default: 72)
|
42
|
-
hash_size: Size of perceptual hash grid (default:
|
43
|
-
|
40
|
+
hash_size: Size of perceptual hash grid (default: 20)
|
41
|
+
step: Step size in pixels for sliding window
|
42
|
+
method: Matching algorithm - "phash" (default) or "template"
|
44
43
|
max_per_page: Maximum matches to return per page
|
45
44
|
show_progress: Show progress bar for multi-page searches (default: True)
|
46
|
-
**kwargs: Additional options
|
45
|
+
**kwargs: Additional options including:
|
46
|
+
mask_threshold: For both template and phash methods, pixels >= this value are masked.
|
47
|
+
For template matching: pixels are ignored in matching (e.g., 0.95)
|
48
|
+
For phash: the median of those pixels is subtracted from the image before hashing (e.g., 0.95)
|
47
49
|
|
48
50
|
Returns:
|
49
51
|
MatchResults collection
|
@@ -55,15 +57,25 @@ class VisualSearchMixin:
|
|
55
57
|
if not isinstance(examples, list):
|
56
58
|
examples = [examples]
|
57
59
|
|
60
|
+
from .similarity import VisualMatcher, compute_phash
|
61
|
+
|
58
62
|
# Initialize matcher with specified hash size
|
59
63
|
matcher = VisualMatcher(hash_size=hash_size)
|
60
64
|
|
61
65
|
# Prepare templates
|
62
66
|
templates = []
|
67
|
+
# Extract mask_threshold from kwargs for phash
|
68
|
+
mask_threshold = kwargs.get("mask_threshold")
|
69
|
+
mask_threshold_255 = (
|
70
|
+
int(mask_threshold * 255) if mask_threshold is not None and method == "phash" else None
|
71
|
+
)
|
72
|
+
|
63
73
|
for example in examples:
|
64
74
|
# Render the example region/element
|
65
75
|
example_image = example.render(resolution=resolution, crop=True)
|
66
|
-
template_hash = compute_phash(
|
76
|
+
template_hash = compute_phash(
|
77
|
+
example_image, hash_size=hash_size, mask_threshold=mask_threshold_255
|
78
|
+
)
|
67
79
|
templates.append({"image": example_image, "hash": template_hash, "source": example})
|
68
80
|
|
69
81
|
# Get pages to search based on the object type
|
@@ -76,6 +88,8 @@ class VisualSearchMixin:
|
|
76
88
|
pages_to_search = self.pages
|
77
89
|
elif hasattr(self, "number"): # Single page
|
78
90
|
pages_to_search = [self]
|
91
|
+
elif hasattr(self, "page") and hasattr(self, "bbox"): # Region
|
92
|
+
pages_to_search = [self]
|
79
93
|
else:
|
80
94
|
raise TypeError(f"Cannot search in {type(self)}")
|
81
95
|
|
@@ -86,10 +100,16 @@ class VisualSearchMixin:
|
|
86
100
|
scales = matcher._get_search_scales(sizes)
|
87
101
|
|
88
102
|
# Pre-calculate for all pages and templates
|
89
|
-
for
|
90
|
-
# Estimate
|
91
|
-
|
92
|
-
|
103
|
+
for search_obj in pages_to_search:
|
104
|
+
# Estimate image size based on object type
|
105
|
+
if hasattr(search_obj, "page") and hasattr(search_obj, "bbox"):
|
106
|
+
# Region
|
107
|
+
page_w = int(search_obj.width * resolution / 72.0)
|
108
|
+
page_h = int(search_obj.height * resolution / 72.0)
|
109
|
+
else:
|
110
|
+
# Page
|
111
|
+
page_w = int(search_obj.width * resolution / 72.0)
|
112
|
+
page_h = int(search_obj.height * resolution / 72.0)
|
93
113
|
|
94
114
|
for template_data in templates:
|
95
115
|
template_w, template_h = template_data["image"].size
|
@@ -99,11 +119,15 @@ class VisualSearchMixin:
|
|
99
119
|
scaled_h = int(template_h * scale)
|
100
120
|
|
101
121
|
if scaled_w <= page_w and scaled_h <= page_h:
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
122
|
+
# Determine step size
|
123
|
+
if step is not None:
|
124
|
+
actual_step = step
|
125
|
+
else:
|
126
|
+
# Default to 10% of template size
|
127
|
+
actual_step = max(1, int(min(scaled_w, scaled_h) * 0.1))
|
128
|
+
|
129
|
+
x_windows = len(range(0, page_w - scaled_w + 1, actual_step))
|
130
|
+
y_windows = len(range(0, page_h - scaled_h + 1, actual_step))
|
107
131
|
total_operations += x_windows * y_windows
|
108
132
|
|
109
133
|
# Search each page
|
@@ -124,9 +148,20 @@ class VisualSearchMixin:
|
|
124
148
|
mininterval=0.1, # Minimum time between updates (seconds)
|
125
149
|
)
|
126
150
|
|
127
|
-
for page_idx,
|
128
|
-
#
|
129
|
-
|
151
|
+
for page_idx, search_obj in enumerate(pages_to_search):
|
152
|
+
# Determine if we're searching in a page or a region
|
153
|
+
if hasattr(search_obj, "page") and hasattr(search_obj, "bbox"):
|
154
|
+
# This is a Region - render only the region area
|
155
|
+
region = search_obj
|
156
|
+
page = region.page
|
157
|
+
page_image = region.render(resolution=resolution, crop=True)
|
158
|
+
# Region offset for coordinate conversion
|
159
|
+
region_x0, region_y0 = region.x0, region.top
|
160
|
+
else:
|
161
|
+
# This is a Page - render the full page
|
162
|
+
page = search_obj
|
163
|
+
page_image = page.render(resolution=resolution)
|
164
|
+
region_x0, region_y0 = 0, 0
|
130
165
|
|
131
166
|
# Convert page coordinates to image coordinates
|
132
167
|
scale = resolution / 72.0 # PDF is 72 DPI
|
@@ -168,7 +203,8 @@ class VisualSearchMixin:
|
|
168
203
|
template_hash=template_hash,
|
169
204
|
confidence_threshold=confidence,
|
170
205
|
sizes=sizes,
|
171
|
-
|
206
|
+
step=step,
|
207
|
+
method=method,
|
172
208
|
show_progress=False, # We handle progress ourselves
|
173
209
|
progress_callback=update_progress if progress_bar else None,
|
174
210
|
**kwargs,
|
@@ -180,10 +216,12 @@ class VisualSearchMixin:
|
|
180
216
|
|
181
217
|
# Convert from image pixels to PDF points
|
182
218
|
# No flipping needed! PDF coordinates map directly to PIL coordinates
|
183
|
-
pdf_x0 = img_x0 / scale
|
184
|
-
pdf_y0 = img_y0 / scale
|
185
|
-
pdf_x1 = img_x1 / scale
|
186
|
-
pdf_y1 = img_y1 / scale
|
219
|
+
pdf_x0 = img_x0 / scale + region_x0
|
220
|
+
pdf_y0 = img_y0 / scale + region_y0
|
221
|
+
pdf_x1 = img_x1 / scale + region_x0
|
222
|
+
pdf_y1 = img_y1 / scale + region_y0
|
223
|
+
|
224
|
+
from .results import Match
|
187
225
|
|
188
226
|
# Create Match object
|
189
227
|
match = Match(
|
@@ -206,4 +244,6 @@ class VisualSearchMixin:
|
|
206
244
|
if progress_bar:
|
207
245
|
progress_bar.close()
|
208
246
|
|
247
|
+
from .results import MatchResults
|
248
|
+
|
209
249
|
return MatchResults(all_matches)
|
natural_pdf/vision/results.py
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
|
3
3
|
from typing import TYPE_CHECKING, Any, Iterator, List, Optional, Tuple
|
4
4
|
|
5
|
-
# Import Region directly as it's a base class
|
6
5
|
from natural_pdf.elements.region import Region
|
7
6
|
|
8
7
|
if TYPE_CHECKING:
|
@@ -39,16 +38,41 @@ class Match(Region):
|
|
39
38
|
|
40
39
|
|
41
40
|
class MatchResults:
|
42
|
-
"""
|
41
|
+
"""
|
42
|
+
Collection of Match objects with transformation methods.
|
43
|
+
|
44
|
+
Matches are automatically sorted by confidence (highest first), so:
|
45
|
+
- matches[0] is the best match
|
46
|
+
- Iteration yields matches from best to worst
|
47
|
+
- The .top(n) method returns the n best matches
|
48
|
+
|
49
|
+
Example:
|
50
|
+
>>> matches = page.find_similar(logo_region)
|
51
|
+
>>> print(f"Found {len(matches)} matches")
|
52
|
+
>>>
|
53
|
+
>>> # Best match
|
54
|
+
>>> best = matches[0]
|
55
|
+
>>> print(f"Best match confidence: {best.confidence:.3f}")
|
56
|
+
>>>
|
57
|
+
>>> # Top 5 matches
|
58
|
+
>>> for match in matches.top(5):
|
59
|
+
... print(f"Confidence: {match.confidence:.3f} at page {match.page.number}")
|
60
|
+
>>>
|
61
|
+
>>> # All matches above 90% confidence
|
62
|
+
>>> high_conf = matches.filter_by_confidence(0.9)
|
63
|
+
"""
|
43
64
|
|
44
65
|
def __init__(self, matches: List[Match]):
|
45
|
-
"""Initialize with list of Match objects"""
|
66
|
+
"""Initialize with list of Match objects, automatically sorted by confidence"""
|
46
67
|
# Import here to avoid circular import
|
47
68
|
from natural_pdf.elements.element_collection import ElementCollection
|
48
69
|
|
70
|
+
# Sort matches by confidence (highest first)
|
71
|
+
sorted_matches = sorted(matches, key=lambda m: m.confidence, reverse=True)
|
72
|
+
|
49
73
|
# Create a base ElementCollection
|
50
|
-
self._collection = ElementCollection(
|
51
|
-
self._matches =
|
74
|
+
self._collection = ElementCollection(sorted_matches)
|
75
|
+
self._matches = sorted_matches
|
52
76
|
|
53
77
|
def __len__(self):
|
54
78
|
return len(self._matches)
|
@@ -68,6 +92,26 @@ class MatchResults:
|
|
68
92
|
"""Filter matches by minimum confidence"""
|
69
93
|
return self.filter(lambda m: m.confidence >= min_confidence)
|
70
94
|
|
95
|
+
def top(self, n: int) -> "MatchResults":
|
96
|
+
"""
|
97
|
+
Get the top N matches with highest confidence.
|
98
|
+
|
99
|
+
Args:
|
100
|
+
n: Number of top matches to return
|
101
|
+
|
102
|
+
Returns:
|
103
|
+
New MatchResults with only the top N matches
|
104
|
+
|
105
|
+
Example:
|
106
|
+
>>> matches = page.find_similar(logo)
|
107
|
+
>>> best_5 = matches.top(5)
|
108
|
+
>>> for match in best_5:
|
109
|
+
... print(f"Confidence: {match.confidence:.3f}")
|
110
|
+
"""
|
111
|
+
# Since matches are already sorted by confidence, just take first n
|
112
|
+
top_matches = self._matches[:n]
|
113
|
+
return MatchResults(top_matches)
|
114
|
+
|
71
115
|
def pages(self):
|
72
116
|
"""Get unique pages containing matches"""
|
73
117
|
# Import here to avoid circular import
|
natural_pdf/vision/similarity.py
CHANGED
@@ -7,6 +7,8 @@ import numpy as np
|
|
7
7
|
from PIL import Image
|
8
8
|
from tqdm.auto import tqdm
|
9
9
|
|
10
|
+
from .template_matching import TemplateMatcher
|
11
|
+
|
10
12
|
|
11
13
|
@dataclass
|
12
14
|
class MatchCandidate:
|
@@ -17,7 +19,12 @@ class MatchCandidate:
|
|
17
19
|
confidence: float
|
18
20
|
|
19
21
|
|
20
|
-
def compute_phash(
|
22
|
+
def compute_phash(
|
23
|
+
image: Image.Image,
|
24
|
+
hash_size: int = 8,
|
25
|
+
blur_radius: float = 0,
|
26
|
+
mask_threshold: Optional[float] = None,
|
27
|
+
) -> int:
|
21
28
|
"""
|
22
29
|
Compute perceptual hash of an image using DCT.
|
23
30
|
|
@@ -25,6 +32,8 @@ def compute_phash(image: Image.Image, hash_size: int = 8, blur_radius: float = 0
|
|
25
32
|
image: PIL Image to hash
|
26
33
|
hash_size: Size of the hash (8 = 64 bit hash)
|
27
34
|
blur_radius: Optional blur to apply before hashing (makes more tolerant)
|
35
|
+
mask_threshold: If provided, pixels >= this value (0-255 scale) are treated as background:
|
36
|
+
their median is subtracted from the whole image (re-centered at 128) before hashing. Useful for ignoring white backgrounds.
|
28
37
|
|
29
38
|
Returns:
|
30
39
|
Integer hash value
|
@@ -39,6 +48,25 @@ def compute_phash(image: Image.Image, hash_size: int = 8, blur_radius: float = 0
|
|
39
48
|
|
40
49
|
image = image.filter(ImageFilter.GaussianBlur(radius=blur_radius))
|
41
50
|
|
51
|
+
# Apply masking if threshold provided
|
52
|
+
if mask_threshold is not None:
|
53
|
+
# For phash, masking works by normalizing the background
|
54
|
+
# This makes the hash focus on relative differences rather than absolute values
|
55
|
+
img_array = np.array(image, dtype=np.float32)
|
56
|
+
|
57
|
+
# Normalize by subtracting a representative background value
|
58
|
+
# Use the median of the bright pixels (those >= mask_threshold) as the background
|
59
|
+
bright_pixels = img_array[img_array >= mask_threshold]
|
60
|
+
if len(bright_pixels) > 0:
|
61
|
+
# Use the median of bright pixels as background
|
62
|
+
background_val = np.median(bright_pixels)
|
63
|
+
# Normalize the image by subtracting background
|
64
|
+
# This makes different backgrounds appear similar
|
65
|
+
img_array = np.clip(img_array - background_val + 128, 0, 255)
|
66
|
+
|
67
|
+
# Convert back to PIL Image
|
68
|
+
image = Image.fromarray(img_array.astype(np.uint8))
|
69
|
+
|
42
70
|
# Resize to 32x32 (4x the hash size for DCT)
|
43
71
|
highfreq_factor = 4
|
44
72
|
img_size = hash_size * highfreq_factor
|
@@ -80,12 +108,13 @@ def hash_similarity(hash1: int, hash2: int, hash_size: int = 64) -> float:
|
|
80
108
|
|
81
109
|
|
82
110
|
class VisualMatcher:
|
83
|
-
"""Handles visual similarity matching using perceptual hashing"""
|
111
|
+
"""Handles visual similarity matching using perceptual hashing or template matching"""
|
84
112
|
|
85
113
|
def __init__(self, hash_size: int = 12):
|
86
114
|
self.hash_size = hash_size
|
87
115
|
self.hash_bits = hash_size * hash_size
|
88
116
|
self._cache = {}
|
117
|
+
self.template_matcher = TemplateMatcher() # Default zncc
|
89
118
|
|
90
119
|
def _get_search_scales(self, sizes: Optional[Union[float, Tuple, List]]) -> List[float]:
|
91
120
|
"""
|
@@ -172,20 +201,22 @@ class VisualMatcher:
|
|
172
201
|
target: Image.Image,
|
173
202
|
template_hash: Optional[int] = None,
|
174
203
|
confidence_threshold: float = 0.6,
|
175
|
-
|
204
|
+
step: Optional[int] = None,
|
176
205
|
sizes: Optional[Union[float, Tuple, List]] = None,
|
177
206
|
show_progress: bool = True,
|
178
207
|
progress_callback: Optional[Callable[[], None]] = None,
|
208
|
+
method: str = "phash",
|
209
|
+
mask_threshold: Optional[float] = None,
|
179
210
|
) -> List[MatchCandidate]:
|
180
211
|
"""
|
181
|
-
Find all matches of template in target image
|
212
|
+
Find all matches of template in target image.
|
182
213
|
|
183
214
|
Args:
|
184
215
|
template: Template image to search for
|
185
216
|
target: Target image to search in
|
186
|
-
template_hash: Pre-computed hash of template (optional)
|
217
|
+
template_hash: Pre-computed hash of template (optional, only for phash)
|
187
218
|
confidence_threshold: Minimum similarity score (0-1)
|
188
|
-
|
219
|
+
step: Step size in pixels for sliding window
|
189
220
|
sizes: Size variations to search. Can be:
|
190
221
|
- float: ±percentage (e.g., 0.2 = 80%-120%)
|
191
222
|
- tuple(min, max): search range with smart logarithmic steps
|
@@ -193,15 +224,153 @@ class VisualMatcher:
|
|
193
224
|
- list: exact sizes to try (e.g., [0.8, 1.0, 1.2])
|
194
225
|
show_progress: Show progress bar for sliding window search
|
195
226
|
progress_callback: Optional callback function to call for each window checked
|
227
|
+
method: "phash" (default) or "template" for template matching
|
228
|
+
mask_threshold: Pixels >= this value (0-1 scale) are treated as background.
|
229
|
+
- For template matching: pixels are ignored in correlation
|
230
|
+
- For phash: background is normalized before hashing
|
231
|
+
Useful for logos/text on varying backgrounds (e.g., 0.95)
|
196
232
|
|
197
233
|
Returns:
|
198
234
|
List of MatchCandidate objects
|
199
235
|
"""
|
236
|
+
if method == "template":
|
237
|
+
# Use template matching
|
238
|
+
return self._template_match(
|
239
|
+
template,
|
240
|
+
target,
|
241
|
+
confidence_threshold,
|
242
|
+
step,
|
243
|
+
sizes,
|
244
|
+
show_progress,
|
245
|
+
progress_callback,
|
246
|
+
mask_threshold,
|
247
|
+
)
|
248
|
+
else:
|
249
|
+
# Use existing perceptual hash matching
|
250
|
+
return self._phash_match(
|
251
|
+
template,
|
252
|
+
target,
|
253
|
+
template_hash,
|
254
|
+
confidence_threshold,
|
255
|
+
step,
|
256
|
+
sizes,
|
257
|
+
show_progress,
|
258
|
+
progress_callback,
|
259
|
+
mask_threshold,
|
260
|
+
)
|
261
|
+
|
262
|
+
def _template_match(
|
263
|
+
self, template, target, threshold, step, sizes, show_progress, callback, mask_threshold
|
264
|
+
):
|
265
|
+
"""Template matching implementation"""
|
266
|
+
matches = []
|
267
|
+
|
268
|
+
template_w, template_h = template.size
|
269
|
+
target_w, target_h = target.size
|
270
|
+
|
271
|
+
# Convert to grayscale numpy arrays
|
272
|
+
target_gray = np.array(target.convert("L"), dtype=np.float32) / 255.0
|
273
|
+
|
274
|
+
# Determine scales to search
|
275
|
+
scales = self._get_search_scales(sizes)
|
276
|
+
|
277
|
+
# Default step size if not provided
|
278
|
+
if step is None:
|
279
|
+
step = 1
|
280
|
+
|
281
|
+
# Calculate total operations for progress bar
|
282
|
+
total_operations = 0
|
283
|
+
if show_progress and not callback:
|
284
|
+
for scale in scales:
|
285
|
+
scaled_w = int(template_w * scale)
|
286
|
+
scaled_h = int(template_h * scale)
|
287
|
+
|
288
|
+
if scaled_w <= target_w and scaled_h <= target_h:
|
289
|
+
# Compute score map size
|
290
|
+
out_h = (target_h - scaled_h) // step + 1
|
291
|
+
out_w = (target_w - scaled_w) // step + 1
|
292
|
+
total_operations += out_h * out_w
|
293
|
+
|
294
|
+
# Setup progress bar
|
295
|
+
progress_bar = None
|
296
|
+
if show_progress and not callback and total_operations > 0:
|
297
|
+
progress_bar = tqdm(
|
298
|
+
total=total_operations, desc="Template matching", unit="position", leave=False
|
299
|
+
)
|
300
|
+
|
301
|
+
# Search at each scale
|
302
|
+
for scale in scales:
|
303
|
+
# Resize template
|
304
|
+
scaled_w = int(template_w * scale)
|
305
|
+
scaled_h = int(template_h * scale)
|
306
|
+
|
307
|
+
if scaled_w > target_w or scaled_h > target_h:
|
308
|
+
continue
|
309
|
+
|
310
|
+
scaled_template = template.resize((scaled_w, scaled_h), Image.Resampling.LANCZOS)
|
311
|
+
template_gray = np.array(scaled_template.convert("L"), dtype=np.float32) / 255.0
|
312
|
+
|
313
|
+
# Run template matching
|
314
|
+
scores = self.template_matcher.match_template(
|
315
|
+
target_gray, template_gray, step, mask_threshold
|
316
|
+
)
|
317
|
+
|
318
|
+
# Find peaks above threshold
|
319
|
+
y_indices, x_indices = np.where(scores >= threshold)
|
320
|
+
|
321
|
+
# Update progress
|
322
|
+
if progress_bar:
|
323
|
+
progress_bar.update(scores.size)
|
324
|
+
elif callback:
|
325
|
+
for _ in range(scores.size):
|
326
|
+
callback()
|
327
|
+
|
328
|
+
for i in range(len(y_indices)):
|
329
|
+
y_idx = y_indices[i]
|
330
|
+
x_idx = x_indices[i]
|
331
|
+
score = scores[y_idx, x_idx]
|
332
|
+
|
333
|
+
# Convert back to image coordinates
|
334
|
+
x = x_idx * step
|
335
|
+
y = y_idx * step
|
336
|
+
|
337
|
+
matches.append(
|
338
|
+
MatchCandidate(
|
339
|
+
bbox=(x, y, x + scaled_w, y + scaled_h),
|
340
|
+
hash_value=0, # Not used for template matching
|
341
|
+
confidence=float(score),
|
342
|
+
)
|
343
|
+
)
|
344
|
+
|
345
|
+
# Close progress bar
|
346
|
+
if progress_bar:
|
347
|
+
progress_bar.close()
|
348
|
+
|
349
|
+
# Remove overlapping matches
|
350
|
+
return self._filter_overlapping_matches(matches)
|
351
|
+
|
352
|
+
def _phash_match(
|
353
|
+
self,
|
354
|
+
template,
|
355
|
+
target,
|
356
|
+
template_hash,
|
357
|
+
threshold,
|
358
|
+
step,
|
359
|
+
sizes,
|
360
|
+
show_progress,
|
361
|
+
callback,
|
362
|
+
mask_threshold=None,
|
363
|
+
):
|
364
|
+
"""Original perceptual hash matching"""
|
200
365
|
matches = []
|
201
366
|
|
202
367
|
# Compute template hash if not provided
|
203
368
|
if template_hash is None:
|
204
|
-
|
369
|
+
# Convert mask threshold from 0-1 to 0-255 for PIL Image
|
370
|
+
mask_threshold_255 = int(mask_threshold * 255) if mask_threshold is not None else None
|
371
|
+
template_hash = compute_phash(
|
372
|
+
template, self.hash_size, mask_threshold=mask_threshold_255
|
373
|
+
)
|
205
374
|
|
206
375
|
template_w, template_h = template.size
|
207
376
|
target_w, target_h = target.size
|
@@ -209,22 +378,24 @@ class VisualMatcher:
|
|
209
378
|
# Determine scales to search
|
210
379
|
scales = self._get_search_scales(sizes)
|
211
380
|
|
381
|
+
# Default step size if not provided (10% of template size)
|
382
|
+
if step is None:
|
383
|
+
step = max(1, int(min(template_w, template_h) * 0.1))
|
384
|
+
|
212
385
|
# Calculate total iterations for progress bar
|
213
386
|
total_iterations = 0
|
214
|
-
if show_progress and not
|
387
|
+
if show_progress and not callback:
|
215
388
|
for scale in scales:
|
216
389
|
scaled_w = int(template_w * scale)
|
217
390
|
scaled_h = int(template_h * scale)
|
218
391
|
if scaled_w <= target_w and scaled_h <= target_h:
|
219
|
-
|
220
|
-
|
221
|
-
x_steps = len(range(0, target_w - scaled_w + 1, step_x))
|
222
|
-
y_steps = len(range(0, target_h - scaled_h + 1, step_y))
|
392
|
+
x_steps = len(range(0, target_w - scaled_w + 1, step))
|
393
|
+
y_steps = len(range(0, target_h - scaled_h + 1, step))
|
223
394
|
total_iterations += x_steps * y_steps
|
224
395
|
|
225
396
|
# Setup progress bar if needed (only if no callback provided)
|
226
397
|
progress_bar = None
|
227
|
-
if show_progress and not
|
398
|
+
if show_progress and not callback and total_iterations > 0:
|
228
399
|
progress_bar = tqdm(total=total_iterations, desc="Scanning", unit="window", leave=False)
|
229
400
|
|
230
401
|
# Search at each scale
|
@@ -236,13 +407,9 @@ class VisualMatcher:
|
|
236
407
|
if scaled_w > target_w or scaled_h > target_h:
|
237
408
|
continue
|
238
409
|
|
239
|
-
# Calculate step size
|
240
|
-
step_x = max(1, int(scaled_w * step_factor))
|
241
|
-
step_y = max(1, int(scaled_h * step_factor))
|
242
|
-
|
243
410
|
# Sliding window search
|
244
|
-
for y in range(0, target_h - scaled_h + 1,
|
245
|
-
for x in range(0, target_w - scaled_w + 1,
|
411
|
+
for y in range(0, target_h - scaled_h + 1, step):
|
412
|
+
for x in range(0, target_w - scaled_w + 1, step):
|
246
413
|
# Extract window
|
247
414
|
window = target.crop((x, y, x + scaled_w, y + scaled_h))
|
248
415
|
|
@@ -251,10 +418,15 @@ class VisualMatcher:
|
|
251
418
|
window = window.resize((template_w, template_h), Image.Resampling.LANCZOS)
|
252
419
|
|
253
420
|
# Compute hash and similarity
|
254
|
-
|
421
|
+
mask_threshold_255 = (
|
422
|
+
int(mask_threshold * 255) if mask_threshold is not None else None
|
423
|
+
)
|
424
|
+
window_hash = compute_phash(
|
425
|
+
window, self.hash_size, mask_threshold=mask_threshold_255
|
426
|
+
)
|
255
427
|
similarity = hash_similarity(template_hash, window_hash, self.hash_bits)
|
256
428
|
|
257
|
-
if similarity >=
|
429
|
+
if similarity >= threshold:
|
258
430
|
# Convert back to target image coordinates
|
259
431
|
bbox = (x, y, x + scaled_w, y + scaled_h)
|
260
432
|
matches.append(MatchCandidate(bbox, window_hash, similarity))
|
@@ -262,8 +434,8 @@ class VisualMatcher:
|
|
262
434
|
# Update progress
|
263
435
|
if progress_bar:
|
264
436
|
progress_bar.update(1)
|
265
|
-
elif
|
266
|
-
|
437
|
+
elif callback:
|
438
|
+
callback()
|
267
439
|
|
268
440
|
# Close progress bar
|
269
441
|
if progress_bar:
|