natural-pdf 0.2.12__py3-none-any.whl → 0.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- natural_pdf/core/highlighting_service.py +40 -10
- natural_pdf/elements/base.py +15 -1
- natural_pdf/elements/region.py +32 -2
- natural_pdf/vision/__init__.py +1 -2
- natural_pdf/vision/mixin.py +67 -27
- natural_pdf/vision/results.py +49 -5
- natural_pdf/vision/similarity.py +195 -23
- natural_pdf/vision/template_matching.py +209 -0
- {natural_pdf-0.2.12.dist-info → natural_pdf-0.2.13.dist-info}/METADATA +1 -1
- {natural_pdf-0.2.12.dist-info → natural_pdf-0.2.13.dist-info}/RECORD +20 -19
- temp/test_draw_guides.py +25 -0
- temp/test_draw_guides_interactive.py +30 -0
- temp/test_guide_draw_notebook.py +47 -0
- temp/test_inline_js.py +22 -0
- temp/test_widget_functionality.py +68 -0
- temp/test_widget_simple.py +41 -0
- temp/debug_cell_extraction.py +0 -42
- temp/debug_exclusion_overlap.py +0 -43
- temp/debug_exclusions_guides.py +0 -67
- temp/debug_extra_guide.py +0 -41
- temp/debug_outer_boundaries.py +0 -46
- temp/debug_st_search.py +0 -33
- {natural_pdf-0.2.12.dist-info → natural_pdf-0.2.13.dist-info}/WHEEL +0 -0
- {natural_pdf-0.2.12.dist-info → natural_pdf-0.2.13.dist-info}/entry_points.txt +0 -0
- {natural_pdf-0.2.12.dist-info → natural_pdf-0.2.13.dist-info}/licenses/LICENSE +0 -0
- {natural_pdf-0.2.12.dist-info → natural_pdf-0.2.13.dist-info}/top_level.txt +0 -0
@@ -92,6 +92,16 @@ class HighlightRenderer:
|
|
92
92
|
|
93
93
|
def _draw_highlights(self):
|
94
94
|
"""Draws all highlight shapes, borders, vertices, and attributes."""
|
95
|
+
# Get the pdfplumber page offset for coordinate translation
|
96
|
+
page_offset_x = 0
|
97
|
+
page_offset_y = 0
|
98
|
+
|
99
|
+
if hasattr(self.page, "_page") and hasattr(self.page._page, "bbox"):
|
100
|
+
# PDFPlumber page bbox might have negative offsets
|
101
|
+
page_offset_x = -self.page._page.bbox[0]
|
102
|
+
page_offset_y = -self.page._page.bbox[1]
|
103
|
+
logger.debug(f"Applying highlight offset: x={page_offset_x}, y={page_offset_y}")
|
104
|
+
|
95
105
|
for highlight in self.highlights:
|
96
106
|
# Create a transparent overlay for this single highlight
|
97
107
|
overlay = Image.new("RGBA", self.base_image.size, (0, 0, 0, 0))
|
@@ -101,7 +111,11 @@ class HighlightRenderer:
|
|
101
111
|
|
102
112
|
if highlight.is_polygon:
|
103
113
|
scaled_polygon = [
|
104
|
-
(
|
114
|
+
(
|
115
|
+
(p[0] + page_offset_x) * self.scale_factor,
|
116
|
+
(p[1] + page_offset_y) * self.scale_factor,
|
117
|
+
)
|
118
|
+
for p in highlight.polygon
|
105
119
|
]
|
106
120
|
# Draw polygon fill and border
|
107
121
|
draw.polygon(
|
@@ -117,11 +131,16 @@ class HighlightRenderer:
|
|
117
131
|
else: # Rectangle
|
118
132
|
x0, top, x1, bottom = highlight.bbox
|
119
133
|
x0_s, top_s, x1_s, bottom_s = (
|
120
|
-
x0 * self.scale_factor,
|
121
|
-
top * self.scale_factor,
|
122
|
-
x1 * self.scale_factor,
|
123
|
-
bottom * self.scale_factor,
|
134
|
+
(x0 + page_offset_x) * self.scale_factor,
|
135
|
+
(top + page_offset_y) * self.scale_factor,
|
136
|
+
(x1 + page_offset_x) * self.scale_factor,
|
137
|
+
(bottom + page_offset_y) * self.scale_factor,
|
124
138
|
)
|
139
|
+
logger.debug(f"Original bbox: ({x0}, {top}, {x1}, {bottom})")
|
140
|
+
logger.debug(
|
141
|
+
f"Offset bbox: ({x0 + page_offset_x}, {top + page_offset_y}, {x1 + page_offset_x}, {bottom + page_offset_y})"
|
142
|
+
)
|
143
|
+
logger.debug(f"Scaled bbox: ({x0_s}, {top_s}, {x1_s}, {bottom_s})")
|
125
144
|
scaled_bbox = [x0_s, top_s, x1_s, bottom_s]
|
126
145
|
# Draw rectangle fill and border
|
127
146
|
draw.rectangle(
|
@@ -1482,11 +1501,22 @@ class HighlightingService:
|
|
1482
1501
|
offset_x = crop_offset[0] * scale_factor
|
1483
1502
|
offset_y = crop_offset[1] * scale_factor
|
1484
1503
|
|
1504
|
+
# Add pdfplumber page offset for coordinate translation
|
1505
|
+
page_offset_x = 0
|
1506
|
+
page_offset_y = 0
|
1507
|
+
if hasattr(page, "_page") and hasattr(page._page, "bbox"):
|
1508
|
+
# PDFPlumber page bbox might have negative offsets
|
1509
|
+
page_offset_x = -page._page.bbox[0]
|
1510
|
+
page_offset_y = -page._page.bbox[1]
|
1511
|
+
|
1485
1512
|
# Draw the highlight
|
1486
1513
|
if polygon:
|
1487
1514
|
# Scale polygon points and apply offset
|
1488
1515
|
scaled_polygon = [
|
1489
|
-
(
|
1516
|
+
(
|
1517
|
+
(p[0] + page_offset_x) * scale_factor - offset_x,
|
1518
|
+
(p[1] + page_offset_y) * scale_factor - offset_y,
|
1519
|
+
)
|
1490
1520
|
for p in polygon
|
1491
1521
|
]
|
1492
1522
|
draw.polygon(
|
@@ -1496,10 +1526,10 @@ class HighlightingService:
|
|
1496
1526
|
# Scale bbox and apply offset
|
1497
1527
|
x0, y0, x1, y1 = bbox
|
1498
1528
|
scaled_bbox = [
|
1499
|
-
x0 * scale_factor - offset_x,
|
1500
|
-
y0 * scale_factor - offset_y,
|
1501
|
-
x1 * scale_factor - offset_x,
|
1502
|
-
y1 * scale_factor - offset_y,
|
1529
|
+
(x0 + page_offset_x) * scale_factor - offset_x,
|
1530
|
+
(y0 + page_offset_y) * scale_factor - offset_y,
|
1531
|
+
(x1 + page_offset_x) * scale_factor - offset_x,
|
1532
|
+
(y1 + page_offset_y) * scale_factor - offset_y,
|
1503
1533
|
]
|
1504
1534
|
draw.rectangle(
|
1505
1535
|
scaled_bbox, fill=color, outline=(color[0], color[1], color[2], BORDER_ALPHA)
|
natural_pdf/elements/base.py
CHANGED
@@ -106,6 +106,7 @@ class DirectionalMixin:
|
|
106
106
|
include_source: bool = False,
|
107
107
|
until: Optional[str] = None,
|
108
108
|
include_endpoint: bool = True,
|
109
|
+
offset: float = 0.1,
|
109
110
|
**kwargs,
|
110
111
|
) -> "Region":
|
111
112
|
"""
|
@@ -118,6 +119,7 @@ class DirectionalMixin:
|
|
118
119
|
include_source: Whether to include this element/region's area in the result
|
119
120
|
until: Optional selector string to specify a boundary element
|
120
121
|
include_endpoint: Whether to include the boundary element found by 'until'
|
122
|
+
offset: Pixel offset when excluding source/endpoint (default: 0.1)
|
121
123
|
**kwargs: Additional parameters for the 'until' selector search
|
122
124
|
|
123
125
|
Returns:
|
@@ -127,7 +129,7 @@ class DirectionalMixin:
|
|
127
129
|
|
128
130
|
is_horizontal = direction in ("left", "right")
|
129
131
|
is_positive = direction in ("right", "below") # right/below are positive directions
|
130
|
-
pixel_offset =
|
132
|
+
pixel_offset = offset # Use provided offset for excluding elements/endpoints
|
131
133
|
|
132
134
|
# 1. Determine initial boundaries based on direction and include_source
|
133
135
|
if is_horizontal:
|
@@ -260,6 +262,7 @@ class DirectionalMixin:
|
|
260
262
|
include_source: bool = False,
|
261
263
|
until: Optional[str] = None,
|
262
264
|
include_endpoint: bool = True,
|
265
|
+
offset: float = 0.1,
|
263
266
|
**kwargs,
|
264
267
|
) -> "Region":
|
265
268
|
"""
|
@@ -271,6 +274,7 @@ class DirectionalMixin:
|
|
271
274
|
include_source: Whether to include this element/region in the result (default: False)
|
272
275
|
until: Optional selector string to specify an upper boundary element
|
273
276
|
include_endpoint: Whether to include the boundary element in the region (default: True)
|
277
|
+
offset: Pixel offset when excluding source/endpoint (default: 0.1)
|
274
278
|
**kwargs: Additional parameters
|
275
279
|
|
276
280
|
Returns:
|
@@ -295,6 +299,7 @@ class DirectionalMixin:
|
|
295
299
|
include_source=include_source,
|
296
300
|
until=until,
|
297
301
|
include_endpoint=include_endpoint,
|
302
|
+
offset=offset,
|
298
303
|
**kwargs,
|
299
304
|
)
|
300
305
|
|
@@ -305,6 +310,7 @@ class DirectionalMixin:
|
|
305
310
|
include_source: bool = False,
|
306
311
|
until: Optional[str] = None,
|
307
312
|
include_endpoint: bool = True,
|
313
|
+
offset: float = 0.1,
|
308
314
|
**kwargs,
|
309
315
|
) -> "Region":
|
310
316
|
"""
|
@@ -316,6 +322,7 @@ class DirectionalMixin:
|
|
316
322
|
include_source: Whether to include this element/region in the result (default: False)
|
317
323
|
until: Optional selector string to specify a lower boundary element
|
318
324
|
include_endpoint: Whether to include the boundary element in the region (default: True)
|
325
|
+
offset: Pixel offset when excluding source/endpoint (default: 0.1)
|
319
326
|
**kwargs: Additional parameters
|
320
327
|
|
321
328
|
Returns:
|
@@ -340,6 +347,7 @@ class DirectionalMixin:
|
|
340
347
|
include_source=include_source,
|
341
348
|
until=until,
|
342
349
|
include_endpoint=include_endpoint,
|
350
|
+
offset=offset,
|
343
351
|
**kwargs,
|
344
352
|
)
|
345
353
|
|
@@ -350,6 +358,7 @@ class DirectionalMixin:
|
|
350
358
|
include_source: bool = False,
|
351
359
|
until: Optional[str] = None,
|
352
360
|
include_endpoint: bool = True,
|
361
|
+
offset: float = 0.1,
|
353
362
|
**kwargs,
|
354
363
|
) -> "Region":
|
355
364
|
"""
|
@@ -361,6 +370,7 @@ class DirectionalMixin:
|
|
361
370
|
include_source: Whether to include this element/region in the result (default: False)
|
362
371
|
until: Optional selector string to specify a left boundary element
|
363
372
|
include_endpoint: Whether to include the boundary element in the region (default: True)
|
373
|
+
offset: Pixel offset when excluding source/endpoint (default: 0.1)
|
364
374
|
**kwargs: Additional parameters
|
365
375
|
|
366
376
|
Returns:
|
@@ -385,6 +395,7 @@ class DirectionalMixin:
|
|
385
395
|
include_source=include_source,
|
386
396
|
until=until,
|
387
397
|
include_endpoint=include_endpoint,
|
398
|
+
offset=offset,
|
388
399
|
**kwargs,
|
389
400
|
)
|
390
401
|
|
@@ -395,6 +406,7 @@ class DirectionalMixin:
|
|
395
406
|
include_source: bool = False,
|
396
407
|
until: Optional[str] = None,
|
397
408
|
include_endpoint: bool = True,
|
409
|
+
offset: float = 0.1,
|
398
410
|
**kwargs,
|
399
411
|
) -> "Region":
|
400
412
|
"""
|
@@ -406,6 +418,7 @@ class DirectionalMixin:
|
|
406
418
|
include_source: Whether to include this element/region in the result (default: False)
|
407
419
|
until: Optional selector string to specify a right boundary element
|
408
420
|
include_endpoint: Whether to include the boundary element in the region (default: True)
|
421
|
+
offset: Pixel offset when excluding source/endpoint (default: 0.1)
|
409
422
|
**kwargs: Additional parameters
|
410
423
|
|
411
424
|
Returns:
|
@@ -430,6 +443,7 @@ class DirectionalMixin:
|
|
430
443
|
include_source=include_source,
|
431
444
|
until=until,
|
432
445
|
include_endpoint=include_endpoint,
|
446
|
+
offset=offset,
|
433
447
|
**kwargs,
|
434
448
|
)
|
435
449
|
|
natural_pdf/elements/region.py
CHANGED
@@ -45,6 +45,7 @@ from natural_pdf.utils.locks import pdf_render_lock # Import the lock
|
|
45
45
|
|
46
46
|
# Import new utils
|
47
47
|
from natural_pdf.utils.text_extraction import filter_chars_spatially, generate_text_layout
|
48
|
+
from natural_pdf.vision.mixin import VisualSearchMixin
|
48
49
|
|
49
50
|
# Import viewer widget support
|
50
51
|
from natural_pdf.widgets.viewer import _IPYWIDGETS_AVAILABLE, InteractiveViewerWidget
|
@@ -80,6 +81,7 @@ class Region(
|
|
80
81
|
ExtractionMixin,
|
81
82
|
ShapeDetectionMixin,
|
82
83
|
DescribeMixin,
|
84
|
+
VisualSearchMixin,
|
83
85
|
Visualizable,
|
84
86
|
):
|
85
87
|
"""Represents a rectangular region on a page.
|
@@ -1692,7 +1694,21 @@ class Region(
|
|
1692
1694
|
else:
|
1693
1695
|
filtered_page = base_plumber_page
|
1694
1696
|
|
1695
|
-
|
1697
|
+
# Ensure bbox is within pdfplumber page bounds
|
1698
|
+
page_bbox = filtered_page.bbox
|
1699
|
+
clipped_bbox = (
|
1700
|
+
max(self.bbox[0], page_bbox[0]), # x0
|
1701
|
+
max(self.bbox[1], page_bbox[1]), # y0
|
1702
|
+
min(self.bbox[2], page_bbox[2]), # x1
|
1703
|
+
min(self.bbox[3], page_bbox[3]), # y1
|
1704
|
+
)
|
1705
|
+
|
1706
|
+
# Only crop if the clipped bbox is valid (has positive width and height)
|
1707
|
+
if clipped_bbox[2] > clipped_bbox[0] and clipped_bbox[3] > clipped_bbox[1]:
|
1708
|
+
cropped = filtered_page.crop(clipped_bbox)
|
1709
|
+
else:
|
1710
|
+
# If the region is completely outside the page bounds, return empty list
|
1711
|
+
return []
|
1696
1712
|
|
1697
1713
|
# Extract all tables from the cropped area
|
1698
1714
|
tables = cropped.extract_tables(table_settings)
|
@@ -1786,7 +1802,21 @@ class Region(
|
|
1786
1802
|
filtered_page = base_plumber_page
|
1787
1803
|
|
1788
1804
|
# Now crop the (possibly filtered) page to the region bbox
|
1789
|
-
|
1805
|
+
# Ensure bbox is within pdfplumber page bounds
|
1806
|
+
page_bbox = filtered_page.bbox
|
1807
|
+
clipped_bbox = (
|
1808
|
+
max(self.bbox[0], page_bbox[0]), # x0
|
1809
|
+
max(self.bbox[1], page_bbox[1]), # y0
|
1810
|
+
min(self.bbox[2], page_bbox[2]), # x1
|
1811
|
+
min(self.bbox[3], page_bbox[3]), # y1
|
1812
|
+
)
|
1813
|
+
|
1814
|
+
# Only crop if the clipped bbox is valid (has positive width and height)
|
1815
|
+
if clipped_bbox[2] > clipped_bbox[0] and clipped_bbox[3] > clipped_bbox[1]:
|
1816
|
+
cropped = filtered_page.crop(clipped_bbox)
|
1817
|
+
else:
|
1818
|
+
# If the region is completely outside the page bounds, return empty table
|
1819
|
+
return []
|
1790
1820
|
|
1791
1821
|
# Extract the single largest table from the cropped area
|
1792
1822
|
table = cropped.extract_table(table_settings)
|
natural_pdf/vision/__init__.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
"""Vision module for visual similarity and pattern matching"""
|
2
2
|
|
3
3
|
from .mixin import VisualSearchMixin
|
4
|
-
from .results import Match, MatchResults
|
5
4
|
from .similarity import VisualMatcher, compute_phash
|
6
5
|
|
7
|
-
__all__ = ["VisualMatcher", "compute_phash", "
|
6
|
+
__all__ = ["VisualMatcher", "compute_phash", "VisualSearchMixin"]
|
natural_pdf/vision/mixin.py
CHANGED
@@ -6,9 +6,6 @@ import numpy as np
|
|
6
6
|
from PIL import Image
|
7
7
|
from tqdm.auto import tqdm
|
8
8
|
|
9
|
-
from .results import Match, MatchResults
|
10
|
-
from .similarity import VisualMatcher, compute_phash
|
11
|
-
|
12
9
|
|
13
10
|
class VisualSearchMixin:
|
14
11
|
"""Add find_similar method to classes that include this mixin"""
|
@@ -21,11 +18,12 @@ class VisualSearchMixin:
|
|
21
18
|
sizes: Optional[Union[float, Tuple, List]] = (0.8, 1.2),
|
22
19
|
resolution: int = 72,
|
23
20
|
hash_size: int = 20,
|
24
|
-
|
21
|
+
step: Optional[int] = None,
|
22
|
+
method: str = "phash",
|
25
23
|
max_per_page: Optional[int] = None,
|
26
24
|
show_progress: bool = True,
|
27
25
|
**kwargs,
|
28
|
-
) -> MatchResults:
|
26
|
+
) -> "MatchResults":
|
29
27
|
"""
|
30
28
|
Find regions visually similar to the given example(s).
|
31
29
|
|
@@ -35,15 +33,19 @@ class VisualSearchMixin:
|
|
35
33
|
confidence: Minimum similarity score (0-1)
|
36
34
|
sizes: Size variations to search. Can be:
|
37
35
|
- float: ±percentage (e.g., 0.2 = 80%-120%)
|
38
|
-
- tuple(min, max): search range with smart logarithmic steps (default: (0.8, 1.
|
36
|
+
- tuple(min, max): search range with smart logarithmic steps (default: (0.8, 1.2))
|
39
37
|
- tuple(min, max, step): explicit step size
|
40
38
|
- list: exact sizes to try (e.g., [0.8, 1.0, 1.2])
|
41
39
|
resolution: Resolution for image comparison (DPI) (default: 72)
|
42
|
-
hash_size: Size of perceptual hash grid (default:
|
43
|
-
|
40
|
+
hash_size: Size of perceptual hash grid (default: 20)
|
41
|
+
step: Step size in pixels for sliding window
|
42
|
+
method: Matching algorithm - "phash" (default) or "template"
|
44
43
|
max_per_page: Maximum matches to return per page
|
45
44
|
show_progress: Show progress bar for multi-page searches (default: True)
|
46
|
-
**kwargs: Additional options
|
45
|
+
**kwargs: Additional options including:
|
46
|
+
mask_threshold: For both template and phash methods, pixels >= this value are masked.
|
47
|
+
For template matching: pixels are ignored in matching (e.g., 0.95)
|
48
|
+
For phash: pixels are replaced with median before hashing (e.g., 0.95)
|
47
49
|
|
48
50
|
Returns:
|
49
51
|
MatchResults collection
|
@@ -55,15 +57,25 @@ class VisualSearchMixin:
|
|
55
57
|
if not isinstance(examples, list):
|
56
58
|
examples = [examples]
|
57
59
|
|
60
|
+
from .similarity import VisualMatcher, compute_phash
|
61
|
+
|
58
62
|
# Initialize matcher with specified hash size
|
59
63
|
matcher = VisualMatcher(hash_size=hash_size)
|
60
64
|
|
61
65
|
# Prepare templates
|
62
66
|
templates = []
|
67
|
+
# Extract mask_threshold from kwargs for phash
|
68
|
+
mask_threshold = kwargs.get("mask_threshold")
|
69
|
+
mask_threshold_255 = (
|
70
|
+
int(mask_threshold * 255) if mask_threshold is not None and method == "phash" else None
|
71
|
+
)
|
72
|
+
|
63
73
|
for example in examples:
|
64
74
|
# Render the example region/element
|
65
75
|
example_image = example.render(resolution=resolution, crop=True)
|
66
|
-
template_hash = compute_phash(
|
76
|
+
template_hash = compute_phash(
|
77
|
+
example_image, hash_size=hash_size, mask_threshold=mask_threshold_255
|
78
|
+
)
|
67
79
|
templates.append({"image": example_image, "hash": template_hash, "source": example})
|
68
80
|
|
69
81
|
# Get pages to search based on the object type
|
@@ -76,6 +88,8 @@ class VisualSearchMixin:
|
|
76
88
|
pages_to_search = self.pages
|
77
89
|
elif hasattr(self, "number"): # Single page
|
78
90
|
pages_to_search = [self]
|
91
|
+
elif hasattr(self, "page") and hasattr(self, "bbox"): # Region
|
92
|
+
pages_to_search = [self]
|
79
93
|
else:
|
80
94
|
raise TypeError(f"Cannot search in {type(self)}")
|
81
95
|
|
@@ -86,10 +100,16 @@ class VisualSearchMixin:
|
|
86
100
|
scales = matcher._get_search_scales(sizes)
|
87
101
|
|
88
102
|
# Pre-calculate for all pages and templates
|
89
|
-
for
|
90
|
-
# Estimate
|
91
|
-
|
92
|
-
|
103
|
+
for search_obj in pages_to_search:
|
104
|
+
# Estimate image size based on object type
|
105
|
+
if hasattr(search_obj, "page") and hasattr(search_obj, "bbox"):
|
106
|
+
# Region
|
107
|
+
page_w = int(search_obj.width * resolution / 72.0)
|
108
|
+
page_h = int(search_obj.height * resolution / 72.0)
|
109
|
+
else:
|
110
|
+
# Page
|
111
|
+
page_w = int(search_obj.width * resolution / 72.0)
|
112
|
+
page_h = int(search_obj.height * resolution / 72.0)
|
93
113
|
|
94
114
|
for template_data in templates:
|
95
115
|
template_w, template_h = template_data["image"].size
|
@@ -99,11 +119,15 @@ class VisualSearchMixin:
|
|
99
119
|
scaled_h = int(template_h * scale)
|
100
120
|
|
101
121
|
if scaled_w <= page_w and scaled_h <= page_h:
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
122
|
+
# Determine step size
|
123
|
+
if step is not None:
|
124
|
+
actual_step = step
|
125
|
+
else:
|
126
|
+
# Default to 10% of template size
|
127
|
+
actual_step = max(1, int(min(scaled_w, scaled_h) * 0.1))
|
128
|
+
|
129
|
+
x_windows = len(range(0, page_w - scaled_w + 1, actual_step))
|
130
|
+
y_windows = len(range(0, page_h - scaled_h + 1, actual_step))
|
107
131
|
total_operations += x_windows * y_windows
|
108
132
|
|
109
133
|
# Search each page
|
@@ -124,9 +148,20 @@ class VisualSearchMixin:
|
|
124
148
|
mininterval=0.1, # Minimum time between updates (seconds)
|
125
149
|
)
|
126
150
|
|
127
|
-
for page_idx,
|
128
|
-
#
|
129
|
-
|
151
|
+
for page_idx, search_obj in enumerate(pages_to_search):
|
152
|
+
# Determine if we're searching in a page or a region
|
153
|
+
if hasattr(search_obj, "page") and hasattr(search_obj, "bbox"):
|
154
|
+
# This is a Region - render only the region area
|
155
|
+
region = search_obj
|
156
|
+
page = region.page
|
157
|
+
page_image = region.render(resolution=resolution, crop=True)
|
158
|
+
# Region offset for coordinate conversion
|
159
|
+
region_x0, region_y0 = region.x0, region.top
|
160
|
+
else:
|
161
|
+
# This is a Page - render the full page
|
162
|
+
page = search_obj
|
163
|
+
page_image = page.render(resolution=resolution)
|
164
|
+
region_x0, region_y0 = 0, 0
|
130
165
|
|
131
166
|
# Convert page coordinates to image coordinates
|
132
167
|
scale = resolution / 72.0 # PDF is 72 DPI
|
@@ -168,7 +203,8 @@ class VisualSearchMixin:
|
|
168
203
|
template_hash=template_hash,
|
169
204
|
confidence_threshold=confidence,
|
170
205
|
sizes=sizes,
|
171
|
-
|
206
|
+
step=step,
|
207
|
+
method=method,
|
172
208
|
show_progress=False, # We handle progress ourselves
|
173
209
|
progress_callback=update_progress if progress_bar else None,
|
174
210
|
**kwargs,
|
@@ -180,10 +216,12 @@ class VisualSearchMixin:
|
|
180
216
|
|
181
217
|
# Convert from image pixels to PDF points
|
182
218
|
# No flipping needed! PDF coordinates map directly to PIL coordinates
|
183
|
-
pdf_x0 = img_x0 / scale
|
184
|
-
pdf_y0 = img_y0 / scale
|
185
|
-
pdf_x1 = img_x1 / scale
|
186
|
-
pdf_y1 = img_y1 / scale
|
219
|
+
pdf_x0 = img_x0 / scale + region_x0
|
220
|
+
pdf_y0 = img_y0 / scale + region_y0
|
221
|
+
pdf_x1 = img_x1 / scale + region_x0
|
222
|
+
pdf_y1 = img_y1 / scale + region_y0
|
223
|
+
|
224
|
+
from .results import Match
|
187
225
|
|
188
226
|
# Create Match object
|
189
227
|
match = Match(
|
@@ -206,4 +244,6 @@ class VisualSearchMixin:
|
|
206
244
|
if progress_bar:
|
207
245
|
progress_bar.close()
|
208
246
|
|
247
|
+
from .results import MatchResults
|
248
|
+
|
209
249
|
return MatchResults(all_matches)
|
natural_pdf/vision/results.py
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
|
3
3
|
from typing import TYPE_CHECKING, Any, Iterator, List, Optional, Tuple
|
4
4
|
|
5
|
-
# Import Region directly as it's a base class
|
6
5
|
from natural_pdf.elements.region import Region
|
7
6
|
|
8
7
|
if TYPE_CHECKING:
|
@@ -39,16 +38,41 @@ class Match(Region):
|
|
39
38
|
|
40
39
|
|
41
40
|
class MatchResults:
|
42
|
-
"""
|
41
|
+
"""
|
42
|
+
Collection of Match objects with transformation methods.
|
43
|
+
|
44
|
+
Matches are automatically sorted by confidence (highest first), so:
|
45
|
+
- matches[0] is the best match
|
46
|
+
- Iteration yields matches from best to worst
|
47
|
+
- The .top(n) method returns the n best matches
|
48
|
+
|
49
|
+
Example:
|
50
|
+
>>> matches = page.find_similar(logo_region)
|
51
|
+
>>> print(f"Found {len(matches)} matches")
|
52
|
+
>>>
|
53
|
+
>>> # Best match
|
54
|
+
>>> best = matches[0]
|
55
|
+
>>> print(f"Best match confidence: {best.confidence:.3f}")
|
56
|
+
>>>
|
57
|
+
>>> # Top 5 matches
|
58
|
+
>>> for match in matches.top(5):
|
59
|
+
... print(f"Confidence: {match.confidence:.3f} at page {match.page.number}")
|
60
|
+
>>>
|
61
|
+
>>> # All matches above 90% confidence
|
62
|
+
>>> high_conf = matches.filter_by_confidence(0.9)
|
63
|
+
"""
|
43
64
|
|
44
65
|
def __init__(self, matches: List[Match]):
|
45
|
-
"""Initialize with list of Match objects"""
|
66
|
+
"""Initialize with list of Match objects, automatically sorted by confidence"""
|
46
67
|
# Import here to avoid circular import
|
47
68
|
from natural_pdf.elements.element_collection import ElementCollection
|
48
69
|
|
70
|
+
# Sort matches by confidence (highest first)
|
71
|
+
sorted_matches = sorted(matches, key=lambda m: m.confidence, reverse=True)
|
72
|
+
|
49
73
|
# Create a base ElementCollection
|
50
|
-
self._collection = ElementCollection(
|
51
|
-
self._matches =
|
74
|
+
self._collection = ElementCollection(sorted_matches)
|
75
|
+
self._matches = sorted_matches
|
52
76
|
|
53
77
|
def __len__(self):
|
54
78
|
return len(self._matches)
|
@@ -68,6 +92,26 @@ class MatchResults:
|
|
68
92
|
"""Filter matches by minimum confidence"""
|
69
93
|
return self.filter(lambda m: m.confidence >= min_confidence)
|
70
94
|
|
95
|
+
def top(self, n: int) -> "MatchResults":
|
96
|
+
"""
|
97
|
+
Get the top N matches with highest confidence.
|
98
|
+
|
99
|
+
Args:
|
100
|
+
n: Number of top matches to return
|
101
|
+
|
102
|
+
Returns:
|
103
|
+
New MatchResults with only the top N matches
|
104
|
+
|
105
|
+
Example:
|
106
|
+
>>> matches = page.find_similar(logo)
|
107
|
+
>>> best_5 = matches.top(5)
|
108
|
+
>>> for match in best_5:
|
109
|
+
... print(f"Confidence: {match.confidence:.3f}")
|
110
|
+
"""
|
111
|
+
# Since matches are already sorted by confidence, just take first n
|
112
|
+
top_matches = self._matches[:n]
|
113
|
+
return MatchResults(top_matches)
|
114
|
+
|
71
115
|
def pages(self):
|
72
116
|
"""Get unique pages containing matches"""
|
73
117
|
# Import here to avoid circular import
|