natural-pdf 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
@@ -0,0 +1,146 @@
+ """Match results for visual similarity search"""
+
+ from typing import TYPE_CHECKING, Any, Iterator, List, Optional, Tuple
+
+ # Import Region directly as it's a base class
+ from natural_pdf.elements.region import Region
+
+ if TYPE_CHECKING:
+     from natural_pdf.core.page_collection import PageCollection
+     from natural_pdf.elements.element_collection import ElementCollection
+
+
+ class Match(Region):
+     """A region that was found via visual similarity search"""
+
+     def __init__(self, page, bbox, confidence, source_example=None, metadata=None):
+         """
+         Initialize a Match object.
+
+         Args:
+             page: Page containing the match
+             bbox: Bounding box of the match
+             confidence: Similarity confidence (0-1)
+             source_example: The example/template that led to this match
+             metadata: Additional metadata about the match
+         """
+         super().__init__(page, bbox)
+         self.confidence = confidence
+         self.source_example = source_example
+         self.metadata = metadata or {}
+
+     @property
+     def pdf(self):
+         """Get the PDF containing this match"""
+         return self.page.pdf
+
+     def __repr__(self):
+         return f"<Match page={self.page.number} confidence={self.confidence:.2f} bbox={self.bbox}>"
+
+
+ class MatchResults:
+     """Collection of Match objects with transformation methods"""
+
+     def __init__(self, matches: List[Match]):
+         """Initialize with list of Match objects"""
+         # Import here to avoid circular import
+         from natural_pdf.elements.element_collection import ElementCollection
+
+         # Create a base ElementCollection
+         self._collection = ElementCollection(matches)
+         self._matches = matches
+
+     def __len__(self):
+         return len(self._matches)
+
+     def __iter__(self):
+         return iter(self._matches)
+
+     def __getitem__(self, key):
+         return self._matches[key]
+
+     def filter(self, filter_func) -> "MatchResults":
+         """Filter matches by a function"""
+         filtered = [m for m in self if filter_func(m)]
+         return MatchResults(filtered)
+
+     def filter_by_confidence(self, min_confidence: float) -> "MatchResults":
+         """Filter matches by minimum confidence"""
+         return self.filter(lambda m: m.confidence >= min_confidence)
+
+     def pages(self):
+         """Get unique pages containing matches"""
+         # Import here to avoid circular import
+         from natural_pdf.core.page_collection import PageCollection
+
+         # Get unique pages while preserving order
+         seen = set()
+         unique_pages = []
+         for match in self:
+             if match.page not in seen:
+                 seen.add(match.page)
+                 unique_pages.append(match.page)
+
+         # Attach matches to each page
+         for page in unique_pages:
+             page._matches = MatchResults([m for m in self if m.page == page])
+
+         return PageCollection(unique_pages)
+
+     def pdfs(self):
+         """Get unique PDFs containing matches"""
+         # Import here to avoid circular import
+         from natural_pdf.core.pdf_collection import PDFCollection
+
+         # Get unique PDFs while preserving order
+         seen = set()
+         unique_pdfs = []
+         for match in self:
+             if match.pdf not in seen:
+                 seen.add(match.pdf)
+                 unique_pdfs.append(match.pdf)
+
+         # Attach matches to each PDF
+         for pdf in unique_pdfs:
+             pdf._matches = MatchResults([m for m in self if m.pdf == pdf])
+
+         return PDFCollection(unique_pdfs)
+
+     def group_by_page(self) -> Iterator[Tuple[Any, "MatchResults"]]:
+         """Group matches by page"""
+         from itertools import groupby
+
+         # Sort by PDF filename and page number
+         sorted_matches = sorted(self, key=lambda m: (getattr(m.pdf, "filename", ""), m.page.number))
+
+         for page, matches in groupby(sorted_matches, key=lambda m: m.page):
+             yield page, MatchResults(list(matches))
+
+     def sort_by_confidence(self, descending: bool = True) -> "MatchResults":
+         """Sort matches by confidence score"""
+         sorted_matches = sorted(self, key=lambda m: m.confidence, reverse=descending)
+         return MatchResults(sorted_matches)
+
+     def regions(self):
+         """Get all matches as an ElementCollection of regions"""
+         # Import here to avoid circular import
+         from natural_pdf.elements.element_collection import ElementCollection
+
+         # Matches are already Region objects, so just wrap them
+         return ElementCollection(list(self))
+
+     def show(self, **kwargs):
+         """Show all matches using ElementCollection.show()"""
+         # Get regions and show them
+         return self.regions().show(**kwargs)
+
+     def __repr__(self):
+         if len(self) == 0:
+             return "<MatchResults: empty>"
+         elif len(self) == 1:
+             return f"<MatchResults: 1 match>"
+         else:
+             conf_range = (
+                 f"{min(m.confidence for m in self):.2f}-{max(m.confidence for m in self):.2f}"
+             )
+             return f"<MatchResults: {len(self)} matches, confidence {conf_range}>"
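
The hunk above adds what the RECORD further down lists as natural_pdf/vision/results.py: a Match region plus a chainable MatchResults collection. A minimal usage sketch, assuming the natural_pdf.vision.results import path implied by the RECORD, the package's top-level PDF class, and a local document.pdf; the bounding boxes and confidence values are invented for illustration:

from natural_pdf import PDF  # top-level entry point (assumed)
from natural_pdf.vision.results import Match, MatchResults  # module path implied by the RECORD

pdf = PDF("document.pdf")  # placeholder path
page = pdf.pages[0]

# A similarity search would normally build these; here they are constructed by hand.
results = MatchResults(
    [
        Match(page, (50, 100, 150, 130), confidence=0.92),
        Match(page, (50, 300, 150, 330), confidence=0.71),
        Match(page, (50, 500, 150, 530), confidence=0.55),
    ]
)

strong = results.filter_by_confidence(0.7).sort_by_confidence()
for pg, page_matches in strong.group_by_page():
    print(pg.number, page_matches)
# strong.show()  # would render the surviving matches via ElementCollection.show()

filter(), filter_by_confidence() and sort_by_confidence() each return a new MatchResults, so they chain; pages() and pdfs() return the library's own collection types with the relevant matches attached as _matches.
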
@@ -0,0 +1,321 @@
+ """Visual similarity matching using perceptual hashing"""
+
+ from dataclasses import dataclass
+ from typing import Callable, List, Optional, Tuple, Union
+
+ import numpy as np
+ from PIL import Image
+ from tqdm.auto import tqdm
+
+
+ @dataclass
+ class MatchCandidate:
+     """Candidate match during sliding window search"""
+
+     bbox: Tuple[float, float, float, float]
+     hash_value: int
+     confidence: float
+
+
+ def compute_phash(image: Image.Image, hash_size: int = 8, blur_radius: float = 0) -> int:
+     """
+     Compute perceptual hash of an image using DCT.
+
+     Args:
+         image: PIL Image to hash
+         hash_size: Size of the hash (8 = 64 bit hash)
+         blur_radius: Optional blur to apply before hashing (makes more tolerant)
+
+     Returns:
+         Integer hash value
+     """
+     # Convert to grayscale
+     if image.mode != "L":
+         image = image.convert("L")
+
+     # Optional blur to reduce sensitivity to minor variations
+     if blur_radius > 0:
+         from PIL import ImageFilter
+
+         image = image.filter(ImageFilter.GaussianBlur(radius=blur_radius))
+
+     # Resize to 32x32 (4x the hash size for DCT)
+     highfreq_factor = 4
+     img_size = hash_size * highfreq_factor
+     image = image.resize((img_size, img_size), Image.Resampling.LANCZOS)
+
+     # Convert to numpy array
+     pixels = np.array(image, dtype=np.float32)
+
+     # Apply DCT
+     from scipy.fftpack import dct
+
+     dct_coef = dct(dct(pixels, axis=0), axis=1)
+
+     # Keep top-left 8x8 (low frequencies)
+     dct_low = dct_coef[:hash_size, :hash_size]
+
+     # Compute median excluding the DC component
+     dct_low_no_dc = dct_low.flatten()[1:]  # Skip first element (DC)
+     median = np.median(dct_low_no_dc)
+
+     # Create binary hash
+     diff = dct_low.flatten() > median
+
+     # Convert to integer
+     return sum(2**i for i, v in enumerate(diff) if v)
+
+
+ def hamming_distance(hash1: int, hash2: int, hash_size: int = 64) -> int:
+     """Calculate Hamming distance between two hashes"""
+     # XOR and count set bits
+     xor = hash1 ^ hash2
+     return bin(xor).count("1")
+
+
+ def hash_similarity(hash1: int, hash2: int, hash_size: int = 64) -> float:
+     """Calculate similarity score between two hashes (0-1)"""
+     distance = hamming_distance(hash1, hash2, hash_size)
+     return 1.0 - (distance / hash_size)
+
+
+ class VisualMatcher:
+     """Handles visual similarity matching using perceptual hashing"""
+
+     def __init__(self, hash_size: int = 12):
+         self.hash_size = hash_size
+         self.hash_bits = hash_size * hash_size
+         self._cache = {}
+
+     def _get_search_scales(self, sizes: Optional[Union[float, Tuple, List]]) -> List[float]:
+         """
+         Convert various size input formats to a list of scales to search.
+
+         Args:
+             sizes: Can be:
+                 - None: just 1.0
+                 - float: ±percentage (e.g., 0.2 = 80%-120%)
+                 - tuple(min, max): range with smart logarithmic steps
+                 - tuple(min, max, step): explicit step size
+                 - list: exact sizes to use
+
+         Returns:
+             List of scale factors to search
+         """
+         if sizes is None:
+             return [1.0]
+
+         # List of exact sizes
+         if isinstance(sizes, list):
+             return sorted(sizes)
+
+         # Single float: ±percentage
+         if isinstance(sizes, (int, float)):
+             if sizes <= 0:
+                 return [1.0]
+             # Convert to min/max range
+             min_scale = max(0.1, 1.0 - sizes)
+             max_scale = 1.0 + sizes
+             # Use tuple logic below
+             sizes = (min_scale, max_scale)
+
+         # Tuple handling
+         if isinstance(sizes, tuple):
+             if len(sizes) == 2:
+                 min_scale, max_scale = sizes
+                 if min_scale >= max_scale:
+                     return [min_scale]
+
+                 # Smart defaults with logarithmic spacing
+                 # Calculate range ratio to determine number of steps
+                 ratio = max_scale / min_scale
+
+                 if ratio <= 1.5:  # Small range (e.g., 0.8-1.2)
+                     num_steps = 5
+                 elif ratio <= 3.0:  # Medium range (e.g., 0.5-1.5)
+                     num_steps = 7
+                 else:  # Large range (e.g., 0.5-2.0)
+                     num_steps = 9
+
+                 # Generate logarithmically spaced scales
+                 log_min = np.log(min_scale)
+                 log_max = np.log(max_scale)
+                 log_scales = np.linspace(log_min, log_max, num_steps)
+                 scales = np.exp(log_scales).tolist()
+
+                 # Ensure 1.0 is included if in range
+                 if min_scale <= 1.0 <= max_scale and 1.0 not in scales:
+                     # Find closest scale and replace with 1.0
+                     closest_idx = np.argmin([abs(s - 1.0) for s in scales])
+                     scales[closest_idx] = 1.0
+
+                 return scales
+
+             elif len(sizes) == 3:
+                 # Explicit (min, max, step)
+                 min_scale, max_scale, step = sizes
+                 scales = []
+                 current = min_scale
+                 while current <= max_scale:
+                     scales.append(current)
+                     current += step
+                 # Ensure max is included if close
+                 if scales[-1] < max_scale and (max_scale - scales[-1]) < step * 0.1:
+                     scales[-1] = max_scale
+                 return scales
+
+         raise ValueError(f"Invalid sizes format: {sizes}")
+
+     def find_matches_in_image(
+         self,
+         template: Image.Image,
+         target: Image.Image,
+         template_hash: Optional[int] = None,
+         confidence_threshold: float = 0.6,
+         step_factor: float = 0.1,
+         sizes: Optional[Union[float, Tuple, List]] = None,
+         show_progress: bool = True,
+         progress_callback: Optional[Callable[[], None]] = None,
+     ) -> List[MatchCandidate]:
+         """
+         Find all matches of template in target image using sliding window.
+
+         Args:
+             template: Template image to search for
+             target: Target image to search in
+             template_hash: Pre-computed hash of template (optional)
+             confidence_threshold: Minimum similarity score (0-1)
+             step_factor: Step size as fraction of template size
+             sizes: Size variations to search. Can be:
+                 - float: ±percentage (e.g., 0.2 = 80%-120%)
+                 - tuple(min, max): search range with smart logarithmic steps
+                 - tuple(min, max, step): explicit step size
+                 - list: exact sizes to try (e.g., [0.8, 1.0, 1.2])
+             show_progress: Show progress bar for sliding window search
+             progress_callback: Optional callback function to call for each window checked
+
+         Returns:
+             List of MatchCandidate objects
+         """
+         matches = []
+
+         # Compute template hash if not provided
+         if template_hash is None:
+             template_hash = compute_phash(template, self.hash_size)
+
+         template_w, template_h = template.size
+         target_w, target_h = target.size
+
+         # Determine scales to search
+         scales = self._get_search_scales(sizes)
+
+         # Calculate total iterations for progress bar
+         total_iterations = 0
+         if show_progress and not progress_callback:
+             for scale in scales:
+                 scaled_w = int(template_w * scale)
+                 scaled_h = int(template_h * scale)
+                 if scaled_w <= target_w and scaled_h <= target_h:
+                     step_x = max(1, int(scaled_w * step_factor))
+                     step_y = max(1, int(scaled_h * step_factor))
+                     x_steps = len(range(0, target_w - scaled_w + 1, step_x))
+                     y_steps = len(range(0, target_h - scaled_h + 1, step_y))
+                     total_iterations += x_steps * y_steps
+
+         # Setup progress bar if needed (only if no callback provided)
+         progress_bar = None
+         if show_progress and not progress_callback and total_iterations > 0:
+             progress_bar = tqdm(total=total_iterations, desc="Scanning", unit="window", leave=False)
+
+         # Search at each scale
+         for scale in scales:
+             # Scale template size
+             scaled_w = int(template_w * scale)
+             scaled_h = int(template_h * scale)
+
+             if scaled_w > target_w or scaled_h > target_h:
+                 continue
+
+             # Calculate step size
+             step_x = max(1, int(scaled_w * step_factor))
+             step_y = max(1, int(scaled_h * step_factor))
+
+             # Sliding window search
+             for y in range(0, target_h - scaled_h + 1, step_y):
+                 for x in range(0, target_w - scaled_w + 1, step_x):
+                     # Extract window
+                     window = target.crop((x, y, x + scaled_w, y + scaled_h))
+
+                     # Resize to template size if scaled
+                     if scale != 1.0:
+                         window = window.resize((template_w, template_h), Image.Resampling.LANCZOS)
+
+                     # Compute hash and similarity
+                     window_hash = compute_phash(window, self.hash_size)
+                     similarity = hash_similarity(template_hash, window_hash, self.hash_bits)
+
+                     if similarity >= confidence_threshold:
+                         # Convert back to target image coordinates
+                         bbox = (x, y, x + scaled_w, y + scaled_h)
+                         matches.append(MatchCandidate(bbox, window_hash, similarity))
+
+                     # Update progress
+                     if progress_bar:
+                         progress_bar.update(1)
+                     elif progress_callback:
+                         progress_callback()
+
+         # Close progress bar
+         if progress_bar:
+             progress_bar.close()
+
+         # Remove overlapping matches (keep highest confidence)
+         return self._filter_overlapping_matches(matches)
+
+     def _filter_overlapping_matches(
+         self, matches: List[MatchCandidate], overlap_threshold: float = 0.5
+     ) -> List[MatchCandidate]:
+         """Remove overlapping matches, keeping the highest confidence ones"""
+         if not matches:
+             return matches
+
+         # Sort by confidence (highest first)
+         sorted_matches = sorted(matches, key=lambda m: m.confidence, reverse=True)
+         filtered = []
+
+         for candidate in sorted_matches:
+             # Check if this overlaps significantly with any already selected match
+             keep = True
+             for selected in filtered:
+                 overlap = self._calculate_overlap(candidate.bbox, selected.bbox)
+                 if overlap > overlap_threshold:
+                     keep = False
+                     break
+
+             if keep:
+                 filtered.append(candidate)
+
+         return filtered
+
+     def _calculate_overlap(self, bbox1: Tuple, bbox2: Tuple) -> float:
+         """Calculate intersection over union (IoU) for two bboxes"""
+         x1_min, y1_min, x1_max, y1_max = bbox1
+         x2_min, y2_min, x2_max, y2_max = bbox2
+
+         # Calculate intersection
+         intersect_xmin = max(x1_min, x2_min)
+         intersect_ymin = max(y1_min, y2_min)
+         intersect_xmax = min(x1_max, x2_max)
+         intersect_ymax = min(y1_max, y2_max)
+
+         if intersect_xmax < intersect_xmin or intersect_ymax < intersect_ymin:
+             return 0.0
+
+         intersect_area = (intersect_xmax - intersect_xmin) * (intersect_ymax - intersect_ymin)
+
+         # Calculate union
+         area1 = (x1_max - x1_min) * (y1_max - y1_min)
+         area2 = (x2_max - x2_min) * (y2_max - y2_min)
+         union_area = area1 + area2 - intersect_area
+
+         return intersect_area / union_area if union_area > 0 else 0.0
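
The hunk above adds what the RECORD lists as natural_pdf/vision/similarity.py: a DCT-based perceptual hash plus a multi-scale sliding-window matcher. A self-contained smoke test under the same import-path assumption; it draws a synthetic page with the same "stamp" in two places instead of loading a real image, and it needs Pillow, numpy, scipy and tqdm installed:

from PIL import Image, ImageDraw

from natural_pdf.vision.similarity import VisualMatcher, compute_phash, hash_similarity

# Synthetic target: the same stamp drawn at two positions on a white canvas.
target = Image.new("L", (400, 200), color=255)
draw = ImageDraw.Draw(target)
for x, y in [(40, 28), (240, 112)]:
    draw.rectangle([x, y, x + 80, y + 40], outline=0, width=3)
    draw.line([x, y + 20, x + 80, y + 20], fill=0, width=2)

# Use the first occurrence as the template (80x40 pixels).
template = target.crop((40, 28, 120, 68))

matcher = VisualMatcher(hash_size=12)  # 144-bit hashes
candidates = matcher.find_matches_in_image(
    template,
    target,
    confidence_threshold=0.85,
    step_factor=0.1,  # slide in steps of 10% of the template size
    show_progress=False,
)
for c in candidates:
    print(c.bbox, round(c.confidence, 2))  # typically one box per stamp after overlap filtering

# Confidence is just 1 - hamming_distance / bits between the two perceptual hashes.
h = compute_phash(template, hash_size=12)
print(hash_similarity(h, h, hash_size=144))  # identical hashes -> 1.0

With sizes left at None only the original scale is scanned; passing, say, sizes=0.2 widens the search to roughly 80%-120% of the template size using the logarithmically spaced scales produced by _get_search_scales().
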
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: natural-pdf
- Version: 0.2.3
+ Version: 0.2.5
  Summary: A more intuitive interface for working with PDFs
  Author-email: Jonathan Soma <jonathan.soma@gmail.com>
  License-Expression: MIT
@@ -2,7 +2,7 @@ natural_pdf/__init__.py,sha256=N4pR0LbuPEnUYFZqbdVqc_FGKldgwPQc1wjJhYKTBBM,3417
  natural_pdf/cli.py,sha256=SkPwhhMM-GhLsj3O1n1Agxz4KOxcZ08sj8hVQSFJB5c,4064
  natural_pdf/text_mixin.py,sha256=eFCiHj6Okcw3aum4955BepcI2NPRalkf9UFFVTc_H30,4012
  natural_pdf/analyzers/__init__.py,sha256=3XGoNq3OgiVkZP7tOdeP5XVUl7fDgyztdA8DlOcMLXg,1138
- natural_pdf/analyzers/guides.py,sha256=9FUbxk4XBOyktXgq9q5-bB949JFrzT1kBPikg2ENoIw,150032
+ natural_pdf/analyzers/guides.py,sha256=mLWPPEwywo_FbU3gSoegiRlzxYmkHEo2c4DLX9krH9k,157691
  natural_pdf/analyzers/shape_detection_mixin.py,sha256=mgpyJ4jIulz9l9HCqThabJIsLSrXh9BB2AmLxUoHmw0,62584
  natural_pdf/analyzers/text_options.py,sha256=qEkDaYWla0rIM_gszEOsu52q7C_dAfV81P2HLJZM2sw,3333
  natural_pdf/analyzers/text_structure.py,sha256=3WWusi-BI0krUnJxB05DD6XmKj5qRNvQBqH7zOQGm1M,28451
@@ -25,26 +25,26 @@ natural_pdf/classification/mixin.py,sha256=CXygXXhe_qx1563SmIjiu4uSnZkxCkuRR4fGv
  natural_pdf/classification/results.py,sha256=5ha77CxK0GYwkBMJbvUBZkBjsL5GpOveIZDK9nO4j8I,3239
  natural_pdf/collections/mixins.py,sha256=Se2C5AcpP9B5E0d0pIrey6-f_P32tAXTK4M7666MNj0,5688
  natural_pdf/core/__init__.py,sha256=QC8H4M3KbXwMFiQORZ0pdPlzx1Ix6oKKQSS7Ib2KEaA,38
- natural_pdf/core/element_manager.py,sha256=DRZvntd99wjXy6KeDjCq5uRhjMftZop9QklOZqlUH8M,55349
+ natural_pdf/core/element_manager.py,sha256=KPuKM7SstfErTkRnGq4vrgE0Tv8iazN13Jp7yAXGKso,55575
  natural_pdf/core/highlighting_service.py,sha256=7on8nErhi50CEH2L4XzGIZ6tIqZtMzmmFlp-2lmwnYE,68856
- natural_pdf/core/page.py,sha256=4-il2WPMVX4hNSgQ5P6yLc1-3jXfi73WCrpF9912ct4,142472
+ natural_pdf/core/page.py,sha256=Q3hBvB9KFB8doeXY7YVQt3G1ULdBDfA-0BQD6YPN4oo,144640
  natural_pdf/core/page_collection.py,sha256=hEeXs_fzB73XZ8ZkHz2kIuSgBYcVYydvGMMdGuB1rvw,52486
  natural_pdf/core/page_groupby.py,sha256=550ME6kd-h-2u75oUIIIqTYsmh8VvdQO1nXXioL8J6A,7378
- natural_pdf/core/pdf.py,sha256=q54DyhXwAS_zAmsBd3PsCezu1wyQOYmGmB3iKfP8gAM,101884
- natural_pdf/core/pdf_collection.py,sha256=8tM0qVWS1L5Hwv5cXuZ2X8znAYOjKmlERX62bksDlJU,30144
- natural_pdf/core/render_spec.py,sha256=3GTfnlv8JKzePrruLq_dNr3HFeWMVcZT2fwWmJN44NI,14456
+ natural_pdf/core/pdf.py,sha256=VslSn00So6157XfiYbrB9URpx5VlWyshQOt7upi9us4,104248
+ natural_pdf/core/pdf_collection.py,sha256=s3ogu4CEHrHMTRqQMJUKJZ-9Ii8b_B9dWbVLTFj0s7g,34992
+ natural_pdf/core/render_spec.py,sha256=rLicaS9EPyojpJcjy2Lzn5DLWQwjrFyDJyRo7jbjdGU,14505
  natural_pdf/describe/__init__.py,sha256=kIV7ORmWWB1SAur7nK2aAwR-wHqSedhKfUsaUl4hG0A,586
  natural_pdf/describe/base.py,sha256=Of9WVo9XuShXoeyJr0RN2CpLhF_CeiOjazl-or53RKU,18173
  natural_pdf/describe/elements.py,sha256=JicXC9SJmmasqxalpCXA47-kVwv-6JnR3Xiu778aNHM,12634
  natural_pdf/describe/mixin.py,sha256=rkX14aGrSz7Jvxx8Rbxv3eSfbO-_29DipwpstrV2pDQ,3109
  natural_pdf/describe/summary.py,sha256=cfT4ZQkeatCDAOwWPwhtEVXisNgk6E57fAXAnoRysSU,7645
  natural_pdf/elements/__init__.py,sha256=ICNikmLeIEuSYypz-KnkBn8xR1hR7rge4hsa1KLkyWY,42
- natural_pdf/elements/base.py,sha256=xXdNV1_gt4T_V_4m6qJDieWiysvJxUBhSEEAJzMOzqo,55094
+ natural_pdf/elements/base.py,sha256=aj-eXOQQlhKv9lYeUlUs9aKNcUebtG_dqxURZHZVZ58,55509
  natural_pdf/elements/element_collection.py,sha256=slCUnOT04sNOTjSGgmhjcCKKPVPtdDPwU7PX1ebzGMw,101342
  natural_pdf/elements/image.py,sha256=zu-P2Y8fRoEXf6IeZU0EYRWsgZ6I_a5vy1FA3VXTGkQ,1424
  natural_pdf/elements/line.py,sha256=TFn7KXjPT_jUQyQyabU0F7XYU4dC-qadwodJMZF4DCU,3844
  natural_pdf/elements/rect.py,sha256=0lNkVkPkvbRbrFED856RXoUcTcDkeeOIs5xldKGAQT8,3324
- natural_pdf/elements/region.py,sha256=Onok5VzmF1CvMCa3UGLUszCuhL-CCGk_IgtSUDva-Cc,155314
+ natural_pdf/elements/region.py,sha256=_NNBewHlyUHvA4g9kApilP6it0cn2IRlcGG4r993oUI,156660
  natural_pdf/elements/text.py,sha256=829uSJv9E-8cC6T6iR_Va7Xtv54pJoyRN78fq4NN1d4,20687
  natural_pdf/export/mixin.py,sha256=L1q3MIEFWuvie4j4_EmW7GT3NerbZ1as0XMUoqTS7gM,5083
  natural_pdf/exporters/__init__.py,sha256=QffoARekR6WzXEd05oxOytly4qPdBizuIF-SUkeFpig,643
@@ -87,7 +87,7 @@ natural_pdf/search/searchable_mixin.py,sha256=hqQ_AuID5eTGRCtKYdFLZ1zF35y73uk3x1
  natural_pdf/selectors/__init__.py,sha256=oZGeqSv53EqmIZOhcnawuaGGlRg1h79vArXuZCWKm4A,123
  natural_pdf/selectors/parser.py,sha256=pw0M8ICKPMOzZPzWpLsQMG_lnl8PewGIdIG3ciukabk,38877
  natural_pdf/tables/__init__.py,sha256=sCvCGbGsL6BiqlNxAYfVv003bIDLI11FmjHhaWfcU6w,104
- natural_pdf/tables/result.py,sha256=1pcelNZvOb6Anlwj08Z1XU-YK1ihlCsLpYMRA3Zc4JM,7242
+ natural_pdf/tables/result.py,sha256=-8ctA-jCJYSHtlfAoqTvhUwO5zSP2BQxxetAjqEsNyg,8665
  natural_pdf/templates/__init__.py,sha256=jYBxzfi73vew0f6yhIh1MlRxw4F_TVN2hKQR0YXOFe0,20
  natural_pdf/utils/__init__.py,sha256=s3M8FggaK1P3EBYn6R_-HgSDjNc9C73gyKe1hihtNWg,43
  natural_pdf/utils/bidi_mirror.py,sha256=jJEES0xDrMfo5Me8kHMxHv4COS51PitnYi2EvKv3HCE,1151
@@ -100,9 +100,13 @@ natural_pdf/utils/packaging.py,sha256=TM0jafwS5yVbTGC-RMi4TyWunf9cUUo9h5J6rMzkT-
  natural_pdf/utils/reading_order.py,sha256=u7XyVZdKMPMK0CL1C7xFogKnZ92b0JKT068KFjQWe18,7437
  natural_pdf/utils/text_extraction.py,sha256=CCwPTmMoTgtQt2P00X_ADIf6ZGNfxvjCO9FO0_HqG40,13900
  natural_pdf/utils/visualization.py,sha256=zhZEHgYnZFuX7YxTHXF8Y3D97uHp2beTKMaC-JkCFwk,22364
+ natural_pdf/vision/__init__.py,sha256=RymMY-3WLQBlOZ4Dx4MmL9UH6I65hNjkwUJ7ymO5JfM,287
+ natural_pdf/vision/mixin.py,sha256=OJwBABr74TWxP5seTKUmGj5zE9mWsBP_UKWU-Pr8V9A,8720
+ natural_pdf/vision/results.py,sha256=F2zXG3MVZIpOUvPkJHotOq6-9rFz68BaO_8pnSndlOs,5119
+ natural_pdf/vision/similarity.py,sha256=YH8legN-t9uf1b_XULi4JLNDaRfPNKQwU1FZ4Qu08jY,11740
  natural_pdf/widgets/__init__.py,sha256=QTVaUmsw__FCweFYZebwPssQxxUFUMd0wpm_cUbGZJY,181
  natural_pdf/widgets/viewer.py,sha256=KW3JogdR2TMg2ECUMYp8hwd060hfg8EsYBWxb5IEzBY,24942
- natural_pdf-0.2.3.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
+ natural_pdf-0.2.5.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
  optimization/memory_comparison.py,sha256=0i_foFSRmppj-fY069qjwH36s_zkx-1L2ASAAlepWzA,6541
  optimization/pdf_analyzer.py,sha256=HjrmTgu2qchxPeDckc5kjgxppGwd40UESrYS9Myj7pY,19352
  optimization/performance_analysis.py,sha256=JBXnR9hc7Ix7YCnt3EJPSpsyqIUgKsc7GEffQ_TDCBk,13033
@@ -119,8 +123,8 @@ tools/bad_pdf_eval/llm_enrich.py,sha256=mCh4KGi1HmIkzGjj5rrHz1Osd7sEX1IZ_FW08H1t
  tools/bad_pdf_eval/llm_enrich_with_retry.py,sha256=XUtPF1hUvqd3frDXT0wDTXoonuAivhjM5vgFdZ-tm0A,9373
  tools/bad_pdf_eval/reporter.py,sha256=e1g__mkSB4q02p3mGWOwMhvFs7F2HJosNBxup0-LkyU,400
  tools/bad_pdf_eval/utils.py,sha256=hR95XQ7qf7Cu6BdyX0L7ggGVx-ah5sK0jHWblTJUUic,4896
- natural_pdf-0.2.3.dist-info/METADATA,sha256=lyx6Cx1xPGhy-p1m0wRfTvv4zSJ4ZJnNo7DeGQZ99yU,6959
- natural_pdf-0.2.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- natural_pdf-0.2.3.dist-info/entry_points.txt,sha256=1R_KMv7g60UBBpRqGfw7bppsMNGdayR-iJlb9ohEk_8,81
- natural_pdf-0.2.3.dist-info/top_level.txt,sha256=80t0F2ZeX4vN4Ke5iTflcOk_PN_0USn33ha3X6X86Ik,36
- natural_pdf-0.2.3.dist-info/RECORD,,
+ natural_pdf-0.2.5.dist-info/METADATA,sha256=H9nhjh1zRBmz2vUTe_j6FT-Zvn1sgoWT0nyoZG5GTYg,6959
+ natural_pdf-0.2.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ natural_pdf-0.2.5.dist-info/entry_points.txt,sha256=1R_KMv7g60UBBpRqGfw7bppsMNGdayR-iJlb9ohEk_8,81
+ natural_pdf-0.2.5.dist-info/top_level.txt,sha256=80t0F2ZeX4vN4Ke5iTflcOk_PN_0USn33ha3X6X86Ik,36
+ natural_pdf-0.2.5.dist-info/RECORD,,