natural-pdf 0.2.16__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,346 @@
1
+ """Centralized utilities for section extraction to avoid code duplication.
2
+
3
+ This module provides the core logic for get_sections() and get_section_between()
4
+ functionality that's used across Page, PDF, Region, and Flow classes.
5
+ """
6
+
7
+ import logging
8
+ from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Union
9
+
10
+ if TYPE_CHECKING:
11
+ from natural_pdf.core.page import Page
12
+ from natural_pdf.elements.base import Element
13
+ from natural_pdf.elements.region import Region
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
def calculate_section_bounds(
    start_element: "Element",
    end_element: Optional["Element"],
    include_boundaries: str,
    orientation: str,
    parent_bounds: Tuple[float, float, float, float],
) -> Tuple[float, float, float, float]:
    """
    Calculate the bounding box for a section between two elements.

    The section spans the full extent of the parent on the cross axis (full
    width for vertical sections, full height for horizontal ones). On the main
    axis it runs from the start element to the end element, with
    ``include_boundaries`` deciding whether each boundary element itself is
    covered.

    Args:
        start_element: Element marking the start of the section
        end_element: Element marking the end of the section, or None to extend
            the section to the far edge of the parent (bottom for vertical,
            right edge for horizontal)
        include_boundaries: How to include boundary elements: 'start', 'end',
            'both', or 'none'. Any other value behaves like 'none'.
        orientation: 'vertical' or 'horizontal' - determines section direction.
            Any value other than 'vertical' is treated as horizontal.
        parent_bounds: The bounding box (x0, top, x1, bottom) of the parent region

    Returns:
        Tuple of (x0, top, x1, bottom) representing the section bounds
    """
    parent_x0, parent_top, parent_x1, parent_bottom = parent_bounds
    vertical = orientation == "vertical"

    # "near" is the edge of an element that comes first along the main axis,
    # "far" is the edge that comes last.
    if vertical:
        near_attr, far_attr = "top", "bottom"
        parent_far = parent_bottom
    else:
        near_attr, far_attr = "x0", "x1"
        parent_far = parent_x1

    include_start = include_boundaries in ("both", "start")
    include_end = include_boundaries in ("both", "end")

    # Including the start element means beginning at its near edge; excluding
    # it means beginning just past it, at its far edge.
    section_begin = getattr(start_element, near_attr if include_start else far_attr)

    if end_element is None:
        # No end boundary: the section runs to the edge of the parent.
        section_end = parent_far
    else:
        # Including the end element means stopping at its far edge; excluding
        # it means stopping at its near edge.
        section_end = getattr(end_element, far_attr if include_end else near_attr)

    if vertical:
        return (parent_x0, section_begin, parent_x1, section_end)
    return (section_begin, parent_top, section_end, parent_bottom)
90
+
91
+
92
def validate_section_bounds(bounds: Tuple[float, float, float, float], orientation: str) -> bool:
    """
    Check that a section's bounds have positive extent along the section axis.

    Only the axis that matches ``orientation`` is inspected: top/bottom for
    'vertical', x0/x1 for anything else (treated as horizontal). A debug
    message is logged when the bounds are rejected.

    Args:
        bounds: The bounding box (x0, top, x1, bottom) to validate
        orientation: 'vertical' or 'horizontal' - determines which dimension to check

    Returns:
        True if bounds are valid, False if they are inverted or zero-sized
    """
    left, upper, right, lower = bounds

    if orientation == "vertical":
        rejected = upper >= lower
        if rejected:
            logger.debug(f"Invalid vertical section boundaries: top={upper} >= bottom={lower}")
    else:  # horizontal
        rejected = left >= right
        if rejected:
            logger.debug(f"Invalid horizontal section boundaries: x0={left} >= x1={right}")

    return not rejected
115
+
116
+
117
def pair_boundary_elements(
    start_elements: List["Element"],
    end_elements: Optional[List["Element"]],
    orientation: str = "vertical",
) -> List[Tuple["Element", Optional["Element"]]]:
    """
    Pair up start and end boundary elements for section extraction.

    Both lists are first sorted by position (top-then-x0 for vertical,
    x0-then-top otherwise). When no end elements are given, each start is
    paired with the following start and the last start is paired with None.
    Otherwise each start is paired with the first not-yet-used end element
    that lies after it; a start with no such end is paired with None.

    Args:
        start_elements: List of elements marking section starts
        end_elements: Optional list of elements marking section ends
        orientation: 'vertical' or 'horizontal' - affects element ordering.
            Any value other than 'vertical' is treated as horizontal.

    Returns:
        List of (start_element, end_element) tuples, where end_element may be
        None
    """
    if not start_elements:
        return []

    vertical = orientation == "vertical"

    def position(element):
        # Sort key following reading direction for the chosen orientation.
        if vertical:
            return (element.top, element.x0)
        return (element.x0, element.top)

    def comes_after(end, start):
        # An end qualifies when it begins past the start's far edge, or
        # exactly on that edge without preceding the start on the cross axis.
        if vertical:
            return end.top > start.bottom or (
                end.top == start.bottom and end.x0 >= start.x0
            )
        return end.x0 > start.x1 or (end.x0 == start.x1 and end.top >= start.top)

    starts = sorted(start_elements, key=position)

    if not end_elements:
        # No end elements - each start is closed by the next start; the last
        # one stays open (None).
        pairs = list(zip(starts, starts[1:]))
        pairs.append((starts[-1], None))
        return pairs

    ends = sorted(end_elements, key=position)

    # Track consumed ends by object identity so that elements need not be
    # hashable and distinct elements that compare equal are not conflated.
    used_end_ids = set()
    pairs = []

    for start in starts:
        matching_end = None
        for end in ends:
            if id(end) not in used_end_ids and comes_after(end, start):
                matching_end = end
                break

        # Compare against None explicitly: an element's truthiness is not a
        # reliable signal of "found".
        if matching_end is not None:
            used_end_ids.add(id(matching_end))
        pairs.append((start, matching_end))

    return pairs
187
+
188
+
189
def process_selector_to_elements(
    selector_or_elements: Union[str, List["Element"], "Element", None],
    search_context: Any,  # Can be Page, Region, Flow, etc.
    find_method_name: str = "find_all",
) -> List["Element"]:
    """
    Process a selector string or element list into a normalized list of elements.

    Strings are treated as selectors and resolved by calling the method named
    ``find_method_name`` on ``search_context``. Non-string inputs are
    normalized by duck typing: an object with ``bbox`` is a single element, an
    object with ``elements`` is a collection, and any other iterable is
    converted to a list.

    Args:
        selector_or_elements: Selector string, element, list of elements, or None
        search_context: Object providing the search method (Page, Region, etc.)
        find_method_name: Name of the method to call for searching (default: "find_all")

    Returns:
        List of elements (empty list if None, no matches, or the search
        context lacks the requested method)
    """
    if selector_or_elements is None:
        return []

    if isinstance(selector_or_elements, str):
        # It's a selector string - search for matching elements
        if not hasattr(search_context, find_method_name):
            logger.warning(f"Search context {type(search_context)} lacks {find_method_name} method")
            return []

        result = getattr(search_context, find_method_name)(selector_or_elements)
        if hasattr(result, "elements"):
            return result.elements
        if isinstance(result, list):
            return result
        if hasattr(result, "bbox"):
            # A search method that yields a single element (rather than a
            # collection) still counts as one match instead of being dropped.
            return [result]
        return []

    # Handle single element
    if hasattr(selector_or_elements, "bbox"):  # Duck typing for Element
        return [selector_or_elements]

    # Handle ElementCollection or similar
    if hasattr(selector_or_elements, "elements"):
        return selector_or_elements.elements

    # Handle list/iterable
    if hasattr(selector_or_elements, "__iter__"):
        return list(selector_or_elements)

    return []
235
+
236
+
237
def extract_sections_from_region(
    region: "Region",
    start_elements: Union[str, List["Element"], None],
    end_elements: Union[str, List["Element"], None] = None,
    include_boundaries: str = "both",
    orientation: str = "vertical",
    get_section_between_func: Optional[Any] = None,
) -> List["Region"]:
    """
    Core implementation of get_sections() that can be reused across classes.

    Start and end boundaries are located among the region's elements, ordered
    by position, and walked in that order. With end elements, a section runs
    from a start to the next end, and starts met while a section is already
    open are ignored. Without end elements, each start closes the previous
    section and opens a new one. A start left open at the end produces a final
    section whose end is None.

    Args:
        region: The region to extract sections from
        start_elements: Elements or selector marking section starts
        end_elements: Optional elements or selector marking section ends
        include_boundaries: How to include boundary elements
        orientation: Section orientation ('vertical' or 'horizontal'); any
            value other than 'vertical' is treated as horizontal
        get_section_between_func: Optional custom function to create sections,
            called as func(start, end, include_boundaries, orientation);
            defaults to region.get_section_between

    Returns:
        List of section objects as returned by the section function
    """
    # Process selectors to get element lists
    start_elements = process_selector_to_elements(start_elements, region)
    end_elements = process_selector_to_elements(end_elements, region) if end_elements else []

    if not start_elements:
        logger.debug("No start elements found for section extraction")
        return []

    all_elements = region.get_elements()
    if not all_elements:
        return []

    # Order a copy rather than sorting in place, so the list handed back by
    # region.get_elements() is left untouched in case it is shared.
    if orientation == "vertical":
        ordered = sorted(all_elements, key=lambda e: (e.top, e.x0))
    else:
        ordered = sorted(all_elements, key=lambda e: (e.x0, e.top))

    element_to_index = {el: i for i, el in enumerate(ordered)}

    # Collect boundaries that are actually present in the region. Starts are
    # added before ends so that, after the stable sort below, a start precedes
    # an end sharing the same index.
    boundaries = []
    for boundary_type, elements in (("start", start_elements), ("end", end_elements)):
        for elem in elements:
            idx = element_to_index.get(elem)
            if idx is not None:
                boundaries.append({"index": idx, "element": elem, "type": boundary_type})

    # Sort boundaries by document order
    boundaries.sort(key=lambda b: b["index"])

    sections = []
    open_start = None  # element that opened the section currently in progress
    section_func = get_section_between_func or region.get_section_between

    for boundary in boundaries:
        elem = boundary["element"]
        if boundary["type"] == "start":
            if open_start is None:
                open_start = elem
            elif not end_elements:
                # No end elements specified - this start closes the previous
                # section and immediately opens the next one.
                sections.append(section_func(open_start, elem, include_boundaries, orientation))
                open_start = elem
            # Otherwise a section is already open and waiting for an end:
            # the extra start is ignored.
        elif open_start is not None:
            # End boundary closing the open section. Ends met while no
            # section is open are skipped.
            sections.append(section_func(open_start, elem, include_boundaries, orientation))
            open_start = None

    # An unclosed start yields a final section with no end element; the
    # section function decides how far that section extends.
    if open_start is not None:
        sections.append(section_func(open_start, None, include_boundaries, orientation))

    return sections
@@ -0,0 +1,169 @@
1
+ """Spatial utilities for consistent element-region relationships.
2
+
3
+ This module centralizes the logic for determining whether elements belong to regions,
4
+ ensuring consistent behavior across Region, Page, and Flow components.
5
+
6
+ The default strategy is 'center' - an element belongs to a region if its center
7
+ point falls within that region. This prevents double-counting of elements at
8
+ boundaries and provides predictable behavior for operations like get_sections()
9
+ with include_boundaries='none'.
10
+
11
+ Example:
12
+ from natural_pdf.utils.spatial import is_element_in_region
13
+
14
+ # Check if element is in region using center-based logic (default)
15
+ if is_element_in_region(element, region):
16
+ print("Element is in region")
17
+
18
+ # Use different strategies
19
+ if is_element_in_region(element, region, strategy="intersects"):
20
+ print("Element overlaps with region")
21
+ """
22
+
23
+ import logging
24
+ from typing import TYPE_CHECKING, Literal, Optional
25
+
26
+ if TYPE_CHECKING:
27
+ from natural_pdf.elements.base import Element
28
+ from natural_pdf.elements.region import Region
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+ # Element inclusion strategies
33
+ InclusionStrategy = Literal["center", "intersects", "contains"]
34
+
35
+
36
def is_element_in_region(
    element: "Element",
    region: "Region",
    strategy: InclusionStrategy = "center",
    check_page: bool = True
) -> bool:
    """
    Decide whether an element belongs to a region under a given strategy.

    For each strategy the region's own method is used when it exists
    (``is_element_center_inside`` / ``is_point_inside``, ``intersects``,
    ``contains``); otherwise a plain bounding-box comparison with inclusive
    edges is applied.

    Args:
        element: The element to check
        region: The region to check against
        strategy: The inclusion strategy to use:
            - "center": Element belongs if its center point is inside (default)
            - "intersects": Element belongs if any part overlaps
            - "contains": Element belongs only if fully contained
        check_page: Whether to verify element and region are on the same page

    Returns:
        bool: True if element is in region according to the strategy. False
        is also returned when either object has no usable ``bbox``, or when
        ``check_page`` is set and the pages are missing or differ.

    Raises:
        ValueError: If ``strategy`` is not one of the known strategies (only
            reached after the bbox and page checks have passed).
    """
    # Both objects need a non-empty bbox before anything else is considered
    if not (hasattr(element, "bbox") and element.bbox):
        logger.debug(f"Element lacks bbox attributes: {element}")
        return False

    if not (hasattr(region, "bbox") and region.bbox):
        logger.debug(f"Region lacks bbox attributes: {region}")
        return False

    # Optional page membership check
    if check_page:
        pages_known = hasattr(element, "page") and hasattr(region, "page")
        if not pages_known or element.page != region.page:
            return False

    if strategy == "center":
        # Prefer the region's own center test
        if hasattr(region, "is_element_center_inside"):
            return region.is_element_center_inside(element)

        mid_x = (element.x0 + element.x1) / 2
        mid_y = (element.top + element.bottom) / 2

        # Next best: the region's point test
        if hasattr(region, "is_point_inside"):
            return region.is_point_inside(mid_x, mid_y)

        # Last resort: inclusive bounds check on the center point
        within_x = region.x0 <= mid_x <= region.x1
        return within_x and region.top <= mid_y <= region.bottom

    if strategy == "intersects":
        if hasattr(region, "intersects"):
            return region.intersects(element)

        # Boxes overlap unless one lies strictly beyond the other on some side
        separated = (
            element.x1 < region.x0
            or element.x0 > region.x1
            or element.bottom < region.top
            or element.top > region.bottom
        )
        return not separated

    if strategy == "contains":
        if hasattr(region, "contains"):
            return region.contains(element)

        # Every edge of the element must lie within the region's edges
        fits_x = region.x0 <= element.x0 and element.x1 <= region.x1
        return fits_x and region.top <= element.top and element.bottom <= region.bottom

    raise ValueError(f"Unknown inclusion strategy: {strategy}")
118
+
119
+
120
def get_inclusion_strategy() -> InclusionStrategy:
    """
    Return the inclusion strategy currently in effect.

    The value is fixed for now. It is a natural hook for making the strategy
    configurable later (for example through settings or an environment
    variable such as NATURAL_PDF_INCLUSION_STRATEGY).

    Returns:
        The current inclusion strategy, always "center" at present
    """
    current: InclusionStrategy = "center"
    return current
133
+
134
+
135
def calculate_element_overlap_percentage(
    element: "Element",
    region: "Region"
) -> float:
    """
    Compute the share of an element's area that lies inside a region.

    Despite the name, the result is a fraction, not a value out of 100.

    Args:
        element: The element to check
        region: The region to check against

    Returns:
        float: Fraction of the element's area covered by the region (0.0 to
        1.0). 0.0 is returned when either object lacks a ``bbox`` attribute,
        when the boxes do not overlap with positive area, or when the element
        itself has zero area.
    """
    if not (hasattr(element, "bbox") and hasattr(region, "bbox")):
        return 0.0

    # Edges of the rectangle shared by both boxes
    left = max(element.x0, region.x0)
    upper = max(element.top, region.top)
    right = min(element.x1, region.x1)
    lower = min(element.bottom, region.bottom)

    # Touching or disjoint boxes share no area
    if right <= left or lower <= upper:
        return 0.0

    element_width = element.x1 - element.x0
    element_height = element.bottom - element.top
    element_area = element_width * element_height
    if element_area == 0:
        return 0.0

    shared_area = (right - left) * (lower - upper)
    return shared_area / element_area
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: natural-pdf
3
- Version: 0.2.16
3
+ Version: 0.2.17
4
4
  Summary: A more intuitive interface for working with PDFs
5
5
  Author-email: Jonathan Soma <jonathan.soma@gmail.com>
6
6
  License-Expression: MIT
@@ -1,8 +1,8 @@
1
- natural_pdf/__init__.py,sha256=N4pR0LbuPEnUYFZqbdVqc_FGKldgwPQc1wjJhYKTBBM,3417
1
+ natural_pdf/__init__.py,sha256=N9ubwsFpmPj7WHA6Uewgn6IbmU2r0BeUGIdIhmTl6nw,4701
2
2
  natural_pdf/cli.py,sha256=0zO9ZoRiP8JmyGBaVavrMATnvbARWTl7WD2PEefu9BM,4061
3
3
  natural_pdf/text_mixin.py,sha256=eFCiHj6Okcw3aum4955BepcI2NPRalkf9UFFVTc_H30,4012
4
4
  natural_pdf/analyzers/__init__.py,sha256=3XGoNq3OgiVkZP7tOdeP5XVUl7fDgyztdA8DlOcMLXg,1138
5
- natural_pdf/analyzers/guides.py,sha256=B2_Etb0o-lOku-FQw-T1Fo1qxbcAXT4FB0hdp-5kXRs,188171
5
+ natural_pdf/analyzers/guides.py,sha256=BqFgt-bRSOkEoFCvNsYyY8j__00X-8DJ_TLb2Hx9qsQ,202430
6
6
  natural_pdf/analyzers/shape_detection_mixin.py,sha256=mgpyJ4jIulz9l9HCqThabJIsLSrXh9BB2AmLxUoHmw0,62584
7
7
  natural_pdf/analyzers/text_options.py,sha256=qEkDaYWla0rIM_gszEOsu52q7C_dAfV81P2HLJZM2sw,3333
8
8
  natural_pdf/analyzers/text_structure.py,sha256=3WWusi-BI0krUnJxB05DD6XmKj5qRNvQBqH7zOQGm1M,28451
@@ -25,27 +25,27 @@ natural_pdf/classification/mixin.py,sha256=CXygXXhe_qx1563SmIjiu4uSnZkxCkuRR4fGv
25
25
  natural_pdf/classification/results.py,sha256=5ha77CxK0GYwkBMJbvUBZkBjsL5GpOveIZDK9nO4j8I,3239
26
26
  natural_pdf/collections/mixins.py,sha256=Se2C5AcpP9B5E0d0pIrey6-f_P32tAXTK4M7666MNj0,5688
27
27
  natural_pdf/core/__init__.py,sha256=QC8H4M3KbXwMFiQORZ0pdPlzx1Ix6oKKQSS7Ib2KEaA,38
28
- natural_pdf/core/element_manager.py,sha256=KPuKM7SstfErTkRnGq4vrgE0Tv8iazN13Jp7yAXGKso,55575
28
+ natural_pdf/core/element_manager.py,sha256=619R97OtMd7uhaax7fZNJmhy9GxSs9HCNP4OzGgP828,55882
29
29
  natural_pdf/core/highlighting_service.py,sha256=wEV-koqHoHf7S3wZ3j8D2L-ucGp3Nd0YhhStz9yqeLc,70406
30
- natural_pdf/core/page.py,sha256=Pid5hqVjcyX-gcCzxCJ62k6AQhNbUMNM_5QmEcylIjM,155264
31
- natural_pdf/core/page_collection.py,sha256=IjdFq9q0D0P6ZKWInf0H25rLzxfMb7RsUXucogkhNkU,63169
30
+ natural_pdf/core/page.py,sha256=-0OaIoXz0zjT_jnPjjI2jpb8vvNKh-1W56auA5UBhTA,158791
31
+ natural_pdf/core/page_collection.py,sha256=bLZ3TqTQbmP3oYrbfEi7HUoPMbcGplEtUMZ3Z1y7fuw,66728
32
32
  natural_pdf/core/page_groupby.py,sha256=V2e_RNlHaasUzYm2h2vNJI7_aV_fl3_pg7kU3F2j0z8,8218
33
- natural_pdf/core/pdf.py,sha256=ovdeu9TRPnVYyMltD7QpcdcFYBLZFXh3LlfC5ifj6RY,104227
33
+ natural_pdf/core/pdf.py,sha256=i8dYCimL_k5FV6BmPI1a2Dk7XZfwLP8TziXr2n3O_fI,105639
34
34
  natural_pdf/core/pdf_collection.py,sha256=s3ogu4CEHrHMTRqQMJUKJZ-9Ii8b_B9dWbVLTFj0s7g,34992
35
35
  natural_pdf/core/render_spec.py,sha256=y9QkMiIvWaKiEBlV0TjyldADIEUY3YfWLQXxStHu1S4,15480
36
36
  natural_pdf/describe/__init__.py,sha256=kIV7ORmWWB1SAur7nK2aAwR-wHqSedhKfUsaUl4hG0A,586
37
- natural_pdf/describe/base.py,sha256=LYbDjjQYOIZsYBbBQH3UP6XyWArJJvRc8LUugeVdJw0,18178
37
+ natural_pdf/describe/base.py,sha256=M4TGXR8ppTvznTnA1ZDgMQMkDpgu1pwGMNaOcgHf2iY,20154
38
38
  natural_pdf/describe/elements.py,sha256=3Y541z5TQ2obrfZFiFi1YQMsCt3oYrhMHpD5j1tuppw,12639
39
39
  natural_pdf/describe/mixin.py,sha256=rkX14aGrSz7Jvxx8Rbxv3eSfbO-_29DipwpstrV2pDQ,3109
40
40
  natural_pdf/describe/summary.py,sha256=cfT4ZQkeatCDAOwWPwhtEVXisNgk6E57fAXAnoRysSU,7645
41
41
  natural_pdf/elements/__init__.py,sha256=ICNikmLeIEuSYypz-KnkBn8xR1hR7rge4hsa1KLkyWY,42
42
- natural_pdf/elements/base.py,sha256=iJHEejlYu8RNvlKYK2UHAnAlz6tXkiEaGnG2xYtVnuU,59635
43
- natural_pdf/elements/element_collection.py,sha256=CVfnprzKTLeGSpvhGL2ZQVzZ8veSoFtCBlSSGDmX5lY,136594
42
+ natural_pdf/elements/base.py,sha256=YYdoss63yv3IzQeuHbNypo7VLz2UJDFK5b6lqQe5tR8,76090
43
+ natural_pdf/elements/element_collection.py,sha256=dlKoIaqmK_pC_cEcTX9LA2bNbZmc8iXcTTDfpHDlyUM,139812
44
44
  natural_pdf/elements/image.py,sha256=zu-P2Y8fRoEXf6IeZU0EYRWsgZ6I_a5vy1FA3VXTGkQ,1424
45
45
  natural_pdf/elements/line.py,sha256=TFn7KXjPT_jUQyQyabU0F7XYU4dC-qadwodJMZF4DCU,3844
46
46
  natural_pdf/elements/rect.py,sha256=kmUmhwnihd-aTweAO-LsngRDo5Iqmx7lcSa8ZBlE_2E,4544
47
- natural_pdf/elements/region.py,sha256=DM8o0ptm86B2ouOqDgUK_av9cCN6G5iIlJ1VC3KfVWk,167379
48
- natural_pdf/elements/text.py,sha256=dOiss-cSBYnK9j7KqmqmvJcCidBcIhckLJCW8lVz2es,21210
47
+ natural_pdf/elements/region.py,sha256=qJ86iToSjrCUjVrEbO0M0S1nTuZDW9tpI4jF9T5xJKs,168777
48
+ natural_pdf/elements/text.py,sha256=Jo4gnrsJe1PStdoWF2Bt8RSeSmOcfA9DxvMJl7EoAmI,21344
49
49
  natural_pdf/export/mixin.py,sha256=L1q3MIEFWuvie4j4_EmW7GT3NerbZ1as0XMUoqTS7gM,5083
50
50
  natural_pdf/exporters/__init__.py,sha256=QffoARekR6WzXEd05oxOytly4qPdBizuIF-SUkeFpig,643
51
51
  natural_pdf/exporters/base.py,sha256=379sioW_hbkGb21sEVuJhbkkDO5MFsFtTUNO5TgG2YU,2101
@@ -62,9 +62,9 @@ natural_pdf/extraction/mixin.py,sha256=dBcp96R8zMQqaRHiB8vpyad8GR89gv5RPXlr8Mt0a
62
62
  natural_pdf/extraction/result.py,sha256=PDaCCN2LQBbHsZy0_lrQ0ROeMsnmH1WRoXWOjk9M2o4,1825
63
63
  natural_pdf/flows/__init__.py,sha256=cUN4A8hTDLZSRr4PO2W_lR4z6hWpbNG8Seox-IIcrLU,277
64
64
  natural_pdf/flows/collections.py,sha256=ErkHWdX6W_y1SjkcA_bGM0uUYRGPWWpRkHip6LHpej0,25740
65
- natural_pdf/flows/element.py,sha256=AWXGfAo0yhHTA5h0u4teXhRaV_z3McSPGOMFQaSdkJQ,24973
66
- natural_pdf/flows/flow.py,sha256=BuT3DBqNvLEqYle66-nZFO91i_1s98CAat28Dg-JjGU,86149
67
- natural_pdf/flows/region.py,sha256=r_cFtBlmPi7ADN3k8oYA1s_vyz8GeQLCnYcv58Zt5eM,52263
65
+ natural_pdf/flows/element.py,sha256=rDfWICK2gXBMXiqX8D_l7866dkQwgAlJMkCFAFoz6xM,25044
66
+ natural_pdf/flows/flow.py,sha256=MEls08CtkVox41du0wvkL3u11CAYzidQ6WxN1-vthUs,70591
67
+ natural_pdf/flows/region.py,sha256=HMk4xfYJiKgER2KzRIcmXb1Vfp9amnyy0ay8YrLtV8w,55362
68
68
  natural_pdf/ocr/__init__.py,sha256=VY8hhvDPf7Gh2lB-d2QRmghLLyTy6ydxlgo1cS4dOSk,2482
69
69
  natural_pdf/ocr/engine.py,sha256=SwNlWydtHbrIghV5JD_j5B4-rnjCMYIWUIEARag-zHw,11839
70
70
  natural_pdf/ocr/engine_doctr.py,sha256=ptKrupMWoulZb-R93zr9btoe94JPWU7vlJuN7OBJEIM,17740
@@ -85,7 +85,7 @@ natural_pdf/search/search_options.py,sha256=sq_e8_jSROicD94b_xtDtLnjEr_Zsy4icjzP
85
85
  natural_pdf/search/search_service_protocol.py,sha256=u8pbuWP96fnQEe6mnreY9DrdiDAHP6ZCY7phvSbFlP8,6697
86
86
  natural_pdf/search/searchable_mixin.py,sha256=hqQ_AuID5eTGRCtKYdFLZ1zF35y73uk3x1M1VW9Il8U,23514
87
87
  natural_pdf/selectors/__init__.py,sha256=oZGeqSv53EqmIZOhcnawuaGGlRg1h79vArXuZCWKm4A,123
88
- natural_pdf/selectors/parser.py,sha256=WOoJlCxyz6F50uWJEBWSucANWMvihroezMBZMPCZxzE,40388
88
+ natural_pdf/selectors/parser.py,sha256=HbPgmtXXA4lRSAVkCzw6vpCi3oh66e-53yUEPhYLGX8,46909
89
89
  natural_pdf/tables/__init__.py,sha256=sCvCGbGsL6BiqlNxAYfVv003bIDLI11FmjHhaWfcU6w,104
90
90
  natural_pdf/tables/result.py,sha256=-8ctA-jCJYSHtlfAoqTvhUwO5zSP2BQxxetAjqEsNyg,8665
91
91
  natural_pdf/templates/__init__.py,sha256=jYBxzfi73vew0f6yhIh1MlRxw4F_TVN2hKQR0YXOFe0,20
@@ -98,7 +98,10 @@ natural_pdf/utils/identifiers.py,sha256=P7n6owcubnF8oAMa_UfYtENmIaJQdH_AMC9Jbs2b
98
98
  natural_pdf/utils/layout.py,sha256=tJRRzwUVP0EeqqbGzr9yOuE5qFvhjZ9A44BuItmKGaU,753
99
99
  natural_pdf/utils/locks.py,sha256=7HJqV0VsNcOfISnbw8goCKWP5ck11uSJo6T_x9XIPKI,215
100
100
  natural_pdf/utils/packaging.py,sha256=TM0jafwS5yVbTGC-RMi4TyWunf9cUUo9h5J6rMzkT-o,22444
101
+ natural_pdf/utils/pdfminer_patches.py,sha256=Ob81OMoNUGMUIy9nMw3deSQ_Z6cQmhbRlHUC3EHw2jk,4201
101
102
  natural_pdf/utils/reading_order.py,sha256=u7XyVZdKMPMK0CL1C7xFogKnZ92b0JKT068KFjQWe18,7437
103
+ natural_pdf/utils/sections.py,sha256=HZX7829-fquKgIF7vUN2tL10-aXckEaM25g_2VcgWU4,12941
104
+ natural_pdf/utils/spatial.py,sha256=JOH2LHnF5WBDcjNQsHQdj458zwUgKtSWW7Tj0motn70,5968
102
105
  natural_pdf/utils/text_extraction.py,sha256=CCwPTmMoTgtQt2P00X_ADIf6ZGNfxvjCO9FO0_HqG40,13900
103
106
  natural_pdf/utils/visualization.py,sha256=zhZEHgYnZFuX7YxTHXF8Y3D97uHp2beTKMaC-JkCFwk,22364
104
107
  natural_pdf/vision/__init__.py,sha256=TkoQtdODlh0n_99dsjLIWKE9dgK0m4jfrui_cQ3gTwU,221
@@ -108,7 +111,7 @@ natural_pdf/vision/similarity.py,sha256=HWmXDBNLSOlRWH-_1K3FVR7tSsRuMFqXZwrVhhg2
108
111
  natural_pdf/vision/template_matching.py,sha256=91XQt5tp-vmcMX_4b2Bz-YwIAlb-hc8E5ih_qAHQuCk,7145
109
112
  natural_pdf/widgets/__init__.py,sha256=QTVaUmsw__FCweFYZebwPssQxxUFUMd0wpm_cUbGZJY,181
110
113
  natural_pdf/widgets/viewer.py,sha256=KW3JogdR2TMg2ECUMYp8hwd060hfg8EsYBWxb5IEzBY,24942
111
- natural_pdf-0.2.16.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
114
+ natural_pdf-0.2.17.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
112
115
  optimization/memory_comparison.py,sha256=0i_foFSRmppj-fY069qjwH36s_zkx-1L2ASAAlepWzA,6541
113
116
  optimization/pdf_analyzer.py,sha256=HjrmTgu2qchxPeDckc5kjgxppGwd40UESrYS9Myj7pY,19352
114
117
  optimization/performance_analysis.py,sha256=JBXnR9hc7Ix7YCnt3EJPSpsyqIUgKsc7GEffQ_TDCBk,13033
@@ -145,8 +148,8 @@ tools/bad_pdf_eval/llm_enrich.py,sha256=mCh4KGi1HmIkzGjj5rrHz1Osd7sEX1IZ_FW08H1t
145
148
  tools/bad_pdf_eval/llm_enrich_with_retry.py,sha256=XUtPF1hUvqd3frDXT0wDTXoonuAivhjM5vgFdZ-tm0A,9373
146
149
  tools/bad_pdf_eval/reporter.py,sha256=e1g__mkSB4q02p3mGWOwMhvFs7F2HJosNBxup0-LkyU,400
147
150
  tools/bad_pdf_eval/utils.py,sha256=hR95XQ7qf7Cu6BdyX0L7ggGVx-ah5sK0jHWblTJUUic,4896
148
- natural_pdf-0.2.16.dist-info/METADATA,sha256=o2x-_hFHY64xJCCZM7CwEHo96vAU8XzFejobi1neE2g,6960
149
- natural_pdf-0.2.16.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
150
- natural_pdf-0.2.16.dist-info/entry_points.txt,sha256=1R_KMv7g60UBBpRqGfw7bppsMNGdayR-iJlb9ohEk_8,81
151
- natural_pdf-0.2.16.dist-info/top_level.txt,sha256=ZDKhxE_tg508o9BpagsjCGcI8GY4cF_8bg0e0IaLsPI,41
152
- natural_pdf-0.2.16.dist-info/RECORD,,
151
+ natural_pdf-0.2.17.dist-info/METADATA,sha256=8K5PCwh_OuI8vkWRLChHeT-LuEd0sRmigkRm55ZNeDo,6960
152
+ natural_pdf-0.2.17.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
153
+ natural_pdf-0.2.17.dist-info/entry_points.txt,sha256=1R_KMv7g60UBBpRqGfw7bppsMNGdayR-iJlb9ohEk_8,81
154
+ natural_pdf-0.2.17.dist-info/top_level.txt,sha256=ZDKhxE_tg508o9BpagsjCGcI8GY4cF_8bg0e0IaLsPI,41
155
+ natural_pdf-0.2.17.dist-info/RECORD,,