natural-pdf 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. docs/api/index.md +386 -0
  2. docs/assets/favicon.png +3 -0
  3. docs/assets/favicon.svg +3 -0
  4. docs/assets/javascripts/custom.js +17 -0
  5. docs/assets/logo.svg +3 -0
  6. docs/assets/sample-screen.png +0 -0
  7. docs/assets/social-preview.png +17 -0
  8. docs/assets/social-preview.svg +17 -0
  9. docs/assets/stylesheets/custom.css +65 -0
  10. docs/document-qa/index.ipynb +435 -0
  11. docs/document-qa/index.md +79 -0
  12. docs/element-selection/index.ipynb +915 -0
  13. docs/element-selection/index.md +229 -0
  14. docs/index.md +170 -0
  15. docs/installation/index.md +69 -0
  16. docs/interactive-widget/index.ipynb +962 -0
  17. docs/interactive-widget/index.md +12 -0
  18. docs/layout-analysis/index.ipynb +818 -0
  19. docs/layout-analysis/index.md +185 -0
  20. docs/ocr/index.md +222 -0
  21. docs/pdf-navigation/index.ipynb +314 -0
  22. docs/pdf-navigation/index.md +97 -0
  23. docs/regions/index.ipynb +816 -0
  24. docs/regions/index.md +294 -0
  25. docs/tables/index.ipynb +658 -0
  26. docs/tables/index.md +144 -0
  27. docs/text-analysis/index.ipynb +370 -0
  28. docs/text-analysis/index.md +105 -0
  29. docs/text-extraction/index.ipynb +1478 -0
  30. docs/text-extraction/index.md +292 -0
  31. docs/tutorials/01-loading-and-extraction.ipynb +1696 -0
  32. docs/tutorials/01-loading-and-extraction.md +95 -0
  33. docs/tutorials/02-finding-elements.ipynb +340 -0
  34. docs/tutorials/02-finding-elements.md +149 -0
  35. docs/tutorials/03-extracting-blocks.ipynb +147 -0
  36. docs/tutorials/03-extracting-blocks.md +48 -0
  37. docs/tutorials/04-table-extraction.ipynb +114 -0
  38. docs/tutorials/04-table-extraction.md +50 -0
  39. docs/tutorials/05-excluding-content.ipynb +270 -0
  40. docs/tutorials/05-excluding-content.md +109 -0
  41. docs/tutorials/06-document-qa.ipynb +332 -0
  42. docs/tutorials/06-document-qa.md +91 -0
  43. docs/tutorials/07-layout-analysis.ipynb +260 -0
  44. docs/tutorials/07-layout-analysis.md +66 -0
  45. docs/tutorials/07-working-with-regions.ipynb +409 -0
  46. docs/tutorials/07-working-with-regions.md +151 -0
  47. docs/tutorials/08-spatial-navigation.ipynb +508 -0
  48. docs/tutorials/08-spatial-navigation.md +190 -0
  49. docs/tutorials/09-section-extraction.ipynb +2434 -0
  50. docs/tutorials/09-section-extraction.md +256 -0
  51. docs/tutorials/10-form-field-extraction.ipynb +484 -0
  52. docs/tutorials/10-form-field-extraction.md +201 -0
  53. docs/tutorials/11-enhanced-table-processing.ipynb +54 -0
  54. docs/tutorials/11-enhanced-table-processing.md +9 -0
  55. docs/tutorials/12-ocr-integration.ipynb +586 -0
  56. docs/tutorials/12-ocr-integration.md +188 -0
  57. docs/tutorials/13-semantic-search.ipynb +1888 -0
  58. docs/tutorials/13-semantic-search.md +77 -0
  59. docs/visual-debugging/index.ipynb +2970 -0
  60. docs/visual-debugging/index.md +157 -0
  61. docs/visual-debugging/region.png +0 -0
  62. natural_pdf/__init__.py +39 -20
  63. natural_pdf/analyzers/__init__.py +2 -1
  64. natural_pdf/analyzers/layout/base.py +32 -24
  65. natural_pdf/analyzers/layout/docling.py +131 -72
  66. natural_pdf/analyzers/layout/layout_analyzer.py +156 -113
  67. natural_pdf/analyzers/layout/layout_manager.py +98 -58
  68. natural_pdf/analyzers/layout/layout_options.py +32 -17
  69. natural_pdf/analyzers/layout/paddle.py +152 -95
  70. natural_pdf/analyzers/layout/surya.py +164 -92
  71. natural_pdf/analyzers/layout/tatr.py +149 -84
  72. natural_pdf/analyzers/layout/yolo.py +84 -44
  73. natural_pdf/analyzers/text_options.py +22 -15
  74. natural_pdf/analyzers/text_structure.py +131 -85
  75. natural_pdf/analyzers/utils.py +30 -23
  76. natural_pdf/collections/pdf_collection.py +125 -97
  77. natural_pdf/core/__init__.py +1 -1
  78. natural_pdf/core/element_manager.py +416 -337
  79. natural_pdf/core/highlighting_service.py +268 -196
  80. natural_pdf/core/page.py +907 -513
  81. natural_pdf/core/pdf.py +385 -287
  82. natural_pdf/elements/__init__.py +1 -1
  83. natural_pdf/elements/base.py +302 -214
  84. natural_pdf/elements/collections.py +708 -508
  85. natural_pdf/elements/line.py +39 -36
  86. natural_pdf/elements/rect.py +32 -30
  87. natural_pdf/elements/region.py +854 -883
  88. natural_pdf/elements/text.py +122 -99
  89. natural_pdf/exporters/__init__.py +0 -1
  90. natural_pdf/exporters/searchable_pdf.py +261 -102
  91. natural_pdf/ocr/__init__.py +23 -14
  92. natural_pdf/ocr/engine.py +17 -8
  93. natural_pdf/ocr/engine_easyocr.py +63 -47
  94. natural_pdf/ocr/engine_paddle.py +97 -68
  95. natural_pdf/ocr/engine_surya.py +54 -44
  96. natural_pdf/ocr/ocr_manager.py +88 -62
  97. natural_pdf/ocr/ocr_options.py +16 -10
  98. natural_pdf/qa/__init__.py +1 -1
  99. natural_pdf/qa/document_qa.py +119 -111
  100. natural_pdf/search/__init__.py +37 -31
  101. natural_pdf/search/haystack_search_service.py +312 -189
  102. natural_pdf/search/haystack_utils.py +186 -122
  103. natural_pdf/search/search_options.py +25 -14
  104. natural_pdf/search/search_service_protocol.py +12 -6
  105. natural_pdf/search/searchable_mixin.py +261 -176
  106. natural_pdf/selectors/__init__.py +2 -1
  107. natural_pdf/selectors/parser.py +159 -316
  108. natural_pdf/templates/__init__.py +1 -1
  109. natural_pdf/utils/highlighting.py +8 -2
  110. natural_pdf/utils/reading_order.py +65 -63
  111. natural_pdf/utils/text_extraction.py +195 -0
  112. natural_pdf/utils/visualization.py +70 -61
  113. natural_pdf/widgets/__init__.py +2 -3
  114. natural_pdf/widgets/viewer.py +749 -718
  115. {natural_pdf-0.1.4.dist-info → natural_pdf-0.1.5.dist-info}/METADATA +15 -1
  116. natural_pdf-0.1.5.dist-info/RECORD +134 -0
  117. natural_pdf-0.1.5.dist-info/top_level.txt +5 -0
  118. notebooks/Examples.ipynb +1293 -0
  119. pdfs/.gitkeep +0 -0
  120. pdfs/01-practice.pdf +543 -0
  121. pdfs/0500000US42001.pdf +0 -0
  122. pdfs/0500000US42007.pdf +0 -0
  123. pdfs/2014 Statistics.pdf +0 -0
  124. pdfs/2019 Statistics.pdf +0 -0
  125. pdfs/Atlanta_Public_Schools_GA_sample.pdf +0 -0
  126. pdfs/needs-ocr.pdf +0 -0
  127. tests/test_loading.py +50 -0
  128. tests/test_optional_deps.py +298 -0
  129. natural_pdf-0.1.4.dist-info/RECORD +0 -61
  130. natural_pdf-0.1.4.dist-info/top_level.txt +0 -1
  131. {natural_pdf-0.1.4.dist-info → natural_pdf-0.1.5.dist-info}/WHEEL +0 -0
  132. {natural_pdf-0.1.4.dist-info → natural_pdf-0.1.5.dist-info}/licenses/LICENSE +0 -0
@@ -1,24 +1,33 @@
1
1
  """
2
2
  Base Element class for natural-pdf.
3
3
  """
4
- from typing import Any, Dict, List, Optional, TYPE_CHECKING, Union, Tuple
4
+
5
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
6
+
5
7
  from PIL import Image
6
8
 
7
9
  if TYPE_CHECKING:
8
10
  from natural_pdf.core.page import Page
9
- from natural_pdf.elements.region import Region
10
11
  from natural_pdf.elements.base import Element
11
12
  from natural_pdf.elements.collections import ElementCollection
13
+ from natural_pdf.elements.region import Region
12
14
 
13
15
 
14
16
  class DirectionalMixin:
15
17
  """
16
18
  Mixin class providing directional methods for both Element and Region classes.
17
19
  """
18
-
19
- def _direction(self, direction: str, size: Optional[float] = None,
20
- cross_size: str = "full", include_element: bool = False,
21
- until: Optional[str] = None, include_endpoint: bool = True, **kwargs) -> 'Region':
20
+
21
+ def _direction(
22
+ self,
23
+ direction: str,
24
+ size: Optional[float] = None,
25
+ cross_size: str = "full",
26
+ include_element: bool = False,
27
+ until: Optional[str] = None,
28
+ include_endpoint: bool = True,
29
+ **kwargs,
30
+ ) -> "Region":
22
31
  """
23
32
  Protected helper method to create a region in a specified direction relative to this element/region.
24
33
 
@@ -34,11 +43,11 @@ class DirectionalMixin:
34
43
  Returns:
35
44
  Region object
36
45
  """
37
- import math # Use math.inf for infinity
46
+ import math # Use math.inf for infinity
38
47
 
39
- is_horizontal = direction in ('left', 'right')
40
- is_positive = direction in ('right', 'below') # right/below are positive directions
41
- pixel_offset = 1 # Offset for excluding elements/endpoints
48
+ is_horizontal = direction in ("left", "right")
49
+ is_positive = direction in ("right", "below") # right/below are positive directions
50
+ pixel_offset = 1 # Offset for excluding elements/endpoints
42
51
 
43
52
  # 1. Determine initial boundaries based on direction and include_element
44
53
  if is_horizontal:
@@ -47,38 +56,44 @@ class DirectionalMixin:
47
56
  y1 = self.page.height if cross_size == "full" else self.bottom
48
57
 
49
58
  # Initial primary boundaries (horizontal)
50
- if is_positive: # right
59
+ if is_positive: # right
51
60
  x0_initial = self.x0 if include_element else self.x1 + pixel_offset
52
- x1_initial = self.x1 # This edge moves
53
- else: # left
54
- x0_initial = self.x0 # This edge moves
61
+ x1_initial = self.x1 # This edge moves
62
+ else: # left
63
+ x0_initial = self.x0 # This edge moves
55
64
  x1_initial = self.x1 if include_element else self.x0 - pixel_offset
56
- else: # Vertical
65
+ else: # Vertical
57
66
  # Initial cross-boundaries (horizontal)
58
67
  x0 = 0 if cross_size == "full" else self.x0
59
68
  x1 = self.page.width if cross_size == "full" else self.x1
60
69
 
61
70
  # Initial primary boundaries (vertical)
62
- if is_positive: # below
71
+ if is_positive: # below
63
72
  y0_initial = self.top if include_element else self.bottom + pixel_offset
64
- y1_initial = self.bottom # This edge moves
65
- else: # above
66
- y0_initial = self.top # This edge moves
73
+ y1_initial = self.bottom # This edge moves
74
+ else: # above
75
+ y0_initial = self.top # This edge moves
67
76
  y1_initial = self.bottom if include_element else self.top - pixel_offset
68
77
 
69
78
  # 2. Calculate the final primary boundary, considering 'size' or page limits
70
79
  if is_horizontal:
71
- if is_positive: # right
72
- x1_final = min(self.page.width, x1_initial + (size if size is not None else (self.page.width - x1_initial)))
80
+ if is_positive: # right
81
+ x1_final = min(
82
+ self.page.width,
83
+ x1_initial + (size if size is not None else (self.page.width - x1_initial)),
84
+ )
73
85
  x0_final = x0_initial
74
- else: # left
86
+ else: # left
75
87
  x0_final = max(0, x0_initial - (size if size is not None else x0_initial))
76
88
  x1_final = x1_initial
77
- else: # Vertical
78
- if is_positive: # below
79
- y1_final = min(self.page.height, y1_initial + (size if size is not None else (self.page.height - y1_initial)))
89
+ else: # Vertical
90
+ if is_positive: # below
91
+ y1_final = min(
92
+ self.page.height,
93
+ y1_initial + (size if size is not None else (self.page.height - y1_initial)),
94
+ )
80
95
  y0_final = y0_initial
81
- else: # above
96
+ else: # above
82
97
  y0_final = max(0, y0_initial - (size if size is not None else y0_initial))
83
98
  y1_final = y1_initial
84
99
 
@@ -89,16 +104,16 @@ class DirectionalMixin:
89
104
  matches_in_direction = []
90
105
 
91
106
  # Filter and sort matches based on direction
92
- if direction == 'above':
107
+ if direction == "above":
93
108
  matches_in_direction = [m for m in all_matches if m.bottom <= self.top]
94
109
  matches_in_direction.sort(key=lambda e: e.bottom, reverse=True)
95
- elif direction == 'below':
110
+ elif direction == "below":
96
111
  matches_in_direction = [m for m in all_matches if m.top >= self.bottom]
97
112
  matches_in_direction.sort(key=lambda e: e.top)
98
- elif direction == 'left':
113
+ elif direction == "left":
99
114
  matches_in_direction = [m for m in all_matches if m.x1 <= self.x0]
100
115
  matches_in_direction.sort(key=lambda e: e.x1, reverse=True)
101
- elif direction == 'right':
116
+ elif direction == "right":
102
117
  matches_in_direction = [m for m in all_matches if m.x0 >= self.x1]
103
118
  matches_in_direction.sort(key=lambda e: e.x0)
104
119
 
@@ -107,25 +122,29 @@ class DirectionalMixin:
107
122
 
108
123
  # Adjust the primary boundary based on the target
109
124
  if is_horizontal:
110
- if is_positive: # right
125
+ if is_positive: # right
111
126
  x1_final = target.x1 if include_endpoint else target.x0 - pixel_offset
112
- else: # left
127
+ else: # left
113
128
  x0_final = target.x0 if include_endpoint else target.x1 + pixel_offset
114
- else: # Vertical
115
- if is_positive: # below
129
+ else: # Vertical
130
+ if is_positive: # below
116
131
  y1_final = target.bottom if include_endpoint else target.top - pixel_offset
117
- else: # above
132
+ else: # above
118
133
  y0_final = target.top if include_endpoint else target.bottom + pixel_offset
119
134
 
120
135
  # Adjust cross boundaries if cross_size is 'element'
121
136
  if cross_size == "element":
122
- if is_horizontal: # Adjust y0, y1
123
- target_y0 = target.top if include_endpoint else target.bottom # Use opposite boundary if excluding
137
+ if is_horizontal: # Adjust y0, y1
138
+ target_y0 = (
139
+ target.top if include_endpoint else target.bottom
140
+ ) # Use opposite boundary if excluding
124
141
  target_y1 = target.bottom if include_endpoint else target.top
125
142
  y0 = min(y0, target_y0)
126
143
  y1 = max(y1, target_y1)
127
- else: # Adjust x0, x1
128
- target_x0 = target.x0 if include_endpoint else target.x1 # Use opposite boundary if excluding
144
+ else: # Adjust x0, x1
145
+ target_x0 = (
146
+ target.x0 if include_endpoint else target.x1
147
+ ) # Use opposite boundary if excluding
129
148
  target_x1 = target.x1 if include_endpoint else target.x0
130
149
  x0 = min(x0, target_x0)
131
150
  x1 = max(x1, target_x1)
@@ -145,6 +164,7 @@ class DirectionalMixin:
145
164
 
146
165
  # 5. Create and return appropriate object based on self type
147
166
  from natural_pdf.elements.region import Region
167
+
148
168
  result = Region(self.page, final_bbox)
149
169
  result.source_element = self
150
170
  result.includes_source = include_element
@@ -154,11 +174,18 @@ class DirectionalMixin:
154
174
 
155
175
  return result
156
176
 
157
- def above(self, height: Optional[float] = None, width: str = "full", include_element: bool = False,
158
- until: Optional[str] = None, include_endpoint: bool = True, **kwargs) -> 'Region':
177
+ def above(
178
+ self,
179
+ height: Optional[float] = None,
180
+ width: str = "full",
181
+ include_element: bool = False,
182
+ until: Optional[str] = None,
183
+ include_endpoint: bool = True,
184
+ **kwargs,
185
+ ) -> "Region":
159
186
  """
160
187
  Select region above this element/region.
161
-
188
+
162
189
  Args:
163
190
  height: Height of the region above, in points
164
191
  width: Width mode - "full" for full page width or "element" for element width
@@ -166,25 +193,32 @@ class DirectionalMixin:
166
193
  until: Optional selector string to specify an upper boundary element
167
194
  include_endpoint: Whether to include the boundary element in the region (default: True)
168
195
  **kwargs: Additional parameters
169
-
196
+
170
197
  Returns:
171
198
  Region object representing the area above
172
199
  """
173
200
  return self._direction(
174
- direction='above',
201
+ direction="above",
175
202
  size=height,
176
203
  cross_size=width,
177
204
  include_element=include_element,
178
205
  until=until,
179
206
  include_endpoint=include_endpoint,
180
- **kwargs
207
+ **kwargs,
181
208
  )
182
209
 
183
- def below(self, height: Optional[float] = None, width: str = "full", include_element: bool = False,
184
- until: Optional[str] = None, include_endpoint: bool = True, **kwargs) -> 'Region':
210
+ def below(
211
+ self,
212
+ height: Optional[float] = None,
213
+ width: str = "full",
214
+ include_element: bool = False,
215
+ until: Optional[str] = None,
216
+ include_endpoint: bool = True,
217
+ **kwargs,
218
+ ) -> "Region":
185
219
  """
186
220
  Select region below this element/region.
187
-
221
+
188
222
  Args:
189
223
  height: Height of the region below, in points
190
224
  width: Width mode - "full" for full page width or "element" for element width
@@ -192,25 +226,32 @@ class DirectionalMixin:
192
226
  until: Optional selector string to specify a lower boundary element
193
227
  include_endpoint: Whether to include the boundary element in the region (default: True)
194
228
  **kwargs: Additional parameters
195
-
229
+
196
230
  Returns:
197
231
  Region object representing the area below
198
232
  """
199
233
  return self._direction(
200
- direction='below',
234
+ direction="below",
201
235
  size=height,
202
236
  cross_size=width,
203
237
  include_element=include_element,
204
238
  until=until,
205
239
  include_endpoint=include_endpoint,
206
- **kwargs
240
+ **kwargs,
207
241
  )
208
242
 
209
- def left(self, width: Optional[float] = None, height: str = "full", include_element: bool = False,
210
- until: Optional[str] = None, include_endpoint: bool = True, **kwargs) -> 'Region':
243
+ def left(
244
+ self,
245
+ width: Optional[float] = None,
246
+ height: str = "full",
247
+ include_element: bool = False,
248
+ until: Optional[str] = None,
249
+ include_endpoint: bool = True,
250
+ **kwargs,
251
+ ) -> "Region":
211
252
  """
212
253
  Select region to the left of this element/region.
213
-
254
+
214
255
  Args:
215
256
  width: Width of the region to the left, in points
216
257
  height: Height mode - "full" for full page height or "element" for element height
@@ -218,25 +259,32 @@ class DirectionalMixin:
218
259
  until: Optional selector string to specify a left boundary element
219
260
  include_endpoint: Whether to include the boundary element in the region (default: True)
220
261
  **kwargs: Additional parameters
221
-
262
+
222
263
  Returns:
223
264
  Region object representing the area to the left
224
265
  """
225
266
  return self._direction(
226
- direction='left',
267
+ direction="left",
227
268
  size=width,
228
269
  cross_size=height,
229
270
  include_element=include_element,
230
271
  until=until,
231
272
  include_endpoint=include_endpoint,
232
- **kwargs
273
+ **kwargs,
233
274
  )
234
275
 
235
- def right(self, width: Optional[float] = None, height: str = "full", include_element: bool = False,
236
- until: Optional[str] = None, include_endpoint: bool = True, **kwargs) -> 'Region':
276
+ def right(
277
+ self,
278
+ width: Optional[float] = None,
279
+ height: str = "full",
280
+ include_element: bool = False,
281
+ until: Optional[str] = None,
282
+ include_endpoint: bool = True,
283
+ **kwargs,
284
+ ) -> "Region":
237
285
  """
238
286
  Select region to the right of this element/region.
239
-
287
+
240
288
  Args:
241
289
  width: Width of the region to the right, in points
242
290
  height: Height mode - "full" for full page height or "element" for element height
@@ -244,33 +292,35 @@ class DirectionalMixin:
244
292
  until: Optional selector string to specify a right boundary element
245
293
  include_endpoint: Whether to include the boundary element in the region (default: True)
246
294
  **kwargs: Additional parameters
247
-
295
+
248
296
  Returns:
249
297
  Region object representing the area to the right
250
298
  """
251
299
  return self._direction(
252
- direction='right',
300
+ direction="right",
253
301
  size=width,
254
302
  cross_size=height,
255
303
  include_element=include_element,
256
304
  until=until,
257
305
  include_endpoint=include_endpoint,
258
- **kwargs
306
+ **kwargs,
259
307
  )
260
308
 
261
- def expand(self,
262
- left: float = 0,
263
- right: float = 0,
264
- top_expand: float = 0, # Renamed to avoid conflict
265
- bottom_expand: float = 0, # Renamed to avoid conflict
266
- width_factor: float = 1.0,
267
- height_factor: float = 1.0,
268
- # Keep original parameter names for backward compatibility
269
- top: float = None,
270
- bottom: float = None) -> 'Region':
309
+ def expand(
310
+ self,
311
+ left: float = 0,
312
+ right: float = 0,
313
+ top_expand: float = 0, # Renamed to avoid conflict
314
+ bottom_expand: float = 0, # Renamed to avoid conflict
315
+ width_factor: float = 1.0,
316
+ height_factor: float = 1.0,
317
+ # Keep original parameter names for backward compatibility
318
+ top: float = None,
319
+ bottom: float = None,
320
+ ) -> "Region":
271
321
  """
272
322
  Create a new region expanded from this element/region.
273
-
323
+
274
324
  Args:
275
325
  left: Amount to expand left edge (positive value expands leftwards)
276
326
  right: Amount to expand right edge (positive value expands rightwards)
@@ -280,7 +330,7 @@ class DirectionalMixin:
280
330
  height_factor: Factor to multiply height by (applied after absolute expansion)
281
331
  top: (DEPRECATED, use top_expand) Amount to expand top edge (upward)
282
332
  bottom: (DEPRECATED, use bottom_expand) Amount to expand bottom edge (downward)
283
-
333
+
284
334
  Returns:
285
335
  New expanded Region object
286
336
  """
@@ -289,39 +339,39 @@ class DirectionalMixin:
289
339
  new_x1 = self.x1
290
340
  new_top = self.top
291
341
  new_bottom = self.bottom
292
-
342
+
293
343
  # Handle the deprecated parameter names for backward compatibility
294
344
  if top is not None:
295
345
  top_expand = top
296
346
  if bottom is not None:
297
347
  bottom_expand = bottom
298
-
348
+
299
349
  # Apply absolute expansions first
300
350
  new_x0 -= left
301
351
  new_x1 += right
302
352
  new_top -= top_expand # Expand upward (decrease top coordinate)
303
353
  new_bottom += bottom_expand # Expand downward (increase bottom coordinate)
304
-
354
+
305
355
  # Apply percentage factors if provided
306
356
  if width_factor != 1.0 or height_factor != 1.0:
307
357
  # Calculate center point *after* absolute expansion
308
358
  center_x = (new_x0 + new_x1) / 2
309
359
  center_y = (new_top + new_bottom) / 2
310
-
360
+
311
361
  # Calculate current width and height *after* absolute expansion
312
362
  current_width = new_x1 - new_x0
313
363
  current_height = new_bottom - new_top
314
-
364
+
315
365
  # Calculate new width and height
316
366
  new_width = current_width * width_factor
317
367
  new_height = current_height * height_factor
318
-
368
+
319
369
  # Adjust coordinates based on the new dimensions, keeping the center
320
370
  new_x0 = center_x - new_width / 2
321
371
  new_x1 = center_x + new_width / 2
322
372
  new_top = center_y - new_height / 2
323
373
  new_bottom = center_y + new_height / 2
324
-
374
+
325
375
  # Clamp coordinates to page boundaries
326
376
  new_x0 = max(0, new_x0)
327
377
  new_top = max(0, new_top)
@@ -329,124 +379,129 @@ class DirectionalMixin:
329
379
  new_bottom = min(self.page.height, new_bottom)
330
380
 
331
381
  # Ensure coordinates are valid (x0 <= x1, top <= bottom)
332
- if new_x0 > new_x1: new_x0 = new_x1 = (new_x0 + new_x1) / 2
333
- if new_top > new_bottom: new_top = new_bottom = (new_top + new_bottom) / 2
382
+ if new_x0 > new_x1:
383
+ new_x0 = new_x1 = (new_x0 + new_x1) / 2
384
+ if new_top > new_bottom:
385
+ new_top = new_bottom = (new_top + new_bottom) / 2
334
386
 
335
387
  # Create new region with expanded bbox
336
388
  from natural_pdf.elements.region import Region
389
+
337
390
  new_region = Region(self.page, (new_x0, new_top, new_x1, new_bottom))
338
-
391
+
339
392
  return new_region
340
393
 
341
394
 
342
395
  class Element(DirectionalMixin):
343
396
  """
344
397
  Base class for all PDF elements.
345
-
398
+
346
399
  This class provides common properties and methods for all PDF elements,
347
400
  such as text, rectangles, lines, etc.
348
401
  """
349
-
350
- def __init__(self, obj: Dict[str, Any], page: 'Page'):
402
+
403
+ def __init__(self, obj: Dict[str, Any], page: "Page"):
351
404
  """
352
405
  Initialize base element.
353
-
406
+
354
407
  Args:
355
408
  obj: The underlying pdfplumber object
356
409
  page: The parent Page object
357
410
  """
358
411
  self._obj = obj
359
412
  self._page = page
360
-
413
+
361
414
  @property
362
415
  def type(self) -> str:
363
416
  """Element type."""
364
- return self._obj.get('object_type', 'unknown')
365
-
417
+ return self._obj.get("object_type", "unknown")
418
+
366
419
  @property
367
420
  def bbox(self) -> Tuple[float, float, float, float]:
368
421
  """Bounding box (x0, top, x1, bottom)."""
369
422
  return (self.x0, self.top, self.x1, self.bottom)
370
-
423
+
371
424
  @property
372
425
  def x0(self) -> float:
373
426
  """Left x-coordinate."""
374
427
  if self.has_polygon:
375
428
  return min(pt[0] for pt in self.polygon)
376
- return self._obj.get('x0', 0)
377
-
429
+ return self._obj.get("x0", 0)
430
+
378
431
  @property
379
432
  def top(self) -> float:
380
433
  """Top y-coordinate."""
381
434
  if self.has_polygon:
382
435
  return min(pt[1] for pt in self.polygon)
383
- return self._obj.get('top', 0)
384
-
436
+ return self._obj.get("top", 0)
437
+
385
438
  @property
386
439
  def x1(self) -> float:
387
440
  """Right x-coordinate."""
388
441
  if self.has_polygon:
389
442
  return max(pt[0] for pt in self.polygon)
390
- return self._obj.get('x1', 0)
391
-
443
+ return self._obj.get("x1", 0)
444
+
392
445
  @property
393
446
  def bottom(self) -> float:
394
447
  """Bottom y-coordinate."""
395
448
  if self.has_polygon:
396
449
  return max(pt[1] for pt in self.polygon)
397
- return self._obj.get('bottom', 0)
398
-
450
+ return self._obj.get("bottom", 0)
451
+
399
452
  @property
400
453
  def width(self) -> float:
401
454
  """Element width."""
402
455
  return self.x1 - self.x0
403
-
456
+
404
457
  @property
405
458
  def height(self) -> float:
406
459
  """Element height."""
407
460
  return self.bottom - self.top
408
-
461
+
409
462
  @property
410
463
  def has_polygon(self) -> bool:
411
464
  """Check if this element has polygon coordinates."""
412
- return ('polygon' in self._obj and self._obj['polygon'] and len(self._obj['polygon']) >= 3) or hasattr(self, '_polygon')
413
-
465
+ return (
466
+ "polygon" in self._obj and self._obj["polygon"] and len(self._obj["polygon"]) >= 3
467
+ ) or hasattr(self, "_polygon")
468
+
414
469
  @property
415
470
  def polygon(self) -> List[Tuple[float, float]]:
416
471
  """Get polygon coordinates if available, otherwise return rectangle corners."""
417
- if hasattr(self, '_polygon') and self._polygon:
472
+ if hasattr(self, "_polygon") and self._polygon:
418
473
  return self._polygon
419
- elif 'polygon' in self._obj and self._obj['polygon']:
420
- return self._obj['polygon']
474
+ elif "polygon" in self._obj and self._obj["polygon"]:
475
+ return self._obj["polygon"]
421
476
  else:
422
477
  # Create rectangle corners as fallback
423
478
  return [
424
- (self._obj.get('x0', 0), self._obj.get('top', 0)), # top-left
425
- (self._obj.get('x1', 0), self._obj.get('top', 0)), # top-right
426
- (self._obj.get('x1', 0), self._obj.get('bottom', 0)), # bottom-right
427
- (self._obj.get('x0', 0), self._obj.get('bottom', 0)) # bottom-left
479
+ (self._obj.get("x0", 0), self._obj.get("top", 0)), # top-left
480
+ (self._obj.get("x1", 0), self._obj.get("top", 0)), # top-right
481
+ (self._obj.get("x1", 0), self._obj.get("bottom", 0)), # bottom-right
482
+ (self._obj.get("x0", 0), self._obj.get("bottom", 0)), # bottom-left
428
483
  ]
429
-
484
+
430
485
  def is_point_inside(self, x: float, y: float) -> bool:
431
486
  """
432
487
  Check if a point is inside this element using ray casting algorithm for polygons.
433
-
488
+
434
489
  Args:
435
490
  x: X-coordinate to check
436
491
  y: Y-coordinate to check
437
-
492
+
438
493
  Returns:
439
494
  True if the point is inside the element
440
495
  """
441
496
  if not self.has_polygon:
442
497
  # Use simple rectangle check
443
498
  return (self.x0 <= x <= self.x1) and (self.top <= y <= self.bottom)
444
-
499
+
445
500
  # Ray casting algorithm for complex polygons
446
501
  poly = self.polygon
447
502
  n = len(poly)
448
503
  inside = False
449
-
504
+
450
505
  p1x, p1y = poly[0]
451
506
  for i in range(1, n + 1):
452
507
  p2x, p2y = poly[i % n]
@@ -456,30 +511,36 @@ class Element(DirectionalMixin):
456
511
  if p1x == p2x or x <= xinters:
457
512
  inside = not inside
458
513
  p1x, p1y = p2x, p2y
459
-
514
+
460
515
  return inside
461
-
516
+
462
517
  @property
463
- def page(self) -> 'Page':
518
+ def page(self) -> "Page":
464
519
  """Get the parent page."""
465
520
  return self._page
466
-
467
- def next(self, selector: Optional[str] = None, limit: int = 10, apply_exclusions: bool = True, **kwargs) -> Optional['Element']:
521
+
522
+ def next(
523
+ self,
524
+ selector: Optional[str] = None,
525
+ limit: int = 10,
526
+ apply_exclusions: bool = True,
527
+ **kwargs,
528
+ ) -> Optional["Element"]:
468
529
  """
469
530
  Find next element in reading order.
470
-
531
+
471
532
  Args:
472
533
  selector: Optional selector to filter by
473
534
  limit: Maximum number of elements to search through (default: 10)
474
535
  apply_exclusions: Whether to apply exclusion regions (default: True)
475
536
  **kwargs: Additional parameters
476
-
537
+
477
538
  Returns:
478
539
  Next element or None if not found
479
540
  """
480
541
  # Get all elements in reading order
481
- all_elements = self.page.find_all('*', apply_exclusions=apply_exclusions)
482
-
542
+ all_elements = self.page.find_all("*", apply_exclusions=apply_exclusions)
543
+
483
544
  # Find our index in the list
484
545
  try:
485
546
  # Compare by object identity since bbox could match multiple elements
@@ -487,40 +548,47 @@ class Element(DirectionalMixin):
487
548
  except StopIteration:
488
549
  # If not found, it might have been filtered out by exclusions
489
550
  return None
490
-
551
+
491
552
  # Search for next matching element
492
553
  if selector:
493
554
  # Filter elements after this one
494
- candidates = all_elements[idx+1:]
555
+ candidates = all_elements[idx + 1 :]
495
556
  # Limit search range for performance
496
557
  candidates = candidates[:limit] if limit else candidates
497
-
558
+
498
559
  # Find matching elements
499
560
  from natural_pdf.elements.collections import ElementCollection
561
+
500
562
  matches = ElementCollection(candidates).find_all(selector, **kwargs)
501
563
  return matches[0] if matches else None
502
564
  elif idx + 1 < len(all_elements):
503
565
  # No selector, just return the next element
504
566
  return all_elements[idx + 1]
505
-
567
+
506
568
  return None
507
-
508
- def prev(self, selector: Optional[str] = None, limit: int = 10, apply_exclusions: bool = True, **kwargs) -> Optional['Element']:
569
+
570
+ def prev(
571
+ self,
572
+ selector: Optional[str] = None,
573
+ limit: int = 10,
574
+ apply_exclusions: bool = True,
575
+ **kwargs,
576
+ ) -> Optional["Element"]:
509
577
  """
510
578
  Find previous element in reading order.
511
-
579
+
512
580
  Args:
513
581
  selector: Optional selector to filter by
514
582
  limit: Maximum number of elements to search through (default: 10)
515
583
  apply_exclusions: Whether to apply exclusion regions (default: True)
516
584
  **kwargs: Additional parameters
517
-
585
+
518
586
  Returns:
519
587
  Previous element or None if not found
520
588
  """
521
589
  # Get all elements in reading order
522
- all_elements = self.page.find_all('*', apply_exclusions=apply_exclusions)
523
-
590
+ all_elements = self.page.find_all("*", apply_exclusions=apply_exclusions)
591
+
524
592
  # Find our index in the list
525
593
  try:
526
594
  # Compare by object identity since bbox could match multiple elements
@@ -528,7 +596,7 @@ class Element(DirectionalMixin):
528
596
  except StopIteration:
529
597
  # If not found, it might have been filtered out by exclusions
530
598
  return None
531
-
599
+
532
600
  # Search for previous matching element
533
601
  if selector:
534
602
  # Select elements before this one
@@ -537,27 +605,34 @@ class Element(DirectionalMixin):
537
605
  candidates = candidates[::-1]
538
606
  # Limit search range for performance
539
607
  candidates = candidates[:limit] if limit else candidates
540
-
608
+
541
609
  # Find matching elements using ElementCollection
542
610
  from natural_pdf.elements.collections import ElementCollection
611
+
543
612
  matches = ElementCollection(candidates).find_all(selector, **kwargs)
544
- return matches[0] if matches else None # find_all returns a collection
613
+ return matches[0] if matches else None # find_all returns a collection
545
614
  elif idx > 0:
546
615
  # No selector, just return the previous element
547
616
  return all_elements[idx - 1]
548
-
617
+
549
618
  return None
550
-
551
- def nearest(self, selector: str, max_distance: Optional[float] = None, apply_exclusions: bool = True, **kwargs) -> Optional['Element']:
619
+
620
+ def nearest(
621
+ self,
622
+ selector: str,
623
+ max_distance: Optional[float] = None,
624
+ apply_exclusions: bool = True,
625
+ **kwargs,
626
+ ) -> Optional["Element"]:
552
627
  """
553
628
  Find nearest element matching selector.
554
-
629
+
555
630
  Args:
556
631
  selector: CSS-like selector string
557
632
  max_distance: Maximum distance to search (default: None = unlimited)
558
633
  apply_exclusions: Whether to apply exclusion regions (default: True)
559
634
  **kwargs: Additional parameters
560
-
635
+
561
636
  Returns:
562
637
  Nearest element or None if not found
563
638
  """
@@ -565,56 +640,59 @@ class Element(DirectionalMixin):
565
640
  matches = self.page.find_all(selector, apply_exclusions=apply_exclusions, **kwargs)
566
641
  if not matches:
567
642
  return None
568
-
643
+
569
644
  # Calculate distance to center point of this element
570
645
  self_center_x = (self.x0 + self.x1) / 2
571
646
  self_center_y = (self.top + self.bottom) / 2
572
-
647
+
573
648
  # Calculate distances to each match
574
649
  distances = []
575
650
  for match in matches:
576
651
  if match is self: # Skip self
577
652
  continue
578
-
653
+
579
654
  match_center_x = (match.x0 + match.x1) / 2
580
655
  match_center_y = (match.top + match.bottom) / 2
581
-
656
+
582
657
  # Euclidean distance
583
- distance = ((match_center_x - self_center_x) ** 2 +
584
- (match_center_y - self_center_y) ** 2) ** 0.5
585
-
658
+ distance = (
659
+ (match_center_x - self_center_x) ** 2 + (match_center_y - self_center_y) ** 2
660
+ ) ** 0.5
661
+
586
662
  # Filter by max_distance if specified
587
663
  if max_distance is None or distance <= max_distance:
588
664
  distances.append((match, distance))
589
-
665
+
590
666
  # Sort by distance and return the closest
591
667
  if distances:
592
668
  distances.sort(key=lambda x: x[1])
593
669
  return distances[0][0]
594
-
670
+
595
671
  return None
596
-
597
- def until(self, selector: str, include_endpoint: bool = True, width: str = "element", **kwargs) -> 'Region':
672
+
673
+ def until(
674
+ self, selector: str, include_endpoint: bool = True, width: str = "element", **kwargs
675
+ ) -> "Region":
598
676
  """
599
677
  Select content from this element until matching selector.
600
-
678
+
601
679
  Args:
602
680
  selector: CSS-like selector string
603
681
  include_endpoint: Whether to include the endpoint element in the region (default: True)
604
682
  width: Width mode - "element" to use element widths or "full" for full page width
605
683
  **kwargs: Additional selection parameters
606
-
684
+
607
685
  Returns:
608
686
  Region object representing the selected content
609
687
  """
610
688
  from natural_pdf.elements.region import Region
611
-
689
+
612
690
  # Find the target element
613
691
  target = self.page.find(selector, **kwargs)
614
692
  if not target:
615
693
  # If target not found, return a region with just this element
616
694
  return Region(self.page, self.bbox)
617
-
695
+
618
696
  # Use full page width if requested
619
697
  if width == "full":
620
698
  x0 = 0
@@ -622,12 +700,16 @@ class Element(DirectionalMixin):
622
700
  # Determine vertical bounds based on element positions
623
701
  if target.top >= self.bottom: # Target is below this element
624
702
  top = self.top
625
- bottom = target.bottom if include_endpoint else target.top - 1 # Subtract 1 pixel when excluding
703
+ bottom = (
704
+ target.bottom if include_endpoint else target.top - 1
705
+ ) # Subtract 1 pixel when excluding
626
706
  else: # Target is above this element
627
- top = target.top if include_endpoint else target.bottom + 1 # Add 1 pixel when excluding
707
+ top = (
708
+ target.top if include_endpoint else target.bottom + 1
709
+ ) # Add 1 pixel when excluding
628
710
  bottom = self.bottom
629
711
  return Region(self.page, (x0, top, x1, bottom))
630
-
712
+
631
713
  # Otherwise use element-based width
632
714
  # Determine the correct order for creating the region
633
715
  # If the target is below this element (normal reading order)
@@ -635,12 +717,16 @@ class Element(DirectionalMixin):
635
717
  x0 = min(self.x0, target.x0 if include_endpoint else target.x1)
636
718
  x1 = max(self.x1, target.x1 if include_endpoint else target.x0)
637
719
  top = self.top
638
- bottom = target.bottom if include_endpoint else target.top - 1 # Subtract 1 pixel when excluding
720
+ bottom = (
721
+ target.bottom if include_endpoint else target.top - 1
722
+ ) # Subtract 1 pixel when excluding
639
723
  # If the target is above this element (reverse reading order)
640
724
  elif target.bottom <= self.top:
641
725
  x0 = min(self.x0, target.x0 if include_endpoint else target.x1)
642
726
  x1 = max(self.x1, target.x1 if include_endpoint else target.x0)
643
- top = target.top if include_endpoint else target.bottom + 1 # Add 1 pixel when excluding
727
+ top = (
728
+ target.top if include_endpoint else target.bottom + 1
729
+ ) # Add 1 pixel when excluding
644
730
  bottom = self.bottom
645
731
  # If they're side by side, use the horizontal version
646
732
  elif target.x0 >= self.x1: # Target is to the right
@@ -653,47 +739,49 @@ class Element(DirectionalMixin):
653
739
  x1 = self.x1
654
740
  top = min(self.top, target.top if include_endpoint else target.bottom)
655
741
  bottom = max(self.bottom, target.bottom if include_endpoint else target.top)
656
-
742
+
657
743
  region = Region(self.page, (x0, top, x1, bottom))
658
744
  region.source_element = self
659
745
  region.end_element = target
660
746
  return region
661
-
747
+
662
748
  # Note: select_until method removed in favor of until()
663
-
749
+
664
750
  def extract_text(self, preserve_whitespace=True, use_exclusions=True, **kwargs) -> str:
665
751
  """
666
752
  Extract text from this element.
667
-
753
+
668
754
  Args:
669
755
  preserve_whitespace: Whether to keep blank characters (default: True)
670
756
  use_exclusions: Whether to apply exclusion regions (default: True)
671
757
  **kwargs: Additional extraction parameters
672
-
758
+
673
759
  Returns:
674
760
  Extracted text as string
675
761
  """
676
762
  # Default implementation - override in subclasses
677
763
  return ""
678
-
764
+
679
765
  # Note: extract_text_compat method removed
680
-
681
- def highlight(self,
682
- label: Optional[str] = None,
683
- color: Optional[Union[Tuple, str]] = None, # Allow string color
684
- use_color_cycling: bool = False,
685
- include_attrs: Optional[List[str]] = None,
686
- existing: str = 'append') -> 'Element':
766
+
767
+ def highlight(
768
+ self,
769
+ label: Optional[str] = None,
770
+ color: Optional[Union[Tuple, str]] = None, # Allow string color
771
+ use_color_cycling: bool = False,
772
+ include_attrs: Optional[List[str]] = None,
773
+ existing: str = "append",
774
+ ) -> "Element":
687
775
  """
688
776
  Highlight this element on the page.
689
-
777
+
690
778
  Args:
691
779
  label: Optional label for the highlight
692
780
  color: Color tuple/string for the highlight, or None to use automatic color
693
781
  use_color_cycling: Force color cycling even with no label (default: False)
694
782
  include_attrs: List of attribute names to display on the highlight (e.g., ['confidence', 'type'])
695
783
  existing: How to handle existing highlights - 'append' (default) or 'replace'
696
-
784
+
697
785
  Returns:
698
786
  Self for method chaining
699
787
  """
@@ -708,7 +796,7 @@ class Element(DirectionalMixin):
708
796
  "use_color_cycling": use_color_cycling,
709
797
  "element": self, # Pass the element itself so attributes can be accessed
710
798
  "include_attrs": include_attrs,
711
- "existing": existing
799
+ "existing": existing,
712
800
  }
713
801
 
714
802
  # Call the appropriate service method based on geometry
@@ -720,13 +808,15 @@ class Element(DirectionalMixin):
720
808
  highlighter.add(**highlight_args)
721
809
 
722
810
  return self
723
-
724
- def show(self,
725
- scale: float = 2.0,
726
- labels: bool = True,
727
- legend_position: str = 'right',
728
- color: Optional[Union[Tuple, str]] = "red", # Default color for single element
729
- label: Optional[str] = None) -> Optional['Image.Image']:
811
+
812
+ def show(
813
+ self,
814
+ scale: float = 2.0,
815
+ labels: bool = True,
816
+ legend_position: str = "right",
817
+ color: Optional[Union[Tuple, str]] = "red", # Default color for single element
818
+ label: Optional[str] = None,
819
+ ) -> Optional["Image.Image"]:
730
820
  """
731
821
  Show the page with only this element highlighted temporarily.
732
822
 
@@ -740,12 +830,12 @@ class Element(DirectionalMixin):
740
830
  Returns:
741
831
  PIL Image of the page with only this element highlighted, or None if error.
742
832
  """
743
- if not hasattr(self, 'page') or not self.page:
833
+ if not hasattr(self, "page") or not self.page:
744
834
  logger.warning(f"Cannot show element, missing 'page' attribute: {self}")
745
835
  return None
746
- if not hasattr(self.page, '_highlighter') or not self.page._highlighter:
747
- logger.warning(f"Cannot show element, page lacks highlighter service: {self}")
748
- return None
836
+ if not hasattr(self.page, "_highlighter") or not self.page._highlighter:
837
+ logger.warning(f"Cannot show element, page lacks highlighter service: {self}")
838
+ return None
749
839
 
750
840
  service = self.page._highlighter
751
841
 
@@ -757,15 +847,15 @@ class Element(DirectionalMixin):
757
847
  "page_index": self.page.index,
758
848
  "bbox": self.bbox if not self.has_polygon else None,
759
849
  "polygon": self.polygon if self.has_polygon else None,
760
- "color": color, # Use provided or default color
850
+ "color": color, # Use provided or default color
761
851
  "label": display_label,
762
- "use_color_cycling": False # Explicitly false for single preview
852
+ "use_color_cycling": False, # Explicitly false for single preview
763
853
  }
764
854
 
765
855
  # Check if we actually got geometry data
766
- if temp_highlight_data['bbox'] is None and temp_highlight_data['polygon'] is None:
767
- logger.warning(f"Cannot show element, failed to get bbox or polygon: {self}")
768
- return None
856
+ if temp_highlight_data["bbox"] is None and temp_highlight_data["polygon"] is None:
857
+ logger.warning(f"Cannot show element, failed to get bbox or polygon: {self}")
858
+ return None
769
859
 
770
860
  # Use render_preview to show only this highlight
771
861
  try:
@@ -774,49 +864,47 @@ class Element(DirectionalMixin):
774
864
  temporary_highlights=[temp_highlight_data],
775
865
  scale=scale,
776
866
  labels=labels,
777
- legend_position=legend_position
867
+ legend_position=legend_position,
778
868
  )
779
869
  except Exception as e:
780
870
  logger.error(f"Error calling render_preview for element {self}: {e}", exc_info=True)
781
871
  return None
782
-
783
- def save(self,
784
- filename: str,
785
- scale: float = 2.0,
786
- labels: bool = True,
787
- legend_position: str = 'right') -> None:
872
+
873
+ def save(
874
+ self, filename: str, scale: float = 2.0, labels: bool = True, legend_position: str = "right"
875
+ ) -> None:
788
876
  """
789
877
  Save the page with this element highlighted to an image file.
790
-
878
+
791
879
  Args:
792
880
  filename: Path to save the image to
793
881
  scale: Scale factor for rendering
794
882
  labels: Whether to include a legend for labels
795
883
  legend_position: Position of the legend
796
-
884
+
797
885
  Returns:
798
886
  Self for method chaining
799
887
  """
800
888
  # Save the highlighted image
801
889
  self.page.save_image(filename, scale=scale, labels=labels, legend_position=legend_position)
802
890
  return self
803
-
891
+
804
892
  # Note: save_image method removed in favor of save()
805
-
893
+
806
894
  def __repr__(self) -> str:
807
895
  """String representation of the element."""
808
896
  return f"<{self.__class__.__name__} bbox={self.bbox}>"
809
897
 
810
- def find(self, selector: str, apply_exclusions=True, **kwargs) -> Optional['Element']:
898
+ def find(self, selector: str, apply_exclusions=True, **kwargs) -> Optional["Element"]:
811
899
  """
812
900
  Find first element within this element's bounds matching the selector.
813
901
  Creates a temporary region to perform the search.
814
-
902
+
815
903
  Args:
816
904
  selector: CSS-like selector string
817
905
  apply_exclusions: Whether to apply exclusion regions
818
906
  **kwargs: Additional parameters for element filtering
819
-
907
+
820
908
  Returns:
821
909
  First matching element or None
822
910
  """
@@ -826,16 +914,16 @@ class Element(DirectionalMixin):
826
914
  temp_region = Region(self.page, self.bbox)
827
915
  return temp_region.find(selector, apply_exclusions=apply_exclusions, **kwargs)
828
916
 
829
- def find_all(self, selector: str, apply_exclusions=True, **kwargs) -> 'ElementCollection':
917
+ def find_all(self, selector: str, apply_exclusions=True, **kwargs) -> "ElementCollection":
830
918
  """
831
919
  Find all elements within this element's bounds matching the selector.
832
920
  Creates a temporary region to perform the search.
833
-
921
+
834
922
  Args:
835
923
  selector: CSS-like selector string
836
924
  apply_exclusions: Whether to apply exclusion regions
837
925
  **kwargs: Additional parameters for element filtering
838
-
926
+
839
927
  Returns:
840
928
  ElementCollection with matching elements
841
929
  """
@@ -843,4 +931,4 @@ class Element(DirectionalMixin):
843
931
 
844
932
  # Create a temporary region from this element's bounds
845
933
  temp_region = Region(self.page, self.bbox)
846
- return temp_region.find_all(selector, apply_exclusions=apply_exclusions, **kwargs)
934
+ return temp_region.find_all(selector, apply_exclusions=apply_exclusions, **kwargs)