natural-pdf 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. docs/api/index.md +386 -0
  2. docs/assets/favicon.png +3 -0
  3. docs/assets/favicon.svg +3 -0
  4. docs/assets/javascripts/custom.js +17 -0
  5. docs/assets/logo.svg +3 -0
  6. docs/assets/sample-screen.png +0 -0
  7. docs/assets/social-preview.png +17 -0
  8. docs/assets/social-preview.svg +17 -0
  9. docs/assets/stylesheets/custom.css +65 -0
  10. docs/document-qa/index.ipynb +435 -0
  11. docs/document-qa/index.md +79 -0
  12. docs/element-selection/index.ipynb +915 -0
  13. docs/element-selection/index.md +229 -0
  14. docs/index.md +170 -0
  15. docs/installation/index.md +69 -0
  16. docs/interactive-widget/index.ipynb +962 -0
  17. docs/interactive-widget/index.md +12 -0
  18. docs/layout-analysis/index.ipynb +818 -0
  19. docs/layout-analysis/index.md +185 -0
  20. docs/ocr/index.md +209 -0
  21. docs/pdf-navigation/index.ipynb +314 -0
  22. docs/pdf-navigation/index.md +97 -0
  23. docs/regions/index.ipynb +816 -0
  24. docs/regions/index.md +294 -0
  25. docs/tables/index.ipynb +658 -0
  26. docs/tables/index.md +144 -0
  27. docs/text-analysis/index.ipynb +370 -0
  28. docs/text-analysis/index.md +105 -0
  29. docs/text-extraction/index.ipynb +1478 -0
  30. docs/text-extraction/index.md +292 -0
  31. docs/tutorials/01-loading-and-extraction.ipynb +1710 -0
  32. docs/tutorials/01-loading-and-extraction.md +95 -0
  33. docs/tutorials/02-finding-elements.ipynb +340 -0
  34. docs/tutorials/02-finding-elements.md +149 -0
  35. docs/tutorials/03-extracting-blocks.ipynb +147 -0
  36. docs/tutorials/03-extracting-blocks.md +48 -0
  37. docs/tutorials/04-table-extraction.ipynb +114 -0
  38. docs/tutorials/04-table-extraction.md +50 -0
  39. docs/tutorials/05-excluding-content.ipynb +270 -0
  40. docs/tutorials/05-excluding-content.md +109 -0
  41. docs/tutorials/06-document-qa.ipynb +332 -0
  42. docs/tutorials/06-document-qa.md +91 -0
  43. docs/tutorials/07-layout-analysis.ipynb +288 -0
  44. docs/tutorials/07-layout-analysis.md +66 -0
  45. docs/tutorials/07-working-with-regions.ipynb +413 -0
  46. docs/tutorials/07-working-with-regions.md +151 -0
  47. docs/tutorials/08-spatial-navigation.ipynb +508 -0
  48. docs/tutorials/08-spatial-navigation.md +190 -0
  49. docs/tutorials/09-section-extraction.ipynb +2434 -0
  50. docs/tutorials/09-section-extraction.md +256 -0
  51. docs/tutorials/10-form-field-extraction.ipynb +512 -0
  52. docs/tutorials/10-form-field-extraction.md +201 -0
  53. docs/tutorials/11-enhanced-table-processing.ipynb +54 -0
  54. docs/tutorials/11-enhanced-table-processing.md +9 -0
  55. docs/tutorials/12-ocr-integration.ipynb +604 -0
  56. docs/tutorials/12-ocr-integration.md +175 -0
  57. docs/tutorials/13-semantic-search.ipynb +1328 -0
  58. docs/tutorials/13-semantic-search.md +77 -0
  59. docs/visual-debugging/index.ipynb +2970 -0
  60. docs/visual-debugging/index.md +157 -0
  61. docs/visual-debugging/region.png +0 -0
  62. natural_pdf/__init__.py +50 -33
  63. natural_pdf/analyzers/__init__.py +2 -1
  64. natural_pdf/analyzers/layout/base.py +32 -24
  65. natural_pdf/analyzers/layout/docling.py +131 -72
  66. natural_pdf/analyzers/layout/gemini.py +264 -0
  67. natural_pdf/analyzers/layout/layout_analyzer.py +156 -113
  68. natural_pdf/analyzers/layout/layout_manager.py +125 -58
  69. natural_pdf/analyzers/layout/layout_options.py +43 -17
  70. natural_pdf/analyzers/layout/paddle.py +152 -95
  71. natural_pdf/analyzers/layout/surya.py +164 -92
  72. natural_pdf/analyzers/layout/tatr.py +149 -84
  73. natural_pdf/analyzers/layout/yolo.py +89 -45
  74. natural_pdf/analyzers/text_options.py +22 -15
  75. natural_pdf/analyzers/text_structure.py +131 -85
  76. natural_pdf/analyzers/utils.py +30 -23
  77. natural_pdf/collections/pdf_collection.py +146 -97
  78. natural_pdf/core/__init__.py +1 -1
  79. natural_pdf/core/element_manager.py +419 -337
  80. natural_pdf/core/highlighting_service.py +268 -196
  81. natural_pdf/core/page.py +1044 -521
  82. natural_pdf/core/pdf.py +516 -313
  83. natural_pdf/elements/__init__.py +1 -1
  84. natural_pdf/elements/base.py +307 -225
  85. natural_pdf/elements/collections.py +805 -543
  86. natural_pdf/elements/line.py +39 -36
  87. natural_pdf/elements/rect.py +32 -30
  88. natural_pdf/elements/region.py +889 -879
  89. natural_pdf/elements/text.py +127 -99
  90. natural_pdf/exporters/__init__.py +0 -1
  91. natural_pdf/exporters/searchable_pdf.py +261 -102
  92. natural_pdf/ocr/__init__.py +57 -35
  93. natural_pdf/ocr/engine.py +150 -46
  94. natural_pdf/ocr/engine_easyocr.py +146 -150
  95. natural_pdf/ocr/engine_paddle.py +118 -175
  96. natural_pdf/ocr/engine_surya.py +78 -141
  97. natural_pdf/ocr/ocr_factory.py +114 -0
  98. natural_pdf/ocr/ocr_manager.py +122 -124
  99. natural_pdf/ocr/ocr_options.py +16 -20
  100. natural_pdf/ocr/utils.py +98 -0
  101. natural_pdf/qa/__init__.py +1 -1
  102. natural_pdf/qa/document_qa.py +119 -111
  103. natural_pdf/search/__init__.py +37 -31
  104. natural_pdf/search/haystack_search_service.py +312 -189
  105. natural_pdf/search/haystack_utils.py +186 -122
  106. natural_pdf/search/search_options.py +25 -14
  107. natural_pdf/search/search_service_protocol.py +12 -6
  108. natural_pdf/search/searchable_mixin.py +261 -176
  109. natural_pdf/selectors/__init__.py +2 -1
  110. natural_pdf/selectors/parser.py +159 -316
  111. natural_pdf/templates/__init__.py +1 -1
  112. natural_pdf/templates/spa/css/style.css +334 -0
  113. natural_pdf/templates/spa/index.html +31 -0
  114. natural_pdf/templates/spa/js/app.js +472 -0
  115. natural_pdf/templates/spa/words.txt +235976 -0
  116. natural_pdf/utils/debug.py +32 -0
  117. natural_pdf/utils/highlighting.py +8 -2
  118. natural_pdf/utils/identifiers.py +29 -0
  119. natural_pdf/utils/packaging.py +418 -0
  120. natural_pdf/utils/reading_order.py +65 -63
  121. natural_pdf/utils/text_extraction.py +195 -0
  122. natural_pdf/utils/visualization.py +70 -61
  123. natural_pdf/widgets/__init__.py +2 -3
  124. natural_pdf/widgets/viewer.py +749 -718
  125. {natural_pdf-0.1.4.dist-info → natural_pdf-0.1.6.dist-info}/METADATA +53 -17
  126. natural_pdf-0.1.6.dist-info/RECORD +141 -0
  127. {natural_pdf-0.1.4.dist-info → natural_pdf-0.1.6.dist-info}/WHEEL +1 -1
  128. natural_pdf-0.1.6.dist-info/top_level.txt +4 -0
  129. notebooks/Examples.ipynb +1293 -0
  130. pdfs/.gitkeep +0 -0
  131. pdfs/01-practice.pdf +543 -0
  132. pdfs/0500000US42001.pdf +0 -0
  133. pdfs/0500000US42007.pdf +0 -0
  134. pdfs/2014 Statistics.pdf +0 -0
  135. pdfs/2019 Statistics.pdf +0 -0
  136. pdfs/Atlanta_Public_Schools_GA_sample.pdf +0 -0
  137. pdfs/needs-ocr.pdf +0 -0
  138. natural_pdf/templates/ocr_debug.html +0 -517
  139. natural_pdf-0.1.4.dist-info/RECORD +0 -61
  140. natural_pdf-0.1.4.dist-info/top_level.txt +0 -1
  141. {natural_pdf-0.1.4.dist-info → natural_pdf-0.1.6.dist-info}/licenses/LICENSE +0 -0
@@ -1,24 +1,33 @@
1
1
  """
2
2
  Base Element class for natural-pdf.
3
3
  """
4
- from typing import Any, Dict, List, Optional, TYPE_CHECKING, Union, Tuple
4
+
5
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
6
+
5
7
  from PIL import Image
6
8
 
7
9
  if TYPE_CHECKING:
8
10
  from natural_pdf.core.page import Page
9
- from natural_pdf.elements.region import Region
10
11
  from natural_pdf.elements.base import Element
11
12
  from natural_pdf.elements.collections import ElementCollection
13
+ from natural_pdf.elements.region import Region
12
14
 
13
15
 
14
16
  class DirectionalMixin:
15
17
  """
16
18
  Mixin class providing directional methods for both Element and Region classes.
17
19
  """
18
-
19
- def _direction(self, direction: str, size: Optional[float] = None,
20
- cross_size: str = "full", include_element: bool = False,
21
- until: Optional[str] = None, include_endpoint: bool = True, **kwargs) -> 'Region':
20
+
21
+ def _direction(
22
+ self,
23
+ direction: str,
24
+ size: Optional[float] = None,
25
+ cross_size: str = "full",
26
+ include_element: bool = False,
27
+ until: Optional[str] = None,
28
+ include_endpoint: bool = True,
29
+ **kwargs,
30
+ ) -> "Region":
22
31
  """
23
32
  Protected helper method to create a region in a specified direction relative to this element/region.
24
33
 
@@ -34,11 +43,11 @@ class DirectionalMixin:
34
43
  Returns:
35
44
  Region object
36
45
  """
37
- import math # Use math.inf for infinity
46
+ import math # Use math.inf for infinity
38
47
 
39
- is_horizontal = direction in ('left', 'right')
40
- is_positive = direction in ('right', 'below') # right/below are positive directions
41
- pixel_offset = 1 # Offset for excluding elements/endpoints
48
+ is_horizontal = direction in ("left", "right")
49
+ is_positive = direction in ("right", "below") # right/below are positive directions
50
+ pixel_offset = 1 # Offset for excluding elements/endpoints
42
51
 
43
52
  # 1. Determine initial boundaries based on direction and include_element
44
53
  if is_horizontal:
@@ -47,38 +56,44 @@ class DirectionalMixin:
47
56
  y1 = self.page.height if cross_size == "full" else self.bottom
48
57
 
49
58
  # Initial primary boundaries (horizontal)
50
- if is_positive: # right
59
+ if is_positive: # right
51
60
  x0_initial = self.x0 if include_element else self.x1 + pixel_offset
52
- x1_initial = self.x1 # This edge moves
53
- else: # left
54
- x0_initial = self.x0 # This edge moves
61
+ x1_initial = self.x1 # This edge moves
62
+ else: # left
63
+ x0_initial = self.x0 # This edge moves
55
64
  x1_initial = self.x1 if include_element else self.x0 - pixel_offset
56
- else: # Vertical
65
+ else: # Vertical
57
66
  # Initial cross-boundaries (horizontal)
58
67
  x0 = 0 if cross_size == "full" else self.x0
59
68
  x1 = self.page.width if cross_size == "full" else self.x1
60
69
 
61
70
  # Initial primary boundaries (vertical)
62
- if is_positive: # below
71
+ if is_positive: # below
63
72
  y0_initial = self.top if include_element else self.bottom + pixel_offset
64
- y1_initial = self.bottom # This edge moves
65
- else: # above
66
- y0_initial = self.top # This edge moves
73
+ y1_initial = self.bottom # This edge moves
74
+ else: # above
75
+ y0_initial = self.top # This edge moves
67
76
  y1_initial = self.bottom if include_element else self.top - pixel_offset
68
77
 
69
78
  # 2. Calculate the final primary boundary, considering 'size' or page limits
70
79
  if is_horizontal:
71
- if is_positive: # right
72
- x1_final = min(self.page.width, x1_initial + (size if size is not None else (self.page.width - x1_initial)))
80
+ if is_positive: # right
81
+ x1_final = min(
82
+ self.page.width,
83
+ x1_initial + (size if size is not None else (self.page.width - x1_initial)),
84
+ )
73
85
  x0_final = x0_initial
74
- else: # left
86
+ else: # left
75
87
  x0_final = max(0, x0_initial - (size if size is not None else x0_initial))
76
88
  x1_final = x1_initial
77
- else: # Vertical
78
- if is_positive: # below
79
- y1_final = min(self.page.height, y1_initial + (size if size is not None else (self.page.height - y1_initial)))
89
+ else: # Vertical
90
+ if is_positive: # below
91
+ y1_final = min(
92
+ self.page.height,
93
+ y1_initial + (size if size is not None else (self.page.height - y1_initial)),
94
+ )
80
95
  y0_final = y0_initial
81
- else: # above
96
+ else: # above
82
97
  y0_final = max(0, y0_initial - (size if size is not None else y0_initial))
83
98
  y1_final = y1_initial
84
99
 
@@ -89,16 +104,16 @@ class DirectionalMixin:
89
104
  matches_in_direction = []
90
105
 
91
106
  # Filter and sort matches based on direction
92
- if direction == 'above':
107
+ if direction == "above":
93
108
  matches_in_direction = [m for m in all_matches if m.bottom <= self.top]
94
109
  matches_in_direction.sort(key=lambda e: e.bottom, reverse=True)
95
- elif direction == 'below':
110
+ elif direction == "below":
96
111
  matches_in_direction = [m for m in all_matches if m.top >= self.bottom]
97
112
  matches_in_direction.sort(key=lambda e: e.top)
98
- elif direction == 'left':
113
+ elif direction == "left":
99
114
  matches_in_direction = [m for m in all_matches if m.x1 <= self.x0]
100
115
  matches_in_direction.sort(key=lambda e: e.x1, reverse=True)
101
- elif direction == 'right':
116
+ elif direction == "right":
102
117
  matches_in_direction = [m for m in all_matches if m.x0 >= self.x1]
103
118
  matches_in_direction.sort(key=lambda e: e.x0)
104
119
 
@@ -107,25 +122,29 @@ class DirectionalMixin:
107
122
 
108
123
  # Adjust the primary boundary based on the target
109
124
  if is_horizontal:
110
- if is_positive: # right
125
+ if is_positive: # right
111
126
  x1_final = target.x1 if include_endpoint else target.x0 - pixel_offset
112
- else: # left
127
+ else: # left
113
128
  x0_final = target.x0 if include_endpoint else target.x1 + pixel_offset
114
- else: # Vertical
115
- if is_positive: # below
129
+ else: # Vertical
130
+ if is_positive: # below
116
131
  y1_final = target.bottom if include_endpoint else target.top - pixel_offset
117
- else: # above
132
+ else: # above
118
133
  y0_final = target.top if include_endpoint else target.bottom + pixel_offset
119
134
 
120
135
  # Adjust cross boundaries if cross_size is 'element'
121
136
  if cross_size == "element":
122
- if is_horizontal: # Adjust y0, y1
123
- target_y0 = target.top if include_endpoint else target.bottom # Use opposite boundary if excluding
137
+ if is_horizontal: # Adjust y0, y1
138
+ target_y0 = (
139
+ target.top if include_endpoint else target.bottom
140
+ ) # Use opposite boundary if excluding
124
141
  target_y1 = target.bottom if include_endpoint else target.top
125
142
  y0 = min(y0, target_y0)
126
143
  y1 = max(y1, target_y1)
127
- else: # Adjust x0, x1
128
- target_x0 = target.x0 if include_endpoint else target.x1 # Use opposite boundary if excluding
144
+ else: # Adjust x0, x1
145
+ target_x0 = (
146
+ target.x0 if include_endpoint else target.x1
147
+ ) # Use opposite boundary if excluding
129
148
  target_x1 = target.x1 if include_endpoint else target.x0
130
149
  x0 = min(x0, target_x0)
131
150
  x1 = max(x1, target_x1)
@@ -145,6 +164,7 @@ class DirectionalMixin:
145
164
 
146
165
  # 5. Create and return appropriate object based on self type
147
166
  from natural_pdf.elements.region import Region
167
+
148
168
  result = Region(self.page, final_bbox)
149
169
  result.source_element = self
150
170
  result.includes_source = include_element
@@ -154,11 +174,18 @@ class DirectionalMixin:
154
174
 
155
175
  return result
156
176
 
157
- def above(self, height: Optional[float] = None, width: str = "full", include_element: bool = False,
158
- until: Optional[str] = None, include_endpoint: bool = True, **kwargs) -> 'Region':
177
+ def above(
178
+ self,
179
+ height: Optional[float] = None,
180
+ width: str = "full",
181
+ include_element: bool = False,
182
+ until: Optional[str] = None,
183
+ include_endpoint: bool = True,
184
+ **kwargs,
185
+ ) -> "Region":
159
186
  """
160
187
  Select region above this element/region.
161
-
188
+
162
189
  Args:
163
190
  height: Height of the region above, in points
164
191
  width: Width mode - "full" for full page width or "element" for element width
@@ -166,25 +193,32 @@ class DirectionalMixin:
166
193
  until: Optional selector string to specify an upper boundary element
167
194
  include_endpoint: Whether to include the boundary element in the region (default: True)
168
195
  **kwargs: Additional parameters
169
-
196
+
170
197
  Returns:
171
198
  Region object representing the area above
172
199
  """
173
200
  return self._direction(
174
- direction='above',
201
+ direction="above",
175
202
  size=height,
176
203
  cross_size=width,
177
204
  include_element=include_element,
178
205
  until=until,
179
206
  include_endpoint=include_endpoint,
180
- **kwargs
207
+ **kwargs,
181
208
  )
182
209
 
183
- def below(self, height: Optional[float] = None, width: str = "full", include_element: bool = False,
184
- until: Optional[str] = None, include_endpoint: bool = True, **kwargs) -> 'Region':
210
+ def below(
211
+ self,
212
+ height: Optional[float] = None,
213
+ width: str = "full",
214
+ include_element: bool = False,
215
+ until: Optional[str] = None,
216
+ include_endpoint: bool = True,
217
+ **kwargs,
218
+ ) -> "Region":
185
219
  """
186
220
  Select region below this element/region.
187
-
221
+
188
222
  Args:
189
223
  height: Height of the region below, in points
190
224
  width: Width mode - "full" for full page width or "element" for element width
@@ -192,25 +226,32 @@ class DirectionalMixin:
192
226
  until: Optional selector string to specify a lower boundary element
193
227
  include_endpoint: Whether to include the boundary element in the region (default: True)
194
228
  **kwargs: Additional parameters
195
-
229
+
196
230
  Returns:
197
231
  Region object representing the area below
198
232
  """
199
233
  return self._direction(
200
- direction='below',
234
+ direction="below",
201
235
  size=height,
202
236
  cross_size=width,
203
237
  include_element=include_element,
204
238
  until=until,
205
239
  include_endpoint=include_endpoint,
206
- **kwargs
240
+ **kwargs,
207
241
  )
208
242
 
209
- def left(self, width: Optional[float] = None, height: str = "full", include_element: bool = False,
210
- until: Optional[str] = None, include_endpoint: bool = True, **kwargs) -> 'Region':
243
+ def left(
244
+ self,
245
+ width: Optional[float] = None,
246
+ height: str = "full",
247
+ include_element: bool = False,
248
+ until: Optional[str] = None,
249
+ include_endpoint: bool = True,
250
+ **kwargs,
251
+ ) -> "Region":
211
252
  """
212
253
  Select region to the left of this element/region.
213
-
254
+
214
255
  Args:
215
256
  width: Width of the region to the left, in points
216
257
  height: Height mode - "full" for full page height or "element" for element height
@@ -218,25 +259,32 @@ class DirectionalMixin:
218
259
  until: Optional selector string to specify a left boundary element
219
260
  include_endpoint: Whether to include the boundary element in the region (default: True)
220
261
  **kwargs: Additional parameters
221
-
262
+
222
263
  Returns:
223
264
  Region object representing the area to the left
224
265
  """
225
266
  return self._direction(
226
- direction='left',
267
+ direction="left",
227
268
  size=width,
228
269
  cross_size=height,
229
270
  include_element=include_element,
230
271
  until=until,
231
272
  include_endpoint=include_endpoint,
232
- **kwargs
273
+ **kwargs,
233
274
  )
234
275
 
235
- def right(self, width: Optional[float] = None, height: str = "full", include_element: bool = False,
236
- until: Optional[str] = None, include_endpoint: bool = True, **kwargs) -> 'Region':
276
+ def right(
277
+ self,
278
+ width: Optional[float] = None,
279
+ height: str = "full",
280
+ include_element: bool = False,
281
+ until: Optional[str] = None,
282
+ include_endpoint: bool = True,
283
+ **kwargs,
284
+ ) -> "Region":
237
285
  """
238
286
  Select region to the right of this element/region.
239
-
287
+
240
288
  Args:
241
289
  width: Width of the region to the right, in points
242
290
  height: Height mode - "full" for full page height or "element" for element height
@@ -244,43 +292,45 @@ class DirectionalMixin:
244
292
  until: Optional selector string to specify a right boundary element
245
293
  include_endpoint: Whether to include the boundary element in the region (default: True)
246
294
  **kwargs: Additional parameters
247
-
295
+
248
296
  Returns:
249
297
  Region object representing the area to the right
250
298
  """
251
299
  return self._direction(
252
- direction='right',
300
+ direction="right",
253
301
  size=width,
254
302
  cross_size=height,
255
303
  include_element=include_element,
256
304
  until=until,
257
305
  include_endpoint=include_endpoint,
258
- **kwargs
306
+ **kwargs,
259
307
  )
260
308
 
261
- def expand(self,
262
- left: float = 0,
263
- right: float = 0,
264
- top_expand: float = 0, # Renamed to avoid conflict
265
- bottom_expand: float = 0, # Renamed to avoid conflict
266
- width_factor: float = 1.0,
267
- height_factor: float = 1.0,
268
- # Keep original parameter names for backward compatibility
269
- top: float = None,
270
- bottom: float = None) -> 'Region':
309
+ def to_region(
310
+ self
311
+ ):
312
+ return self.expand()
313
+
314
+ def expand(
315
+ self,
316
+ left: float = 0,
317
+ right: float = 0,
318
+ top: float = 0,
319
+ bottom: float = 0,
320
+ width_factor: float = 1.0,
321
+ height_factor: float = 1.0,
322
+ ) -> "Region":
271
323
  """
272
324
  Create a new region expanded from this element/region.
273
-
325
+
274
326
  Args:
275
327
  left: Amount to expand left edge (positive value expands leftwards)
276
328
  right: Amount to expand right edge (positive value expands rightwards)
277
- top_expand: Amount to expand top edge (positive value expands upwards)
278
- bottom_expand: Amount to expand bottom edge (positive value expands downwards)
329
+ top: Amount to expand top edge (positive value expands upwards)
330
+ bottom: Amount to expand bottom edge (positive value expands downwards)
279
331
  width_factor: Factor to multiply width by (applied after absolute expansion)
280
332
  height_factor: Factor to multiply height by (applied after absolute expansion)
281
- top: (DEPRECATED, use top_expand) Amount to expand top edge (upward)
282
- bottom: (DEPRECATED, use bottom_expand) Amount to expand bottom edge (downward)
283
-
333
+
284
334
  Returns:
285
335
  New expanded Region object
286
336
  """
@@ -289,39 +339,33 @@ class DirectionalMixin:
289
339
  new_x1 = self.x1
290
340
  new_top = self.top
291
341
  new_bottom = self.bottom
292
-
293
- # Handle the deprecated parameter names for backward compatibility
294
- if top is not None:
295
- top_expand = top
296
- if bottom is not None:
297
- bottom_expand = bottom
298
-
342
+
299
343
  # Apply absolute expansions first
300
344
  new_x0 -= left
301
345
  new_x1 += right
302
- new_top -= top_expand # Expand upward (decrease top coordinate)
303
- new_bottom += bottom_expand # Expand downward (increase bottom coordinate)
304
-
346
+ new_top -= top # Expand upward (decrease top coordinate)
347
+ new_bottom += bottom # Expand downward (increase bottom coordinate)
348
+
305
349
  # Apply percentage factors if provided
306
350
  if width_factor != 1.0 or height_factor != 1.0:
307
351
  # Calculate center point *after* absolute expansion
308
352
  center_x = (new_x0 + new_x1) / 2
309
353
  center_y = (new_top + new_bottom) / 2
310
-
354
+
311
355
  # Calculate current width and height *after* absolute expansion
312
356
  current_width = new_x1 - new_x0
313
357
  current_height = new_bottom - new_top
314
-
358
+
315
359
  # Calculate new width and height
316
360
  new_width = current_width * width_factor
317
361
  new_height = current_height * height_factor
318
-
362
+
319
363
  # Adjust coordinates based on the new dimensions, keeping the center
320
364
  new_x0 = center_x - new_width / 2
321
365
  new_x1 = center_x + new_width / 2
322
366
  new_top = center_y - new_height / 2
323
367
  new_bottom = center_y + new_height / 2
324
-
368
+
325
369
  # Clamp coordinates to page boundaries
326
370
  new_x0 = max(0, new_x0)
327
371
  new_top = max(0, new_top)
@@ -329,124 +373,129 @@ class DirectionalMixin:
329
373
  new_bottom = min(self.page.height, new_bottom)
330
374
 
331
375
  # Ensure coordinates are valid (x0 <= x1, top <= bottom)
332
- if new_x0 > new_x1: new_x0 = new_x1 = (new_x0 + new_x1) / 2
333
- if new_top > new_bottom: new_top = new_bottom = (new_top + new_bottom) / 2
376
+ if new_x0 > new_x1:
377
+ new_x0 = new_x1 = (new_x0 + new_x1) / 2
378
+ if new_top > new_bottom:
379
+ new_top = new_bottom = (new_top + new_bottom) / 2
334
380
 
335
381
  # Create new region with expanded bbox
336
382
  from natural_pdf.elements.region import Region
383
+
337
384
  new_region = Region(self.page, (new_x0, new_top, new_x1, new_bottom))
338
-
385
+
339
386
  return new_region
340
387
 
341
388
 
342
389
  class Element(DirectionalMixin):
343
390
  """
344
391
  Base class for all PDF elements.
345
-
392
+
346
393
  This class provides common properties and methods for all PDF elements,
347
394
  such as text, rectangles, lines, etc.
348
395
  """
349
-
350
- def __init__(self, obj: Dict[str, Any], page: 'Page'):
396
+
397
+ def __init__(self, obj: Dict[str, Any], page: "Page"):
351
398
  """
352
399
  Initialize base element.
353
-
400
+
354
401
  Args:
355
402
  obj: The underlying pdfplumber object
356
403
  page: The parent Page object
357
404
  """
358
405
  self._obj = obj
359
406
  self._page = page
360
-
407
+
361
408
  @property
362
409
  def type(self) -> str:
363
410
  """Element type."""
364
- return self._obj.get('object_type', 'unknown')
365
-
411
+ return self._obj.get("object_type", "unknown")
412
+
366
413
  @property
367
414
  def bbox(self) -> Tuple[float, float, float, float]:
368
415
  """Bounding box (x0, top, x1, bottom)."""
369
416
  return (self.x0, self.top, self.x1, self.bottom)
370
-
417
+
371
418
  @property
372
419
  def x0(self) -> float:
373
420
  """Left x-coordinate."""
374
421
  if self.has_polygon:
375
422
  return min(pt[0] for pt in self.polygon)
376
- return self._obj.get('x0', 0)
377
-
423
+ return self._obj.get("x0", 0)
424
+
378
425
  @property
379
426
  def top(self) -> float:
380
427
  """Top y-coordinate."""
381
428
  if self.has_polygon:
382
429
  return min(pt[1] for pt in self.polygon)
383
- return self._obj.get('top', 0)
384
-
430
+ return self._obj.get("top", 0)
431
+
385
432
  @property
386
433
  def x1(self) -> float:
387
434
  """Right x-coordinate."""
388
435
  if self.has_polygon:
389
436
  return max(pt[0] for pt in self.polygon)
390
- return self._obj.get('x1', 0)
391
-
437
+ return self._obj.get("x1", 0)
438
+
392
439
  @property
393
440
  def bottom(self) -> float:
394
441
  """Bottom y-coordinate."""
395
442
  if self.has_polygon:
396
443
  return max(pt[1] for pt in self.polygon)
397
- return self._obj.get('bottom', 0)
398
-
444
+ return self._obj.get("bottom", 0)
445
+
399
446
  @property
400
447
  def width(self) -> float:
401
448
  """Element width."""
402
449
  return self.x1 - self.x0
403
-
450
+
404
451
  @property
405
452
  def height(self) -> float:
406
453
  """Element height."""
407
454
  return self.bottom - self.top
408
-
455
+
409
456
  @property
410
457
  def has_polygon(self) -> bool:
411
458
  """Check if this element has polygon coordinates."""
412
- return ('polygon' in self._obj and self._obj['polygon'] and len(self._obj['polygon']) >= 3) or hasattr(self, '_polygon')
413
-
459
+ return (
460
+ "polygon" in self._obj and self._obj["polygon"] and len(self._obj["polygon"]) >= 3
461
+ ) or hasattr(self, "_polygon")
462
+
414
463
  @property
415
464
  def polygon(self) -> List[Tuple[float, float]]:
416
465
  """Get polygon coordinates if available, otherwise return rectangle corners."""
417
- if hasattr(self, '_polygon') and self._polygon:
466
+ if hasattr(self, "_polygon") and self._polygon:
418
467
  return self._polygon
419
- elif 'polygon' in self._obj and self._obj['polygon']:
420
- return self._obj['polygon']
468
+ elif "polygon" in self._obj and self._obj["polygon"]:
469
+ return self._obj["polygon"]
421
470
  else:
422
471
  # Create rectangle corners as fallback
423
472
  return [
424
- (self._obj.get('x0', 0), self._obj.get('top', 0)), # top-left
425
- (self._obj.get('x1', 0), self._obj.get('top', 0)), # top-right
426
- (self._obj.get('x1', 0), self._obj.get('bottom', 0)), # bottom-right
427
- (self._obj.get('x0', 0), self._obj.get('bottom', 0)) # bottom-left
473
+ (self._obj.get("x0", 0), self._obj.get("top", 0)), # top-left
474
+ (self._obj.get("x1", 0), self._obj.get("top", 0)), # top-right
475
+ (self._obj.get("x1", 0), self._obj.get("bottom", 0)), # bottom-right
476
+ (self._obj.get("x0", 0), self._obj.get("bottom", 0)), # bottom-left
428
477
  ]
429
-
478
+
430
479
  def is_point_inside(self, x: float, y: float) -> bool:
431
480
  """
432
481
  Check if a point is inside this element using ray casting algorithm for polygons.
433
-
482
+
434
483
  Args:
435
484
  x: X-coordinate to check
436
485
  y: Y-coordinate to check
437
-
486
+
438
487
  Returns:
439
488
  True if the point is inside the element
440
489
  """
441
490
  if not self.has_polygon:
442
491
  # Use simple rectangle check
443
492
  return (self.x0 <= x <= self.x1) and (self.top <= y <= self.bottom)
444
-
493
+
445
494
  # Ray casting algorithm for complex polygons
446
495
  poly = self.polygon
447
496
  n = len(poly)
448
497
  inside = False
449
-
498
+
450
499
  p1x, p1y = poly[0]
451
500
  for i in range(1, n + 1):
452
501
  p2x, p2y = poly[i % n]
@@ -456,30 +505,36 @@ class Element(DirectionalMixin):
456
505
  if p1x == p2x or x <= xinters:
457
506
  inside = not inside
458
507
  p1x, p1y = p2x, p2y
459
-
508
+
460
509
  return inside
461
-
510
+
462
511
  @property
463
- def page(self) -> 'Page':
512
+ def page(self) -> "Page":
464
513
  """Get the parent page."""
465
514
  return self._page
466
-
467
- def next(self, selector: Optional[str] = None, limit: int = 10, apply_exclusions: bool = True, **kwargs) -> Optional['Element']:
515
+
516
+ def next(
517
+ self,
518
+ selector: Optional[str] = None,
519
+ limit: int = 10,
520
+ apply_exclusions: bool = True,
521
+ **kwargs,
522
+ ) -> Optional["Element"]:
468
523
  """
469
524
  Find next element in reading order.
470
-
525
+
471
526
  Args:
472
527
  selector: Optional selector to filter by
473
528
  limit: Maximum number of elements to search through (default: 10)
474
529
  apply_exclusions: Whether to apply exclusion regions (default: True)
475
530
  **kwargs: Additional parameters
476
-
531
+
477
532
  Returns:
478
533
  Next element or None if not found
479
534
  """
480
535
  # Get all elements in reading order
481
- all_elements = self.page.find_all('*', apply_exclusions=apply_exclusions)
482
-
536
+ all_elements = self.page.find_all("*", apply_exclusions=apply_exclusions)
537
+
483
538
  # Find our index in the list
484
539
  try:
485
540
  # Compare by object identity since bbox could match multiple elements
@@ -487,40 +542,47 @@ class Element(DirectionalMixin):
487
542
  except StopIteration:
488
543
  # If not found, it might have been filtered out by exclusions
489
544
  return None
490
-
545
+
491
546
  # Search for next matching element
492
547
  if selector:
493
548
  # Filter elements after this one
494
- candidates = all_elements[idx+1:]
549
+ candidates = all_elements[idx + 1 :]
495
550
  # Limit search range for performance
496
551
  candidates = candidates[:limit] if limit else candidates
497
-
552
+
498
553
  # Find matching elements
499
554
  from natural_pdf.elements.collections import ElementCollection
555
+
500
556
  matches = ElementCollection(candidates).find_all(selector, **kwargs)
501
557
  return matches[0] if matches else None
502
558
  elif idx + 1 < len(all_elements):
503
559
  # No selector, just return the next element
504
560
  return all_elements[idx + 1]
505
-
561
+
506
562
  return None
507
-
508
- def prev(self, selector: Optional[str] = None, limit: int = 10, apply_exclusions: bool = True, **kwargs) -> Optional['Element']:
563
+
564
+ def prev(
565
+ self,
566
+ selector: Optional[str] = None,
567
+ limit: int = 10,
568
+ apply_exclusions: bool = True,
569
+ **kwargs,
570
+ ) -> Optional["Element"]:
509
571
  """
510
572
  Find previous element in reading order.
511
-
573
+
512
574
  Args:
513
575
  selector: Optional selector to filter by
514
576
  limit: Maximum number of elements to search through (default: 10)
515
577
  apply_exclusions: Whether to apply exclusion regions (default: True)
516
578
  **kwargs: Additional parameters
517
-
579
+
518
580
  Returns:
519
581
  Previous element or None if not found
520
582
  """
521
583
  # Get all elements in reading order
522
- all_elements = self.page.find_all('*', apply_exclusions=apply_exclusions)
523
-
584
+ all_elements = self.page.find_all("*", apply_exclusions=apply_exclusions)
585
+
524
586
  # Find our index in the list
525
587
  try:
526
588
  # Compare by object identity since bbox could match multiple elements
@@ -528,7 +590,7 @@ class Element(DirectionalMixin):
528
590
  except StopIteration:
529
591
  # If not found, it might have been filtered out by exclusions
530
592
  return None
531
-
593
+
532
594
  # Search for previous matching element
533
595
  if selector:
534
596
  # Select elements before this one
@@ -537,27 +599,34 @@ class Element(DirectionalMixin):
537
599
  candidates = candidates[::-1]
538
600
  # Limit search range for performance
539
601
  candidates = candidates[:limit] if limit else candidates
540
-
602
+
541
603
  # Find matching elements using ElementCollection
542
604
  from natural_pdf.elements.collections import ElementCollection
605
+
543
606
  matches = ElementCollection(candidates).find_all(selector, **kwargs)
544
- return matches[0] if matches else None # find_all returns a collection
607
+ return matches[0] if matches else None # find_all returns a collection
545
608
  elif idx > 0:
546
609
  # No selector, just return the previous element
547
610
  return all_elements[idx - 1]
548
-
611
+
549
612
  return None
550
-
551
- def nearest(self, selector: str, max_distance: Optional[float] = None, apply_exclusions: bool = True, **kwargs) -> Optional['Element']:
613
+
614
+ def nearest(
615
+ self,
616
+ selector: str,
617
+ max_distance: Optional[float] = None,
618
+ apply_exclusions: bool = True,
619
+ **kwargs,
620
+ ) -> Optional["Element"]:
552
621
  """
553
622
  Find nearest element matching selector.
554
-
623
+
555
624
  Args:
556
625
  selector: CSS-like selector string
557
626
  max_distance: Maximum distance to search (default: None = unlimited)
558
627
  apply_exclusions: Whether to apply exclusion regions (default: True)
559
628
  **kwargs: Additional parameters
560
-
629
+
561
630
  Returns:
562
631
  Nearest element or None if not found
563
632
  """
@@ -565,56 +634,59 @@ class Element(DirectionalMixin):
565
634
  matches = self.page.find_all(selector, apply_exclusions=apply_exclusions, **kwargs)
566
635
  if not matches:
567
636
  return None
568
-
637
+
569
638
  # Calculate distance to center point of this element
570
639
  self_center_x = (self.x0 + self.x1) / 2
571
640
  self_center_y = (self.top + self.bottom) / 2
572
-
641
+
573
642
  # Calculate distances to each match
574
643
  distances = []
575
644
  for match in matches:
576
645
  if match is self: # Skip self
577
646
  continue
578
-
647
+
579
648
  match_center_x = (match.x0 + match.x1) / 2
580
649
  match_center_y = (match.top + match.bottom) / 2
581
-
650
+
582
651
  # Euclidean distance
583
- distance = ((match_center_x - self_center_x) ** 2 +
584
- (match_center_y - self_center_y) ** 2) ** 0.5
585
-
652
+ distance = (
653
+ (match_center_x - self_center_x) ** 2 + (match_center_y - self_center_y) ** 2
654
+ ) ** 0.5
655
+
586
656
  # Filter by max_distance if specified
587
657
  if max_distance is None or distance <= max_distance:
588
658
  distances.append((match, distance))
589
-
659
+
590
660
  # Sort by distance and return the closest
591
661
  if distances:
592
662
  distances.sort(key=lambda x: x[1])
593
663
  return distances[0][0]
594
-
664
+
595
665
  return None
596
-
597
- def until(self, selector: str, include_endpoint: bool = True, width: str = "element", **kwargs) -> 'Region':
666
+
667
+ def until(
668
+ self, selector: str, include_endpoint: bool = True, width: str = "element", **kwargs
669
+ ) -> "Region":
598
670
  """
599
671
  Select content from this element until matching selector.
600
-
672
+
601
673
  Args:
602
674
  selector: CSS-like selector string
603
675
  include_endpoint: Whether to include the endpoint element in the region (default: True)
604
676
  width: Width mode - "element" to use element widths or "full" for full page width
605
677
  **kwargs: Additional selection parameters
606
-
678
+
607
679
  Returns:
608
680
  Region object representing the selected content
609
681
  """
610
682
  from natural_pdf.elements.region import Region
611
-
683
+
612
684
  # Find the target element
613
685
  target = self.page.find(selector, **kwargs)
614
686
  if not target:
615
687
  # If target not found, return a region with just this element
616
688
  return Region(self.page, self.bbox)
617
-
689
+
618
690
  # Use full page width if requested
619
691
  if width == "full":
620
692
  x0 = 0
@@ -622,12 +694,16 @@ class Element(DirectionalMixin):
622
694
  # Determine vertical bounds based on element positions
623
695
  if target.top >= self.bottom: # Target is below this element
624
696
  top = self.top
625
- bottom = target.bottom if include_endpoint else target.top - 1 # Subtract 1 pixel when excluding
697
+ bottom = (
698
+ target.bottom if include_endpoint else target.top - 1
699
+ ) # Subtract 1 pixel when excluding
626
700
  else: # Target is above this element
627
- top = target.top if include_endpoint else target.bottom + 1 # Add 1 pixel when excluding
701
+ top = (
702
+ target.top if include_endpoint else target.bottom + 1
703
+ ) # Add 1 pixel when excluding
628
704
  bottom = self.bottom
629
705
  return Region(self.page, (x0, top, x1, bottom))
630
-
706
+
631
707
  # Otherwise use element-based width
632
708
  # Determine the correct order for creating the region
633
709
  # If the target is below this element (normal reading order)
@@ -635,12 +711,16 @@ class Element(DirectionalMixin):
635
711
  x0 = min(self.x0, target.x0 if include_endpoint else target.x1)
636
712
  x1 = max(self.x1, target.x1 if include_endpoint else target.x0)
637
713
  top = self.top
638
- bottom = target.bottom if include_endpoint else target.top - 1 # Subtract 1 pixel when excluding
714
+ bottom = (
715
+ target.bottom if include_endpoint else target.top - 1
716
+ ) # Subtract 1 pixel when excluding
639
717
  # If the target is above this element (reverse reading order)
640
718
  elif target.bottom <= self.top:
641
719
  x0 = min(self.x0, target.x0 if include_endpoint else target.x1)
642
720
  x1 = max(self.x1, target.x1 if include_endpoint else target.x0)
643
- top = target.top if include_endpoint else target.bottom + 1 # Add 1 pixel when excluding
721
+ top = (
722
+ target.top if include_endpoint else target.bottom + 1
723
+ ) # Add 1 pixel when excluding
644
724
  bottom = self.bottom
645
725
  # If they're side by side, use the horizontal version
646
726
  elif target.x0 >= self.x1: # Target is to the right
@@ -653,47 +733,49 @@ class Element(DirectionalMixin):
653
733
  x1 = self.x1
654
734
  top = min(self.top, target.top if include_endpoint else target.bottom)
655
735
  bottom = max(self.bottom, target.bottom if include_endpoint else target.top)
656
-
736
+
657
737
  region = Region(self.page, (x0, top, x1, bottom))
658
738
  region.source_element = self
659
739
  region.end_element = target
660
740
  return region
661
-
741
+
662
742
  # Note: select_until method removed in favor of until()
663
-
743
+
664
744
  def extract_text(self, preserve_whitespace=True, use_exclusions=True, **kwargs) -> str:
665
745
  """
666
746
  Extract text from this element.
667
-
747
+
668
748
  Args:
669
749
  preserve_whitespace: Whether to keep blank characters (default: True)
670
750
  use_exclusions: Whether to apply exclusion regions (default: True)
671
751
  **kwargs: Additional extraction parameters
672
-
752
+
673
753
  Returns:
674
754
  Extracted text as string
675
755
  """
676
756
  # Default implementation - override in subclasses
677
757
  return ""
678
-
758
+
679
759
  # Note: extract_text_compat method removed
680
-
681
- def highlight(self,
682
- label: Optional[str] = None,
683
- color: Optional[Union[Tuple, str]] = None, # Allow string color
684
- use_color_cycling: bool = False,
685
- include_attrs: Optional[List[str]] = None,
686
- existing: str = 'append') -> 'Element':
760
+
761
+ def highlight(
762
+ self,
763
+ label: Optional[str] = None,
764
+ color: Optional[Union[Tuple, str]] = None, # Allow string color
765
+ use_color_cycling: bool = False,
766
+ include_attrs: Optional[List[str]] = None,
767
+ existing: str = "append",
768
+ ) -> "Element":
687
769
  """
688
770
  Highlight this element on the page.
689
-
771
+
690
772
  Args:
691
773
  label: Optional label for the highlight
692
774
  color: Color tuple/string for the highlight, or None to use automatic color
693
775
  use_color_cycling: Force color cycling even with no label (default: False)
694
776
  include_attrs: List of attribute names to display on the highlight (e.g., ['confidence', 'type'])
695
777
  existing: How to handle existing highlights - 'append' (default) or 'replace'
696
-
778
+
697
779
  Returns:
698
780
  Self for method chaining
699
781
  """
@@ -708,7 +790,7 @@ class Element(DirectionalMixin):
708
790
  "use_color_cycling": use_color_cycling,
709
791
  "element": self, # Pass the element itself so attributes can be accessed
710
792
  "include_attrs": include_attrs,
711
- "existing": existing
793
+ "existing": existing,
712
794
  }
713
795
 
714
796
  # Call the appropriate service method based on geometry
@@ -720,13 +802,15 @@ class Element(DirectionalMixin):
720
802
  highlighter.add(**highlight_args)
721
803
 
722
804
  return self
723
-
724
- def show(self,
725
- scale: float = 2.0,
726
- labels: bool = True,
727
- legend_position: str = 'right',
728
- color: Optional[Union[Tuple, str]] = "red", # Default color for single element
729
- label: Optional[str] = None) -> Optional['Image.Image']:
805
+
806
+ def show(
807
+ self,
808
+ scale: float = 2.0,
809
+ labels: bool = True,
810
+ legend_position: str = "right",
811
+ color: Optional[Union[Tuple, str]] = "red", # Default color for single element
812
+ label: Optional[str] = None,
813
+ ) -> Optional["Image.Image"]:
730
814
  """
731
815
  Show the page with only this element highlighted temporarily.
732
816
 
@@ -740,12 +824,12 @@ class Element(DirectionalMixin):
740
824
  Returns:
741
825
  PIL Image of the page with only this element highlighted, or None if error.
742
826
  """
743
- if not hasattr(self, 'page') or not self.page:
827
+ if not hasattr(self, "page") or not self.page:
744
828
  logger.warning(f"Cannot show element, missing 'page' attribute: {self}")
745
829
  return None
746
- if not hasattr(self.page, '_highlighter') or not self.page._highlighter:
747
- logger.warning(f"Cannot show element, page lacks highlighter service: {self}")
748
- return None
830
+ if not hasattr(self.page, "_highlighter") or not self.page._highlighter:
831
+ logger.warning(f"Cannot show element, page lacks highlighter service: {self}")
832
+ return None
749
833
 
750
834
  service = self.page._highlighter
751
835
 
@@ -757,15 +841,15 @@ class Element(DirectionalMixin):
757
841
  "page_index": self.page.index,
758
842
  "bbox": self.bbox if not self.has_polygon else None,
759
843
  "polygon": self.polygon if self.has_polygon else None,
760
- "color": color, # Use provided or default color
844
+ "color": color, # Use provided or default color
761
845
  "label": display_label,
762
- "use_color_cycling": False # Explicitly false for single preview
846
+ "use_color_cycling": False, # Explicitly false for single preview
763
847
  }
764
848
 
765
849
  # Check if we actually got geometry data
766
- if temp_highlight_data['bbox'] is None and temp_highlight_data['polygon'] is None:
767
- logger.warning(f"Cannot show element, failed to get bbox or polygon: {self}")
768
- return None
850
+ if temp_highlight_data["bbox"] is None and temp_highlight_data["polygon"] is None:
851
+ logger.warning(f"Cannot show element, failed to get bbox or polygon: {self}")
852
+ return None
769
853
 
770
854
  # Use render_preview to show only this highlight
771
855
  try:
@@ -774,49 +858,47 @@ class Element(DirectionalMixin):
774
858
  temporary_highlights=[temp_highlight_data],
775
859
  scale=scale,
776
860
  labels=labels,
777
- legend_position=legend_position
861
+ legend_position=legend_position,
778
862
  )
779
863
  except Exception as e:
780
864
  logger.error(f"Error calling render_preview for element {self}: {e}", exc_info=True)
781
865
  return None
782
-
783
- def save(self,
784
- filename: str,
785
- scale: float = 2.0,
786
- labels: bool = True,
787
- legend_position: str = 'right') -> None:
866
+
867
+ def save(
868
+ self, filename: str, scale: float = 2.0, labels: bool = True, legend_position: str = "right"
869
+ ) -> None:
788
870
  """
789
871
  Save the page with this element highlighted to an image file.
790
-
872
+
791
873
  Args:
792
874
  filename: Path to save the image to
793
875
  scale: Scale factor for rendering
794
876
  labels: Whether to include a legend for labels
795
877
  legend_position: Position of the legend
796
-
878
+
797
879
  Returns:
798
880
  Self for method chaining
799
881
  """
800
882
  # Save the highlighted image
801
883
  self.page.save_image(filename, scale=scale, labels=labels, legend_position=legend_position)
802
884
  return self
803
-
885
+
804
886
  # Note: save_image method removed in favor of save()
805
-
887
+
806
888
  def __repr__(self) -> str:
807
889
  """String representation of the element."""
808
890
  return f"<{self.__class__.__name__} bbox={self.bbox}>"
809
891
 
810
- def find(self, selector: str, apply_exclusions=True, **kwargs) -> Optional['Element']:
892
+ def find(self, selector: str, apply_exclusions=True, **kwargs) -> Optional["Element"]:
811
893
  """
812
894
  Find first element within this element's bounds matching the selector.
813
895
  Creates a temporary region to perform the search.
814
-
896
+
815
897
  Args:
816
898
  selector: CSS-like selector string
817
899
  apply_exclusions: Whether to apply exclusion regions
818
900
  **kwargs: Additional parameters for element filtering
819
-
901
+
820
902
  Returns:
821
903
  First matching element or None
822
904
  """
@@ -826,16 +908,16 @@ class Element(DirectionalMixin):
826
908
  temp_region = Region(self.page, self.bbox)
827
909
  return temp_region.find(selector, apply_exclusions=apply_exclusions, **kwargs)
828
910
 
829
- def find_all(self, selector: str, apply_exclusions=True, **kwargs) -> 'ElementCollection':
911
+ def find_all(self, selector: str, apply_exclusions=True, **kwargs) -> "ElementCollection":
830
912
  """
831
913
  Find all elements within this element's bounds matching the selector.
832
914
  Creates a temporary region to perform the search.
833
-
915
+
834
916
  Args:
835
917
  selector: CSS-like selector string
836
918
  apply_exclusions: Whether to apply exclusion regions
837
919
  **kwargs: Additional parameters for element filtering
838
-
920
+
839
921
  Returns:
840
922
  ElementCollection with matching elements
841
923
  """
@@ -843,4 +925,4 @@ class Element(DirectionalMixin):
843
925
 
844
926
  # Create a temporary region from this element's bounds
845
927
  temp_region = Region(self.page, self.bbox)
846
- return temp_region.find_all(selector, apply_exclusions=apply_exclusions, **kwargs)
928
+ return temp_region.find_all(selector, apply_exclusions=apply_exclusions, **kwargs)