natural-pdf 25.3.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. examples/__init__.py +3 -0
  2. examples/another_exclusion_example.py +20 -0
  3. examples/basic_usage.py +190 -0
  4. examples/boundary_exclusion_test.py +137 -0
  5. examples/boundary_inclusion_fix_test.py +157 -0
  6. examples/chainable_layout_example.py +70 -0
  7. examples/color_basic_test.py +49 -0
  8. examples/color_name_example.py +71 -0
  9. examples/color_test.py +62 -0
  10. examples/debug_ocr.py +91 -0
  11. examples/direct_ocr_test.py +148 -0
  12. examples/direct_paddle_test.py +99 -0
  13. examples/direct_qa_example.py +165 -0
  14. examples/document_layout_analysis.py +123 -0
  15. examples/document_qa_example.py +185 -0
  16. examples/exclusion_count_debug.py +128 -0
  17. examples/exclusion_debug.py +107 -0
  18. examples/exclusion_example.py +150 -0
  19. examples/exclusion_optimization_example.py +190 -0
  20. examples/extract_text_test.py +128 -0
  21. examples/font_aware_example.py +101 -0
  22. examples/font_variant_example.py +124 -0
  23. examples/footer_overlap_test.py +124 -0
  24. examples/highlight_all_example.py +82 -0
  25. examples/highlight_attributes_test.py +114 -0
  26. examples/highlight_confidence_display.py +122 -0
  27. examples/highlight_demo.py +110 -0
  28. examples/highlight_float_test.py +71 -0
  29. examples/highlight_test.py +147 -0
  30. examples/highlighting_example.py +123 -0
  31. examples/image_width_example.py +84 -0
  32. examples/improved_api_example.py +128 -0
  33. examples/layout_confidence_display_test.py +65 -0
  34. examples/layout_confidence_test.py +82 -0
  35. examples/layout_coordinate_debug.py +258 -0
  36. examples/layout_highlight_test.py +77 -0
  37. examples/logging_example.py +70 -0
  38. examples/ocr_comprehensive.py +193 -0
  39. examples/ocr_debug_example.py +87 -0
  40. examples/ocr_default_test.py +97 -0
  41. examples/ocr_engine_comparison.py +235 -0
  42. examples/ocr_example.py +89 -0
  43. examples/ocr_simplified_params.py +79 -0
  44. examples/ocr_visualization.py +102 -0
  45. examples/ocr_visualization_test.py +121 -0
  46. examples/paddle_layout_example.py +315 -0
  47. examples/paddle_layout_simple.py +74 -0
  48. examples/paddleocr_example.py +224 -0
  49. examples/page_collection_example.py +103 -0
  50. examples/polygon_highlight_example.py +83 -0
  51. examples/position_methods_example.py +134 -0
  52. examples/region_boundary_test.py +73 -0
  53. examples/region_exclusion_test.py +149 -0
  54. examples/region_expand_example.py +109 -0
  55. examples/region_image_example.py +116 -0
  56. examples/region_ocr_test.py +119 -0
  57. examples/region_sections_example.py +115 -0
  58. examples/school_books.py +49 -0
  59. examples/school_books_all.py +52 -0
  60. examples/scouring.py +36 -0
  61. examples/section_extraction_example.py +232 -0
  62. examples/simple_document_qa.py +97 -0
  63. examples/spatial_navigation_example.py +108 -0
  64. examples/table_extraction_example.py +135 -0
  65. examples/table_structure_detection.py +155 -0
  66. examples/tatr_cells_test.py +56 -0
  67. examples/tatr_ocr_table_test.py +94 -0
  68. examples/text_search_example.py +122 -0
  69. examples/text_style_example.py +110 -0
  70. examples/tiny-text.py +61 -0
  71. examples/until_boundaries_example.py +156 -0
  72. examples/until_example.py +112 -0
  73. examples/very_basics.py +15 -0
  74. natural_pdf/__init__.py +55 -0
  75. natural_pdf/analyzers/__init__.py +9 -0
  76. natural_pdf/analyzers/document_layout.py +736 -0
  77. natural_pdf/analyzers/text_structure.py +153 -0
  78. natural_pdf/core/__init__.py +3 -0
  79. natural_pdf/core/page.py +2376 -0
  80. natural_pdf/core/pdf.py +572 -0
  81. natural_pdf/elements/__init__.py +3 -0
  82. natural_pdf/elements/base.py +553 -0
  83. natural_pdf/elements/collections.py +770 -0
  84. natural_pdf/elements/line.py +124 -0
  85. natural_pdf/elements/rect.py +122 -0
  86. natural_pdf/elements/region.py +1366 -0
  87. natural_pdf/elements/text.py +304 -0
  88. natural_pdf/ocr/__init__.py +62 -0
  89. natural_pdf/ocr/easyocr_engine.py +254 -0
  90. natural_pdf/ocr/engine.py +158 -0
  91. natural_pdf/ocr/paddleocr_engine.py +263 -0
  92. natural_pdf/qa/__init__.py +3 -0
  93. natural_pdf/qa/document_qa.py +405 -0
  94. natural_pdf/selectors/__init__.py +4 -0
  95. natural_pdf/selectors/parser.py +360 -0
  96. natural_pdf/templates/__init__.py +1 -0
  97. natural_pdf/templates/ocr_debug.html +517 -0
  98. natural_pdf/utils/__init__.py +4 -0
  99. natural_pdf/utils/highlighting.py +605 -0
  100. natural_pdf/utils/ocr.py +515 -0
  101. natural_pdf/utils/reading_order.py +227 -0
  102. natural_pdf/utils/visualization.py +151 -0
  103. natural_pdf-25.3.16.dist-info/LICENSE +21 -0
  104. natural_pdf-25.3.16.dist-info/METADATA +268 -0
  105. natural_pdf-25.3.16.dist-info/RECORD +109 -0
  106. natural_pdf-25.3.16.dist-info/WHEEL +5 -0
  107. natural_pdf-25.3.16.dist-info/top_level.txt +3 -0
  108. tests/__init__.py +3 -0
  109. tests/test_pdf.py +39 -0
@@ -0,0 +1,553 @@
1
+ """
2
+ Base Element class for natural-pdf.
3
+ """
4
+ from typing import Any, Dict, List, Optional, TYPE_CHECKING, Union, Tuple
5
+ from PIL import Image
6
+
7
+ if TYPE_CHECKING:
8
+ from natural_pdf.core.page import Page
9
+ from natural_pdf.elements.region import Region
10
+
11
+
12
class Element:
    """
    Base class for all PDF elements.

    An element wraps a raw object dictionary (``obj``) and a reference to its
    parent page. On top of that it offers:

    * geometry accessors (``x0``, ``top``, ``x1``, ``bottom``, ``bbox``,
      ``width``, ``height``) that honour polygon coordinates when present;
    * spatial navigation (``above``, ``below``, ``until``, ``next``, ``prev``,
      ``nearest``) that delegates searching to the parent page;
    * highlighting/rendering shortcuts that delegate to the parent page.
    """

    def __init__(self, obj: Dict[str, Any], page: 'Page'):
        """
        Initialize base element.

        Args:
            obj: The underlying pdfplumber object
            page: The parent Page object
        """
        self._obj = obj
        self._page = page

    @property
    def type(self) -> str:
        """Element type, read from ``object_type`` ('unknown' when absent)."""
        return self._obj.get('object_type', 'unknown')

    @property
    def bbox(self) -> Tuple[float, float, float, float]:
        """Bounding box as ``(x0, top, x1, bottom)``."""
        return (self.x0, self.top, self.x1, self.bottom)

    @property
    def x0(self) -> float:
        """Left x-coordinate (minimum polygon x when a polygon is present)."""
        if self.has_polygon:
            return min(pt[0] for pt in self.polygon)
        return self._obj.get('x0', 0)

    @property
    def top(self) -> float:
        """Top y-coordinate (minimum polygon y when a polygon is present)."""
        if self.has_polygon:
            return min(pt[1] for pt in self.polygon)
        return self._obj.get('top', 0)

    @property
    def x1(self) -> float:
        """Right x-coordinate (maximum polygon x when a polygon is present)."""
        if self.has_polygon:
            return max(pt[0] for pt in self.polygon)
        return self._obj.get('x1', 0)

    @property
    def bottom(self) -> float:
        """Bottom y-coordinate (maximum polygon y when a polygon is present)."""
        if self.has_polygon:
            return max(pt[1] for pt in self.polygon)
        return self._obj.get('bottom', 0)

    @property
    def width(self) -> float:
        """Element width (``x1 - x0``)."""
        return self.x1 - self.x0

    @property
    def height(self) -> float:
        """Element height (``bottom - top``)."""
        return self.bottom - self.top

    @property
    def has_polygon(self) -> bool:
        """
        Check if this element has polygon coordinates.

        True when a non-empty ``_polygon`` attribute has been set on the
        instance, or when the underlying object carries a ``polygon`` entry
        with at least three points. An empty or ``None`` ``_polygon`` does not
        count, which keeps this in step with what ``polygon`` actually returns.
        """
        if getattr(self, '_polygon', None):
            return True
        points = self._obj.get('polygon')
        return bool(points) and len(points) >= 3

    @property
    def polygon(self) -> List[Tuple[float, float]]:
        """Get polygon coordinates if available, otherwise return rectangle corners."""
        override = getattr(self, '_polygon', None)
        if override:
            return override

        stored = self._obj.get('polygon')
        if stored:
            return stored

        # Fall back to the rectangle corners, clockwise from the top-left
        left = self._obj.get('x0', 0)
        right = self._obj.get('x1', 0)
        upper = self._obj.get('top', 0)
        lower = self._obj.get('bottom', 0)
        return [
            (left, upper),   # top-left
            (right, upper),  # top-right
            (right, lower),  # bottom-right
            (left, lower),   # bottom-left
        ]

    def is_point_inside(self, x: float, y: float) -> bool:
        """
        Check if a point is inside this element.

        Elements without a polygon use an inclusive bounding-box test;
        elements with a polygon use the ray casting algorithm.

        Args:
            x: X-coordinate to check
            y: Y-coordinate to check

        Returns:
            True if the point is inside the element
        """
        if not self.has_polygon:
            # Use simple rectangle check
            return (self.x0 <= x <= self.x1) and (self.top <= y <= self.bottom)

        # Ray casting: toggle `inside` each time a horizontal ray from the
        # point crosses an edge of the polygon.
        vertices = self.polygon
        count = len(vertices)
        inside = False

        p1x, p1y = vertices[0]
        for i in range(1, count + 1):
            p2x, p2y = vertices[i % count]
            # The strict/non-strict pair below can only hold for edges with
            # p1y != p2y, so the division is always well defined.
            if min(p1y, p2y) < y <= max(p1y, p2y) and x <= max(p1x, p2x):
                xinters = (y - p1y) * (p2x - p1x) / (p2y - p1y) + p1x
                if p1x == p2x or x <= xinters:
                    inside = not inside
            p1x, p1y = p2x, p2y

        return inside

    @property
    def page(self) -> 'Page':
        """Get the parent page."""
        return self._page

    def _horizontal_span(self, width: str) -> Tuple[float, float]:
        """Return the (x0, x1) extent for a width mode of 'full' or 'element'."""
        if width == "full":
            return 0, self.page.width
        if width == "element":
            return self.x0, self.x1
        raise ValueError("Width must be 'full' or 'element'")

    def _index_in(self, elements) -> Optional[int]:
        """Return this element's position in ``elements`` (by identity), or None."""
        # Compare by object identity since bbox could match multiple elements
        for position, candidate in enumerate(elements):
            if candidate is self:
                return position
        return None

    def above(self, height: Optional[float] = None, width: str = "full", include_element: bool = False,
              until: Optional[str] = None, include_endpoint: bool = True, **kwargs) -> 'Region':
        """
        Select region above this element.

        Args:
            height: Height of the region above, in points. ``None`` (or 0)
                extends the region to the top of the page.
            width: Width mode - "full" for full page width or "element" for element width
            include_element: Whether to include this element in the region (default: False)
            until: Optional selector string to specify an upper boundary element
            include_endpoint: Whether to include the boundary element in the region (default: True)
            **kwargs: Additional parameters, forwarded to ``page.find_all`` when ``until`` is given

        Returns:
            Region object representing the area above

        Raises:
            ValueError: If ``width`` is neither "full" nor "element"
        """
        from natural_pdf.elements.region import Region

        # Leave a 1-unit gap below the region when the element itself is excluded
        bottom = self.bottom if include_element else self.top - 1
        x0, x1 = self._horizontal_span(width)

        # Closest match that lies entirely above this element, if any
        target = None
        if until:
            found = self.page.find_all(until, **kwargs)
            candidates = [m for m in found if m.bottom <= self.top]
            if candidates:
                target = max(candidates, key=lambda e: e.bottom)

        if target is not None:
            # 1-unit offset keeps the boundary element out when it is excluded
            top = target.top if include_endpoint else target.bottom + 1

            # Use the selector match for width if not using full width
            if width == "element":
                x0 = min(x0, target.x0 if include_endpoint else target.x1)
                x1 = max(x1, target.x1 if include_endpoint else target.x0)
        else:
            # No boundary in play - fall back to the requested height
            top = max(0, bottom - (height or bottom))

        region = Region(self.page, (x0, top, x1, bottom))
        region.source_element = self  # Reference to element that created this region
        region.includes_source = include_element  # Whether region includes the source element
        return region

    def below(self, height: Optional[float] = None, width: str = "full", include_element: bool = False,
              until: Optional[str] = None, include_endpoint: bool = True, **kwargs) -> 'Region':
        """
        Select region below this element.

        Args:
            height: Height of the region below, in points. ``None`` (or 0)
                extends the region to the bottom of the page.
            width: Width mode - "full" for full page width or "element" for element width
            include_element: Whether to include this element in the region (default: False)
            until: Optional selector string to specify a lower boundary element
            include_endpoint: Whether to include the boundary element in the region (default: True)
            **kwargs: Additional parameters, forwarded to ``page.find_all`` when ``until`` is given

        Returns:
            Region object representing the area below

        Raises:
            ValueError: If ``width`` is neither "full" nor "element"
        """
        from natural_pdf.elements.region import Region

        # Leave a 1-unit gap above the region when the element itself is excluded
        top = self.top if include_element else self.bottom + 1
        x0, x1 = self._horizontal_span(width)

        # Closest match that lies entirely below this element, if any
        target = None
        if until:
            found = self.page.find_all(until, **kwargs)
            candidates = [m for m in found if m.top >= self.bottom]
            if candidates:
                target = min(candidates, key=lambda e: e.top)

        if target is not None:
            # 1-unit offset keeps the boundary element out when it is excluded
            bottom = target.bottom if include_endpoint else target.top - 1

            # Use the selector match for width if not using full width
            if width == "element":
                x0 = min(x0, target.x0 if include_endpoint else target.x1)
                x1 = max(x1, target.x1 if include_endpoint else target.x0)
        else:
            # No boundary in play - fall back to the requested height
            bottom = min(self.page.height, top + (height or (self.page.height - top)))

        region = Region(self.page, (x0, top, x1, bottom))
        region.source_element = self  # Reference to element that created this region
        region.includes_source = include_element  # Whether region includes the source element
        return region

    def next(self, selector: Optional[str] = None, limit: int = 10, apply_exclusions: bool = True, **kwargs) -> Optional['Element']:
        """
        Find next element in reading order.

        Args:
            selector: Optional selector to filter by
            limit: Maximum number of following elements to consider when a
                selector is given (default: 10); a falsy value disables the limit
            apply_exclusions: Whether to apply exclusion regions (default: True)
            **kwargs: Additional parameters, forwarded to ``page.filter_elements``

        Returns:
            Next element or None if not found
        """
        # Get all elements in reading order
        all_elements = self.page.find_all('*', apply_exclusions=apply_exclusions)

        idx = self._index_in(all_elements)
        if idx is None:
            # If not found, it might have been filtered out by exclusions
            return None

        if selector:
            # Only look at the elements after this one, capped for performance
            candidates = all_elements[idx + 1:]
            if limit:
                candidates = candidates[:limit]

            matches = self.page.filter_elements(candidates, selector, **kwargs)
            return matches[0] if matches else None

        # No selector, just return the next element when there is one
        if idx + 1 < len(all_elements):
            return all_elements[idx + 1]
        return None

    def prev(self, selector: Optional[str] = None, limit: int = 10, apply_exclusions: bool = True, **kwargs) -> Optional['Element']:
        """
        Find previous element in reading order.

        Args:
            selector: Optional selector to filter by
            limit: Maximum number of preceding elements to consider when a
                selector is given (default: 10); a falsy value disables the limit
            apply_exclusions: Whether to apply exclusion regions (default: True)
            **kwargs: Additional parameters, forwarded to ``page.filter_elements``

        Returns:
            Previous element or None if not found
        """
        # Get all elements in reading order
        all_elements = self.page.find_all('*', apply_exclusions=apply_exclusions)

        idx = self._index_in(all_elements)
        if idx is None:
            # If not found, it might have been filtered out by exclusions
            return None

        if selector:
            # Elements before this one, reversed so the closest comes first
            candidates = all_elements[:idx]
            candidates = candidates[::-1]
            if limit:
                candidates = candidates[:limit]

            matches = self.page.filter_elements(candidates, selector, **kwargs)
            return matches[0] if matches else None

        # No selector, just return the previous element when there is one
        if idx > 0:
            return all_elements[idx - 1]
        return None

    def nearest(self, selector: str, max_distance: Optional[float] = None, apply_exclusions: bool = True, **kwargs) -> Optional['Element']:
        """
        Find nearest element matching selector.

        Distance is the Euclidean distance between bounding-box centers; this
        element itself is never returned.

        Args:
            selector: CSS-like selector string
            max_distance: Maximum distance to search (default: None = unlimited)
            apply_exclusions: Whether to apply exclusion regions (default: True)
            **kwargs: Additional parameters, forwarded to ``page.find_all``

        Returns:
            Nearest element or None if not found
        """
        matches = self.page.find_all(selector, apply_exclusions=apply_exclusions, **kwargs)
        if not matches:
            return None

        # Center point of this element
        self_center_x = (self.x0 + self.x1) / 2
        self_center_y = (self.top + self.bottom) / 2

        closest = None
        closest_distance = None
        for match in matches:
            if match is self:  # Skip self
                continue

            match_center_x = (match.x0 + match.x1) / 2
            match_center_y = (match.top + match.bottom) / 2

            # Euclidean distance
            distance = ((match_center_x - self_center_x) ** 2 +
                        (match_center_y - self_center_y) ** 2) ** 0.5

            if max_distance is not None and distance > max_distance:
                continue

            # Strict comparison keeps the earliest match on ties
            if closest_distance is None or distance < closest_distance:
                closest = match
                closest_distance = distance

        return closest

    def until(self, selector: str, include_endpoint: bool = True, width: str = "element", **kwargs) -> 'Region':
        """
        Select content from this element until matching selector.

        The endpoint is whatever ``page.find(selector)`` returns. If nothing
        matches, the region is just this element's bounding box.

        Args:
            selector: CSS-like selector string
            include_endpoint: Whether to include the endpoint element in the region (default: True)
            width: Width mode - "full" for full page width; any other value
                uses the element-based width
            **kwargs: Additional selection parameters, forwarded to ``page.find``

        Returns:
            Region object representing the selected content
        """
        from natural_pdf.elements.region import Region

        # NOTE(review): only the element-width path below tags the region with
        # source_element/end_element; the not-found and full-width paths return
        # an untagged Region. Confirm whether that difference is intentional.
        target = self.page.find(selector, **kwargs)
        if not target:
            # If target not found, return a region with just this element
            return Region(self.page, self.bbox)

        if width == "full":
            x0 = 0
            x1 = self.page.width
            if target.top >= self.bottom:  # Target is below this element
                top = self.top
                bottom = target.bottom if include_endpoint else target.top - 1
            else:  # Target is above this element
                top = target.top if include_endpoint else target.bottom + 1
                bottom = self.bottom
            return Region(self.page, (x0, top, x1, bottom))

        # Element-based width: the shape depends on where the target sits
        if target.top >= self.bottom:
            # Target is below this element (normal reading order)
            x0 = min(self.x0, target.x0 if include_endpoint else target.x1)
            x1 = max(self.x1, target.x1 if include_endpoint else target.x0)
            top = self.top
            bottom = target.bottom if include_endpoint else target.top - 1
        elif target.bottom <= self.top:
            # Target is above this element (reverse reading order)
            x0 = min(self.x0, target.x0 if include_endpoint else target.x1)
            x1 = max(self.x1, target.x1 if include_endpoint else target.x0)
            top = target.top if include_endpoint else target.bottom + 1
            bottom = self.bottom
        elif target.x0 >= self.x1:
            # Vertically overlapping, target is to the right
            x0 = self.x0
            x1 = target.x1 if include_endpoint else target.x0
            top = min(self.top, target.top if include_endpoint else target.bottom)
            bottom = max(self.bottom, target.bottom if include_endpoint else target.top)
        else:
            # Vertically overlapping, target is to the left
            x0 = target.x0 if include_endpoint else target.x1
            x1 = self.x1
            top = min(self.top, target.top if include_endpoint else target.bottom)
            bottom = max(self.bottom, target.bottom if include_endpoint else target.top)

        region = Region(self.page, (x0, top, x1, bottom))
        region.source_element = self
        region.end_element = target
        return region

    # Note: select_until method removed in favor of until()

    def extract_text(self, preserve_whitespace=True, use_exclusions=True, **kwargs) -> str:
        """
        Extract text from this element.

        The base implementation always returns an empty string; subclasses
        that carry text are expected to override it.

        Args:
            preserve_whitespace: Whether to keep blank characters (default: True)
            use_exclusions: Whether to apply exclusion regions (default: True)
            **kwargs: Additional extraction parameters

        Returns:
            Extracted text as string
        """
        # Default implementation - override in subclasses
        return ""

    # Note: extract_text_compat method removed

    def highlight(self,
                  label: Optional[str] = None,
                  color: Optional[Tuple[int, int, int, int]] = None,
                  use_color_cycling: bool = False,
                  include_attrs: Optional[List[str]] = None,
                  existing: str = 'append') -> 'Element':
        """
        Highlight this element on the page.

        Registers this element's bounding box with the parent page's
        highlight manager.

        Args:
            label: Optional label for the highlight
            color: RGBA color tuple for the highlight, or None to use automatic color
            use_color_cycling: Force color cycling even with no label (default: False)
            include_attrs: List of attribute names to display on the highlight (e.g., ['confidence', 'type'])
            existing: How to handle existing highlights - 'append' (default) or 'replace'

        Returns:
            Self for method chaining
        """
        self.page._highlight_mgr.add_highlight(
            self.bbox,
            color,
            label,
            use_color_cycling,
            element=self,  # Pass the element itself so attributes can be accessed
            include_attrs=include_attrs,
            existing=existing
        )
        return self

    def show(self,
             scale: float = 2.0,
             labels: bool = True,
             legend_position: str = 'right') -> 'Image.Image':
        """
        Render the parent page via ``page.show``.

        This method does not add a highlight itself; call ``highlight()``
        first if this element should be marked in the rendered image.

        Args:
            scale: Scale factor for rendering
            labels: Whether to include a legend for labels
            legend_position: Position of the legend

        Returns:
            PIL Image of the rendered page
        """
        return self.page.show(scale=scale, labels=labels, legend_position=legend_position)

    def save(self,
             filename: str,
             scale: float = 2.0,
             labels: bool = True,
             legend_position: str = 'right') -> 'Element':
        """
        Save the rendered parent page to an image file via ``page.save_image``.

        This method does not add a highlight itself; call ``highlight()``
        first if this element should be marked in the saved image.

        Args:
            filename: Path to save the image to
            scale: Scale factor for rendering
            labels: Whether to include a legend for labels
            legend_position: Position of the legend

        Returns:
            Self for method chaining
        """
        self.page.save_image(filename, scale=scale, labels=labels, legend_position=legend_position)
        return self

    # Note: save_image method removed in favor of save()

    def __repr__(self) -> str:
        """String representation of the element."""
        return f"<{self.__class__.__name__} bbox={self.bbox}>"