natural-pdf 0.1.40__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (55)
  1. natural_pdf/__init__.py +6 -7
  2. natural_pdf/analyzers/__init__.py +6 -1
  3. natural_pdf/analyzers/guides.py +354 -258
  4. natural_pdf/analyzers/layout/layout_analyzer.py +2 -3
  5. natural_pdf/analyzers/layout/layout_manager.py +18 -4
  6. natural_pdf/analyzers/layout/paddle.py +11 -0
  7. natural_pdf/analyzers/layout/surya.py +2 -3
  8. natural_pdf/analyzers/shape_detection_mixin.py +25 -34
  9. natural_pdf/analyzers/text_structure.py +2 -2
  10. natural_pdf/classification/manager.py +1 -1
  11. natural_pdf/collections/mixins.py +3 -2
  12. natural_pdf/core/highlighting_service.py +743 -32
  13. natural_pdf/core/page.py +236 -383
  14. natural_pdf/core/page_collection.py +1249 -0
  15. natural_pdf/core/pdf.py +172 -83
  16. natural_pdf/{collections → core}/pdf_collection.py +18 -11
  17. natural_pdf/core/render_spec.py +335 -0
  18. natural_pdf/describe/base.py +1 -1
  19. natural_pdf/elements/__init__.py +1 -0
  20. natural_pdf/elements/base.py +108 -83
  21. natural_pdf/elements/{collections.py → element_collection.py} +566 -1487
  22. natural_pdf/elements/line.py +0 -1
  23. natural_pdf/elements/rect.py +0 -1
  24. natural_pdf/elements/region.py +318 -243
  25. natural_pdf/elements/text.py +9 -7
  26. natural_pdf/exporters/base.py +2 -2
  27. natural_pdf/exporters/original_pdf.py +1 -1
  28. natural_pdf/exporters/paddleocr.py +2 -4
  29. natural_pdf/exporters/searchable_pdf.py +3 -2
  30. natural_pdf/extraction/mixin.py +1 -3
  31. natural_pdf/flows/collections.py +1 -69
  32. natural_pdf/flows/element.py +4 -4
  33. natural_pdf/flows/flow.py +1200 -243
  34. natural_pdf/flows/region.py +707 -261
  35. natural_pdf/ocr/ocr_options.py +0 -2
  36. natural_pdf/ocr/utils.py +2 -1
  37. natural_pdf/qa/document_qa.py +21 -5
  38. natural_pdf/search/search_service_protocol.py +1 -1
  39. natural_pdf/selectors/parser.py +2 -2
  40. natural_pdf/tables/result.py +35 -1
  41. natural_pdf/text_mixin.py +7 -3
  42. natural_pdf/utils/debug.py +2 -1
  43. natural_pdf/utils/highlighting.py +1 -0
  44. natural_pdf/utils/layout.py +2 -2
  45. natural_pdf/utils/packaging.py +4 -3
  46. natural_pdf/utils/text_extraction.py +15 -12
  47. natural_pdf/utils/visualization.py +385 -0
  48. {natural_pdf-0.1.40.dist-info → natural_pdf-0.2.0.dist-info}/METADATA +7 -3
  49. {natural_pdf-0.1.40.dist-info → natural_pdf-0.2.0.dist-info}/RECORD +55 -53
  50. optimization/memory_comparison.py +1 -1
  51. optimization/pdf_analyzer.py +2 -2
  52. {natural_pdf-0.1.40.dist-info → natural_pdf-0.2.0.dist-info}/WHEEL +0 -0
  53. {natural_pdf-0.1.40.dist-info → natural_pdf-0.2.0.dist-info}/entry_points.txt +0 -0
  54. {natural_pdf-0.1.40.dist-info → natural_pdf-0.2.0.dist-info}/licenses/LICENSE +0 -0
  55. {natural_pdf-0.1.40.dist-info → natural_pdf-0.2.0.dist-info}/top_level.txt +0 -0
natural_pdf/core/page.py CHANGED
@@ -16,6 +16,7 @@ from typing import ( # Added overload
16
16
  Callable,
17
17
  Dict,
18
18
  List,
19
+ Literal,
19
20
  Optional,
20
21
  Tuple,
21
22
  Union,
@@ -26,7 +27,7 @@ import pdfplumber
26
27
  from PIL import Image, ImageDraw
27
28
  from tqdm.auto import tqdm # Added tqdm import
28
29
 
29
- from natural_pdf.elements.collections import ElementCollection
30
+ from natural_pdf.elements.element_collection import ElementCollection
30
31
  from natural_pdf.elements.region import Region
31
32
  from natural_pdf.selectors.parser import parse_selector
32
33
  from natural_pdf.utils.locks import pdf_render_lock # Import from utils instead
@@ -38,7 +39,6 @@ if TYPE_CHECKING:
38
39
  from natural_pdf.core.highlighting_service import HighlightingService
39
40
  from natural_pdf.core.pdf import PDF
40
41
  from natural_pdf.elements.base import Element
41
- from natural_pdf.elements.collections import ElementCollection
42
42
 
43
43
  # # New Imports
44
44
  import itertools
@@ -61,12 +61,19 @@ from natural_pdf.classification.manager import ClassificationManager # For type
61
61
  # # --- Classification Imports --- #
62
62
  from natural_pdf.classification.mixin import ClassificationMixin # Import classification mixin
63
63
  from natural_pdf.core.element_manager import ElementManager
64
+
65
+ # Add new import
66
+ from natural_pdf.core.render_spec import RenderSpec, Visualizable
64
67
  from natural_pdf.describe.mixin import DescribeMixin # Import describe mixin
65
68
  from natural_pdf.elements.base import Element # Import base element
66
69
  from natural_pdf.elements.text import TextElement
70
+ from natural_pdf.extraction.mixin import ExtractionMixin # Import extraction mixin
67
71
  from natural_pdf.ocr import OCRManager, OCROptions
68
72
  from natural_pdf.ocr.utils import _apply_ocr_correction_to_elements
69
73
  from natural_pdf.qa import DocumentQA, get_qa_engine
74
+
75
+ # --- Text update mixin import --- #
76
+ from natural_pdf.text_mixin import TextMixin
70
77
  from natural_pdf.utils.locks import pdf_render_lock # Import the lock
71
78
 
72
79
  # # Import new utils
@@ -75,10 +82,6 @@ from natural_pdf.widgets.viewer import _IPYWIDGETS_AVAILABLE, InteractiveViewerW
75
82
 
76
83
  # --- End Classification Imports --- #
77
84
 
78
- # --- Text update mixin import --- #
79
- from natural_pdf.text_mixin import TextMixin
80
- from natural_pdf.extraction.mixin import ExtractionMixin # Import extraction mixin
81
-
82
85
 
83
86
  try:
84
87
  from deskew import determine_skew
@@ -92,7 +95,14 @@ except ImportError:
92
95
  logger = logging.getLogger(__name__)
93
96
 
94
97
 
95
- class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin, DescribeMixin):
98
+ class Page(
99
+ TextMixin,
100
+ ClassificationMixin,
101
+ ExtractionMixin,
102
+ ShapeDetectionMixin,
103
+ DescribeMixin,
104
+ Visualizable,
105
+ ):
96
106
  """Enhanced Page wrapper built on top of pdfplumber.Page.
97
107
 
98
108
  This class provides a fluent interface for working with PDF pages,
@@ -262,6 +272,77 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
262
272
  self._load_elements()
263
273
  self._to_image_cache: Dict[tuple, Optional["Image.Image"]] = {}
264
274
 
275
+ def _get_render_specs(
276
+ self,
277
+ mode: Literal["show", "render"] = "show",
278
+ color: Optional[Union[str, Tuple[int, int, int]]] = None,
279
+ highlights: Optional[List[Dict[str, Any]]] = None,
280
+ crop: Union[bool, Literal["content"]] = False,
281
+ crop_bbox: Optional[Tuple[float, float, float, float]] = None,
282
+ **kwargs,
283
+ ) -> List[RenderSpec]:
284
+ """Get render specifications for this page.
285
+
286
+ Args:
287
+ mode: Rendering mode - 'show' includes page highlights, 'render' is clean
288
+ color: Default color for highlights in show mode
289
+ highlights: Additional highlight groups to show
290
+ crop: Whether to crop the page
291
+ crop_bbox: Explicit crop bounds
292
+ **kwargs: Additional parameters
293
+
294
+ Returns:
295
+ List containing a single RenderSpec for this page
296
+ """
297
+ spec = RenderSpec(page=self)
298
+
299
+ # Handle cropping
300
+ if crop_bbox:
301
+ spec.crop_bbox = crop_bbox
302
+ elif crop == "content":
303
+ # Calculate content bounds from all elements
304
+ elements = self.get_elements(apply_exclusions=False)
305
+ if elements:
306
+ # Get bounding box of all elements
307
+ x_coords = []
308
+ y_coords = []
309
+ for elem in elements:
310
+ if hasattr(elem, "bbox") and elem.bbox:
311
+ x0, y0, x1, y1 = elem.bbox
312
+ x_coords.extend([x0, x1])
313
+ y_coords.extend([y0, y1])
314
+
315
+ if x_coords and y_coords:
316
+ spec.crop_bbox = (min(x_coords), min(y_coords), max(x_coords), max(y_coords))
317
+ elif crop is True:
318
+ # Crop to full page (no-op, but included for consistency)
319
+ spec.crop_bbox = (0, 0, self.width, self.height)
320
+
321
+ # Add highlights in show mode
322
+ if mode == "show":
323
+ # Add page's persistent highlights if any
324
+ page_highlights = self._highlighter.get_highlights_for_page(self.index)
325
+ for highlight in page_highlights:
326
+ spec.add_highlight(
327
+ bbox=highlight.bbox,
328
+ polygon=highlight.polygon,
329
+ color=highlight.color,
330
+ label=highlight.label,
331
+ element=None, # Persistent highlights don't have element refs
332
+ )
333
+
334
+ # Add additional highlight groups if provided
335
+ if highlights:
336
+ for group in highlights:
337
+ elements = group.get("elements", [])
338
+ group_color = group.get("color", color)
339
+ group_label = group.get("label")
340
+
341
+ for elem in elements:
342
+ spec.add_highlight(element=elem, color=group_color, label=group_label)
343
+
344
+ return [spec]
345
+
265
346
  @property
266
347
  def pdf(self) -> "PDF":
267
348
  """Provides public access to the parent PDF object."""
@@ -322,7 +403,7 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
322
403
  exclusion_func_or_region: Either a callable function returning a Region,
323
404
  a Region object, or another object with a valid .bbox attribute.
324
405
  label: Optional label for this exclusion (e.g., 'header', 'footer').
325
- method: Exclusion method - 'region' (exclude all elements in bounding box) or
406
+ method: Exclusion method - 'region' (exclude all elements in bounding box) or
326
407
  'element' (exclude only the specific elements). Default: 'region'.
327
408
 
328
409
  Returns:
@@ -346,7 +427,8 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
346
427
  # Likewise, if an ElementCollection is passed we iterate over its
347
428
  # elements and create Regions for each one.
348
429
  # ------------------------------------------------------------------
349
- from natural_pdf.elements.collections import ElementCollection # local import to avoid cycle
430
+ # Import ElementCollection from the new module path (old path removed)
431
+ from natural_pdf.elements.element_collection import ElementCollection
350
432
 
351
433
  # Selector string ---------------------------------------------------
352
434
  if isinstance(exclusion_func_or_region, str):
@@ -368,7 +450,12 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
368
450
  else: # method == "region"
369
451
  for el in matching_elements:
370
452
  try:
371
- bbox_coords = (float(el.x0), float(el.top), float(el.x1), float(el.bottom))
453
+ bbox_coords = (
454
+ float(el.x0),
455
+ float(el.top),
456
+ float(el.x1),
457
+ float(el.bottom),
458
+ )
372
459
  region = Region(self, bbox_coords, label=label)
373
460
  # Store directly as a Region tuple so we don't recurse endlessly
374
461
  self._exclusions.append((region, label, method))
@@ -376,9 +463,12 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
376
463
  f"Page {self.index}: Added exclusion region from selector '{selector_str}' -> {bbox_coords}"
377
464
  )
378
465
  except Exception as e:
379
- logger.warning(
380
- f"Page {self.index}: Failed to create exclusion region from element {el}: {e}"
466
+ # Re-raise so calling code/test sees the failure immediately
467
+ logger.error(
468
+ f"Page {self.index}: Failed to create exclusion region from element {el}: {e}",
469
+ exc_info=False,
381
470
  )
471
+ raise
382
472
  return self # Completed processing for selector input
383
473
 
384
474
  # ElementCollection -----------------------------------------------
@@ -406,9 +496,11 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
406
496
  f"Page {self.index}: Added exclusion region from ElementCollection element {bbox_coords}"
407
497
  )
408
498
  except Exception as e:
409
- logger.warning(
410
- f"Page {self.index}: Failed to convert ElementCollection element to Region: {e}"
499
+ logger.error(
500
+ f"Page {self.index}: Failed to convert ElementCollection element to Region: {e}",
501
+ exc_info=False,
411
502
  )
503
+ raise
412
504
  return self # Completed processing for ElementCollection input
413
505
 
414
506
  # ------------------------------------------------------------------
@@ -425,7 +517,11 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
425
517
  elif isinstance(exclusion_func_or_region, Region):
426
518
  # Store Region objects directly, assigning the label
427
519
  exclusion_func_or_region.label = label # Assign label
428
- exclusion_data = (exclusion_func_or_region, label, method) # Store as tuple for consistency
520
+ exclusion_data = (
521
+ exclusion_func_or_region,
522
+ label,
523
+ method,
524
+ ) # Store as tuple for consistency
429
525
  logger.debug(
430
526
  f"Page {self.index}: Added Region exclusion '{label}' with method '{method}': {exclusion_func_or_region}"
431
527
  )
@@ -547,7 +643,7 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
547
643
  else:
548
644
  # New format: (exclusion_item, label, method)
549
645
  exclusion_item, label, method = exclusion_data
550
-
646
+
551
647
  exclusion_label = label if label else f"exclusion {i}"
552
648
 
553
649
  # Process callable exclusion functions
@@ -609,7 +705,7 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
609
705
  ) -> List["Element"]:
610
706
  """
611
707
  Filters a list of elements, removing those based on exclusion rules.
612
- Handles both region-based exclusions (exclude all in area) and
708
+ Handles both region-based exclusions (exclude all in area) and
613
709
  element-based exclusions (exclude only specific elements).
614
710
 
615
711
  Args:
@@ -633,7 +729,7 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
633
729
 
634
730
  # Collect element-based exclusions
635
731
  excluded_elements = set() # Use set for O(1) lookup
636
-
732
+
637
733
  for exclusion_data in self._exclusions:
638
734
  # Handle both old format (2-tuple) and new format (3-tuple)
639
735
  if len(exclusion_data) == 2:
@@ -641,15 +737,15 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
641
737
  method = "region"
642
738
  else:
643
739
  exclusion_item, label, method = exclusion_data
644
-
740
+
645
741
  # Skip callables (already handled in _get_exclusion_regions)
646
742
  if callable(exclusion_item):
647
743
  continue
648
-
744
+
649
745
  # Skip regions (already in exclusion_regions)
650
746
  if isinstance(exclusion_item, Region):
651
747
  continue
652
-
748
+
653
749
  # Handle element-based exclusions
654
750
  if method == "element" and hasattr(exclusion_item, "bbox"):
655
751
  excluded_elements.add(id(exclusion_item))
@@ -665,10 +761,10 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
665
761
  filtered_elements = []
666
762
  region_excluded_count = 0
667
763
  element_excluded_count = 0
668
-
764
+
669
765
  for element in elements:
670
766
  exclude = False
671
-
767
+
672
768
  # Check element-based exclusions first (faster)
673
769
  if id(element) in excluded_elements:
674
770
  exclude = True
@@ -685,7 +781,7 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
685
781
  if debug_exclusions:
686
782
  print(f" Element {element} excluded by region {region}")
687
783
  break # No need to check other regions for this element
688
-
784
+
689
785
  if not exclude:
690
786
  filtered_elements.append(element)
691
787
 
@@ -837,7 +933,9 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
837
933
  Returns:
838
934
  ElementCollection with matching elements.
839
935
  """
840
- from natural_pdf.elements.collections import ElementCollection # Import here for type hint
936
+ from natural_pdf.elements.element_collection import ( # Import here for type hint
937
+ ElementCollection,
938
+ )
841
939
 
842
940
  if selector is not None and text is not None:
843
941
  raise ValueError("Provide either 'selector' or 'text', not both.")
@@ -1324,7 +1422,12 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
1324
1422
  return self._page.crop(bbox, **kwargs)
1325
1423
 
1326
1424
  def extract_text(
1327
- self, preserve_whitespace=True, use_exclusions=True, debug_exclusions=False, content_filter=None, **kwargs
1425
+ self,
1426
+ preserve_whitespace=True,
1427
+ use_exclusions=True,
1428
+ debug_exclusions=False,
1429
+ content_filter=None,
1430
+ **kwargs,
1328
1431
  ) -> str:
1329
1432
  """
1330
1433
  Extract text from this page, respecting exclusions and using pdfplumber's
@@ -1363,11 +1466,15 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
1363
1466
 
1364
1467
  # 2. Apply element-based exclusions if enabled
1365
1468
  if use_exclusions and self._exclusions:
1366
- # Filter word elements through _filter_elements_by_exclusions
1469
+ # Filter word elements through _filter_elements_by_exclusions
1367
1470
  # This handles both element-based and region-based exclusions
1368
- word_elements = self._filter_elements_by_exclusions(word_elements, debug_exclusions=debug)
1471
+ word_elements = self._filter_elements_by_exclusions(
1472
+ word_elements, debug_exclusions=debug
1473
+ )
1369
1474
  if debug:
1370
- logger.debug(f"Page {self.number}: {len(word_elements)} words remaining after exclusion filtering.")
1475
+ logger.debug(
1476
+ f"Page {self.number}: {len(word_elements)} words remaining after exclusion filtering."
1477
+ )
1371
1478
 
1372
1479
  # 3. Get region-based exclusions for spatial filtering
1373
1480
  apply_exclusions_flag = kwargs.get("use_exclusions", use_exclusions)
@@ -1375,7 +1482,9 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
1375
1482
  if apply_exclusions_flag and self._exclusions:
1376
1483
  exclusion_regions = self._get_exclusion_regions(include_callable=True, debug=debug)
1377
1484
  if debug:
1378
- logger.debug(f"Page {self.number}: Found {len(exclusion_regions)} region exclusions for spatial filtering.")
1485
+ logger.debug(
1486
+ f"Page {self.number}: Found {len(exclusion_regions)} region exclusions for spatial filtering."
1487
+ )
1379
1488
  elif debug:
1380
1489
  logger.debug(f"Page {self.number}: Not applying exclusions.")
1381
1490
 
@@ -1656,7 +1765,7 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
1656
1765
  table_settings.setdefault("join_y_tolerance", join)
1657
1766
 
1658
1767
  raw_tables = self._page.extract_tables(table_settings)
1659
-
1768
+
1660
1769
  # Apply RTL text processing to all extracted tables
1661
1770
  if raw_tables:
1662
1771
  processed_tables = []
@@ -1674,7 +1783,7 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
1674
1783
  processed_table.append(processed_row)
1675
1784
  processed_tables.append(processed_table)
1676
1785
  return processed_tables
1677
-
1786
+
1678
1787
  return raw_tables
1679
1788
  else:
1680
1789
  raise ValueError(
@@ -1743,7 +1852,7 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
1743
1852
  label: Optional[str] = None,
1744
1853
  use_color_cycling: bool = False,
1745
1854
  element: Optional[Any] = None,
1746
- include_attrs: Optional[List[str]] = None,
1855
+ annotate: Optional[List[str]] = None,
1747
1856
  existing: str = "append",
1748
1857
  ) -> "Page":
1749
1858
  """
@@ -1756,7 +1865,7 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
1756
1865
  label: Optional label for the highlight.
1757
1866
  use_color_cycling: If True and no label/color, use next cycle color.
1758
1867
  element: Optional original element being highlighted (for attribute extraction).
1759
- include_attrs: List of attribute names from 'element' to display.
1868
+ annotate: List of attribute names from 'element' to display.
1760
1869
  existing: How to handle existing highlights ('append' or 'replace').
1761
1870
 
1762
1871
  Returns:
@@ -1770,7 +1879,7 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
1770
1879
  label=label,
1771
1880
  use_color_cycling=use_color_cycling,
1772
1881
  element=element,
1773
- include_attrs=include_attrs,
1882
+ annotate=annotate,
1774
1883
  existing=existing,
1775
1884
  )
1776
1885
  return self
@@ -1782,7 +1891,7 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
1782
1891
  label: Optional[str] = None,
1783
1892
  use_color_cycling: bool = False,
1784
1893
  element: Optional[Any] = None,
1785
- include_attrs: Optional[List[str]] = None,
1894
+ annotate: Optional[List[str]] = None,
1786
1895
  existing: str = "append",
1787
1896
  ) -> "Page":
1788
1897
  """
@@ -1795,7 +1904,7 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
1795
1904
  label: Optional label for the highlight.
1796
1905
  use_color_cycling: If True and no label/color, use next cycle color.
1797
1906
  element: Optional original element being highlighted (for attribute extraction).
1798
- include_attrs: List of attribute names from 'element' to display.
1907
+ annotate: List of attribute names from 'element' to display.
1799
1908
  existing: How to handle existing highlights ('append' or 'replace').
1800
1909
 
1801
1910
  Returns:
@@ -1808,41 +1917,11 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
1808
1917
  label=label,
1809
1918
  use_color_cycling=use_color_cycling,
1810
1919
  element=element,
1811
- include_attrs=include_attrs,
1920
+ annotate=annotate,
1812
1921
  existing=existing,
1813
1922
  )
1814
1923
  return self
1815
1924
 
1816
- def show(
1817
- self,
1818
- resolution: float = 144,
1819
- width: Optional[int] = None,
1820
- labels: bool = True,
1821
- legend_position: str = "right",
1822
- render_ocr: bool = False,
1823
- ) -> Optional[Image.Image]:
1824
- """
1825
- Generates and returns an image of the page with persistent highlights rendered.
1826
-
1827
- Args:
1828
- resolution: Resolution in DPI for rendering (default: 144 DPI, equivalent to previous scale=2.0).
1829
- width: Optional width for the output image.
1830
- labels: Whether to include a legend for labels.
1831
- legend_position: Position of the legend.
1832
- render_ocr: Whether to render OCR text.
1833
-
1834
- Returns:
1835
- PIL Image object of the page with highlights, or None if rendering fails.
1836
- """
1837
- return self.to_image(
1838
- resolution=resolution,
1839
- width=width,
1840
- labels=labels,
1841
- legend_position=legend_position,
1842
- render_ocr=render_ocr,
1843
- include_highlights=True, # Ensure highlights are requested
1844
- )
1845
-
1846
1925
  def save_image(
1847
1926
  self,
1848
1927
  filename: str,
@@ -1870,17 +1949,38 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
1870
1949
  Returns:
1871
1950
  Self for method chaining.
1872
1951
  """
1873
- # Use to_image to generate and save the image
1874
- self.to_image(
1875
- path=filename,
1876
- width=width,
1877
- labels=labels,
1878
- legend_position=legend_position,
1879
- render_ocr=render_ocr,
1880
- include_highlights=include_highlights,
1881
- resolution=resolution,
1882
- **kwargs,
1883
- )
1952
+ # Use export() to save the image
1953
+ if include_highlights:
1954
+ self.export(
1955
+ path=filename,
1956
+ resolution=resolution,
1957
+ width=width,
1958
+ labels=labels,
1959
+ legend_position=legend_position,
1960
+ render_ocr=render_ocr,
1961
+ **kwargs,
1962
+ )
1963
+ else:
1964
+ # For saving without highlights, use render() and save manually
1965
+ img = self.render(resolution=resolution, **kwargs)
1966
+ if img:
1967
+ # Resize if width is specified
1968
+ if width is not None and width > 0 and img.width > 0:
1969
+ aspect_ratio = img.height / img.width
1970
+ height = int(width * aspect_ratio)
1971
+ try:
1972
+ img = img.resize((width, height), Image.Resampling.LANCZOS)
1973
+ except Exception as e:
1974
+ logger.warning(f"Could not resize image: {e}")
1975
+
1976
+ # Save the image
1977
+ try:
1978
+ if os.path.dirname(filename):
1979
+ os.makedirs(os.path.dirname(filename), exist_ok=True)
1980
+ img.save(filename)
1981
+ except Exception as e:
1982
+ logger.error(f"Failed to save image to {filename}: {e}")
1983
+
1884
1984
  return self
1885
1985
 
1886
1986
  def clear_highlights(self) -> "Page":
@@ -1923,280 +2023,6 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
1923
2023
  # Return the collection of elements which now have style attributes
1924
2024
  return processed_elements_collection
1925
2025
 
1926
- def to_image(
1927
- self,
1928
- path: Optional[str] = None,
1929
- width: Optional[int] = None,
1930
- labels: bool = True,
1931
- legend_position: str = "right",
1932
- render_ocr: bool = False,
1933
- resolution: Optional[float] = None,
1934
- include_highlights: bool = True,
1935
- exclusions: Optional[str] = None, # New parameter
1936
- **kwargs,
1937
- ) -> Optional[Image.Image]:
1938
- """
1939
- Generate a PIL image of the page, using HighlightingService if needed.
1940
-
1941
- Args:
1942
- path: Optional path to save the image to.
1943
- width: Optional width for the output image.
1944
- labels: Whether to include a legend for highlights.
1945
- legend_position: Position of the legend.
1946
- render_ocr: Whether to render OCR text on highlights.
1947
- resolution: Resolution in DPI for base page image. If None, uses global setting or defaults to 144 DPI.
1948
- include_highlights: Whether to render highlights.
1949
- exclusions: Accepts one of the following:
1950
- • None – no masking (default)
1951
- • "mask" – mask using solid white (back-compat)
1952
- • CSS/HTML colour string (e.g. "red", "#ff0000", "#ff000080")
1953
- • Tuple of RGB or RGBA values (ints 0-255 or floats 0-1)
1954
- All excluded regions are filled with this colour.
1955
- **kwargs: Additional parameters for pdfplumber.to_image.
1956
-
1957
- Returns:
1958
- PIL Image of the page, or None if rendering fails.
1959
- """
1960
- # Apply global options as defaults, but allow explicit parameters to override
1961
- import natural_pdf
1962
-
1963
- # Determine if this is likely a computational use (OCR, analysis, etc.)
1964
- # If resolution is explicitly provided but width is not, assume computational use
1965
- # and don't apply global display width settings
1966
- is_computational_use = (resolution is not None and width is None and
1967
- kwargs.get('include_highlights', True) is False)
1968
-
1969
- # Use global options if parameters are not explicitly set
1970
- if width is None and not is_computational_use:
1971
- width = natural_pdf.options.image.width
1972
- if resolution is None:
1973
- if natural_pdf.options.image.resolution is not None:
1974
- resolution = natural_pdf.options.image.resolution
1975
- else:
1976
- resolution = 144 # Default resolution when none specified
1977
- # 1. Create cache key (excluding path)
1978
- cache_key_parts = [
1979
- width,
1980
- labels,
1981
- legend_position,
1982
- render_ocr,
1983
- resolution,
1984
- include_highlights,
1985
- exclusions,
1986
- ]
1987
- # Convert kwargs to a stable, hashable representation
1988
- sorted_kwargs_list = []
1989
- for k, v in sorted(kwargs.items()):
1990
- if isinstance(v, list):
1991
- try:
1992
- v = tuple(v) # Convert lists to tuples
1993
- except TypeError: # pragma: no cover
1994
- # If list contains unhashable items, fall back to repr or skip
1995
- # For simplicity, we'll try to proceed; hashing will fail if v remains unhashable
1996
- logger.warning(
1997
- f"Cache key generation: List item in kwargs['{k}'] could not be converted to tuple due to unhashable elements."
1998
- )
1999
- sorted_kwargs_list.append((k, v))
2000
-
2001
- cache_key_parts.append(tuple(sorted_kwargs_list))
2002
-
2003
- try:
2004
- cache_key = tuple(cache_key_parts)
2005
- except TypeError as e: # pragma: no cover
2006
- logger.warning(
2007
- f"Page {self.index}: Could not create cache key for to_image due to unhashable item: {e}. Proceeding without cache for this call."
2008
- )
2009
- cache_key = None # Fallback to not using cache for this call
2010
-
2011
- image_to_return: Optional[Image.Image] = None
2012
-
2013
- # 2. Check cache
2014
- if cache_key is not None and cache_key in self._to_image_cache:
2015
- image_to_return = self._to_image_cache[cache_key]
2016
- logger.debug(f"Page {self.index}: Returning cached image for key: {cache_key}")
2017
- else:
2018
- # --- This is the original logic to generate the image ---
2019
- rendered_image_component: Optional[Image.Image] = (
2020
- None # Renamed from 'image' in original
2021
- )
2022
- render_resolution = resolution
2023
- thread_id = threading.current_thread().name
2024
- logger.debug(
2025
- f"[{thread_id}] Page {self.index}: Attempting to acquire pdf_render_lock for to_image..."
2026
- )
2027
- lock_wait_start = time.monotonic()
2028
- try:
2029
- # Acquire the global PDF rendering lock
2030
- with pdf_render_lock:
2031
- lock_acquired_time = time.monotonic()
2032
- logger.debug(
2033
- f"[{thread_id}] Page {self.index}: Acquired pdf_render_lock (waited {lock_acquired_time - lock_wait_start:.2f}s). Starting render..."
2034
- )
2035
- if include_highlights:
2036
- # Delegate rendering to the central service
2037
- rendered_image_component = self._highlighter.render_page(
2038
- page_index=self.index,
2039
- resolution=render_resolution,
2040
- labels=labels,
2041
- legend_position=legend_position,
2042
- render_ocr=render_ocr,
2043
- **kwargs,
2044
- )
2045
- else:
2046
- rendered_image_component = render_plain_page(self, render_resolution)
2047
- except Exception as e:
2048
- logger.error(f"Error rendering page {self.index}: {e}", exc_info=True)
2049
- # rendered_image_component remains None
2050
- finally:
2051
- render_end_time = time.monotonic()
2052
- logger.debug(
2053
- f"[{thread_id}] Page {self.index}: Released pdf_render_lock. Total render time (incl. lock wait): {render_end_time - lock_wait_start:.2f}s"
2054
- )
2055
-
2056
- if rendered_image_component is None:
2057
- if cache_key is not None:
2058
- self._to_image_cache[cache_key] = None # Cache the failure
2059
- # Save the image if path is provided (will try to save None, handled by PIL/OS)
2060
- if path:
2061
- try:
2062
- if os.path.dirname(path):
2063
- os.makedirs(os.path.dirname(path), exist_ok=True)
2064
- if rendered_image_component is not None: # Should be None here
2065
- rendered_image_component.save(path) # This line won't be hit if None
2066
- # else: logger.debug("Not saving None image") # Not strictly needed
2067
- except Exception as save_error: # pragma: no cover
2068
- logger.error(f"Failed to save image to {path}: {save_error}")
2069
- return None
2070
-
2071
- # --- Apply exclusion masking if requested ---
2072
- # This modifies 'rendered_image_component'
2073
- image_after_masking = rendered_image_component # Start with the rendered image
2074
-
2075
- # Determine if masking is requested and establish the fill colour
2076
- mask_requested = exclusions is not None and self._exclusions
2077
- mask_color: Union[str, Tuple[int, int, int, int]] = "white" # default
2078
-
2079
- if mask_requested:
2080
- if exclusions != "mask":
2081
- # Attempt to parse custom colour input
2082
- try:
2083
- if isinstance(exclusions, tuple):
2084
- # Handle RGB/RGBA tuples with ints 0-255 or floats 0-1
2085
- processed = []
2086
- all_float = all(isinstance(c, float) for c in exclusions)
2087
- for i, c in enumerate(exclusions):
2088
- if isinstance(c, float):
2089
- val = int(c * 255) if all_float or i == 3 else int(c)
2090
- else:
2091
- val = int(c)
2092
- processed.append(max(0, min(255, val)))
2093
- if len(processed) == 3:
2094
- processed.append(255) # add full alpha
2095
- mask_color = tuple(processed) # type: ignore[assignment]
2096
- elif isinstance(exclusions, str):
2097
- # Try using the optional 'colour' library for rich parsing
2098
- try:
2099
- from colour import Color # type: ignore
2100
-
2101
- color_obj = Color(exclusions)
2102
- mask_color = (
2103
- int(color_obj.red * 255),
2104
- int(color_obj.green * 255),
2105
- int(color_obj.blue * 255),
2106
- 255,
2107
- )
2108
- except Exception:
2109
- # Fallback: if parsing fails, treat as plain string accepted by PIL
2110
- mask_color = exclusions # e.g. "red"
2111
- else:
2112
- logger.warning(
2113
- f"Unsupported exclusions colour spec: {exclusions!r}. Using white."
2114
- )
2115
- except Exception as colour_parse_err: # pragma: no cover
2116
- logger.warning(
2117
- f"Failed to parse exclusions colour {exclusions!r}: {colour_parse_err}. Using white."
2118
- )
2119
-
2120
- try:
2121
- # Ensure image is mutable (RGB or RGBA)
2122
- if image_after_masking.mode not in ("RGB", "RGBA"):
2123
- image_after_masking = image_after_masking.convert("RGB")
2124
-
2125
- exclusion_regions = self._get_exclusion_regions(
2126
- include_callable=True, debug=False
2127
- )
2128
- if exclusion_regions:
2129
- draw = ImageDraw.Draw(image_after_masking)
2130
- # Scaling factor for converting PDF pts → image px
2131
- img_scale = render_resolution / 72.0
2132
-
2133
- # Determine fill colour compatible with current mode
2134
- def _mode_compatible(colour):
2135
- if isinstance(colour, tuple) and image_after_masking.mode != "RGBA":
2136
- return colour[:3] # drop alpha for RGB images
2137
- return colour
2138
-
2139
- fill_colour = _mode_compatible(mask_color)
2140
-
2141
- for region in exclusion_regions:
2142
- img_x0 = region.x0 * img_scale
2143
- img_top = region.top * img_scale
2144
- img_x1 = region.x1 * img_scale
2145
- img_bottom = region.bottom * img_scale
2146
-
2147
- img_coords = (
2148
- max(0, img_x0),
2149
- max(0, img_top),
2150
- min(image_after_masking.width, img_x1),
2151
- min(image_after_masking.height, img_bottom),
2152
- )
2153
- if img_coords[0] < img_coords[2] and img_coords[1] < img_coords[3]:
2154
- draw.rectangle(img_coords, fill=fill_colour)
2155
- else: # pragma: no cover
2156
- logger.warning(
2157
- f"Skipping invalid exclusion rect for masking: {img_coords}"
2158
- )
2159
- del draw # Release drawing context
2160
- except Exception as mask_error: # pragma: no cover
2161
- logger.error(
2162
- f"Error applying exclusion mask to page {self.index}: {mask_error}",
2163
- exc_info=True,
2164
- )
2165
- # Continue with potentially unmasked or partially masked image
2166
-
2167
- # --- Resize the final image if width is provided ---
2168
- image_final_content = image_after_masking # Start with image after masking
2169
- if width is not None and width > 0 and image_final_content.width > 0:
2170
- aspect_ratio = image_final_content.height / image_final_content.width
2171
- height = int(width * aspect_ratio)
2172
- try:
2173
- image_final_content = image_final_content.resize(
2174
- (width, height), Image.Resampling.LANCZOS
2175
- )
2176
- except Exception as resize_error: # pragma: no cover
2177
- logger.warning(f"Could not resize image: {resize_error}")
2178
- # image_final_content remains the un-resized version if resize fails
2179
-
2180
- # Store in cache
2181
- if cache_key is not None:
2182
- self._to_image_cache[cache_key] = image_final_content
2183
- logger.debug(f"Page {self.index}: Cached image for key: {cache_key}")
2184
- image_to_return = image_final_content
2185
- # --- End of cache miss block ---
2186
-
2187
- # Save the image (either from cache or newly generated) if path is provided
2188
- if path and image_to_return:
2189
- try:
2190
- # Ensure directory exists
2191
- if os.path.dirname(path): # Only call makedirs if there's a directory part
2192
- os.makedirs(os.path.dirname(path), exist_ok=True)
2193
- image_to_return.save(path)
2194
- logger.debug(f"Saved page image to: {path}")
2195
- except Exception as save_error: # pragma: no cover
2196
- logger.error(f"Failed to save image to {path}: {save_error}")
2197
-
2198
- return image_to_return
2199
-
2200
2026
  def _create_text_elements_from_ocr(
2201
2027
  self, ocr_results: List[Dict[str, Any]], image_width=None, image_height=None
2202
2028
  ) -> List["TextElement"]:
@@ -2309,7 +2135,8 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
2309
2135
  # Get base image without highlights using the determined resolution
2310
2136
  # Use the global PDF rendering lock
2311
2137
  with pdf_render_lock:
2312
- image = self.to_image(resolution=final_resolution, include_highlights=False)
2138
+ # Use render() for clean image without highlights
2139
+ image = self.render(resolution=final_resolution)
2313
2140
  if not image:
2314
2141
  logger.error(
2315
2142
  f" Failed to render page {self.number} to image for OCR extraction."
@@ -2491,7 +2318,7 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
2491
2318
  return self
2492
2319
 
2493
2320
  def get_section_between(
2494
- self, start_element=None, end_element=None, boundary_inclusion="both"
2321
+ self, start_element=None, end_element=None, include_boundaries="both"
2495
2322
  ) -> Optional["Region"]: # Return Optional
2496
2323
  """
2497
2324
  Get a section between two elements on this page.
@@ -2504,7 +2331,7 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
2504
2331
  return page_region.get_section_between(
2505
2332
  start_element=start_element,
2506
2333
  end_element=end_element,
2507
- boundary_inclusion=boundary_inclusion,
2334
+ include_boundaries=include_boundaries,
2508
2335
  )
2509
2336
  except Exception as e:
2510
2337
  logger.error(
@@ -2526,7 +2353,7 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
2526
2353
  self,
2527
2354
  start_elements=None,
2528
2355
  end_elements=None,
2529
- boundary_inclusion="start",
2356
+ include_boundaries="start",
2530
2357
  y_threshold=5.0,
2531
2358
  bounding_box=None,
2532
2359
  ) -> "ElementCollection[Region]":
@@ -2567,8 +2394,8 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
2567
2394
  end_elements = []
2568
2395
 
2569
2396
  valid_inclusions = ["start", "end", "both", "none"]
2570
- if boundary_inclusion not in valid_inclusions:
2571
- raise ValueError(f"boundary_inclusion must be one of {valid_inclusions}")
2397
+ if include_boundaries not in valid_inclusions:
2398
+ raise ValueError(f"include_boundaries must be one of {valid_inclusions}")
2572
2399
 
2573
2400
  if not start_elements:
2574
2401
  # Return an empty ElementCollection if no start elements
@@ -2600,12 +2427,12 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
2600
2427
  # Determine region boundaries
2601
2428
  sec_top = (
2602
2429
  current_start_element.top
2603
- if boundary_inclusion in ["start", "both"]
2430
+ if include_boundaries in ["start", "both"]
2604
2431
  else current_start_element.bottom
2605
2432
  )
2606
2433
  sec_bottom = (
2607
2434
  end_boundary_el.top
2608
- if boundary_inclusion not in ["end", "both"]
2435
+ if include_boundaries not in ["end", "both"]
2609
2436
  else end_boundary_el.bottom
2610
2437
  )
2611
2438
 
@@ -2627,12 +2454,12 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
2627
2454
  end_boundary_el = element
2628
2455
  sec_top = (
2629
2456
  current_start_element.top
2630
- if boundary_inclusion in ["start", "both"]
2457
+ if include_boundaries in ["start", "both"]
2631
2458
  else current_start_element.bottom
2632
2459
  )
2633
2460
  sec_bottom = (
2634
2461
  end_boundary_el.bottom
2635
- if boundary_inclusion in ["end", "both"]
2462
+ if include_boundaries in ["end", "both"]
2636
2463
  else end_boundary_el.top
2637
2464
  )
2638
2465
 
@@ -2652,7 +2479,7 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
2652
2479
  if active_section_started:
2653
2480
  sec_top = (
2654
2481
  current_start_element.top
2655
- if boundary_inclusion in ["start", "both"]
2482
+ if include_boundaries in ["start", "both"]
2656
2483
  else current_start_element.bottom
2657
2484
  )
2658
2485
  x0, _, x1, page_bottom = get_bounds()
@@ -3069,13 +2896,8 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
3069
2896
  else default_resolution
3070
2897
  )
3071
2898
 
3072
- # Use to_image, ensuring no highlights interfere
3073
- img = self.to_image(
3074
- resolution=resolution,
3075
- include_highlights=False,
3076
- labels=False,
3077
- exclusions=None, # Don't mask exclusions for classification input image
3078
- )
2899
+ # Use render() for clean image without highlights
2900
+ img = self.render(resolution=resolution)
3079
2901
  if img is None:
3080
2902
  raise ValueError(
3081
2903
  "Cannot classify page with 'vision' model: Failed to render image."
@@ -3134,7 +2956,8 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
3134
2956
  logger.debug(f"Page {self.number}: Detecting skew angle (resolution={resolution} DPI)...")
3135
2957
  try:
3136
2958
  # Render the page at the specified detection resolution
3137
- img = self.to_image(resolution=resolution, include_highlights=False)
2959
+ # Use render() for clean image without highlights
2960
+ img = self.render(resolution=resolution)
3138
2961
  if not img:
3139
2962
  logger.warning(f"Page {self.number}: Failed to render image for skew detection.")
3140
2963
  self._skew_angle = None
@@ -3213,7 +3036,8 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
3213
3036
 
3214
3037
  try:
3215
3038
  # Render the original page at the desired output resolution
3216
- img = self.to_image(resolution=resolution, include_highlights=False)
3039
+ # Use render() for clean image without highlights
3040
+ img = self.render(resolution=resolution)
3217
3041
  if not img:
3218
3042
  logger.error(f"Page {self.number}: Failed to render image for deskewing.")
3219
3043
  return None
@@ -3303,32 +3127,33 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
3303
3127
  def _apply_rtl_processing_to_text(self, text: str) -> str:
3304
3128
  """
3305
3129
  Apply RTL (Right-to-Left) text processing to a string.
3306
-
3130
+
3307
3131
  This converts visual order text (as stored in PDFs) to logical order
3308
3132
  for proper display of Arabic, Hebrew, and other RTL scripts.
3309
-
3133
+
3310
3134
  Args:
3311
3135
  text: Input text string in visual order
3312
-
3136
+
3313
3137
  Returns:
3314
3138
  Text string in logical order
3315
3139
  """
3316
3140
  if not text or not text.strip():
3317
3141
  return text
3318
-
3142
+
3319
3143
  # Quick check for RTL characters - if none found, return as-is
3320
3144
  import unicodedata
3321
-
3145
+
3322
3146
  def _contains_rtl(s):
3323
3147
  return any(unicodedata.bidirectional(ch) in ("R", "AL", "AN") for ch in s)
3324
-
3148
+
3325
3149
  if not _contains_rtl(text):
3326
3150
  return text
3327
-
3151
+
3328
3152
  try:
3329
3153
  from bidi.algorithm import get_display # type: ignore
3154
+
3330
3155
  from natural_pdf.utils.bidi_mirror import mirror_brackets
3331
-
3156
+
3332
3157
  # Apply BiDi algorithm to convert from visual to logical order
3333
3158
  # Process line by line to handle mixed content properly
3334
3159
  processed_lines = []
@@ -3341,9 +3166,9 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
3341
3166
  processed_lines.append(mirror_brackets(logical_line))
3342
3167
  else:
3343
3168
  processed_lines.append(line)
3344
-
3169
+
3345
3170
  return "\n".join(processed_lines)
3346
-
3171
+
3347
3172
  except (ImportError, Exception):
3348
3173
  # If bidi library is not available or fails, return original text
3349
3174
  return text
@@ -3361,3 +3186,31 @@ class Page(TextMixin, ClassificationMixin, ExtractionMixin, ShapeDetectionMixin,
3361
3186
  def images(self) -> List[Any]:
3362
3187
  """Get all embedded raster images on this page."""
3363
3188
  return self._element_mgr.images
3189
+
3190
+ def highlights(self, show: bool = False) -> "HighlightContext":
3191
+ """
3192
+ Create a highlight context for accumulating highlights.
3193
+
3194
+ This allows for clean syntax to show multiple highlight groups:
3195
+
3196
+ Example:
3197
+ with page.highlights() as h:
3198
+ h.add(page.find_all('table'), label='tables', color='blue')
3199
+ h.add(page.find_all('text:bold'), label='bold text', color='red')
3200
+ h.show()
3201
+
3202
+ Or with automatic display:
3203
+ with page.highlights(show=True) as h:
3204
+ h.add(page.find_all('table'), label='tables')
3205
+ h.add(page.find_all('text:bold'), label='bold')
3206
+ # Automatically shows when exiting the context
3207
+
3208
+ Args:
3209
+ show: If True, automatically show highlights when exiting context
3210
+
3211
+ Returns:
3212
+ HighlightContext for accumulating highlights
3213
+ """
3214
+ from natural_pdf.core.highlighting_service import HighlightContext
3215
+
3216
+ return HighlightContext(self, show_on_exit=show)