PyPI - natural-pdf - Versions diffs - 0.1.38__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

natural-pdf 0.1.38__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55)

natural_pdf/__init__.py +11 -6
natural_pdf/analyzers/__init__.py +6 -1
natural_pdf/analyzers/guides.py +354 -258
natural_pdf/analyzers/layout/layout_analyzer.py +2 -3
natural_pdf/analyzers/layout/layout_manager.py +18 -4
natural_pdf/analyzers/layout/paddle.py +11 -0
natural_pdf/analyzers/layout/surya.py +2 -3
natural_pdf/analyzers/shape_detection_mixin.py +25 -34
natural_pdf/analyzers/text_structure.py +2 -2
natural_pdf/classification/manager.py +1 -1
natural_pdf/collections/mixins.py +3 -2
natural_pdf/core/highlighting_service.py +743 -32
natural_pdf/core/page.py +252 -399
natural_pdf/core/page_collection.py +1249 -0
natural_pdf/core/pdf.py +231 -89
natural_pdf/{collections → core}/pdf_collection.py +18 -11
natural_pdf/core/render_spec.py +335 -0
natural_pdf/describe/base.py +1 -1
natural_pdf/elements/__init__.py +1 -0
natural_pdf/elements/base.py +108 -83
natural_pdf/elements/{collections.py → element_collection.py} +575 -1372
natural_pdf/elements/line.py +0 -1
natural_pdf/elements/rect.py +0 -1
natural_pdf/elements/region.py +405 -280
natural_pdf/elements/text.py +9 -7
natural_pdf/exporters/base.py +2 -2
natural_pdf/exporters/original_pdf.py +1 -1
natural_pdf/exporters/paddleocr.py +2 -4
natural_pdf/exporters/searchable_pdf.py +3 -2
natural_pdf/extraction/mixin.py +1 -3
natural_pdf/flows/collections.py +1 -69
natural_pdf/flows/element.py +25 -0
natural_pdf/flows/flow.py +1658 -19
natural_pdf/flows/region.py +757 -263
natural_pdf/ocr/ocr_options.py +0 -2
natural_pdf/ocr/utils.py +2 -1
natural_pdf/qa/document_qa.py +21 -5
natural_pdf/search/search_service_protocol.py +1 -1
natural_pdf/selectors/parser.py +35 -2
natural_pdf/tables/result.py +35 -1
natural_pdf/text_mixin.py +101 -0
natural_pdf/utils/debug.py +2 -1
natural_pdf/utils/highlighting.py +1 -0
natural_pdf/utils/layout.py +2 -2
natural_pdf/utils/packaging.py +4 -3
natural_pdf/utils/text_extraction.py +15 -12
natural_pdf/utils/visualization.py +385 -0
{natural_pdf-0.1.38.dist-info → natural_pdf-0.2.0.dist-info}/METADATA +7 -3
{natural_pdf-0.1.38.dist-info → natural_pdf-0.2.0.dist-info}/RECORD +55 -52
optimization/memory_comparison.py +1 -1
optimization/pdf_analyzer.py +2 -2
{natural_pdf-0.1.38.dist-info → natural_pdf-0.2.0.dist-info}/WHEEL +0 -0
{natural_pdf-0.1.38.dist-info → natural_pdf-0.2.0.dist-info}/entry_points.txt +0 -0
{natural_pdf-0.1.38.dist-info → natural_pdf-0.2.0.dist-info}/licenses/LICENSE +0 -0
{natural_pdf-0.1.38.dist-info → natural_pdf-0.2.0.dist-info}/top_level.txt +0 -0

natural_pdf/core/pdf.py CHANGED Viewed

@@ -16,6 +16,7 @@ from typing import (
     Dict,
     Iterable,
     List,
+    Literal,
     Optional,
     Tuple,
     Type,
@@ -31,6 +32,7 @@ from natural_pdf.classification.manager import ClassificationError
 from natural_pdf.classification.mixin import ClassificationMixin
 from natural_pdf.classification.results import ClassificationResult
 from natural_pdf.core.highlighting_service import HighlightingService
+from natural_pdf.core.render_spec import RenderSpec, Visualizable
 from natural_pdf.elements.base import Element
 from natural_pdf.elements.region import Region
 from natural_pdf.export.mixin import ExportMixin
@@ -38,8 +40,12 @@ from natural_pdf.extraction.manager import StructuredDataManager
 from natural_pdf.extraction.mixin import ExtractionMixin
 from natural_pdf.ocr import OCRManager, OCROptions
 from natural_pdf.selectors.parser import parse_selector
+from natural_pdf.text_mixin import TextMixin
 from natural_pdf.utils.locks import pdf_render_lock
+if TYPE_CHECKING:
+    from natural_pdf.elements.element_collection import ElementCollection
 try:
     from typing import Any as TypingAny
@@ -103,7 +109,6 @@ except ImportError:
 from collections.abc import Sequence
 class _LazyPageList(Sequence):
     """A lightweight, list-like object that lazily instantiates natural-pdf Page objects.
@@ -141,18 +146,18 @@ class _LazyPageList(Sequence):
     """
     def __init__(
-        self,
-        parent_pdf: "PDF",
-        plumber_pdf: "pdfplumber.PDF",
-        font_attrs=None,
+        self,
+        parent_pdf: "PDF",
+        plumber_pdf: "pdfplumber.PDF",
+        font_attrs=None,
         load_text=True,
-        indices: Optional[List[int]] = None
+        indices: Optional[List[int]] = None,
     ):
         self._parent_pdf = parent_pdf
         self._plumber_pdf = plumber_pdf
         self._font_attrs = font_attrs
         self._load_text = load_text
         # If indices is provided, this is a sliced view
         if indices is not None:
             self._indices = indices
@@ -180,23 +185,23 @@ class _LazyPageList(Sequence):
                 font_attrs=self._font_attrs,
                 load_text=self._load_text,
             )
             # Apply any stored exclusions to the newly created page
-            if hasattr(self._parent_pdf, '_exclusions'):
+            if hasattr(self._parent_pdf, "_exclusions"):
                 for exclusion_data in self._parent_pdf._exclusions:
                     exclusion_func, label = exclusion_data
                     try:
                         cached.add_exclusion(exclusion_func, label=label)
                     except Exception as e:
                         logger.warning(f"Failed to apply exclusion to page {cached.number}: {e}")
             # Apply any stored regions to the newly created page
-            if hasattr(self._parent_pdf, '_regions'):
+            if hasattr(self._parent_pdf, "_regions"):
                 for region_data in self._parent_pdf._regions:
                     region_func, name = region_data
                     try:
                         region_instance = region_func(cached)
-                        if region_instance and hasattr(region_instance, '__class__'):
+                        if region_instance and hasattr(region_instance, "__class__"):
                             # Check if it's a Region-like object (avoid importing Region here)
                             cached.add_region(region_instance, name=name, source="named")
                         elif region_instance is not None:
@@ -205,7 +210,7 @@ class _LazyPageList(Sequence):
                             )
                     except Exception as e:
                         logger.warning(f"Failed to apply region to page {cached.number}: {e}")
             self._cache[index] = cached
         return cached
@@ -215,7 +220,7 @@ class _LazyPageList(Sequence):
     def __getitem__(self, key):
         if isinstance(key, slice):
-            # Get the slice of our current indices
+            # Get the slice of our current indices
             slice_indices = range(*key.indices(len(self)))
             # Extract the actual page indices for this slice
             actual_indices = [self._indices[i] for i in slice_indices]
@@ -225,7 +230,7 @@ class _LazyPageList(Sequence):
                 self._plumber_pdf,
                 font_attrs=self._font_attrs,
                 load_text=self._load_text,
-                indices=actual_indices
+                indices=actual_indices,
             )
         elif isinstance(key, int):
             if key < 0:
@@ -247,7 +252,7 @@ class _LazyPageList(Sequence):
 # --- End Lazy Page List Helper --- #
-class PDF(ExtractionMixin, ExportMixin, ClassificationMixin):
+class PDF(TextMixin, ExtractionMixin, ExportMixin, ClassificationMixin, Visualizable):
     """Enhanced PDF wrapper built on top of pdfplumber.
     This class provides a fluent interface for working with PDF documents,
@@ -576,7 +581,7 @@ class PDF(ExtractionMixin, ExportMixin, ClassificationMixin):
                 print(f"Page {page.index} has {len(page.chars)} characters")
             ```
         """
-        from natural_pdf.elements.collections import PageCollection
+        from natural_pdf.core.page_collection import PageCollection
         if not hasattr(self, "_pages"):
             raise AttributeError("PDF pages not yet initialized.")
@@ -608,7 +613,7 @@ class PDF(ExtractionMixin, ExportMixin, ClassificationMixin):
             raise AttributeError("PDF pages not yet initialized.")
         self._exclusions = []
         # Clear exclusions only from already-created (cached) pages to avoid forcing page creation
         for i in range(len(self._pages)):
             if self._pages._cache[i] is not None:  # Only clear from existing pages
@@ -618,9 +623,7 @@ class PDF(ExtractionMixin, ExportMixin, ClassificationMixin):
                     logger.warning(f"Failed to clear exclusions from existing page {i}: {e}")
         return self
-    def add_exclusion(
-        self, exclusion_func, label: str = None
-    ) -> "PDF":
+    def add_exclusion(self, exclusion_func, label: str = None) -> "PDF":
         """Add an exclusion function to the PDF.
         Exclusion functions define regions of each page that should be ignored during
@@ -669,12 +672,12 @@ class PDF(ExtractionMixin, ExportMixin, ClassificationMixin):
         # Support selector strings and ElementCollection objects directly.
         # Store exclusion and apply only to already-created pages.
         # ------------------------------------------------------------------
-        from natural_pdf.elements.collections import ElementCollection  # local import
+        from natural_pdf.elements.element_collection import ElementCollection  # local import
         if isinstance(exclusion_func, str) or isinstance(exclusion_func, ElementCollection):
             # Store for bookkeeping and lazy application
             self._exclusions.append((exclusion_func, label))
             # Apply only to already-created (cached) pages to avoid forcing page creation
             for i in range(len(self._pages)):
                 if self._pages._cache[i] is not None:  # Only apply to existing pages
@@ -842,11 +845,11 @@ class PDF(ExtractionMixin, ExportMixin, ClassificationMixin):
                     "include_highlights": False,
                     "exclusions": "mask" if apply_exclusions else None,
                 }
-                img = page.to_image(**to_image_kwargs)
+                # Use render() for clean image without highlights
+                img = page.render(resolution=final_resolution)
                 if img is None:
                     logger.error(f"  Failed to render page {page.number} to image.")
                     continue
-                    continue
                 images_pil.append(img)
                 page_image_map.append((page, img))
         except Exception as e:
@@ -1140,7 +1143,7 @@ class PDF(ExtractionMixin, ExportMixin, ClassificationMixin):
             if page_elements:
                 all_elements.extend(page_elements.elements)
-        from natural_pdf.elements.collections import ElementCollection
+        from natural_pdf.elements.element_collection import ElementCollection
         return ElementCollection(all_elements)
@@ -1229,6 +1232,62 @@ class PDF(ExtractionMixin, ExportMixin, ClassificationMixin):
         return all_tables
+    def get_sections(
+        self,
+        start_elements=None,
+        end_elements=None,
+        new_section_on_page_break=False,
+        include_boundaries="both",
+    ) -> "ElementCollection":
+        """
+        Extract sections from the entire PDF based on start/end elements.
+        This method delegates to the PageCollection.get_sections() method,
+        providing a convenient way to extract document sections across all pages.
+        Args:
+            start_elements: Elements or selector string that mark the start of sections (optional)
+            end_elements: Elements or selector string that mark the end of sections (optional)
+            new_section_on_page_break: Whether to start a new section at page boundaries (default: False)
+            include_boundaries: How to include boundary elements: 'start', 'end', 'both', or 'none' (default: 'both')
+        Returns:
+            ElementCollection of Region objects representing the extracted sections
+        Example:
+            Extract sections between headers:
+            ```python
+            pdf = npdf.PDF("document.pdf")
+            # Get sections between headers
+            sections = pdf.get_sections(
+                start_elements='text[size>14]:bold',
+                end_elements='text[size>14]:bold'
+            )
+            # Get sections that break at page boundaries
+            sections = pdf.get_sections(
+                start_elements='text:contains("Chapter")',
+                new_section_on_page_break=True
+            )
+            ```
+        Note:
+            You can provide only start_elements, only end_elements, or both.
+            - With only start_elements: sections go from each start to the next start (or end of document)
+            - With only end_elements: sections go from beginning of document to each end
+            - With both: sections go from each start to the corresponding end
+        """
+        if not hasattr(self, "_pages"):
+            raise AttributeError("PDF pages not yet initialized.")
+        return self.pages.get_sections(
+            start_elements=start_elements,
+            end_elements=end_elements,
+            new_section_on_page_break=new_section_on_page_break,
+            include_boundaries=include_boundaries,
+        )
     def save_searchable(self, output_path: Union[str, "Path"], dpi: int = 300, **kwargs):
         """
         DEPRECATED: Use save_pdf(..., ocr=True) instead.
@@ -1363,6 +1422,36 @@ class PDF(ExtractionMixin, ExportMixin, ClassificationMixin):
                 # Re-raise exception from exporter
                 raise e
+    def _get_render_specs(
+        self,
+        mode: Literal["show", "render"] = "show",
+        color: Optional[Union[str, Tuple[int, int, int]]] = None,
+        highlights: Optional[List[Dict[str, Any]]] = None,
+        crop: Union[bool, Literal["content"]] = False,
+        crop_bbox: Optional[Tuple[float, float, float, float]] = None,
+        **kwargs,
+    ) -> List[RenderSpec]:
+        """Get render specifications for this PDF.
+        For PDF objects, this delegates to the pages collection to handle
+        multi-page rendering.
+        Args:
+            mode: Rendering mode - 'show' includes highlights, 'render' is clean
+            color: Color for highlighting pages in show mode
+            highlights: Additional highlight groups to show
+            crop: Whether to crop pages
+            crop_bbox: Explicit crop bounds
+            **kwargs: Additional parameters
+        Returns:
+            List of RenderSpec objects, one per page
+        """
+        # Delegate to pages collection
+        return self.pages._get_render_specs(
+            mode=mode, color=color, highlights=highlights, crop=crop, crop_bbox=crop_bbox, **kwargs
+        )
     def ask(
         self,
         question: str,
@@ -1387,14 +1476,20 @@ class PDF(ExtractionMixin, ExportMixin, ClassificationMixin):
             Dict containing: answer, confidence, found, page_num, source_elements, etc.
         """
         # Delegate to ask_batch and return the first result
-        results = self.ask_batch([question], mode=mode, pages=pages, min_confidence=min_confidence, model=model, **kwargs)
-        return results[0] if results else {
-            "answer": None,
-            "confidence": 0.0,
-            "found": False,
-            "page_num": None,
-            "source_elements": [],
-        }
+        results = self.ask_batch(
+            [question], mode=mode, pages=pages, min_confidence=min_confidence, model=model, **kwargs
+        )
+        return (
+            results[0]
+            if results
+            else {
+                "answer": None,
+                "confidence": 0.0,
+                "found": False,
+                "page_num": None,
+                "source_elements": [],
+            }
+        )
     def ask_batch(
         self,
@@ -1464,7 +1559,9 @@ class PDF(ExtractionMixin, ExportMixin, ClassificationMixin):
                 for _ in questions
             ]
-        logger.info(f"Processing {len(questions)} question(s) across {len(target_pages)} page(s) using batch QA...")
+        logger.info(
+            f"Processing {len(questions)} question(s) across {len(target_pages)} page(s) using batch QA..."
+        )
         # Collect all page images and metadata for batch processing
         page_images = []
@@ -1474,26 +1571,26 @@ class PDF(ExtractionMixin, ExportMixin, ClassificationMixin):
         for page in target_pages:
             # Get page image
             try:
-                page_image = page.to_image(resolution=150, include_highlights=False)
+                # Use render() for clean image without highlights
+                page_image = page.render(resolution=150)
                 if page_image is None:
                     logger.warning(f"Failed to render image for page {page.number}, skipping")
                     continue
                 # Get text elements for word boxes
                 elements = page.find_all("text")
                 if not elements:
                     logger.warning(f"No text elements found on page {page.number}")
                     word_boxes = []
                 else:
-                    word_boxes = qa_engine._get_word_boxes_from_elements(elements, offset_x=0, offset_y=0)
+                    word_boxes = qa_engine._get_word_boxes_from_elements(
+                        elements, offset_x=0, offset_y=0
+                    )
                 page_images.append(page_image)
                 page_word_boxes.append(word_boxes)
-                page_metadata.append({
-                    "page_number": page.number,
-                    "page_object": page
-                })
+                page_metadata.append({"page_number": page.number, "page_object": page})
             except Exception as e:
                 logger.warning(f"Error processing page {page.number}: {e}")
                 continue
@@ -1513,22 +1610,24 @@ class PDF(ExtractionMixin, ExportMixin, ClassificationMixin):
         # Process all questions against all pages in batch
         all_results = []
         for question_text in questions:
             question_results = []
             # Ask this question against each page (but in batch per page)
-            for i, (page_image, word_boxes, page_meta) in enumerate(zip(page_images, page_word_boxes, page_metadata)):
+            for i, (page_image, word_boxes, page_meta) in enumerate(
+                zip(page_images, page_word_boxes, page_metadata)
+            ):
                 try:
-                    # Use the DocumentQA batch interface
+                    # Use the DocumentQA batch interface
                     page_result = qa_engine.ask(
                         image=page_image,
                         question=question_text,
                         word_boxes=word_boxes,
                         min_confidence=min_confidence,
-                        **kwargs
+                        **kwargs,
                     )
                     if page_result and page_result.found:
                         # Add page metadata to result
                         page_result_dict = {
@@ -1536,30 +1635,34 @@ class PDF(ExtractionMixin, ExportMixin, ClassificationMixin):
                             "confidence": page_result.confidence,
                             "found": page_result.found,
                             "page_num": page_meta["page_number"],
-                            "source_elements": getattr(page_result, 'source_elements', []),
-                            "start": getattr(page_result, 'start', -1),
-                            "end": getattr(page_result, 'end', -1),
+                            "source_elements": getattr(page_result, "source_elements", []),
+                            "start": getattr(page_result, "start", -1),
+                            "end": getattr(page_result, "end", -1),
                         }
                         question_results.append(page_result_dict)
                 except Exception as e:
-                    logger.warning(f"Error processing question '{question_text}' on page {page_meta['page_number']}: {e}")
+                    logger.warning(
+                        f"Error processing question '{question_text}' on page {page_meta['page_number']}: {e}"
+                    )
                     continue
             # Sort results by confidence and take the best one for this question
             question_results.sort(key=lambda x: x.get("confidence", 0), reverse=True)
             if question_results:
                 all_results.append(question_results[0])
             else:
                 # No results found for this question
-                all_results.append({
-                    "answer": None,
-                    "confidence": 0.0,
-                    "found": False,
-                    "page_num": None,
-                    "source_elements": [],
-                })
+                all_results.append(
+                    {
+                        "answer": None,
+                        "confidence": 0.0,
+                        "found": False,
+                        "page_num": None,
+                        "source_elements": [],
+                    }
+                )
         return all_results
@@ -1703,32 +1806,28 @@ class PDF(ExtractionMixin, ExportMixin, ClassificationMixin):
             logger.error(f"Failed to export correction task: {e}")
             raise
-    def correct_ocr(
+    def update_text(
         self,
-        correction_callback: Callable[[Any], Optional[str]],
+        transform: Callable[[Any], Optional[str]],
         pages: Optional[Union[Iterable[int], range, slice]] = None,
+        selector: str = "text",
         max_workers: Optional[int] = None,
         progress_callback: Optional[Callable[[], None]] = None,
     ) -> "PDF":
         """
-        Applies corrections to OCR text elements using a callback function.
-        Applies corrections to OCR text elements using a callback function.
+        Applies corrections to text elements using a callback function.
         Args:
-            correction_callback: Function that takes an element and returns corrected text or None
             correction_callback: Function that takes an element and returns corrected text or None
             pages: Optional page indices/slice to limit the scope of correction
-            max_workers: Maximum number of threads to use for parallel execution
-            progress_callback: Optional callback function for progress updates
+            selector: Selector to apply corrections to (default: "text")
             max_workers: Maximum number of threads to use for parallel execution
             progress_callback: Optional callback function for progress updates
         Returns:
             Self for method chaining
-            Self for method chaining
         """
         target_page_indices = []
-        target_page_indices = []
         if pages is None:
             target_page_indices = list(range(len(self._pages)))
         elif isinstance(pages, slice):
@@ -1741,32 +1840,31 @@ class PDF(ExtractionMixin, ExportMixin, ClassificationMixin):
                         raise IndexError(f"Page index {idx} out of range (0-{len(self._pages)-1}).")
             except (IndexError, TypeError, ValueError) as e:
                 raise ValueError(f"Invalid page index in 'pages': {pages}. Error: {e}") from e
-                raise ValueError(f"Invalid page index in 'pages': {pages}. Error: {e}") from e
         else:
             raise TypeError("'pages' must be None, a slice, or an iterable of page indices.")
-            raise TypeError("'pages' must be None, a slice, or an iterable of page indices.")
         if not target_page_indices:
-            logger.warning("No pages selected for OCR correction.")
+            logger.warning("No pages selected for text update.")
             return self
-        logger.info(f"Starting OCR correction for pages: {target_page_indices}")
-        logger.info(f"Starting OCR correction for pages: {target_page_indices}")
+        logger.info(
+            f"Starting text update for pages: {target_page_indices} with selector='{selector}'"
+        )
         for page_idx in target_page_indices:
             page = self._pages[page_idx]
             try:
-                page.correct_ocr(
-                    correction_callback=correction_callback,
+                page.update_text(
+                    transform=transform,
+                    selector=selector,
                     max_workers=max_workers,
                     progress_callback=progress_callback,
                 )
             except Exception as e:
-                logger.error(f"Error during correct_ocr on page {page_idx}: {e}")
-                logger.error(f"Error during correct_ocr on page {page_idx}: {e}")
+                logger.error(f"Error during text update on page {page_idx}: {e}")
+                logger.error(f"Error during text update on page {page_idx}: {e}")
-        logger.info("OCR correction process finished.")
-        logger.info("OCR correction process finished.")
+        logger.info("Text update process finished.")
         return self
     def __len__(self) -> int:
@@ -1781,9 +1879,10 @@ class PDF(ExtractionMixin, ExportMixin, ClassificationMixin):
             raise AttributeError("PDF pages not initialized yet.")
         if isinstance(key, slice):
-            from natural_pdf.elements.collections import PageCollection
+            from natural_pdf.core.page_collection import PageCollection
             # Use the lazy page list's slicing which returns another _LazyPageList
-            lazy_slice = self._pages[key]
+            lazy_slice = self._pages[key]
             # Wrap in PageCollection for compatibility
             return PageCollection(lazy_slice)
         elif isinstance(key, int):
@@ -2126,10 +2225,9 @@ class PDF(ExtractionMixin, ExportMixin, ClassificationMixin):
             try:
                 for page in tqdm(self.pages, desc="Rendering Pages"):
-                    img = page.to_image(
+                    # Use render() for clean images
+                    img = page.render(
                         resolution=resolution,
-                        include_highlights=include_highlights,
-                        labels=labels,
                         **kwargs,
                     )
                     if img:
@@ -2359,3 +2457,47 @@ class PDF(ExtractionMixin, ExportMixin, ClassificationMixin):
                     os.unlink(path)
             except Exception as e:
                 logger.warning(f"Failed to clean up temporary file '{path}': {e}")
+    def analyze_layout(self, *args, **kwargs) -> "ElementCollection[Region]":
+        """
+        Analyzes the layout of all pages in the PDF.
+        This is a convenience method that calls analyze_layout on the PDF's
+        page collection.
+        Args:
+            *args: Positional arguments passed to pages.analyze_layout().
+            **kwargs: Keyword arguments passed to pages.analyze_layout().
+        Returns:
+            An ElementCollection of all detected Region objects.
+        """
+        return self.pages.analyze_layout(*args, **kwargs)
+    def highlights(self, show: bool = False) -> "HighlightContext":
+        """
+        Create a highlight context for accumulating highlights.
+        This allows for clean syntax to show multiple highlight groups:
+        Example:
+            with pdf.highlights() as h:
+                h.add(pdf.find_all('table'), label='tables', color='blue')
+                h.add(pdf.find_all('text:bold'), label='bold text', color='red')
+                h.show()
+        Or with automatic display:
+            with pdf.highlights(show=True) as h:
+                h.add(pdf.find_all('table'), label='tables')
+                h.add(pdf.find_all('text:bold'), label='bold')
+                # Automatically shows when exiting the context
+        Args:
+            show: If True, automatically show highlights when exiting context
+        Returns:
+            HighlightContext for accumulating highlights
+        """
+        from natural_pdf.core.highlighting_service import HighlightContext
+        return HighlightContext(self, show_on_exit=show)

natural-pdf 0.1.38__py3-none-any.whl → 0.2.0__py3-none-any.whl

natural-pdf 0.1.38__py3-none-any.whl → 0.2.0__py3-none-any.whl