PyPI - natural-pdf - Versions diffs - 0.1.33__py3-none-any.whl → 0.1.35__py3-none-any.whl - Mend

natural-pdf 0.1.33py3-none-any.whl → 0.1.35py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (77) hide show

natural_pdf/analyzers/__init__.py +2 -2
natural_pdf/analyzers/guides.py +751 -607
natural_pdf/analyzers/layout/base.py +53 -6
natural_pdf/analyzers/layout/layout_analyzer.py +3 -1
natural_pdf/analyzers/layout/layout_manager.py +18 -14
natural_pdf/analyzers/layout/layout_options.py +1 -0
natural_pdf/analyzers/layout/paddle.py +102 -64
natural_pdf/analyzers/layout/table_structure_utils.py +3 -1
natural_pdf/analyzers/layout/yolo.py +2 -6
natural_pdf/analyzers/shape_detection_mixin.py +15 -6
natural_pdf/classification/manager.py +92 -77
natural_pdf/classification/mixin.py +49 -5
natural_pdf/classification/results.py +1 -1
natural_pdf/cli.py +7 -3
natural_pdf/collections/pdf_collection.py +96 -101
natural_pdf/core/element_manager.py +131 -45
natural_pdf/core/highlighting_service.py +5 -6
natural_pdf/core/page.py +120 -23
natural_pdf/core/pdf.py +477 -75
natural_pdf/describe/__init__.py +18 -12
natural_pdf/describe/base.py +179 -172
natural_pdf/describe/elements.py +155 -155
natural_pdf/describe/mixin.py +27 -19
natural_pdf/describe/summary.py +44 -55
natural_pdf/elements/base.py +134 -18
natural_pdf/elements/collections.py +90 -18
natural_pdf/elements/image.py +2 -1
natural_pdf/elements/line.py +0 -31
natural_pdf/elements/rect.py +0 -14
natural_pdf/elements/region.py +222 -108
natural_pdf/elements/text.py +18 -12
natural_pdf/exporters/__init__.py +4 -1
natural_pdf/exporters/original_pdf.py +12 -4
natural_pdf/extraction/mixin.py +66 -10
natural_pdf/extraction/result.py +1 -1
natural_pdf/flows/flow.py +63 -4
natural_pdf/flows/region.py +4 -4
natural_pdf/ocr/engine.py +83 -2
natural_pdf/ocr/engine_paddle.py +5 -5
natural_pdf/ocr/ocr_factory.py +2 -1
natural_pdf/ocr/ocr_manager.py +24 -13
natural_pdf/ocr/ocr_options.py +3 -10
natural_pdf/qa/document_qa.py +21 -8
natural_pdf/qa/qa_result.py +3 -7
natural_pdf/search/__init__.py +3 -2
natural_pdf/search/lancedb_search_service.py +5 -6
natural_pdf/search/numpy_search_service.py +5 -2
natural_pdf/selectors/parser.py +51 -6
natural_pdf/tables/__init__.py +2 -2
natural_pdf/tables/result.py +7 -6
natural_pdf/utils/bidi_mirror.py +2 -1
natural_pdf/utils/reading_order.py +3 -2
natural_pdf/utils/visualization.py +3 -3
natural_pdf/widgets/viewer.py +0 -1
{natural_pdf-0.1.33.dist-info → natural_pdf-0.1.35.dist-info}/METADATA +1 -1
natural_pdf-0.1.35.dist-info/RECORD +121 -0
optimization/memory_comparison.py +73 -58
optimization/pdf_analyzer.py +141 -96
optimization/performance_analysis.py +111 -110
optimization/test_cleanup_methods.py +47 -36
optimization/test_memory_fix.py +40 -39
tools/bad_pdf_eval/__init__.py +0 -1
tools/bad_pdf_eval/analyser.py +35 -18
tools/bad_pdf_eval/collate_summaries.py +22 -18
tools/bad_pdf_eval/compile_attempts_markdown.py +127 -0
tools/bad_pdf_eval/eval_suite.py +21 -9
tools/bad_pdf_eval/evaluate_quality.py +198 -0
tools/bad_pdf_eval/export_enrichment_csv.py +12 -8
tools/bad_pdf_eval/llm_enrich.py +71 -39
tools/bad_pdf_eval/llm_enrich_with_retry.py +289 -0
tools/bad_pdf_eval/reporter.py +1 -1
tools/bad_pdf_eval/utils.py +7 -4
natural_pdf-0.1.33.dist-info/RECORD +0 -118
{natural_pdf-0.1.33.dist-info → natural_pdf-0.1.35.dist-info}/WHEEL +0 -0
{natural_pdf-0.1.33.dist-info → natural_pdf-0.1.35.dist-info}/entry_points.txt +0 -0
{natural_pdf-0.1.33.dist-info → natural_pdf-0.1.35.dist-info}/licenses/LICENSE +0 -0
{natural_pdf-0.1.33.dist-info → natural_pdf-0.1.35.dist-info}/top_level.txt +0 -0

natural_pdf/analyzers/guides.py CHANGED Viewed

@@ -2,59 +2,58 @@
 import json
 import logging
-from typing import Any, Dict, List, Literal, Optional, Tuple, Union, TYPE_CHECKING
 from collections import UserList
+from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Tuple, Union
 import numpy as np
 from PIL import Image, ImageDraw
 if TYPE_CHECKING:
     from natural_pdf.core.page import Page
-    from natural_pdf.elements.region import Region
     from natural_pdf.elements.base import Element
     from natural_pdf.elements.collections import ElementCollection
+    from natural_pdf.elements.region import Region
 logger = logging.getLogger(__name__)
 def _normalize_markers(
-    markers: Union[str, List[str], "ElementCollection", None],
-    obj: Union["Page", "Region"]
+    markers: Union[str, List[str], "ElementCollection", None], obj: Union["Page", "Region"]
 ) -> List[str]:
     """
     Normalize markers parameter to a list of text strings for guide creation.
     Args:
         markers: Can be:
             - str: single selector or text string
-            - List[str]: list of selectors or text strings
+            - List[str]: list of selectors or text strings
             - ElementCollection: collection of elements to extract text from
             - None: empty list
         obj: Object to search for elements if markers contains selectors
     Returns:
         List of text strings to search for
     """
     if markers is None:
         return []
     if isinstance(markers, str):
         # Single selector or text string
-        if markers.startswith(('text', 'region', 'line', 'rect', 'blob', 'image')):
+        if markers.startswith(("text", "region", "line", "rect", "blob", "image")):
             # It's a CSS selector, find elements and extract text
-            if hasattr(obj, 'find_all'):
+            if hasattr(obj, "find_all"):
                 elements = obj.find_all(markers)
-                return [elem.text if hasattr(elem, 'text') else str(elem) for elem in elements]
+                return [elem.text if hasattr(elem, "text") else str(elem) for elem in elements]
             else:
                 logger.warning(f"Object {obj} doesn't support find_all for selector '{markers}'")
                 return [markers]  # Treat as literal text
         else:
             # Treat as literal text
             return [markers]
-    elif hasattr(markers, '__iter__') and not isinstance(markers, str):
+    elif hasattr(markers, "__iter__") and not isinstance(markers, str):
         # It might be an ElementCollection or list
-        if hasattr(markers, 'extract_each_text'):
+        if hasattr(markers, "extract_each_text"):
             # It's an ElementCollection
             try:
                 return markers.extract_each_text()
@@ -63,9 +62,9 @@ def _normalize_markers(
                 # Fallback: try to get text from individual elements
                 texts = []
                 for elem in markers:
-                    if hasattr(elem, 'text'):
+                    if hasattr(elem, "text"):
                         texts.append(elem.text)
-                    elif hasattr(elem, 'extract_text'):
+                    elif hasattr(elem, "extract_text"):
                         texts.append(elem.extract_text())
                     else:
                         texts.append(str(elem))
@@ -75,26 +74,31 @@ def _normalize_markers(
             result = []
             for marker in markers:
                 if isinstance(marker, str):
-                    if marker.startswith(('text', 'region', 'line', 'rect', 'blob', 'image')):
+                    if marker.startswith(("text", "region", "line", "rect", "blob", "image")):
                         # It's a selector
-                        if hasattr(obj, 'find_all'):
+                        if hasattr(obj, "find_all"):
                             elements = obj.find_all(marker)
-                            result.extend([elem.text if hasattr(elem, 'text') else str(elem) for elem in elements])
+                            result.extend(
+                                [
+                                    elem.text if hasattr(elem, "text") else str(elem)
+                                    for elem in elements
+                                ]
+                            )
                         else:
                             result.append(marker)  # Treat as literal
                     else:
                         # Literal text
                         result.append(marker)
-                elif hasattr(marker, 'text'):
+                elif hasattr(marker, "text"):
                     # It's an element object
                     result.append(marker.text)
-                elif hasattr(marker, 'extract_text'):
+                elif hasattr(marker, "extract_text"):
                     # It's an element that can extract text
                     result.append(marker.extract_text())
                 else:
                     result.append(str(marker))
             return result
     else:
         # Unknown type, try to convert to string
         return [str(markers)]
@@ -102,44 +106,46 @@ def _normalize_markers(
 class GuidesList(UserList):
     """A list of guide coordinates that also provides methods for creating guides."""
     def __init__(self, parent_guides: "Guides", axis: Literal["vertical", "horizontal"], data=None):
         super().__init__(data or [])
         self._parent = parent_guides
         self._axis = axis
     def from_content(
         self,
         markers: Union[str, List[str], "ElementCollection", None],
         obj: Optional[Union["Page", "Region"]] = None,
-        align: Literal['left', 'right', 'center', 'between'] = 'left',
+        align: Literal["left", "right", "center", "between"] = "left",
         outer: bool = True,
-        tolerance: float = 5
+        tolerance: float = 5,
+        *,
+        append: bool = False,
     ) -> "Guides":
         """
         Create guides from content markers and add to this axis.
         Args:
             markers: Content to search for. Can be:
                 - str: single selector (e.g., 'text:contains("Name")') or literal text
-                - List[str]: list of selectors or literal text strings
+                - List[str]: list of selectors or literal text strings
                 - ElementCollection: collection of elements to extract text from
                 - None: no markers
             obj: Page/Region to search (uses parent's context if None)
             align: How to align guides relative to found elements
             outer: Whether to add outer boundary guides
             tolerance: Tolerance for snapping to element edges
         Returns:
             Parent Guides object for chaining
         """
         target_obj = obj or self._parent.context
         if target_obj is None:
             raise ValueError("No object provided and no context available")
         # Normalize markers to list of text strings
         marker_texts = _normalize_markers(markers, target_obj)
         # Create guides for this axis
         new_guides = Guides.from_content(
             obj=target_obj,
@@ -147,15 +153,21 @@ class GuidesList(UserList):
             markers=marker_texts,
             align=align,
             outer=outer,
-            tolerance=tolerance
+            tolerance=tolerance,
         )
-        # Add to our list
-        if self._axis == 'vertical':
-            self.extend(new_guides.vertical)
+        # Replace or append based on parameter
+        if append:
+            if self._axis == "vertical":
+                self.extend(new_guides.vertical)
+            else:
+                self.extend(new_guides.horizontal)
         else:
-            self.extend(new_guides.horizontal)
+            if self._axis == "vertical":
+                self.data = list(new_guides.vertical)
+            else:
+                self.data = list(new_guides.horizontal)
         # Remove duplicates while preserving order
         seen = set()
         unique = []
@@ -164,26 +176,27 @@ class GuidesList(UserList):
                 seen.add(x)
                 unique.append(x)
         self.data = unique
         return self._parent  # Return parent for chaining
     def from_lines(
         self,
         obj: Optional[Union["Page", "Region"]] = None,
-        threshold: Union[float, str] = 'auto',
+        threshold: Union[float, str] = "auto",
         source_label: Optional[str] = None,
         max_lines: Optional[int] = None,
         outer: bool = False,
-        detection_method: str = 'vector',
+        detection_method: str = "vector",
         resolution: int = 192,
         *,
         n: Optional[int] = None,
         min_gap: Optional[int] = None,
-        **detect_kwargs
+        append: bool = False,
+        **detect_kwargs,
     ) -> "Guides":
         """
         Create guides from detected line elements.
         Args:
             obj: Page/Region to search (uses parent's context if None)
             threshold: Line detection threshold ('auto' or float 0.0-1.0)
@@ -198,14 +211,14 @@ class GuidesList(UserList):
             resolution: DPI for pixel-based detection (default: 192)
             **detect_kwargs: Additional parameters for pixel-based detection
                 (e.g., min_gap_h, min_gap_v, binarization_method, etc.)
         Returns:
             Parent Guides object for chaining
         """
         target_obj = obj or self._parent.context
         if target_obj is None:
             raise ValueError("No object provided and no context available")
         # Resolve max_lines via alias `n` (n takes priority)
         if n is not None:
             if n <= 0:
@@ -213,16 +226,16 @@ class GuidesList(UserList):
             max_lines = n
         # Set appropriate max_lines parameter for underlying API
-        max_lines_h = max_lines if self._axis == 'horizontal' else None
-        max_lines_v = max_lines if self._axis == 'vertical' else None
+        max_lines_h = max_lines if self._axis == "horizontal" else None
+        max_lines_v = max_lines if self._axis == "vertical" else None
         # Map generic `min_gap` to axis-specific argument expected by detection
         if min_gap is not None:
             if min_gap < 1:
                 raise ValueError("min_gap must be ≥ 1 pixel")
-            axis_key = 'min_gap_h' if self._axis == 'horizontal' else 'min_gap_v'
+            axis_key = "min_gap_h" if self._axis == "horizontal" else "min_gap_v"
             detect_kwargs.setdefault(axis_key, min_gap)
         # Create guides for this axis
         new_guides = Guides.from_lines(
             obj=target_obj,
@@ -234,15 +247,21 @@ class GuidesList(UserList):
             outer=outer,
             detection_method=detection_method,
             resolution=resolution,
-            **detect_kwargs
+            **detect_kwargs,
         )
-        # Add to our list
-        if self._axis == 'vertical':
-            self.extend(new_guides.vertical)
+        # Replace or append based on parameter
+        if append:
+            if self._axis == "vertical":
+                self.extend(new_guides.vertical)
+            else:
+                self.extend(new_guides.horizontal)
         else:
-            self.extend(new_guides.horizontal)
+            if self._axis == "vertical":
+                self.data = list(new_guides.vertical)
+            else:
+                self.data = list(new_guides.horizontal)
         # Remove duplicates
         seen = set()
         unique = []
@@ -251,41 +270,42 @@ class GuidesList(UserList):
                 seen.add(x)
                 unique.append(x)
         self.data = unique
         return self._parent
     def from_whitespace(
-        self,
-        obj: Optional[Union["Page", "Region"]] = None,
-        min_gap: float = 10
+        self, obj: Optional[Union["Page", "Region"]] = None, min_gap: float = 10,
+        *, append: bool = False
     ) -> "Guides":
         """
         Create guides from whitespace gaps.
         Args:
             obj: Page/Region to analyze (uses parent's context if None)
             min_gap: Minimum gap size to consider
         Returns:
             Parent Guides object for chaining
         """
         target_obj = obj or self._parent.context
         if target_obj is None:
             raise ValueError("No object provided and no context available")
         # Create guides for this axis
-        new_guides = Guides.from_whitespace(
-            obj=target_obj,
-            axis=self._axis,
-            min_gap=min_gap
-        )
-        # Add to our list
-        if self._axis == 'vertical':
-            self.extend(new_guides.vertical)
+        new_guides = Guides.from_whitespace(obj=target_obj, axis=self._axis, min_gap=min_gap)
+        # Replace or append
+        if append:
+            if self._axis == "vertical":
+                self.extend(new_guides.vertical)
+            else:
+                self.extend(new_guides.horizontal)
         else:
-            self.extend(new_guides.horizontal)
+            if self._axis == "vertical":
+                self.data = list(new_guides.vertical)
+            else:
+                self.data = list(new_guides.horizontal)
         # Remove duplicates
         seen = set()
         unique = []
@@ -294,37 +314,33 @@ class GuidesList(UserList):
                 seen.add(x)
                 unique.append(x)
         self.data = unique
         return self._parent
     def divide(self, n: int = 2, obj: Optional[Union["Page", "Region"]] = None) -> "Guides":
         """
         Divide the space evenly along this axis.
         Args:
             n: Number of divisions (creates n-1 guides)
             obj: Object to divide (uses parent's context if None)
         Returns:
             Parent Guides object for chaining
         """
         target_obj = obj or self._parent.context
         if target_obj is None:
             raise ValueError("No object provided and no context available")
         # Create guides using divide
-        new_guides = Guides.divide(
-            obj=target_obj,
-            n=n,
-            axis=self._axis
-        )
-        # Add to our list
-        if self._axis == 'vertical':
-            self.extend(new_guides.vertical)
+        new_guides = Guides.divide(obj=target_obj, n=n, axis=self._axis)
+        # Replace existing guides instead of extending (no append option here)
+        if self._axis == "vertical":
+            self.data = list(new_guides.vertical)
         else:
-            self.extend(new_guides.horizontal)
+            self.data = list(new_guides.horizontal)
         # Remove duplicates
         seen = set()
         unique = []
@@ -333,45 +349,45 @@ class GuidesList(UserList):
                 seen.add(x)
                 unique.append(x)
         self.data = unique
         return self._parent
     def snap_to_whitespace(
         self,
         min_gap: float = 10.0,
-        detection_method: str = 'pixels',
-        threshold: Union[float, str] = 'auto',
-        on_no_snap: str = 'warn',
-        obj: Optional[Union["Page", "Region"]] = None
+        detection_method: str = "pixels",
+        threshold: Union[float, str] = "auto",
+        on_no_snap: str = "warn",
+        obj: Optional[Union["Page", "Region"]] = None,
     ) -> "Guides":
         """
         Snap guides in this axis to whitespace gaps.
         Args:
             min_gap: Minimum gap size to consider
             detection_method: 'pixels' or 'text' for gap detection
             threshold: Threshold for whitespace detection (0.0-1.0) or 'auto'
             on_no_snap: What to do when snapping fails ('warn', 'raise', 'ignore')
             obj: Object to analyze (uses parent's context if None)
         Returns:
             Parent Guides object for chaining
         """
         target_obj = obj or self._parent.context
         if target_obj is None:
             raise ValueError("No object provided and no context available")
         # Use the parent's snap_to_whitespace but only for this axis
         original_guides = self.data.copy()
         # Temporarily set the parent's guides to only this axis
-        if self._axis == 'vertical':
+        if self._axis == "vertical":
             original_horizontal = self._parent.horizontal.data.copy()
             self._parent.horizontal.data = []
         else:
             original_vertical = self._parent.vertical.data.copy()
             self._parent.vertical.data = []
         try:
             # Call the parent's method
             self._parent.snap_to_whitespace(
@@ -379,140 +395,143 @@ class GuidesList(UserList):
                 min_gap=min_gap,
                 detection_method=detection_method,
                 threshold=threshold,
-                on_no_snap=on_no_snap
+                on_no_snap=on_no_snap,
             )
             # Update our data from the parent
-            if self._axis == 'vertical':
+            if self._axis == "vertical":
                 self.data = self._parent.vertical.data.copy()
             else:
                 self.data = self._parent.horizontal.data.copy()
         finally:
             # Restore the other axis
-            if self._axis == 'vertical':
+            if self._axis == "vertical":
                 self._parent.horizontal.data = original_horizontal
             else:
                 self._parent.vertical.data = original_vertical
         return self._parent
     def snap_to_content(
         self,
-        markers: Union[str, List[str], "ElementCollection", None] = 'text',
-        align: Literal['left', 'right', 'center'] = 'left',
+        markers: Union[str, List[str], "ElementCollection", None] = "text",
+        align: Literal["left", "right", "center"] = "left",
         tolerance: float = 5,
-        obj: Optional[Union["Page", "Region"]] = None
+        obj: Optional[Union["Page", "Region"]] = None,
     ) -> "Guides":
         """
         Snap guides in this axis to nearby text content.
         Args:
             markers: Content to snap to. Can be:
                 - str: single selector or literal text (default: 'text' for all text)
-                - List[str]: list of selectors or literal text strings
+                - List[str]: list of selectors or literal text strings
                 - ElementCollection: collection of elements
                 - None: no markers (no snapping)
             align: How to align to the found text
             tolerance: Maximum distance to move when snapping
             obj: Object to search (uses parent's context if None)
         Returns:
             Parent Guides object for chaining
         """
         target_obj = obj or self._parent.context
         if target_obj is None:
             raise ValueError("No object provided and no context available")
         # Handle special case of 'text' as a selector for all text
-        if markers == 'text':
+        if markers == "text":
             # Get all text elements
-            if hasattr(target_obj, 'find_all'):
-                text_elements = target_obj.find_all('text')
-                if hasattr(text_elements, 'elements'):
+            if hasattr(target_obj, "find_all"):
+                text_elements = target_obj.find_all("text")
+                if hasattr(text_elements, "elements"):
                     text_elements = text_elements.elements
                 # Snap each guide to the nearest text element
                 for i, guide_pos in enumerate(self.data):
-                    best_distance = float('inf')
+                    best_distance = float("inf")
                     best_pos = guide_pos
                     for elem in text_elements:
                         # Calculate target position based on alignment
-                        if self._axis == 'vertical':
-                            if align == 'left':
+                        if self._axis == "vertical":
+                            if align == "left":
                                 elem_pos = elem.x0
-                            elif align == 'right':
+                            elif align == "right":
                                 elem_pos = elem.x1
                             else:  # center
                                 elem_pos = (elem.x0 + elem.x1) / 2
                         else:  # horizontal
-                            if align == 'left':  # top for horizontal
+                            if align == "left":  # top for horizontal
                                 elem_pos = elem.top
-                            elif align == 'right':  # bottom for horizontal
+                            elif align == "right":  # bottom for horizontal
                                 elem_pos = elem.bottom
                             else:  # center
                                 elem_pos = (elem.top + elem.bottom) / 2
                         # Check if this is closer than current best
                         distance = abs(guide_pos - elem_pos)
                         if distance < best_distance and distance <= tolerance:
                             best_distance = distance
                             best_pos = elem_pos
                     # Update guide position if we found a good snap
                     if best_pos != guide_pos:
                         self.data[i] = best_pos
-                        logger.debug(f"Snapped {self._axis} guide from {guide_pos:.1f} to {best_pos:.1f}")
+                        logger.debug(
+                            f"Snapped {self._axis} guide from {guide_pos:.1f} to {best_pos:.1f}"
+                        )
             else:
                 logger.warning("Object does not support find_all for text snapping")
         else:
             # Original behavior for specific markers
             marker_texts = _normalize_markers(markers, target_obj)
             # Find each marker and snap guides
             for marker in marker_texts:
-                if hasattr(target_obj, 'find'):
+                if hasattr(target_obj, "find"):
                     element = target_obj.find(f'text:contains("{marker}")')
                     if not element:
                         logger.warning(f"Could not find text '{marker}' for snapping")
                         continue
                     # Determine target position based on alignment
-                    if self._axis == 'vertical':
-                        if align == 'left':
+                    if self._axis == "vertical":
+                        if align == "left":
                             target_pos = element.x0
-                        elif align == 'right':
+                        elif align == "right":
                             target_pos = element.x1
                         else:  # center
                             target_pos = (element.x0 + element.x1) / 2
                     else:  # horizontal
-                        if align == 'left':  # top for horizontal
+                        if align == "left":  # top for horizontal
                             target_pos = element.top
-                        elif align == 'right':  # bottom for horizontal
+                        elif align == "right":  # bottom for horizontal
                             target_pos = element.bottom
                         else:  # center
                             target_pos = (element.top + element.bottom) / 2
                     # Find closest guide and snap if within tolerance
                     if self.data:
-                        closest_idx = min(range(len(self.data)),
-                                           key=lambda i: abs(self.data[i] - target_pos))
+                        closest_idx = min(
+                            range(len(self.data)), key=lambda i: abs(self.data[i] - target_pos)
+                        )
                         if abs(self.data[closest_idx] - target_pos) <= tolerance:
                             self.data[closest_idx] = target_pos
         # Sort after snapping
         self.data.sort()
         return self._parent
     def shift(self, index: int, offset: float) -> "Guides":
         """
         Move a specific guide in this axis by a offset amount.
         Args:
             index: Index of the guide to move
             offset: Amount to move (positive = right/down)
         Returns:
             Parent Guides object for chaining
         """
@@ -521,18 +540,18 @@ class GuidesList(UserList):
             self.data.sort()
         else:
             logger.warning(f"Guide index {index} out of range for {self._axis} axis")
         return self._parent
     def add(self, position: Union[float, List[float]]) -> "Guides":
         """
         Add one or more guides at the specified position(s).
         Args:
             position: Coordinate(s) to add guide(s) at. Can be:
                 - float: single position
                 - List[float]: multiple positions
         Returns:
             Parent Guides object for chaining
         """
@@ -543,34 +562,34 @@ class GuidesList(UserList):
         else:
             # Add single position
             self.append(float(position))
         self.data.sort()
         return self._parent
     def remove_at(self, index: int) -> "Guides":
         """
         Remove a guide by index.
         Args:
             index: Index of guide to remove
         Returns:
             Parent Guides object for chaining
         """
         if 0 <= index < len(self.data):
             self.data.pop(index)
         return self._parent
     def clear_all(self) -> "Guides":
         """
         Remove all guides from this axis.
         Returns:
             Parent Guides object for chaining
         """
         self.data.clear()
         return self._parent
     def __add__(self, other):
         """Handle addition of GuidesList objects by returning combined data."""
         if isinstance(other, GuidesList):
@@ -584,11 +603,11 @@ class GuidesList(UserList):
 class Guides:
     """
     Manages vertical and horizontal guide lines for table extraction and layout analysis.
     Guides are collections of coordinates that can be used to define table boundaries,
     column positions, or general layout structures. They can be created through various
     detection methods or manually specified.
     Attributes:
         verticals: List of x-coordinates for vertical guide lines
         horizontals: List of y-coordinates for horizontal guide lines
@@ -596,7 +615,7 @@ class Guides:
         bounds: Optional bounding box (x0, y0, x1, y1) for relative coordinate conversion
         snap_behavior: How to handle failed snapping operations ('warn', 'ignore', 'raise')
     """
     def __init__(
         self,
         verticals: Optional[Union[List[float], "Page", "Region"]] = None,
@@ -604,52 +623,63 @@ class Guides:
         context: Optional[Union["Page", "Region"]] = None,
         bounds: Optional[Tuple[float, float, float, float]] = None,
         relative: bool = False,
-        snap_behavior: Literal['raise', 'warn', 'ignore'] = 'warn'
+        snap_behavior: Literal["raise", "warn", "ignore"] = "warn",
     ):
         """
         Initialize a Guides object.
         Args:
             verticals: List of x-coordinates for vertical guides, or a Page/Region as context
-            horizontals: List of y-coordinates for horizontal guides
+            horizontals: List of y-coordinates for horizontal guides
             context: Page or Region object these guides were created from
             bounds: Bounding box (x0, top, x1, bottom) if context not provided
             relative: Whether coordinates are relative (0-1) or absolute
             snap_behavior: How to handle snapping conflicts ('raise', 'warn', or 'ignore')
         """
         # Handle Guides(page) shorthand
-        if verticals is not None and not isinstance(verticals, (list, tuple)) and horizontals is None and context is None:
+        if (
+            verticals is not None
+            and not isinstance(verticals, (list, tuple))
+            and horizontals is None
+            and context is None
+        ):
             # First argument is a page/region, not coordinates
             context = verticals
             verticals = None
         self.context = context
         self.bounds = bounds
         self.relative = relative
         self.snap_behavior = snap_behavior
         # Initialize with GuidesList instances
         self._vertical = GuidesList(self, "vertical", sorted([float(x) for x in (verticals or [])]))
-        self._horizontal = GuidesList(self, "horizontal", sorted([float(y) for y in (horizontals or [])]))
+        self._horizontal = GuidesList(
+            self, "horizontal", sorted([float(y) for y in (horizontals or [])])
+        )
         # Determine bounds from context if needed
         if self.bounds is None and self.context is not None:
-            if hasattr(self.context, 'bbox'):
+            if hasattr(self.context, "bbox"):
                 self.bounds = self.context.bbox
-            elif hasattr(self.context, 'x0'):
-                self.bounds = (self.context.x0, self.context.top,
-                             self.context.x1, self.context.bottom)
+            elif hasattr(self.context, "x0"):
+                self.bounds = (
+                    self.context.x0,
+                    self.context.top,
+                    self.context.x1,
+                    self.context.bottom,
+                )
         # Convert relative to absolute if needed
         if self.relative and self.bounds:
             x0, top, x1, bottom = self.bounds
             width = x1 - x0
             height = bottom - top
             self._vertical.data = [x0 + v * width for v in self._vertical]
             self._horizontal.data = [top + h * height for h in self._horizontal]
             self.relative = False
     @property
     def vertical(self) -> GuidesList:
         """Get vertical guide coordinates."""
@@ -665,8 +695,10 @@ class Guides:
             self._vertical.data = sorted([float(x) for x in value.vertical])
         elif isinstance(value, str):
             # Explicitly reject strings to avoid confusing iteration over characters
-            raise TypeError(f"vertical cannot be a string, got '{value}'. Use a list of coordinates or Guides object.")
-        elif hasattr(value, '__iter__'):
+            raise TypeError(
+                f"vertical cannot be a string, got '{value}'. Use a list of coordinates or Guides object."
+            )
+        elif hasattr(value, "__iter__"):
             # Handle list/tuple of coordinates
             try:
                 self._vertical.data = sorted([float(x) for x in value])
@@ -690,8 +722,10 @@ class Guides:
             self._horizontal.data = sorted([float(y) for y in value.horizontal])
         elif isinstance(value, str):
             # Explicitly reject strings
-            raise TypeError(f"horizontal cannot be a string, got '{value}'. Use a list of coordinates or Guides object.")
-        elif hasattr(value, '__iter__'):
+            raise TypeError(
+                f"horizontal cannot be a string, got '{value}'. Use a list of coordinates or Guides object."
+            )
+        elif hasattr(value, "__iter__"):
             # Handle list/tuple of coordinates
             try:
                 self._horizontal.data = sorted([float(y) for y in value])
@@ -699,24 +733,24 @@ class Guides:
                 raise TypeError(f"horizontal must contain numeric values, got {value}: {e}")
         else:
             raise TypeError(f"horizontal must be a list, Guides object, or None, got {type(value)}")
     def _get_context_bounds(self) -> Optional[Tuple[float, float, float, float]]:
         """Get bounds from context if available."""
         if self.context is None:
             return None
-        if hasattr(self.context, 'bbox'):
+        if hasattr(self.context, "bbox"):
             return self.context.bbox
-        elif hasattr(self.context, 'x0') and hasattr(self.context, 'top'):
+        elif hasattr(self.context, "x0") and hasattr(self.context, "top"):
             return (self.context.x0, self.context.top, self.context.x1, self.context.bottom)
-        elif hasattr(self.context, 'width') and hasattr(self.context, 'height'):
+        elif hasattr(self.context, "width") and hasattr(self.context, "height"):
             return (0, 0, self.context.width, self.context.height)
         return None
     # -------------------------------------------------------------------------
     # Factory Methods
     # -------------------------------------------------------------------------
     @classmethod
     def divide(
         cls,
@@ -724,28 +758,28 @@ class Guides:
         n: Optional[int] = None,
         cols: Optional[int] = None,
         rows: Optional[int] = None,
-        axis: Literal['vertical', 'horizontal', 'both'] = 'both'
+        axis: Literal["vertical", "horizontal", "both"] = "both",
     ) -> "Guides":
         """
         Create guides by evenly dividing an object.
         Args:
             obj: Object to divide (Page, Region, or bbox tuple)
             n: Number of divisions (creates n+1 guides). Used if cols/rows not specified.
             cols: Number of columns (creates cols+1 vertical guides)
             rows: Number of rows (creates rows+1 horizontal guides)
             axis: Which axis to divide along
         Returns:
             New Guides object with evenly spaced lines
         Examples:
             # Divide into 3 columns
             guides = Guides.divide(page, cols=3)
             # Divide into 5 rows
             guides = Guides.divide(region, rows=5)
             # Divide both axes
             guides = Guides.divide(page, cols=3, rows=5)
         """
@@ -755,52 +789,52 @@ class Guides:
             context = None
         else:
             context = obj
-            if hasattr(obj, 'bbox'):
+            if hasattr(obj, "bbox"):
                 bounds = obj.bbox
-            elif hasattr(obj, 'x0'):
+            elif hasattr(obj, "x0"):
                 bounds = (obj.x0, obj.top, obj.x1, obj.bottom)
             else:
                 bounds = (0, 0, obj.width, obj.height)
         x0, y0, x1, y1 = bounds
         verticals = []
         horizontals = []
         # Handle vertical guides
-        if axis in ('vertical', 'both'):
+        if axis in ("vertical", "both"):
             n_vertical = cols + 1 if cols is not None else (n + 1 if n is not None else 0)
             if n_vertical > 0:
                 for i in range(n_vertical):
                     x = x0 + (x1 - x0) * i / (n_vertical - 1)
                     verticals.append(float(x))
         # Handle horizontal guides
-        if axis in ('horizontal', 'both'):
+        if axis in ("horizontal", "both"):
             n_horizontal = rows + 1 if rows is not None else (n + 1 if n is not None else 0)
             if n_horizontal > 0:
                 for i in range(n_horizontal):
                     y = y0 + (y1 - y0) * i / (n_horizontal - 1)
                     horizontals.append(float(y))
         return cls(verticals=verticals, horizontals=horizontals, context=context, bounds=bounds)
     @classmethod
     def from_lines(
         cls,
         obj: Union["Page", "Region"],
-        axis: Literal['vertical', 'horizontal', 'both'] = 'both',
-        threshold: Union[float, str] = 'auto',
+        axis: Literal["vertical", "horizontal", "both"] = "both",
+        threshold: Union[float, str] = "auto",
         source_label: Optional[str] = None,
         max_lines_h: Optional[int] = None,
         max_lines_v: Optional[int] = None,
         outer: bool = False,
-        detection_method: str = 'vector',
+        detection_method: str = "vector",
         resolution: int = 192,
-        **detect_kwargs
+        **detect_kwargs,
     ) -> "Guides":
         """
         Create guides from detected line elements.
         Args:
             obj: Page or Region to detect lines from
             axis: Which orientations to detect
@@ -818,108 +852,128 @@ class Guides:
                 - morph_op_h/v: Morphological operations ('open', 'close', 'none')
                 - smoothing_sigma_h/v: Gaussian smoothing sigma
                 - method: 'projection' (default) or 'lsd' (requires opencv)
         Returns:
             New Guides object with detected line positions
         """
         # Get bounds for potential outer guides
-        if hasattr(obj, 'bbox'):
+        if hasattr(obj, "bbox"):
             bounds = obj.bbox
-        elif hasattr(obj, 'x0'):
+        elif hasattr(obj, "x0"):
             bounds = (obj.x0, obj.top, obj.x1, obj.bottom)
-        elif hasattr(obj, 'width'):
+        elif hasattr(obj, "width"):
             bounds = (0, 0, obj.width, obj.height)
         else:
             bounds = None
         verticals = []
         horizontals = []
-        if detection_method == 'pixels':
+        if detection_method == "pixels":
             # Use pixel-based line detection
-            if not hasattr(obj, 'detect_lines'):
+            if not hasattr(obj, "detect_lines"):
                 raise ValueError(f"Object {obj} does not support pixel-based line detection")
             # Set up detection parameters
             detect_params = {
-                'resolution': resolution,
-                'source_label': source_label or 'guides_detection',
-                'horizontal': axis in ('horizontal', 'both'),
-                'vertical': axis in ('vertical', 'both'),
-                'replace': True,  # Replace any existing lines with this source
-                'method': detect_kwargs.get('method', 'projection'),
+                "resolution": resolution,
+                "source_label": source_label or "guides_detection",
+                "horizontal": axis in ("horizontal", "both"),
+                "vertical": axis in ("vertical", "both"),
+                "replace": True,  # Replace any existing lines with this source
+                "method": detect_kwargs.get("method", "projection"),
             }
             # Handle threshold parameter
-            if threshold == 'auto':
+            if threshold == "auto":
                 # Auto mode: use very low thresholds with max_lines constraints
-                detect_params['peak_threshold_h'] = 0.0
-                detect_params['peak_threshold_v'] = 0.0
-                detect_params['max_lines_h'] = max_lines_h
-                detect_params['max_lines_v'] = max_lines_v
+                detect_params["peak_threshold_h"] = 0.0
+                detect_params["peak_threshold_v"] = 0.0
+                detect_params["max_lines_h"] = max_lines_h
+                detect_params["max_lines_v"] = max_lines_v
             else:
                 # Fixed threshold mode
-                detect_params['peak_threshold_h'] = float(threshold) if axis in ('horizontal', 'both') else 1.0
-                detect_params['peak_threshold_v'] = float(threshold) if axis in ('vertical', 'both') else 1.0
-                detect_params['max_lines_h'] = max_lines_h
-                detect_params['max_lines_v'] = max_lines_v
+                detect_params["peak_threshold_h"] = (
+                    float(threshold) if axis in ("horizontal", "both") else 1.0
+                )
+                detect_params["peak_threshold_v"] = (
+                    float(threshold) if axis in ("vertical", "both") else 1.0
+                )
+                detect_params["max_lines_h"] = max_lines_h
+                detect_params["max_lines_v"] = max_lines_v
             # Add any additional detection parameters
-            for key in ['min_gap_h', 'min_gap_v', 'binarization_method',
-                       'adaptive_thresh_block_size', 'adaptive_thresh_C_val',
-                       'morph_op_h', 'morph_kernel_h', 'morph_op_v', 'morph_kernel_v',
-                       'smoothing_sigma_h', 'smoothing_sigma_v', 'peak_width_rel_height']:
+            for key in [
+                "min_gap_h",
+                "min_gap_v",
+                "binarization_method",
+                "adaptive_thresh_block_size",
+                "adaptive_thresh_C_val",
+                "morph_op_h",
+                "morph_kernel_h",
+                "morph_op_v",
+                "morph_kernel_v",
+                "smoothing_sigma_h",
+                "smoothing_sigma_v",
+                "peak_width_rel_height",
+            ]:
                 if key in detect_kwargs:
                     detect_params[key] = detect_kwargs[key]
             # Perform the detection
             obj.detect_lines(**detect_params)
             # Now get the detected lines and use them
-            if hasattr(obj, 'lines'):
+            if hasattr(obj, "lines"):
                 lines = obj.lines
-            elif hasattr(obj, 'find_all'):
-                lines = obj.find_all('line')
+            elif hasattr(obj, "find_all"):
+                lines = obj.find_all("line")
             else:
                 lines = []
             # Filter by the source we just used
-            lines = [l for l in lines if getattr(l, 'source', None) == detect_params['source_label']]
+            lines = [
+                l for l in lines if getattr(l, "source", None) == detect_params["source_label"]
+            ]
         else:  # detection_method == 'vector' (default)
             # Get existing lines from the object
-            if hasattr(obj, 'lines'):
+            if hasattr(obj, "lines"):
                 lines = obj.lines
-            elif hasattr(obj, 'find_all'):
-                lines = obj.find_all('line')
+            elif hasattr(obj, "find_all"):
+                lines = obj.find_all("line")
             else:
                 logger.warning(f"Object {obj} has no lines or find_all method")
                 lines = []
             # Filter by source if specified
             if source_label:
-                lines = [l for l in lines if getattr(l, 'source', None) == source_label]
+                lines = [l for l in lines if getattr(l, "source", None) == source_label]
         # Process lines (same logic for both methods)
         # Separate lines by orientation and collect with metadata for ranking
         h_line_data = []  # (y_coord, length, line_obj)
         v_line_data = []  # (x_coord, length, line_obj)
         for line in lines:
-            if hasattr(line, 'is_horizontal') and hasattr(line, 'is_vertical'):
-                if line.is_horizontal and axis in ('horizontal', 'both'):
+            if hasattr(line, "is_horizontal") and hasattr(line, "is_vertical"):
+                if line.is_horizontal and axis in ("horizontal", "both"):
                     # Use the midpoint y-coordinate for horizontal lines
                     y = (line.top + line.bottom) / 2
                     # Calculate line length for ranking
-                    length = getattr(line, 'width', abs(getattr(line, 'x1', 0) - getattr(line, 'x0', 0)))
+                    length = getattr(
+                        line, "width", abs(getattr(line, "x1", 0) - getattr(line, "x0", 0))
+                    )
                     h_line_data.append((y, length, line))
-                elif line.is_vertical and axis in ('vertical', 'both'):
+                elif line.is_vertical and axis in ("vertical", "both"):
                     # Use the midpoint x-coordinate for vertical lines
                     x = (line.x0 + line.x1) / 2
                     # Calculate line length for ranking
-                    length = getattr(line, 'height', abs(getattr(line, 'bottom', 0) - getattr(line, 'top', 0)))
+                    length = getattr(
+                        line, "height", abs(getattr(line, "bottom", 0) - getattr(line, "top", 0))
+                    )
                     v_line_data.append((x, length, line))
         # Process horizontal lines
         if max_lines_h is not None and h_line_data:
             # Sort by length (longer lines are typically more significant)
@@ -928,12 +982,14 @@ class Guides:
             selected_h = h_line_data[:max_lines_h]
             # Extract just the coordinates and sort by position
             horizontals = sorted([coord for coord, _, _ in selected_h])
-            logger.debug(f"Selected {len(horizontals)} horizontal lines from {len(h_line_data)} candidates")
+            logger.debug(
+                f"Selected {len(horizontals)} horizontal lines from {len(h_line_data)} candidates"
+            )
         else:
             # Use all horizontal lines (original behavior)
             horizontals = [coord for coord, _, _ in h_line_data]
             horizontals = sorted(list(set(horizontals)))
         # Process vertical lines
         if max_lines_v is not None and v_line_data:
             # Sort by length (longer lines are typically more significant)
@@ -942,115 +998,117 @@ class Guides:
             selected_v = v_line_data[:max_lines_v]
             # Extract just the coordinates and sort by position
             verticals = sorted([coord for coord, _, _ in selected_v])
-            logger.debug(f"Selected {len(verticals)} vertical lines from {len(v_line_data)} candidates")
+            logger.debug(
+                f"Selected {len(verticals)} vertical lines from {len(v_line_data)} candidates"
+            )
         else:
             # Use all vertical lines (original behavior)
             verticals = [coord for coord, _, _ in v_line_data]
             verticals = sorted(list(set(verticals)))
         # Add outer guides if requested
         if outer and bounds:
-            if axis in ('vertical', 'both'):
+            if axis in ("vertical", "both"):
                 if not verticals or verticals[0] > bounds[0]:
                     verticals.insert(0, bounds[0])  # x0
                 if not verticals or verticals[-1] < bounds[2]:
                     verticals.append(bounds[2])  # x1
-            if axis in ('horizontal', 'both'):
+            if axis in ("horizontal", "both"):
                 if not horizontals or horizontals[0] > bounds[1]:
                     horizontals.insert(0, bounds[1])  # y0
                 if not horizontals or horizontals[-1] < bounds[3]:
                     horizontals.append(bounds[3])  # y1
         # Remove duplicates and sort again
         verticals = sorted(list(set(verticals)))
         horizontals = sorted(list(set(horizontals)))
         return cls(verticals=verticals, horizontals=horizontals, context=obj, bounds=bounds)
     @classmethod
     def from_content(
         cls,
         obj: Union["Page", "Region"],
-        axis: Literal['vertical', 'horizontal'] = 'vertical',
+        axis: Literal["vertical", "horizontal"] = "vertical",
         markers: Union[str, List[str], "ElementCollection", None] = None,
-        align: Literal['left', 'right', 'center', 'between'] = 'left',
+        align: Literal["left", "right", "center", "between"] = "left",
         outer: bool = True,
-        tolerance: float = 5
+        tolerance: float = 5,
     ) -> "Guides":
         """
         Create guides based on text content positions.
         Args:
             obj: Page or Region to search for content
             axis: Whether to create vertical or horizontal guides
             markers: Content to search for. Can be:
                 - str: single selector (e.g., 'text:contains("Name")') or literal text
-                - List[str]: list of selectors or literal text strings
+                - List[str]: list of selectors or literal text strings
                 - ElementCollection: collection of elements to extract text from
                 - None: no markers
             align: Where to place guides relative to found text
             outer: Whether to add guides at the boundaries
             tolerance: Maximum distance to search for text
         Returns:
             New Guides object aligned to text content
         """
         guides_coords = []
         bounds = None
         # Get bounds from object
-        if hasattr(obj, 'bbox'):
+        if hasattr(obj, "bbox"):
             bounds = obj.bbox
-        elif hasattr(obj, 'x0'):
+        elif hasattr(obj, "x0"):
             bounds = (obj.x0, obj.top, obj.x1, obj.bottom)
-        elif hasattr(obj, 'width'):
+        elif hasattr(obj, "width"):
             bounds = (0, 0, obj.width, obj.height)
         # Normalize markers to list of text strings
         marker_texts = _normalize_markers(markers, obj)
         # Find each marker and determine guide position
         for marker in marker_texts:
-            if hasattr(obj, 'find'):
+            if hasattr(obj, "find"):
                 element = obj.find(f'text:contains("{marker}")')
                 if element:
-                    if axis == 'vertical':
-                        if align == 'left':
+                    if axis == "vertical":
+                        if align == "left":
                             guides_coords.append(element.x0)
-                        elif align == 'right':
+                        elif align == "right":
                             guides_coords.append(element.x1)
-                        elif align == 'center':
+                        elif align == "center":
                             guides_coords.append((element.x0 + element.x1) / 2)
-                        elif align == 'between':
+                        elif align == "between":
                             # For between, collect left edges for processing later
                             guides_coords.append(element.x0)
                     else:  # horizontal
-                        if align == 'left':  # top for horizontal
+                        if align == "left":  # top for horizontal
                             guides_coords.append(element.top)
-                        elif align == 'right':  # bottom for horizontal
+                        elif align == "right":  # bottom for horizontal
                             guides_coords.append(element.bottom)
-                        elif align == 'center':
+                        elif align == "center":
                             guides_coords.append((element.top + element.bottom) / 2)
-                        elif align == 'between':
+                        elif align == "between":
                             # For between, collect top edges for processing later
                             guides_coords.append(element.top)
         # Handle 'between' alignment - find midpoints between adjacent markers
-        if align == 'between' and len(guides_coords) >= 2:
+        if align == "between" and len(guides_coords) >= 2:
             # We need to get the right and left edges of each marker
             marker_bounds = []
             for marker in marker_texts:
-                if hasattr(obj, 'find'):
+                if hasattr(obj, "find"):
                     element = obj.find(f'text:contains("{marker}")')
                     if element:
-                        if axis == 'vertical':
+                        if axis == "vertical":
                             marker_bounds.append((element.x0, element.x1))
                         else:  # horizontal
                             marker_bounds.append((element.top, element.bottom))
             # Sort markers by their left edge (or top edge for horizontal)
             marker_bounds.sort(key=lambda x: x[0])
             # Create guides at midpoints between adjacent markers
             between_coords = []
             for i in range(len(marker_bounds) - 1):
@@ -1059,79 +1117,78 @@ class Guides:
                 left_edge_next = marker_bounds[i + 1][0]
                 midpoint = (right_edge_current + left_edge_next) / 2
                 between_coords.append(midpoint)
             guides_coords = between_coords
         # Add outer guides if requested
         if outer and bounds:
-            if axis == 'vertical':
+            if axis == "vertical":
                 guides_coords.insert(0, bounds[0])  # x0
-                guides_coords.append(bounds[2])     # x1
+                guides_coords.append(bounds[2])  # x1
             else:
                 guides_coords.insert(0, bounds[1])  # y0
-                guides_coords.append(bounds[3])     # y1
+                guides_coords.append(bounds[3])  # y1
         # Remove duplicates and sort
         guides_coords = sorted(list(set(guides_coords)))
         # Create guides object
-        if axis == 'vertical':
+        if axis == "vertical":
             return cls(verticals=guides_coords, context=obj, bounds=bounds)
         else:
             return cls(horizontals=guides_coords, context=obj, bounds=bounds)
     @classmethod
     def from_whitespace(
         cls,
         obj: Union["Page", "Region"],
-        axis: Literal['vertical', 'horizontal', 'both'] = 'both',
-        min_gap: float = 10
+        axis: Literal["vertical", "horizontal", "both"] = "both",
+        min_gap: float = 10,
     ) -> "Guides":
         """
         Create guides by detecting whitespace gaps.
         Args:
             obj: Page or Region to analyze
             min_gap: Minimum gap size to consider as whitespace
             axis: Which axes to analyze for gaps
         Returns:
             New Guides object positioned at whitespace gaps
         """
         # This is a placeholder - would need sophisticated gap detection
         logger.info("Whitespace detection not yet implemented, using divide instead")
         return cls.divide(obj, n=3, axis=axis)
     @classmethod
-    def new(
-        cls,
-        context: Optional[Union["Page", "Region"]] = None
-    ) -> "Guides":
+    def new(cls, context: Optional[Union["Page", "Region"]] = None) -> "Guides":
         """
         Create a new empty Guides object, optionally with a context.
         This provides a clean way to start building guides through chaining:
         guides = Guides.new(page).add_content(axis='vertical', markers=[...])
         Args:
             context: Optional Page or Region to use as default context for operations
         Returns:
             New empty Guides object
         """
         return cls(verticals=[], horizontals=[], context=context)
     # -------------------------------------------------------------------------
     # Manipulation Methods
     # -------------------------------------------------------------------------
     def snap_to_whitespace(
         self,
-        axis: str = 'vertical',
+        axis: str = "vertical",
         min_gap: float = 10.0,
-        detection_method: str = 'pixels',  # 'pixels' or 'text'
-        threshold: Union[float, str] = 'auto',  # threshold for what counts as a trough (0.0-1.0) or 'auto'
-        on_no_snap: str = 'warn'
+        detection_method: str = "pixels",  # 'pixels' or 'text'
+        threshold: Union[
+            float, str
+        ] = "auto",  # threshold for what counts as a trough (0.0-1.0) or 'auto'
+        on_no_snap: str = "warn",
     ) -> "Guides":
         """
         Snap guides to nearby whitespace gaps (troughs) using optimal assignment.
@@ -1161,11 +1218,11 @@ class Guides:
             logger.warning("No text elements found for whitespace detection")
             return self
-        if axis == 'vertical':
+        if axis == "vertical":
             gaps = self._find_vertical_whitespace_gaps(text_elements, min_gap, threshold)
             if gaps:
                 self._snap_guides_to_gaps(self.vertical.data, gaps, axis)
-        elif axis == 'horizontal':
+        elif axis == "horizontal":
             gaps = self._find_horizontal_whitespace_gaps(text_elements, min_gap, threshold)
             if gaps:
                 self._snap_guides_to_gaps(self.horizontal.data, gaps, axis)
@@ -1177,25 +1234,22 @@ class Guides:
         self.horizontal.data[:] = [float(y) for y in self.horizontal.data]
         return self
     def shift(
-        self,
-        index: int,
-        offset: float,
-        axis: Literal['vertical', 'horizontal'] = 'vertical'
+        self, index: int, offset: float, axis: Literal["vertical", "horizontal"] = "vertical"
     ) -> "Guides":
         """
         Move a specific guide by a offset amount.
         Args:
             index: Index of the guide to move
             offset: Amount to move (positive = right/down)
             axis: Which guide list to modify
         Returns:
             Self for method chaining
         """
-        if axis == 'vertical':
+        if axis == "vertical":
             if 0 <= index < len(self.vertical):
                 self.vertical[index] += offset
                 self.vertical = sorted(self.vertical)
@@ -1207,123 +1261,127 @@ class Guides:
                 self.horizontal = sorted(self.horizontal)
             else:
                 logger.warning(f"Horizontal guide index {index} out of range")
         return self
     def add_vertical(self, x: float) -> "Guides":
         """Add a vertical guide at the specified x-coordinate."""
         self.vertical.append(x)
         self.vertical = sorted(self.vertical)
         return self
     def add_horizontal(self, y: float) -> "Guides":
         """Add a horizontal guide at the specified y-coordinate."""
         self.horizontal.append(y)
         self.horizontal = sorted(self.horizontal)
         return self
     def remove_vertical(self, index: int) -> "Guides":
         """Remove a vertical guide by index."""
         if 0 <= index < len(self.vertical):
             self.vertical.pop(index)
         return self
     def remove_horizontal(self, index: int) -> "Guides":
         """Remove a horizontal guide by index."""
         if 0 <= index < len(self.horizontal):
             self.horizontal.pop(index)
         return self
     # -------------------------------------------------------------------------
     # Operations
     # -------------------------------------------------------------------------
     def __add__(self, other: "Guides") -> "Guides":
         """
         Combine two guide sets.
         Returns:
             New Guides object with combined coordinates
         """
         # Combine and deduplicate coordinates, ensuring Python floats
         combined_verticals = sorted([float(x) for x in set(self.vertical + other.vertical)])
         combined_horizontals = sorted([float(y) for y in set(self.horizontal + other.horizontal)])
         # Use context from self if available
         return Guides(
             verticals=combined_verticals,
             horizontals=combined_horizontals,
             context=self.context or other.context,
-            bounds=self.bounds or other.bounds
+            bounds=self.bounds or other.bounds,
         )
     def show(self, on=None, **kwargs):
         """
         Display the guides overlaid on a page or region.
         Args:
             on: Page, Region, PIL Image, or string to display guides on.
                 If None, uses self.context (the object guides were created from).
                 If string 'page', uses the page from self.context.
             **kwargs: Additional arguments passed to to_image() if applicable.
         Returns:
             PIL Image with guides drawn on it.
         """
         # Determine what to display guides on
         target = on if on is not None else self.context
         # Handle string shortcuts
         if isinstance(target, str):
-            if target == 'page':
-                if hasattr(self.context, 'page'):
+            if target == "page":
+                if hasattr(self.context, "page"):
                     target = self.context.page
-                elif hasattr(self.context, '_page'):
+                elif hasattr(self.context, "_page"):
                     target = self.context._page
                 else:
                     raise ValueError("Cannot resolve 'page' - context has no page attribute")
             else:
                 raise ValueError(f"Unknown string target: {target}. Only 'page' is supported.")
         if target is None:
             raise ValueError("No target specified and no context available for guides display")
         # Prepare kwargs for image generation
         image_kwargs = kwargs.copy()
         # Always turn off highlights to avoid visual clutter
-        image_kwargs['include_highlights'] = False
+        image_kwargs["include_highlights"] = False
         # If target is a region-like object, crop to just that region
-        if hasattr(target, 'bbox') and hasattr(target, 'page'):
+        if hasattr(target, "bbox") and hasattr(target, "page"):
             # This is likely a Region
-            image_kwargs['crop'] = True
+            image_kwargs["crop"] = True
         # Get base image
-        if hasattr(target, 'to_image'):
+        if hasattr(target, "to_image"):
             img = target.to_image(**image_kwargs)
-        elif hasattr(target, 'mode') and hasattr(target, 'size'):
+        elif hasattr(target, "mode") and hasattr(target, "size"):
             # It's already a PIL Image
             img = target
         else:
             raise ValueError(f"Object {target} does not support to_image() and is not a PIL Image")
         if img is None:
             raise ValueError("Failed to generate base image")
         # Create a copy to draw on
         img = img.copy()
         draw = ImageDraw.Draw(img)
         # Determine scale factor for coordinate conversion
-        if hasattr(target, 'width') and hasattr(target, 'height') and not (hasattr(target, 'mode') and hasattr(target, 'size')):
+        if (
+            hasattr(target, "width")
+            and hasattr(target, "height")
+            and not (hasattr(target, "mode") and hasattr(target, "size"))
+        ):
             # target is a PDF object (Page/Region) with PDF coordinates
             scale_x = img.width / target.width
             scale_y = img.height / target.height
             # If we're showing guides on a region, we need to adjust coordinates
             # to be relative to the region's origin
-            if hasattr(target, 'bbox') and hasattr(target, 'page'):
+            if hasattr(target, "bbox") and hasattr(target, "page"):
                 # This is a Region - adjust guide coordinates to be relative to region
                 region_x0, region_top = target.x0, target.top
             else:
@@ -1334,7 +1392,7 @@ class Guides:
             scale_x = 1.0
             scale_y = 1.0
             region_x0, region_top = 0, 0
         # Draw vertical guides (blue)
         for x_coord in self.vertical:
             # Adjust coordinate if we're showing on a region
@@ -1344,8 +1402,8 @@ class Guides:
             if 0 <= pixel_x <= img.width - 1:
                 x_pixel = int(min(pixel_x, img.width - 1))
                 draw.line([(x_pixel, 0), (x_pixel, img.height - 1)], fill=(0, 0, 255, 200), width=2)
-        # Draw horizontal guides (red)
+        # Draw horizontal guides (red)
         for y_coord in self.horizontal:
             # Adjust coordinate if we're showing on a region
             adjusted_y = y_coord - region_top
@@ -1354,22 +1412,22 @@ class Guides:
             if 0 <= pixel_y <= img.height - 1:
                 y_pixel = int(min(pixel_y, img.height - 1))
                 draw.line([(0, y_pixel), (img.width - 1, y_pixel)], fill=(255, 0, 0, 200), width=2)
         return img
     # -------------------------------------------------------------------------
     # Utility Methods
     # -------------------------------------------------------------------------
     def get_cells(self) -> List[Tuple[float, float, float, float]]:
         """
         Get all cell bounding boxes from guide intersections.
         Returns:
             List of (x0, y0, x1, y1) tuples for each cell
         """
         cells = []
         # Create cells from guide intersections
         for i in range(len(self.vertical) - 1):
             for j in range(len(self.horizontal) - 1):
@@ -1378,135 +1436,139 @@ class Guides:
                 y0 = self.horizontal[j]
                 y1 = self.horizontal[j + 1]
                 cells.append((x0, y0, x1, y1))
         return cells
     def to_dict(self) -> Dict[str, Any]:
         """
         Convert to dictionary format suitable for pdfplumber table_settings.
         Returns:
             Dictionary with explicit_vertical_lines and explicit_horizontal_lines
         """
         return {
-            'explicit_vertical_lines': self.vertical,
-            'explicit_horizontal_lines': self.horizontal
+            "explicit_vertical_lines": self.vertical,
+            "explicit_horizontal_lines": self.horizontal,
         }
     def to_relative(self) -> "Guides":
         """
         Convert absolute coordinates to relative (0-1) coordinates.
         Returns:
             New Guides object with relative coordinates
         """
         if self.relative:
             return self  # Already relative
         if not self.bounds:
             raise ValueError("Cannot convert to relative without bounds")
         x0, y0, x1, y1 = self.bounds
         width = x1 - x0
         height = y1 - y0
         rel_verticals = [(x - x0) / width for x in self.vertical]
         rel_horizontals = [(y - y0) / height for y in self.horizontal]
         return Guides(
             verticals=rel_verticals,
             horizontals=rel_horizontals,
             context=self.context,
             bounds=(0, 0, 1, 1),
-            relative=True
+            relative=True,
         )
     def to_absolute(self, bounds: Tuple[float, float, float, float]) -> "Guides":
         """
         Convert relative coordinates to absolute coordinates.
         Args:
             bounds: Target bounding box (x0, y0, x1, y1)
         Returns:
             New Guides object with absolute coordinates
         """
         if not self.relative:
             return self  # Already absolute
         x0, y0, x1, y1 = bounds
         width = x1 - x0
         height = y1 - y0
         abs_verticals = [x0 + x * width for x in self.vertical]
         abs_horizontals = [y0 + y * height for y in self.horizontal]
         return Guides(
             verticals=abs_verticals,
             horizontals=abs_horizontals,
             context=self.context,
             bounds=bounds,
-            relative=False
+            relative=False,
         )
     @property
     def n_rows(self) -> int:
         """Number of rows defined by horizontal guides."""
         return max(0, len(self.horizontal) - 1)
     @property
     def n_cols(self) -> int:
         """Number of columns defined by vertical guides."""
         return max(0, len(self.vertical) - 1)
     def _handle_snap_failure(self, message: str):
         """Handle cases where snapping cannot be performed."""
-        if hasattr(self, 'on_no_snap'):
-            if self.on_no_snap == 'warn':
+        if hasattr(self, "on_no_snap"):
+            if self.on_no_snap == "warn":
                 logger.warning(message)
-            elif self.on_no_snap == 'raise':
+            elif self.on_no_snap == "raise":
                 raise ValueError(message)
             # 'ignore' case: do nothing
         else:
             logger.warning(message)  # Default behavior
-    def _find_vertical_whitespace_gaps(self, text_elements, min_gap: float, threshold: Union[float, str] = 'auto') -> List[Tuple[float, float]]:
+    def _find_vertical_whitespace_gaps(
+        self, text_elements, min_gap: float, threshold: Union[float, str] = "auto"
+    ) -> List[Tuple[float, float]]:
         """
         Find vertical whitespace gaps using bbox-based density analysis.
         Returns list of (start, end) tuples representing trough ranges.
         """
         if not self.bounds:
             return []
         x0, _, x1, _ = self.bounds
         width_pixels = int(x1 - x0)
         if width_pixels <= 0:
             return []
         # Create density histogram: count bbox overlaps per x-coordinate
         density = np.zeros(width_pixels)
         for element in text_elements:
-            if not hasattr(element, 'x0') or not hasattr(element, 'x1'):
+            if not hasattr(element, "x0") or not hasattr(element, "x1"):
                 continue
             # Clip coordinates to bounds
             elem_x0 = max(x0, element.x0) - x0
             elem_x1 = min(x1, element.x1) - x0
             if elem_x1 > elem_x0:
                 start_px = int(elem_x0)
                 end_px = int(elem_x1)
                 density[start_px:end_px] += 1
         if density.max() == 0:
             return []
         # Determine the threshold value
-        if threshold == 'auto':
+        if threshold == "auto":
             # Auto mode: try different thresholds with step 0.05 until we have enough troughs
-            guides_needing_troughs = len([g for i, g in enumerate(self.vertical) if 0 < i < len(self.vertical) - 1])
+            guides_needing_troughs = len(
+                [g for i, g in enumerate(self.vertical) if 0 < i < len(self.vertical) - 1]
+            )
             if guides_needing_troughs == 0:
                 threshold_val = 0.5  # Default when no guides need placement
             else:
@@ -1515,9 +1577,11 @@ class Guides:
                     test_gaps = self._find_gaps_with_threshold(density, test_threshold, min_gap, x0)
                     if len(test_gaps) >= guides_needing_troughs:
                         threshold_val = test_threshold
-                        logger.debug(f"Auto threshold found: {test_threshold:.2f} (found {len(test_gaps)} troughs for {guides_needing_troughs} guides)")
+                        logger.debug(
+                            f"Auto threshold found: {test_threshold:.2f} (found {len(test_gaps)} troughs for {guides_needing_troughs} guides)"
+                        )
                         break
                 if threshold_val is None:
                     threshold_val = 0.8  # Fallback to permissive threshold
                     logger.debug(f"Auto threshold fallback to {threshold_val}")
@@ -1526,93 +1590,103 @@ class Guides:
             if not isinstance(threshold, (int, float)) or not (0.0 <= threshold <= 1.0):
                 raise ValueError("threshold must be a number between 0.0 and 1.0, or 'auto'")
             threshold_val = float(threshold)
         return self._find_gaps_with_threshold(density, threshold_val, min_gap, x0)
     def _find_gaps_with_threshold(self, density, threshold_val, min_gap, x0):
         """Helper method to find gaps given a specific threshold value."""
         max_density = density.max()
         threshold_density = threshold_val * max_density
         # Smooth the density for better trough detection
         from scipy.ndimage import gaussian_filter1d
         smoothed_density = gaussian_filter1d(density.astype(float), sigma=1.0)
         # Find regions below threshold
         below_threshold = smoothed_density <= threshold_density
         # Find contiguous regions
         from scipy.ndimage import label as nd_label
         labeled_regions, num_regions = nd_label(below_threshold)
         gaps = []
         for region_id in range(1, num_regions + 1):
             region_mask = labeled_regions == region_id
             region_indices = np.where(region_mask)[0]
             if len(region_indices) == 0:
                 continue
             start_px = region_indices[0]
             end_px = region_indices[-1] + 1
             # Convert back to PDF coordinates
             start_pdf = x0 + start_px
             end_pdf = x0 + end_px
             # Check minimum gap size
             if end_pdf - start_pdf >= min_gap:
                 gaps.append((start_pdf, end_pdf))
         return gaps
-    def _find_horizontal_whitespace_gaps(self, text_elements, min_gap: float, threshold: Union[float, str] = 'auto') -> List[Tuple[float, float]]:
+    def _find_horizontal_whitespace_gaps(
+        self, text_elements, min_gap: float, threshold: Union[float, str] = "auto"
+    ) -> List[Tuple[float, float]]:
         """
         Find horizontal whitespace gaps using bbox-based density analysis.
         Returns list of (start, end) tuples representing trough ranges.
         """
         if not self.bounds:
             return []
         _, y0, _, y1 = self.bounds
         height_pixels = int(y1 - y0)
         if height_pixels <= 0:
             return []
-        # Create density histogram: count bbox overlaps per y-coordinate
+        # Create density histogram: count bbox overlaps per y-coordinate
         density = np.zeros(height_pixels)
         for element in text_elements:
-            if not hasattr(element, 'top') or not hasattr(element, 'bottom'):
+            if not hasattr(element, "top") or not hasattr(element, "bottom"):
                 continue
             # Clip coordinates to bounds
             elem_top = max(y0, element.top) - y0
             elem_bottom = min(y1, element.bottom) - y0
             if elem_bottom > elem_top:
                 start_px = int(elem_top)
                 end_px = int(elem_bottom)
                 density[start_px:end_px] += 1
         if density.max() == 0:
             return []
         # Determine the threshold value (same logic as vertical)
-        if threshold == 'auto':
-            guides_needing_troughs = len([g for i, g in enumerate(self.horizontal) if 0 < i < len(self.horizontal) - 1])
+        if threshold == "auto":
+            guides_needing_troughs = len(
+                [g for i, g in enumerate(self.horizontal) if 0 < i < len(self.horizontal) - 1]
+            )
             if guides_needing_troughs == 0:
                 threshold_val = 0.5  # Default when no guides need placement
             else:
                 threshold_val = None
                 for test_threshold in np.arange(0.1, 1.0, 0.05):
-                    test_gaps = self._find_gaps_with_threshold_horizontal(density, test_threshold, min_gap, y0)
+                    test_gaps = self._find_gaps_with_threshold_horizontal(
+                        density, test_threshold, min_gap, y0
+                    )
                     if len(test_gaps) >= guides_needing_troughs:
                         threshold_val = test_threshold
-                        logger.debug(f"Auto threshold found: {test_threshold:.2f} (found {len(test_gaps)} troughs for {guides_needing_troughs} guides)")
+                        logger.debug(
+                            f"Auto threshold found: {test_threshold:.2f} (found {len(test_gaps)} troughs for {guides_needing_troughs} guides)"
+                        )
                         break
                 if threshold_val is None:
                     threshold_val = 0.8  # Fallback to permissive threshold
                     logger.debug(f"Auto threshold fallback to {threshold_val}")
@@ -1621,141 +1695,157 @@ class Guides:
             if not isinstance(threshold, (int, float)) or not (0.0 <= threshold <= 1.0):
                 raise ValueError("threshold must be a number between 0.0 and 1.0, or 'auto'")
             threshold_val = float(threshold)
         return self._find_gaps_with_threshold_horizontal(density, threshold_val, min_gap, y0)
     def _find_gaps_with_threshold_horizontal(self, density, threshold_val, min_gap, y0):
         """Helper method to find horizontal gaps given a specific threshold value."""
         max_density = density.max()
         threshold_density = threshold_val * max_density
         # Smooth the density for better trough detection
         from scipy.ndimage import gaussian_filter1d
         smoothed_density = gaussian_filter1d(density.astype(float), sigma=1.0)
         # Find regions below threshold
         below_threshold = smoothed_density <= threshold_density
         # Find contiguous regions
         from scipy.ndimage import label as nd_label
         labeled_regions, num_regions = nd_label(below_threshold)
         gaps = []
         for region_id in range(1, num_regions + 1):
             region_mask = labeled_regions == region_id
             region_indices = np.where(region_mask)[0]
             if len(region_indices) == 0:
                 continue
             start_px = region_indices[0]
             end_px = region_indices[-1] + 1
             # Convert back to PDF coordinates
             start_pdf = y0 + start_px
             end_pdf = y0 + end_px
             # Check minimum gap size
             if end_pdf - start_pdf >= min_gap:
                 gaps.append((start_pdf, end_pdf))
         return gaps
-    def _find_vertical_element_gaps(self, text_elements, min_gap: float) -> List[Tuple[float, float]]:
+    def _find_vertical_element_gaps(
+        self, text_elements, min_gap: float
+    ) -> List[Tuple[float, float]]:
         """
         Find vertical whitespace gaps using text element spacing analysis.
         Returns list of (start, end) tuples representing trough ranges.
         """
         if not self.bounds or not text_elements:
             return []
         x0, _, x1, _ = self.bounds
         # Get all element right and left edges
         element_edges = []
         for element in text_elements:
-            if not hasattr(element, 'x0') or not hasattr(element, 'x1'):
+            if not hasattr(element, "x0") or not hasattr(element, "x1"):
                 continue
             # Only include elements that overlap vertically with our bounds
-            if hasattr(element, 'top') and hasattr(element, 'bottom'):
+            if hasattr(element, "top") and hasattr(element, "bottom"):
                 if element.bottom < self.bounds[1] or element.top > self.bounds[3]:
                     continue
             element_edges.extend([element.x0, element.x1])
         if not element_edges:
             return []
         # Sort edges and find gaps
         element_edges = sorted(set(element_edges))
         trough_ranges = []
         for i in range(len(element_edges) - 1):
             gap_start = element_edges[i]
             gap_end = element_edges[i + 1]
             gap_width = gap_end - gap_start
             if gap_width >= min_gap:
                 # Check if this gap actually contains no text (is empty space)
                 gap_has_text = False
                 for element in text_elements:
-                    if (hasattr(element, 'x0') and hasattr(element, 'x1') and
-                        element.x0 < gap_end and element.x1 > gap_start):
+                    if (
+                        hasattr(element, "x0")
+                        and hasattr(element, "x1")
+                        and element.x0 < gap_end
+                        and element.x1 > gap_start
+                    ):
                         gap_has_text = True
                         break
                 if not gap_has_text:
                     trough_ranges.append((gap_start, gap_end))
         return trough_ranges
-    def _find_horizontal_element_gaps(self, text_elements, min_gap: float) -> List[Tuple[float, float]]:
+    def _find_horizontal_element_gaps(
+        self, text_elements, min_gap: float
+    ) -> List[Tuple[float, float]]:
         """
         Find horizontal whitespace gaps using text element spacing analysis.
         Returns list of (start, end) tuples representing trough ranges.
         """
         if not self.bounds or not text_elements:
             return []
         _, y0, _, y1 = self.bounds
         # Get all element top and bottom edges
         element_edges = []
         for element in text_elements:
-            if not hasattr(element, 'top') or not hasattr(element, 'bottom'):
+            if not hasattr(element, "top") or not hasattr(element, "bottom"):
                 continue
             # Only include elements that overlap horizontally with our bounds
-            if hasattr(element, 'x0') and hasattr(element, 'x1'):
+            if hasattr(element, "x0") and hasattr(element, "x1"):
                 if element.x1 < self.bounds[0] or element.x0 > self.bounds[2]:
                     continue
             element_edges.extend([element.top, element.bottom])
         if not element_edges:
             return []
         # Sort edges and find gaps
         element_edges = sorted(set(element_edges))
         trough_ranges = []
         for i in range(len(element_edges) - 1):
             gap_start = element_edges[i]
             gap_end = element_edges[i + 1]
             gap_width = gap_end - gap_start
             if gap_width >= min_gap:
                 # Check if this gap actually contains no text (is empty space)
                 gap_has_text = False
                 for element in text_elements:
-                    if (hasattr(element, 'top') and hasattr(element, 'bottom') and
-                        element.top < gap_end and element.bottom > gap_start):
+                    if (
+                        hasattr(element, "top")
+                        and hasattr(element, "bottom")
+                        and element.top < gap_end
+                        and element.bottom > gap_start
+                    ):
                         gap_has_text = True
                         break
                 if not gap_has_text:
                     trough_ranges.append((gap_start, gap_end))
         return trough_ranges
-    def _optimal_guide_assignment(self, guides: List[float], trough_ranges: List[Tuple[float, float]]) -> Dict[int, int]:
+    def _optimal_guide_assignment(
+        self, guides: List[float], trough_ranges: List[Tuple[float, float]]
+    ) -> Dict[int, int]:
         """
         Assign guides to trough ranges using the user's desired logic:
         - Guides already in a trough stay put
@@ -1764,18 +1854,20 @@ class Guides:
         """
         if not guides or not trough_ranges:
             return {}
         assignments = {}
         # Step 1: Identify which guides are already in troughs
         guides_in_troughs = set()
         for i, guide_pos in enumerate(guides):
             for trough_start, trough_end in trough_ranges:
                 if trough_start <= guide_pos <= trough_end:
                     guides_in_troughs.add(i)
-                    logger.debug(f"Guide {i} (pos {guide_pos:.1f}) is already in trough ({trough_start:.1f}-{trough_end:.1f}), keeping in place")
+                    logger.debug(
+                        f"Guide {i} (pos {guide_pos:.1f}) is already in trough ({trough_start:.1f}-{trough_end:.1f}), keeping in place"
+                    )
                     break
         # Step 2: Identify which troughs are already occupied
         occupied_troughs = set()
         for i in guides_in_troughs:
@@ -1784,21 +1876,23 @@ class Guides:
                 if trough_start <= guide_pos <= trough_end:
                     occupied_troughs.add(j)
                     break
         # Step 3: Find guides that need reassignment (not in any trough)
         guides_to_move = []
         for i, guide_pos in enumerate(guides):
             if i not in guides_in_troughs:
                 guides_to_move.append(i)
-                logger.debug(f"Guide {i} (pos {guide_pos:.1f}) is NOT in any trough, needs reassignment")
+                logger.debug(
+                    f"Guide {i} (pos {guide_pos:.1f}) is NOT in any trough, needs reassignment"
+                )
         # Step 4: Find available troughs (not occupied by existing guides)
         available_troughs = []
         for j, (trough_start, trough_end) in enumerate(trough_ranges):
             if j not in occupied_troughs:
                 available_troughs.append(j)
                 logger.debug(f"Trough {j} ({trough_start:.1f}-{trough_end:.1f}) is available")
         # Step 5: Assign guides to move to closest available troughs
         if guides_to_move and available_troughs:
             # Calculate distances for all combinations
@@ -1810,20 +1904,22 @@ class Guides:
                     trough_center = (trough_start + trough_end) / 2
                     distance = abs(guide_pos - trough_center)
                     distances.append((distance, guide_idx, trough_idx))
             # Sort by distance and assign greedily
             distances.sort()
             used_troughs = set()
             for distance, guide_idx, trough_idx in distances:
                 if guide_idx not in assignments and trough_idx not in used_troughs:
                     assignments[guide_idx] = trough_idx
                     used_troughs.add(trough_idx)
-                    logger.debug(f"Assigned guide {guide_idx} (pos {guides[guide_idx]:.1f}) to trough {trough_idx} (distance: {distance:.1f})")
+                    logger.debug(
+                        f"Assigned guide {guide_idx} (pos {guides[guide_idx]:.1f}) to trough {trough_idx} (distance: {distance:.1f})"
+                    )
         logger.debug(f"Final assignments: {assignments}")
         return assignments
     def _snap_guides_to_gaps(self, guides: List[float], gaps: List[Tuple[float, float]], axis: str):
         """
         Snap guides to nearby gaps using optimal assignment.
@@ -1831,15 +1927,15 @@ class Guides:
         """
         if not guides or not gaps:
             return
         logger.debug(f"Snapping {len(guides)} {axis} guides to {len(gaps)} trough ranges")
         for i, (start, end) in enumerate(gaps):
             center = (start + end) / 2
             logger.debug(f"  Trough {i}: {start:.1f} to {end:.1f} (center: {center:.1f})")
         # Get optimal assignments
         assignments = self._optimal_guide_assignment(guides, gaps)
         # Apply assignments (modify guides list in-place)
         for guide_idx, trough_idx in assignments.items():
             trough_start, trough_end = gaps[trough_idx]
@@ -1847,23 +1943,23 @@ class Guides:
             old_pos = guides[guide_idx]
             guides[guide_idx] = new_pos
             logger.info(f"Snapped {axis} guide from {old_pos:.1f} to {new_pos:.1f}")
     def build_grid(
         self,
         target: Optional[Union["Page", "Region"]] = None,
         source: str = "guides",
         cell_padding: float = 0.5,
-        include_outer_boundaries: bool = False
+        include_outer_boundaries: bool = False,
     ) -> Dict[str, int]:
         """
         Create table structure (table, rows, columns, cells) from guide coordinates.
         Args:
             target: Page or Region to create regions on (uses self.context if None)
             source: Source label for created regions (for identification)
             cell_padding: Internal padding for cell regions in points
             include_outer_boundaries: Whether to add boundaries at edges if missing
         Returns:
             Dictionary with counts: {'table': 1, 'rows': N, 'columns': M, 'cells': N*M}
         """
@@ -1871,98 +1967,142 @@ class Guides:
         target_obj = target or self.context
         if not target_obj:
             raise ValueError("No target object available. Provide target parameter or context.")
         # Get the page for creating regions
-        if hasattr(target_obj, 'x0') and hasattr(target_obj, 'top'):  # Region (has bbox coordinates)
+        if hasattr(target_obj, "x0") and hasattr(
+            target_obj, "top"
+        ):  # Region (has bbox coordinates)
             page = target_obj._page
             origin_x, origin_y = target_obj.x0, target_obj.top
             context_width, context_height = target_obj.width, target_obj.height
-        elif hasattr(target_obj, '_element_mgr') or hasattr(target_obj, 'width'):  # Page
+        elif hasattr(target_obj, "_element_mgr") or hasattr(target_obj, "width"):  # Page
             page = target_obj
             origin_x, origin_y = 0.0, 0.0
             context_width, context_height = page.width, page.height
         else:
             raise ValueError(f"Target object {target_obj} is not a Page or Region")
         element_manager = page._element_mgr
         # Setup boundaries
         row_boundaries = list(self.horizontal)
         col_boundaries = list(self.vertical)
         # Add outer boundaries if requested and missing
         if include_outer_boundaries:
             if not row_boundaries or row_boundaries[0] > origin_y:
                 row_boundaries.insert(0, origin_y)
             if not row_boundaries or row_boundaries[-1] < origin_y + context_height:
                 row_boundaries.append(origin_y + context_height)
             if not col_boundaries or col_boundaries[0] > origin_x:
                 col_boundaries.insert(0, origin_x)
             if not col_boundaries or col_boundaries[-1] < origin_x + context_width:
                 col_boundaries.append(origin_x + context_width)
         # Remove duplicates and sort
         row_boundaries = sorted(list(set(row_boundaries)))
         col_boundaries = sorted(list(set(col_boundaries)))
-        logger.debug(f"Building grid with {len(row_boundaries)} row and {len(col_boundaries)} col boundaries")
+        # ------------------------------------------------------------------
+        # Clean-up: remove any previously created grid regions (table, rows,
+        # columns, cells) that were generated by the same `source` label and
+        # overlap the area we are about to populate.  This prevents the page's
+        # `ElementManager` from accumulating stale/duplicate regions when the
+        # user rebuilds the grid multiple times.
+        # ------------------------------------------------------------------
+        try:
+            # Bounding box of the grid we are about to create
+            if row_boundaries and col_boundaries:
+                grid_bbox = (
+                    col_boundaries[0],  # x0
+                    row_boundaries[0],  # top
+                    col_boundaries[-1],  # x1
+                    row_boundaries[-1],  # bottom
+                )
+                def _bbox_overlap(b1, b2):
+                    """Return True if two (x0, top, x1, bottom) bboxes overlap."""
+                    return not (
+                        b1[2] <= b2[0]  # b1 right ≤ b2 left
+                        or b1[0] >= b2[2]  # b1 left ≥ b2 right
+                        or b1[3] <= b2[1]  # b1 bottom ≤ b2 top
+                        or b1[1] >= b2[3]  # b1 top ≥ b2 bottom
+                    )
+                # Collect existing regions that match the source & region types
+                regions_to_remove = [
+                    r
+                    for r in element_manager.regions
+                    if getattr(r, "source", None) == source
+                    and getattr(r, "region_type", None)
+                    in {"table", "table_row", "table_column", "table_cell"}
+                    and hasattr(r, "bbox")
+                    and _bbox_overlap(r.bbox, grid_bbox)
+                ]
+                for r in regions_to_remove:
+                    element_manager.remove_element(r, element_type="regions")
+                if regions_to_remove:
+                    logger.debug(
+                        f"Removed {len(regions_to_remove)} existing grid region(s) prior to rebuild"
+                    )
+        except Exception as cleanup_err:  # pragma: no cover – cleanup must never crash
+            logger.warning(f"Grid cleanup failed: {cleanup_err}")
+        logger.debug(
+            f"Building grid with {len(row_boundaries)} row and {len(col_boundaries)} col boundaries"
+        )
         # Track creation counts
-        counts = {'table': 0, 'rows': 0, 'columns': 0, 'cells': 0}
+        counts = {"table": 0, "rows": 0, "columns": 0, "cells": 0}
         # Create overall table region
         if len(row_boundaries) >= 2 and len(col_boundaries) >= 2:
             table_region = page.create_region(
-                col_boundaries[0], row_boundaries[0],
-                col_boundaries[-1], row_boundaries[-1]
+                col_boundaries[0], row_boundaries[0], col_boundaries[-1], row_boundaries[-1]
             )
             table_region.source = source
             table_region.region_type = "table"
             table_region.normalized_type = "table"
-            table_region.metadata.update({
-                "source_guides": True,
-                "num_rows": len(row_boundaries) - 1,
-                "num_cols": len(col_boundaries) - 1,
-                "boundaries": {"rows": row_boundaries, "cols": col_boundaries}
-            })
+            table_region.metadata.update(
+                {
+                    "source_guides": True,
+                    "num_rows": len(row_boundaries) - 1,
+                    "num_cols": len(col_boundaries) - 1,
+                    "boundaries": {"rows": row_boundaries, "cols": col_boundaries},
+                }
+            )
             element_manager.add_element(table_region, element_type="regions")
-            counts['table'] = 1
+            counts["table"] = 1
         # Create row regions
         if len(row_boundaries) >= 2 and len(col_boundaries) >= 2:
             for i in range(len(row_boundaries) - 1):
                 row_region = page.create_region(
-                    col_boundaries[0], row_boundaries[i],
-                    col_boundaries[-1], row_boundaries[i + 1]
+                    col_boundaries[0], row_boundaries[i], col_boundaries[-1], row_boundaries[i + 1]
                 )
                 row_region.source = source
                 row_region.region_type = "table_row"
                 row_region.normalized_type = "table_row"
-                row_region.metadata.update({
-                    "row_index": i,
-                    "source_guides": True
-                })
+                row_region.metadata.update({"row_index": i, "source_guides": True})
                 element_manager.add_element(row_region, element_type="regions")
-                counts['rows'] += 1
+                counts["rows"] += 1
         # Create column regions
         if len(col_boundaries) >= 2 and len(row_boundaries) >= 2:
             for j in range(len(col_boundaries) - 1):
                 col_region = page.create_region(
-                    col_boundaries[j], row_boundaries[0],
-                    col_boundaries[j + 1], row_boundaries[-1]
+                    col_boundaries[j], row_boundaries[0], col_boundaries[j + 1], row_boundaries[-1]
                 )
                 col_region.source = source
                 col_region.region_type = "table_column"
                 col_region.normalized_type = "table_column"
-                col_region.metadata.update({
-                    "col_index": j,
-                    "source_guides": True
-                })
+                col_region.metadata.update({"col_index": j, "source_guides": True})
                 element_manager.add_element(col_region, element_type="regions")
-                counts['columns'] += 1
+                counts["columns"] += 1
         # Create cell regions
         if len(row_boundaries) >= 2 and len(col_boundaries) >= 2:
             for i in range(len(row_boundaries) - 1):
@@ -1972,50 +2112,58 @@ class Guides:
                     cell_top = row_boundaries[i] + cell_padding
                     cell_x1 = col_boundaries[j + 1] - cell_padding
                     cell_bottom = row_boundaries[i + 1] - cell_padding
                     # Skip invalid cells
                     if cell_x1 <= cell_x0 or cell_bottom <= cell_top:
                         continue
                     cell_region = page.create_region(cell_x0, cell_top, cell_x1, cell_bottom)
                     cell_region.source = source
                     cell_region.region_type = "table_cell"
                     cell_region.normalized_type = "table_cell"
-                    cell_region.metadata.update({
-                        "row_index": i,
-                        "col_index": j,
-                        "source_guides": True,
-                        "original_boundaries": {
-                            "left": col_boundaries[j],
-                            "top": row_boundaries[i],
-                            "right": col_boundaries[j + 1],
-                            "bottom": row_boundaries[i + 1]
+                    cell_region.metadata.update(
+                        {
+                            "row_index": i,
+                            "col_index": j,
+                            "source_guides": True,
+                            "original_boundaries": {
+                                "left": col_boundaries[j],
+                                "top": row_boundaries[i],
+                                "right": col_boundaries[j + 1],
+                                "bottom": row_boundaries[i + 1],
+                            },
                         }
-                    })
+                    )
                     element_manager.add_element(cell_region, element_type="regions")
-                    counts['cells'] += 1
-        logger.info(f"Created {counts['table']} table, {counts['rows']} rows, "
-                   f"{counts['columns']} columns, and {counts['cells']} cells from guides")
+                    counts["cells"] += 1
+        logger.info(
+            f"Created {counts['table']} table, {counts['rows']} rows, "
+            f"{counts['columns']} columns, and {counts['cells']} cells from guides"
+        )
         return counts
     def __repr__(self) -> str:
         """String representation of the guides."""
-        return (f"Guides(verticals={len(self.vertical)}, "
-                f"horizontals={len(self.horizontal)}, "
-                f"cells={len(self.get_cells())})")
+        return (
+            f"Guides(verticals={len(self.vertical)}, "
+            f"horizontals={len(self.horizontal)}, "
+            f"cells={len(self.get_cells())})"
+        )
     def _get_text_elements(self):
         """Get text elements from the context."""
         if not self.context:
             return []
         # Get text elements from the context
-        if hasattr(self.context, 'find_all'):
+        if hasattr(self.context, "find_all"):
             try:
-                text_elements = self.context.find_all('text', apply_exclusions=False)
-                return text_elements.elements if hasattr(text_elements, 'elements') else text_elements
+                text_elements = self.context.find_all("text", apply_exclusions=False)
+                return (
+                    text_elements.elements if hasattr(text_elements, "elements") else text_elements
+                )
             except Exception as e:
                 logger.warning(f"Error getting text elements: {e}")
                 return []
@@ -2026,32 +2174,32 @@ class Guides:
     # -------------------------------------------------------------------------
     # Instance methods for fluent chaining (avoid name conflicts with class methods)
     # -------------------------------------------------------------------------
     def add_content(
         self,
-        axis: Literal['vertical', 'horizontal'] = 'vertical',
+        axis: Literal["vertical", "horizontal"] = "vertical",
         markers: Union[str, List[str], "ElementCollection", None] = None,
         obj: Optional[Union["Page", "Region"]] = None,
-        align: Literal['left', 'right', 'center', 'between'] = 'left',
+        align: Literal["left", "right", "center", "between"] = "left",
         outer: bool = True,
-        tolerance: float = 5
+        tolerance: float = 5,
     ) -> "Guides":
         """
         Instance method: Add guides from content, allowing chaining.
         This allows: Guides.new(page).add_content(axis='vertical', markers=[...])
         Args:
             axis: Which axis to create guides for
             markers: Content to search for. Can be:
                 - str: single selector or literal text
-                - List[str]: list of selectors or literal text strings
+                - List[str]: list of selectors or literal text strings
                 - ElementCollection: collection of elements to extract text from
                 - None: no markers
             obj: Page or Region to search (uses self.context if None)
             align: How to align guides relative to found elements
             outer: Whether to add outer boundary guides
             tolerance: Tolerance for snapping to element edges
         Returns:
             Self for method chaining
         """
@@ -2059,7 +2207,7 @@ class Guides:
         target_obj = obj or self.context
         if target_obj is None:
             raise ValueError("No object provided and no context available")
         # Create new guides using the class method
         new_guides = Guides.from_content(
             obj=target_obj,
@@ -2067,34 +2215,34 @@ class Guides:
             markers=markers,
             align=align,
             outer=outer,
-            tolerance=tolerance
+            tolerance=tolerance,
         )
         # Add the appropriate coordinates to this object
-        if axis == 'vertical':
+        if axis == "vertical":
             self.vertical = list(set(self.vertical + new_guides.vertical))
         else:
             self.horizontal = list(set(self.horizontal + new_guides.horizontal))
         return self
     def add_lines(
         self,
-        axis: Literal['vertical', 'horizontal', 'both'] = 'both',
-        obj: Optional[Union["Page", "Region"]] = None,
-        threshold: Union[float, str] = 'auto',
+        axis: Literal["vertical", "horizontal", "both"] = "both",
+        obj: Optional[Union["Page", "Region"]] = None,
+        threshold: Union[float, str] = "auto",
         source_label: Optional[str] = None,
         max_lines_h: Optional[int] = None,
         max_lines_v: Optional[int] = None,
         outer: bool = False,
-        detection_method: str = 'vector',
+        detection_method: str = "vector",
         resolution: int = 192,
-        **detect_kwargs
+        **detect_kwargs,
     ) -> "Guides":
         """
         Instance method: Add guides from lines, allowing chaining.
         This allows: Guides.new(page).add_lines(axis='horizontal')
         Args:
             axis: Which axis to detect lines for
             obj: Page or Region to search (uses self.context if None)
@@ -2106,7 +2254,7 @@ class Guides:
             detection_method: 'vector' (use existing LineElements) or 'pixels' (detect from image)
             resolution: DPI for pixel-based detection (default: 192)
             **detect_kwargs: Additional parameters for pixel detection (see from_lines)
         Returns:
             Self for method chaining
         """
@@ -2114,7 +2262,7 @@ class Guides:
         target_obj = obj or self.context
         if target_obj is None:
             raise ValueError("No object provided and no context available")
         # Create new guides using the class method
         new_guides = Guides.from_lines(
             obj=target_obj,
@@ -2126,32 +2274,32 @@ class Guides:
             outer=outer,
             detection_method=detection_method,
             resolution=resolution,
-            **detect_kwargs
+            **detect_kwargs,
         )
         # Add the appropriate coordinates to this object
-        if axis in ('vertical', 'both'):
+        if axis in ("vertical", "both"):
             self.vertical = list(set(self.vertical + new_guides.vertical))
-        if axis in ('horizontal', 'both'):
+        if axis in ("horizontal", "both"):
             self.horizontal = list(set(self.horizontal + new_guides.horizontal))
         return self
     def add_whitespace(
         self,
-        axis: Literal['vertical', 'horizontal', 'both'] = 'both',
+        axis: Literal["vertical", "horizontal", "both"] = "both",
         obj: Optional[Union["Page", "Region"]] = None,
-        min_gap: float = 10
+        min_gap: float = 10,
     ) -> "Guides":
         """
         Instance method: Add guides from whitespace, allowing chaining.
         This allows: Guides.new(page).add_whitespace(axis='both')
         Args:
             axis: Which axis to create guides for
             obj: Page or Region to search (uses self.context if None)
             min_gap: Minimum gap size to consider
         Returns:
             Self for method chaining
         """
@@ -2159,18 +2307,14 @@ class Guides:
         target_obj = obj or self.context
         if target_obj is None:
             raise ValueError("No object provided and no context available")
         # Create new guides using the class method
-        new_guides = Guides.from_whitespace(
-            obj=target_obj,
-            axis=axis,
-            min_gap=min_gap
-        )
+        new_guides = Guides.from_whitespace(obj=target_obj, axis=axis, min_gap=min_gap)
         # Add the appropriate coordinates to this object
-        if axis in ('vertical', 'both'):
+        if axis in ("vertical", "both"):
             self.vertical = list(set(self.vertical + new_guides.vertical))
-        if axis in ('horizontal', 'both'):
+        if axis in ("horizontal", "both"):
             self.horizontal = list(set(self.horizontal + new_guides.horizontal))
-        return self
+        return self

natural-pdf 0.1.33__py3-none-any.whl → 0.1.35__py3-none-any.whl

natural-pdf 0.1.33py3-none-any.whl → 0.1.35py3-none-any.whl