PyPI - natural-pdf - Versions diffs - 0.2.10__py3-none-any.whl → 0.2.12__py3-none-any.whl - Mend

natural-pdf 0.2.10__py3-none-any.whl → 0.2.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29)

natural_pdf/analyzers/guides.py +318 -73
natural_pdf/core/page.py +56 -8
natural_pdf/elements/region.py +5 -3
{natural_pdf-0.2.10.dist-info → natural_pdf-0.2.12.dist-info}/METADATA +1 -1
{natural_pdf-0.2.10.dist-info → natural_pdf-0.2.12.dist-info}/RECORD +29 -9
{natural_pdf-0.2.10.dist-info → natural_pdf-0.2.12.dist-info}/top_level.txt +1 -0
temp/debug_cell_extraction.py +42 -0
temp/debug_exclusion_overlap.py +43 -0
temp/debug_exclusions_guides.py +67 -0
temp/debug_extra_guide.py +41 -0
temp/debug_outer_boundaries.py +46 -0
temp/debug_st_search.py +33 -0
temp/fix_page_exclusions.py +42 -0
temp/test_exclusion_with_debug.py +30 -0
temp/test_find_exclusions_fix.py +53 -0
temp/test_find_exclusions_fix_no_recursion.py +97 -0
temp/test_fix_real_pdf.py +48 -0
temp/test_fix_working.py +55 -0
temp/test_fixed_pdf_exclusions.py +67 -0
temp/test_horizontal_top_bottom.py +53 -0
temp/test_marker_order.py +45 -0
temp/test_original_exclusions_now_work.py +56 -0
temp/test_pdf_exclusions_with_guides.py +84 -0
temp/test_region_exclusions_detailed.py +25 -0
temp/test_stripes_real_pdf.py +62 -0
temp/test_vertical_stripes.py +55 -0
{natural_pdf-0.2.10.dist-info → natural_pdf-0.2.12.dist-info}/WHEEL +0 -0
{natural_pdf-0.2.10.dist-info → natural_pdf-0.2.12.dist-info}/entry_points.txt +0 -0
{natural_pdf-0.2.10.dist-info → natural_pdf-0.2.12.dist-info}/licenses/LICENSE +0 -0

natural_pdf/analyzers/guides.py CHANGED Viewed

@@ -185,7 +185,9 @@ class GuidesList(UserList):
         self,
         markers: Union[str, List[str], "ElementCollection", Callable, None],
         obj: Optional[Union["Page", "Region", "FlowRegion"]] = None,
-        align: Literal["left", "right", "center", "between"] = "left",
+        align: Union[
+            Literal["left", "right", "center", "between"], Literal["top", "bottom"]
+        ] = "left",
         outer: bool = True,
         tolerance: float = 5,
         *,
@@ -203,7 +205,10 @@ class GuidesList(UserList):
                 - Callable: function that takes a page and returns markers
                 - None: no markers
             obj: Page/Region/FlowRegion to search (uses parent's context if None)
-            align: How to align guides relative to found elements
+            align: How to align guides relative to found elements:
+                - For vertical guides: 'left', 'right', 'center', 'between'
+                - For horizontal guides: 'top', 'bottom', 'center', 'between'
+                - Note: 'left'/'right' also work for horizontal (mapped to top/bottom)
             outer: Whether to add outer boundary guides
             tolerance: Tolerance for snapping to element edges
             apply_exclusions: Whether to apply exclusion zones when searching for text
@@ -224,19 +229,25 @@ class GuidesList(UserList):
             self._callable = None
             actual_markers = markers
+        # Normalize alignment for horizontal guides
+        if self._axis == "horizontal":
+            if align == "top":
+                align = "left"
+            elif align == "bottom":
+                align = "right"
         # Check if parent is in flow mode
         if self._parent.is_flow_region:
             # Create guides across all constituent regions
             all_guides = []
             for region in self._parent.context.constituent_regions:
-                # Normalize markers for this region
-                marker_texts = _normalize_markers(actual_markers, region)
+                # Pass markers directly - from_content will handle them properly
                 # Create guides for this region
                 region_guides = Guides.from_content(
                     obj=region,
                     axis=self._axis,
-                    markers=marker_texts,
+                    markers=actual_markers,  # Pass original markers, not normalized text
                     align=align,
                     outer=outer,
                     tolerance=tolerance,
@@ -312,14 +323,14 @@ class GuidesList(UserList):
             return self._parent
         # Original single-region logic
-        # Normalize markers to list of text strings
-        marker_texts = _normalize_markers(actual_markers, target_obj)
+        # Pass markers directly to from_content which will handle them properly
+        # (no need to normalize here since from_content now handles ElementCollection)
         # Create guides for this axis
         new_guides = Guides.from_content(
             obj=target_obj,
             axis=self._axis,
-            markers=marker_texts,
+            markers=actual_markers,  # Pass original markers, not normalized text
             align=align,
             outer=outer,
             tolerance=tolerance,
@@ -930,6 +941,82 @@ class GuidesList(UserList):
         self.data.clear()
         return self._parent
+    def from_stripes(
+        self,
+        stripes=None,
+        color=None,  # Explicitly specify stripe color
+    ) -> "Guides":
+        """Create guides from striped table rows or columns.
+        Creates guides at both edges of stripe elements (e.g., colored table rows).
+        Perfect for zebra-striped tables where you need guides at every row boundary.
+        Args:
+            stripes: Elements representing stripes. If None, auto-detects.
+            color: Specific color to look for (e.g., '#00ffff'). If None, finds most common.
+        Examples:
+            # Auto-detect zebra stripes
+            guides.horizontal.from_stripes()
+            # Specific color
+            guides.horizontal.from_stripes(color='#00ffff')
+            # Manual selection
+            stripes = page.find_all('rect[fill=#00ffff]')
+            guides.horizontal.from_stripes(stripes)
+            # Vertical stripes
+            guides.vertical.from_stripes(color='#e0e0e0')
+        Returns:
+            Parent Guides object for chaining
+        """
+        from collections import defaultdict
+        target_obj = self._parent.context
+        if target_obj is None:
+            raise ValueError("No context available for stripe detection")
+        if stripes is None:
+            if color:
+                # User specified color
+                stripes = target_obj.find_all(f"rect[fill={color}]")
+            else:
+                # Auto-detect most common non-white fill
+                all_rects = target_obj.find_all("rect[fill]")
+                # Group by fill color
+                fill_counts = defaultdict(list)
+                for rect in all_rects:
+                    if rect.fill and rect.fill not in ["#ffffff", "white", "none", "transparent"]:
+                        fill_counts[rect.fill].append(rect)
+                if not fill_counts:
+                    return self._parent  # No stripes found
+                # Find most common fill color
+                stripes = max(fill_counts.values(), key=len)
+        if not stripes:
+            return self._parent
+        # Get both edges of each stripe
+        edges = []
+        if self._axis == "horizontal":
+            for stripe in stripes:
+                edges.extend([stripe.top, stripe.bottom])
+        else:
+            for stripe in stripes:
+                edges.extend([stripe.x0, stripe.x1])
+        # Remove duplicates and sort
+        edges = sorted(set(edges))
+        # Add guides
+        self.extend(edges)
+        return self._parent
     def __add__(self, other):
         """Handle addition of GuidesList objects by returning combined data."""
         if isinstance(other, GuidesList):
@@ -1459,7 +1546,9 @@ class Guides:
         obj: Union["Page", "Region", "FlowRegion"],
         axis: Literal["vertical", "horizontal"] = "vertical",
         markers: Union[str, List[str], "ElementCollection", None] = None,
-        align: Literal["left", "right", "center", "between"] = "left",
+        align: Union[
+            Literal["left", "right", "center", "between"], Literal["top", "bottom"]
+        ] = "left",
         outer: bool = True,
         tolerance: float = 5,
         apply_exclusions: bool = True,
@@ -1475,7 +1564,9 @@ class Guides:
                 - List[str]: list of selectors or literal text strings
                 - ElementCollection: collection of elements to extract text from
                 - None: no markers
-            align: Where to place guides relative to found text
+            align: Where to place guides relative to found text:
+                - For vertical guides: 'left', 'right', 'center', 'between'
+                - For horizontal guides: 'top', 'bottom', 'center', 'between'
             outer: Whether to add guides at the boundaries
             tolerance: Maximum distance to search for text
             apply_exclusions: Whether to apply exclusion zones when searching for text
@@ -1483,6 +1574,13 @@ class Guides:
         Returns:
             New Guides object aligned to text content
         """
+        # Normalize alignment for horizontal guides
+        if axis == "horizontal":
+            if align == "top":
+                align = "left"
+            elif align == "bottom":
+                align = "right"
         # Handle FlowRegion
         if hasattr(obj, "constituent_regions"):
             guides = cls(context=obj)
@@ -1530,39 +1628,51 @@ class Guides:
         elif hasattr(obj, "width"):
             bounds = (0, 0, obj.width, obj.height)
-        # Normalize markers to list of text strings
-        marker_texts = _normalize_markers(markers, obj)
+        # Handle different marker types
+        elements_to_process = []
-        # Find each marker and determine guide position
-        for marker in marker_texts:
-            if hasattr(obj, "find"):
-                element = obj.find(f'text:contains("{marker}")', apply_exclusions=apply_exclusions)
-                if element:
-                    if axis == "vertical":
-                        if align == "left":
-                            guides_coords.append(element.x0)
-                        elif align == "right":
-                            guides_coords.append(element.x1)
-                        elif align == "center":
-                            guides_coords.append((element.x0 + element.x1) / 2)
-                        elif align == "between":
-                            # For between, collect left edges for processing later
-                            guides_coords.append(element.x0)
-                    else:  # horizontal
-                        if align == "left":  # top for horizontal
-                            guides_coords.append(element.top)
-                        elif align == "right":  # bottom for horizontal
-                            guides_coords.append(element.bottom)
-                        elif align == "center":
-                            guides_coords.append((element.top + element.bottom) / 2)
-                        elif align == "between":
-                            # For between, collect top edges for processing later
-                            guides_coords.append(element.top)
+        # Check if markers is an ElementCollection or has elements attribute
+        if hasattr(markers, "elements") or hasattr(markers, "_elements"):
+            # It's an ElementCollection - use elements directly
+            elements_to_process = getattr(markers, "elements", getattr(markers, "_elements", []))
+        elif hasattr(markers, "__iter__") and not isinstance(markers, str):
+            # Check if it's an iterable of elements (not strings)
+            try:
+                markers_list = list(markers)
+                if markers_list and hasattr(markers_list[0], "x0"):
+                    # It's a list of elements
+                    elements_to_process = markers_list
+            except:
+                pass
-        # Handle 'between' alignment - find midpoints between adjacent markers
-        if align == "between" and len(guides_coords) >= 2:
-            # We need to get the right and left edges of each marker
-            marker_bounds = []
+        if elements_to_process:
+            # Process elements directly without text search
+            for element in elements_to_process:
+                if axis == "vertical":
+                    if align == "left":
+                        guides_coords.append(element.x0)
+                    elif align == "right":
+                        guides_coords.append(element.x1)
+                    elif align == "center":
+                        guides_coords.append((element.x0 + element.x1) / 2)
+                    elif align == "between":
+                        # For between, collect left edges for processing later
+                        guides_coords.append(element.x0)
+                else:  # horizontal
+                    if align == "left":  # top for horizontal
+                        guides_coords.append(element.top)
+                    elif align == "right":  # bottom for horizontal
+                        guides_coords.append(element.bottom)
+                    elif align == "center":
+                        guides_coords.append((element.top + element.bottom) / 2)
+                    elif align == "between":
+                        # For between, collect top edges for processing later
+                        guides_coords.append(element.top)
+        else:
+            # Fall back to text-based search
+            marker_texts = _normalize_markers(markers, obj)
+            # Find each marker and determine guide position
             for marker in marker_texts:
                 if hasattr(obj, "find"):
                     element = obj.find(
@@ -1570,9 +1680,52 @@ class Guides:
                     )
                     if element:
                         if axis == "vertical":
-                            marker_bounds.append((element.x0, element.x1))
+                            if align == "left":
+                                guides_coords.append(element.x0)
+                            elif align == "right":
+                                guides_coords.append(element.x1)
+                            elif align == "center":
+                                guides_coords.append((element.x0 + element.x1) / 2)
+                            elif align == "between":
+                                # For between, collect left edges for processing later
+                                guides_coords.append(element.x0)
                         else:  # horizontal
-                            marker_bounds.append((element.top, element.bottom))
+                            if align == "left":  # top for horizontal
+                                guides_coords.append(element.top)
+                            elif align == "right":  # bottom for horizontal
+                                guides_coords.append(element.bottom)
+                            elif align == "center":
+                                guides_coords.append((element.top + element.bottom) / 2)
+                            elif align == "between":
+                                # For between, collect top edges for processing later
+                                guides_coords.append(element.top)
+        # Handle 'between' alignment - find midpoints between adjacent markers
+        if align == "between" and len(guides_coords) >= 2:
+            # We need to get the right and left edges of each marker
+            marker_bounds = []
+            if elements_to_process:
+                # Use elements directly
+                for element in elements_to_process:
+                    if axis == "vertical":
+                        marker_bounds.append((element.x0, element.x1))
+                    else:  # horizontal
+                        marker_bounds.append((element.top, element.bottom))
+            else:
+                # Fall back to text search
+                if "marker_texts" not in locals():
+                    marker_texts = _normalize_markers(markers, obj)
+                for marker in marker_texts:
+                    if hasattr(obj, "find"):
+                        element = obj.find(
+                            f'text:contains("{marker}")', apply_exclusions=apply_exclusions
+                        )
+                        if element:
+                            if axis == "vertical":
+                                marker_bounds.append((element.x0, element.x1))
+                            else:  # horizontal
+                                marker_bounds.append((element.top, element.bottom))
             # Sort markers by their left edge (or top edge for horizontal)
             marker_bounds.sort(key=lambda x: x[0])
@@ -4246,12 +4399,26 @@ class _ColumnAccessor:
         """Return number of columns (vertical guides - 1)."""
         return max(0, len(self._guides.vertical) - 1)
-    def __getitem__(self, index: int) -> "Region":
-        """Get column at the specified index."""
-        # Handle negative indexing
-        if index < 0:
-            index = len(self) + index
-        return self._guides.column(index)
+    def __getitem__(self, index: Union[int, slice]) -> Union["Region", "ElementCollection"]:
+        """Get column at the specified index or slice."""
+        from natural_pdf.elements.element_collection import ElementCollection
+        if isinstance(index, slice):
+            # Handle slice notation - return multiple columns
+            columns = []
+            num_cols = len(self)
+            # Convert slice to range of indices
+            start, stop, step = index.indices(num_cols)
+            for i in range(start, stop, step):
+                columns.append(self._guides.column(i))
+            return ElementCollection(columns)
+        else:
+            # Handle negative indexing
+            if index < 0:
+                index = len(self) + index
+            return self._guides.column(index)
 class _RowAccessor:
@@ -4264,12 +4431,26 @@ class _RowAccessor:
         """Return number of rows (horizontal guides - 1)."""
         return max(0, len(self._guides.horizontal) - 1)
-    def __getitem__(self, index: int) -> "Region":
-        """Get row at the specified index."""
-        # Handle negative indexing
-        if index < 0:
-            index = len(self) + index
-        return self._guides.row(index)
+    def __getitem__(self, index: Union[int, slice]) -> Union["Region", "ElementCollection"]:
+        """Get row at the specified index or slice."""
+        from natural_pdf.elements.element_collection import ElementCollection
+        if isinstance(index, slice):
+            # Handle slice notation - return multiple rows
+            rows = []
+            num_rows = len(self)
+            # Convert slice to range of indices
+            start, stop, step = index.indices(num_rows)
+            for i in range(start, stop, step):
+                rows.append(self._guides.row(i))
+            return ElementCollection(rows)
+        else:
+            # Handle negative indexing
+            if index < 0:
+                index = len(self) + index
+            return self._guides.row(index)
 class _CellAccessor:
@@ -4278,33 +4459,82 @@ class _CellAccessor:
     def __init__(self, guides: "Guides"):
         self._guides = guides
-    def __getitem__(self, key) -> Union["Region", "_CellRowAccessor"]:
+    def __getitem__(self, key) -> Union["Region", "_CellRowAccessor", "ElementCollection"]:
         """
         Get cell(s) at the specified position.
         Supports:
-        - guides.cells[row, col] - tuple indexing
-        - guides.cells[row][col] - nested indexing
+        - guides.cells[row, col] - single cell
+        - guides.cells[row][col] - single cell (nested)
+        - guides.cells[row, :] - all cells in a row
+        - guides.cells[:, col] - all cells in a column
+        - guides.cells[:, :] - all cells
+        - guides.cells[row][:] - all cells in a row (nested)
         """
+        from natural_pdf.elements.element_collection import ElementCollection
         if isinstance(key, tuple) and len(key) == 2:
-            # Direct tuple access: guides.cells[row, col]
             row, col = key
-            # Handle negative indexing for both row and col
-            if row < 0:
-                row = len(self._guides.rows) + row
-            if col < 0:
-                col = len(self._guides.columns) + col
-            return self._guides.cell(row, col)
+            # Handle slices for row and/or column
+            if isinstance(row, slice) or isinstance(col, slice):
+                cells = []
+                num_rows = len(self._guides.rows)
+                num_cols = len(self._guides.columns)
+                # Convert slices to ranges
+                if isinstance(row, slice):
+                    row_indices = range(*row.indices(num_rows))
+                else:
+                    # Single row index
+                    if row < 0:
+                        row = num_rows + row
+                    row_indices = [row]
+                if isinstance(col, slice):
+                    col_indices = range(*col.indices(num_cols))
+                else:
+                    # Single column index
+                    if col < 0:
+                        col = num_cols + col
+                    col_indices = [col]
+                # Collect all cells in the specified ranges
+                for r in row_indices:
+                    for c in col_indices:
+                        cells.append(self._guides.cell(r, c))
+                return ElementCollection(cells)
+            else:
+                # Both are integers - single cell access
+                # Handle negative indexing for both row and col
+                if row < 0:
+                    row = len(self._guides.rows) + row
+                if col < 0:
+                    col = len(self._guides.columns) + col
+                return self._guides.cell(row, col)
+        elif isinstance(key, slice):
+            # First level slice: guides.cells[:] - return all rows as accessors
+            # For now, let's return all cells flattened
+            cells = []
+            num_rows = len(self._guides.rows)
+            row_indices = range(*key.indices(num_rows))
+            for r in row_indices:
+                for c in range(len(self._guides.columns)):
+                    cells.append(self._guides.cell(r, c))
+            return ElementCollection(cells)
         elif isinstance(key, int):
             # First level of nested access: guides.cells[row]
             # Handle negative indexing for row
             if key < 0:
                 key = len(self._guides.rows) + key
-            # Return a row accessor that allows [col] indexing
+            # Return a row accessor that allows [col] or [:] indexing
             return _CellRowAccessor(self._guides, key)
         else:
             raise TypeError(
-                f"Cell indices must be integers or tuple of two integers, got {type(key)}"
+                f"Cell indices must be integers, slices, or tuple of two integers/slices, got {type(key)}"
             )
@@ -4315,9 +4545,24 @@ class _CellRowAccessor:
         self._guides = guides
         self._row = row
-    def __getitem__(self, col: int) -> "Region":
-        """Get cell at [row][col]."""
-        # Handle negative indexing for column
-        if col < 0:
-            col = len(self._guides.columns) + col
-        return self._guides.cell(self._row, col)
+    def __getitem__(self, col: Union[int, slice]) -> Union["Region", "ElementCollection"]:
+        """Get cell at [row][col] or all cells in row with [row][:]."""
+        from natural_pdf.elements.element_collection import ElementCollection
+        if isinstance(col, slice):
+            # Handle slice notation - return all cells in this row
+            cells = []
+            num_cols = len(self._guides.columns)
+            # Convert slice to range of indices
+            start, stop, step = col.indices(num_cols)
+            for c in range(start, stop, step):
+                cells.append(self._guides.cell(self._row, c))
+            return ElementCollection(cells)
+        else:
+            # Handle single column index
+            # Handle negative indexing for column
+            if col < 0:
+                col = len(self._guides.columns) + col
+            return self._guides.cell(self._row, col)

natural_pdf/core/page.py CHANGED Viewed

@@ -815,11 +815,38 @@ class Page(
                             if debug:
                                 print(f"    ✗ Empty iterable returned from callable '{label}'")
                     elif region_result:
-                        logger.warning(
-                            f"Callable exclusion '{exclusion_label}' returned non-Region object: {type(region_result)}. Skipping."
-                        )
-                        if debug:
-                            print(f"    ✗ Callable returned non-Region/None: {type(region_result)}")
+                        # Check if it's a single Element that can be converted to a Region
+                        from natural_pdf.elements.base import Element
+                        if isinstance(region_result, Element) or (
+                            hasattr(region_result, "bbox") and hasattr(region_result, "expand")
+                        ):
+                            try:
+                                # Convert Element to Region using expand()
+                                expanded_region = region_result.expand()
+                                if isinstance(expanded_region, Region):
+                                    expanded_region.label = label
+                                    regions.append(expanded_region)
+                                    if debug:
+                                        print(
+                                            f"    ✓ Converted Element to Region from callable '{label}': {expanded_region}"
+                                        )
+                                else:
+                                    if debug:
+                                        print(
+                                            f"    ✗ Element.expand() did not return a Region: {type(expanded_region)}"
+                                        )
+                            except Exception as e:
+                                if debug:
+                                    print(f"    ✗ Failed to convert Element to Region: {e}")
+                        else:
+                            logger.warning(
+                                f"Callable exclusion '{exclusion_label}' returned non-Region object: {type(region_result)}. Skipping."
+                            )
+                            if debug:
+                                print(
+                                    f"    ✗ Callable returned non-Region/None: {type(region_result)}"
+                                )
                     else:
                         if debug:
                             print(
@@ -839,6 +866,27 @@ class Page(
                 if debug:
                     print(f"  - Added direct region '{label}': {exclusion_item}")
+            # Process direct Element objects - convert to Region
+            elif hasattr(exclusion_item, "bbox") and hasattr(exclusion_item, "expand"):
+                try:
+                    # Convert Element to Region using expand()
+                    expanded_region = exclusion_item.expand()
+                    if isinstance(expanded_region, Region):
+                        expanded_region.label = label
+                        regions.append(expanded_region)
+                        if debug:
+                            print(
+                                f"  - Converted direct Element to Region '{label}': {expanded_region}"
+                            )
+                    else:
+                        if debug:
+                            print(
+                                f"  - Element.expand() did not return a Region: {type(expanded_region)}"
+                            )
+                except Exception as e:
+                    if debug:
+                        print(f"  - Failed to convert Element to Region: {e}")
             # Process string selectors (from PDF-level exclusions)
             elif isinstance(exclusion_item, str):
                 selector_str = exclusion_item
@@ -1081,7 +1129,7 @@ class Page(
         )  # _apply_selector doesn't filter
         # Filter the results based on exclusions if requested
-        if apply_exclusions and self._exclusions and results_collection:
+        if apply_exclusions and results_collection:
             filtered_elements = self._filter_elements_by_exclusions(results_collection.elements)
             # Return the first element from the filtered list
             return filtered_elements[0] if filtered_elements else None
@@ -1176,7 +1224,7 @@ class Page(
         )  # _apply_selector doesn't filter
         # Filter the results based on exclusions if requested
-        if apply_exclusions and self._exclusions and results_collection:
+        if apply_exclusions and results_collection:
             filtered_elements = self._filter_elements_by_exclusions(results_collection.elements)
             return ElementCollection(filtered_elements)
         else:
@@ -1548,7 +1596,7 @@ class Page(
         all_elements = self._element_mgr.get_all_elements()
         # Apply exclusions if requested
-        if apply_exclusions and self._exclusions:
+        if apply_exclusions:
             return self._filter_elements_by_exclusions(
                 all_elements, debug_exclusions=debug_exclusions
             )

natural_pdf/elements/region.py CHANGED Viewed

@@ -1270,7 +1270,8 @@ class Region(
         # 3. Get Relevant Exclusions (overlapping this region)
         apply_exclusions_flag = kwargs.get("apply_exclusions", apply_exclusions)
         exclusion_regions = []
-        if apply_exclusions_flag and self._page._exclusions:
+        if apply_exclusions_flag:
+            # Always call _get_exclusion_regions to get both page and PDF level exclusions
             all_page_exclusions = self._page._get_exclusion_regions(
                 include_callable=True, debug=debug
             )
@@ -1281,10 +1282,11 @@ class Region(
             exclusion_regions = overlapping_exclusions
             if debug:
                 logger.debug(
-                    f"Region {self.bbox}: Applying {len(exclusion_regions)} overlapping exclusions."
+                    f"Region {self.bbox}: Found {len(all_page_exclusions)} total exclusions, "
+                    f"{len(exclusion_regions)} overlapping this region."
                 )
         elif debug:
-            logger.debug(f"Region {self.bbox}: Not applying exclusions.")
+            logger.debug(f"Region {self.bbox}: Not applying exclusions (apply_exclusions=False).")
         # 4. Spatially Filter Characters using Utility
         # Pass self as the target_region for precise polygon checks etc.

{natural_pdf-0.2.10.dist-info → natural_pdf-0.2.12.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: natural-pdf
-Version: 0.2.10
+Version: 0.2.12
 Summary: A more intuitive interface for working with PDFs
 Author-email: Jonathan Soma <jonathan.soma@gmail.com>
 License-Expression: MIT

natural-pdf 0.2.10__py3-none-any.whl → 0.2.12__py3-none-any.whl

natural-pdf 0.2.10__py3-none-any.whl → 0.2.12__py3-none-any.whl