PyPI - natural-pdf - Versions diffs - 0.2.5__py3-none-any.whl → 0.2.8__py3-none-any.whl - Mend

natural-pdf 0.2.5__py3-none-any.whl → 0.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

natural_pdf/analyzers/guides.py +94 -42
natural_pdf/core/page.py +224 -62
natural_pdf/core/page_collection.py +261 -50
natural_pdf/core/page_groupby.py +20 -2
natural_pdf/core/pdf.py +17 -14
natural_pdf/core/render_spec.py +20 -5
natural_pdf/describe/base.py +1 -1
natural_pdf/describe/elements.py +1 -1
natural_pdf/elements/base.py +84 -8
natural_pdf/elements/element_collection.py +757 -20
natural_pdf/elements/region.py +181 -48
natural_pdf/flows/flow.py +3 -0
natural_pdf/selectors/parser.py +2 -2
natural_pdf/utils/color_utils.py +100 -0
{natural_pdf-0.2.5.dist-info → natural_pdf-0.2.8.dist-info}/METADATA +1 -1
{natural_pdf-0.2.5.dist-info → natural_pdf-0.2.8.dist-info}/RECORD +20 -19
{natural_pdf-0.2.5.dist-info → natural_pdf-0.2.8.dist-info}/WHEEL +0 -0
{natural_pdf-0.2.5.dist-info → natural_pdf-0.2.8.dist-info}/entry_points.txt +0 -0
{natural_pdf-0.2.5.dist-info → natural_pdf-0.2.8.dist-info}/licenses/LICENSE +0 -0
{natural_pdf-0.2.5.dist-info → natural_pdf-0.2.8.dist-info}/top_level.txt +0 -0

natural_pdf/core/page_collection.py CHANGED Viewed

@@ -460,6 +460,7 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
         end_elements=None,
         new_section_on_page_break=False,
         include_boundaries="both",
+        orientation="vertical",
     ) -> "ElementCollection[Region]":
         """
         Extract sections from a page collection based on start/end elements.
@@ -469,6 +470,7 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
             end_elements: Elements or selector string that mark the end of sections (optional)
             new_section_on_page_break: Whether to start a new section at page boundaries (default: False)
             include_boundaries: How to include boundary elements: 'start', 'end', 'both', or 'none' (default: 'both')
+            orientation: 'vertical' (default) or 'horizontal' - determines section direction
         Returns:
             List of Region objects representing the extracted sections
@@ -511,6 +513,9 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
                 next_page = self.pages[i + 1]
                 top_region = Region(next_page, (0, 0, next_page.width, 1))
                 top_region.is_page_boundary = True  # Mark it as a special boundary
+                # If start_elements is None, initialize it as an empty list
+                if start_elements is None:
+                    start_elements = []
                 start_elements.append(top_region)
         # Get all elements from all pages and sort them in document order
@@ -532,16 +537,23 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
             first_page = self.pages[0]
             first_start = Region(first_page, (0, 0, first_page.width, 1))
             first_start.is_implicit_start = True
+            # Don't mark this as created from any end element, so it can pair with any end
             start_elements.append(first_start)
             # For each end element (except the last), add an implicit start after it
-            sorted_end_elements = sorted(end_elements, key=lambda e: (e.page.index, e.top, e.x0))
+            # Sort by page, then top, then bottom (for elements with same top), then x0
+            sorted_end_elements = sorted(
+                end_elements, key=lambda e: (e.page.index, e.top, e.bottom, e.x0)
+            )
             for i, end_elem in enumerate(sorted_end_elements[:-1]):  # Exclude last end element
                 # Create implicit start element right after this end element
                 implicit_start = Region(
                     end_elem.page, (0, end_elem.bottom, end_elem.page.width, end_elem.bottom + 1)
                 )
                 implicit_start.is_implicit_start = True
+                # Track which end element this implicit start was created from
+                # to avoid pairing them together (which would create zero height)
+                implicit_start.created_from_end = end_elem
                 start_elements.append(implicit_start)
         # Mark section boundaries
@@ -606,17 +618,20 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
         # Sort boundaries by page index, then by actual document position
         def _sort_key(boundary):
-            """Sort boundaries by (page_idx, vertical_top, priority)."""
+            """Sort boundaries by (page_idx, position, priority)."""
             page_idx = boundary["page_idx"]
             element = boundary["element"]
-            # Vertical position on the page
-            y_pos = getattr(element, "top", 0.0)
+            # Position on the page based on orientation
+            if orientation == "vertical":
+                pos = getattr(element, "top", 0.0)
+            else:  # horizontal
+                pos = getattr(element, "x0", 0.0)
             # Ensure starts come before ends at the same coordinate
             priority = 0 if boundary["type"] == "start" else 1
-            return (page_idx, y_pos, priority)
+            return (page_idx, pos, priority)
         section_boundaries.sort(key=_sort_key)
@@ -624,10 +639,17 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
         sections = []
         # --- Helper: build a FlowRegion spanning multiple pages ---
-        def _build_flow_region(start_el, end_el):
-            """Return a FlowRegion that covers from *start_el* to *end_el* (inclusive).
-            If *end_el* is None, the region continues to the bottom of the last
-            page in this PageCollection."""
+        def _build_flow_region(start_el, end_el, include_boundaries="both", orientation="vertical"):
+            """Return a FlowRegion that covers from *start_el* to *end_el*.
+            If *end_el* is None, the region continues to the bottom/right of the last
+            page in this PageCollection.
+            Args:
+                start_el: Start element
+                end_el: End element
+                include_boundaries: How to include boundary elements: 'start', 'end', 'both', or 'none'
+                orientation: 'vertical' or 'horizontal' - determines section direction
+            """
             # Local imports to avoid top-level cycles
             from natural_pdf.elements.region import Region
             from natural_pdf.flows.element import FlowElement
@@ -639,12 +661,24 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
             parts: list[Region] = []
-            # Use the actual top of the start element (for implicit starts this is
-            # the bottom of the previous end element) instead of forcing to 0.
-            start_top = start_el.top
-            # Slice of first page beginning at *start_top*
-            parts.append(Region(start_pg, (0, start_top, start_pg.width, start_pg.height)))
+            if orientation == "vertical":
+                # Determine the start_top based on include_boundaries
+                start_top = start_el.top
+                if include_boundaries == "none" or include_boundaries == "end":
+                    # Exclude start boundary
+                    start_top = start_el.bottom if hasattr(start_el, "bottom") else start_el.top
+                # Slice of first page beginning at *start_top*
+                parts.append(Region(start_pg, (0, start_top, start_pg.width, start_pg.height)))
+            else:  # horizontal
+                # Determine the start_left based on include_boundaries
+                start_left = start_el.x0
+                if include_boundaries == "none" or include_boundaries == "end":
+                    # Exclude start boundary
+                    start_left = start_el.x1 if hasattr(start_el, "x1") else start_el.x0
+                # Slice of first page beginning at *start_left*
+                parts.append(Region(start_pg, (start_left, 0, start_pg.width, start_pg.height)))
             # Full middle pages
             for pg_idx in range(start_pg.index + 1, end_pg.index):
@@ -653,10 +687,32 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
             # Slice of last page (if distinct)
             if end_pg is not start_pg:
-                bottom = end_el.bottom if end_el is not None else end_pg.height
-                parts.append(Region(end_pg, (0, 0, end_pg.width, bottom)))
+                if orientation == "vertical":
+                    # Determine the bottom based on include_boundaries
+                    if end_el is not None:
+                        if include_boundaries == "none" or include_boundaries == "start":
+                            # Exclude end boundary
+                            bottom = end_el.top if hasattr(end_el, "top") else end_el.bottom
+                        else:
+                            # Include end boundary
+                            bottom = end_el.bottom
+                    else:
+                        bottom = end_pg.height
+                    parts.append(Region(end_pg, (0, 0, end_pg.width, bottom)))
+                else:  # horizontal
+                    # Determine the right based on include_boundaries
+                    if end_el is not None:
+                        if include_boundaries == "none" or include_boundaries == "start":
+                            # Exclude end boundary
+                            right = end_el.x0 if hasattr(end_el, "x0") else end_el.x1
+                        else:
+                            # Include end boundary
+                            right = end_el.x1
+                    else:
+                        right = end_pg.width
+                    parts.append(Region(end_pg, (0, 0, right, end_pg.height)))
-            flow = Flow(segments=parts, arrangement="vertical")
+            flow = Flow(segments=parts, arrangement=orientation)
             src_fe = FlowElement(physical_object=start_el, flow=flow)
             return FlowRegion(
                 flow=flow,
@@ -680,26 +736,103 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
                 start_element = current_start["element"]
                 end_element = boundary["element"]
+                # Check if this is an implicit start created from this same end element
+                # This would create a zero-height section, so skip this pairing
+                if (
+                    hasattr(start_element, "is_implicit_start")
+                    and hasattr(start_element, "created_from_end")
+                    and start_element.created_from_end is end_element
+                ):
+                    # Skip this pairing - keep current_start for next end element
+                    continue
                 # If both elements are on the same page, use the page's get_section_between
                 if start_element.page == end_element.page:
                     # For implicit start elements, create a region from the top of the page
                     if hasattr(start_element, "is_implicit_start"):
                         from natural_pdf.elements.region import Region
-                        section = Region(
-                            start_element.page,
-                            (0, start_element.top, start_element.page.width, end_element.bottom),
-                        )
+                        # Adjust boundaries based on include_boundaries parameter and orientation
+                        if orientation == "vertical":
+                            top = start_element.top
+                            bottom = end_element.bottom
+                            if include_boundaries == "none":
+                                # Exclude both boundaries - move past them
+                                top = (
+                                    start_element.bottom
+                                    if hasattr(start_element, "bottom")
+                                    else start_element.top
+                                )
+                                bottom = (
+                                    end_element.top
+                                    if hasattr(end_element, "top")
+                                    else end_element.bottom
+                                )
+                            elif include_boundaries == "start":
+                                # Include start, exclude end
+                                bottom = (
+                                    end_element.top
+                                    if hasattr(end_element, "top")
+                                    else end_element.bottom
+                                )
+                            elif include_boundaries == "end":
+                                # Exclude start, include end
+                                top = (
+                                    start_element.bottom
+                                    if hasattr(start_element, "bottom")
+                                    else start_element.top
+                                )
+                            # "both" is default - no adjustment needed
+                            section = Region(
+                                start_element.page,
+                                (0, top, start_element.page.width, bottom),
+                            )
+                        else:  # horizontal
+                            left = start_element.x0
+                            right = end_element.x1
+                            if include_boundaries == "none":
+                                # Exclude both boundaries - move past them
+                                left = (
+                                    start_element.x1
+                                    if hasattr(start_element, "x1")
+                                    else start_element.x0
+                                )
+                                right = (
+                                    end_element.x0 if hasattr(end_element, "x0") else end_element.x1
+                                )
+                            elif include_boundaries == "start":
+                                # Include start, exclude end
+                                right = (
+                                    end_element.x0 if hasattr(end_element, "x0") else end_element.x1
+                                )
+                            elif include_boundaries == "end":
+                                # Exclude start, include end
+                                left = (
+                                    start_element.x1
+                                    if hasattr(start_element, "x1")
+                                    else start_element.x0
+                                )
+                            # "both" is default - no adjustment needed
+                            section = Region(
+                                start_element.page,
+                                (left, 0, right, start_element.page.height),
+                            )
                         section.start_element = start_element
                         section.boundary_element_found = end_element
                     else:
                         section = start_element.page.get_section_between(
-                            start_element, end_element, include_boundaries
+                            start_element, end_element, include_boundaries, orientation
                         )
                     sections.append(section)
                 else:
                     # Create FlowRegion spanning pages
-                    flow_region = _build_flow_region(start_element, end_element)
+                    flow_region = _build_flow_region(
+                        start_element, end_element, include_boundaries, orientation
+                    )
                     sections.append(flow_region)
                 current_start = None
@@ -709,35 +842,84 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
                 # Create a section from current_start to just before this boundary
                 start_element = current_start["element"]
-                # Find the last element before this boundary on the same page
+                # Create section from current start to just before this new start
                 if start_element.page == boundary["element"].page:
-                    # Find elements on this page
-                    page_elements = [e for e in all_elements if e.page == start_element.page]
-                    # Sort by position
-                    page_elements.sort(key=lambda e: (e.top, e.x0))
-                    # Find the last element before the boundary
-                    end_idx = (
-                        page_elements.index(boundary["element"]) - 1
-                        if boundary["element"] in page_elements
-                        else -1
-                    )
-                    end_element = page_elements[end_idx] if end_idx >= 0 else None
+                    from natural_pdf.elements.region import Region
-                    # Create the section
-                    section = start_element.page.get_section_between(
-                        start_element, end_element, include_boundaries
-                    )
-                    sections.append(section)
+                    next_start = boundary["element"]
+                    # Create section based on orientation
+                    if orientation == "vertical":
+                        # Determine vertical bounds
+                        if include_boundaries in ["start", "both"]:
+                            top = start_element.top
+                        else:
+                            top = start_element.bottom
+                        # The section ends just before the next start
+                        bottom = next_start.top
+                        # Create the section with full page width
+                        if top < bottom:
+                            section = Region(
+                                start_element.page, (0, top, start_element.page.width, bottom)
+                            )
+                            section.start_element = start_element
+                            sections.append(section)
+                    else:  # horizontal
+                        # Determine horizontal bounds
+                        if include_boundaries in ["start", "both"]:
+                            left = start_element.x0
+                        else:
+                            left = start_element.x1
+                        # The section ends just before the next start
+                        right = next_start.x0
+                        # Create the section with full page height
+                        if left < right:
+                            section = Region(
+                                start_element.page, (left, 0, right, start_element.page.height)
+                            )
+                            section.start_element = start_element
+                            sections.append(section)
                 else:
                     # Cross-page section - create from current_start to the end of its page
                     from natural_pdf.elements.region import Region
                     start_page = start_element.page
-                    # Handle implicit start elements
-                    start_top = start_element.top
-                    region = Region(start_page, (0, start_top, start_page.width, start_page.height))
+                    # Handle implicit start elements and respect include_boundaries
+                    if orientation == "vertical":
+                        if include_boundaries in ["none", "end"]:
+                            # Exclude start boundary
+                            start_top = (
+                                start_element.bottom
+                                if hasattr(start_element, "bottom")
+                                else start_element.top
+                            )
+                        else:
+                            # Include start boundary
+                            start_top = start_element.top
+                        region = Region(
+                            start_page, (0, start_top, start_page.width, start_page.height)
+                        )
+                    else:  # horizontal
+                        if include_boundaries in ["none", "end"]:
+                            # Exclude start boundary
+                            start_left = (
+                                start_element.x1
+                                if hasattr(start_element, "x1")
+                                else start_element.x0
+                            )
+                        else:
+                            # Include start boundary
+                            start_left = start_element.x0
+                        region = Region(
+                            start_page, (start_left, 0, start_page.width, start_page.height)
+                        )
                     region.start_element = start_element
                     sections.append(region)
@@ -753,19 +935,48 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
                 # on the last page of the collection
                 last_page = self.pages[-1]
                 last_page_elements = [e for e in all_elements if e.page == last_page]
-                last_page_elements.sort(key=lambda e: (e.top, e.x0))
+                if orientation == "vertical":
+                    last_page_elements.sort(key=lambda e: (e.top, e.x0))
+                else:  # horizontal
+                    last_page_elements.sort(key=lambda e: (e.x0, e.top))
                 end_element = last_page_elements[-1] if last_page_elements else None
                 # Create FlowRegion spanning multiple pages using helper
-                flow_region = _build_flow_region(start_element, end_element)
+                flow_region = _build_flow_region(
+                    start_element, end_element, include_boundaries, orientation
+                )
                 sections.append(flow_region)
             else:
                 # With start_elements only, create a section to the end of the current page
                 from natural_pdf.elements.region import Region
-                # Handle implicit start elements
-                start_top = start_element.top
-                region = Region(start_page, (0, start_top, start_page.width, start_page.height))
+                # Handle implicit start elements and respect include_boundaries
+                if orientation == "vertical":
+                    if include_boundaries in ["none", "end"]:
+                        # Exclude start boundary
+                        start_top = (
+                            start_element.bottom
+                            if hasattr(start_element, "bottom")
+                            else start_element.top
+                        )
+                    else:
+                        # Include start boundary
+                        start_top = start_element.top
+                    region = Region(start_page, (0, start_top, start_page.width, start_page.height))
+                else:  # horizontal
+                    if include_boundaries in ["none", "end"]:
+                        # Exclude start boundary
+                        start_left = (
+                            start_element.x1 if hasattr(start_element, "x1") else start_element.x0
+                        )
+                    else:
+                        # Include start boundary
+                        start_left = start_element.x0
+                    region = Region(
+                        start_page, (start_left, 0, start_page.width, start_page.height)
+                    )
                 region.start_element = start_element
                 sections.append(region)

natural_pdf/core/page_groupby.py CHANGED Viewed

@@ -7,6 +7,8 @@ from typing import TYPE_CHECKING, Any, Callable, Dict, Iterator, List, Optional,
 from tqdm.auto import tqdm
+from natural_pdf.utils.color_utils import format_color_value
 if TYPE_CHECKING:
     from natural_pdf.core.page import Page
     from natural_pdf.core.page_collection import PageCollection
@@ -201,7 +203,15 @@ class PageGroupBy:
         """
         groups = self._compute_groups()
         for key, pages in groups.items():
-            print(f"\n--- Group: {key} ({len(pages)} pages) ---")
+            # Format the key for display, converting colors to hex if needed
+            if isinstance(self.by, str):
+                # If grouped by a string selector, check if it's a color attribute
+                formatted_key = format_color_value(key, attr_name=self.by)
+            else:
+                # For callable grouping, try to format as color
+                formatted_key = format_color_value(key)
+            print(f"\n--- Group: {formatted_key} ({len(pages)} pages) ---")
             pages.show(**kwargs)
     def __len__(self) -> int:
@@ -220,7 +230,15 @@ class PageGroupBy:
         print("-" * 40)
         for i, (key, pages) in enumerate(groups.items()):
-            key_display = f"'{key}'" if key is not None else "None"
+            if key is None:
+                key_display = "None"
+            else:
+                # Format the key for display, converting colors to hex if needed
+                if isinstance(self.by, str):
+                    formatted_key = format_color_value(key, attr_name=self.by)
+                else:
+                    formatted_key = format_color_value(key)
+                key_display = f"'{formatted_key}'"
             print(f"[{i}] {key_display}: {len(pages)} pages")
     def __repr__(self) -> str:

natural_pdf/core/pdf.py CHANGED Viewed

@@ -252,6 +252,16 @@ class _LazyPageList(Sequence):
                         logger.warning(f"Failed to apply region to page {cached.number}: {e}")
             self._cache[index] = cached
+            # Also cache in the parent PDF's main page list if this is a slice
+            if (
+                hasattr(self._parent_pdf, "_pages")
+                and hasattr(self._parent_pdf._pages, "_cache")
+                and actual_page_index < len(self._parent_pdf._pages._cache)
+                and self._parent_pdf._pages._cache[actual_page_index] is None
+            ):
+                self._parent_pdf._pages._cache[actual_page_index] = cached
         return cached
     # Sequence protocol ---------------------------------------------------
@@ -720,26 +730,16 @@ class PDF(
             # Store for bookkeeping and lazy application
             self._exclusions.append((exclusion_func, label))
-            # Apply only to already-created (cached) pages to avoid forcing page creation
-            for i in range(len(self._pages)):
-                if self._pages._cache[i] is not None:  # Only apply to existing pages
-                    try:
-                        self._pages._cache[i].add_exclusion(exclusion_func, label=label)
-                    except Exception as e:
-                        logger.warning(f"Failed to apply exclusion to existing page {i}: {e}")
+            # Don't modify already-cached pages - they will get PDF-level exclusions
+            # dynamically through _get_exclusion_regions()
             return self
         # Fallback to original callable / Region behaviour ------------------
         exclusion_data = (exclusion_func, label)
         self._exclusions.append(exclusion_data)
-        # Apply only to already-created (cached) pages to avoid forcing page creation
-        for i in range(len(self._pages)):
-            if self._pages._cache[i] is not None:  # Only apply to existing pages
-                try:
-                    self._pages._cache[i].add_exclusion(exclusion_func, label=label)
-                except Exception as e:
-                    logger.warning(f"Failed to apply exclusion to existing page {i}: {e}")
+        # Don't modify already-cached pages - they will get PDF-level exclusions
+        # dynamically through _get_exclusion_regions()
         return self
@@ -1280,6 +1280,7 @@ class PDF(
         end_elements=None,
         new_section_on_page_break=False,
         include_boundaries="both",
+        orientation="vertical",
     ) -> "ElementCollection":
         """
         Extract sections from the entire PDF based on start/end elements.
@@ -1292,6 +1293,7 @@ class PDF(
             end_elements: Elements or selector string that mark the end of sections (optional)
             new_section_on_page_break: Whether to start a new section at page boundaries (default: False)
             include_boundaries: How to include boundary elements: 'start', 'end', 'both', or 'none' (default: 'both')
+            orientation: 'vertical' (default) or 'horizontal' - determines section direction
         Returns:
             ElementCollection of Region objects representing the extracted sections
@@ -1328,6 +1330,7 @@ class PDF(
             end_elements=end_elements,
             new_section_on_page_break=new_section_on_page_break,
             include_boundaries=include_boundaries,
+            orientation=orientation,
         )
     def save_searchable(self, output_path: Union[str, "Path"], dpi: int = 300, **kwargs):

natural_pdf/core/render_spec.py CHANGED Viewed

@@ -196,7 +196,7 @@ class Visualizable:
         columns: Optional[int] = 6,  # For grid layout, defaults to 6 columns
         limit: Optional[int] = 30,  # Max pages to show (default 30)
         # Cropping options
-        crop: Union[bool, Literal["content"]] = False,
+        crop: Union[bool, int, str, "Region", Literal["wide"]] = False,
         crop_bbox: Optional[Tuple[float, float, float, float]] = None,
         **kwargs,
     ) -> Optional["PIL_Image"]:
@@ -219,7 +219,12 @@ class Visualizable:
             gap: Pixels between stacked images
             columns: Number of columns for grid layout (defaults to 6)
             limit: Maximum number of pages to display (default 30, None for all)
-            crop: Whether to crop (True, False, or 'content' for bbox of elements)
+            crop: Cropping mode:
+                - False: No cropping (default)
+                - True: Tight crop to element bounds
+                - int: Padding in pixels around element
+                - 'wide': Full page width, cropped vertically to element
+                - Region: Crop to the bounds of another region
             crop_bbox: Explicit crop bounds
             **kwargs: Additional parameters passed to rendering
@@ -230,6 +235,11 @@ class Visualizable:
         if isinstance(annotate, str):
             annotate = [annotate]
+        # Handle 'cols' as an alias for 'columns' for backward compatibility
+        if "cols" in kwargs and columns == 6:  # Only use cols if columns wasn't explicitly set
+            columns = kwargs.pop("cols")
+            logger.info(f"Using 'cols' parameter as alias for 'columns': {columns}")
         # Pass limit as max_pages to _get_render_specs
         if limit is not None:
             kwargs["max_pages"] = limit
@@ -283,7 +293,7 @@ class Visualizable:
         gap: int = 5,
         columns: Optional[int] = None,
         # Cropping options
-        crop: Union[bool, Literal["content"]] = False,
+        crop: Union[bool, int, str, "Region", Literal["wide"]] = False,
         crop_bbox: Optional[Tuple[float, float, float, float]] = None,
         **kwargs,
     ) -> Optional["PIL_Image"]:
@@ -299,13 +309,18 @@ class Visualizable:
             stack_direction: Direction for stack layout
             gap: Pixels between stacked images
             columns: Number of columns for grid layout
-            crop: Whether to crop
+            crop: Cropping mode (False, True, int for padding, 'wide', or Region)
             crop_bbox: Explicit crop bounds
             **kwargs: Additional parameters passed to rendering
         Returns:
             PIL Image object or None if nothing to render
         """
+        # Handle 'cols' as an alias for 'columns' for backward compatibility
+        if "cols" in kwargs and columns is None:  # Only use cols if columns wasn't explicitly set
+            columns = kwargs.pop("cols")
+            logger.info(f"Using 'cols' parameter as alias for 'columns': {columns}")
         specs = self._get_render_specs(mode="render", crop=crop, crop_bbox=crop_bbox, **kwargs)
         if not specs:
@@ -353,7 +368,7 @@ class Visualizable:
             stack_direction: Direction for stack layout
             gap: Pixels between stacked images
             columns: Number of columns for grid layout
-            crop: Whether to crop
+            crop: Cropping mode (False, True, int for padding, 'wide', or Region)
             crop_bbox: Explicit crop bounds
             format: Image format (inferred from path if not specified)
             **kwargs: Additional parameters passed to rendering

natural_pdf/describe/base.py CHANGED Viewed

@@ -344,7 +344,7 @@ def _extract_element_value(element: "Element", column: str) -> Any:
         elif column == "highlight":
             # If element is highlighted, return its colour; otherwise blank
-            if getattr(element, "highlight", False):
+            if getattr(element, "is_highlighted", False):
                 col_val = getattr(element, "highlight_color", None)
                 if col_val is None:
                     return "True"  # fallback if colour missing

natural_pdf/describe/elements.py CHANGED Viewed

@@ -306,7 +306,7 @@ def _analyze_typography(elements: List["Element"]) -> Dict[str, Any]:
             styles["strikeout"] += 1
         if getattr(element, "underline", False):
             styles["underline"] += 1
-        if getattr(element, "highlight", False):
+        if getattr(element, "is_highlighted", False):
             styles["highlight"] += 1
         # Color - use TextElement's color property

natural-pdf 0.2.5__py3-none-any.whl → 0.2.8__py3-none-any.whl

natural-pdf 0.2.5__py3-none-any.whl → 0.2.8__py3-none-any.whl