PyPI - natural-pdf - Versions diffs - 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl - Mend

natural-pdf 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

natural_pdf/__init__.py +1 -1
natural_pdf/analyzers/layout/layout_analyzer.py +133 -44
natural_pdf/analyzers/layout/layout_manager.py +9 -6
natural_pdf/analyzers/layout/layout_options.py +2 -4
natural_pdf/analyzers/layout/surya.py +199 -91
natural_pdf/core/highlighting_service.py +48 -17
natural_pdf/core/page.py +92 -27
natural_pdf/core/pdf.py +11 -0
natural_pdf/elements/base.py +99 -14
natural_pdf/elements/collections.py +56 -0
natural_pdf/elements/region.py +56 -131
natural_pdf/qa/document_qa.py +4 -3
natural_pdf/selectors/parser.py +215 -1
natural_pdf/utils/visualization.py +2 -2
natural_pdf-0.1.2.dist-info/METADATA +124 -0
{natural_pdf-0.1.0.dist-info → natural_pdf-0.1.2.dist-info}/RECORD +19 -19
natural_pdf-0.1.0.dist-info/METADATA +0 -295
{natural_pdf-0.1.0.dist-info → natural_pdf-0.1.2.dist-info}/WHEEL +0 -0
{natural_pdf-0.1.0.dist-info → natural_pdf-0.1.2.dist-info}/licenses/LICENSE +0 -0
{natural_pdf-0.1.0.dist-info → natural_pdf-0.1.2.dist-info}/top_level.txt +0 -0

natural_pdf/core/highlighting_service.py CHANGED Viewed

@@ -383,7 +383,7 @@ class HighlightingService:
     def add(
         self,
         page_index: int,
-        bbox: Tuple[float, float, float, float],
+        bbox: Union[Tuple[float, float, float, float], Any], # Relax input type hint
         color: Optional[Union[Tuple, str]] = None,
         label: Optional[str] = None,
         use_color_cycling: bool = False,
@@ -392,9 +392,32 @@ class HighlightingService:
         existing: str = 'append'
     ):
         """Adds a rectangular highlight."""
+        processed_bbox: Tuple[float, float, float, float]
+        # Check if bbox is an object with expected attributes (likely a Region)
+        # Assuming Region object has x0, top, x1, bottom attributes based on error context
+        if (hasattr(bbox, 'x0') and hasattr(bbox, 'top') and
+            hasattr(bbox, 'x1') and hasattr(bbox, 'bottom')):
+             try:
+                # Ensure attributes are numeric before creating tuple
+                processed_bbox = (float(bbox.x0), float(bbox.top), float(bbox.x1), float(bbox.bottom))
+             except (ValueError, TypeError):
+                 logger.error(f"Invalid attribute types in bbox object for page {page_index}: {bbox}. Expected numeric values.")
+                 return
+        elif isinstance(bbox, (list, tuple)) and len(bbox) == 4:
+             try:
+                 # Ensure elements are numeric and convert to tuple
+                 processed_bbox = tuple(float(v) for v in bbox)
+             except (ValueError, TypeError):
+                 logger.error(f"Invalid values in bbox sequence for page {page_index}: {bbox}. Expected numeric values.")
+                 return
+        else:
+            logger.error(f"Invalid bbox type or structure provided for page {page_index}: {type(bbox)} - {bbox}. Expected tuple/list of 4 numbers or Region-like object.")
+            return # Don't proceed if bbox is invalid
         self._add_internal(
             page_index=page_index,
-            bbox=bbox,
+            bbox=processed_bbox, # Use the processed tuple
             polygon=None,
             color_input=color,
             label=label,
@@ -526,6 +549,7 @@ class HighlightingService:
     ) -> Optional[Image.Image]:
         """
         Renders a specific page with its highlights.
+        Legend is now generated based only on highlights present on this page.
         Args:
             page_index: The 0-based index of the page to render.
@@ -545,23 +569,19 @@ class HighlightingService:
             return None
         page = self._pdf[page_index]
-        highlights_on_page = self.get_highlights_for_page(page_index)
+        highlights_on_page = self.get_highlights_for_page(page_index) # This list will be empty if clear_page was called
         # --- Get Base Image ---
         try:
             render_resolution = resolution if resolution is not None else scale * 72
-            # Use the underlying pdfplumber page object for base rendering
             img_object = page._page.to_image(resolution=render_resolution, **kwargs)
-            # Access the PIL image directly
-            base_image = img_object.annotated # .annotated usually holds the PIL Image
+            base_image = img_object.annotated
             if not isinstance(base_image, Image.Image):
-                 # Fallback for different pdfplumber versions/outputs
                  png_data = img_object._repr_png_()
                  if png_data:
                       base_image = Image.open(io.BytesIO(png_data)).convert('RGB')
                  else:
                       raise ValueError("Could not extract base PIL image from pdfplumber.")
-            # Convert to RGBA for compositing
             base_image = base_image.convert('RGBA')
             logger.debug(f"Base image for page {page_index} rendered with resolution {render_resolution}.")
         except Exception as e:
@@ -569,6 +589,7 @@ class HighlightingService:
             return None
         # --- Render Highlights ---
+        rendered_image: Image.Image
         if highlights_on_page:
             renderer = HighlightRenderer(
                 page=page,
@@ -579,21 +600,31 @@ class HighlightingService:
             )
             rendered_image = renderer.render()
         else:
-             # If no highlights, still need to potentially render OCR if requested
              if render_ocr:
+                  # Still render OCR even if no highlights
                   renderer = HighlightRenderer(page, base_image, [], scale, True)
-                  rendered_image = renderer.render() # Will only call _render_ocr_text
+                  rendered_image = renderer.render()
              else:
                   rendered_image = base_image # No highlights, no OCR requested
-        # --- Add Legend ---
+        # --- Add Legend (Based ONLY on this page's highlights) ---
         if labels:
-            label_colors = self.get_labels_and_colors()
-            if label_colors:
-                legend = create_legend(label_colors)
-                rendered_image = merge_images_with_legend(rendered_image, legend, legend_position)
-                logger.debug(f"Added legend with {len(label_colors)} labels to page {page_index}.")
+            # CHANGE: Create label_colors map only from highlights_on_page
+            labels_colors_on_page: Dict[str, Tuple[int, int, int, int]] = {}
+            for hl in highlights_on_page:
+                if hl.label and hl.label not in labels_colors_on_page:
+                    labels_colors_on_page[hl.label] = hl.color
+            if labels_colors_on_page: # Only add legend if there are labels on this page
+                legend = create_legend(labels_colors_on_page)
+                if legend: # Ensure create_legend didn't return None
+                     rendered_image = merge_images_with_legend(rendered_image, legend, legend_position)
+                     logger.debug(f"Added legend with {len(labels_colors_on_page)} labels for page {page_index}.")
+                else:
+                     logger.debug(f"Legend creation returned None for page {page_index}.")
+            else:
+                 logger.debug(f"No labels found on page {page_index}, skipping legend.")
         return rendered_image
     def render_preview(

natural_pdf/core/page.py CHANGED Viewed

@@ -9,6 +9,7 @@ import io
 import json
 from natural_pdf.elements.collections import ElementCollection
+from natural_pdf.elements.region import Region
 if TYPE_CHECKING:
     import pdfplumber
@@ -17,7 +18,6 @@ if TYPE_CHECKING:
     from natural_pdf.core.highlighting_service import HighlightingService
     from natural_pdf.elements.base import Element
-from natural_pdf.elements.region import Region
 from natural_pdf.elements.text import TextElement
 from natural_pdf.analyzers.layout.layout_manager import LayoutManager
 from natural_pdf.analyzers.layout.layout_options import LayoutOptions
@@ -120,18 +120,50 @@ class Page:
               raise AttributeError("Parent PDF object does not have a 'highlighter' attribute.")
          return self._parent.highlighter
-    def add_exclusion(self, exclusion_func_or_region: Union[Callable[['Page'], Region], Region]) -> 'Page':
+    def clear_exclusions(self) -> 'Page':
+        """
+        Clear all exclusions from the page.
+        """
+        self._exclusions = []
+        return self
+    def add_exclusion(self, exclusion_func_or_region: Union[Callable[['Page'], Region], Region, Any]) -> 'Page':
         """
         Add an exclusion to the page. Text from these regions will be excluded from extraction.
+        Ensures non-callable items are stored as Region objects if possible.
         Args:
-            exclusion_func_or_region: Either a Region object or a function that takes a Page
-                                      and returns a Region to exclude
+            exclusion_func_or_region: Either a callable function returning a Region,
+                                      a Region object, or another object with a valid .bbox attribute.
         Returns:
             Self for method chaining
-        """
-        self._exclusions.append(exclusion_func_or_region)
+        Raises:
+            TypeError: If a non-callable, non-Region object without a valid bbox is provided.
+        """
+        if callable(exclusion_func_or_region):
+            # Store callable functions directly
+            self._exclusions.append(exclusion_func_or_region)
+            logger.debug(f"Page {self.index}: Added callable exclusion: {exclusion_func_or_region}")
+        elif isinstance(exclusion_func_or_region, Region):
+            # Store Region objects directly
+            self._exclusions.append(exclusion_func_or_region)
+            logger.debug(f"Page {self.index}: Added Region exclusion: {exclusion_func_or_region}")
+        elif hasattr(exclusion_func_or_region, 'bbox') and isinstance(getattr(exclusion_func_or_region, 'bbox', None), (tuple, list)) and len(exclusion_func_or_region.bbox) == 4:
+            # Convert objects with a valid bbox to a Region before storing
+            try:
+                bbox_coords = tuple(float(v) for v in exclusion_func_or_region.bbox)
+                region_to_add = Region(self, bbox_coords)
+                self._exclusions.append(region_to_add)
+                logger.debug(f"Page {self.index}: Added exclusion converted to Region from {type(exclusion_func_or_region)}: {region_to_add}")
+            except (ValueError, TypeError, Exception) as e:
+                # Raise an error if conversion fails
+                raise TypeError(f"Failed to convert exclusion object {exclusion_func_or_region} with bbox {getattr(exclusion_func_or_region, 'bbox', 'N/A')} to Region: {e}") from e
+        else:
+            # Reject invalid types
+            raise TypeError(f"Invalid exclusion type: {type(exclusion_func_or_region)}. Must be callable, Region, or have a valid .bbox attribute.")
         return self
     def add_region(self, region: Region, name: Optional[str] = None) -> 'Page':
@@ -190,6 +222,7 @@ class Page:
     def _get_exclusion_regions(self, include_callable=True, debug=False) -> List[Region]:
         """
         Get all exclusion regions for this page.
+        Assumes self._exclusions contains only callables or Region objects.
         Args:
             include_callable: Whether to evaluate callable exclusion functions
@@ -207,15 +240,14 @@ class Page:
         for i, exclusion in enumerate(self._exclusions):
             # Get exclusion label if it's a tuple from PDF level
             exclusion_label = f"exclusion {i}"
-            original_exclusion = exclusion
-            # Check if it's a tuple from PDF.add_exclusion
+            original_exclusion = exclusion # Keep track for debugging
+            # Check if it's a tuple from PDF.add_exclusion (should still be handled if PDF adds labels)
             if isinstance(exclusion, tuple) and len(exclusion) == 2 and callable(exclusion[0]):
-                # This is likely from PDF.add_exclusion with (func, label)
                 exclusion_func, label = exclusion
                 if label:
                     exclusion_label = label
-                exclusion = exclusion_func
+                exclusion = exclusion_func # Use the function part
             # Process callable exclusion functions
             if callable(exclusion) and include_callable:
@@ -224,40 +256,45 @@ class Page:
                     if debug:
                         print(f"  - Evaluating callable {exclusion_label}...")
-                    # Create a temporary copy of exclusions to avoid recursion
-                    original_exclusions = self._exclusions
-                    self._exclusions = []  # Temporarily clear exclusions
+                    # Temporarily clear exclusions to avoid potential recursion if the callable uses exclusions itself
+                    # This might be overly cautious depending on use case, but safer.
+                    temp_original_exclusions = self._exclusions
+                    self._exclusions = []
-                    # Call the function
-                    region = exclusion(self)
+                    # Call the function - Expects it to return a Region or None
+                    region_result = exclusion(self)
                     # Restore exclusions
-                    self._exclusions = original_exclusions
+                    self._exclusions = temp_original_exclusions
-                    if region:
-                        regions.append(region)
+                    if isinstance(region_result, Region):
+                        regions.append(region_result)
                         if debug:
-                            print(f"    ✓ Added region: {region}")
+                            print(f"    ✓ Added region from callable: {region_result}")
+                    elif region_result:
+                         # Log warning if callable returned something other than Region/None
+                         logger.warning(f"Callable exclusion {exclusion_label} returned non-Region object: {type(region_result)}. Skipping.")
+                         if debug:
+                             print(f"    ✗ Callable returned non-Region/None: {type(region_result)}")
                     else:
                         if debug:
-                            print(f"    ✗ Function returned None, no region added")
+                            print(f"    ✗ Callable returned None, no region added")
                 except Exception as e:
-                    error_msg = f"Error in {exclusion_label} for page {self.index}: {e}"
+                    error_msg = f"Error evaluating callable exclusion {exclusion_label} for page {self.index}: {e}"
                     print(error_msg)
-                    # Print more detailed traceback for debugging
                     import traceback
                     print(f"    Traceback: {traceback.format_exc().splitlines()[-3:]}")
-            # Process direct Region objects
-            elif not callable(exclusion):
-                # It's already a Region object
+            # Process direct Region objects (already validated by add_exclusion)
+            elif isinstance(exclusion, Region):
                 regions.append(exclusion)
                 if debug:
                     print(f"  - Added direct region: {exclusion}")
+            # No else needed, add_exclusion should prevent invalid types
         if debug:
-            print(f"Page {self.index}: Found {len(regions)} valid exclusion regions")
+            print(f"Page {self.index}: Found {len(regions)} valid exclusion regions to apply")
         return regions
@@ -1178,6 +1215,34 @@ class Page:
         return ElementCollection(detected_regions)
+    def clear_detected_layout_regions(self) -> 'Page':
+        """
+        Removes all regions from this page that were added by layout analysis
+        (i.e., regions where `source` attribute is 'detected').
+        This clears the regions both from the page's internal `_regions['detected']` list
+        and from the ElementManager's internal list of regions.
+        Returns:
+            Self for method chaining.
+        """
+        if not hasattr(self._element_mgr, 'regions') or not hasattr(self._element_mgr, '_elements') or 'regions' not in self._element_mgr._elements:
+             logger.debug(f"Page {self.index}: No regions found in ElementManager, nothing to clear.")
+             self._regions['detected'] = [] # Ensure page's list is also clear
+             return self
+        # Filter ElementManager's list to keep only non-detected regions
+        original_count = len(self._element_mgr.regions)
+        self._element_mgr._elements['regions'] = [r for r in self._element_mgr.regions if getattr(r, 'source', None) != 'detected']
+        new_count = len(self._element_mgr.regions)
+        removed_count = original_count - new_count
+        # Clear the page's specific list of detected regions
+        self._regions['detected'] = []
+        logger.info(f"Page {self.index}: Cleared {removed_count} detected layout regions.")
+        return self
     def get_section_between(self, start_element=None, end_element=None, boundary_inclusion='both') -> Optional[Region]: # Return Optional
         """
         Get a section between two elements on this page.

natural_pdf/core/pdf.py CHANGED Viewed

@@ -125,6 +125,17 @@ class PDF:
         from natural_pdf.elements.collections import PageCollection
         return PageCollection(self._pages)
+    def clear_exclusions(self) -> 'PDF':
+        """
+        Clear all exclusion functions from the PDF.
+        Returns:
+            Self for method chaining
+        """
+        self._exclusions = []
+        return self
     def add_exclusion(self, exclusion_func: Callable[[Page], Region], label: str = None) -> 'PDF':
         """
         Add an exclusion function to the PDF. Text from these regions will be excluded from extraction.

natural_pdf/elements/base.py CHANGED Viewed

@@ -7,7 +7,8 @@ from PIL import Image
 if TYPE_CHECKING:
     from natural_pdf.core.page import Page
     from natural_pdf.elements.region import Region
-    from natural_pdf.elements.base import Element, DirectionalMixin
+    from natural_pdf.elements.base import Element
+    from natural_pdf.elements.collections import ElementCollection
 class DirectionalMixin:
@@ -17,7 +18,7 @@ class DirectionalMixin:
     def _direction(self, direction: str, size: Optional[float] = None,
                    cross_size: str = "full", include_element: bool = False,
-                   until: Optional[str] = None, include_endpoint: bool = True, **kwargs) -> Union['Element', 'Region']:
+                   until: Optional[str] = None, include_endpoint: bool = True, **kwargs) -> 'Region':
         """
         Protected helper method to create a region in a specified direction relative to this element/region.
@@ -154,7 +155,7 @@ class DirectionalMixin:
         return result
     def above(self, height: Optional[float] = None, width: str = "full", include_element: bool = False,
-             until: Optional[str] = None, include_endpoint: bool = True, **kwargs) -> Union['Element', 'Region']:
+             until: Optional[str] = None, include_endpoint: bool = True, **kwargs) -> 'Region':
         """
         Select region above this element/region.
@@ -180,7 +181,7 @@ class DirectionalMixin:
         )
     def below(self, height: Optional[float] = None, width: str = "full", include_element: bool = False,
-              until: Optional[str] = None, include_endpoint: bool = True, **kwargs) -> Union['Element', 'Region']:
+              until: Optional[str] = None, include_endpoint: bool = True, **kwargs) -> 'Region':
         """
         Select region below this element/region.
@@ -206,7 +207,7 @@ class DirectionalMixin:
         )
     def left(self, width: Optional[float] = None, height: str = "full", include_element: bool = False,
-             until: Optional[str] = None, include_endpoint: bool = True, **kwargs) -> Union['Element', 'Region']:
+             until: Optional[str] = None, include_endpoint: bool = True, **kwargs) -> 'Region':
         """
         Select region to the left of this element/region.
@@ -232,7 +233,7 @@ class DirectionalMixin:
         )
     def right(self, width: Optional[float] = None, height: str = "full", include_element: bool = False,
-              until: Optional[str] = None, include_endpoint: bool = True, **kwargs) -> Union['Element', 'Region']:
+              until: Optional[str] = None, include_endpoint: bool = True, **kwargs) -> 'Region':
         """
         Select region to the right of this element/region.
@@ -257,6 +258,86 @@ class DirectionalMixin:
             **kwargs
         )
+    def expand(self,
+              left: float = 0,
+              right: float = 0,
+              top_expand: float = 0,  # Renamed to avoid conflict
+              bottom_expand: float = 0,  # Renamed to avoid conflict
+              width_factor: float = 1.0,
+              height_factor: float = 1.0,
+              # Keep original parameter names for backward compatibility
+              top: float = None,
+              bottom: float = None) -> 'Region':
+        """
+        Create a new region expanded from this element/region.
+        Args:
+            left: Amount to expand left edge (positive value expands leftwards)
+            right: Amount to expand right edge (positive value expands rightwards)
+            top_expand: Amount to expand top edge (positive value expands upwards)
+            bottom_expand: Amount to expand bottom edge (positive value expands downwards)
+            width_factor: Factor to multiply width by (applied after absolute expansion)
+            height_factor: Factor to multiply height by (applied after absolute expansion)
+            top: (DEPRECATED, use top_expand) Amount to expand top edge (upward)
+            bottom: (DEPRECATED, use bottom_expand) Amount to expand bottom edge (downward)
+        Returns:
+            New expanded Region object
+        """
+        # Start with current coordinates
+        new_x0 = self.x0
+        new_x1 = self.x1
+        new_top = self.top
+        new_bottom = self.bottom
+        # Handle the deprecated parameter names for backward compatibility
+        if top is not None:
+            top_expand = top
+        if bottom is not None:
+            bottom_expand = bottom
+        # Apply absolute expansions first
+        new_x0 -= left
+        new_x1 += right
+        new_top -= top_expand  # Expand upward (decrease top coordinate)
+        new_bottom += bottom_expand  # Expand downward (increase bottom coordinate)
+        # Apply percentage factors if provided
+        if width_factor != 1.0 or height_factor != 1.0:
+            # Calculate center point *after* absolute expansion
+            center_x = (new_x0 + new_x1) / 2
+            center_y = (new_top + new_bottom) / 2
+            # Calculate current width and height *after* absolute expansion
+            current_width = new_x1 - new_x0
+            current_height = new_bottom - new_top
+            # Calculate new width and height
+            new_width = current_width * width_factor
+            new_height = current_height * height_factor
+            # Adjust coordinates based on the new dimensions, keeping the center
+            new_x0 = center_x - new_width / 2
+            new_x1 = center_x + new_width / 2
+            new_top = center_y - new_height / 2
+            new_bottom = center_y + new_height / 2
+        # Clamp coordinates to page boundaries
+        new_x0 = max(0, new_x0)
+        new_top = max(0, new_top)
+        new_x1 = min(self.page.width, new_x1)
+        new_bottom = min(self.page.height, new_bottom)
+        # Ensure coordinates are valid (x0 <= x1, top <= bottom)
+        if new_x0 > new_x1: new_x0 = new_x1 = (new_x0 + new_x1) / 2
+        if new_top > new_bottom: new_top = new_bottom = (new_top + new_bottom) / 2
+        # Create new region with expanded bbox
+        from natural_pdf.elements.region import Region
+        new_region = Region(self.page, (new_x0, new_top, new_x1, new_bottom))
+        return new_region
 class Element(DirectionalMixin):
     """
@@ -415,7 +496,8 @@ class Element(DirectionalMixin):
             candidates = candidates[:limit] if limit else candidates
             # Find matching elements
-            matches = self.page.filter_elements(candidates, selector, **kwargs)
+            from natural_pdf.elements.collections import ElementCollection
+            matches = ElementCollection(candidates).find_all(selector, **kwargs)
             return matches[0] if matches else None
         elif idx + 1 < len(all_elements):
             # No selector, just return the next element
@@ -449,16 +531,17 @@ class Element(DirectionalMixin):
         # Search for previous matching element
         if selector:
-            # Filter elements before this one
+            # Select elements before this one
             candidates = all_elements[:idx]
-            # Reverse to start from closest to this element
+            # Reverse to search backwards from the current element
             candidates = candidates[::-1]
             # Limit search range for performance
             candidates = candidates[:limit] if limit else candidates
-            # Find matching elements
-            matches = self.page.filter_elements(candidates, selector, **kwargs)
-            return matches[0] if matches else None
+            # Find matching elements using ElementCollection
+            from natural_pdf.elements.collections import ElementCollection
+            matches = ElementCollection(candidates).find_all(selector, **kwargs)
+            return matches[0] if matches else None # find_all returns a collection
         elif idx > 0:
             # No selector, just return the previous element
             return all_elements[idx - 1]
@@ -737,8 +820,9 @@ class Element(DirectionalMixin):
         Returns:
             First matching element or None
         """
-        # Create a temporary region from this element's bounds
         from natural_pdf.elements.region import Region
+        # Create a temporary region from this element's bounds
         temp_region = Region(self.page, self.bbox)
         return temp_region.find(selector, apply_exclusions=apply_exclusions, **kwargs)
@@ -755,7 +839,8 @@ class Element(DirectionalMixin):
         Returns:
             ElementCollection with matching elements
         """
-        # Create a temporary region from this element's bounds
         from natural_pdf.elements.region import Region
+        # Create a temporary region from this element's bounds
         temp_region = Region(self.page, self.bbox)
         return temp_region.find_all(selector, apply_exclusions=apply_exclusions, **kwargs)

natural_pdf/elements/collections.py CHANGED Viewed

@@ -2,6 +2,7 @@ import logging
 from typing import List, Optional, Dict, Any, Union, Callable, TypeVar, Generic, Iterator, Tuple, TYPE_CHECKING
 from natural_pdf.ocr import OCROptions
+from natural_pdf.selectors.parser import parse_selector, selector_to_filter_func
 logger = logging.getLogger(__name__)
@@ -882,6 +883,61 @@ class ElementCollection(Generic[T]):
              logger.error(f"Error creating interactive viewer from collection: {e}", exc_info=True)
              return None
+    def find_all(self, selector: str, regex: bool = False, case: bool = True, **kwargs) -> 'ElementCollection[T]':
+        """
+        Filter elements within this collection matching the selector.
+        Args:
+            selector: CSS-like selector string.
+            regex: Whether to use regex for text search in :contains (default: False).
+            case: Whether to do case-sensitive text search (default: True).
+            **kwargs: Additional filter parameters passed to the selector function.
+        Returns:
+            A new ElementCollection containing only the matching elements from this collection.
+        """
+        if not self._elements:
+            return ElementCollection([])
+        try:
+            selector_obj = parse_selector(selector)
+        except Exception as e:
+            logger.error(f"Error parsing selector '{selector}': {e}")
+            return ElementCollection([]) # Return empty on parse error
+        # Pass regex and case flags to selector function generator
+        kwargs['regex'] = regex
+        kwargs['case'] = case
+        try:
+            filter_func = selector_to_filter_func(selector_obj, **kwargs)
+        except Exception as e:
+            logger.error(f"Error creating filter function for selector '{selector}': {e}")
+            return ElementCollection([]) # Return empty on filter creation error
+        matching_elements = [element for element in self._elements if filter_func(element)]
+        # Note: Unlike Page.find_all, this doesn't re-sort.
+        # Sorting should be done explicitly on the collection if needed.
+        return ElementCollection(matching_elements)
+    def find(self, selector: str, regex: bool = False, case: bool = True, **kwargs) -> Optional[T]:
+        """
+        Find the first element within this collection matching the selector.
+        Args:
+            selector: CSS-like selector string.
+            regex: Whether to use regex for text search in :contains (default: False).
+            case: Whether to do case-sensitive text search (default: True).
+            **kwargs: Additional filter parameters passed to the selector function.
+        Returns:
+            The first matching element or None.
+        """
+        results = self.find_all(selector, regex=regex, case=case, **kwargs)
+        return results.first
 class PageCollection(Generic[P]):
     """
     A collection of PDF pages with cross-page operations.

natural-pdf 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

natural-pdf 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl