PyPI - natural-pdf - Versions diffs - 0.1.33__py3-none-any.whl → 0.1.34__py3-none-any.whl - Mend

natural-pdf 0.1.33__py3-none-any.whl → 0.1.34__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (77) hide show

natural_pdf/analyzers/__init__.py +2 -2
natural_pdf/analyzers/guides.py +670 -595
natural_pdf/analyzers/layout/base.py +53 -6
natural_pdf/analyzers/layout/layout_analyzer.py +3 -1
natural_pdf/analyzers/layout/layout_manager.py +18 -14
natural_pdf/analyzers/layout/layout_options.py +1 -0
natural_pdf/analyzers/layout/paddle.py +102 -64
natural_pdf/analyzers/layout/table_structure_utils.py +3 -1
natural_pdf/analyzers/layout/yolo.py +2 -6
natural_pdf/analyzers/shape_detection_mixin.py +15 -6
natural_pdf/classification/manager.py +92 -77
natural_pdf/classification/mixin.py +49 -5
natural_pdf/classification/results.py +1 -1
natural_pdf/cli.py +7 -3
natural_pdf/collections/pdf_collection.py +96 -101
natural_pdf/core/element_manager.py +131 -45
natural_pdf/core/highlighting_service.py +5 -6
natural_pdf/core/page.py +113 -22
natural_pdf/core/pdf.py +477 -75
natural_pdf/describe/__init__.py +18 -12
natural_pdf/describe/base.py +179 -172
natural_pdf/describe/elements.py +155 -155
natural_pdf/describe/mixin.py +27 -19
natural_pdf/describe/summary.py +44 -55
natural_pdf/elements/base.py +134 -18
natural_pdf/elements/collections.py +90 -18
natural_pdf/elements/image.py +2 -1
natural_pdf/elements/line.py +0 -31
natural_pdf/elements/rect.py +0 -14
natural_pdf/elements/region.py +222 -108
natural_pdf/elements/text.py +18 -12
natural_pdf/exporters/__init__.py +4 -1
natural_pdf/exporters/original_pdf.py +12 -4
natural_pdf/extraction/mixin.py +66 -10
natural_pdf/extraction/result.py +1 -1
natural_pdf/flows/flow.py +63 -4
natural_pdf/flows/region.py +4 -4
natural_pdf/ocr/engine.py +83 -2
natural_pdf/ocr/engine_paddle.py +5 -5
natural_pdf/ocr/ocr_factory.py +2 -1
natural_pdf/ocr/ocr_manager.py +24 -13
natural_pdf/ocr/ocr_options.py +3 -10
natural_pdf/qa/document_qa.py +21 -8
natural_pdf/qa/qa_result.py +3 -7
natural_pdf/search/__init__.py +3 -2
natural_pdf/search/lancedb_search_service.py +5 -6
natural_pdf/search/numpy_search_service.py +5 -2
natural_pdf/selectors/parser.py +51 -6
natural_pdf/tables/__init__.py +2 -2
natural_pdf/tables/result.py +7 -6
natural_pdf/utils/bidi_mirror.py +2 -1
natural_pdf/utils/reading_order.py +3 -2
natural_pdf/utils/visualization.py +3 -3
natural_pdf/widgets/viewer.py +0 -1
{natural_pdf-0.1.33.dist-info → natural_pdf-0.1.34.dist-info}/METADATA +1 -1
natural_pdf-0.1.34.dist-info/RECORD +121 -0
optimization/memory_comparison.py +73 -58
optimization/pdf_analyzer.py +141 -96
optimization/performance_analysis.py +111 -110
optimization/test_cleanup_methods.py +47 -36
optimization/test_memory_fix.py +40 -39
tools/bad_pdf_eval/__init__.py +0 -1
tools/bad_pdf_eval/analyser.py +35 -18
tools/bad_pdf_eval/collate_summaries.py +22 -18
tools/bad_pdf_eval/compile_attempts_markdown.py +127 -0
tools/bad_pdf_eval/eval_suite.py +21 -9
tools/bad_pdf_eval/evaluate_quality.py +198 -0
tools/bad_pdf_eval/export_enrichment_csv.py +12 -8
tools/bad_pdf_eval/llm_enrich.py +71 -39
tools/bad_pdf_eval/llm_enrich_with_retry.py +289 -0
tools/bad_pdf_eval/reporter.py +1 -1
tools/bad_pdf_eval/utils.py +7 -4
natural_pdf-0.1.33.dist-info/RECORD +0 -118
{natural_pdf-0.1.33.dist-info → natural_pdf-0.1.34.dist-info}/WHEEL +0 -0
{natural_pdf-0.1.33.dist-info → natural_pdf-0.1.34.dist-info}/entry_points.txt +0 -0
{natural_pdf-0.1.33.dist-info → natural_pdf-0.1.34.dist-info}/licenses/LICENSE +0 -0
{natural_pdf-0.1.33.dist-info → natural_pdf-0.1.34.dist-info}/top_level.txt +0 -0

natural_pdf/describe/summary.py CHANGED Viewed

@@ -8,53 +8,53 @@ from typing import Any, Dict, List, Union
 class ElementSummary:
     """
     Container for element summary data with markdown rendering.
     Automatically renders as markdown in Jupyter notebooks and provides
     access to underlying data as dictionaries.
     """
     def __init__(self, data: Dict[str, Any], title: str = "Summary"):
         """
         Initialize summary with data and optional title.
         Args:
             data: Dictionary containing summary sections
             title: Title for the summary display
         """
         self.data = data
         self.title = title
     def __str__(self) -> str:
         """String representation as markdown."""
         return self._to_markdown()
     def __repr__(self) -> str:
         """Repr as markdown for better display."""
         return self._to_markdown()
     def _repr_markdown_(self) -> str:
         """Jupyter notebook markdown rendering."""
         return self._to_markdown()
     def to_dict(self) -> Dict[str, Any]:
         """Return underlying data as dictionary."""
         return self.data.copy()
     def _to_markdown(self) -> str:
         """Convert data to markdown format."""
         lines = [f"## {self.title}", ""]
         for section_name, section_data in self.data.items():
             lines.extend(self._format_section(section_name, section_data))
             lines.append("")  # Empty line between sections
         return "\n".join(lines).rstrip()
     def _format_section(self, name: str, data: Any) -> List[str]:
         """Format a single section as markdown."""
         # Use bold text instead of headers for more compact display
-        section_title = name.replace('_', ' ').title()
+        section_title = name.replace("_", " ").title()
         if isinstance(data, dict):
             lines = [f"**{section_title}**:", ""]
             lines.extend(self._format_dict(data, indent=""))
@@ -62,26 +62,26 @@ class ElementSummary:
             lines = [f"**{section_title}**: {', '.join(str(item) for item in data)}"]
         else:
             lines = [f"**{section_title}**: {data}"]
         return lines
     def _format_dict(self, data: Dict[str, Any], indent: str = "") -> List[str]:
         """Format dictionary as markdown list."""
         lines = []
         for key, value in data.items():
-            key_display = key.replace('_', ' ')
+            key_display = key.replace("_", " ")
             if isinstance(value, dict):
                 # Nested dict - always format as list items
                 lines.append(f"{indent}- **{key_display}**:")
                 for subkey, subvalue in value.items():
-                    subkey_display = subkey.replace('_', ' ')
+                    subkey_display = subkey.replace("_", " ")
                     if isinstance(subvalue, dict):
                         # Another level of nesting
                         lines.append(f"{indent}  - **{subkey_display}**:")
                         for subsubkey, subsubvalue in subvalue.items():
-                            subsubkey_display = subsubkey.replace('_', ' ')
+                            subsubkey_display = subsubkey.replace("_", " ")
                             lines.append(f"{indent}    - {subsubkey_display}: {subsubvalue}")
                     else:
                         lines.append(f"{indent}  - {subkey_display}: {subvalue}")
@@ -93,9 +93,9 @@ class ElementSummary:
                     lines.append(f"{indent}- **{key_display}**: {len(value)} items")
             else:
                 lines.append(f"{indent}- **{key_display}**: {value}")
         return lines
     def _format_list(self, data: List[Any]) -> List[str]:
         """Format list as markdown."""
         lines = []
@@ -106,27 +106,18 @@ class ElementSummary:
             else:
                 lines.append(f"- {item}")
         return lines
     def _format_horizontal_table(self, title: str, data: Dict[str, Any]) -> List[str]:
         """Format dict as horizontal table."""
         headers = list(data.keys())
         values = list(data.values())
         # Create table
         header_row = "| " + " | ".join(headers) + " |"
         separator = "|" + "|".join("------" for _ in headers) + "|"
         value_row = "| " + " | ".join(str(v) for v in values) + " |"
-        return [
-            f"- **{title}**:",
-            "",
-            header_row,
-            separator,
-            value_row,
-            ""
-        ]
+        return [f"- **{title}**:", "", header_row, separator, value_row, ""]
     # Added for better VS Code and other frontends support
     def _repr_html_(self) -> str:  # type: ignore
@@ -147,11 +138,7 @@ class ElementSummary:
             return _markdown.markdown(md_source, extensions=["tables"])
         except Exception:  # noqa: BLE001, broad-except
             # Fallback: present the Markdown as-is inside a <pre> block.
-            escaped = (
-                md_source.replace("&", "&amp;")
-                .replace("<", "&lt;")
-                .replace(">", "&gt;")
-            )
+            escaped = md_source.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
             return f"<pre>{escaped}</pre>"
@@ -159,40 +146,42 @@ class InspectionSummary(ElementSummary):
     """
     Summary for element inspection with tabular data.
     """
     def _format_section(self, name: str, data: Any) -> List[str]:
         """Format inspection section with element tables."""
-        section_title = name.replace('_', ' ').title()
-        if isinstance(data, dict) and 'elements' in data:
+        section_title = name.replace("_", " ").title()
+        if isinstance(data, dict) and "elements" in data:
             # This is an element table section - use ### header for inspect
-            elements = data['elements']
+            elements = data["elements"]
             lines = [f"### {section_title}"]
             if elements:
-                lines.extend(self._format_element_table(elements, data.get('columns', [])))
+                lines.extend(self._format_element_table(elements, data.get("columns", [])))
                 # Add note if truncated
-                if 'note' in data:
+                if "note" in data:
                     lines.append(f"_{data['note']}_")
             else:
                 lines.append("No elements found.")
         else:
             # Regular section formatting
             lines = [f"**{section_title}**: {data}"]
         return lines
-    def _format_element_table(self, elements: List[Dict[str, Any]], columns: List[str]) -> List[str]:
+    def _format_element_table(
+        self, elements: List[Dict[str, Any]], columns: List[str]
+    ) -> List[str]:
         """Format elements as markdown table."""
         if not elements or not columns:
             return ["No elements to display."]
         lines = [""]  # Empty line before table
         # Table header
         header_row = "| " + " | ".join(columns) + " |"
         separator = "|" + "|".join("------" for _ in columns) + "|"
         lines.extend([header_row, separator])
         # Table rows
         for element in elements:
             row_values = []
@@ -205,8 +194,8 @@ class InspectionSummary(ElementSummary):
                 elif isinstance(value, str) and len(value) > 50:
                     value = value[:50] + "..."
                 row_values.append(str(value))
             row = "| " + " | ".join(row_values) + " |"
             lines.append(row)
-        return lines
+        return lines

natural_pdf/elements/base.py CHANGED Viewed

@@ -6,27 +6,49 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, overl
 from PIL import Image
+from natural_pdf.classification.mixin import ClassificationMixin
+from natural_pdf.describe.mixin import DescribeMixin
 # Import selector parsing functions
 from natural_pdf.selectors.parser import parse_selector, selector_to_filter_func
-from natural_pdf.describe.mixin import DescribeMixin
-from natural_pdf.classification.mixin import ClassificationMixin
 if TYPE_CHECKING:
+    from natural_pdf.classification.manager import ClassificationManager  # noqa: F401
     from natural_pdf.core.page import Page
     from natural_pdf.elements.collections import ElementCollection
     from natural_pdf.elements.region import Region
-    from natural_pdf.classification.manager import ClassificationManager  # noqa: F401
 def extract_bbox(obj: Any) -> Optional[Tuple[float, float, float, float]]:
-    """
-    Extract bounding box coordinates from any object that has bbox properties.
+    """Extract bounding box coordinates from any object that has bbox properties.
+    This utility function provides a standardized way to extract bounding box
+    coordinates from various object types that may store bbox information in
+    different formats (properties, attributes, or dictionary keys).
     Args:
-        obj: Object that might have bbox coordinates (Element, Region, etc.)
+        obj: Object that might have bbox coordinates. Can be an Element, Region,
+            dictionary, or any object with bbox-related attributes.
     Returns:
-        Tuple of (x0, top, x1, bottom) or None if object doesn't have bbox properties
+        Tuple of (x0, top, x1, bottom) coordinates as floats, or None if the
+        object doesn't have valid bbox properties. Coordinates are in PDF
+        coordinate system (points, with origin at bottom-left).
+    Example:
+        ```python
+        # Works with various object types
+        element_bbox = extract_bbox(text_element)  # From Element
+        region_bbox = extract_bbox(region)         # From Region
+        dict_bbox = extract_bbox({                 # From dictionary
+            'x0': 100, 'top': 200, 'x1': 300, 'bottom': 250
+        })
+        if element_bbox:
+            x0, top, x1, bottom = element_bbox
+            width = x1 - x0
+            height = bottom - top
+        ```
     """
     # Try bbox property first (most common)
     if hasattr(obj, "bbox") and obj.bbox is not None:
@@ -53,8 +75,26 @@ def extract_bbox(obj: Any) -> Optional[Tuple[float, float, float, float]]:
 class DirectionalMixin:
-    """
-    Mixin class providing directional methods for both Element and Region classes.
+    """Mixin class providing directional methods for both Element and Region classes.
+    This mixin provides spatial navigation capabilities that allow elements and regions
+    to create new regions in specific directions (left, right, above, below) relative
+    to themselves. This forms the foundation of natural-pdf's spatial navigation system.
+    The directional methods use the PDF coordinate system where:
+    - x increases from left to right
+    - y increases from bottom to top (PDF standard)
+    - Origin (0, 0) is at the bottom-left of the page
+    Methods provided:
+    - left(): Create region to the left
+    - right(): Create region to the right
+    - above(): Create region above
+    - below(): Create region below
+    Note:
+        This mixin requires the implementing class to have 'page', 'x0', 'top',
+        'x1', and 'bottom' attributes for coordinate calculations.
     """
     def _direction(
@@ -524,20 +564,88 @@ class DirectionalMixin:
 class Element(DirectionalMixin, ClassificationMixin, DescribeMixin):
-    """
-    Base class for all PDF elements.
+    """Base class for all PDF elements.
     This class provides common properties and methods for all PDF elements,
-    such as text, rectangles, lines, etc.
+    including text elements, rectangles, lines, images, and other geometric shapes.
+    It serves as the foundation for natural-pdf's element system and provides
+    spatial navigation, classification, and description capabilities through mixins.
+    The Element class wraps underlying pdfplumber objects and extends them with:
+    - Spatial navigation methods (left, right, above, below)
+    - Bounding box and coordinate properties
+    - Classification and description capabilities
+    - Polygon support for complex shapes
+    - Metadata storage for analysis results
+    All coordinates use the PDF coordinate system where:
+    - Origin (0, 0) is at the bottom-left of the page
+    - x increases from left to right
+    - y increases from bottom to top
+    Attributes:
+        type: Element type (e.g., 'char', 'line', 'rect', 'image').
+        bbox: Bounding box tuple (x0, top, x1, bottom).
+        x0: Left x-coordinate.
+        top: Top y-coordinate (minimum y).
+        x1: Right x-coordinate.
+        bottom: Bottom y-coordinate (maximum y).
+        width: Element width (x1 - x0).
+        height: Element height (bottom - top).
+        page: Reference to the parent Page object.
+        metadata: Dictionary for storing analysis results and custom data.
+    Example:
+        ```python
+        pdf = npdf.PDF("document.pdf")
+        page = pdf.pages[0]
+        # Get text elements
+        text_elements = page.chars
+        for element in text_elements:
+            print(f"Text '{element.get_text()}' at {element.bbox}")
+        # Spatial navigation
+        first_char = page.chars[0]
+        region_to_right = first_char.right(size=100)
+        # Classification
+        element.classify("document_type", model="clip")
+        ```
+    Note:
+        Element objects are typically created automatically when accessing page
+        collections (page.chars, page.words, page.rects, etc.). Direct instantiation
+        is rarely needed in normal usage.
     """
     def __init__(self, obj: Dict[str, Any], page: "Page"):
-        """
-        Initialize base element.
+        """Initialize base element.
+        Creates an Element object that wraps a pdfplumber data object with enhanced
+        functionality for spatial navigation, analysis, and classification.
         Args:
-            obj: The underlying pdfplumber object
-            page: The parent Page object
+            obj: The underlying pdfplumber object dictionary containing element
+                properties like coordinates, text, fonts, etc. This typically comes
+                from pdfplumber's chars, words, rects, lines, or images collections.
+            page: The parent Page object that contains this element and provides
+                access to document-level functionality and other elements.
+        Note:
+            This constructor is typically called automatically when accessing element
+            collections through page properties. Direct instantiation is rarely needed.
+        Example:
+            ```python
+            # Elements are usually accessed through page collections
+            page = pdf.pages[0]
+            chars = page.chars  # Elements created automatically
+            # Direct construction (advanced usage)
+            pdfplumber_char = page._page.chars[0]  # Raw pdfplumber data
+            element = Element(pdfplumber_char, page)
+            ```
         """
         self._obj = obj
         self._page = page
@@ -976,6 +1084,7 @@ class Element(DirectionalMixin, ClassificationMixin, DescribeMixin):
         """
         # Apply global options as defaults
         import natural_pdf
         if resolution is None:
             if natural_pdf.options.image.resolution is not None:
                 resolution = natural_pdf.options.image.resolution
@@ -1027,7 +1136,11 @@ class Element(DirectionalMixin, ClassificationMixin, DescribeMixin):
             return None
     def save(
-        self, filename: str, resolution: Optional[float] = None, labels: bool = True, legend_position: str = "right"
+        self,
+        filename: str,
+        resolution: Optional[float] = None,
+        labels: bool = True,
+        legend_position: str = "right",
     ) -> None:
         """
         Save the page with this element highlighted to an image file.
@@ -1043,13 +1156,16 @@ class Element(DirectionalMixin, ClassificationMixin, DescribeMixin):
         """
         # Apply global options as defaults
         import natural_pdf
         if resolution is None:
             if natural_pdf.options.image.resolution is not None:
                 resolution = natural_pdf.options.image.resolution
             else:
                 resolution = 144  # Default resolution when none specified
         # Save the highlighted image
-        self.page.save_image(filename, resolution=resolution, labels=labels, legend_position=legend_position)
+        self.page.save_image(
+            filename, resolution=resolution, labels=labels, legend_position=legend_position
+        )
         return self
     # Note: save_image method removed in favor of save()

natural_pdf/elements/collections.py CHANGED Viewed

@@ -30,9 +30,9 @@ from tqdm.auto import tqdm
 from natural_pdf.analyzers.shape_detection_mixin import ShapeDetectionMixin
 from natural_pdf.classification.manager import ClassificationManager
 from natural_pdf.classification.mixin import ClassificationMixin
-from natural_pdf.describe.mixin import DescribeMixin, InspectMixin
 from natural_pdf.collections.mixins import ApplyMixin, DirectionalCollectionMixin
 from natural_pdf.core.pdf import PDF
+from natural_pdf.describe.mixin import DescribeMixin, InspectMixin
 from natural_pdf.elements.base import Element
 from natural_pdf.elements.region import Region
 from natural_pdf.elements.text import TextElement
@@ -81,16 +81,90 @@ class ElementCollection(
     InspectMixin,
     MutableSequence,
 ):
-    """
-    Collection of PDF elements with batch operations.
+    """Collection of PDF elements with batch operations.
+    ElementCollection provides a powerful interface for working with groups of
+    PDF elements (text, rectangles, lines, etc.) with batch processing capabilities.
+    It implements the MutableSequence protocol for list-like behavior while adding
+    specialized functionality for document analysis workflows.
+    The collection integrates multiple capabilities through mixins:
+    - Batch processing with .apply() method
+    - Export functionality for various formats
+    - AI-powered classification of element groups
+    - Spatial navigation for creating related regions
+    - Description and inspection capabilities
+    - Element filtering and selection
+    Collections support functional programming patterns and method chaining,
+    making it easy to build complex document processing pipelines.
+    Attributes:
+        elements: List of Element objects in the collection.
+        first: First element in the collection (None if empty).
+        last: Last element in the collection (None if empty).
+    Example:
+        Basic usage:
+        ```python
+        pdf = npdf.PDF("document.pdf")
+        page = pdf.pages[0]
+        # Get collections of elements
+        all_text = page.chars
+        headers = page.find_all('text[size>12]:bold')
+        # Collection operations
+        print(f"Found {len(headers)} headers")
+        header_text = headers.get_text()
+        # Batch processing
+        results = headers.apply(lambda el: el.fontname)
+        ```
+        Advanced workflows:
+        ```python
+        # Functional programming style
+        important_text = (page.chars
+                         .filter('text:contains("IMPORTANT")')
+                         .apply(lambda el: el.text.upper())
+                         .classify("urgency_level"))
+        # Spatial navigation from collections
+        content_region = headers.below(until='rect[height>2]')
+        # Export functionality
+        headers.save_pdf("headers_only.pdf")
+        ```
+    Note:
+        Collections are typically created by page methods (page.chars, page.find_all())
+        or by filtering existing collections. Direct instantiation is less common.
     """
     def __init__(self, elements: List[T]):
-        """
-        Initialize a collection of elements.
+        """Initialize a collection of elements.
+        Creates an ElementCollection that wraps a list of PDF elements and provides
+        enhanced functionality for batch operations, filtering, and analysis.
         Args:
-            elements: List of Element objects
+            elements: List of Element objects (TextElement, RectangleElement, etc.)
+                to include in the collection. Can be empty for an empty collection.
+        Example:
+            ```python
+            # Collections are usually created by page methods
+            chars = page.chars  # ElementCollection[TextElement]
+            rects = page.rects  # ElementCollection[RectangleElement]
+            # Direct creation (advanced usage)
+            selected_elements = ElementCollection([element1, element2, element3])
+            ```
+        Note:
+            ElementCollection implements MutableSequence, so it behaves like a list
+            with additional natural-pdf functionality for document processing.
         """
         self._elements = elements or []
@@ -1426,7 +1500,6 @@ class ElementCollection(
         analysis_key: str = "classification",
         multi_label: bool = False,
         batch_size: int = 8,
-        max_workers: Optional[int] = None,
         progress_bar: bool = True,
         **kwargs,
     ):
@@ -1440,8 +1513,6 @@ class ElementCollection(
             analysis_key: Key for storing results in element.analyses.
             multi_label: Allow multiple labels per item.
             batch_size: Size of batches passed to the inference pipeline.
-            max_workers: (Not currently used for classification batching which is
-                         handled by the underlying pipeline).
             progress_bar: Display a progress bar.
             **kwargs: Additional arguments for the ClassificationManager.
         """
@@ -1818,12 +1889,13 @@ class ElementCollection(
         """
         # Apply global options as defaults
         import natural_pdf
         if resolution is None:
             if natural_pdf.options.image.resolution is not None:
                 resolution = natural_pdf.options.image.resolution
             else:
                 resolution = 144  # Default resolution when none specified
         return self.apply(
             lambda element: element.trim(
                 padding=padding, threshold=threshold, resolution=resolution
@@ -1896,9 +1968,7 @@ class ElementCollection(
         # Fallback to original behaviour: apply same clipping parameters to all elements
         return self.apply(
-            lambda element: element.clip(
-                obj=obj, left=left, top=top, right=right, bottom=bottom
-            )
+            lambda element: element.clip(obj=obj, left=left, top=top, right=right, bottom=bottom)
         )
     # ------------------------------------------------------------------
@@ -2439,8 +2509,8 @@ class PageCollection(Generic[P], ApplyMixin, ShapeDetectionMixin):
             page in this PageCollection."""
             # Local imports to avoid top-level cycles
             from natural_pdf.elements.region import Region
-            from natural_pdf.flows.flow import Flow
             from natural_pdf.flows.element import FlowElement
+            from natural_pdf.flows.flow import Flow
             from natural_pdf.flows.region import FlowRegion
             start_pg = start_el.page
@@ -2462,10 +2532,12 @@ class PageCollection(Generic[P], ApplyMixin, ShapeDetectionMixin):
             flow = Flow(segments=parts, arrangement="vertical")
             src_fe = FlowElement(physical_object=start_el, flow=flow)
-            return FlowRegion(flow=flow,
-                               constituent_regions=parts,
-                               source_flow_element=src_fe,
-                               boundary_element_found=end_el)
+            return FlowRegion(
+                flow=flow,
+                constituent_regions=parts,
+                source_flow_element=src_fe,
+                boundary_element_found=end_el,
+            )
         # ------------------------------------------------------------------

natural_pdf/elements/image.py CHANGED Viewed

@@ -5,6 +5,7 @@ from natural_pdf.elements.base import Element
 if TYPE_CHECKING:
     from natural_pdf.core.page import Page
 class ImageElement(Element):
     """Represents a raster XObject (embedded image) on a PDF page."""
@@ -40,4 +41,4 @@ class ImageElement(Element):
         return ""
     def __repr__(self):
-        return f"<ImageElement bbox={self.bbox} srcsize={self.srcsize}>"
+        return f"<ImageElement bbox={self.bbox} srcsize={self.srcsize}>"

natural_pdf/elements/line.py CHANGED Viewed

@@ -102,37 +102,6 @@ class LineElement(Element):
         elif self.is_vertical:
             return "vertical"
-    def text_above(self, distance: float = 5, **kwargs) -> Any:
-        """
-        Get text elements above this line.
-        Args:
-            distance: Maximum distance above the line in points
-            **kwargs: Additional filter parameters
-        Returns:
-            ElementCollection of text elements above this line
-        """
-        from natural_pdf.elements.collections import ElementCollection
-        # TODO: Implement proper filtering of elements above this line
-        return ElementCollection([])  # Placeholder
-    def text_below(self, distance: float = 5, **kwargs) -> Any:
-        """
-        Get text elements below this line.
-        Args:
-            distance: Maximum distance below the line in points
-            **kwargs: Additional filter parameters
-        Returns:
-            ElementCollection of text elements below this line
-        """
-        from natural_pdf.elements.collections import ElementCollection
-        # TODO: Implement proper filtering of elements below this line
-        return ElementCollection([])  # Placeholder
     def extract_text(self, keep_blank_chars=True, apply_exclusions=True, **kwargs) -> str:
         """

natural-pdf 0.1.33__py3-none-any.whl → 0.1.34__py3-none-any.whl

natural-pdf 0.1.33__py3-none-any.whl → 0.1.34__py3-none-any.whl