PyPI - natural-pdf - Versions diffs - 0.2.1.dev0__py3-none-any.whl → 0.2.2__py3-none-any.whl - Mend

natural-pdf 0.2.1.dev0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

natural_pdf/analyzers/guides.py +159 -3
natural_pdf/core/highlighting_service.py +8 -8
natural_pdf/core/page.py +135 -4
natural_pdf/core/page_collection.py +37 -0
natural_pdf/core/page_groupby.py +229 -0
natural_pdf/core/render_spec.py +18 -4
natural_pdf/elements/base.py +54 -6
natural_pdf/elements/element_collection.py +1 -0
natural_pdf/elements/region.py +2 -2
natural_pdf/elements/text.py +5 -0
natural_pdf/extraction/manager.py +8 -14
natural_pdf/extraction/mixin.py +35 -21
natural_pdf/selectors/parser.py +2 -2
natural_pdf/tables/result.py +37 -0
{natural_pdf-0.2.1.dev0.dist-info → natural_pdf-0.2.2.dist-info}/METADATA +2 -2
{natural_pdf-0.2.1.dev0.dist-info → natural_pdf-0.2.2.dist-info}/RECORD +22 -21
optimization/performance_analysis.py +1 -1
tools/bad_pdf_eval/analyser.py +1 -1
{natural_pdf-0.2.1.dev0.dist-info → natural_pdf-0.2.2.dist-info}/WHEEL +0 -0
{natural_pdf-0.2.1.dev0.dist-info → natural_pdf-0.2.2.dist-info}/entry_points.txt +0 -0
{natural_pdf-0.2.1.dev0.dist-info → natural_pdf-0.2.2.dist-info}/licenses/LICENSE +0 -0
{natural_pdf-0.2.1.dev0.dist-info → natural_pdf-0.2.2.dist-info}/top_level.txt +0 -0

natural_pdf/core/page_groupby.py ADDED Viewed

@@ -0,0 +1,229 @@
+"""
+PageGroupBy class for grouping pages by selector text or callable results.
+"""
+from collections import defaultdict
+from typing import TYPE_CHECKING, Any, Callable, Dict, Iterator, List, Optional, Tuple, Union
+from tqdm.auto import tqdm
+if TYPE_CHECKING:
+    from natural_pdf.core.page import Page
+    from natural_pdf.core.page_collection import PageCollection
+class PageGroupBy:
+    """
+    A groupby object for PageCollections that supports both iteration and dict-like access.
+    This class provides pandas-like groupby functionality for natural-pdf PageCollections.
+    Pages are grouped by the result of applying a selector string or callable function.
+    Supports:
+    - Direct iteration: for key, pages in grouped:
+    - Dict-like access: grouped.get(key), grouped.get_group(key)
+    - Batch operations: grouped.apply(func)
+    """
+    def __init__(
+        self,
+        page_collection: "PageCollection",
+        by: Union[str, Callable],
+        *,
+        show_progress: bool = True,
+    ):
+        """
+        Initialize the PageGroupBy object.
+        Args:
+            page_collection: The PageCollection to group
+            by: CSS selector string or callable function for grouping
+            show_progress: Whether to show progress bar during computation (default: True)
+        """
+        self.page_collection = page_collection
+        self.by = by
+        self.show_progress = show_progress
+        self._groups: Optional[Dict[Any, "PageCollection"]] = None
+    def _compute_groups(self) -> Dict[Any, "PageCollection"]:
+        """
+        Compute the groups by applying the selector/callable to each page.
+        Returns:
+            Dictionary mapping group keys to PageCollection objects
+        """
+        if self._groups is not None:
+            return self._groups
+        groups = defaultdict(list)
+        # Setup progress bar if enabled and collection is large enough
+        pages_iterator = self.page_collection.pages
+        total_pages = len(self.page_collection)
+        if self.show_progress and total_pages > 1:  # Show progress for more than 1 page
+            desc = f"Grouping by {'selector' if isinstance(self.by, str) else 'function'}"
+            pages_iterator = tqdm(pages_iterator, desc=desc, unit="pages", total=total_pages)
+        for page in pages_iterator:
+            if callable(self.by):
+                # Apply callable function
+                key = self.by(page)
+            else:
+                # Apply selector string
+                element = page.find(self.by)
+                if element:
+                    key = element.extract_text()
+                else:
+                    key = None
+            groups[key].append(page)
+        # Convert lists to PageCollections
+        from natural_pdf.core.page_collection import PageCollection
+        self._groups = {key: PageCollection(pages) for key, pages in groups.items()}
+        return self._groups
+    def __iter__(self) -> Iterator[Tuple[Any, "PageCollection"]]:
+        """
+        Support direct iteration: for key, pages in grouped:
+        Yields:
+            Tuples of (group_key, PageCollection)
+        """
+        groups = self._compute_groups()
+        return iter(groups.items())
+    def get(
+        self, key: Any, default: Optional["PageCollection"] = None
+    ) -> Optional["PageCollection"]:
+        """
+        Dict-like access to get a specific group.
+        Args:
+            key: The group key to look up
+            default: Value to return if key is not found
+        Returns:
+            PageCollection for the group, or default if not found
+        """
+        groups = self._compute_groups()
+        return groups.get(key, default)
+    def get_group(self, key: Any) -> "PageCollection":
+        """
+        Pandas-style access to get a specific group.
+        Args:
+            key: The group key to look up
+        Returns:
+            PageCollection for the group
+        Raises:
+            KeyError: If the group key is not found
+        """
+        groups = self._compute_groups()
+        if key not in groups:
+            raise KeyError(f"Group key '{key}' not found")
+        return groups[key]
+    def keys(self) -> List[Any]:
+        """
+        Get all group keys.
+        Returns:
+            List of all group keys
+        """
+        groups = self._compute_groups()
+        return list(groups.keys())
+    def __getitem__(self, index: Union[int, Any]) -> "PageCollection":
+        """
+        Access groups by index or key.
+        Args:
+            index: Integer index (0-based) or group key
+        Returns:
+            PageCollection for the specified group
+        Examples:
+            grouped = pages.groupby('text[size=16]')
+            # Access by index (useful for quick exploration)
+            first_group = grouped[0]        # First group by order
+            second_group = grouped[1]       # Second group
+            last_group = grouped[-1]        # Last group
+            # Access by key (same as .get_group())
+            madison = grouped['CITY OF MADISON']
+        """
+        groups = self._compute_groups()
+        if isinstance(index, int):
+            # Access by integer index
+            keys_list = list(groups.keys())
+            original_index = index  # Keep original for error message
+            if index < 0:
+                index = len(keys_list) + index  # Support negative indexing
+            if not (0 <= index < len(keys_list)):
+                raise IndexError(f"Group index {original_index} out of range")
+            key = keys_list[index]
+            return groups[key]
+        else:
+            # Access by key (same as get_group)
+            if index not in groups:
+                raise KeyError(f"Group key '{index}' not found")
+            return groups[index]
+    def apply(self, func: Callable[["PageCollection"], Any]) -> Dict[Any, Any]:
+        """
+        Apply a function to each group.
+        Args:
+            func: Function to apply to each PageCollection group
+        Returns:
+            Dictionary mapping group keys to function results
+        """
+        groups = self._compute_groups()
+        return {key: func(pages) for key, pages in groups.items()}
+    def show(self, **kwargs):
+        """
+        Show each group separately with headers.
+        Args:
+            **kwargs: Arguments passed to each group's show() method
+        """
+        groups = self._compute_groups()
+        for key, pages in groups.items():
+            print(f"\n--- Group: {key} ({len(pages)} pages) ---")
+            pages.show(**kwargs)
+    def __len__(self) -> int:
+        """Return the number of groups."""
+        groups = self._compute_groups()
+        return len(groups)
+    def info(self) -> None:
+        """
+        Print information about all groups.
+        Useful for quick exploration of group structure.
+        """
+        groups = self._compute_groups()
+        print(f"PageGroupBy with {len(groups)} groups:")
+        print("-" * 40)
+        for i, (key, pages) in enumerate(groups.items()):
+            key_display = f"'{key}'" if key is not None else "None"
+            print(f"[{i}] {key_display}: {len(pages)} pages")
+    def __repr__(self) -> str:
+        """String representation showing group count."""
+        groups = self._compute_groups()
+        return f"<PageGroupBy(groups={len(groups)})>"

natural_pdf/core/render_spec.py CHANGED Viewed

@@ -146,10 +146,11 @@ class Visualizable:
         legend_position: str = "right",
         annotate: Optional[Union[str, List[str]]] = None,
         # Layout options for multi-page/region
-        layout: Literal["stack", "grid", "single"] = "stack",
+        layout: Optional[Literal["stack", "grid", "single"]] = None,
         stack_direction: Literal["vertical", "horizontal"] = "vertical",
         gap: int = 5,
-        columns: Optional[int] = None,  # For grid layout
+        columns: Optional[int] = 6,  # For grid layout, defaults to 6 columns
+        limit: Optional[int] = 30,  # Max pages to show (default 30)
         # Cropping options
         crop: Union[bool, Literal["content"]] = False,
         crop_bbox: Optional[Tuple[float, float, float, float]] = None,
@@ -169,10 +170,11 @@ class Visualizable:
             highlights: Additional highlight groups to show
             legend_position: Position of legend/colorbar ('right', 'left', 'top', 'bottom')
             annotate: Attribute name(s) to display on highlights (string or list)
-            layout: How to arrange multiple pages/regions
+            layout: How to arrange multiple pages/regions (defaults to 'grid' for multi-page, 'single' for single page)
             stack_direction: Direction for stack layout
             gap: Pixels between stacked images
-            columns: Number of columns for grid layout
+            columns: Number of columns for grid layout (defaults to 6)
+            limit: Maximum number of pages to display (default 30, None for all)
             crop: Whether to crop (True, False, or 'content' for bbox of elements)
             crop_bbox: Explicit crop bounds
             **kwargs: Additional parameters passed to rendering
@@ -184,6 +186,10 @@ class Visualizable:
         if isinstance(annotate, str):
             annotate = [annotate]
+        # Pass limit as max_pages to _get_render_specs
+        if limit is not None:
+            kwargs["max_pages"] = limit
         specs = self._get_render_specs(
             mode="show",
             color=color,
@@ -198,6 +204,14 @@ class Visualizable:
             logger.warning(f"{self.__class__.__name__}.show() generated no render specs")
             return None
+        # Determine default layout based on content and parameters
+        if layout is None:
+            # For PDFs and multi-page collections, default to grid with 6 columns
+            if len(specs) > 1:
+                layout = "grid"
+            else:
+                layout = "single"
         highlighter = self._get_highlighter()
         return highlighter.unified_render(
             specs=specs,

natural_pdf/elements/base.py CHANGED Viewed

@@ -260,7 +260,7 @@ class DirectionalMixin:
         Args:
             height: Height of the region above, in points
-            width: Width mode - "full" for full page width or "element" for element width
+            width: Width mode - "full" (default) for full page width or "element" for element width
             include_source: Whether to include this element/region in the result (default: False)
             until: Optional selector string to specify an upper boundary element
             include_endpoint: Whether to include the boundary element in the region (default: True)
@@ -268,6 +268,18 @@ class DirectionalMixin:
         Returns:
             Region object representing the area above
+        Examples:
+            ```python
+            # Default: full page width
+            signature.above()  # Gets everything above across full page width
+            # Match element width
+            signature.above(width='element')  # Gets region above matching signature width
+            # Stop at specific element
+            signature.above(until='text:contains("Date")')  # Region from date to signature
+            ```
         """
         return self._direction(
             direction="above",
@@ -293,7 +305,7 @@ class DirectionalMixin:
         Args:
             height: Height of the region below, in points
-            width: Width mode - "full" for full page width or "element" for element width
+            width: Width mode - "full" (default) for full page width or "element" for element width
             include_source: Whether to include this element/region in the result (default: False)
             until: Optional selector string to specify a lower boundary element
             include_endpoint: Whether to include the boundary element in the region (default: True)
@@ -301,6 +313,18 @@ class DirectionalMixin:
         Returns:
             Region object representing the area below
+        Examples:
+            ```python
+            # Default: full page width
+            header.below()  # Gets everything below across full page width
+            # Match element width
+            header.below(width='element')  # Gets region below matching header width
+            # Limited height
+            header.below(height=200)  # Gets 200pt tall region below header
+            ```
         """
         return self._direction(
             direction="below",
@@ -315,7 +339,7 @@ class DirectionalMixin:
     def left(
         self,
         width: Optional[float] = None,
-        height: str = "full",
+        height: str = "element",
         include_source: bool = False,
         until: Optional[str] = None,
         include_endpoint: bool = True,
@@ -326,7 +350,7 @@ class DirectionalMixin:
         Args:
             width: Width of the region to the left, in points
-            height: Height mode - "full" for full page height or "element" for element height
+            height: Height mode - "element" (default) for element height or "full" for full page height
             include_source: Whether to include this element/region in the result (default: False)
             until: Optional selector string to specify a left boundary element
             include_endpoint: Whether to include the boundary element in the region (default: True)
@@ -334,6 +358,18 @@ class DirectionalMixin:
         Returns:
             Region object representing the area to the left
+        Examples:
+            ```python
+            # Default: matches element height
+            table.left()  # Gets region to the left at same height as table
+            # Full page height
+            table.left(height='full')  # Gets entire left side of page
+            # Custom height
+            table.left(height=100)  # Gets 100pt tall region to the left
+            ```
         """
         return self._direction(
             direction="left",
@@ -348,7 +384,7 @@ class DirectionalMixin:
     def right(
         self,
         width: Optional[float] = None,
-        height: str = "full",
+        height: str = "element",
         include_source: bool = False,
         until: Optional[str] = None,
         include_endpoint: bool = True,
@@ -359,7 +395,7 @@ class DirectionalMixin:
         Args:
             width: Width of the region to the right, in points
-            height: Height mode - "full" for full page height or "element" for element height
+            height: Height mode - "element" (default) for element height or "full" for full page height
             include_source: Whether to include this element/region in the result (default: False)
             until: Optional selector string to specify a right boundary element
             include_endpoint: Whether to include the boundary element in the region (default: True)
@@ -367,6 +403,18 @@ class DirectionalMixin:
         Returns:
             Region object representing the area to the right
+        Examples:
+            ```python
+            # Default: matches element height
+            label.right()  # Gets region to the right at same height as label
+            # Full page height
+            label.right(height='full')  # Gets entire right side of page
+            # Custom height
+            label.right(height=50)  # Gets 50pt tall region to the right
+            ```
         """
         return self._direction(
             direction="right",

natural_pdf/elements/element_collection.py CHANGED Viewed

@@ -891,6 +891,7 @@ class ElementCollection(
         label_format: Optional[str] = None,
         annotate: Optional[List[str]] = None,
         bins: Optional[Union[int, List[float]]] = None,
+        **kwargs,
     ) -> List[Dict]:
         """
         Determines the parameters for highlighting each element based on the strategy.

natural_pdf/elements/region.py CHANGED Viewed

@@ -960,7 +960,7 @@ class Region(
         right_content_col = min(width - 1, content_col_indices[-1] + padding)
         # Convert trimmed pixel coordinates back to PDF coordinates
-        scale_factor = resolution / 72.0  # Scale factor used in to_image()
+        scale_factor = resolution / 72.0  # Scale factor used in render()
         # Calculate new PDF coordinates and ensure they are Python floats
         trimmed_x0 = float(work_region.x0 + (left_content_col / scale_factor))
@@ -3437,7 +3437,7 @@ class Region(
                     r_idx = int(cell.metadata.get("row_index"))
                     c_idx = int(cell.metadata.get("col_index"))
                     text_val = cell.extract_text(
-                        layout=False, apply_exclusions=False, content_filter=content_filter
+                        layout=False, apply_exclusions=True, content_filter=content_filter
                     ).strip()
                     table_grid[r_idx][c_idx] = text_val if text_val else None
                 except Exception as _err:

natural_pdf/elements/text.py CHANGED Viewed

@@ -215,6 +215,11 @@ class TextElement(Element):
         if isinstance(color, (int, float)):
             return (color, color, color)
+        # If it's a single-value tuple (grayscale), treat as grayscale
+        if isinstance(color, tuple) and len(color) == 1:
+            gray = color[0]
+            return (gray, gray, gray)
         # If it's a tuple of 3 values, treat as RGB
         if isinstance(color, tuple) and len(color) == 3:
             return color

natural_pdf/extraction/manager.py CHANGED Viewed

@@ -119,17 +119,11 @@ class StructuredDataManager:
         )
         messages = self._prepare_llm_messages(content, prompt, using, schema)
-        try:
-            logger.debug(f"Extracting with model '{selected_model}'")
-            completion = client.beta.chat.completions.parse(
-                model=selected_model, messages=messages, response_format=schema, **kwargs
-            )
-            parsed_data = completion.choices[0].message.parsed
-            return StructuredDataResult(
-                data=parsed_data, success=True, error_message=None, model_used=selected_model
-            )
-        except Exception as e:
-            logger.error(f"Extraction failed: {str(e)}")
-            return StructuredDataResult(
-                data=None, success=False, error_message=str(e), model_used=selected_model
-            )
+        logger.debug(f"Extracting with model '{selected_model}'")
+        completion = client.beta.chat.completions.parse(
+            model=selected_model, messages=messages, response_format=schema, **kwargs
+        )
+        parsed_data = completion.choices[0].message.parsed
+        return StructuredDataResult(
+            data=parsed_data, success=True, error_message=None, model_used=selected_model
+        )

natural_pdf/extraction/mixin.py CHANGED Viewed

@@ -35,7 +35,7 @@ class ExtractionMixin(ABC):
     Host class requirements:
     - Must implement extract_text(**kwargs) -> str
-    - Must implement to_image(**kwargs) -> PIL.Image
+    - Must implement render(**kwargs) -> PIL.Image
     - Must have access to StructuredDataManager (usually via parent PDF)
     Example:
@@ -72,25 +72,24 @@ class ExtractionMixin(ABC):
         Args:
             using: 'text' or 'vision'
-            **kwargs: Additional arguments passed to extract_text or to_image
+            **kwargs: Additional arguments passed to extract_text or render
         Returns:
             str: Extracted text if using='text'
             PIL.Image.Image: Rendered image if using='vision'
             None: If content cannot be retrieved
         """
-        if not hasattr(self, "extract_text") or not callable(self.extract_text):
-            logger.error(f"ExtractionMixin requires 'extract_text' method on {self!r}")
-            return None
-        if not hasattr(self, "to_image") or not callable(self.to_image):
-            logger.error(f"ExtractionMixin requires 'to_image' method on {self!r}")
-            return None
         try:
             if using == "text":
+                if not hasattr(self, "extract_text") or not callable(self.extract_text):
+                    logger.error(f"ExtractionMixin requires 'extract_text' method on {self!r}")
+                    return None
                 layout = kwargs.pop("layout", True)
                 return self.extract_text(layout=layout, **kwargs)
             elif using == "vision":
+                if not hasattr(self, "render") or not callable(self.render):
+                    logger.error(f"ExtractionMixin requires 'render' method on {self!r}")
+                    return None
                 resolution = kwargs.pop("resolution", 72)
                 include_highlights = kwargs.pop("include_highlights", False)
                 labels = kwargs.pop("labels", False)
@@ -102,8 +101,13 @@ class ExtractionMixin(ABC):
                 logger.error(f"Unsupported value for 'using': {using}")
                 return None
         except Exception as e:
-            logger.error(f"Error getting {using} content from {self!r}: {e}")
-            return None
+            import warnings
+            warnings.warn(
+                f"Error getting {using} content from {self!r}: {e}",
+                RuntimeWarning,
+            )
+            raise
     def extract(
         self: Any,
@@ -275,10 +279,7 @@ class ExtractionMixin(ABC):
             raise RuntimeError("StructuredDataManager is not available")
         # Get content
-        layout_for_text = kwargs.pop("layout", True)
-        content = self._get_extraction_content(
-            using=using, layout=layout_for_text, **kwargs
-        )  # Pass kwargs
+        content = self._get_extraction_content(using=using, **kwargs)  # Pass kwargs
         if content is None or (
             using == "text" and isinstance(content, str) and not content.strip()
@@ -359,10 +360,11 @@ class ExtractionMixin(ABC):
             )
         if not result.success:
-            raise ValueError(
-                f"Stored result for '{target_key}' indicates a failed extraction attempt. "
-                f"Error: {result.error_message}"
+            # Return None for failed extractions to allow batch processing to continue
+            logger.warning(
+                f"Extraction '{target_key}' failed: {result.error_message}. Returning None."
             )
+            return None
         if result.data is None:
             # This case might occur if success=True but data is somehow None
@@ -591,16 +593,28 @@ class ExtractionMixin(ABC):
             raise RuntimeError("StructuredDataManager is not available")
         # Content preparation
-        layout_for_text = kwargs.pop("layout", True)
-        content = self._get_extraction_content(using=using, layout=layout_for_text, **kwargs)
+        content = self._get_extraction_content(using=using, **kwargs)
+        import warnings
         if content is None or (
             using == "text" and isinstance(content, str) and not content.strip()
         ):
+            preview = None
+            if isinstance(content, str):
+                preview = content[:120]
+            msg = (
+                f"No content available for extraction (using='{using}'). "
+                "Ensure the page has a text layer or render() returns an image. "
+                "For scanned PDFs run apply_ocr() or switch to using='vision'. "
+                f"Content preview: {preview!r}"
+            )
+            warnings.warn(msg, RuntimeWarning)
             result = StructuredDataResult(
                 data=None,
                 success=False,
-                error_message=f"No content available for extraction (using='{using}')",
+                error_message=msg,
                 model_used=model,
             )
         else:

natural_pdf/selectors/parser.py CHANGED Viewed

@@ -721,8 +721,8 @@ def _build_filter_list(selector: Dict[str, Any], **kwargs) -> List[Dict[str, Any
         # Start with a base name, modify for specifics like :not
         filter_name = f"pseudo-class :{name}"
-        # Relational pseudo-classes are handled separately by the caller
-        if name in ("above", "below", "near", "left-of", "right-of"):
+        # Relational pseudo-classes and collection-level pseudo-classes are handled separately by the caller
+        if name in ("above", "below", "near", "left-of", "right-of", "first", "last"):
             continue
         # --- Handle :not() ---

natural_pdf/tables/result.py CHANGED Viewed

@@ -44,6 +44,7 @@ class TableResult(Sequence):
         header: Union[str, int, List[int], None] = "first",
         index_col=None,
         skip_repeating_headers=None,
+        keep_blank: bool = False,
         **kwargs,
     ):
         """Convert to *pandas* DataFrame.
@@ -52,11 +53,22 @@ class TableResult(Sequence):
         ----------
         header : "first" | int | list[int] | None, default "first"
             • "first" – use row 0 as column names.\n            • int       – use that row index.\n            • list[int] – multi-row header.\n            • None/False– no header.
+            Note: If the header row has a different number of columns than the
+            body rows, the method will automatically fall back to header=None
+            to prevent pandas errors. This commonly occurs when headers are
+            merged into a single cell during PDF extraction.
         index_col : same semantics as pandas, forwarded.
         skip_repeating_headers : bool, optional
             Whether to remove body rows that exactly match the header row(s).
             Defaults to True when header is truthy, False otherwise.
             Useful for PDFs where headers repeat throughout the table body.
+        keep_blank : bool, default False
+            Whether to preserve empty strings ('') as-is in the DataFrame.
+            When False (default), empty cells become pd.NA for better pandas integration
+            with numerical operations and missing data functions (.dropna(), .fillna(), etc.).
+            When True, empty strings are preserved as empty strings.
         **kwargs  : forwarded to :pyclass:`pandas.DataFrame`.
         """
         try:
@@ -112,7 +124,32 @@ class TableResult(Sequence):
                 # Could add logging here if desired
                 pass
+        # Check for header/body column count mismatch and fallback to no header
+        if hdr is not None and body:
+            # Get the maximum number of columns from all body rows
+            # This handles cases where some rows have different column counts
+            max_cols = max(len(row) for row in body) if body else 0
+            # Check if header matches the maximum column count
+            header_cols = 0
+            if isinstance(hdr, list) and not isinstance(hdr[0], list):
+                # Single header row
+                header_cols = len(hdr)
+            elif isinstance(hdr, list) and len(hdr) > 0 and isinstance(hdr[0], list):
+                # Multi-row header - check first header row
+                header_cols = len(hdr[0])
+            if header_cols != max_cols:
+                # Column count mismatch - fallback to no header
+                hdr = None
+                body = self._rows  # Use all rows as body
         df = pd.DataFrame(body, columns=hdr)
+        # Convert empty strings to NaN by default
+        if not keep_blank:
+            df = df.replace("", pd.NA)
         if index_col is not None and not df.empty:
             df.set_index(
                 df.columns[index_col] if isinstance(index_col, int) else index_col, inplace=True

natural-pdf 0.2.1.dev0__py3-none-any.whl → 0.2.2__py3-none-any.whl

natural-pdf 0.2.1.dev0__py3-none-any.whl → 0.2.2__py3-none-any.whl