PyPI - natural-pdf - Versions diffs - 0.1.22__py3-none-any.whl → 0.1.24__py3-none-any.whl - Mend

natural-pdf 0.1.22__py3-none-any.whl → 0.1.24__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31)

natural_pdf/analyzers/shape_detection_mixin.py +43 -3
natural_pdf/classification/manager.py +1 -1
natural_pdf/classification/mixin.py +35 -14
natural_pdf/classification/results.py +16 -1
natural_pdf/cli.py +1 -0
natural_pdf/core/highlighting_service.py +23 -0
natural_pdf/core/page.py +32 -2
natural_pdf/core/pdf.py +24 -4
natural_pdf/describe/base.py +11 -1
natural_pdf/describe/summary.py +26 -0
natural_pdf/elements/base.py +81 -3
natural_pdf/elements/collections.py +162 -101
natural_pdf/elements/region.py +187 -160
natural_pdf/elements/text.py +15 -7
natural_pdf/exporters/paddleocr.py +1 -1
natural_pdf/extraction/manager.py +2 -2
natural_pdf/extraction/mixin.py +295 -11
natural_pdf/extraction/result.py +28 -1
natural_pdf/flows/region.py +117 -2
natural_pdf/ocr/engine_surya.py +25 -5
natural_pdf/qa/__init__.py +2 -1
natural_pdf/qa/document_qa.py +166 -113
natural_pdf/qa/qa_result.py +55 -0
natural_pdf/selectors/parser.py +22 -0
natural_pdf/utils/text_extraction.py +34 -14
{natural_pdf-0.1.22.dist-info → natural_pdf-0.1.24.dist-info}/METADATA +22 -13
{natural_pdf-0.1.22.dist-info → natural_pdf-0.1.24.dist-info}/RECORD +31 -30
{natural_pdf-0.1.22.dist-info → natural_pdf-0.1.24.dist-info}/WHEEL +0 -0
{natural_pdf-0.1.22.dist-info → natural_pdf-0.1.24.dist-info}/entry_points.txt +0 -0
{natural_pdf-0.1.22.dist-info → natural_pdf-0.1.24.dist-info}/licenses/LICENSE +0 -0
{natural_pdf-0.1.22.dist-info → natural_pdf-0.1.24.dist-info}/top_level.txt +0 -0

natural_pdf/elements/region.py CHANGED Viewed

@@ -82,7 +82,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
         self.end_element = None
         self.metadata: Dict[str, Any] = {}
-        self.analyses: Dict[str, Any] = {}
+        # Analysis results live under self.metadata['analysis'] via property
         # Standard attributes for all elements
         self.object_type = "region"  # For selector compatibility
@@ -115,146 +115,28 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
         **kwargs,
     ) -> "Region":
         """
-        Protected helper method to create a region in a specified direction relative to this region.
+        Region-specific wrapper around :py:meth:`DirectionalMixin._direction`.
-        Args:
-            direction: 'left', 'right', 'above', or 'below'
-            size: Size in the primary direction (width for horizontal, height for vertical)
-            cross_size: Size in the cross direction ('full' or 'element')
-            include_source: Whether to include this region's area in the result
-            until: Optional selector string to specify a boundary element
-            include_endpoint: Whether to include the boundary element found by 'until'
-            **kwargs: Additional parameters for the 'until' selector search
-        Returns:
-            Region object
-        """
-        import math  # Use math.inf for infinity
-        is_horizontal = direction in ("left", "right")
-        is_positive = direction in ("right", "below")  # right/below are positive directions
-        pixel_offset = 1  # Offset for excluding elements/endpoints
-        # 1. Determine initial boundaries based on direction and include_source
-        if is_horizontal:
-            # Initial cross-boundaries (vertical)
-            y0 = 0 if cross_size == "full" else self.top
-            y1 = self.page.height if cross_size == "full" else self.bottom
-            # Initial primary boundaries (horizontal)
-            if is_positive:  # right
-                x0_initial = self.x0 if include_source else self.x1 + pixel_offset
-                x1_initial = self.x1  # This edge moves
-            else:  # left
-                x0_initial = self.x0  # This edge moves
-                x1_initial = self.x1 if include_source else self.x0 - pixel_offset
-        else:  # Vertical
-            # Initial cross-boundaries (horizontal)
-            x0 = 0 if cross_size == "full" else self.x0
-            x1 = self.page.width if cross_size == "full" else self.x1
-            # Initial primary boundaries (vertical)
-            if is_positive:  # below
-                y0_initial = self.top if include_source else self.bottom + pixel_offset
-                y1_initial = self.bottom  # This edge moves
-            else:  # above
-                y0_initial = self.top  # This edge moves
-                y1_initial = self.bottom if include_source else self.top - pixel_offset
-        # 2. Calculate the final primary boundary, considering 'size' or page limits
-        if is_horizontal:
-            if is_positive:  # right
-                x1_final = min(
-                    self.page.width,
-                    x1_initial + (size if size is not None else (self.page.width - x1_initial)),
-                )
-                x0_final = x0_initial
-            else:  # left
-                x0_final = max(0, x0_initial - (size if size is not None else x0_initial))
-                x1_final = x1_initial
-        else:  # Vertical
-            if is_positive:  # below
-                y1_final = min(
-                    self.page.height,
-                    y1_initial + (size if size is not None else (self.page.height - y1_initial)),
-                )
-                y0_final = y0_initial
-            else:  # above
-                y0_final = max(0, y0_initial - (size if size is not None else y0_initial))
-                y1_final = y1_initial
-        # 3. Handle 'until' selector if provided
-        target = None
-        if until:
-            all_matches = self.page.find_all(until, **kwargs)
-            matches_in_direction = []
-            # Filter and sort matches based on direction
-            if direction == "above":
-                matches_in_direction = [m for m in all_matches if m.bottom <= self.top]
-                matches_in_direction.sort(key=lambda e: e.bottom, reverse=True)
-            elif direction == "below":
-                matches_in_direction = [m for m in all_matches if m.top >= self.bottom]
-                matches_in_direction.sort(key=lambda e: e.top)
-            elif direction == "left":
-                matches_in_direction = [m for m in all_matches if m.x1 <= self.x0]
-                matches_in_direction.sort(key=lambda e: e.x1, reverse=True)
-            elif direction == "right":
-                matches_in_direction = [m for m in all_matches if m.x0 >= self.x1]
-                matches_in_direction.sort(key=lambda e: e.x0)
-            if matches_in_direction:
-                target = matches_in_direction[0]
-                # Adjust the primary boundary based on the target
-                if is_horizontal:
-                    if is_positive:  # right
-                        x1_final = target.x1 if include_endpoint else target.x0 - pixel_offset
-                    else:  # left
-                        x0_final = target.x0 if include_endpoint else target.x1 + pixel_offset
-                else:  # Vertical
-                    if is_positive:  # below
-                        y1_final = target.bottom if include_endpoint else target.top - pixel_offset
-                    else:  # above
-                        y0_final = target.top if include_endpoint else target.bottom + pixel_offset
-                # Adjust cross boundaries if cross_size is 'element'
-                if cross_size == "element":
-                    if is_horizontal:  # Adjust y0, y1
-                        target_y0 = (
-                            target.top if include_endpoint else target.bottom
-                        )  # Use opposite boundary if excluding
-                        target_y1 = target.bottom if include_endpoint else target.top
-                        y0 = min(y0, target_y0)
-                        y1 = max(y1, target_y1)
-                    else:  # Adjust x0, x1
-                        target_x0 = (
-                            target.x0 if include_endpoint else target.x1
-                        )  # Use opposite boundary if excluding
-                        target_x1 = target.x1 if include_endpoint else target.x0
-                        x0 = min(x0, target_x0)
-                        x1 = max(x1, target_x1)
-        # 4. Finalize bbox coordinates
-        if is_horizontal:
-            bbox = (x0_final, y0, x1_final, y1)
-        else:
-            bbox = (x0, y0_final, x1, y1_final)
+        It performs any pre-processing required by *Region* (none currently),
+        delegates the core geometry work to the mix-in implementation via
+        ``super()``, then attaches region-level metadata before returning the
+        new :class:`Region` instance.
+        """
-        # Ensure valid coordinates (x0 <= x1, y0 <= y1)
-        final_x0 = min(bbox[0], bbox[2])
-        final_y0 = min(bbox[1], bbox[3])
-        final_x1 = max(bbox[0], bbox[2])
-        final_y1 = max(bbox[1], bbox[3])
-        final_bbox = (final_x0, final_y0, final_x1, final_y1)
+        # Delegate to the shared implementation on DirectionalMixin
+        region = super()._direction(
+            direction=direction,
+            size=size,
+            cross_size=cross_size,
+            include_source=include_source,
+            until=until,
+            include_endpoint=include_endpoint,
+            **kwargs,
+        )
-        # 5. Create and return Region
-        region = Region(self.page, final_bbox)
+        # Post-process: make sure callers can trace lineage and flags
         region.source_element = self
         region.includes_source = include_source
-        # Optionally store the boundary element if found
-        if target:
-            region.boundary_element = target
         return region
@@ -710,7 +592,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
         self,
         scale: float = 2.0,
         resolution: float = 150,
-        crop_only: bool = False,
+        crop: bool = False,
         include_highlights: bool = True,
         **kwargs,
     ) -> "Image.Image":
@@ -719,7 +601,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
         Args:
             resolution: Resolution in DPI for rendering (default: 150)
-            crop_only: If True, only crop the region without highlighting its boundaries
+            crop: If True, only crop the region without highlighting its boundaries
             include_highlights: Whether to include existing highlights (default: True)
             **kwargs: Additional parameters for page.to_image()
@@ -730,7 +612,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
         page_kwargs = kwargs.copy()
         effective_resolution = resolution  # Start with the provided resolution
-        if crop_only and "width" in kwargs:
+        if crop and "width" in kwargs:
             target_width = kwargs["width"]
             # Calculate what resolution is needed to make the region crop have target_width
             region_width_points = self.width  # Region width in PDF points
@@ -785,8 +667,8 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
         # Crop the image to just this region
         region_image = page_image.crop((x0, top, x1, bottom))
-        # If not crop_only, add a border to highlight the region boundaries
-        if not crop_only:
+        # If not crop, add a border to highlight the region boundaries
+        if not crop:
             from PIL import ImageDraw
             # Create a 1px border around the region
@@ -808,6 +690,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
         color: Optional[Union[Tuple, str]] = "blue",
         label: Optional[str] = None,
         width: Optional[int] = None,  # Add width parameter
+        crop: bool = False,  # NEW: Crop output to region bounds before legend
     ) -> "Image.Image":
         """
         Show the page with just this region highlighted temporarily.
@@ -819,6 +702,9 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
             color: Color to highlight this region (default: blue)
             label: Optional label for this region in the legend
             width: Optional width for the output image in pixels
+            crop: If True, crop the rendered image to this region's
+                        bounding box (with a small margin handled inside
+                        HighlightingService) before legends/overlays are added.
         Returns:
             PIL Image of the page with only this region highlighted
@@ -844,6 +730,9 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
             "use_color_cycling": False,  # Explicitly false for single preview
         }
+        # Determine crop bbox if requested
+        crop_bbox = self.bbox if crop else None
         # Use render_preview to show only this highlight
         return service.render_preview(
             page_index=self._page.index,
@@ -852,6 +741,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
             width=width,  # Pass the width parameter
             labels=labels,
             legend_position=legend_position,
+            crop_bbox=crop_bbox,
         )
     def save(
@@ -880,7 +770,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
         self,
         filename: str,
         resolution: float = 150,
-        crop_only: bool = False,
+        crop: bool = False,
         include_highlights: bool = True,
         **kwargs,
     ) -> "Region":
@@ -890,7 +780,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
         Args:
             filename: Path to save the image to
             resolution: Resolution in DPI for rendering (default: 150)
-            crop_only: If True, only crop the region without highlighting its boundaries
+            crop: If True, only crop the region without highlighting its boundaries
             include_highlights: Whether to include existing highlights (default: True)
             **kwargs: Additional parameters for page.to_image()
@@ -900,7 +790,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
         # Get the region image
         image = self.to_image(
             resolution=resolution,
-            crop_only=crop_only,
+            crop=crop,
             include_highlights=include_highlights,
             **kwargs,
         )
@@ -953,7 +843,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
         # Get the region image
         image = work_region.to_image(
-            resolution=resolution, crop_only=True, include_highlights=False
+            resolution=resolution, crop=True, include_highlights=False
         )
         if image is None:
@@ -1320,6 +1210,24 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
                 # Try lattice first, then fall back to stream if no meaningful results
                 logger.debug(f"Region {self.bbox}: Auto-detecting table extraction method...")
+                # --- NEW: Prefer already-created table_cell regions if they exist --- #
+                try:
+                    cell_regions_in_table = [
+                        c
+                        for c in self.page.find_all("region[type=table_cell]", apply_exclusions=False)
+                        if self.intersects(c)
+                    ]
+                except Exception as _cells_err:
+                    cell_regions_in_table = []  # Fallback silently
+                if cell_regions_in_table:
+                    logger.debug(
+                        f"Region {self.bbox}: Found {len(cell_regions_in_table)} pre-computed table_cell regions – using 'cells' method."
+                    )
+                    return self._extract_table_from_cells(cell_regions_in_table)
+                # --------------------------------------------------------------- #
                 try:
                     logger.debug(f"Region {self.bbox}: Trying 'lattice' method first...")
                     lattice_result = self.extract_table(
@@ -2015,19 +1923,19 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
             logger.info(
                 f"Region {self.bbox}: Removing existing OCR elements before applying new OCR."
             )
-            # Find all OCR elements in this region
-            ocr_selector = "text[source=ocr]"
-            ocr_elements = self.find_all(ocr_selector)
+            # Remove existing OCR word elements strictly inside this region
+            ocr_selector = "text[source=ocr]"
+            ocr_elements = self.find_all(ocr_selector, apply_exclusions=False)
             if ocr_elements:
+                removed_count = ocr_elements.remove()
                 logger.info(
-                    f"Region {self.bbox}: Found {len(ocr_elements)} existing OCR elements to remove."
+                    f"Region {self.bbox}: Removed {removed_count} existing OCR word elements in region before re-applying OCR."
                 )
-                # Remove these elements from their page
-                removed_count = ocr_elements.remove()
-                logger.info(f"Region {self.bbox}: Removed {removed_count} OCR elements.")
             else:
-                logger.info(f"Region {self.bbox}: No existing OCR elements found to remove.")
+                logger.info(
+                    f"Region {self.bbox}: No existing OCR word elements found within region to remove."
+                )
         ocr_mgr = self.page._parent._ocr_manager
@@ -2044,7 +1952,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
         # Render the page region to an image using the determined resolution
         try:
             region_image = self.to_image(
-                resolution=final_resolution, include_highlights=False, crop_only=True
+                resolution=final_resolution, include_highlights=False, crop=True
             )
             if not region_image:
                 logger.error("Failed to render region to image for OCR.")
@@ -2088,8 +1996,17 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
                 page_top = self.top + (img_top * scale_y)
                 page_x1 = self.x0 + (img_x1 * scale_x)
                 page_bottom = self.top + (img_bottom * scale_y)
+                raw_conf = result.get("confidence")
+                # Convert confidence to float unless it is None/invalid
+                try:
+                    confidence_val = float(raw_conf) if raw_conf is not None else None
+                except (TypeError, ValueError):
+                    confidence_val = None
+                text_val = result.get("text")  # May legitimately be None in detect_only mode
                 element_data = {
-                    "text": result["text"],
+                    "text": text_val,
                     "x0": page_x0,
                     "top": page_top,
                     "x1": page_x1,
@@ -2098,7 +2015,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
                     "height": page_bottom - page_top,
                     "object_type": "word",
                     "source": "ocr",
-                    "confidence": float(result.get("confidence", 0.0)),
+                    "confidence": confidence_val,
                     "fontname": "OCR",
                     "size": round(pdf_height) if pdf_height > 0 else 10.0,
                     "page_number": self.page.number,
@@ -2434,12 +2351,12 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
     def ask(
         self,
-        question: str,
+        question: Union[str, List[str], Tuple[str, ...]],
         min_confidence: float = 0.1,
         model: str = None,
         debug: bool = False,
         **kwargs,
-    ) -> Dict[str, Any]:
+    ) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
         """
         Ask a question about the region content using document QA.
@@ -2466,7 +2383,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
             from natural_pdf.qa.document_qa import get_qa_engine
         except ImportError:
             logger.error(
-                "Question answering requires optional dependencies. Install with `pip install natural-pdf[core-ml]`"
+                "Question answering requires optional dependencies. Install with `pip install natural-pdf[ai]`"
             )
             return {
                 "answer": None,
@@ -2684,7 +2601,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
             img = self.to_image(
                 resolution=resolution,
                 include_highlights=False,  # No highlights for classification input
-                crop_only=True,  # Just the region content
+                crop=True,  # Just the region content
             )
             if img is None:
                 raise ValueError(
@@ -2964,4 +2881,114 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
         return text_element
+    # ------------------------------------------------------------------
+    # Unified analysis storage (maps to metadata["analysis"])
+    # ------------------------------------------------------------------
+    @property
+    def analyses(self) -> Dict[str, Any]:
+        if not hasattr(self, "metadata") or self.metadata is None:
+            self.metadata = {}
+        return self.metadata.setdefault("analysis", {})
+    @analyses.setter
+    def analyses(self, value: Dict[str, Any]):
+        if not hasattr(self, "metadata") or self.metadata is None:
+            self.metadata = {}
+        self.metadata["analysis"] = value
+    # ------------------------------------------------------------------
+    # New helper: build table from pre-computed table_cell regions
+    # ------------------------------------------------------------------
+    def _extract_table_from_cells(self, cell_regions: List["Region"]) -> List[List[Optional[str]]]:
+        """Construct a table (list-of-lists) from table_cell regions.
+        This assumes each cell Region has metadata.row_index / col_index as written by
+        detect_table_structure_from_lines().  If these keys are missing we will
+        fall back to sorting by geometry.
+        """
+        if not cell_regions:
+            return []
+        # Attempt to use explicit indices first
+        all_row_idxs = []
+        all_col_idxs = []
+        for cell in cell_regions:
+            try:
+                r_idx = int(cell.metadata.get("row_index"))
+                c_idx = int(cell.metadata.get("col_index"))
+                all_row_idxs.append(r_idx)
+                all_col_idxs.append(c_idx)
+            except Exception:
+                # Not all cells have indices – clear the lists so we switch to geometric sorting
+                all_row_idxs = []
+                all_col_idxs = []
+                break
+        if all_row_idxs and all_col_idxs:
+            num_rows = max(all_row_idxs) + 1
+            num_cols = max(all_col_idxs) + 1
+            # Initialise blank grid
+            table_grid: List[List[Optional[str]]] = [[None] * num_cols for _ in range(num_rows)]
+            for cell in cell_regions:
+                try:
+                    r_idx = int(cell.metadata.get("row_index"))
+                    c_idx = int(cell.metadata.get("col_index"))
+                    text_val = cell.extract_text(layout=False, apply_exclusions=False).strip()
+                    table_grid[r_idx][c_idx] = text_val if text_val else None
+                except Exception as _err:
+                    # Skip problematic cell
+                    continue
+            return table_grid
+        # ------------------------------------------------------------------
+        # Fallback: derive order purely from geometry if indices are absent
+        # ------------------------------------------------------------------
+        # Sort unique centers to define ordering
+        try:
+            import numpy as np
+        except ImportError:
+            logger.warning("NumPy required for geometric cell ordering; returning empty result.")
+            return []
+        # Build arrays of centers
+        centers = np.array([
+            [(c.x0 + c.x1) / 2.0, (c.top + c.bottom) / 2.0] for c in cell_regions
+        ])
+        xs = centers[:, 0]
+        ys = centers[:, 1]
+        # Cluster unique row Y positions and column X positions with a tolerance
+        def _cluster(vals, tol=1.0):
+            sorted_vals = np.sort(vals)
+            groups = [[sorted_vals[0]]]
+            for v in sorted_vals[1:]:
+                if abs(v - groups[-1][-1]) <= tol:
+                    groups[-1].append(v)
+                else:
+                    groups.append([v])
+            return [np.mean(g) for g in groups]
+        row_centers = _cluster(ys)
+        col_centers = _cluster(xs)
+        num_rows = len(row_centers)
+        num_cols = len(col_centers)
+        table_grid: List[List[Optional[str]]] = [[None] * num_cols for _ in range(num_rows)]
+        # Assign each cell to nearest row & col center
+        for cell, (cx, cy) in zip(cell_regions, centers):
+            row_idx = int(np.argmin([abs(cy - rc) for rc in row_centers]))
+            col_idx = int(np.argmin([abs(cx - cc) for cc in col_centers]))
+            text_val = cell.extract_text(layout=False, apply_exclusions=False).strip()
+            table_grid[row_idx][col_idx] = text_val if text_val else None
+        return table_grid

natural_pdf/elements/text.py CHANGED Viewed

@@ -151,20 +151,28 @@ class TextElement(Element):
         # Default to black
         return (0, 0, 0)
-    def extract_text(self, keep_blank_chars=True, **kwargs) -> str:
+    def extract_text(self, keep_blank_chars=True, strip: Optional[bool] = True, **kwargs) -> str:
         """
         Extract text from this element.
         Args:
-            keep_blank_chars: Whether to keep blank characters (default: True)
-            **kwargs: Additional extraction parameters
+            keep_blank_chars: Retained for API compatibility (unused).
+            strip: If True (default) remove leading/trailing whitespace. Users may
+                   pass ``strip=False`` to preserve whitespace exactly as stored.
+            **kwargs: Accepted for forward-compatibility and ignored here.
         Returns:
-            Text content
+            The text content, optionally stripped.
         """
-        # For text elements, keep_blank_chars doesn't affect anything as we're
-        # simply returning the text property. Included for API consistency.
-        return self.text
+        # Basic retrieval
+        result = self.text or ""
+        # Apply optional stripping – align with global convention where simple
+        # element extraction is stripped by default.
+        if strip:
+            result = result.strip()
+        return result
     def contains(self, substring: str, case_sensitive: bool = True) -> bool:
         """

natural_pdf/exporters/paddleocr.py CHANGED Viewed

@@ -217,7 +217,7 @@ class PaddleOCRRecognitionExporter(FinetuneExporter):
                         # Expand region, render, and save image
                         region = element.expand(self.padding)
                         img = region.to_image(
-                            resolution=self.resolution, crop_only=True, include_highlights=False
+                            resolution=self.resolution, crop=True, include_highlights=False
                         )
                         img.save(absolute_image_path, "PNG")

natural_pdf/extraction/manager.py CHANGED Viewed

@@ -126,10 +126,10 @@ class StructuredDataManager:
             )
             parsed_data = completion.choices[0].message.parsed
             return StructuredDataResult(
-                data=parsed_data, success=True, error_message=None, model=selected_model
+                data=parsed_data, success=True, error_message=None, model_used=selected_model
             )
         except Exception as e:
             logger.error(f"Extraction failed: {str(e)}")
             return StructuredDataResult(
-                data=None, success=False, error_message=str(e), model=selected_model
+                data=None, success=False, error_message=str(e), model_used=selected_model
             )

natural-pdf 0.1.22__py3-none-any.whl → 0.1.24__py3-none-any.whl

natural-pdf 0.1.22__py3-none-any.whl → 0.1.24__py3-none-any.whl