PyPI - natural-pdf - Versions diffs - 0.1.31__py3-none-any.whl → 0.1.33__py3-none-any.whl - Mend

natural-pdf 0.1.31__py3-none-any.whl → 0.1.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

natural_pdf/analyzers/__init__.py +18 -4
natural_pdf/analyzers/guides.py +2176 -0
natural_pdf/analyzers/shape_detection_mixin.py +0 -650
natural_pdf/core/element_manager.py +99 -40
natural_pdf/core/page.py +76 -3
natural_pdf/core/pdf.py +38 -3
natural_pdf/elements/collections.py +61 -0
natural_pdf/elements/region.py +270 -14
{natural_pdf-0.1.31.dist-info → natural_pdf-0.1.33.dist-info}/METADATA +1 -1
{natural_pdf-0.1.31.dist-info → natural_pdf-0.1.33.dist-info}/RECORD +14 -18
bad_pdf_analysis/analyze_10_more.py +0 -300
bad_pdf_analysis/analyze_final_10.py +0 -552
bad_pdf_analysis/analyze_specific_pages.py +0 -394
bad_pdf_analysis/analyze_specific_pages_direct.py +0 -382
tools/rtl_smoke_test.py +0 -80
{natural_pdf-0.1.31.dist-info → natural_pdf-0.1.33.dist-info}/WHEEL +0 -0
{natural_pdf-0.1.31.dist-info → natural_pdf-0.1.33.dist-info}/entry_points.txt +0 -0
{natural_pdf-0.1.31.dist-info → natural_pdf-0.1.33.dist-info}/licenses/LICENSE +0 -0
{natural_pdf-0.1.31.dist-info → natural_pdf-0.1.33.dist-info}/top_level.txt +0 -0

natural_pdf/elements/region.py CHANGED Viewed

@@ -1319,6 +1319,28 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
             table_settings.setdefault("vertical_strategy", "lines")
             table_settings.setdefault("horizontal_strategy", "lines")
+        # -------------------------------------------------------------
+        # Auto-inject tolerances when text-based strategies are requested.
+        # This must happen AFTER alias handling (so strategies are final)
+        # and BEFORE we delegate to _extract_table_* helpers.
+        # -------------------------------------------------------------
+        if "text" in (table_settings.get("vertical_strategy"), table_settings.get("horizontal_strategy")):
+            page_cfg = getattr(self.page, "_config", {})
+            # Ensure text_* tolerances passed to pdfplumber
+            if "text_x_tolerance" not in table_settings and "x_tolerance" not in table_settings:
+                if page_cfg.get("x_tolerance") is not None:
+                    table_settings["text_x_tolerance"] = page_cfg["x_tolerance"]
+            if "text_y_tolerance" not in table_settings and "y_tolerance" not in table_settings:
+                if page_cfg.get("y_tolerance") is not None:
+                    table_settings["text_y_tolerance"] = page_cfg["y_tolerance"]
+            # Snap / join tolerances (~ line spacing)
+            if "snap_tolerance" not in table_settings and "snap_x_tolerance" not in table_settings:
+                snap = max(1, round((page_cfg.get("y_tolerance", 1)) * 0.9))
+                table_settings["snap_tolerance"] = snap
+            if "join_tolerance" not in table_settings and "join_x_tolerance" not in table_settings:
+                table_settings["join_tolerance"] = table_settings["snap_tolerance"]
         logger.debug(f"Region {self.bbox}: Extracting table using method '{effective_method}'")
         # Use the selected method
@@ -1438,6 +1460,30 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
         Returns:
             List of tables, where each table is a list of rows, and each row is a list of cell values
         """
+        # Inject global PDF-level text tolerances if not explicitly present
+        pdf_cfg = getattr(self.page, "_config", getattr(self.page._parent, "_config", {}))
+        _uses_text = "text" in (
+            table_settings.get("vertical_strategy"),
+            table_settings.get("horizontal_strategy"),
+        )
+        if _uses_text and "text_x_tolerance" not in table_settings and "x_tolerance" not in table_settings:
+            x_tol = pdf_cfg.get("x_tolerance")
+            if x_tol is not None:
+                table_settings.setdefault("text_x_tolerance", x_tol)
+        if _uses_text and "text_y_tolerance" not in table_settings and "y_tolerance" not in table_settings:
+            y_tol = pdf_cfg.get("y_tolerance")
+            if y_tol is not None:
+                table_settings.setdefault("text_y_tolerance", y_tol)
+        if _uses_text and "snap_tolerance" not in table_settings and "snap_x_tolerance" not in table_settings:
+            snap = max(1, round((pdf_cfg.get("y_tolerance", 1)) * 0.9))
+            table_settings.setdefault("snap_tolerance", snap)
+        if _uses_text and "join_tolerance" not in table_settings and "join_x_tolerance" not in table_settings:
+            join = table_settings.get("snap_tolerance", 1)
+            table_settings.setdefault("join_tolerance", join)
+            table_settings.setdefault("join_x_tolerance", join)
+            table_settings.setdefault("join_y_tolerance", join)
         # Create a crop of the page for this region
         cropped = self.page._page.crop(self.bbox)
@@ -1458,6 +1504,21 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
         Returns:
             Table data as a list of rows, where each row is a list of cell values
         """
+        # Inject global PDF-level text tolerances if not explicitly present
+        pdf_cfg = getattr(self.page, "_config", getattr(self.page._parent, "_config", {}))
+        _uses_text = "text" in (
+            table_settings.get("vertical_strategy"),
+            table_settings.get("horizontal_strategy"),
+        )
+        if _uses_text and "text_x_tolerance" not in table_settings and "x_tolerance" not in table_settings:
+            x_tol = pdf_cfg.get("x_tolerance")
+            if x_tol is not None:
+                table_settings.setdefault("text_x_tolerance", x_tol)
+        if _uses_text and "text_y_tolerance" not in table_settings and "y_tolerance" not in table_settings:
+            y_tol = pdf_cfg.get("y_tolerance")
+            if y_tol is not None:
+                table_settings.setdefault("text_y_tolerance", y_tol)
         # Create a crop of the page for this region
         cropped = self.page._page.crop(self.bbox)
@@ -1943,21 +2004,45 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
         """
         Apply OCR to this region and return the created text elements.
+        This method supports two modes:
+        1. **Built-in OCR Engines** (default) – identical to previous behaviour. Pass typical
+           parameters like ``engine='easyocr'`` or ``languages=['en']`` and the method will
+           route the request through :class:`OCRManager`.
+        2. **Custom OCR Function** – pass a *callable* under the keyword ``function`` (or
+           ``ocr_function``). The callable will receive *this* Region instance and should
+           return the extracted text (``str``) or ``None``.  Internally the call is
+           delegated to :pymeth:`apply_custom_ocr` so the same logic (replacement, element
+           creation, etc.) is re-used.
+        Examples
+        ---------
+        >>> def llm_ocr(region):
+        ...     image = region.to_image(resolution=300, crop=True)
+        ...     return my_llm_client.ocr(image)
+        >>> region.apply_ocr(function=llm_ocr)
         Args:
-            replace: If True (default), removes existing OCR elements in the region
-                    before adding new ones. If False, adds new OCR elements without
-                    removing existing ones.
-            **ocr_params: Keyword arguments passed to the OCR Manager.
-                          Common parameters like `engine`, `languages`, `min_confidence`,
-                          `device`, and `resolution` (for image rendering) should be
-                          provided here. **The `languages` list must contain codes
-                          understood by the specific engine selected.** No mapping
-                          is performed. Engine-specific settings can be passed in
-                          an `options` object (e.g., `options=EasyOCROptions(...)`).
+            replace: Whether to remove existing OCR elements first (default ``True``).
+            **ocr_params: Parameters for the built-in OCR manager *or* the special
+                          ``function``/``ocr_function`` keyword to trigger custom mode.
+        Returns
+        -------
+            Self – for chaining.
+        """
+        # --- Custom OCR function path --------------------------------------------------
+        custom_func = ocr_params.pop("function", None) or ocr_params.pop("ocr_function", None)
+        if callable(custom_func):
+            # Delegate to the specialised helper while preserving key kwargs
+            return self.apply_custom_ocr(
+                ocr_function=custom_func,
+                source_label=ocr_params.pop("source_label", "custom-ocr"),
+                replace=replace,
+                confidence=ocr_params.pop("confidence", None),
+                add_to_page=ocr_params.pop("add_to_page", True),
+            )
-        Returns:
-            Self for method chaining.
-        """
+        # --- Original built-in OCR engine path (unchanged except docstring) ------------
         # Ensure OCRManager is available
         if not hasattr(self.page._parent, "_ocr_manager") or self.page._parent._ocr_manager is None:
             logger.error("OCRManager not available on parent PDF. Cannot apply OCR to region.")
@@ -2123,6 +2208,146 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
         logger.info(f"Region {self.bbox}: Added {len(created_elements)} elements from OCR.")
         return self
+    def apply_custom_ocr(
+        self,
+        ocr_function: Callable[["Region"], Optional[str]],
+        source_label: str = "custom-ocr",
+        replace: bool = True,
+        confidence: Optional[float] = None,
+        add_to_page: bool = True,
+    ) -> "Region":
+        """
+        Apply a custom OCR function to this region and create text elements from the results.
+        This is useful when you want to use a custom OCR method (e.g., an LLM API,
+        specialized OCR service, or any custom logic) instead of the built-in OCR engines.
+        Args:
+            ocr_function: A callable that takes a Region and returns the OCR'd text (or None).
+                          The function receives this region as its argument and should return
+                          the extracted text as a string, or None if no text was found.
+            source_label: Label to identify the source of these text elements (default: "custom-ocr").
+                          This will be set as the 'source' attribute on created elements.
+            replace: If True (default), removes existing OCR elements in this region before
+                     adding new ones. If False, adds new OCR elements alongside existing ones.
+            confidence: Optional confidence score for the OCR result (0.0-1.0).
+                        If None, defaults to 1.0 if text is returned, 0.0 if None is returned.
+            add_to_page: If True (default), adds the created text element to the page.
+                         If False, creates the element but doesn't add it to the page.
+        Returns:
+            Self for method chaining.
+        Example:
+            # Using with an LLM
+            def ocr_with_llm(region):
+                image = region.to_image(resolution=300, crop=True)
+                # Call your LLM API here
+                return llm_client.ocr(image)
+            region.apply_custom_ocr(ocr_with_llm)
+            # Using with a custom OCR service
+            def ocr_with_service(region):
+                img_bytes = region.to_image(crop=True).tobytes()
+                response = ocr_service.process(img_bytes)
+                return response.text
+            region.apply_custom_ocr(ocr_with_service, source_label="my-ocr-service")
+        """
+        # If replace is True, remove existing OCR elements in this region
+        if replace:
+            logger.info(
+                f"Region {self.bbox}: Removing existing OCR elements before applying custom OCR."
+            )
+            removed_count = 0
+            # Helper to remove a single element safely
+            def _safe_remove(elem):
+                nonlocal removed_count
+                success = False
+                if hasattr(elem, "page") and hasattr(elem.page, "_element_mgr"):
+                    etype = getattr(elem, "object_type", "word")
+                    if etype == "word":
+                        etype_key = "words"
+                    elif etype == "char":
+                        etype_key = "chars"
+                    else:
+                        etype_key = etype + "s" if not etype.endswith("s") else etype
+                    try:
+                        success = elem.page._element_mgr.remove_element(elem, etype_key)
+                    except Exception:
+                        success = False
+                if success:
+                    removed_count += 1
+            # Remove ALL OCR elements overlapping this region
+            # Remove elements with source=="ocr" (built-in OCR) or matching the source_label (previous custom OCR)
+            for word in list(self.page._element_mgr.words):
+                word_source = getattr(word, "source", "")
+                # Match built-in OCR behavior: remove elements with source "ocr" exactly
+                # Also remove elements with the same source_label to avoid duplicates
+                if (word_source == "ocr" or word_source == source_label) and self.intersects(word):
+                    _safe_remove(word)
+            # Also remove char dicts if needed (matching built-in OCR)
+            for char in list(self.page._element_mgr.chars):
+                # char can be dict or TextElement; normalize
+                char_src = char.get("source") if isinstance(char, dict) else getattr(char, "source", None)
+                if char_src == "ocr" or char_src == source_label:
+                    # Rough bbox for dicts
+                    if isinstance(char, dict):
+                        cx0, ctop, cx1, cbottom = char.get("x0", 0), char.get("top", 0), char.get("x1", 0), char.get("bottom", 0)
+                    else:
+                        cx0, ctop, cx1, cbottom = char.x0, char.top, char.x1, char.bottom
+                    # Quick overlap check
+                    if not (cx1 < self.x0 or cx0 > self.x1 or cbottom < self.top or ctop > self.bottom):
+                        _safe_remove(char)
+            if removed_count > 0:
+                logger.info(
+                    f"Region {self.bbox}: Removed {removed_count} existing OCR elements."
+                )
+        # Call the custom OCR function
+        try:
+            logger.debug(f"Region {self.bbox}: Calling custom OCR function...")
+            ocr_text = ocr_function(self)
+            if ocr_text is not None and not isinstance(ocr_text, str):
+                logger.warning(
+                    f"Custom OCR function returned non-string type ({type(ocr_text)}). "
+                    f"Converting to string."
+                )
+                ocr_text = str(ocr_text)
+        except Exception as e:
+            logger.error(
+                f"Error calling custom OCR function for region {self.bbox}: {e}",
+                exc_info=True
+            )
+            return self
+        # Create text element if we got text
+        if ocr_text is not None:
+            # Use the to_text_element method to create the element
+            text_element = self.to_text_element(
+                text_content=ocr_text,
+                source_label=source_label,
+                confidence=confidence,
+                add_to_page=add_to_page
+            )
+            logger.info(
+                f"Region {self.bbox}: Created text element with {len(ocr_text)} chars"
+                f"{' and added to page' if add_to_page else ''}"
+            )
+        else:
+            logger.debug(f"Region {self.bbox}: Custom OCR function returned None (no text found)")
+        return self
     def get_section_between(self, start_element=None, end_element=None, boundary_inclusion="both"):
         """
         Get a section between two elements within this region.
@@ -2917,6 +3142,33 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
         if not hasattr(self, "page") or self.page is None:
             raise ValueError("Region must have a valid 'page' attribute to create a TextElement.")
+        # Create character dictionaries for the text
+        char_dicts = []
+        if actual_text:
+            # Create a single character dict that spans the entire region
+            # This is a simplified approach - OCR engines typically create one per character
+            char_dict = {
+                "text": actual_text,
+                "x0": self.x0,
+                "top": self.top,
+                "x1": self.x1,
+                "bottom": self.bottom,
+                "width": self.width,
+                "height": self.height,
+                "object_type": "char",
+                "page_number": self.page.page_number,
+                "fontname": default_font_name,
+                "size": default_font_size,
+                "upright": True,
+                "direction": 1,
+                "adv": self.width,
+                "source": source_label,
+                "confidence": final_confidence,
+                "stroking_color": (0, 0, 0),
+                "non_stroking_color": (0, 0, 0),
+            }
+            char_dicts.append(char_dict)
         elem_data = {
             "text": actual_text,
             "x0": self.x0,
@@ -2936,7 +3188,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
             "adv": self.width,
             "source": source_label,
             "confidence": final_confidence,
-            "_char_dicts": [],
+            "_char_dicts": char_dicts,
         }
         text_element = TextElement(elem_data, self.page)
@@ -2952,6 +3204,10 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
                 logger.debug(
                     f"TextElement created from region {self.bbox} and added to page {self.page.page_number} as {add_as_type}."
                 )
+                # Also add character dictionaries to the chars collection
+                if char_dicts and object_type == "word":
+                    for char_dict in char_dicts:
+                        self.page._element_mgr.add_element(char_dict, element_type="chars")
             else:
                 page_num_str = (
                     str(self.page.page_number) if hasattr(self.page, "page_number") else "N/A"

{natural_pdf-0.1.31.dist-info → natural_pdf-0.1.33.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: natural-pdf
-Version: 0.1.31
+Version: 0.1.33
 Summary: A more intuitive interface for working with PDFs
 Author-email: Jonathan Soma <jonathan.soma@gmail.com>
 License-Expression: MIT

{natural_pdf-0.1.31.dist-info → natural_pdf-0.1.33.dist-info}/RECORD RENAMED Viewed

@@ -1,11 +1,8 @@
-bad_pdf_analysis/analyze_10_more.py,sha256=UjsTuHE1GUMoVjkX3afy3x6DfpXyfZXHgS2W1GQqUmw,11906
-bad_pdf_analysis/analyze_final_10.py,sha256=xYkIId0nF9LpWHRLDP1_nlJfJfC0b0Tu4mLu-3mim-0,25170
-bad_pdf_analysis/analyze_specific_pages.py,sha256=wzq3_ZWR28hFdT7GEkayHPYgsk20OpD476LYmy2rAEk,13725
-bad_pdf_analysis/analyze_specific_pages_direct.py,sha256=307gSNplwOtNTR9a0lEQWxlAKGeoZIcDe5z1pROKUXY,14846
 natural_pdf/__init__.py,sha256=qDFJNF8sbEDO-2WSFAxoWEM8updOUP6dB-ckya0kxfs,3275
 natural_pdf/cli.py,sha256=IXrP2lCHihr-ed-CFiDbMTnSsutQa1j1PYALOLGbpsc,4019
-natural_pdf/analyzers/__init__.py,sha256=dIXjsMqoxKmd9OOnSBzn12wvdIz7D7YNQRAnXslpJSM,142
-natural_pdf/analyzers/shape_detection_mixin.py,sha256=0a4uuKQ4Z1Ta_UVuUtX7mVhlwmXdAkoHTyC5wZyp5do,94455
+natural_pdf/analyzers/__init__.py,sha256=MQRctn4i5Q7u8pb8vQVHKEXUiVGpKyPZUECrlDH4AuU,673
+natural_pdf/analyzers/guides.py,sha256=tzyViSBDdM66mT0niwFTDIJ16UzRCZ18Iqv8wA5DYAk,90302
+natural_pdf/analyzers/shape_detection_mixin.py,sha256=q7gDM-z2t7bSTxjfV2aaW3533CySu1qsEpu4wb5Rp-I,62688
 natural_pdf/analyzers/text_options.py,sha256=qEkDaYWla0rIM_gszEOsu52q7C_dAfV81P2HLJZM2sw,3333
 natural_pdf/analyzers/text_structure.py,sha256=VfKTsTFrK877sC0grsis9jK3rrgp0Mbp13VWEbukTcs,28437
 natural_pdf/analyzers/utils.py,sha256=PYbzJzSAHZ7JsMes84WIrSbA0zkjJGs0CLvIeINsf_k,2100
@@ -28,10 +25,10 @@ natural_pdf/classification/results.py,sha256=Mcay-xLBHbYoZ8U7f4gMj2IhhH_yORNEkZH
 natural_pdf/collections/mixins.py,sha256=sj76Cn6EdBtb5f-bdAV-1qpdixX8tI4BzPccPiYLI1w,5117
 natural_pdf/collections/pdf_collection.py,sha256=HLlyakM--23ZOeHDPucoM6Tw3yUyMXm0SSoqJwxRc2E,30744
 natural_pdf/core/__init__.py,sha256=QC8H4M3KbXwMFiQORZ0pdPlzx1Ix6oKKQSS7Ib2KEaA,38
-natural_pdf/core/element_manager.py,sha256=Mn4cYqPL_2LD_GK9lf2duExaJF1qhASCKsOdAZdQb00,49821
+natural_pdf/core/element_manager.py,sha256=DbRzAKD3to5NpKc73Q-TXZIZkhx8zZtbi_UNu5K7AAU,52766
 natural_pdf/core/highlighting_service.py,sha256=WKDqRpex1yS8CWhkNitWtKhxbyRRCLu3Xsct_HTPsD4,40774
-natural_pdf/core/page.py,sha256=kQKKqsbOaNeLhW3ark6mueDS-4tsopJcGcoMmKPK6B8,125624
-natural_pdf/core/pdf.py,sha256=YfniZp54AyptzMyr7ZP8n617n4wlV28SPrajt32nNBk,74233
+natural_pdf/core/page.py,sha256=k4jezvsLqL07Raglc-rZmMnsVwBMo_A_OerklpBIejY,129477
+natural_pdf/core/pdf.py,sha256=u0ZCPuIijNecU-AJHLvqfAYVCr9h7MgUKnlEtH6RoZI,75969
 natural_pdf/describe/__init__.py,sha256=B3zjuHjFI_dFuBLgXR1Q4v7c72fVDyk84d2hs0H4KV8,561
 natural_pdf/describe/base.py,sha256=HaWlHltb-dw6ug4mfR_iBLHWxr1OdPwLaUshXRxO7gg,18462
 natural_pdf/describe/elements.py,sha256=COvKF3B_RbAxXl5ORJDubV4C5PsiuSfuzD0ufPIJTFM,12983
@@ -39,11 +36,11 @@ natural_pdf/describe/mixin.py,sha256=U0x6v8r57KQb8qC3VVo64hvhfXQWsti8vdKBM7AXnMo
 natural_pdf/describe/summary.py,sha256=7FIF3zF6bzNx-gx4pCJr2XQFKiVzOEDnWsAYQ_mr9L0,7982
 natural_pdf/elements/__init__.py,sha256=S8XeiNWJ1WcgnyYKdYV1yxQlAxCCO3FfITT8MQwNbyk,41
 natural_pdf/elements/base.py,sha256=VshU4RstdzONJFq_8UVIjT_lVOai0MwMFsSFrCN-IO8,47299
-natural_pdf/elements/collections.py,sha256=52Oac96svzm_QMJcVaItnCG9P7d6JMNiGEx9lHgDEQg,125915
+natural_pdf/elements/collections.py,sha256=1E2MSg2NNcEcoRM2rumrv_CqIdO7DgbRHYEtfw35FaQ,128457
 natural_pdf/elements/image.py,sha256=UjHNzCgDzOseQmLpkKshcxg51DPmWNIAVYxZ0TAMyUI,1423
 natural_pdf/elements/line.py,sha256=aQm4pDdlQSDAAXqrdg4AU-oTl9JCXgYuaJN0EYls6E0,4920
 natural_pdf/elements/rect.py,sha256=kiVa3e377ZnqIOXc89d9ZSY4EcmDxtccdtUw-HOQzpw,3796
-natural_pdf/elements/region.py,sha256=v1PzWvQoGHGdn7SQiPf4Oq3hIGueIfYGwcZ05ZU6XPE,127692
+natural_pdf/elements/region.py,sha256=23J5Tv7ffAgz3IBgDXPq9Ab_lLg2Sog7elFRb6nvvZE,140541
 natural_pdf/elements/text.py,sha256=kw7u2KfHtDB905YawP7Hs89kcR8XnbtpkYQGEk6LNyk,18860
 natural_pdf/export/mixin.py,sha256=L1q3MIEFWuvie4j4_EmW7GT3NerbZ1as0XMUoqTS7gM,5083
 natural_pdf/exporters/__init__.py,sha256=g1WRPCDVzceaUUsm8dchPhzdHFSjYM0NfRyc8iN0mtE,644
@@ -100,13 +97,12 @@ natural_pdf/utils/text_extraction.py,sha256=mDeN1_VevNi3RwvFe48PM5vBh-A5WeBlYgP6
 natural_pdf/utils/visualization.py,sha256=n3IZpbY5cf9LItzGavBcNyVZZrrUVxjYnmqZHYPa7NU,9386
 natural_pdf/widgets/__init__.py,sha256=QTVaUmsw__FCweFYZebwPssQxxUFUMd0wpm_cUbGZJY,181
 natural_pdf/widgets/viewer.py,sha256=2VUY1TzWMDe9I-IVNOosKZ2LaqpjLB62ftMAdk-s6_8,24952
-natural_pdf-0.1.31.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
+natural_pdf-0.1.33.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
 optimization/memory_comparison.py,sha256=XEHtjduSmzXzxnsJMvemTcq-OAlvGUBAm5wwnOXq8TY,6524
 optimization/pdf_analyzer.py,sha256=G3XWhsEqIYbohEgTqz6wzxkAnOx4MkbvbSspx577-8w,19145
 optimization/performance_analysis.py,sha256=vVlFDywEXxhJLd9n2KVVqqQnS6rwWoHV_jlogboGF2k,13784
 optimization/test_cleanup_methods.py,sha256=B_zHiJr1hI8q-tdfBoFi0Jf5lj2PURjA_6teRBGoz8o,6277
 optimization/test_memory_fix.py,sha256=CWc0OSvFfKE0-nxqJOi_HAQc0GXUPKzkQbTeJp5UqxU,6364
-tools/rtl_smoke_test.py,sha256=-ogcbvNzumJasICP0NNQHk4Zb4M1VRx0TnGkJUQC7SM,3043
 tools/bad_pdf_eval/__init__.py,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
 tools/bad_pdf_eval/analyser.py,sha256=sR31aVVmTXRHS8uwLZXlPefTH2_lskxtAzuZwlhsyOo,13391
 tools/bad_pdf_eval/collate_summaries.py,sha256=Mcmf1OvVn0S0efj5ypk0syXKSrfUf6L5dowoGvOTgjU,5047
@@ -115,8 +111,8 @@ tools/bad_pdf_eval/export_enrichment_csv.py,sha256=SMEm9WxFUN_RIf8AGfZfjGEmvBvrO
 tools/bad_pdf_eval/llm_enrich.py,sha256=PsFMymPc8BNck21T3vupTN18pLdum-A_OLoJEKr6f80,12234
 tools/bad_pdf_eval/reporter.py,sha256=LIhcguDZ5XKgb0WeJsyA7m0kcliebOohzveShvt_KmY,400
 tools/bad_pdf_eval/utils.py,sha256=FuxaPX6f26IjQXu1vP0a2i9h1jgJNbASb8mRyj5-elE,4849
-natural_pdf-0.1.31.dist-info/METADATA,sha256=tqimu2ZReyYu5pS0PsbCo-Z9fIzkpMj1ljGPNbaOFss,6711
-natural_pdf-0.1.31.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-natural_pdf-0.1.31.dist-info/entry_points.txt,sha256=1R_KMv7g60UBBpRqGfw7bppsMNGdayR-iJlb9ohEk_8,81
-natural_pdf-0.1.31.dist-info/top_level.txt,sha256=oZlRzSc3nZ9sV3L6kD_Di734Pp62ANrm46imFVa51qQ,58
-natural_pdf-0.1.31.dist-info/RECORD,,
+natural_pdf-0.1.33.dist-info/METADATA,sha256=mSAwh3vuD9aRvO_AC_XBZG5sw9SeiuidC86a7kuV--I,6711
+natural_pdf-0.1.33.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+natural_pdf-0.1.33.dist-info/entry_points.txt,sha256=1R_KMv7g60UBBpRqGfw7bppsMNGdayR-iJlb9ohEk_8,81
+natural_pdf-0.1.33.dist-info/top_level.txt,sha256=oZlRzSc3nZ9sV3L6kD_Di734Pp62ANrm46imFVa51qQ,58
+natural_pdf-0.1.33.dist-info/RECORD,,

natural-pdf 0.1.31__py3-none-any.whl → 0.1.33__py3-none-any.whl

natural-pdf 0.1.31__py3-none-any.whl → 0.1.33__py3-none-any.whl