PyPI - natural-pdf - Versions diffs - 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl - Mend

natural-pdf 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

natural_pdf/collections/pdf_collection.py CHANGED Viewed

@@ -215,8 +215,8 @@ class PDFCollection(SearchableMixin): # Inherit from the mixin
          """Returns the list of PDF objects held by the collection."""
          return self._pdfs
-    # --- Other Methods (e.g., apply_ocr_to_pages - could leverage service in future?) ---
-    def apply_ocr_to_pages(self, *args, **kwargs):
+    # --- Other Methods (e.g., apply_ocr - could leverage service in future?) ---
+    def apply_ocr(self, *args, **kwargs):
         PDF = self._get_pdf_class()
         # Delegate to individual PDF objects
         logger.info("Applying OCR to relevant PDFs in collection...")
@@ -225,8 +225,8 @@ class PDFCollection(SearchableMixin): # Inherit from the mixin
              # We need to figure out which pages belong to which PDF if batching here
              # For now, simpler to call on each PDF
              try:
-                 # Assume apply_ocr_to_pages exists on PDF and accepts similar args
-                 pdf.apply_ocr_to_pages(*args, **kwargs)
+                 # Assume apply_ocr exists on PDF and accepts similar args
+                 pdf.apply_ocr(*args, **kwargs)
              except Exception as e:
                  logger.error(f"Failed applying OCR to {pdf.path}: {e}", exc_info=True)
         return self

natural_pdf/core/page.py CHANGED Viewed

@@ -1074,19 +1074,19 @@ class Page:
         device: Optional[str] = None,
     ) -> List[TextElement]:
         """
-        Apply OCR to THIS page and add results to page elements via PDF.apply_ocr_to_pages.
+        Apply OCR to THIS page and add results to page elements via PDF.apply_ocr.
         Returns:
             List of created TextElements derived from OCR results for this page.
         """
-        if not hasattr(self._parent, 'apply_ocr_to_pages'):
-             logger.error(f"Page {self.number}: Parent PDF missing 'apply_ocr_to_pages'. Cannot apply OCR.")
+        if not hasattr(self._parent, 'apply_ocr'):
+             logger.error(f"Page {self.number}: Parent PDF missing 'apply_ocr'. Cannot apply OCR.")
              return []
-        logger.info(f"Page {self.number}: Delegating apply_ocr to PDF.apply_ocr_to_pages.")
+        logger.info(f"Page {self.number}: Delegating apply_ocr to PDF.apply_ocr.")
         try:
             # Delegate to parent PDF, targeting only this page's index
-            self._parent.apply_ocr_to_pages(
+            self._parent.apply_ocr(
                 pages=[self.index],
                 engine=engine, options=options, languages=languages,
                 min_confidence=min_confidence, device=device

natural_pdf/core/pdf.py CHANGED Viewed

@@ -198,7 +198,7 @@ class PDF:
         return self
-    def apply_ocr_to_pages(
+    def apply_ocr(
         self,
         pages: Optional[Union[Iterable[int], range, slice]] = None,
         engine: Optional[str] = None,
@@ -598,7 +598,7 @@ class PDF:
         Requires optional dependencies. Install with: pip install "natural-pdf[ocr-save]"
         Note: OCR must have been applied to the pages beforehand
-              (e.g., using pdf.apply_ocr_to_pages()).
+              (e.g., using pdf.apply_ocr()).
         Args:
             output_path: Path to save the searchable PDF.
@@ -614,7 +614,7 @@ class PDF:
         # For now, we pass through and let the exporter handle pages without OCR elements.
         # if not any(page.get_elements(source='ocr') for page in self.pages):
         #      logger.warning("No OCR elements found on pages. "
-        #                   "Ensure apply_ocr_to_pages() was called. "
+        #                   "Ensure apply_ocr() was called. "
         #                   "Output PDF might not be searchable.")
         # Convert pathlib.Path to string if necessary

natural_pdf/elements/collections.py CHANGED Viewed

@@ -1009,8 +1009,7 @@ class PageCollection(Generic[P]):
         """
         Applies OCR to all pages within this collection using batch processing.
-        This delegates the work to the parent PDF object's `apply_ocr_to_pages`
-        method for efficiency. The OCR results (TextElements) are added directly
+        This delegates the work to the parent PDF object's `apply_ocr` method for efficiency. The OCR results (TextElements) are added directly
         to the respective Page objects within this collection.
         Args:
@@ -1028,8 +1027,8 @@ class PageCollection(Generic[P]):
         Raises:
             RuntimeError: If pages in the collection lack a parent PDF object
                           or if the parent PDF object lacks the required
-                          `apply_ocr_to_pages` method.
-            (Propagates exceptions from PDF.apply_ocr_to_pages)
+                          `apply_ocr` method.
+            (Propagates exceptions from PDF.apply_ocr)
         """
         if not self.pages:
             logger.warning("Cannot apply OCR to an empty PageCollection.")
@@ -1042,16 +1041,17 @@ class PageCollection(Generic[P]):
         parent_pdf = first_page._parent
-        if not hasattr(parent_pdf, 'apply_ocr_to_pages') or not callable(parent_pdf.apply_ocr_to_pages):
-             raise RuntimeError("Parent PDF object does not have the required 'apply_ocr_to_pages' method.")
+        # Updated check for renamed method
+        if not hasattr(parent_pdf, 'apply_ocr') or not callable(parent_pdf.apply_ocr):
+             raise RuntimeError("Parent PDF object does not have the required 'apply_ocr' method.")
         # Get the 0-based indices of the pages in this collection
         page_indices = [p.index for p in self.pages]
         logger.info(f"Applying OCR via parent PDF to page indices: {page_indices} in collection.")
-        # Delegate the batch call to the parent PDF object
-        parent_pdf.apply_ocr_to_pages(
+        # Delegate the batch call to the parent PDF object (using renamed method)
+        parent_pdf.apply_ocr(
             pages=page_indices,
             engine=engine,
             options=options,

{natural_pdf-0.1.3.dist-info → natural_pdf-0.1.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: natural-pdf
-Version: 0.1.3
+Version: 0.1.4
 Summary: A more intuitive interface for working with PDFs
 Author-email: Jonathan Soma <jonathan.soma@gmail.com>
 License-Expression: MIT
@@ -8,24 +8,24 @@ Project-URL: Homepage, https://github.com/jsoma/natural-pdf
 Project-URL: Repository, https://github.com/jsoma/natural-pdf
 Classifier: Programming Language :: Python :: 3
 Classifier: Operating System :: OS Independent
-Requires-Python: >=3.7
+Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: pdfplumber>=0.7.0
-Requires-Dist: Pillow>=8.0.0
-Requires-Dist: colour>=0.1.5
-Requires-Dist: numpy>=1.20.0
-Requires-Dist: urllib3>=1.26.0
-Requires-Dist: torch>=2.0.0
-Requires-Dist: torchvision>=0.15.0
-Requires-Dist: transformers>=4.30.0
-Requires-Dist: huggingface_hub>=0.19.0
-Requires-Dist: ocrmypdf>=16.0.0
-Requires-Dist: pikepdf>=10.0.0
+Requires-Dist: pdfplumber
+Requires-Dist: Pillow
+Requires-Dist: colour
+Requires-Dist: numpy
+Requires-Dist: urllib3
+Requires-Dist: torch
+Requires-Dist: torchvision
+Requires-Dist: transformers
+Requires-Dist: huggingface_hub
+Requires-Dist: ocrmypdf
+Requires-Dist: pikepdf
 Provides-Extra: interactive
 Requires-Dist: ipywidgets<9.0.0,>=7.0.0; extra == "interactive"
 Provides-Extra: haystack
-Requires-Dist: haystack-ai>=2.0.0b5; extra == "haystack"
+Requires-Dist: haystack-ai; extra == "haystack"
 Requires-Dist: chroma-haystack; extra == "haystack"
 Requires-Dist: sentence-transformers; extra == "haystack"
 Provides-Extra: easyocr
@@ -45,7 +45,7 @@ Requires-Dist: paddlepaddle; extra == "all"
 Requires-Dist: paddleocr; extra == "all"
 Requires-Dist: doclayout_yolo; extra == "all"
 Requires-Dist: surya-ocr; extra == "all"
-Requires-Dist: haystack-ai>=2.0.0b5; extra == "all"
+Requires-Dist: haystack-ai; extra == "all"
 Requires-Dist: chroma-haystack; extra == "all"
 Requires-Dist: sentence-transformers; extra == "all"
 Dynamic: license-file

{natural_pdf-0.1.3.dist-info → natural_pdf-0.1.4.dist-info}/RECORD RENAMED Viewed

@@ -13,15 +13,15 @@ natural_pdf/analyzers/layout/paddle.py,sha256=QCasH_Z9UITX6wRGlE_HjmwkBuANz9Yyw5
 natural_pdf/analyzers/layout/surya.py,sha256=Ibwo42TioJ-BZP3-2T13KCtH3kLSWQh7C9ZYuk1kUQo,12657
 natural_pdf/analyzers/layout/tatr.py,sha256=H0Xygk9jA46-vlPleoal94cuDyz-LHTSxVb3e6gpmV8,11956
 natural_pdf/analyzers/layout/yolo.py,sha256=NSQK3TcS1qN8D2MDxCvcwTpS_kvzGy3I2LepJDUceoQ,7699
-natural_pdf/collections/pdf_collection.py,sha256=Da8saWBTguxk16pNzMxCrFwatrWk_qrcG0RVPQybro8,12159
+natural_pdf/collections/pdf_collection.py,sha256=LLtixKaKRzPRfZNdDQQ7HY3wyWbBcefPYvf_4Ke-FLw,12123
 natural_pdf/core/__init__.py,sha256=GUuFtj2Apc9biAdUOlnL8leL3BQncEzubvpiAUaU3ss,37
 natural_pdf/core/element_manager.py,sha256=H1896JSt48ASLSmG22xEXMY-xSKcpYsUlYmYMD48i6Q,17117
 natural_pdf/core/highlighting_service.py,sha256=a-40UMohOglYrw4klW1GuQ_p3jZOxnAfPOXPORThr4U,31476
-natural_pdf/core/page.py,sha256=qhumZqmwHoBlGodiCvYE0z34Iu1WSs32V4_Iz_Sfaow,69350
-natural_pdf/core/pdf.py,sha256=MLN-asJ_d5spmCjLz7SDp74t__vioszfKEFooBul7nU,41167
+natural_pdf/core/page.py,sha256=7LSqJbGHhpKQliAdcy7aRQzkr8sO9jUP68bzy7uH54U,69305
+natural_pdf/core/pdf.py,sha256=ALCO7YB_oaMtGZpS6JHJglrIIDbUd63sSso0oNAAP9k,41140
 natural_pdf/elements/__init__.py,sha256=6FGHZm2oONd8zErahMEawuB4AvJR5jOZPt4KtEwbj80,40
 natural_pdf/elements/base.py,sha256=9SQ-O2qbQe9Avbf9JI-p6vWlyThZVch-p1yqXWSrBHw,35750
-natural_pdf/elements/collections.py,sha256=RJf4cBZeLfCtfS0-SjzYFRCtbzYjWsgk3LrcTwJAYMs,62392
+natural_pdf/elements/collections.py,sha256=G6H-6VtCWq_KW-A0y9XhyHLOIWxz-1vHByfC6dq8lmU,62387
 natural_pdf/elements/line.py,sha256=QvVdhf_K6rwJkq3q67JmgdZpDhrBgWuSMF-Q25malP4,4783
 natural_pdf/elements/rect.py,sha256=dls9g-R213O78HvfAJMak3_eV14Zh654Zw7hqTTXxDQ,3949
 natural_pdf/elements/region.py,sha256=5dXHYbbdO1QNgkD6b6I34ezHt-SHKx_aH1ubzbfMHQs,74370
@@ -54,8 +54,8 @@ natural_pdf/utils/visualization.py,sha256=14BM-K4ovDqHniNbxbP_y9KaEYNlkbpELGAv9_
 natural_pdf/widgets/__init__.py,sha256=qckw3DjdVTsASPLJ8uUrGKg3MFhvzHndUpeNGlqwg6A,215
 natural_pdf/widgets/viewer.py,sha256=h_amj_uvf-vRqEsFg4P00fgKxawLAd9jjC1ohUza4BY,37479
 natural_pdf/widgets/frontend/viewer.js,sha256=w8ywfz_IOAAv2nP_qaf2VBUkF1KhjT3zorhJxM1-CfU,4371
-natural_pdf-0.1.3.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
-natural_pdf-0.1.3.dist-info/METADATA,sha256=kBSb1SueOGQFw97pvHBxlJYcuNwxAB-lInLKows0BEs,5069
-natural_pdf-0.1.3.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-natural_pdf-0.1.3.dist-info/top_level.txt,sha256=XtfS3IiR1fTjaQG9TjGDjZsB1Ih2GXQteDbJ2dXlLvQ,12
-natural_pdf-0.1.3.dist-info/RECORD,,
+natural_pdf-0.1.4.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
+natural_pdf-0.1.4.dist-info/METADATA,sha256=Qbj7uNu_w5OfHexqGGFEi1VQCELaidq670nHDArAtqE,4967
+natural_pdf-0.1.4.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+natural_pdf-0.1.4.dist-info/top_level.txt,sha256=XtfS3IiR1fTjaQG9TjGDjZsB1Ih2GXQteDbJ2dXlLvQ,12
+natural_pdf-0.1.4.dist-info/RECORD,,

{natural_pdf-0.1.3.dist-info → natural_pdf-0.1.4.dist-info}/WHEEL RENAMED Viewed

File without changes

{natural_pdf-0.1.3.dist-info → natural_pdf-0.1.4.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{natural_pdf-0.1.3.dist-info → natural_pdf-0.1.4.dist-info}/top_level.txt RENAMED Viewed

File without changes

natural-pdf 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

natural-pdf 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl