PyPI - docling - Versions diffs - 1.2.0__tar.gz → 1.2.1__tar.gz - Mend

docling 1.2.0.tar.gz → 1.2.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

{docling-1.2.0 → docling-1.2.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: docling
-Version: 1.2.0
+Version: 1.2.1
 Summary: Docling PDF conversion package
 Home-page: https://github.com/DS4SD/docling
 License: MIT
@@ -21,6 +21,7 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Provides-Extra: easyocr
 Provides-Extra: ocr
+Requires-Dist: certifi (>=2024.7.4)
 Requires-Dist: deepsearch-glm (>=0.19.0,<1)
 Requires-Dist: docling-core (>=1.1.2,<2.0.0)
 Requires-Dist: docling-ibm-models (>=1.1.0,<2.0.0)
@@ -93,17 +94,21 @@ print(doc.export_to_markdown())  # output: "## DocLayNet: A Large Human-Annotate
 ### Convert a batch of documents
-For an example of batch-converting documents, see [convert.py](https://github.com/DS4SD/docling/blob/main/examples/convert.py).
+For an example of batch-converting documents, see [batch_convert.py](https://github.com/DS4SD/docling/blob/main/examples/batch_convert.py).
 From a local repo clone, you can run it with:
 ```
-python examples/convert.py
+python examples/batch_convert.py
 ```
 The output of the above command will be written to `./scratch`.
 ### Adjust pipeline features
+The example file [custom_convert.py](https://github.com/DS4SD/docling/blob/main/examples/custom_convert.py) contains multiple ways
+one can adjust the conversion pipeline and features.
 #### Control pipeline options
 You can control if table structure recognition or OCR should be performed by arguments passed to `DocumentConverter`:

{docling-1.2.0 → docling-1.2.1}/README.md RENAMED Viewed

@@ -56,17 +56,21 @@ print(doc.export_to_markdown())  # output: "## DocLayNet: A Large Human-Annotate
 ### Convert a batch of documents
-For an example of batch-converting documents, see [convert.py](https://github.com/DS4SD/docling/blob/main/examples/convert.py).
+For an example of batch-converting documents, see [batch_convert.py](https://github.com/DS4SD/docling/blob/main/examples/batch_convert.py).
 From a local repo clone, you can run it with:
 ```
-python examples/convert.py
+python examples/batch_convert.py
 ```
 The output of the above command will be written to `./scratch`.
 ### Adjust pipeline features
+The example file [custom_convert.py](https://github.com/DS4SD/docling/blob/main/examples/custom_convert.py) contains multiple ways
+one can adjust the conversion pipeline and features.
 #### Control pipeline options
 You can control if table structure recognition or OCR should be performed by arguments passed to `DocumentConverter`:

{docling-1.2.0 → docling-1.2.1}/docling/backend/abstract_backend.py RENAMED Viewed

@@ -35,7 +35,7 @@ class PdfPageBackend(ABC):
 class PdfDocumentBackend(ABC):
     @abstractmethod
-    def __init__(self, path_or_stream: Iterable[Union[BytesIO, Path]]):
+    def __init__(self, path_or_stream: Union[BytesIO, Path]):
         pass
     @abstractmethod

{docling-1.2.0 → docling-1.2.1}/docling/backend/docling_parse_backend.py RENAMED Viewed

@@ -146,11 +146,12 @@ class DoclingParsePageBackend(PdfPageBackend):
 class DoclingParseDocumentBackend(PdfDocumentBackend):
-    def __init__(self, path_or_stream: Iterable[Union[BytesIO, Path]]):
+    def __init__(self, path_or_stream: Union[BytesIO, Path]):
         super().__init__(path_or_stream)
         self._pdoc = pdfium.PdfDocument(path_or_stream)
         # Parsing cells with docling_parser call
-        print("PARSING WITH DOCLING PARSE")
+        if isinstance(path_or_stream, BytesIO):
+            raise NotImplemented("This backend does not support byte streams yet.")
         parser = pdf_parser()
         self._parser_doc = parser.find_cells(str(path_or_stream))

{docling-1.2.0 → docling-1.2.1}/docling/backend/pypdfium2_backend.py RENAMED Viewed

@@ -199,7 +199,7 @@ class PyPdfiumPageBackend(PdfPageBackend):
 class PyPdfiumDocumentBackend(PdfDocumentBackend):
-    def __init__(self, path_or_stream: Iterable[Union[BytesIO, Path]]):
+    def __init__(self, path_or_stream: Union[BytesIO, Path]):
         super().__init__(path_or_stream)
         self._pdoc = pdfium.PdfDocument(path_or_stream)

{docling-1.2.0 → docling-1.2.1}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "docling"
-version = "1.2.0"  # DO NOT EDIT, updated automatically
+version = "1.2.1"  # DO NOT EDIT, updated automatically
 description = "Docling PDF conversion package"
 authors = ["Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Nikos Livathinos <nli@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
 license = "MIT"
@@ -33,6 +33,7 @@ huggingface_hub = ">=0.23,<1"
 requests = "^2.32.3"
 easyocr = { version = "^1.7", optional = true }
 docling-parse = "^0.0.1"
+certifi = ">=2024.7.4"
 [tool.poetry.group.dev.dependencies]
 black = {extras = ["jupyter"], version = "^24.4.2"}