PyPI - docling - Versions diffs - 1.16.0__tar.gz → 1.17.0__tar.gz - Mend

docling 1.16.0.tar.gz → 1.17.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31)

{docling-1.16.0 → docling-1.17.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: docling
-Version: 1.16.0
+Version: 1.17.0
 Summary: Docling PDF conversion package
 Home-page: https://github.com/DS4SD/docling
 License: MIT
@@ -20,10 +20,10 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Dist: certifi (>=2024.7.4)
-Requires-Dist: deepsearch-glm (>=0.21.1,<0.22.0)
+Requires-Dist: deepsearch-glm (>=0.22.0,<0.23.0)
 Requires-Dist: docling-core (>=1.6.2,<2.0.0)
-Requires-Dist: docling-ibm-models (>=1.2.0,<2.0.0)
-Requires-Dist: docling-parse (>=1.2.0,<2.0.0)
+Requires-Dist: docling-ibm-models (>=1.3.1,<2.0.0)
+Requires-Dist: docling-parse (>=1.4.1,<2.0.0)
 Requires-Dist: easyocr (>=1.7,<2.0)
 Requires-Dist: filetype (>=1.2.0,<2.0.0)
 Requires-Dist: huggingface_hub (>=0.23,<1)
@@ -77,8 +77,7 @@ To use Docling, simply install `docling` from your package manager, e.g. pip:
 pip install docling
 ```
-> [!NOTE]
-> Works on macOS and Linux environments. Windows platforms are currently not tested.
+Works on macOS, Linux and Windows environments. Both x86_64 and arm64 architectures.
 <details>
   <summary><b>Alternative PyTorch distributions</b></summary>
@@ -251,6 +250,28 @@ results = doc_converter.convert(conv_input)
 You can limit the CPU threads used by Docling by setting the environment variable `OMP_NUM_THREADS` accordingly. The default setting is using 4 CPU threads.
+### Chunking
+You can perform a hierarchy-aware chunking of a Docling document as follows:
+```python
+from docling.document_converter import DocumentConverter
+from docling_core.transforms.chunker import HierarchicalChunker
+doc = DocumentConverter().convert_single("https://arxiv.org/pdf/2206.01062").output
+chunks = list(HierarchicalChunker().chunk(doc))
+# > [
+# >     ChunkWithMetadata(
+# >         path='$.main-text[0]',
+# >         text='DocLayNet: A Large Human-Annotated Dataset [...]',
+# >         page=1,
+# >         bbox=[107.30, 672.38, 505.19, 709.08]
+# >     ),
+# >     [...]
+# > ]
+```
 ## Technical report
 For more details on Docling's inner workings, check out the [Docling Technical Report](https://arxiv.org/abs/2408.09869).

{docling-1.16.0 → docling-1.17.0}/README.md RENAMED Viewed

@@ -33,8 +33,7 @@ To use Docling, simply install `docling` from your package manager, e.g. pip:
 pip install docling
 ```
-> [!NOTE]
-> Works on macOS and Linux environments. Windows platforms are currently not tested.
+Works on macOS, Linux and Windows environments. Both x86_64 and arm64 architectures.
 <details>
   <summary><b>Alternative PyTorch distributions</b></summary>
@@ -207,6 +206,28 @@ results = doc_converter.convert(conv_input)
 You can limit the CPU threads used by Docling by setting the environment variable `OMP_NUM_THREADS` accordingly. The default setting is using 4 CPU threads.
+### Chunking
+You can perform a hierarchy-aware chunking of a Docling document as follows:
+```python
+from docling.document_converter import DocumentConverter
+from docling_core.transforms.chunker import HierarchicalChunker
+doc = DocumentConverter().convert_single("https://arxiv.org/pdf/2206.01062").output
+chunks = list(HierarchicalChunker().chunk(doc))
+# > [
+# >     ChunkWithMetadata(
+# >         path='$.main-text[0]',
+# >         text='DocLayNet: A Large Human-Annotated Dataset [...]',
+# >         page=1,
+# >         bbox=[107.30, 672.38, 505.19, 709.08]
+# >     ),
+# >     [...]
+# > ]
+```
 ## Technical report
 For more details on Docling's inner workings, check out the [Docling Technical Report](https://arxiv.org/abs/2408.09869).

{docling-1.16.0 → docling-1.17.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "docling"
-version = "1.16.0"  # DO NOT EDIT, updated automatically
+version = "1.17.0"  # DO NOT EDIT, updated automatically
 description = "Docling PDF conversion package"
 authors = ["Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Nikos Livathinos <nli@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
 license = "MIT"
@@ -38,15 +38,15 @@ torchvision = [
 python = "^3.10"
 pydantic = "^2.0.0"
 docling-core = "^1.6.2"
-docling-ibm-models = "^1.2.0"
-deepsearch-glm = "^0.21.1"
+docling-ibm-models = "^1.3.1"
+deepsearch-glm = "^0.22.0"
 filetype = "^1.2.0"
 pypdfium2 = "^4.30.0"
 pydantic-settings = "^2.3.0"
 huggingface_hub = ">=0.23,<1"
 requests = "^2.32.3"
 easyocr = "^1.7"
-docling-parse = "^1.2.0"
+docling-parse = "^1.4.1"
 certifi = ">=2024.7.4"
 rtree = "^1.3.0"
 scipy = "^1.14.1"