docling 1.16.0__tar.gz → 1.17.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {docling-1.16.0 → docling-1.17.0}/PKG-INFO +27 -6
  2. {docling-1.16.0 → docling-1.17.0}/README.md +23 -2
  3. {docling-1.16.0 → docling-1.17.0}/pyproject.toml +4 -4
  4. {docling-1.16.0 → docling-1.17.0}/LICENSE +0 -0
  5. {docling-1.16.0 → docling-1.17.0}/docling/__init__.py +0 -0
  6. {docling-1.16.0 → docling-1.17.0}/docling/backend/__init__.py +0 -0
  7. {docling-1.16.0 → docling-1.17.0}/docling/backend/abstract_backend.py +0 -0
  8. {docling-1.16.0 → docling-1.17.0}/docling/backend/docling_parse_backend.py +0 -0
  9. {docling-1.16.0 → docling-1.17.0}/docling/backend/pypdfium2_backend.py +0 -0
  10. {docling-1.16.0 → docling-1.17.0}/docling/cli/__init__.py +0 -0
  11. {docling-1.16.0 → docling-1.17.0}/docling/cli/main.py +0 -0
  12. {docling-1.16.0 → docling-1.17.0}/docling/datamodel/__init__.py +0 -0
  13. {docling-1.16.0 → docling-1.17.0}/docling/datamodel/base_models.py +0 -0
  14. {docling-1.16.0 → docling-1.17.0}/docling/datamodel/document.py +0 -0
  15. {docling-1.16.0 → docling-1.17.0}/docling/datamodel/pipeline_options.py +0 -0
  16. {docling-1.16.0 → docling-1.17.0}/docling/datamodel/settings.py +0 -0
  17. {docling-1.16.0 → docling-1.17.0}/docling/document_converter.py +0 -0
  18. {docling-1.16.0 → docling-1.17.0}/docling/models/__init__.py +0 -0
  19. {docling-1.16.0 → docling-1.17.0}/docling/models/base_ocr_model.py +0 -0
  20. {docling-1.16.0 → docling-1.17.0}/docling/models/ds_glm_model.py +0 -0
  21. {docling-1.16.0 → docling-1.17.0}/docling/models/easyocr_model.py +0 -0
  22. {docling-1.16.0 → docling-1.17.0}/docling/models/layout_model.py +0 -0
  23. {docling-1.16.0 → docling-1.17.0}/docling/models/page_assemble_model.py +0 -0
  24. {docling-1.16.0 → docling-1.17.0}/docling/models/table_structure_model.py +0 -0
  25. {docling-1.16.0 → docling-1.17.0}/docling/pipeline/__init__.py +0 -0
  26. {docling-1.16.0 → docling-1.17.0}/docling/pipeline/base_model_pipeline.py +0 -0
  27. {docling-1.16.0 → docling-1.17.0}/docling/pipeline/standard_model_pipeline.py +0 -0
  28. {docling-1.16.0 → docling-1.17.0}/docling/utils/__init__.py +0 -0
  29. {docling-1.16.0 → docling-1.17.0}/docling/utils/export.py +0 -0
  30. {docling-1.16.0 → docling-1.17.0}/docling/utils/layout_utils.py +0 -0
  31. {docling-1.16.0 → docling-1.17.0}/docling/utils/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling
3
- Version: 1.16.0
3
+ Version: 1.17.0
4
4
  Summary: Docling PDF conversion package
5
5
  Home-page: https://github.com/DS4SD/docling
6
6
  License: MIT
@@ -20,10 +20,10 @@ Classifier: Programming Language :: Python :: 3.11
20
20
  Classifier: Programming Language :: Python :: 3.12
21
21
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
22
  Requires-Dist: certifi (>=2024.7.4)
23
- Requires-Dist: deepsearch-glm (>=0.21.1,<0.22.0)
23
+ Requires-Dist: deepsearch-glm (>=0.22.0,<0.23.0)
24
24
  Requires-Dist: docling-core (>=1.6.2,<2.0.0)
25
- Requires-Dist: docling-ibm-models (>=1.2.0,<2.0.0)
26
- Requires-Dist: docling-parse (>=1.2.0,<2.0.0)
25
+ Requires-Dist: docling-ibm-models (>=1.3.1,<2.0.0)
26
+ Requires-Dist: docling-parse (>=1.4.1,<2.0.0)
27
27
  Requires-Dist: easyocr (>=1.7,<2.0)
28
28
  Requires-Dist: filetype (>=1.2.0,<2.0.0)
29
29
  Requires-Dist: huggingface_hub (>=0.23,<1)
@@ -77,8 +77,7 @@ To use Docling, simply install `docling` from your package manager, e.g. pip:
77
77
  pip install docling
78
78
  ```
79
79
 
80
- > [!NOTE]
81
- > Works on macOS and Linux environments. Windows platforms are currently not tested.
80
+ Works on macOS, Linux and Windows environments, on both x86_64 and arm64 architectures.
82
81
 
83
82
  <details>
84
83
  <summary><b>Alternative PyTorch distributions</b></summary>
@@ -251,6 +250,28 @@ results = doc_converter.convert(conv_input)
251
250
 
252
251
  You can limit the CPU threads used by Docling by setting the environment variable `OMP_NUM_THREADS` accordingly. The default setting is using 4 CPU threads.
253
252
 
253
+ ### Chunking
254
+
255
+ You can perform a hierarchy-aware chunking of a Docling document as follows:
256
+
257
+ ```python
258
+ from docling.document_converter import DocumentConverter
259
+ from docling_core.transforms.chunker import HierarchicalChunker
260
+
261
+ doc = DocumentConverter().convert_single("https://arxiv.org/pdf/2206.01062").output
262
+ chunks = list(HierarchicalChunker().chunk(doc))
263
+ # > [
264
+ # > ChunkWithMetadata(
265
+ # > path='$.main-text[0]',
266
+ # > text='DocLayNet: A Large Human-Annotated Dataset [...]',
267
+ # > page=1,
268
+ # > bbox=[107.30, 672.38, 505.19, 709.08]
269
+ # > ),
270
+ # > [...]
271
+ # > ]
272
+ ```
273
+
274
+
254
275
  ## Technical report
255
276
 
256
277
  For more details on Docling's inner workings, check out the [Docling Technical Report](https://arxiv.org/abs/2408.09869).
@@ -33,8 +33,7 @@ To use Docling, simply install `docling` from your package manager, e.g. pip:
33
33
  pip install docling
34
34
  ```
35
35
 
36
- > [!NOTE]
37
- > Works on macOS and Linux environments. Windows platforms are currently not tested.
36
+ Works on macOS, Linux and Windows environments, on both x86_64 and arm64 architectures.
38
37
 
39
38
  <details>
40
39
  <summary><b>Alternative PyTorch distributions</b></summary>
@@ -207,6 +206,28 @@ results = doc_converter.convert(conv_input)
207
206
 
208
207
  You can limit the CPU threads used by Docling by setting the environment variable `OMP_NUM_THREADS` accordingly. The default setting is using 4 CPU threads.
209
208
 
209
+ ### Chunking
210
+
211
+ You can perform a hierarchy-aware chunking of a Docling document as follows:
212
+
213
+ ```python
214
+ from docling.document_converter import DocumentConverter
215
+ from docling_core.transforms.chunker import HierarchicalChunker
216
+
217
+ doc = DocumentConverter().convert_single("https://arxiv.org/pdf/2206.01062").output
218
+ chunks = list(HierarchicalChunker().chunk(doc))
219
+ # > [
220
+ # > ChunkWithMetadata(
221
+ # > path='$.main-text[0]',
222
+ # > text='DocLayNet: A Large Human-Annotated Dataset [...]',
223
+ # > page=1,
224
+ # > bbox=[107.30, 672.38, 505.19, 709.08]
225
+ # > ),
226
+ # > [...]
227
+ # > ]
228
+ ```
229
+
230
+
210
231
  ## Technical report
211
232
 
212
233
  For more details on Docling's inner workings, check out the [Docling Technical Report](https://arxiv.org/abs/2408.09869).
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "docling"
3
- version = "1.16.0" # DO NOT EDIT, updated automatically
3
+ version = "1.17.0" # DO NOT EDIT, updated automatically
4
4
  description = "Docling PDF conversion package"
5
5
  authors = ["Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Nikos Livathinos <nli@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
6
6
  license = "MIT"
@@ -38,15 +38,15 @@ torchvision = [
38
38
  python = "^3.10"
39
39
  pydantic = "^2.0.0"
40
40
  docling-core = "^1.6.2"
41
- docling-ibm-models = "^1.2.0"
42
- deepsearch-glm = "^0.21.1"
41
+ docling-ibm-models = "^1.3.1"
42
+ deepsearch-glm = "^0.22.0"
43
43
  filetype = "^1.2.0"
44
44
  pypdfium2 = "^4.30.0"
45
45
  pydantic-settings = "^2.3.0"
46
46
  huggingface_hub = ">=0.23,<1"
47
47
  requests = "^2.32.3"
48
48
  easyocr = "^1.7"
49
- docling-parse = "^1.2.0"
49
+ docling-parse = "^1.4.1"
50
50
  certifi = ">=2024.7.4"
51
51
  rtree = "^1.3.0"
52
52
  scipy = "^1.14.1"
File without changes
File without changes
File without changes