docling 1.16.0__tar.gz → 1.16.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {docling-1.16.0 → docling-1.16.1}/PKG-INFO +24 -2
  2. {docling-1.16.0 → docling-1.16.1}/README.md +22 -0
  3. {docling-1.16.0 → docling-1.16.1}/pyproject.toml +2 -2
  4. {docling-1.16.0 → docling-1.16.1}/LICENSE +0 -0
  5. {docling-1.16.0 → docling-1.16.1}/docling/__init__.py +0 -0
  6. {docling-1.16.0 → docling-1.16.1}/docling/backend/__init__.py +0 -0
  7. {docling-1.16.0 → docling-1.16.1}/docling/backend/abstract_backend.py +0 -0
  8. {docling-1.16.0 → docling-1.16.1}/docling/backend/docling_parse_backend.py +0 -0
  9. {docling-1.16.0 → docling-1.16.1}/docling/backend/pypdfium2_backend.py +0 -0
  10. {docling-1.16.0 → docling-1.16.1}/docling/cli/__init__.py +0 -0
  11. {docling-1.16.0 → docling-1.16.1}/docling/cli/main.py +0 -0
  12. {docling-1.16.0 → docling-1.16.1}/docling/datamodel/__init__.py +0 -0
  13. {docling-1.16.0 → docling-1.16.1}/docling/datamodel/base_models.py +0 -0
  14. {docling-1.16.0 → docling-1.16.1}/docling/datamodel/document.py +0 -0
  15. {docling-1.16.0 → docling-1.16.1}/docling/datamodel/pipeline_options.py +0 -0
  16. {docling-1.16.0 → docling-1.16.1}/docling/datamodel/settings.py +0 -0
  17. {docling-1.16.0 → docling-1.16.1}/docling/document_converter.py +0 -0
  18. {docling-1.16.0 → docling-1.16.1}/docling/models/__init__.py +0 -0
  19. {docling-1.16.0 → docling-1.16.1}/docling/models/base_ocr_model.py +0 -0
  20. {docling-1.16.0 → docling-1.16.1}/docling/models/ds_glm_model.py +0 -0
  21. {docling-1.16.0 → docling-1.16.1}/docling/models/easyocr_model.py +0 -0
  22. {docling-1.16.0 → docling-1.16.1}/docling/models/layout_model.py +0 -0
  23. {docling-1.16.0 → docling-1.16.1}/docling/models/page_assemble_model.py +0 -0
  24. {docling-1.16.0 → docling-1.16.1}/docling/models/table_structure_model.py +0 -0
  25. {docling-1.16.0 → docling-1.16.1}/docling/pipeline/__init__.py +0 -0
  26. {docling-1.16.0 → docling-1.16.1}/docling/pipeline/base_model_pipeline.py +0 -0
  27. {docling-1.16.0 → docling-1.16.1}/docling/pipeline/standard_model_pipeline.py +0 -0
  28. {docling-1.16.0 → docling-1.16.1}/docling/utils/__init__.py +0 -0
  29. {docling-1.16.0 → docling-1.16.1}/docling/utils/export.py +0 -0
  30. {docling-1.16.0 → docling-1.16.1}/docling/utils/layout_utils.py +0 -0
  31. {docling-1.16.0 → docling-1.16.1}/docling/utils/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling
3
- Version: 1.16.0
3
+ Version: 1.16.1
4
4
  Summary: Docling PDF conversion package
5
5
  Home-page: https://github.com/DS4SD/docling
6
6
  License: MIT
@@ -22,7 +22,7 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
22
  Requires-Dist: certifi (>=2024.7.4)
23
23
  Requires-Dist: deepsearch-glm (>=0.21.1,<0.22.0)
24
24
  Requires-Dist: docling-core (>=1.6.2,<2.0.0)
25
- Requires-Dist: docling-ibm-models (>=1.2.0,<2.0.0)
25
+ Requires-Dist: docling-ibm-models (>=1.3.1,<2.0.0)
26
26
  Requires-Dist: docling-parse (>=1.2.0,<2.0.0)
27
27
  Requires-Dist: easyocr (>=1.7,<2.0)
28
28
  Requires-Dist: filetype (>=1.2.0,<2.0.0)
@@ -251,6 +251,28 @@ results = doc_converter.convert(conv_input)
251
251
 
252
252
  You can limit the number of CPU threads used by Docling by setting the environment variable `OMP_NUM_THREADS`. By default, Docling uses 4 CPU threads.
253
253
 
254
+ ### Chunking
255
+
256
+ You can perform hierarchy-aware chunking of a Docling document as follows:
257
+
258
+ ```python
259
+ from docling.document_converter import DocumentConverter
260
+ from docling_core.transforms.chunker import HierarchicalChunker
261
+
262
+ doc = DocumentConverter().convert_single("https://arxiv.org/pdf/2206.01062").output
263
+ chunks = list(HierarchicalChunker().chunk(doc))
264
+ # > [
265
+ # > ChunkWithMetadata(
266
+ # > path='$.main-text[0]',
267
+ # > text='DocLayNet: A Large Human-Annotated Dataset [...]',
268
+ # > page=1,
269
+ # > bbox=[107.30, 672.38, 505.19, 709.08]
270
+ # > ),
271
+ # > [...]
272
+ # > ]
273
+ ```
274
+
275
+
254
276
  ## Technical report
255
277
 
256
278
  For more details on Docling's inner workings, check out the [Docling Technical Report](https://arxiv.org/abs/2408.09869).
@@ -207,6 +207,28 @@ results = doc_converter.convert(conv_input)
207
207
 
208
208
  You can limit the number of CPU threads used by Docling by setting the environment variable `OMP_NUM_THREADS`. By default, Docling uses 4 CPU threads.
209
209
 
210
+ ### Chunking
211
+
212
+ You can perform hierarchy-aware chunking of a Docling document as follows:
213
+
214
+ ```python
215
+ from docling.document_converter import DocumentConverter
216
+ from docling_core.transforms.chunker import HierarchicalChunker
217
+
218
+ doc = DocumentConverter().convert_single("https://arxiv.org/pdf/2206.01062").output
219
+ chunks = list(HierarchicalChunker().chunk(doc))
220
+ # > [
221
+ # > ChunkWithMetadata(
222
+ # > path='$.main-text[0]',
223
+ # > text='DocLayNet: A Large Human-Annotated Dataset [...]',
224
+ # > page=1,
225
+ # > bbox=[107.30, 672.38, 505.19, 709.08]
226
+ # > ),
227
+ # > [...]
228
+ # > ]
229
+ ```
230
+
231
+
210
232
  ## Technical report
211
233
 
212
234
  For more details on Docling's inner workings, check out the [Docling Technical Report](https://arxiv.org/abs/2408.09869).
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "docling"
3
- version = "1.16.0" # DO NOT EDIT, updated automatically
3
+ version = "1.16.1" # DO NOT EDIT, updated automatically
4
4
  description = "Docling PDF conversion package"
5
5
  authors = ["Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Nikos Livathinos <nli@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
6
6
  license = "MIT"
@@ -38,7 +38,7 @@ torchvision = [
38
38
  python = "^3.10"
39
39
  pydantic = "^2.0.0"
40
40
  docling-core = "^1.6.2"
41
- docling-ibm-models = "^1.2.0"
41
+ docling-ibm-models = "^1.3.1"
42
42
  deepsearch-glm = "^0.21.1"
43
43
  filetype = "^1.2.0"
44
44
  pypdfium2 = "^4.30.0"
File without changes
File without changes
File without changes