docling 2.25.1__tar.gz → 2.25.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docling-2.25.1 → docling-2.25.2}/PKG-INFO +2 -2
- {docling-2.25.1 → docling-2.25.2}/README.md +1 -1
- {docling-2.25.1 → docling-2.25.2}/docling/utils/layout_postprocessor.py +2 -1
- {docling-2.25.1 → docling-2.25.2}/pyproject.toml +1 -1
- {docling-2.25.1 → docling-2.25.2}/LICENSE +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/__init__.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/backend/__init__.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/backend/abstract_backend.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/backend/asciidoc_backend.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/backend/csv_backend.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/backend/docling_parse_backend.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/backend/docling_parse_v2_backend.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/backend/html_backend.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/backend/json/__init__.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/backend/json/docling_json_backend.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/backend/md_backend.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/backend/msexcel_backend.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/backend/mspowerpoint_backend.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/backend/msword_backend.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/backend/pdf_backend.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/backend/pypdfium2_backend.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/backend/xml/__init__.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/backend/xml/jats_backend.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/backend/xml/uspto_backend.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/chunking/__init__.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/cli/__init__.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/cli/main.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/cli/models.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/cli/tools.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/datamodel/__init__.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/datamodel/base_models.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/datamodel/document.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/datamodel/pipeline_options.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/datamodel/settings.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/document_converter.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/exceptions.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/models/__init__.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/models/base_model.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/models/base_ocr_model.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/models/code_formula_model.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/models/document_picture_classifier.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/models/easyocr_model.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/models/hf_vlm_model.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/models/layout_model.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/models/ocr_mac_model.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/models/page_assemble_model.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/models/page_preprocessing_model.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/models/picture_description_api_model.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/models/picture_description_base_model.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/models/picture_description_vlm_model.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/models/rapid_ocr_model.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/models/readingorder_model.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/models/table_structure_model.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/models/tesseract_ocr_cli_model.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/models/tesseract_ocr_model.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/pipeline/__init__.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/pipeline/base_pipeline.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/pipeline/simple_pipeline.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/pipeline/standard_pdf_pipeline.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/pipeline/vlm_pipeline.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/py.typed +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/utils/__init__.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/utils/accelerator_utils.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/utils/export.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/utils/glm_utils.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/utils/locks.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/utils/model_downloader.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/utils/ocr_utils.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/utils/profiling.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/utils/utils.py +0 -0
- {docling-2.25.1 → docling-2.25.2}/docling/utils/visualization.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: docling
|
3
|
-
Version: 2.25.1
|
3
|
+
Version: 2.25.2
|
4
4
|
Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
|
5
5
|
Home-page: https://github.com/DS4SD/docling
|
6
6
|
License: MIT
|
@@ -185,7 +185,7 @@ For individual model usage, please refer to the model licenses found in the orig
|
|
185
185
|
|
186
186
|
Docling has been brought to you by IBM.
|
187
187
|
|
188
|
-
[supported_formats]: https://ds4sd.github.io/docling/supported_formats/
|
188
|
+
[supported_formats]: https://ds4sd.github.io/docling/usage/supported_formats/
|
189
189
|
[docling_document]: https://ds4sd.github.io/docling/concepts/docling_document/
|
190
190
|
[integrations]: https://ds4sd.github.io/docling/integrations/
|
191
191
|
|
@@ -123,6 +123,6 @@ For individual model usage, please refer to the model licenses found in the orig
|
|
123
123
|
|
124
124
|
Docling has been brought to you by IBM.
|
125
125
|
|
126
|
-
[supported_formats]: https://ds4sd.github.io/docling/supported_formats/
|
126
|
+
[supported_formats]: https://ds4sd.github.io/docling/usage/supported_formats/
|
127
127
|
[docling_document]: https://ds4sd.github.io/docling/concepts/docling_document/
|
128
128
|
[integrations]: https://ds4sd.github.io/docling/integrations/
|
@@ -203,6 +203,7 @@ class LayoutPostprocessor:
|
|
203
203
|
"""Initialize processor with cells and spatial indices."""
|
204
204
|
self.cells = cells
|
205
205
|
self.page_size = page_size
|
206
|
+
self.all_clusters = clusters
|
206
207
|
self.regular_clusters = [
|
207
208
|
c for c in clusters if c.label not in self.SPECIAL_TYPES
|
208
209
|
]
|
@@ -267,7 +268,7 @@ class LayoutPostprocessor:
|
|
267
268
|
# Handle orphaned cells
|
268
269
|
unassigned = self._find_unassigned_cells(clusters)
|
269
270
|
if unassigned:
|
270
|
-
next_id = max((c.id for c in clusters), default=0) + 1
|
271
|
+
next_id = max((c.id for c in self.all_clusters), default=0) + 1
|
271
272
|
orphan_clusters = []
|
272
273
|
for i, cell in enumerate(unassigned):
|
273
274
|
conf = 1.0
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "docling"
|
3
|
-
version = "2.25.1" # DO NOT EDIT, updated automatically
|
3
|
+
version = "2.25.2" # DO NOT EDIT, updated automatically
|
4
4
|
description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications."
|
5
5
|
authors = ["Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Nikos Livathinos <nli@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Panos Vagenas <pva@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
|
6
6
|
license = "MIT"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|