docling 2.25.1__tar.gz → 2.25.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. {docling-2.25.1 → docling-2.25.2}/PKG-INFO +2 -2
  2. {docling-2.25.1 → docling-2.25.2}/README.md +1 -1
  3. {docling-2.25.1 → docling-2.25.2}/docling/utils/layout_postprocessor.py +2 -1
  4. {docling-2.25.1 → docling-2.25.2}/pyproject.toml +1 -1
  5. {docling-2.25.1 → docling-2.25.2}/LICENSE +0 -0
  6. {docling-2.25.1 → docling-2.25.2}/docling/__init__.py +0 -0
  7. {docling-2.25.1 → docling-2.25.2}/docling/backend/__init__.py +0 -0
  8. {docling-2.25.1 → docling-2.25.2}/docling/backend/abstract_backend.py +0 -0
  9. {docling-2.25.1 → docling-2.25.2}/docling/backend/asciidoc_backend.py +0 -0
  10. {docling-2.25.1 → docling-2.25.2}/docling/backend/csv_backend.py +0 -0
  11. {docling-2.25.1 → docling-2.25.2}/docling/backend/docling_parse_backend.py +0 -0
  12. {docling-2.25.1 → docling-2.25.2}/docling/backend/docling_parse_v2_backend.py +0 -0
  13. {docling-2.25.1 → docling-2.25.2}/docling/backend/html_backend.py +0 -0
  14. {docling-2.25.1 → docling-2.25.2}/docling/backend/json/__init__.py +0 -0
  15. {docling-2.25.1 → docling-2.25.2}/docling/backend/json/docling_json_backend.py +0 -0
  16. {docling-2.25.1 → docling-2.25.2}/docling/backend/md_backend.py +0 -0
  17. {docling-2.25.1 → docling-2.25.2}/docling/backend/msexcel_backend.py +0 -0
  18. {docling-2.25.1 → docling-2.25.2}/docling/backend/mspowerpoint_backend.py +0 -0
  19. {docling-2.25.1 → docling-2.25.2}/docling/backend/msword_backend.py +0 -0
  20. {docling-2.25.1 → docling-2.25.2}/docling/backend/pdf_backend.py +0 -0
  21. {docling-2.25.1 → docling-2.25.2}/docling/backend/pypdfium2_backend.py +0 -0
  22. {docling-2.25.1 → docling-2.25.2}/docling/backend/xml/__init__.py +0 -0
  23. {docling-2.25.1 → docling-2.25.2}/docling/backend/xml/jats_backend.py +0 -0
  24. {docling-2.25.1 → docling-2.25.2}/docling/backend/xml/uspto_backend.py +0 -0
  25. {docling-2.25.1 → docling-2.25.2}/docling/chunking/__init__.py +0 -0
  26. {docling-2.25.1 → docling-2.25.2}/docling/cli/__init__.py +0 -0
  27. {docling-2.25.1 → docling-2.25.2}/docling/cli/main.py +0 -0
  28. {docling-2.25.1 → docling-2.25.2}/docling/cli/models.py +0 -0
  29. {docling-2.25.1 → docling-2.25.2}/docling/cli/tools.py +0 -0
  30. {docling-2.25.1 → docling-2.25.2}/docling/datamodel/__init__.py +0 -0
  31. {docling-2.25.1 → docling-2.25.2}/docling/datamodel/base_models.py +0 -0
  32. {docling-2.25.1 → docling-2.25.2}/docling/datamodel/document.py +0 -0
  33. {docling-2.25.1 → docling-2.25.2}/docling/datamodel/pipeline_options.py +0 -0
  34. {docling-2.25.1 → docling-2.25.2}/docling/datamodel/settings.py +0 -0
  35. {docling-2.25.1 → docling-2.25.2}/docling/document_converter.py +0 -0
  36. {docling-2.25.1 → docling-2.25.2}/docling/exceptions.py +0 -0
  37. {docling-2.25.1 → docling-2.25.2}/docling/models/__init__.py +0 -0
  38. {docling-2.25.1 → docling-2.25.2}/docling/models/base_model.py +0 -0
  39. {docling-2.25.1 → docling-2.25.2}/docling/models/base_ocr_model.py +0 -0
  40. {docling-2.25.1 → docling-2.25.2}/docling/models/code_formula_model.py +0 -0
  41. {docling-2.25.1 → docling-2.25.2}/docling/models/document_picture_classifier.py +0 -0
  42. {docling-2.25.1 → docling-2.25.2}/docling/models/easyocr_model.py +0 -0
  43. {docling-2.25.1 → docling-2.25.2}/docling/models/hf_vlm_model.py +0 -0
  44. {docling-2.25.1 → docling-2.25.2}/docling/models/layout_model.py +0 -0
  45. {docling-2.25.1 → docling-2.25.2}/docling/models/ocr_mac_model.py +0 -0
  46. {docling-2.25.1 → docling-2.25.2}/docling/models/page_assemble_model.py +0 -0
  47. {docling-2.25.1 → docling-2.25.2}/docling/models/page_preprocessing_model.py +0 -0
  48. {docling-2.25.1 → docling-2.25.2}/docling/models/picture_description_api_model.py +0 -0
  49. {docling-2.25.1 → docling-2.25.2}/docling/models/picture_description_base_model.py +0 -0
  50. {docling-2.25.1 → docling-2.25.2}/docling/models/picture_description_vlm_model.py +0 -0
  51. {docling-2.25.1 → docling-2.25.2}/docling/models/rapid_ocr_model.py +0 -0
  52. {docling-2.25.1 → docling-2.25.2}/docling/models/readingorder_model.py +0 -0
  53. {docling-2.25.1 → docling-2.25.2}/docling/models/table_structure_model.py +0 -0
  54. {docling-2.25.1 → docling-2.25.2}/docling/models/tesseract_ocr_cli_model.py +0 -0
  55. {docling-2.25.1 → docling-2.25.2}/docling/models/tesseract_ocr_model.py +0 -0
  56. {docling-2.25.1 → docling-2.25.2}/docling/pipeline/__init__.py +0 -0
  57. {docling-2.25.1 → docling-2.25.2}/docling/pipeline/base_pipeline.py +0 -0
  58. {docling-2.25.1 → docling-2.25.2}/docling/pipeline/simple_pipeline.py +0 -0
  59. {docling-2.25.1 → docling-2.25.2}/docling/pipeline/standard_pdf_pipeline.py +0 -0
  60. {docling-2.25.1 → docling-2.25.2}/docling/pipeline/vlm_pipeline.py +0 -0
  61. {docling-2.25.1 → docling-2.25.2}/docling/py.typed +0 -0
  62. {docling-2.25.1 → docling-2.25.2}/docling/utils/__init__.py +0 -0
  63. {docling-2.25.1 → docling-2.25.2}/docling/utils/accelerator_utils.py +0 -0
  64. {docling-2.25.1 → docling-2.25.2}/docling/utils/export.py +0 -0
  65. {docling-2.25.1 → docling-2.25.2}/docling/utils/glm_utils.py +0 -0
  66. {docling-2.25.1 → docling-2.25.2}/docling/utils/locks.py +0 -0
  67. {docling-2.25.1 → docling-2.25.2}/docling/utils/model_downloader.py +0 -0
  68. {docling-2.25.1 → docling-2.25.2}/docling/utils/ocr_utils.py +0 -0
  69. {docling-2.25.1 → docling-2.25.2}/docling/utils/profiling.py +0 -0
  70. {docling-2.25.1 → docling-2.25.2}/docling/utils/utils.py +0 -0
  71. {docling-2.25.1 → docling-2.25.2}/docling/utils/visualization.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling
3
- Version: 2.25.1
3
+ Version: 2.25.2
4
4
  Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
5
5
  Home-page: https://github.com/DS4SD/docling
6
6
  License: MIT
@@ -185,7 +185,7 @@ For individual model usage, please refer to the model licenses found in the orig
185
185
 
186
186
  Docling has been brought to you by IBM.
187
187
 
188
- [supported_formats]: https://ds4sd.github.io/docling/supported_formats/
188
+ [supported_formats]: https://ds4sd.github.io/docling/usage/supported_formats/
189
189
  [docling_document]: https://ds4sd.github.io/docling/concepts/docling_document/
190
190
  [integrations]: https://ds4sd.github.io/docling/integrations/
191
191
 
@@ -123,6 +123,6 @@ For individual model usage, please refer to the model licenses found in the orig
123
123
 
124
124
  Docling has been brought to you by IBM.
125
125
 
126
- [supported_formats]: https://ds4sd.github.io/docling/supported_formats/
126
+ [supported_formats]: https://ds4sd.github.io/docling/usage/supported_formats/
127
127
  [docling_document]: https://ds4sd.github.io/docling/concepts/docling_document/
128
128
  [integrations]: https://ds4sd.github.io/docling/integrations/
@@ -203,6 +203,7 @@ class LayoutPostprocessor:
203
203
  """Initialize processor with cells and spatial indices."""
204
204
  self.cells = cells
205
205
  self.page_size = page_size
206
+ self.all_clusters = clusters
206
207
  self.regular_clusters = [
207
208
  c for c in clusters if c.label not in self.SPECIAL_TYPES
208
209
  ]
@@ -267,7 +268,7 @@ class LayoutPostprocessor:
267
268
  # Handle orphaned cells
268
269
  unassigned = self._find_unassigned_cells(clusters)
269
270
  if unassigned:
270
- next_id = max((c.id for c in clusters), default=0) + 1
271
+ next_id = max((c.id for c in self.all_clusters), default=0) + 1
271
272
  orphan_clusters = []
272
273
  for i, cell in enumerate(unassigned):
273
274
  conf = 1.0
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "docling"
3
- version = "2.25.1" # DO NOT EDIT, updated automatically
3
+ version = "2.25.2" # DO NOT EDIT, updated automatically
4
4
  description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications."
5
5
  authors = ["Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Nikos Livathinos <nli@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Panos Vagenas <pva@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
6
6
  license = "MIT"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes