docling 2.47.0__tar.gz → 2.47.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. {docling-2.47.0 → docling-2.47.1}/PKG-INFO +2 -2
  2. {docling-2.47.0 → docling-2.47.1}/docling/pipeline/base_pipeline.py +3 -2
  3. {docling-2.47.0 → docling-2.47.1}/docling.egg-info/PKG-INFO +2 -2
  4. {docling-2.47.0 → docling-2.47.1}/docling.egg-info/requires.txt +1 -1
  5. {docling-2.47.0 → docling-2.47.1}/pyproject.toml +2 -2
  6. {docling-2.47.0 → docling-2.47.1}/LICENSE +0 -0
  7. {docling-2.47.0 → docling-2.47.1}/README.md +0 -0
  8. {docling-2.47.0 → docling-2.47.1}/docling/__init__.py +0 -0
  9. {docling-2.47.0 → docling-2.47.1}/docling/backend/__init__.py +0 -0
  10. {docling-2.47.0 → docling-2.47.1}/docling/backend/abstract_backend.py +0 -0
  11. {docling-2.47.0 → docling-2.47.1}/docling/backend/asciidoc_backend.py +0 -0
  12. {docling-2.47.0 → docling-2.47.1}/docling/backend/csv_backend.py +0 -0
  13. {docling-2.47.0 → docling-2.47.1}/docling/backend/docling_parse_backend.py +0 -0
  14. {docling-2.47.0 → docling-2.47.1}/docling/backend/docling_parse_v2_backend.py +0 -0
  15. {docling-2.47.0 → docling-2.47.1}/docling/backend/docling_parse_v4_backend.py +0 -0
  16. {docling-2.47.0 → docling-2.47.1}/docling/backend/docx/__init__.py +0 -0
  17. {docling-2.47.0 → docling-2.47.1}/docling/backend/docx/latex/__init__.py +0 -0
  18. {docling-2.47.0 → docling-2.47.1}/docling/backend/docx/latex/latex_dict.py +0 -0
  19. {docling-2.47.0 → docling-2.47.1}/docling/backend/docx/latex/omml.py +0 -0
  20. {docling-2.47.0 → docling-2.47.1}/docling/backend/html_backend.py +0 -0
  21. {docling-2.47.0 → docling-2.47.1}/docling/backend/json/__init__.py +0 -0
  22. {docling-2.47.0 → docling-2.47.1}/docling/backend/json/docling_json_backend.py +0 -0
  23. {docling-2.47.0 → docling-2.47.1}/docling/backend/md_backend.py +0 -0
  24. {docling-2.47.0 → docling-2.47.1}/docling/backend/mets_gbs_backend.py +0 -0
  25. {docling-2.47.0 → docling-2.47.1}/docling/backend/msexcel_backend.py +0 -0
  26. {docling-2.47.0 → docling-2.47.1}/docling/backend/mspowerpoint_backend.py +0 -0
  27. {docling-2.47.0 → docling-2.47.1}/docling/backend/msword_backend.py +0 -0
  28. {docling-2.47.0 → docling-2.47.1}/docling/backend/noop_backend.py +0 -0
  29. {docling-2.47.0 → docling-2.47.1}/docling/backend/pdf_backend.py +0 -0
  30. {docling-2.47.0 → docling-2.47.1}/docling/backend/pypdfium2_backend.py +0 -0
  31. {docling-2.47.0 → docling-2.47.1}/docling/backend/xml/__init__.py +0 -0
  32. {docling-2.47.0 → docling-2.47.1}/docling/backend/xml/jats_backend.py +0 -0
  33. {docling-2.47.0 → docling-2.47.1}/docling/backend/xml/uspto_backend.py +0 -0
  34. {docling-2.47.0 → docling-2.47.1}/docling/chunking/__init__.py +0 -0
  35. {docling-2.47.0 → docling-2.47.1}/docling/cli/__init__.py +0 -0
  36. {docling-2.47.0 → docling-2.47.1}/docling/cli/main.py +0 -0
  37. {docling-2.47.0 → docling-2.47.1}/docling/cli/models.py +0 -0
  38. {docling-2.47.0 → docling-2.47.1}/docling/cli/tools.py +0 -0
  39. {docling-2.47.0 → docling-2.47.1}/docling/datamodel/__init__.py +0 -0
  40. {docling-2.47.0 → docling-2.47.1}/docling/datamodel/accelerator_options.py +0 -0
  41. {docling-2.47.0 → docling-2.47.1}/docling/datamodel/asr_model_specs.py +0 -0
  42. {docling-2.47.0 → docling-2.47.1}/docling/datamodel/base_models.py +0 -0
  43. {docling-2.47.0 → docling-2.47.1}/docling/datamodel/document.py +0 -0
  44. {docling-2.47.0 → docling-2.47.1}/docling/datamodel/layout_model_specs.py +0 -0
  45. {docling-2.47.0 → docling-2.47.1}/docling/datamodel/pipeline_options.py +0 -0
  46. {docling-2.47.0 → docling-2.47.1}/docling/datamodel/pipeline_options_asr_model.py +0 -0
  47. {docling-2.47.0 → docling-2.47.1}/docling/datamodel/pipeline_options_vlm_model.py +0 -0
  48. {docling-2.47.0 → docling-2.47.1}/docling/datamodel/settings.py +0 -0
  49. {docling-2.47.0 → docling-2.47.1}/docling/datamodel/vlm_model_specs.py +0 -0
  50. {docling-2.47.0 → docling-2.47.1}/docling/document_converter.py +0 -0
  51. {docling-2.47.0 → docling-2.47.1}/docling/exceptions.py +0 -0
  52. {docling-2.47.0 → docling-2.47.1}/docling/models/__init__.py +0 -0
  53. {docling-2.47.0 → docling-2.47.1}/docling/models/api_vlm_model.py +0 -0
  54. {docling-2.47.0 → docling-2.47.1}/docling/models/base_model.py +0 -0
  55. {docling-2.47.0 → docling-2.47.1}/docling/models/base_ocr_model.py +0 -0
  56. {docling-2.47.0 → docling-2.47.1}/docling/models/code_formula_model.py +0 -0
  57. {docling-2.47.0 → docling-2.47.1}/docling/models/document_picture_classifier.py +0 -0
  58. {docling-2.47.0 → docling-2.47.1}/docling/models/easyocr_model.py +0 -0
  59. {docling-2.47.0 → docling-2.47.1}/docling/models/factories/__init__.py +0 -0
  60. {docling-2.47.0 → docling-2.47.1}/docling/models/factories/base_factory.py +0 -0
  61. {docling-2.47.0 → docling-2.47.1}/docling/models/factories/ocr_factory.py +0 -0
  62. {docling-2.47.0 → docling-2.47.1}/docling/models/factories/picture_description_factory.py +0 -0
  63. {docling-2.47.0 → docling-2.47.1}/docling/models/layout_model.py +0 -0
  64. {docling-2.47.0 → docling-2.47.1}/docling/models/ocr_mac_model.py +0 -0
  65. {docling-2.47.0 → docling-2.47.1}/docling/models/page_assemble_model.py +0 -0
  66. {docling-2.47.0 → docling-2.47.1}/docling/models/page_preprocessing_model.py +0 -0
  67. {docling-2.47.0 → docling-2.47.1}/docling/models/picture_description_api_model.py +0 -0
  68. {docling-2.47.0 → docling-2.47.1}/docling/models/picture_description_base_model.py +0 -0
  69. {docling-2.47.0 → docling-2.47.1}/docling/models/picture_description_vlm_model.py +0 -0
  70. {docling-2.47.0 → docling-2.47.1}/docling/models/plugins/__init__.py +0 -0
  71. {docling-2.47.0 → docling-2.47.1}/docling/models/plugins/defaults.py +0 -0
  72. {docling-2.47.0 → docling-2.47.1}/docling/models/rapid_ocr_model.py +0 -0
  73. {docling-2.47.0 → docling-2.47.1}/docling/models/readingorder_model.py +0 -0
  74. {docling-2.47.0 → docling-2.47.1}/docling/models/table_structure_model.py +0 -0
  75. {docling-2.47.0 → docling-2.47.1}/docling/models/tesseract_ocr_cli_model.py +0 -0
  76. {docling-2.47.0 → docling-2.47.1}/docling/models/tesseract_ocr_model.py +0 -0
  77. {docling-2.47.0 → docling-2.47.1}/docling/models/utils/__init__.py +0 -0
  78. {docling-2.47.0 → docling-2.47.1}/docling/models/utils/hf_model_download.py +0 -0
  79. {docling-2.47.0 → docling-2.47.1}/docling/models/vlm_models_inline/__init__.py +0 -0
  80. {docling-2.47.0 → docling-2.47.1}/docling/models/vlm_models_inline/hf_transformers_model.py +0 -0
  81. {docling-2.47.0 → docling-2.47.1}/docling/models/vlm_models_inline/mlx_model.py +0 -0
  82. {docling-2.47.0 → docling-2.47.1}/docling/models/vlm_models_inline/vllm_model.py +0 -0
  83. {docling-2.47.0 → docling-2.47.1}/docling/pipeline/__init__.py +0 -0
  84. {docling-2.47.0 → docling-2.47.1}/docling/pipeline/asr_pipeline.py +0 -0
  85. {docling-2.47.0 → docling-2.47.1}/docling/pipeline/simple_pipeline.py +0 -0
  86. {docling-2.47.0 → docling-2.47.1}/docling/pipeline/standard_pdf_pipeline.py +0 -0
  87. {docling-2.47.0 → docling-2.47.1}/docling/pipeline/threaded_standard_pdf_pipeline.py +0 -0
  88. {docling-2.47.0 → docling-2.47.1}/docling/pipeline/vlm_pipeline.py +0 -0
  89. {docling-2.47.0 → docling-2.47.1}/docling/py.typed +0 -0
  90. {docling-2.47.0 → docling-2.47.1}/docling/utils/__init__.py +0 -0
  91. {docling-2.47.0 → docling-2.47.1}/docling/utils/accelerator_utils.py +0 -0
  92. {docling-2.47.0 → docling-2.47.1}/docling/utils/api_image_request.py +0 -0
  93. {docling-2.47.0 → docling-2.47.1}/docling/utils/export.py +0 -0
  94. {docling-2.47.0 → docling-2.47.1}/docling/utils/glm_utils.py +0 -0
  95. {docling-2.47.0 → docling-2.47.1}/docling/utils/layout_postprocessor.py +0 -0
  96. {docling-2.47.0 → docling-2.47.1}/docling/utils/locks.py +0 -0
  97. {docling-2.47.0 → docling-2.47.1}/docling/utils/model_downloader.py +0 -0
  98. {docling-2.47.0 → docling-2.47.1}/docling/utils/ocr_utils.py +0 -0
  99. {docling-2.47.0 → docling-2.47.1}/docling/utils/orientation.py +0 -0
  100. {docling-2.47.0 → docling-2.47.1}/docling/utils/profiling.py +0 -0
  101. {docling-2.47.0 → docling-2.47.1}/docling/utils/utils.py +0 -0
  102. {docling-2.47.0 → docling-2.47.1}/docling/utils/visualization.py +0 -0
  103. {docling-2.47.0 → docling-2.47.1}/docling.egg-info/SOURCES.txt +0 -0
  104. {docling-2.47.0 → docling-2.47.1}/docling.egg-info/dependency_links.txt +0 -0
  105. {docling-2.47.0 → docling-2.47.1}/docling.egg-info/entry_points.txt +0 -0
  106. {docling-2.47.0 → docling-2.47.1}/docling.egg-info/top_level.txt +0 -0
  107. {docling-2.47.0 → docling-2.47.1}/setup.cfg +0 -0
  108. {docling-2.47.0 → docling-2.47.1}/tests/test_asr_pipeline.py +0 -0
  109. {docling-2.47.0 → docling-2.47.1}/tests/test_backend_asciidoc.py +0 -0
  110. {docling-2.47.0 → docling-2.47.1}/tests/test_backend_csv.py +0 -0
  111. {docling-2.47.0 → docling-2.47.1}/tests/test_backend_docling_json.py +0 -0
  112. {docling-2.47.0 → docling-2.47.1}/tests/test_backend_docling_parse.py +0 -0
  113. {docling-2.47.0 → docling-2.47.1}/tests/test_backend_docling_parse_v2.py +0 -0
  114. {docling-2.47.0 → docling-2.47.1}/tests/test_backend_docling_parse_v4.py +0 -0
  115. {docling-2.47.0 → docling-2.47.1}/tests/test_backend_html.py +0 -0
  116. {docling-2.47.0 → docling-2.47.1}/tests/test_backend_jats.py +0 -0
  117. {docling-2.47.0 → docling-2.47.1}/tests/test_backend_markdown.py +0 -0
  118. {docling-2.47.0 → docling-2.47.1}/tests/test_backend_mets_gbs.py +0 -0
  119. {docling-2.47.0 → docling-2.47.1}/tests/test_backend_msexcel.py +0 -0
  120. {docling-2.47.0 → docling-2.47.1}/tests/test_backend_msword.py +0 -0
  121. {docling-2.47.0 → docling-2.47.1}/tests/test_backend_patent_uspto.py +0 -0
  122. {docling-2.47.0 → docling-2.47.1}/tests/test_backend_pdfium.py +0 -0
  123. {docling-2.47.0 → docling-2.47.1}/tests/test_backend_pptx.py +0 -0
  124. {docling-2.47.0 → docling-2.47.1}/tests/test_backend_webp.py +0 -0
  125. {docling-2.47.0 → docling-2.47.1}/tests/test_cli.py +0 -0
  126. {docling-2.47.0 → docling-2.47.1}/tests/test_code_formula.py +0 -0
  127. {docling-2.47.0 → docling-2.47.1}/tests/test_data_gen_flag.py +0 -0
  128. {docling-2.47.0 → docling-2.47.1}/tests/test_document_picture_classifier.py +0 -0
  129. {docling-2.47.0 → docling-2.47.1}/tests/test_e2e_conversion.py +0 -0
  130. {docling-2.47.0 → docling-2.47.1}/tests/test_e2e_ocr_conversion.py +0 -0
  131. {docling-2.47.0 → docling-2.47.1}/tests/test_input_doc.py +0 -0
  132. {docling-2.47.0 → docling-2.47.1}/tests/test_interfaces.py +0 -0
  133. {docling-2.47.0 → docling-2.47.1}/tests/test_invalid_input.py +0 -0
  134. {docling-2.47.0 → docling-2.47.1}/tests/test_legacy_format_transform.py +0 -0
  135. {docling-2.47.0 → docling-2.47.1}/tests/test_ocr_utils.py +0 -0
  136. {docling-2.47.0 → docling-2.47.1}/tests/test_options.py +0 -0
  137. {docling-2.47.0 → docling-2.47.1}/tests/test_settings_load.py +0 -0
  138. {docling-2.47.0 → docling-2.47.1}/tests/test_threaded_pipeline.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling
3
- Version: 2.47.0
3
+ Version: 2.47.1
4
4
  Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
5
5
  Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  License-Expression: MIT
@@ -59,7 +59,7 @@ Provides-Extra: vlm
59
59
  Requires-Dist: transformers<5.0.0,>=4.46.0; extra == "vlm"
60
60
  Requires-Dist: accelerate<2.0.0,>=1.2.1; extra == "vlm"
61
61
  Requires-Dist: mlx-vlm<1.0.0,>=0.3.0; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "vlm"
62
- Requires-Dist: vllm<1.0.0,>=0.10.0; (python_version >= "3.10" and sys_platform == "linux") and extra == "vlm"
62
+ Requires-Dist: vllm<1.0.0,>=0.10.0; (python_version >= "3.10" and sys_platform == "linux" and platform_machine == "x86_64") and extra == "vlm"
63
63
  Provides-Extra: rapidocr
64
64
  Requires-Dist: rapidocr-onnxruntime<2.0.0,>=1.4.0; python_version < "3.13" and extra == "rapidocr"
65
65
  Requires-Dist: onnxruntime<2.0.0,>=1.7.0; extra == "rapidocr"
@@ -146,6 +146,7 @@ class PaginatedPipeline(BasePipeline): # TODO this is a bad name.
146
146
  conv_res.pages.append(Page(page_no=i))
147
147
 
148
148
  try:
149
+ total_pages_processed = 0
149
150
  # Iterate batches of pages (page_batch_size) in the doc
150
151
  for page_batch in chunkify(
151
152
  conv_res.pages, settings.perf.page_batch_size
@@ -186,9 +187,9 @@ class PaginatedPipeline(BasePipeline): # TODO this is a bad name.
186
187
  )
187
188
  conv_res.status = ConversionStatus.PARTIAL_SUCCESS
188
189
  break
189
-
190
+ total_pages_processed += len(page_batch)
190
191
  _log.debug(
191
- f"Finished converting page batch time={end_batch_time:.3f}"
192
+ f"Finished converting pages {total_pages_processed}/{len(conv_res.pages)} time={end_batch_time:.3f}"
192
193
  )
193
194
 
194
195
  except Exception as e:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling
3
- Version: 2.47.0
3
+ Version: 2.47.1
4
4
  Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
5
5
  Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  License-Expression: MIT
@@ -59,7 +59,7 @@ Provides-Extra: vlm
59
59
  Requires-Dist: transformers<5.0.0,>=4.46.0; extra == "vlm"
60
60
  Requires-Dist: accelerate<2.0.0,>=1.2.1; extra == "vlm"
61
61
  Requires-Dist: mlx-vlm<1.0.0,>=0.3.0; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "vlm"
62
- Requires-Dist: vllm<1.0.0,>=0.10.0; (python_version >= "3.10" and sys_platform == "linux") and extra == "vlm"
62
+ Requires-Dist: vllm<1.0.0,>=0.10.0; (python_version >= "3.10" and sys_platform == "linux" and platform_machine == "x86_64") and extra == "vlm"
63
63
  Provides-Extra: rapidocr
64
64
  Requires-Dist: rapidocr-onnxruntime<2.0.0,>=1.4.0; python_version < "3.13" and extra == "rapidocr"
65
65
  Requires-Dist: onnxruntime<2.0.0,>=1.7.0; extra == "rapidocr"
@@ -49,5 +49,5 @@ accelerate<2.0.0,>=1.2.1
49
49
  [vlm:python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64"]
50
50
  mlx-vlm<1.0.0,>=0.3.0
51
51
 
52
- [vlm:python_version >= "3.10" and sys_platform == "linux"]
52
+ [vlm:python_version >= "3.10" and sys_platform == "linux" and platform_machine == "x86_64"]
53
53
  vllm<1.0.0,>=0.10.0
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "docling"
3
- version = "2.47.0" # DO NOT EDIT, updated automatically
3
+ version = "2.47.1" # DO NOT EDIT, updated automatically
4
4
  description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications."
5
5
  license = "MIT"
6
6
  keywords = [
@@ -93,7 +93,7 @@ vlm = [
93
93
  'transformers (>=4.46.0,<5.0.0)',
94
94
  'accelerate (>=1.2.1,<2.0.0)',
95
95
  'mlx-vlm (>=0.3.0,<1.0.0) ; python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64"',
96
- 'vllm (>=0.10.0,<1.0.0) ; python_version >= "3.10" and sys_platform == "linux"',
96
+ 'vllm (>=0.10.0,<1.0.0) ; python_version >= "3.10" and sys_platform == "linux" and platform_machine == "x86_64"',
97
97
  ]
98
98
  rapidocr = [
99
99
  'rapidocr-onnxruntime (>=1.4.0,<2.0.0) ; python_version < "3.13"',
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes