docling 2.58.0__tar.gz → 2.60.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of docling has been flagged as potentially problematic; consult the registry's advisory page for details.

Files changed (153)
  1. {docling-2.58.0 → docling-2.60.0}/PKG-INFO +9 -8
  2. {docling-2.58.0 → docling-2.60.0}/docling/backend/msexcel_backend.py +6 -2
  3. {docling-2.58.0 → docling-2.60.0}/docling/backend/pypdfium2_backend.py +4 -4
  4. {docling-2.58.0 → docling-2.60.0}/docling/cli/main.py +19 -8
  5. {docling-2.58.0 → docling-2.60.0}/docling/datamodel/base_models.py +2 -0
  6. {docling-2.58.0 → docling-2.60.0}/docling/datamodel/pipeline_options.py +13 -10
  7. {docling-2.58.0 → docling-2.60.0}/docling/datamodel/pipeline_options_vlm_model.py +1 -0
  8. {docling-2.58.0 → docling-2.60.0}/docling/models/api_vlm_model.py +5 -3
  9. {docling-2.58.0 → docling-2.60.0}/docling/models/layout_model.py +4 -0
  10. {docling-2.58.0 → docling-2.60.0}/docling/models/picture_description_vlm_model.py +5 -1
  11. {docling-2.58.0 → docling-2.60.0}/docling/models/vlm_models_inline/hf_transformers_model.py +13 -3
  12. {docling-2.58.0 → docling-2.60.0}/docling/models/vlm_models_inline/mlx_model.py +9 -3
  13. {docling-2.58.0 → docling-2.60.0}/docling/models/vlm_models_inline/nuextract_transformers_model.py +13 -3
  14. {docling-2.58.0 → docling-2.60.0}/docling/models/vlm_models_inline/vllm_model.py +42 -8
  15. {docling-2.58.0 → docling-2.60.0}/docling/pipeline/asr_pipeline.py +10 -3
  16. docling-2.58.0/docling/pipeline/standard_pdf_pipeline.py → docling-2.60.0/docling/pipeline/legacy_standard_pdf_pipeline.py +2 -2
  17. docling-2.58.0/docling/pipeline/threaded_standard_pdf_pipeline.py → docling-2.60.0/docling/pipeline/standard_pdf_pipeline.py +101 -19
  18. docling-2.60.0/docling/pipeline/threaded_standard_pdf_pipeline.py +5 -0
  19. {docling-2.58.0 → docling-2.60.0}/docling/utils/api_image_request.py +17 -6
  20. {docling-2.58.0 → docling-2.60.0}/docling.egg-info/PKG-INFO +9 -8
  21. {docling-2.58.0 → docling-2.60.0}/docling.egg-info/SOURCES.txt +1 -0
  22. {docling-2.58.0 → docling-2.60.0}/docling.egg-info/requires.txt +8 -6
  23. {docling-2.58.0 → docling-2.60.0}/pyproject.toml +11 -10
  24. {docling-2.58.0 → docling-2.60.0}/tests/test_asr_pipeline.py +6 -0
  25. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_msexcel.py +90 -0
  26. {docling-2.58.0 → docling-2.60.0}/tests/test_e2e_ocr_conversion.py +10 -4
  27. {docling-2.58.0 → docling-2.60.0}/tests/test_threaded_pipeline.py +23 -1
  28. {docling-2.58.0 → docling-2.60.0}/LICENSE +0 -0
  29. {docling-2.58.0 → docling-2.60.0}/README.md +0 -0
  30. {docling-2.58.0 → docling-2.60.0}/docling/__init__.py +0 -0
  31. {docling-2.58.0 → docling-2.60.0}/docling/backend/__init__.py +0 -0
  32. {docling-2.58.0 → docling-2.60.0}/docling/backend/abstract_backend.py +0 -0
  33. {docling-2.58.0 → docling-2.60.0}/docling/backend/asciidoc_backend.py +0 -0
  34. {docling-2.58.0 → docling-2.60.0}/docling/backend/csv_backend.py +0 -0
  35. {docling-2.58.0 → docling-2.60.0}/docling/backend/docling_parse_backend.py +0 -0
  36. {docling-2.58.0 → docling-2.60.0}/docling/backend/docling_parse_v2_backend.py +0 -0
  37. {docling-2.58.0 → docling-2.60.0}/docling/backend/docling_parse_v4_backend.py +0 -0
  38. {docling-2.58.0 → docling-2.60.0}/docling/backend/docx/__init__.py +0 -0
  39. {docling-2.58.0 → docling-2.60.0}/docling/backend/docx/drawingml/utils.py +0 -0
  40. {docling-2.58.0 → docling-2.60.0}/docling/backend/docx/latex/__init__.py +0 -0
  41. {docling-2.58.0 → docling-2.60.0}/docling/backend/docx/latex/latex_dict.py +0 -0
  42. {docling-2.58.0 → docling-2.60.0}/docling/backend/docx/latex/omml.py +0 -0
  43. {docling-2.58.0 → docling-2.60.0}/docling/backend/html_backend.py +0 -0
  44. {docling-2.58.0 → docling-2.60.0}/docling/backend/json/__init__.py +0 -0
  45. {docling-2.58.0 → docling-2.60.0}/docling/backend/json/docling_json_backend.py +0 -0
  46. {docling-2.58.0 → docling-2.60.0}/docling/backend/md_backend.py +0 -0
  47. {docling-2.58.0 → docling-2.60.0}/docling/backend/mets_gbs_backend.py +0 -0
  48. {docling-2.58.0 → docling-2.60.0}/docling/backend/mspowerpoint_backend.py +0 -0
  49. {docling-2.58.0 → docling-2.60.0}/docling/backend/msword_backend.py +0 -0
  50. {docling-2.58.0 → docling-2.60.0}/docling/backend/noop_backend.py +0 -0
  51. {docling-2.58.0 → docling-2.60.0}/docling/backend/pdf_backend.py +0 -0
  52. {docling-2.58.0 → docling-2.60.0}/docling/backend/webvtt_backend.py +0 -0
  53. {docling-2.58.0 → docling-2.60.0}/docling/backend/xml/__init__.py +0 -0
  54. {docling-2.58.0 → docling-2.60.0}/docling/backend/xml/jats_backend.py +0 -0
  55. {docling-2.58.0 → docling-2.60.0}/docling/backend/xml/uspto_backend.py +0 -0
  56. {docling-2.58.0 → docling-2.60.0}/docling/chunking/__init__.py +0 -0
  57. {docling-2.58.0 → docling-2.60.0}/docling/cli/__init__.py +0 -0
  58. {docling-2.58.0 → docling-2.60.0}/docling/cli/models.py +0 -0
  59. {docling-2.58.0 → docling-2.60.0}/docling/cli/tools.py +0 -0
  60. {docling-2.58.0 → docling-2.60.0}/docling/datamodel/__init__.py +0 -0
  61. {docling-2.58.0 → docling-2.60.0}/docling/datamodel/accelerator_options.py +0 -0
  62. {docling-2.58.0 → docling-2.60.0}/docling/datamodel/asr_model_specs.py +0 -0
  63. {docling-2.58.0 → docling-2.60.0}/docling/datamodel/backend_options.py +0 -0
  64. {docling-2.58.0 → docling-2.60.0}/docling/datamodel/document.py +0 -0
  65. {docling-2.58.0 → docling-2.60.0}/docling/datamodel/extraction.py +0 -0
  66. {docling-2.58.0 → docling-2.60.0}/docling/datamodel/layout_model_specs.py +0 -0
  67. {docling-2.58.0 → docling-2.60.0}/docling/datamodel/pipeline_options_asr_model.py +0 -0
  68. {docling-2.58.0 → docling-2.60.0}/docling/datamodel/settings.py +0 -0
  69. {docling-2.58.0 → docling-2.60.0}/docling/datamodel/vlm_model_specs.py +0 -0
  70. {docling-2.58.0 → docling-2.60.0}/docling/document_converter.py +0 -0
  71. {docling-2.58.0 → docling-2.60.0}/docling/document_extractor.py +0 -0
  72. {docling-2.58.0 → docling-2.60.0}/docling/exceptions.py +0 -0
  73. {docling-2.58.0 → docling-2.60.0}/docling/models/__init__.py +0 -0
  74. {docling-2.58.0 → docling-2.60.0}/docling/models/auto_ocr_model.py +0 -0
  75. {docling-2.58.0 → docling-2.60.0}/docling/models/base_model.py +0 -0
  76. {docling-2.58.0 → docling-2.60.0}/docling/models/base_ocr_model.py +0 -0
  77. {docling-2.58.0 → docling-2.60.0}/docling/models/code_formula_model.py +0 -0
  78. {docling-2.58.0 → docling-2.60.0}/docling/models/document_picture_classifier.py +0 -0
  79. {docling-2.58.0 → docling-2.60.0}/docling/models/easyocr_model.py +0 -0
  80. {docling-2.58.0 → docling-2.60.0}/docling/models/factories/__init__.py +0 -0
  81. {docling-2.58.0 → docling-2.60.0}/docling/models/factories/base_factory.py +0 -0
  82. {docling-2.58.0 → docling-2.60.0}/docling/models/factories/ocr_factory.py +0 -0
  83. {docling-2.58.0 → docling-2.60.0}/docling/models/factories/picture_description_factory.py +0 -0
  84. {docling-2.58.0 → docling-2.60.0}/docling/models/ocr_mac_model.py +0 -0
  85. {docling-2.58.0 → docling-2.60.0}/docling/models/page_assemble_model.py +0 -0
  86. {docling-2.58.0 → docling-2.60.0}/docling/models/page_preprocessing_model.py +0 -0
  87. {docling-2.58.0 → docling-2.60.0}/docling/models/picture_description_api_model.py +0 -0
  88. {docling-2.58.0 → docling-2.60.0}/docling/models/picture_description_base_model.py +0 -0
  89. {docling-2.58.0 → docling-2.60.0}/docling/models/plugins/__init__.py +0 -0
  90. {docling-2.58.0 → docling-2.60.0}/docling/models/plugins/defaults.py +0 -0
  91. {docling-2.58.0 → docling-2.60.0}/docling/models/rapid_ocr_model.py +0 -0
  92. {docling-2.58.0 → docling-2.60.0}/docling/models/readingorder_model.py +0 -0
  93. {docling-2.58.0 → docling-2.60.0}/docling/models/table_structure_model.py +0 -0
  94. {docling-2.58.0 → docling-2.60.0}/docling/models/tesseract_ocr_cli_model.py +0 -0
  95. {docling-2.58.0 → docling-2.60.0}/docling/models/tesseract_ocr_model.py +0 -0
  96. {docling-2.58.0 → docling-2.60.0}/docling/models/utils/__init__.py +0 -0
  97. {docling-2.58.0 → docling-2.60.0}/docling/models/utils/generation_utils.py +0 -0
  98. {docling-2.58.0 → docling-2.60.0}/docling/models/utils/hf_model_download.py +0 -0
  99. {docling-2.58.0 → docling-2.60.0}/docling/models/vlm_models_inline/__init__.py +0 -0
  100. {docling-2.58.0 → docling-2.60.0}/docling/pipeline/__init__.py +0 -0
  101. {docling-2.58.0 → docling-2.60.0}/docling/pipeline/base_extraction_pipeline.py +0 -0
  102. {docling-2.58.0 → docling-2.60.0}/docling/pipeline/base_pipeline.py +0 -0
  103. {docling-2.58.0 → docling-2.60.0}/docling/pipeline/extraction_vlm_pipeline.py +0 -0
  104. {docling-2.58.0 → docling-2.60.0}/docling/pipeline/simple_pipeline.py +0 -0
  105. {docling-2.58.0 → docling-2.60.0}/docling/pipeline/vlm_pipeline.py +0 -0
  106. {docling-2.58.0 → docling-2.60.0}/docling/py.typed +0 -0
  107. {docling-2.58.0 → docling-2.60.0}/docling/utils/__init__.py +0 -0
  108. {docling-2.58.0 → docling-2.60.0}/docling/utils/accelerator_utils.py +0 -0
  109. {docling-2.58.0 → docling-2.60.0}/docling/utils/export.py +0 -0
  110. {docling-2.58.0 → docling-2.60.0}/docling/utils/glm_utils.py +0 -0
  111. {docling-2.58.0 → docling-2.60.0}/docling/utils/layout_postprocessor.py +0 -0
  112. {docling-2.58.0 → docling-2.60.0}/docling/utils/locks.py +0 -0
  113. {docling-2.58.0 → docling-2.60.0}/docling/utils/model_downloader.py +0 -0
  114. {docling-2.58.0 → docling-2.60.0}/docling/utils/ocr_utils.py +0 -0
  115. {docling-2.58.0 → docling-2.60.0}/docling/utils/orientation.py +0 -0
  116. {docling-2.58.0 → docling-2.60.0}/docling/utils/profiling.py +0 -0
  117. {docling-2.58.0 → docling-2.60.0}/docling/utils/utils.py +0 -0
  118. {docling-2.58.0 → docling-2.60.0}/docling/utils/visualization.py +0 -0
  119. {docling-2.58.0 → docling-2.60.0}/docling.egg-info/dependency_links.txt +0 -0
  120. {docling-2.58.0 → docling-2.60.0}/docling.egg-info/entry_points.txt +0 -0
  121. {docling-2.58.0 → docling-2.60.0}/docling.egg-info/top_level.txt +0 -0
  122. {docling-2.58.0 → docling-2.60.0}/setup.cfg +0 -0
  123. {docling-2.58.0 → docling-2.60.0}/tests/test_asr_mlx_whisper.py +0 -0
  124. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_asciidoc.py +0 -0
  125. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_csv.py +0 -0
  126. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_docling_json.py +0 -0
  127. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_docling_parse.py +0 -0
  128. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_docling_parse_v2.py +0 -0
  129. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_docling_parse_v4.py +0 -0
  130. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_html.py +0 -0
  131. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_jats.py +0 -0
  132. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_markdown.py +0 -0
  133. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_mets_gbs.py +0 -0
  134. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_msword.py +0 -0
  135. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_patent_uspto.py +0 -0
  136. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_pdfium.py +0 -0
  137. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_pptx.py +0 -0
  138. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_vtt.py +0 -0
  139. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_webp.py +0 -0
  140. {docling-2.58.0 → docling-2.60.0}/tests/test_cli.py +0 -0
  141. {docling-2.58.0 → docling-2.60.0}/tests/test_code_formula.py +0 -0
  142. {docling-2.58.0 → docling-2.60.0}/tests/test_data_gen_flag.py +0 -0
  143. {docling-2.58.0 → docling-2.60.0}/tests/test_document_picture_classifier.py +0 -0
  144. {docling-2.58.0 → docling-2.60.0}/tests/test_e2e_conversion.py +0 -0
  145. {docling-2.58.0 → docling-2.60.0}/tests/test_extraction.py +0 -0
  146. {docling-2.58.0 → docling-2.60.0}/tests/test_input_doc.py +0 -0
  147. {docling-2.58.0 → docling-2.60.0}/tests/test_interfaces.py +0 -0
  148. {docling-2.58.0 → docling-2.60.0}/tests/test_invalid_input.py +0 -0
  149. {docling-2.58.0 → docling-2.60.0}/tests/test_legacy_format_transform.py +0 -0
  150. {docling-2.58.0 → docling-2.60.0}/tests/test_ocr_utils.py +0 -0
  151. {docling-2.58.0 → docling-2.60.0}/tests/test_options.py +0 -0
  152. {docling-2.58.0 → docling-2.60.0}/tests/test_pdf_password.py +0 -0
  153. {docling-2.58.0 → docling-2.60.0}/tests/test_settings_load.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling
3
- Version: 2.58.0
3
+ Version: 2.60.0
4
4
  Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
5
5
  Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  License-Expression: MIT
@@ -22,6 +22,7 @@ Classifier: Programming Language :: Python :: 3.10
22
22
  Classifier: Programming Language :: Python :: 3.11
23
23
  Classifier: Programming Language :: Python :: 3.12
24
24
  Classifier: Programming Language :: Python :: 3.13
25
+ Classifier: Programming Language :: Python :: 3.14
25
26
  Requires-Python: <4.0,>=3.9
26
27
  Description-Content-Type: text/markdown
27
28
  License-File: LICENSE
@@ -45,7 +46,7 @@ Requires-Dist: beautifulsoup4<5.0.0,>=4.12.3
45
46
  Requires-Dist: pandas<3.0.0,>=2.1.4
46
47
  Requires-Dist: marko<3.0.0,>=2.1.2
47
48
  Requires-Dist: openpyxl<4.0.0,>=3.1.5
48
- Requires-Dist: lxml<6.0.0,>=4.0.0
49
+ Requires-Dist: lxml<7.0.0,>=4.0.0
49
50
  Requires-Dist: pillow<12.0.0,>=10.0.0
50
51
  Requires-Dist: tqdm<5.0.0,>=4.65.0
51
52
  Requires-Dist: pluggy<2.0.0,>=1.0.0
@@ -62,15 +63,15 @@ Requires-Dist: ocrmac<2.0.0,>=1.0.0; sys_platform == "darwin" and extra == "ocrm
62
63
  Provides-Extra: vlm
63
64
  Requires-Dist: transformers<5.0.0,>=4.46.0; extra == "vlm"
64
65
  Requires-Dist: accelerate<2.0.0,>=1.2.1; extra == "vlm"
65
- Requires-Dist: mlx-vlm<1.0.0,>=0.3.0; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "vlm"
66
- Requires-Dist: vllm<1.0.0,>=0.10.0; (python_version >= "3.10" and sys_platform == "linux" and platform_machine == "x86_64") and extra == "vlm"
66
+ Requires-Dist: mlx-vlm<1.0.0,>=0.3.0; (python_version >= "3.10" and python_version < "3.14" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "vlm"
67
+ Requires-Dist: vllm<1.0.0,>=0.10.0; (python_version >= "3.10" and python_version < "3.14" and sys_platform == "linux" and platform_machine == "x86_64") and extra == "vlm"
67
68
  Requires-Dist: qwen-vl-utils>=0.0.11; extra == "vlm"
68
69
  Provides-Extra: rapidocr
69
- Requires-Dist: rapidocr<4.0.0,>=3.3; python_version < "3.14" and extra == "rapidocr"
70
- Requires-Dist: onnxruntime<2.0.0,>=1.7.0; extra == "rapidocr"
70
+ Requires-Dist: rapidocr<4.0.0,>=3.3; extra == "rapidocr"
71
+ Requires-Dist: onnxruntime<2.0.0,>=1.7.0; python_version < "3.14" and extra == "rapidocr"
71
72
  Provides-Extra: asr
72
- Requires-Dist: mlx-whisper>=0.4.3; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "asr"
73
- Requires-Dist: openai-whisper>=20250625; extra == "asr"
73
+ Requires-Dist: mlx-whisper>=0.4.3; (python_version >= "3.10" and python_version < "3.14" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "asr"
74
+ Requires-Dist: openai-whisper>=20250625; python_version < "3.14" and extra == "asr"
74
75
  Dynamic: license-file
75
76
 
76
77
  <p align="center">
@@ -139,10 +139,14 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBacken
139
139
  self.workbook = None
140
140
  try:
141
141
  if isinstance(self.path_or_stream, BytesIO):
142
- self.workbook = load_workbook(filename=self.path_or_stream)
142
+ self.workbook = load_workbook(
143
+ filename=self.path_or_stream, data_only=True
144
+ )
143
145
 
144
146
  elif isinstance(self.path_or_stream, Path):
145
- self.workbook = load_workbook(filename=str(self.path_or_stream))
147
+ self.workbook = load_workbook(
148
+ filename=str(self.path_or_stream), data_only=True
149
+ )
146
150
 
147
151
  self.valid = self.workbook is not None
148
152
  except Exception as e:
@@ -229,10 +229,10 @@ class PyPdfiumPageBackend(PdfPageBackend):
229
229
  b=max(cell.rect.to_bounding_box().b for cell in group),
230
230
  )
231
231
 
232
- assert self._ppage is not None
233
- self.text_page = self._ppage.get_textpage()
232
+ assert self.text_page is not None
234
233
  bbox = merged_bbox.to_bottom_left_origin(page_size.height)
235
- merged_text = self.text_page.get_text_bounded(*bbox.as_tuple())
234
+ with pypdfium2_lock:
235
+ merged_text = self.text_page.get_text_bounded(*bbox.as_tuple())
236
236
 
237
237
  return TextCell(
238
238
  index=group[0].index,
@@ -255,9 +255,9 @@ class PyPdfiumPageBackend(PdfPageBackend):
255
255
  def get_bitmap_rects(self, scale: float = 1) -> Iterable[BoundingBox]:
256
256
  AREA_THRESHOLD = 0 # 32 * 32
257
257
  page_size = self.get_size()
258
- rotation = self._ppage.get_rotation()
259
258
 
260
259
  with pypdfium2_lock:
260
+ rotation = self._ppage.get_rotation()
261
261
  for obj in self._ppage.get_objects(filter=[pdfium_c.FPDF_PAGEOBJ_IMAGE]):
262
262
  pos = obj.get_pos()
263
263
  if rotation == 90:
@@ -738,10 +738,15 @@ def convert( # noqa: C901
738
738
 
739
739
  pipeline_options.vlm_options = SMOLDOCLING_MLX
740
740
  except ImportError:
741
- _log.warning(
742
- "To run SmolDocling faster, please install mlx-vlm:\n"
743
- "pip install mlx-vlm"
744
- )
741
+ if sys.version_info < (3, 14):
742
+ _log.warning(
743
+ "To run SmolDocling faster, please install mlx-vlm:\n"
744
+ "pip install mlx-vlm"
745
+ )
746
+ else:
747
+ _log.warning(
748
+ "You can run SmolDocling faster with MLX support, but it is unfortunately not yet available on Python 3.14."
749
+ )
745
750
 
746
751
  elif vlm_model == VlmModelType.GRANITEDOCLING:
747
752
  pipeline_options.vlm_options = GRANITEDOCLING_TRANSFORMERS
@@ -751,10 +756,16 @@ def convert( # noqa: C901
751
756
 
752
757
  pipeline_options.vlm_options = GRANITEDOCLING_MLX
753
758
  except ImportError:
754
- _log.warning(
755
- "To run GraniteDocling faster, please install mlx-vlm:\n"
756
- "pip install mlx-vlm"
757
- )
759
+ if sys.version_info < (3, 14):
760
+ _log.warning(
761
+ "To run GraniteDocling faster, please install mlx-vlm:\n"
762
+ "pip install mlx-vlm"
763
+ )
764
+ else:
765
+ _log.warning(
766
+ "You can run GraniteDocling faster with MLX support, but it is unfortunately not yet available on Python 3.14."
767
+ )
768
+
758
769
  elif vlm_model == VlmModelType.SMOLDOCLING_VLLM:
759
770
  pipeline_options.vlm_options = SMOLDOCLING_VLLM
760
771
 
@@ -207,6 +207,8 @@ class VlmPrediction(BaseModel):
207
207
  text: str = ""
208
208
  generated_tokens: list[VlmPredictionToken] = []
209
209
  generation_time: float = -1
210
+ num_tokens: Optional[int] = None
211
+ stop_reason: Optional[str] = None # todo define an enum for possible stop reasons
210
212
 
211
213
 
212
214
  class ContainerElement(
@@ -361,15 +361,7 @@ class PdfPipelineOptions(PaginatedPipelineOptions):
361
361
 
362
362
  generate_parsed_pages: bool = False
363
363
 
364
-
365
- class ProcessingPipeline(str, Enum):
366
- STANDARD = "standard"
367
- VLM = "vlm"
368
- ASR = "asr"
369
-
370
-
371
- class ThreadedPdfPipelineOptions(PdfPipelineOptions):
372
- """Pipeline options for the threaded PDF pipeline with batching and backpressure control"""
364
+ ### Arguments for threaded PDF pipeline with batching and backpressure control
373
365
 
374
366
  # Batch sizes for different stages
375
367
  ocr_batch_size: int = 4
@@ -377,7 +369,18 @@ class ThreadedPdfPipelineOptions(PdfPipelineOptions):
377
369
  table_batch_size: int = 4
378
370
 
379
371
  # Timing control
380
- batch_timeout_seconds: float = 2.0
372
+ batch_polling_interval_seconds: float = 0.5
381
373
 
382
374
  # Backpressure and queue control
383
375
  queue_max_size: int = 100
376
+
377
+
378
+ class ProcessingPipeline(str, Enum):
379
+ LEGACY = "legacy"
380
+ STANDARD = "standard"
381
+ VLM = "vlm"
382
+ ASR = "asr"
383
+
384
+
385
+ class ThreadedPdfPipelineOptions(PdfPipelineOptions):
386
+ """Pipeline options for the threaded PDF pipeline with batching and backpressure control"""
@@ -82,6 +82,7 @@ class InlineVlmOptions(BaseVlmOptions):
82
82
 
83
83
  use_kv_cache: bool = True
84
84
  max_new_tokens: int = 4096
85
+ track_generated_tokens: bool = False
85
86
 
86
87
  @property
87
88
  def repo_cache_folder(self) -> str:
@@ -73,7 +73,7 @@ class ApiVlmModel(BasePageModel):
73
73
  # Skip non-GenerationStopper criteria (should have been caught in validation)
74
74
 
75
75
  # Streaming path with early abort support
76
- page_tags = api_image_request_streaming(
76
+ page_tags, num_tokens = api_image_request_streaming(
77
77
  image=hi_res_image,
78
78
  prompt=prompt,
79
79
  url=self.vlm_options.url,
@@ -84,7 +84,7 @@ class ApiVlmModel(BasePageModel):
84
84
  )
85
85
  else:
86
86
  # Non-streaming fallback (existing behavior)
87
- page_tags = api_image_request(
87
+ page_tags, num_tokens = api_image_request(
88
88
  image=hi_res_image,
89
89
  prompt=prompt,
90
90
  url=self.vlm_options.url,
@@ -94,7 +94,9 @@ class ApiVlmModel(BasePageModel):
94
94
  )
95
95
 
96
96
  page_tags = self.vlm_options.decode_response(page_tags)
97
- page.predictions.vlm_response = VlmPrediction(text=page_tags)
97
+ page.predictions.vlm_response = VlmPrediction(
98
+ text=page_tags, num_tokens=num_tokens
99
+ )
98
100
  return page
99
101
 
100
102
  with ThreadPoolExecutor(max_workers=self.concurrency) as executor:
@@ -167,6 +167,10 @@ class LayoutModel(BasePageModel):
167
167
  valid_pages.append(page)
168
168
  valid_page_images.append(page_image)
169
169
 
170
+ print(f"{len(pages)=}, {pages[0].page_no}-{pages[-1].page_no}")
171
+ print(f"{len(valid_pages)=}")
172
+ print(f"{len(valid_page_images)=}")
173
+
170
174
  # Process all valid pages with batch prediction
171
175
  batch_predictions = []
172
176
  if valid_page_images:
@@ -1,3 +1,4 @@
1
+ import sys
1
2
  import threading
2
3
  from collections.abc import Iterable
3
4
  from pathlib import Path
@@ -75,7 +76,10 @@ class PictureDescriptionVlmModel(
75
76
  else "sdpa"
76
77
  ),
77
78
  )
78
- self.model = torch.compile(self.model) # type: ignore
79
+ if sys.version_info < (3, 14):
80
+ self.model = torch.compile(self.model) # type: ignore
81
+ else:
82
+ self.model.eval()
79
83
 
80
84
  self.provenance = f"{self.options.repo_id}"
81
85
 
@@ -1,5 +1,6 @@
1
1
  import importlib.metadata
2
2
  import logging
3
+ import sys
3
4
  import time
4
5
  from collections.abc import Iterable
5
6
  from pathlib import Path
@@ -129,7 +130,10 @@ class HuggingFaceTransformersVlmModel(BaseVlmPageModel, HuggingFaceModelDownload
129
130
  trust_remote_code=vlm_options.trust_remote_code,
130
131
  revision=vlm_options.revision,
131
132
  )
132
- self.vlm_model = torch.compile(self.vlm_model) # type: ignore
133
+ if sys.version_info < (3, 14):
134
+ self.vlm_model = torch.compile(self.vlm_model) # type: ignore
135
+ else:
136
+ self.vlm_model.eval()
133
137
 
134
138
  # Load generation config
135
139
  self.generation_config = GenerationConfig.from_pretrained(
@@ -363,13 +367,19 @@ class HuggingFaceTransformersVlmModel(BaseVlmPageModel, HuggingFaceModelDownload
363
367
  decoded_texts = [text.rstrip(pad_token) for text in decoded_texts]
364
368
 
365
369
  # -- Optional logging
370
+ num_tokens = None
366
371
  if generated_ids.shape[0] > 0:
372
+ num_tokens = int(generated_ids[0].shape[0])
367
373
  _log.debug(
368
- f"Generated {int(generated_ids[0].shape[0])} tokens in {generation_time:.2f}s "
374
+ f"Generated {num_tokens} tokens in {generation_time:.2f}s "
369
375
  f"for batch size {generated_ids.shape[0]}."
370
376
  )
371
377
 
372
378
  for text in decoded_texts:
373
379
  # Apply decode_response to the output text
374
380
  decoded_text = self.vlm_options.decode_response(text)
375
- yield VlmPrediction(text=decoded_text, generation_time=generation_time)
381
+ yield VlmPrediction(
382
+ text=decoded_text,
383
+ generation_time=generation_time,
384
+ num_tokens=num_tokens,
385
+ )
@@ -50,9 +50,14 @@ class HuggingFaceMlxModel(BaseVlmPageModel, HuggingFaceModelDownloadMixin):
50
50
  from mlx_vlm.prompt_utils import apply_chat_template # type: ignore
51
51
  from mlx_vlm.utils import load_config # type: ignore
52
52
  except ImportError:
53
- raise ImportError(
54
- "mlx-vlm is not installed. Please install it via `pip install mlx-vlm` to use MLX VLM models."
55
- )
53
+ if sys.version_info < (3, 14):
54
+ raise ImportError(
55
+ "mlx-vlm is not installed. Please install it via `pip install mlx-vlm` to use MLX VLM models."
56
+ )
57
+ else:
58
+ raise ImportError(
59
+ "mlx-vlm is not installed. It is not yet available on Python 3.14."
60
+ )
56
61
 
57
62
  repo_cache_folder = vlm_options.repo_id.replace("/", "--")
58
63
 
@@ -313,5 +318,6 @@ class HuggingFaceMlxModel(BaseVlmPageModel, HuggingFaceModelDownloadMixin):
313
318
  text=decoded_output,
314
319
  generation_time=generation_time,
315
320
  generated_tokens=tokens,
321
+ num_tokens=len(tokens),
316
322
  )
317
323
  _log.debug("MLX model: Released global lock")
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ import sys
2
3
  import time
3
4
  from collections.abc import Iterable
4
5
  from pathlib import Path
@@ -153,7 +154,10 @@ class NuExtractTransformersModel(BaseVlmModel, HuggingFaceModelDownloadMixin):
153
154
  ),
154
155
  trust_remote_code=vlm_options.trust_remote_code,
155
156
  )
156
- self.vlm_model = torch.compile(self.vlm_model) # type: ignore
157
+ if sys.version_info < (3, 14):
158
+ self.vlm_model = torch.compile(self.vlm_model) # type: ignore
159
+ else:
160
+ self.vlm_model.eval()
157
161
 
158
162
  # Load generation config
159
163
  self.generation_config = GenerationConfig.from_pretrained(artifacts_path)
@@ -278,13 +282,19 @@ class NuExtractTransformersModel(BaseVlmModel, HuggingFaceModelDownloadMixin):
278
282
  )
279
283
 
280
284
  # Optional logging
285
+ num_tokens = None
281
286
  if generated_ids.shape[0] > 0: # type: ignore
287
+ num_tokens = int(generated_ids[0].shape[0])
282
288
  _log.debug(
283
- f"Generated {int(generated_ids[0].shape[0])} tokens in {generation_time:.2f}s "
289
+ f"Generated {num_tokens} tokens in {generation_time:.2f}s "
284
290
  f"for batch size {generated_ids.shape[0]}." # type: ignore
285
291
  )
286
292
 
287
293
  for text in decoded_texts:
288
294
  # Apply decode_response to the output text
289
295
  decoded_text = self.vlm_options.decode_response(text)
290
- yield VlmPrediction(text=decoded_text, generation_time=generation_time)
296
+ yield VlmPrediction(
297
+ text=decoded_text,
298
+ generation_time=generation_time,
299
+ num_tokens=num_tokens,
300
+ )
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ import sys
2
3
  import time
3
4
  from collections.abc import Iterable
4
5
  from pathlib import Path
@@ -8,7 +9,7 @@ import numpy as np
8
9
  from PIL.Image import Image
9
10
 
10
11
  from docling.datamodel.accelerator_options import AcceleratorOptions
11
- from docling.datamodel.base_models import Page, VlmPrediction
12
+ from docling.datamodel.base_models import Page, VlmPrediction, VlmPredictionToken
12
13
  from docling.datamodel.document import ConversionResult
13
14
  from docling.datamodel.pipeline_options_vlm_model import (
14
15
  InlineVlmOptions,
@@ -87,7 +88,7 @@ class VllmVlmModel(BaseVlmPageModel, HuggingFaceModelDownloadMixin):
87
88
  vlm_options: InlineVlmOptions,
88
89
  ):
89
90
  self.enabled = enabled
90
- self.vlm_options = vlm_options
91
+ self.vlm_options: InlineVlmOptions = vlm_options
91
92
 
92
93
  self.llm = None
93
94
  self.sampling_params = None
@@ -100,7 +101,18 @@ class VllmVlmModel(BaseVlmPageModel, HuggingFaceModelDownloadMixin):
100
101
  return
101
102
 
102
103
  from transformers import AutoProcessor
103
- from vllm import LLM, SamplingParams
104
+
105
+ try:
106
+ from vllm import LLM, SamplingParams
107
+ except ImportError:
108
+ if sys.version_info < (3, 14):
109
+ raise ImportError(
110
+ "vllm is not installed. Please install it via `pip install vllm`."
111
+ )
112
+ else:
113
+ raise ImportError(
114
+ "vllm is not installed. It is not yet available on Python 3.14."
115
+ )
104
116
 
105
117
  # Device selection
106
118
  self.device = decide_device(
@@ -222,7 +234,8 @@ class VllmVlmModel(BaseVlmPageModel, HuggingFaceModelDownloadMixin):
222
234
  pages_with_images.append(page)
223
235
 
224
236
  if images:
225
- predictions = list(self.process_images(images, user_prompts))
237
+ with TimeRecorder(conv_res, "vlm_inference"):
238
+ predictions = list(self.process_images(images, user_prompts))
226
239
  for page, prediction in zip(pages_with_images, predictions):
227
240
  page.predictions.vlm_response = prediction
228
241
 
@@ -288,13 +301,34 @@ class VllmVlmModel(BaseVlmPageModel, HuggingFaceModelDownloadMixin):
288
301
  # Optional debug
289
302
  if outputs:
290
303
  try:
291
- num_tokens = len(outputs[0].outputs[0].token_ids)
292
- _log.debug(f"Generated {num_tokens} tokens in {generation_time:.2f}s.")
304
+ num_tokens_within_batch = len(outputs[0].outputs[0].token_ids)
305
+ _log.debug(
306
+ f"Generated {num_tokens_within_batch} tokens for batch in {generation_time:.2f}s."
307
+ )
293
308
  except Exception:
294
- pass
309
+ num_tokens_within_batch = 0
295
310
 
296
311
  # Emit predictions
297
312
  for output in outputs:
298
313
  text = output.outputs[0].text if output.outputs else ""
314
+ stop_reason = output.outputs[0].stop_reason if output.outputs else ""
315
+ generated_tokens = [
316
+ VlmPredictionToken(token=int(p)) for p in output.outputs[0].token_ids
317
+ ]
318
+ num_tokens = len(generated_tokens)
299
319
  decoded_text = self.vlm_options.decode_response(text)
300
- yield VlmPrediction(text=decoded_text, generation_time=generation_time)
320
+ if self.vlm_options.track_generated_tokens:
321
+ yield VlmPrediction(
322
+ text=decoded_text,
323
+ generation_time=generation_time,
324
+ num_tokens=num_tokens,
325
+ stop_reason=stop_reason,
326
+ generated_tokens=generated_tokens,
327
+ )
328
+ else:
329
+ yield VlmPrediction(
330
+ text=decoded_text,
331
+ generation_time=generation_time,
332
+ num_tokens=num_tokens,
333
+ stop_reason=stop_reason,
334
+ )
@@ -1,6 +1,7 @@
1
1
  import logging
2
2
  import os
3
3
  import re
4
+ import sys
4
5
  import tempfile
5
6
  from io import BytesIO
6
7
  from pathlib import Path
@@ -117,9 +118,15 @@ class _NativeWhisperModel:
117
118
  try:
118
119
  import whisper # type: ignore
119
120
  except ImportError:
120
- raise ImportError(
121
- "whisper is not installed. Please install it via `pip install openai-whisper` or do `uv sync --extra asr`."
122
- )
121
+ if sys.version_info < (3, 14):
122
+ raise ImportError(
123
+ "whisper is not installed. Please install it via `pip install openai-whisper` or do `uv sync --extra asr`."
124
+ )
125
+ else:
126
+ raise ImportError(
127
+ "whisper is not installed. Unfortunately its dependencies are not yet available for Python 3.14."
128
+ )
129
+
123
130
  self.asr_options = asr_options
124
131
  self.max_tokens = asr_options.max_new_tokens
125
132
  self.temperature = asr_options.temperature
@@ -31,7 +31,7 @@ from docling.utils.profiling import ProfilingScope, TimeRecorder
31
31
  _log = logging.getLogger(__name__)
32
32
 
33
33
 
34
- class StandardPdfPipeline(PaginatedPipeline):
34
+ class LegacyStandardPdfPipeline(PaginatedPipeline):
35
35
  def __init__(self, pipeline_options: PdfPipelineOptions):
36
36
  super().__init__(pipeline_options)
37
37
  self.pipeline_options: PdfPipelineOptions
@@ -102,7 +102,7 @@ class StandardPdfPipeline(PaginatedPipeline):
102
102
  local_dir: Optional[Path] = None, force: bool = False
103
103
  ) -> Path:
104
104
  warnings.warn(
105
- "The usage of StandardPdfPipeline.download_models_hf() is deprecated "
105
+ "The usage of LegacyStandardPdfPipeline.download_models_hf() is deprecated "
106
106
  "use instead the utility `docling-tools models download`, or "
107
107
  "the upstream method docling.utils.models_downloader.download_all()",
108
108
  DeprecationWarning,