docling 2.34.0__tar.gz → 2.36.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. {docling-2.34.0 → docling-2.36.0}/PKG-INFO +54 -55
  2. {docling-2.34.0 → docling-2.36.0}/README.md +3 -4
  3. {docling-2.34.0 → docling-2.36.0}/docling/cli/main.py +48 -18
  4. docling-2.36.0/docling/datamodel/accelerator_options.py +68 -0
  5. {docling-2.34.0 → docling-2.36.0}/docling/datamodel/base_models.py +10 -8
  6. {docling-2.34.0 → docling-2.36.0}/docling/datamodel/document.py +7 -2
  7. {docling-2.34.0 → docling-2.36.0}/docling/datamodel/pipeline_options.py +29 -161
  8. docling-2.36.0/docling/datamodel/pipeline_options_vlm_model.py +81 -0
  9. docling-2.36.0/docling/datamodel/vlm_model_specs.py +144 -0
  10. {docling-2.34.0 → docling-2.36.0}/docling/document_converter.py +5 -0
  11. {docling-2.34.0 → docling-2.36.0}/docling/models/api_vlm_model.py +1 -1
  12. {docling-2.34.0 → docling-2.36.0}/docling/models/base_ocr_model.py +2 -1
  13. {docling-2.34.0 → docling-2.36.0}/docling/models/code_formula_model.py +6 -11
  14. {docling-2.34.0 → docling-2.36.0}/docling/models/document_picture_classifier.py +6 -11
  15. {docling-2.34.0 → docling-2.36.0}/docling/models/easyocr_model.py +1 -2
  16. {docling-2.34.0 → docling-2.36.0}/docling/models/layout_model.py +22 -17
  17. {docling-2.34.0 → docling-2.36.0}/docling/models/ocr_mac_model.py +1 -1
  18. {docling-2.34.0 → docling-2.36.0}/docling/models/page_preprocessing_model.py +11 -6
  19. {docling-2.34.0 → docling-2.36.0}/docling/models/picture_description_api_model.py +1 -1
  20. {docling-2.34.0 → docling-2.36.0}/docling/models/picture_description_base_model.py +1 -1
  21. {docling-2.34.0 → docling-2.36.0}/docling/models/picture_description_vlm_model.py +7 -22
  22. {docling-2.34.0 → docling-2.36.0}/docling/models/rapid_ocr_model.py +1 -2
  23. {docling-2.34.0 → docling-2.36.0}/docling/models/table_structure_model.py +6 -12
  24. {docling-2.34.0 → docling-2.36.0}/docling/models/tesseract_ocr_cli_model.py +1 -1
  25. {docling-2.34.0 → docling-2.36.0}/docling/models/tesseract_ocr_model.py +1 -1
  26. docling-2.36.0/docling/models/utils/hf_model_download.py +40 -0
  27. docling-2.36.0/docling/models/vlm_models_inline/hf_transformers_model.py +194 -0
  28. docling-2.34.0/docling/models/hf_mlx_model.py → docling-2.36.0/docling/models/vlm_models_inline/mlx_model.py +56 -44
  29. docling-2.36.0/docling/pipeline/__init__.py +0 -0
  30. {docling-2.34.0 → docling-2.36.0}/docling/pipeline/standard_pdf_pipeline.py +69 -57
  31. docling-2.36.0/docling/pipeline/vlm_pipeline.py +386 -0
  32. docling-2.36.0/docling/py.typed +1 -0
  33. docling-2.36.0/docling/utils/__init__.py +0 -0
  34. {docling-2.34.0 → docling-2.36.0}/docling/utils/accelerator_utils.py +17 -2
  35. {docling-2.34.0 → docling-2.36.0}/docling/utils/model_downloader.py +13 -12
  36. docling-2.36.0/docling.egg-info/PKG-INFO +216 -0
  37. docling-2.36.0/docling.egg-info/SOURCES.txt +124 -0
  38. docling-2.36.0/docling.egg-info/entry_points.txt +6 -0
  39. docling-2.36.0/docling.egg-info/requires.txt +47 -0
  40. docling-2.36.0/docling.egg-info/top_level.txt +1 -0
  41. docling-2.36.0/pyproject.toml +266 -0
  42. docling-2.36.0/setup.cfg +4 -0
  43. docling-2.36.0/tests/test_backend_asciidoc.py +50 -0
  44. docling-2.36.0/tests/test_backend_csv.py +87 -0
  45. docling-2.36.0/tests/test_backend_docling_json.py +58 -0
  46. docling-2.36.0/tests/test_backend_docling_parse.py +77 -0
  47. docling-2.36.0/tests/test_backend_docling_parse_v2.py +76 -0
  48. docling-2.36.0/tests/test_backend_docling_parse_v4.py +76 -0
  49. docling-2.36.0/tests/test_backend_html.py +149 -0
  50. docling-2.36.0/tests/test_backend_jats.py +62 -0
  51. docling-2.36.0/tests/test_backend_markdown.py +41 -0
  52. docling-2.36.0/tests/test_backend_msexcel.py +99 -0
  53. docling-2.36.0/tests/test_backend_msword.py +133 -0
  54. docling-2.36.0/tests/test_backend_patent_uspto.py +458 -0
  55. docling-2.36.0/tests/test_backend_pdfium.py +90 -0
  56. docling-2.36.0/tests/test_backend_pptx.py +55 -0
  57. docling-2.36.0/tests/test_backend_webp.py +82 -0
  58. docling-2.36.0/tests/test_cli.py +27 -0
  59. docling-2.36.0/tests/test_code_formula.py +62 -0
  60. docling-2.36.0/tests/test_data_gen_flag.py +9 -0
  61. docling-2.36.0/tests/test_document_picture_classifier.py +78 -0
  62. docling-2.36.0/tests/test_e2e_conversion.py +60 -0
  63. docling-2.36.0/tests/test_e2e_ocr_conversion.py +104 -0
  64. docling-2.36.0/tests/test_input_doc.py +245 -0
  65. docling-2.36.0/tests/test_interfaces.py +67 -0
  66. docling-2.36.0/tests/test_invalid_input.py +44 -0
  67. docling-2.36.0/tests/test_legacy_format_transform.py +52 -0
  68. docling-2.36.0/tests/test_options.py +172 -0
  69. docling-2.36.0/tests/test_settings_load.py +29 -0
  70. docling-2.34.0/docling/models/hf_vlm_model.py +0 -182
  71. docling-2.34.0/docling/pipeline/vlm_pipeline.py +0 -219
  72. docling-2.34.0/pyproject.toml +0 -285
  73. {docling-2.34.0 → docling-2.36.0}/LICENSE +0 -0
  74. {docling-2.34.0 → docling-2.36.0}/docling/__init__.py +0 -0
  75. {docling-2.34.0 → docling-2.36.0}/docling/backend/__init__.py +0 -0
  76. {docling-2.34.0 → docling-2.36.0}/docling/backend/abstract_backend.py +0 -0
  77. {docling-2.34.0 → docling-2.36.0}/docling/backend/asciidoc_backend.py +0 -0
  78. {docling-2.34.0 → docling-2.36.0}/docling/backend/csv_backend.py +0 -0
  79. {docling-2.34.0 → docling-2.36.0}/docling/backend/docling_parse_backend.py +0 -0
  80. {docling-2.34.0 → docling-2.36.0}/docling/backend/docling_parse_v2_backend.py +0 -0
  81. {docling-2.34.0 → docling-2.36.0}/docling/backend/docling_parse_v4_backend.py +0 -0
  82. {docling-2.34.0 → docling-2.36.0}/docling/backend/docx/__init__.py +0 -0
  83. {docling-2.34.0 → docling-2.36.0}/docling/backend/docx/latex/__init__.py +0 -0
  84. {docling-2.34.0 → docling-2.36.0}/docling/backend/docx/latex/latex_dict.py +0 -0
  85. {docling-2.34.0 → docling-2.36.0}/docling/backend/docx/latex/omml.py +0 -0
  86. {docling-2.34.0 → docling-2.36.0}/docling/backend/html_backend.py +0 -0
  87. {docling-2.34.0 → docling-2.36.0}/docling/backend/json/__init__.py +0 -0
  88. {docling-2.34.0 → docling-2.36.0}/docling/backend/json/docling_json_backend.py +0 -0
  89. {docling-2.34.0 → docling-2.36.0}/docling/backend/md_backend.py +0 -0
  90. {docling-2.34.0 → docling-2.36.0}/docling/backend/msexcel_backend.py +0 -0
  91. {docling-2.34.0 → docling-2.36.0}/docling/backend/mspowerpoint_backend.py +0 -0
  92. {docling-2.34.0 → docling-2.36.0}/docling/backend/msword_backend.py +0 -0
  93. {docling-2.34.0 → docling-2.36.0}/docling/backend/pdf_backend.py +0 -0
  94. {docling-2.34.0 → docling-2.36.0}/docling/backend/pypdfium2_backend.py +0 -0
  95. {docling-2.34.0 → docling-2.36.0}/docling/backend/xml/__init__.py +0 -0
  96. {docling-2.34.0 → docling-2.36.0}/docling/backend/xml/jats_backend.py +0 -0
  97. {docling-2.34.0 → docling-2.36.0}/docling/backend/xml/uspto_backend.py +0 -0
  98. {docling-2.34.0 → docling-2.36.0}/docling/chunking/__init__.py +0 -0
  99. {docling-2.34.0 → docling-2.36.0}/docling/cli/__init__.py +0 -0
  100. {docling-2.34.0 → docling-2.36.0}/docling/cli/models.py +0 -0
  101. {docling-2.34.0 → docling-2.36.0}/docling/cli/tools.py +0 -0
  102. {docling-2.34.0 → docling-2.36.0}/docling/datamodel/__init__.py +0 -0
  103. {docling-2.34.0 → docling-2.36.0}/docling/datamodel/settings.py +0 -0
  104. {docling-2.34.0 → docling-2.36.0}/docling/exceptions.py +0 -0
  105. {docling-2.34.0 → docling-2.36.0}/docling/models/__init__.py +0 -0
  106. {docling-2.34.0 → docling-2.36.0}/docling/models/base_model.py +0 -0
  107. {docling-2.34.0 → docling-2.36.0}/docling/models/factories/__init__.py +0 -0
  108. {docling-2.34.0 → docling-2.36.0}/docling/models/factories/base_factory.py +0 -0
  109. {docling-2.34.0 → docling-2.36.0}/docling/models/factories/ocr_factory.py +0 -0
  110. {docling-2.34.0 → docling-2.36.0}/docling/models/factories/picture_description_factory.py +0 -0
  111. {docling-2.34.0 → docling-2.36.0}/docling/models/page_assemble_model.py +0 -0
  112. {docling-2.34.0 → docling-2.36.0}/docling/models/plugins/__init__.py +0 -0
  113. {docling-2.34.0 → docling-2.36.0}/docling/models/plugins/defaults.py +0 -0
  114. {docling-2.34.0 → docling-2.36.0}/docling/models/readingorder_model.py +0 -0
  115. {docling-2.34.0/docling/pipeline → docling-2.36.0/docling/models/utils}/__init__.py +0 -0
  116. {docling-2.34.0/docling/utils → docling-2.36.0/docling/models/vlm_models_inline}/__init__.py +0 -0
  117. {docling-2.34.0 → docling-2.36.0}/docling/pipeline/base_pipeline.py +0 -0
  118. {docling-2.34.0 → docling-2.36.0}/docling/pipeline/simple_pipeline.py +0 -0
  119. {docling-2.34.0 → docling-2.36.0}/docling/utils/api_image_request.py +0 -0
  120. {docling-2.34.0 → docling-2.36.0}/docling/utils/export.py +0 -0
  121. {docling-2.34.0 → docling-2.36.0}/docling/utils/glm_utils.py +0 -0
  122. {docling-2.34.0 → docling-2.36.0}/docling/utils/layout_postprocessor.py +0 -0
  123. {docling-2.34.0 → docling-2.36.0}/docling/utils/locks.py +0 -0
  124. {docling-2.34.0 → docling-2.36.0}/docling/utils/ocr_utils.py +0 -0
  125. {docling-2.34.0 → docling-2.36.0}/docling/utils/orientation.py +0 -0
  126. {docling-2.34.0 → docling-2.36.0}/docling/utils/profiling.py +0 -0
  127. {docling-2.34.0 → docling-2.36.0}/docling/utils/utils.py +0 -0
  128. {docling-2.34.0 → docling-2.36.0}/docling/utils/visualization.py +0 -0
  129. /docling-2.34.0/docling/py.typed → /docling-2.36.0/docling.egg-info/dependency_links.txt +0 -0
@@ -1,67 +1,68 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: docling
3
- Version: 2.34.0
3
+ Version: 2.36.0
4
4
  Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
5
- Home-page: https://github.com/docling-project/docling
6
- License: MIT
5
+ Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
+ License-Expression: MIT
7
+ Project-URL: homepage, https://github.com/docling-project/docling
8
+ Project-URL: repository, https://github.com/docling-project/docling
9
+ Project-URL: issues, https://github.com/docling-project/docling/issues
10
+ Project-URL: changelog, https://github.com/docling-project/docling/blob/main/CHANGELOG.md
7
11
  Keywords: docling,convert,document,pdf,docx,html,markdown,layout model,segmentation,table structure,table former
8
- Author: Christoph Auer
9
- Author-email: cau@zurich.ibm.com
10
- Requires-Python: >=3.9,<4.0
12
+ Classifier: Operating System :: MacOS :: MacOS X
13
+ Classifier: Operating System :: POSIX :: Linux
14
+ Classifier: Operating System :: Microsoft :: Windows
11
15
  Classifier: Development Status :: 5 - Production/Stable
12
16
  Classifier: Intended Audience :: Developers
13
17
  Classifier: Intended Audience :: Science/Research
14
- Classifier: License :: OSI Approved :: MIT License
15
- Classifier: Operating System :: MacOS :: MacOS X
16
- Classifier: Operating System :: POSIX :: Linux
18
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
17
19
  Classifier: Programming Language :: Python :: 3
18
20
  Classifier: Programming Language :: Python :: 3.9
19
21
  Classifier: Programming Language :: Python :: 3.10
20
22
  Classifier: Programming Language :: Python :: 3.11
21
23
  Classifier: Programming Language :: Python :: 3.12
22
24
  Classifier: Programming Language :: Python :: 3.13
23
- Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
24
- Provides-Extra: ocrmac
25
- Provides-Extra: rapidocr
25
+ Requires-Python: <4.0,>=3.9
26
+ Description-Content-Type: text/markdown
27
+ License-File: LICENSE
28
+ Requires-Dist: pydantic<3.0.0,>=2.0.0
29
+ Requires-Dist: docling-core[chunking]<3.0.0,>=2.29.0
30
+ Requires-Dist: docling-ibm-models<4.0.0,>=3.4.4
31
+ Requires-Dist: docling-parse<5.0.0,>=4.0.0
32
+ Requires-Dist: filetype<2.0.0,>=1.2.0
33
+ Requires-Dist: pypdfium2<5.0.0,>=4.30.0
34
+ Requires-Dist: pydantic-settings<3.0.0,>=2.3.0
35
+ Requires-Dist: huggingface_hub<1,>=0.23
36
+ Requires-Dist: requests<3.0.0,>=2.32.2
37
+ Requires-Dist: easyocr<2.0,>=1.7
38
+ Requires-Dist: certifi>=2024.7.4
39
+ Requires-Dist: rtree<2.0.0,>=1.3.0
40
+ Requires-Dist: typer<0.16.0,>=0.12.5
41
+ Requires-Dist: python-docx<2.0.0,>=1.1.2
42
+ Requires-Dist: python-pptx<2.0.0,>=1.0.2
43
+ Requires-Dist: beautifulsoup4<5.0.0,>=4.12.3
44
+ Requires-Dist: pandas<3.0.0,>=2.1.4
45
+ Requires-Dist: marko<3.0.0,>=2.1.2
46
+ Requires-Dist: openpyxl<4.0.0,>=3.1.5
47
+ Requires-Dist: lxml<6.0.0,>=4.0.0
48
+ Requires-Dist: pillow<12.0.0,>=10.0.0
49
+ Requires-Dist: tqdm<5.0.0,>=4.65.0
50
+ Requires-Dist: pluggy<2.0.0,>=1.0.0
51
+ Requires-Dist: pylatexenc<3.0,>=2.10
52
+ Requires-Dist: click<8.2.0
53
+ Requires-Dist: scipy<2.0.0,>=1.6.0
26
54
  Provides-Extra: tesserocr
55
+ Requires-Dist: tesserocr<3.0.0,>=2.7.1; extra == "tesserocr"
56
+ Provides-Extra: ocrmac
57
+ Requires-Dist: ocrmac<2.0.0,>=1.0.0; sys_platform == "darwin" and extra == "ocrmac"
27
58
  Provides-Extra: vlm
28
- Requires-Dist: accelerate (>=1.2.1,<2.0.0) ; (sys_platform != "darwin" or platform_machine != "x86_64") and (extra == "vlm")
29
- Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
30
- Requires-Dist: certifi (>=2024.7.4)
31
- Requires-Dist: click (<8.2.0)
32
- Requires-Dist: docling-core[chunking] (>=2.29.0,<3.0.0)
33
- Requires-Dist: docling-ibm-models (>=3.4.0,<4.0.0)
34
- Requires-Dist: docling-parse (>=4.0.0,<5.0.0)
35
- Requires-Dist: easyocr (>=1.7,<2.0)
36
- Requires-Dist: filetype (>=1.2.0,<2.0.0)
37
- Requires-Dist: huggingface_hub (>=0.23,<1)
38
- Requires-Dist: lxml (>=4.0.0,<6.0.0)
39
- Requires-Dist: marko (>=2.1.2,<3.0.0)
40
- Requires-Dist: ocrmac (>=1.0.0,<2.0.0) ; (sys_platform == "darwin") and (extra == "ocrmac")
41
- Requires-Dist: onnxruntime (>=1.7.0,<1.20.0) ; (python_version < "3.10") and (extra == "rapidocr")
42
- Requires-Dist: onnxruntime (>=1.7.0,<2.0.0) ; (python_version >= "3.10") and (extra == "rapidocr")
43
- Requires-Dist: openpyxl (>=3.1.5,<4.0.0)
44
- Requires-Dist: pandas (>=2.1.4,<3.0.0)
45
- Requires-Dist: pillow (>=10.0.0,<12.0.0)
46
- Requires-Dist: pluggy (>=1.0.0,<2.0.0)
47
- Requires-Dist: pydantic (>=2.0.0,<3.0.0)
48
- Requires-Dist: pydantic-settings (>=2.3.0,<3.0.0)
49
- Requires-Dist: pylatexenc (>=2.10,<3.0)
50
- Requires-Dist: pypdfium2 (>=4.30.0,<5.0.0)
51
- Requires-Dist: python-docx (>=1.1.2,<2.0.0)
52
- Requires-Dist: python-pptx (>=1.0.2,<2.0.0)
53
- Requires-Dist: rapidocr-onnxruntime (>=1.4.0,<2.0.0) ; (python_version < "3.13") and (extra == "rapidocr")
54
- Requires-Dist: requests (>=2.32.2,<3.0.0)
55
- Requires-Dist: rtree (>=1.3.0,<2.0.0)
56
- Requires-Dist: scipy (>=1.6.0,<1.14.0) ; python_version < "3.10"
57
- Requires-Dist: scipy (>=1.6.0,<2.0.0) ; python_version >= "3.10"
58
- Requires-Dist: tesserocr (>=2.7.1,<3.0.0) ; extra == "tesserocr"
59
- Requires-Dist: tqdm (>=4.65.0,<5.0.0)
60
- Requires-Dist: transformers (>=4.42.0,<4.43.0) ; (sys_platform == "darwin" and platform_machine == "x86_64") and (extra == "vlm")
61
- Requires-Dist: transformers (>=4.46.0,<5.0.0) ; (sys_platform != "darwin" or platform_machine != "x86_64") and (extra == "vlm")
62
- Requires-Dist: typer (>=0.12.5,<0.16.0)
63
- Project-URL: Repository, https://github.com/docling-project/docling
64
- Description-Content-Type: text/markdown
59
+ Requires-Dist: transformers<5.0.0,>=4.46.0; extra == "vlm"
60
+ Requires-Dist: accelerate<2.0.0,>=1.2.1; extra == "vlm"
61
+ Requires-Dist: mlx-vlm>=0.1.22; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "vlm"
62
+ Provides-Extra: rapidocr
63
+ Requires-Dist: rapidocr-onnxruntime<2.0.0,>=1.4.0; python_version < "3.13" and extra == "rapidocr"
64
+ Requires-Dist: onnxruntime<2.0.0,>=1.7.0; extra == "rapidocr"
65
+ Dynamic: license-file
65
66
 
66
67
  <p align="center">
67
68
  <a href="https://github.com/docling-project/docling">
@@ -79,9 +80,8 @@ Description-Content-Type: text/markdown
79
80
  [![Docs](https://img.shields.io/badge/docs-live-brightgreen)](https://docling-project.github.io/docling/)
80
81
  [![PyPI version](https://img.shields.io/pypi/v/docling)](https://pypi.org/project/docling/)
81
82
  [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/docling)](https://pypi.org/project/docling/)
82
- [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)](https://python-poetry.org/)
83
- [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
84
- [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
83
+ [![uv](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/uv/main/assets/badge/v0.json)](https://github.com/astral-sh/uv)
84
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
85
85
  [![Pydantic v2](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/pydantic/pydantic/main/docs/badge/v2.json)](https://pydantic.dev)
86
86
  [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
87
87
  [![License MIT](https://img.shields.io/github/license/docling-project/docling)](https://opensource.org/licenses/MIT)
@@ -101,7 +101,7 @@ Docling simplifies document processing, parsing diverse formats — including ad
101
101
  * 🔒 Local execution capabilities for sensitive data and air-gapped environments
102
102
  * 🤖 Plug-and-play [integrations][integrations] incl. LangChain, LlamaIndex, Crew AI & Haystack for agentic AI
103
103
  * 🔍 Extensive OCR support for scanned PDFs and images
104
- * 🥚 Support of Visual Language Models ([SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview)) 🆕
104
+ * 🥚 Support of several Visual Language Models ([SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview))
105
105
  * 💻 Simple and convenient CLI
106
106
 
107
107
  ### Coming soon
@@ -214,4 +214,3 @@ The project was started by the AI for knowledge team at IBM Research Zurich.
214
214
  [supported_formats]: https://docling-project.github.io/docling/usage/supported_formats/
215
215
  [docling_document]: https://docling-project.github.io/docling/concepts/docling_document/
216
216
  [integrations]: https://docling-project.github.io/docling/integrations/
217
-
@@ -14,9 +14,8 @@
14
14
  [![Docs](https://img.shields.io/badge/docs-live-brightgreen)](https://docling-project.github.io/docling/)
15
15
  [![PyPI version](https://img.shields.io/pypi/v/docling)](https://pypi.org/project/docling/)
16
16
  [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/docling)](https://pypi.org/project/docling/)
17
- [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)](https://python-poetry.org/)
18
- [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
19
- [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
17
+ [![uv](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/uv/main/assets/badge/v0.json)](https://github.com/astral-sh/uv)
18
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
20
19
  [![Pydantic v2](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/pydantic/pydantic/main/docs/badge/v2.json)](https://pydantic.dev)
21
20
  [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
22
21
  [![License MIT](https://img.shields.io/github/license/docling-project/docling)](https://opensource.org/licenses/MIT)
@@ -36,7 +35,7 @@ Docling simplifies document processing, parsing diverse formats — including ad
36
35
  * 🔒 Local execution capabilities for sensitive data and air-gapped environments
37
36
  * 🤖 Plug-and-play [integrations][integrations] incl. LangChain, LlamaIndex, Crew AI & Haystack for agentic AI
38
37
  * 🔍 Extensive OCR support for scanned PDFs and images
39
- * 🥚 Support of Visual Language Models ([SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview)) 🆕
38
+ * 🥚 Support of several Visual Language Models ([SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview))
40
39
  * 💻 Simple and convenient CLI
41
40
 
42
41
  ### Coming soon
@@ -12,6 +12,12 @@ from typing import Annotated, Dict, List, Optional, Type
12
12
 
13
13
  import rich.table
14
14
  import typer
15
+ from docling_core.transforms.serializer.html import (
16
+ HTMLDocSerializer,
17
+ HTMLOutputStyle,
18
+ HTMLParams,
19
+ )
20
+ from docling_core.transforms.visualizer.layout_visualizer import LayoutVisualizer
15
21
  from docling_core.types.doc import ImageRefMode
16
22
  from docling_core.utils.file import resolve_source_to_path
17
23
  from pydantic import TypeAdapter
@@ -22,6 +28,7 @@ from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBacke
22
28
  from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
23
29
  from docling.backend.pdf_backend import PdfDocumentBackend
24
30
  from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
31
+ from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
25
32
  from docling.datamodel.base_models import (
26
33
  ConversionStatus,
27
34
  FormatToExtensions,
@@ -30,8 +37,6 @@ from docling.datamodel.base_models import (
30
37
  )
31
38
  from docling.datamodel.document import ConversionResult
32
39
  from docling.datamodel.pipeline_options import (
33
- AcceleratorDevice,
34
- AcceleratorOptions,
35
40
  EasyOcrOptions,
36
41
  OcrOptions,
37
42
  PaginatedPipelineOptions,
@@ -39,14 +44,16 @@ from docling.datamodel.pipeline_options import (
39
44
  PdfPipeline,
40
45
  PdfPipelineOptions,
41
46
  TableFormerMode,
42
- VlmModelType,
43
47
  VlmPipelineOptions,
44
- granite_vision_vlm_conversion_options,
45
- granite_vision_vlm_ollama_conversion_options,
46
- smoldocling_vlm_conversion_options,
47
- smoldocling_vlm_mlx_conversion_options,
48
48
  )
49
49
  from docling.datamodel.settings import settings
50
+ from docling.datamodel.vlm_model_specs import (
51
+ GRANITE_VISION_OLLAMA,
52
+ GRANITE_VISION_TRANSFORMERS,
53
+ SMOLDOCLING_MLX,
54
+ SMOLDOCLING_TRANSFORMERS,
55
+ VlmModelType,
56
+ )
50
57
  from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
51
58
  from docling.models.factories import get_ocr_factory
52
59
  from docling.pipeline.vlm_pipeline import VlmPipeline
@@ -156,6 +163,7 @@ def export_documents(
156
163
  export_json: bool,
157
164
  export_html: bool,
158
165
  export_html_split_page: bool,
166
+ show_layout: bool,
159
167
  export_md: bool,
160
168
  export_txt: bool,
161
169
  export_doctags: bool,
@@ -189,9 +197,27 @@ def export_documents(
189
197
  if export_html_split_page:
190
198
  fname = output_dir / f"{doc_filename}.html"
191
199
  _log.info(f"writing HTML output to {fname}")
192
- conv_res.document.save_as_html(
193
- filename=fname, image_mode=image_export_mode, split_page_view=True
194
- )
200
+ if show_layout:
201
+ ser = HTMLDocSerializer(
202
+ doc=conv_res.document,
203
+ params=HTMLParams(
204
+ image_mode=image_export_mode,
205
+ output_style=HTMLOutputStyle.SPLIT_PAGE,
206
+ ),
207
+ )
208
+ visualizer = LayoutVisualizer()
209
+ visualizer.params.show_label = False
210
+ ser_res = ser.serialize(
211
+ visualizer=visualizer,
212
+ )
213
+ with open(fname, "w") as fw:
214
+ fw.write(ser_res.text)
215
+ else:
216
+ conv_res.document.save_as_html(
217
+ filename=fname,
218
+ image_mode=image_export_mode,
219
+ split_page_view=True,
220
+ )
195
221
 
196
222
  # Export Text format:
197
223
  if export_txt:
@@ -250,6 +276,13 @@ def convert( # noqa: C901
250
276
  to_formats: List[OutputFormat] = typer.Option(
251
277
  None, "--to", help="Specify output formats. Defaults to Markdown."
252
278
  ),
279
+ show_layout: Annotated[
280
+ bool,
281
+ typer.Option(
282
+ ...,
283
+ help="If enabled, the page images will show the bounding-boxes of the items.",
284
+ ),
285
+ ] = False,
253
286
  headers: str = typer.Option(
254
287
  None,
255
288
  "--headers",
@@ -547,20 +580,16 @@ def convert( # noqa: C901
547
580
  )
548
581
 
549
582
  if vlm_model == VlmModelType.GRANITE_VISION:
550
- pipeline_options.vlm_options = granite_vision_vlm_conversion_options
583
+ pipeline_options.vlm_options = GRANITE_VISION_TRANSFORMERS
551
584
  elif vlm_model == VlmModelType.GRANITE_VISION_OLLAMA:
552
- pipeline_options.vlm_options = (
553
- granite_vision_vlm_ollama_conversion_options
554
- )
585
+ pipeline_options.vlm_options = GRANITE_VISION_OLLAMA
555
586
  elif vlm_model == VlmModelType.SMOLDOCLING:
556
- pipeline_options.vlm_options = smoldocling_vlm_conversion_options
587
+ pipeline_options.vlm_options = SMOLDOCLING_TRANSFORMERS
557
588
  if sys.platform == "darwin":
558
589
  try:
559
590
  import mlx_vlm
560
591
 
561
- pipeline_options.vlm_options = (
562
- smoldocling_vlm_mlx_conversion_options
563
- )
592
+ pipeline_options.vlm_options = SMOLDOCLING_MLX
564
593
  except ImportError:
565
594
  _log.warning(
566
595
  "To run SmolDocling faster, please install mlx-vlm:\n"
@@ -596,6 +625,7 @@ def convert( # noqa: C901
596
625
  export_json=export_json,
597
626
  export_html=export_html,
598
627
  export_html_split_page=export_html_split_page,
628
+ show_layout=show_layout,
599
629
  export_md=export_md,
600
630
  export_txt=export_txt,
601
631
  export_doctags=export_doctags,
@@ -0,0 +1,68 @@
1
+ import logging
2
+ import os
3
+ import re
4
+ from enum import Enum
5
+ from typing import Any, Union
6
+
7
+ from pydantic import field_validator, model_validator
8
+ from pydantic_settings import BaseSettings, SettingsConfigDict
9
+
10
+ _log = logging.getLogger(__name__)
11
+
12
+
13
+ class AcceleratorDevice(str, Enum):
14
+ """Devices to run model inference"""
15
+
16
+ AUTO = "auto"
17
+ CPU = "cpu"
18
+ CUDA = "cuda"
19
+ MPS = "mps"
20
+
21
+
22
+ class AcceleratorOptions(BaseSettings):
23
+ model_config = SettingsConfigDict(
24
+ env_prefix="DOCLING_", env_nested_delimiter="_", populate_by_name=True
25
+ )
26
+
27
+ num_threads: int = 4
28
+ device: Union[str, AcceleratorDevice] = "auto"
29
+ cuda_use_flash_attention2: bool = False
30
+
31
+ @field_validator("device")
32
+ def validate_device(cls, value):
33
+ # "auto", "cpu", "cuda", "mps", or "cuda:N"
34
+ if value in {d.value for d in AcceleratorDevice} or re.match(
35
+ r"^cuda(:\d+)?$", value
36
+ ):
37
+ return value
38
+ raise ValueError(
39
+ "Invalid device option. Use 'auto', 'cpu', 'mps', 'cuda', or 'cuda:N'."
40
+ )
41
+
42
+ @model_validator(mode="before")
43
+ @classmethod
44
+ def check_alternative_envvars(cls, data: Any) -> Any:
45
+ r"""
46
+ Set num_threads from the "alternative" envvar OMP_NUM_THREADS.
47
+ The alternative envvar is used only if it is valid and the regular envvar is not set.
48
+
49
+ Notice: The standard pydantic settings mechanism with parameter "aliases" does not provide
50
+ the same functionality. In case the alias envvar is set and the user tries to override the
51
+ parameter in settings initialization, Pydantic treats the parameter provided in __init__()
52
+ as an extra input instead of simply overwriting the evvar value for that parameter.
53
+ """
54
+ if isinstance(data, dict):
55
+ input_num_threads = data.get("num_threads")
56
+ # Check if to set the num_threads from the alternative envvar
57
+ if input_num_threads is None:
58
+ docling_num_threads = os.getenv("DOCLING_NUM_THREADS")
59
+ omp_num_threads = os.getenv("OMP_NUM_THREADS")
60
+ if docling_num_threads is None and omp_num_threads is not None:
61
+ try:
62
+ data["num_threads"] = int(omp_num_threads)
63
+ except ValueError:
64
+ _log.error(
65
+ "Ignoring misformatted envvar OMP_NUM_THREADS '%s'",
66
+ omp_num_threads,
67
+ )
68
+ return data
@@ -13,11 +13,11 @@ from docling_core.types.doc import (
13
13
  TableCell,
14
14
  )
15
15
  from docling_core.types.doc.page import SegmentedPdfPage, TextCell
16
-
17
- # DO NOT REMOVE; explicitly exposed from this location
18
16
  from docling_core.types.io import (
19
17
  DocumentStream,
20
18
  )
19
+
20
+ # DO NOT REMOVE; explicitly exposed from this location
21
21
  from PIL.Image import Image
22
22
  from pydantic import BaseModel, ConfigDict, Field, computed_field
23
23
 
@@ -131,12 +131,6 @@ class ErrorItem(BaseModel):
131
131
  error_message: str
132
132
 
133
133
 
134
- # class Cell(BaseModel):
135
- # id: int
136
- # text: str
137
- # bbox: BoundingBox
138
-
139
-
140
134
  class Cluster(BaseModel):
141
135
  id: int
142
136
  label: DocItemLabel
@@ -158,8 +152,16 @@ class LayoutPrediction(BaseModel):
158
152
  clusters: List[Cluster] = []
159
153
 
160
154
 
155
+ class VlmPredictionToken(BaseModel):
156
+ text: str = ""
157
+ token: int = -1
158
+ logprob: float = -1
159
+
160
+
161
161
  class VlmPrediction(BaseModel):
162
162
  text: str = ""
163
+ generated_tokens: list[VlmPredictionToken] = []
164
+ generation_time: float = -1
163
165
 
164
166
 
165
167
  class ContainerElement(
@@ -334,9 +334,9 @@ class _DocumentConversionInput(BaseModel):
334
334
  ) -> Optional[InputFormat]:
335
335
  """Guess the input format of a document by checking part of its content."""
336
336
  input_format: Optional[InputFormat] = None
337
- content_str = content.decode("utf-8")
338
337
 
339
338
  if mime == "application/xml":
339
+ content_str = content.decode("utf-8")
340
340
  match_doctype = re.search(r"<!DOCTYPE [^>]+>", content_str)
341
341
  if match_doctype:
342
342
  xml_doctype = match_doctype.group()
@@ -358,6 +358,7 @@ class _DocumentConversionInput(BaseModel):
358
358
  input_format = InputFormat.XML_JATS
359
359
 
360
360
  elif mime == "text/plain":
361
+ content_str = content.decode("utf-8")
361
362
  if InputFormat.XML_USPTO in formats and content_str.startswith("PATN\r\n"):
362
363
  input_format = InputFormat.XML_USPTO
363
364
 
@@ -411,7 +412,11 @@ class _DocumentConversionInput(BaseModel):
411
412
  else:
412
413
  return "application/xml"
413
414
 
414
- if re.match(r"<!doctype\s+html|<html|<head|<body", content_str):
415
+ if re.match(
416
+ r"(<script.*?>.*?</script>\s*)?(<!doctype\s+html|<html|<head|<body)",
417
+ content_str,
418
+ re.DOTALL,
419
+ ):
415
420
  return "text/html"
416
421
 
417
422
  p = re.compile(