docling 2.35.0__tar.gz → 2.36.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. {docling-2.35.0 → docling-2.36.1}/PKG-INFO +53 -55
  2. {docling-2.35.0 → docling-2.36.1}/README.md +3 -4
  3. {docling-2.35.0 → docling-2.36.1}/docling/cli/main.py +12 -15
  4. docling-2.36.1/docling/datamodel/accelerator_options.py +68 -0
  5. {docling-2.35.0 → docling-2.36.1}/docling/datamodel/base_models.py +10 -8
  6. {docling-2.35.0 → docling-2.36.1}/docling/datamodel/pipeline_options.py +29 -161
  7. docling-2.36.1/docling/datamodel/pipeline_options_vlm_model.py +81 -0
  8. docling-2.36.1/docling/datamodel/vlm_model_specs.py +144 -0
  9. {docling-2.35.0 → docling-2.36.1}/docling/document_converter.py +5 -0
  10. {docling-2.35.0 → docling-2.36.1}/docling/models/api_vlm_model.py +1 -1
  11. {docling-2.35.0 → docling-2.36.1}/docling/models/base_ocr_model.py +2 -1
  12. {docling-2.35.0 → docling-2.36.1}/docling/models/code_formula_model.py +6 -11
  13. {docling-2.35.0 → docling-2.36.1}/docling/models/document_picture_classifier.py +6 -11
  14. {docling-2.35.0 → docling-2.36.1}/docling/models/easyocr_model.py +1 -2
  15. {docling-2.35.0 → docling-2.36.1}/docling/models/layout_model.py +6 -11
  16. {docling-2.35.0 → docling-2.36.1}/docling/models/ocr_mac_model.py +1 -1
  17. {docling-2.35.0 → docling-2.36.1}/docling/models/picture_description_api_model.py +1 -1
  18. {docling-2.35.0 → docling-2.36.1}/docling/models/picture_description_base_model.py +1 -1
  19. {docling-2.35.0 → docling-2.36.1}/docling/models/picture_description_vlm_model.py +7 -22
  20. {docling-2.35.0 → docling-2.36.1}/docling/models/rapid_ocr_model.py +1 -2
  21. {docling-2.35.0 → docling-2.36.1}/docling/models/table_structure_model.py +6 -12
  22. {docling-2.35.0 → docling-2.36.1}/docling/models/tesseract_ocr_cli_model.py +1 -1
  23. {docling-2.35.0 → docling-2.36.1}/docling/models/tesseract_ocr_model.py +1 -1
  24. docling-2.36.1/docling/models/utils/hf_model_download.py +40 -0
  25. docling-2.36.1/docling/models/vlm_models_inline/hf_transformers_model.py +194 -0
  26. docling-2.35.0/docling/models/hf_mlx_model.py → docling-2.36.1/docling/models/vlm_models_inline/mlx_model.py +56 -44
  27. docling-2.36.1/docling/pipeline/__init__.py +0 -0
  28. docling-2.36.1/docling/pipeline/vlm_pipeline.py +386 -0
  29. docling-2.36.1/docling/py.typed +1 -0
  30. docling-2.36.1/docling/utils/__init__.py +0 -0
  31. {docling-2.35.0 → docling-2.36.1}/docling/utils/accelerator_utils.py +17 -2
  32. {docling-2.35.0 → docling-2.36.1}/docling/utils/model_downloader.py +13 -12
  33. docling-2.36.1/docling.egg-info/PKG-INFO +215 -0
  34. docling-2.36.1/docling.egg-info/SOURCES.txt +124 -0
  35. docling-2.36.1/docling.egg-info/entry_points.txt +6 -0
  36. docling-2.36.1/docling.egg-info/requires.txt +46 -0
  37. docling-2.36.1/docling.egg-info/top_level.txt +1 -0
  38. docling-2.36.1/pyproject.toml +265 -0
  39. docling-2.36.1/setup.cfg +4 -0
  40. docling-2.36.1/tests/test_backend_asciidoc.py +50 -0
  41. docling-2.36.1/tests/test_backend_csv.py +87 -0
  42. docling-2.36.1/tests/test_backend_docling_json.py +58 -0
  43. docling-2.36.1/tests/test_backend_docling_parse.py +77 -0
  44. docling-2.36.1/tests/test_backend_docling_parse_v2.py +76 -0
  45. docling-2.36.1/tests/test_backend_docling_parse_v4.py +76 -0
  46. docling-2.36.1/tests/test_backend_html.py +149 -0
  47. docling-2.36.1/tests/test_backend_jats.py +62 -0
  48. docling-2.36.1/tests/test_backend_markdown.py +41 -0
  49. docling-2.36.1/tests/test_backend_msexcel.py +99 -0
  50. docling-2.36.1/tests/test_backend_msword.py +133 -0
  51. docling-2.36.1/tests/test_backend_patent_uspto.py +458 -0
  52. docling-2.36.1/tests/test_backend_pdfium.py +90 -0
  53. docling-2.36.1/tests/test_backend_pptx.py +55 -0
  54. docling-2.36.1/tests/test_backend_webp.py +82 -0
  55. docling-2.36.1/tests/test_cli.py +27 -0
  56. docling-2.36.1/tests/test_code_formula.py +62 -0
  57. docling-2.36.1/tests/test_data_gen_flag.py +9 -0
  58. docling-2.36.1/tests/test_document_picture_classifier.py +78 -0
  59. docling-2.36.1/tests/test_e2e_conversion.py +60 -0
  60. docling-2.36.1/tests/test_e2e_ocr_conversion.py +104 -0
  61. docling-2.36.1/tests/test_input_doc.py +245 -0
  62. docling-2.36.1/tests/test_interfaces.py +67 -0
  63. docling-2.36.1/tests/test_invalid_input.py +44 -0
  64. docling-2.36.1/tests/test_legacy_format_transform.py +52 -0
  65. docling-2.36.1/tests/test_options.py +172 -0
  66. docling-2.36.1/tests/test_settings_load.py +29 -0
  67. docling-2.35.0/docling/models/hf_vlm_model.py +0 -182
  68. docling-2.35.0/docling/pipeline/vlm_pipeline.py +0 -219
  69. docling-2.35.0/pyproject.toml +0 -285
  70. {docling-2.35.0 → docling-2.36.1}/LICENSE +0 -0
  71. {docling-2.35.0 → docling-2.36.1}/docling/__init__.py +0 -0
  72. {docling-2.35.0 → docling-2.36.1}/docling/backend/__init__.py +0 -0
  73. {docling-2.35.0 → docling-2.36.1}/docling/backend/abstract_backend.py +0 -0
  74. {docling-2.35.0 → docling-2.36.1}/docling/backend/asciidoc_backend.py +0 -0
  75. {docling-2.35.0 → docling-2.36.1}/docling/backend/csv_backend.py +0 -0
  76. {docling-2.35.0 → docling-2.36.1}/docling/backend/docling_parse_backend.py +0 -0
  77. {docling-2.35.0 → docling-2.36.1}/docling/backend/docling_parse_v2_backend.py +0 -0
  78. {docling-2.35.0 → docling-2.36.1}/docling/backend/docling_parse_v4_backend.py +0 -0
  79. {docling-2.35.0 → docling-2.36.1}/docling/backend/docx/__init__.py +0 -0
  80. {docling-2.35.0 → docling-2.36.1}/docling/backend/docx/latex/__init__.py +0 -0
  81. {docling-2.35.0 → docling-2.36.1}/docling/backend/docx/latex/latex_dict.py +0 -0
  82. {docling-2.35.0 → docling-2.36.1}/docling/backend/docx/latex/omml.py +0 -0
  83. {docling-2.35.0 → docling-2.36.1}/docling/backend/html_backend.py +0 -0
  84. {docling-2.35.0 → docling-2.36.1}/docling/backend/json/__init__.py +0 -0
  85. {docling-2.35.0 → docling-2.36.1}/docling/backend/json/docling_json_backend.py +0 -0
  86. {docling-2.35.0 → docling-2.36.1}/docling/backend/md_backend.py +0 -0
  87. {docling-2.35.0 → docling-2.36.1}/docling/backend/msexcel_backend.py +0 -0
  88. {docling-2.35.0 → docling-2.36.1}/docling/backend/mspowerpoint_backend.py +0 -0
  89. {docling-2.35.0 → docling-2.36.1}/docling/backend/msword_backend.py +0 -0
  90. {docling-2.35.0 → docling-2.36.1}/docling/backend/pdf_backend.py +0 -0
  91. {docling-2.35.0 → docling-2.36.1}/docling/backend/pypdfium2_backend.py +0 -0
  92. {docling-2.35.0 → docling-2.36.1}/docling/backend/xml/__init__.py +0 -0
  93. {docling-2.35.0 → docling-2.36.1}/docling/backend/xml/jats_backend.py +0 -0
  94. {docling-2.35.0 → docling-2.36.1}/docling/backend/xml/uspto_backend.py +0 -0
  95. {docling-2.35.0 → docling-2.36.1}/docling/chunking/__init__.py +0 -0
  96. {docling-2.35.0 → docling-2.36.1}/docling/cli/__init__.py +0 -0
  97. {docling-2.35.0 → docling-2.36.1}/docling/cli/models.py +0 -0
  98. {docling-2.35.0 → docling-2.36.1}/docling/cli/tools.py +0 -0
  99. {docling-2.35.0 → docling-2.36.1}/docling/datamodel/__init__.py +0 -0
  100. {docling-2.35.0 → docling-2.36.1}/docling/datamodel/document.py +0 -0
  101. {docling-2.35.0 → docling-2.36.1}/docling/datamodel/settings.py +0 -0
  102. {docling-2.35.0 → docling-2.36.1}/docling/exceptions.py +0 -0
  103. {docling-2.35.0 → docling-2.36.1}/docling/models/__init__.py +0 -0
  104. {docling-2.35.0 → docling-2.36.1}/docling/models/base_model.py +0 -0
  105. {docling-2.35.0 → docling-2.36.1}/docling/models/factories/__init__.py +0 -0
  106. {docling-2.35.0 → docling-2.36.1}/docling/models/factories/base_factory.py +0 -0
  107. {docling-2.35.0 → docling-2.36.1}/docling/models/factories/ocr_factory.py +0 -0
  108. {docling-2.35.0 → docling-2.36.1}/docling/models/factories/picture_description_factory.py +0 -0
  109. {docling-2.35.0 → docling-2.36.1}/docling/models/page_assemble_model.py +0 -0
  110. {docling-2.35.0 → docling-2.36.1}/docling/models/page_preprocessing_model.py +0 -0
  111. {docling-2.35.0 → docling-2.36.1}/docling/models/plugins/__init__.py +0 -0
  112. {docling-2.35.0 → docling-2.36.1}/docling/models/plugins/defaults.py +0 -0
  113. {docling-2.35.0 → docling-2.36.1}/docling/models/readingorder_model.py +0 -0
  114. {docling-2.35.0/docling/pipeline → docling-2.36.1/docling/models/utils}/__init__.py +0 -0
  115. {docling-2.35.0/docling/utils → docling-2.36.1/docling/models/vlm_models_inline}/__init__.py +0 -0
  116. {docling-2.35.0 → docling-2.36.1}/docling/pipeline/base_pipeline.py +0 -0
  117. {docling-2.35.0 → docling-2.36.1}/docling/pipeline/simple_pipeline.py +0 -0
  118. {docling-2.35.0 → docling-2.36.1}/docling/pipeline/standard_pdf_pipeline.py +0 -0
  119. {docling-2.35.0 → docling-2.36.1}/docling/utils/api_image_request.py +0 -0
  120. {docling-2.35.0 → docling-2.36.1}/docling/utils/export.py +0 -0
  121. {docling-2.35.0 → docling-2.36.1}/docling/utils/glm_utils.py +0 -0
  122. {docling-2.35.0 → docling-2.36.1}/docling/utils/layout_postprocessor.py +0 -0
  123. {docling-2.35.0 → docling-2.36.1}/docling/utils/locks.py +0 -0
  124. {docling-2.35.0 → docling-2.36.1}/docling/utils/ocr_utils.py +0 -0
  125. {docling-2.35.0 → docling-2.36.1}/docling/utils/orientation.py +0 -0
  126. {docling-2.35.0 → docling-2.36.1}/docling/utils/profiling.py +0 -0
  127. {docling-2.35.0 → docling-2.36.1}/docling/utils/utils.py +0 -0
  128. {docling-2.35.0 → docling-2.36.1}/docling/utils/visualization.py +0 -0
  129. /docling-2.35.0/docling/py.typed → /docling-2.36.1/docling.egg-info/dependency_links.txt +0 -0
@@ -1,67 +1,67 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: docling
3
- Version: 2.35.0
3
+ Version: 2.36.1
4
4
  Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
5
- Home-page: https://github.com/docling-project/docling
6
- License: MIT
5
+ Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
+ License-Expression: MIT
7
+ Project-URL: homepage, https://github.com/docling-project/docling
8
+ Project-URL: repository, https://github.com/docling-project/docling
9
+ Project-URL: issues, https://github.com/docling-project/docling/issues
10
+ Project-URL: changelog, https://github.com/docling-project/docling/blob/main/CHANGELOG.md
7
11
  Keywords: docling,convert,document,pdf,docx,html,markdown,layout model,segmentation,table structure,table former
8
- Author: Christoph Auer
9
- Author-email: cau@zurich.ibm.com
10
- Requires-Python: >=3.9,<4.0
12
+ Classifier: Operating System :: MacOS :: MacOS X
13
+ Classifier: Operating System :: POSIX :: Linux
14
+ Classifier: Operating System :: Microsoft :: Windows
11
15
  Classifier: Development Status :: 5 - Production/Stable
12
16
  Classifier: Intended Audience :: Developers
13
17
  Classifier: Intended Audience :: Science/Research
14
- Classifier: License :: OSI Approved :: MIT License
15
- Classifier: Operating System :: MacOS :: MacOS X
16
- Classifier: Operating System :: POSIX :: Linux
18
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
17
19
  Classifier: Programming Language :: Python :: 3
18
20
  Classifier: Programming Language :: Python :: 3.9
19
21
  Classifier: Programming Language :: Python :: 3.10
20
22
  Classifier: Programming Language :: Python :: 3.11
21
23
  Classifier: Programming Language :: Python :: 3.12
22
24
  Classifier: Programming Language :: Python :: 3.13
23
- Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
24
- Provides-Extra: ocrmac
25
- Provides-Extra: rapidocr
25
+ Requires-Python: <4.0,>=3.9
26
+ Description-Content-Type: text/markdown
27
+ License-File: LICENSE
28
+ Requires-Dist: pydantic<3.0.0,>=2.0.0
29
+ Requires-Dist: docling-core[chunking]<3.0.0,>=2.29.0
30
+ Requires-Dist: docling-ibm-models<4.0.0,>=3.4.4
31
+ Requires-Dist: docling-parse<5.0.0,>=4.0.0
32
+ Requires-Dist: filetype<2.0.0,>=1.2.0
33
+ Requires-Dist: pypdfium2<5.0.0,>=4.30.0
34
+ Requires-Dist: pydantic-settings<3.0.0,>=2.3.0
35
+ Requires-Dist: huggingface_hub<1,>=0.23
36
+ Requires-Dist: requests<3.0.0,>=2.32.2
37
+ Requires-Dist: easyocr<2.0,>=1.7
38
+ Requires-Dist: certifi>=2024.7.4
39
+ Requires-Dist: rtree<2.0.0,>=1.3.0
40
+ Requires-Dist: typer<0.17.0,>=0.12.5
41
+ Requires-Dist: python-docx<2.0.0,>=1.1.2
42
+ Requires-Dist: python-pptx<2.0.0,>=1.0.2
43
+ Requires-Dist: beautifulsoup4<5.0.0,>=4.12.3
44
+ Requires-Dist: pandas<3.0.0,>=2.1.4
45
+ Requires-Dist: marko<3.0.0,>=2.1.2
46
+ Requires-Dist: openpyxl<4.0.0,>=3.1.5
47
+ Requires-Dist: lxml<6.0.0,>=4.0.0
48
+ Requires-Dist: pillow<12.0.0,>=10.0.0
49
+ Requires-Dist: tqdm<5.0.0,>=4.65.0
50
+ Requires-Dist: pluggy<2.0.0,>=1.0.0
51
+ Requires-Dist: pylatexenc<3.0,>=2.10
52
+ Requires-Dist: scipy<2.0.0,>=1.6.0
26
53
  Provides-Extra: tesserocr
54
+ Requires-Dist: tesserocr<3.0.0,>=2.7.1; extra == "tesserocr"
55
+ Provides-Extra: ocrmac
56
+ Requires-Dist: ocrmac<2.0.0,>=1.0.0; sys_platform == "darwin" and extra == "ocrmac"
27
57
  Provides-Extra: vlm
28
- Requires-Dist: accelerate (>=1.2.1,<2.0.0) ; (sys_platform != "darwin" or platform_machine != "x86_64") and (extra == "vlm")
29
- Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
30
- Requires-Dist: certifi (>=2024.7.4)
31
- Requires-Dist: click (<8.2.0)
32
- Requires-Dist: docling-core[chunking] (>=2.31.2,<3.0.0)
33
- Requires-Dist: docling-ibm-models (>=3.4.0,<4.0.0)
34
- Requires-Dist: docling-parse (>=4.0.0,<5.0.0)
35
- Requires-Dist: easyocr (>=1.7,<2.0)
36
- Requires-Dist: filetype (>=1.2.0,<2.0.0)
37
- Requires-Dist: huggingface_hub (>=0.23,<1)
38
- Requires-Dist: lxml (>=4.0.0,<6.0.0)
39
- Requires-Dist: marko (>=2.1.2,<3.0.0)
40
- Requires-Dist: ocrmac (>=1.0.0,<2.0.0) ; (sys_platform == "darwin") and (extra == "ocrmac")
41
- Requires-Dist: onnxruntime (>=1.7.0,<1.20.0) ; (python_version < "3.10") and (extra == "rapidocr")
42
- Requires-Dist: onnxruntime (>=1.7.0,<2.0.0) ; (python_version >= "3.10") and (extra == "rapidocr")
43
- Requires-Dist: openpyxl (>=3.1.5,<4.0.0)
44
- Requires-Dist: pandas (>=2.1.4,<3.0.0)
45
- Requires-Dist: pillow (>=10.0.0,<12.0.0)
46
- Requires-Dist: pluggy (>=1.0.0,<2.0.0)
47
- Requires-Dist: pydantic (>=2.0.0,<3.0.0)
48
- Requires-Dist: pydantic-settings (>=2.3.0,<3.0.0)
49
- Requires-Dist: pylatexenc (>=2.10,<3.0)
50
- Requires-Dist: pypdfium2 (>=4.30.0,<5.0.0)
51
- Requires-Dist: python-docx (>=1.1.2,<2.0.0)
52
- Requires-Dist: python-pptx (>=1.0.2,<2.0.0)
53
- Requires-Dist: rapidocr-onnxruntime (>=1.4.0,<2.0.0) ; (python_version < "3.13") and (extra == "rapidocr")
54
- Requires-Dist: requests (>=2.32.2,<3.0.0)
55
- Requires-Dist: rtree (>=1.3.0,<2.0.0)
56
- Requires-Dist: scipy (>=1.6.0,<1.14.0) ; python_version < "3.10"
57
- Requires-Dist: scipy (>=1.6.0,<2.0.0) ; python_version >= "3.10"
58
- Requires-Dist: tesserocr (>=2.7.1,<3.0.0) ; extra == "tesserocr"
59
- Requires-Dist: tqdm (>=4.65.0,<5.0.0)
60
- Requires-Dist: transformers (>=4.42.0,<4.43.0) ; (sys_platform == "darwin" and platform_machine == "x86_64") and (extra == "vlm")
61
- Requires-Dist: transformers (>=4.46.0,<5.0.0) ; (sys_platform != "darwin" or platform_machine != "x86_64") and (extra == "vlm")
62
- Requires-Dist: typer (>=0.12.5,<0.16.0)
63
- Project-URL: Repository, https://github.com/docling-project/docling
64
- Description-Content-Type: text/markdown
58
+ Requires-Dist: transformers<5.0.0,>=4.46.0; extra == "vlm"
59
+ Requires-Dist: accelerate<2.0.0,>=1.2.1; extra == "vlm"
60
+ Requires-Dist: mlx-vlm>=0.1.22; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "vlm"
61
+ Provides-Extra: rapidocr
62
+ Requires-Dist: rapidocr-onnxruntime<2.0.0,>=1.4.0; python_version < "3.13" and extra == "rapidocr"
63
+ Requires-Dist: onnxruntime<2.0.0,>=1.7.0; extra == "rapidocr"
64
+ Dynamic: license-file
65
65
 
66
66
  <p align="center">
67
67
  <a href="https://github.com/docling-project/docling">
@@ -79,9 +79,8 @@ Description-Content-Type: text/markdown
79
79
  [![Docs](https://img.shields.io/badge/docs-live-brightgreen)](https://docling-project.github.io/docling/)
80
80
  [![PyPI version](https://img.shields.io/pypi/v/docling)](https://pypi.org/project/docling/)
81
81
  [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/docling)](https://pypi.org/project/docling/)
82
- [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)](https://python-poetry.org/)
83
- [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
84
- [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
82
+ [![uv](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/uv/main/assets/badge/v0.json)](https://github.com/astral-sh/uv)
83
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
85
84
  [![Pydantic v2](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/pydantic/pydantic/main/docs/badge/v2.json)](https://pydantic.dev)
86
85
  [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
87
86
  [![License MIT](https://img.shields.io/github/license/docling-project/docling)](https://opensource.org/licenses/MIT)
@@ -101,7 +100,7 @@ Docling simplifies document processing, parsing diverse formats — including ad
101
100
  * 🔒 Local execution capabilities for sensitive data and air-gapped environments
102
101
  * 🤖 Plug-and-play [integrations][integrations] incl. LangChain, LlamaIndex, Crew AI & Haystack for agentic AI
103
102
  * 🔍 Extensive OCR support for scanned PDFs and images
104
- * 🥚 Support of Visual Language Models ([SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview)) 🆕
103
+ * 🥚 Support of several Visual Language Models ([SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview))
105
104
  * 💻 Simple and convenient CLI
106
105
 
107
106
  ### Coming soon
@@ -214,4 +213,3 @@ The project was started by the AI for knowledge team at IBM Research Zurich.
214
213
  [supported_formats]: https://docling-project.github.io/docling/usage/supported_formats/
215
214
  [docling_document]: https://docling-project.github.io/docling/concepts/docling_document/
216
215
  [integrations]: https://docling-project.github.io/docling/integrations/
217
-
@@ -14,9 +14,8 @@
14
14
  [![Docs](https://img.shields.io/badge/docs-live-brightgreen)](https://docling-project.github.io/docling/)
15
15
  [![PyPI version](https://img.shields.io/pypi/v/docling)](https://pypi.org/project/docling/)
16
16
  [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/docling)](https://pypi.org/project/docling/)
17
- [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)](https://python-poetry.org/)
18
- [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
19
- [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
17
+ [![uv](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/uv/main/assets/badge/v0.json)](https://github.com/astral-sh/uv)
18
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
20
19
  [![Pydantic v2](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/pydantic/pydantic/main/docs/badge/v2.json)](https://pydantic.dev)
21
20
  [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
22
21
  [![License MIT](https://img.shields.io/github/license/docling-project/docling)](https://opensource.org/licenses/MIT)
@@ -36,7 +35,7 @@ Docling simplifies document processing, parsing diverse formats — including ad
36
35
  * 🔒 Local execution capabilities for sensitive data and air-gapped environments
37
36
  * 🤖 Plug-and-play [integrations][integrations] incl. LangChain, LlamaIndex, Crew AI & Haystack for agentic AI
38
37
  * 🔍 Extensive OCR support for scanned PDFs and images
39
- * 🥚 Support of Visual Language Models ([SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview)) 🆕
38
+ * 🥚 Support of several Visual Language Models ([SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview))
40
39
  * 💻 Simple and convenient CLI
41
40
 
42
41
  ### Coming soon
@@ -28,6 +28,7 @@ from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBacke
28
28
  from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
29
29
  from docling.backend.pdf_backend import PdfDocumentBackend
30
30
  from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
31
+ from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
31
32
  from docling.datamodel.base_models import (
32
33
  ConversionStatus,
33
34
  FormatToExtensions,
@@ -36,8 +37,6 @@ from docling.datamodel.base_models import (
36
37
  )
37
38
  from docling.datamodel.document import ConversionResult
38
39
  from docling.datamodel.pipeline_options import (
39
- AcceleratorDevice,
40
- AcceleratorOptions,
41
40
  EasyOcrOptions,
42
41
  OcrOptions,
43
42
  PaginatedPipelineOptions,
@@ -45,14 +44,16 @@ from docling.datamodel.pipeline_options import (
45
44
  PdfPipeline,
46
45
  PdfPipelineOptions,
47
46
  TableFormerMode,
48
- VlmModelType,
49
47
  VlmPipelineOptions,
50
- granite_vision_vlm_conversion_options,
51
- granite_vision_vlm_ollama_conversion_options,
52
- smoldocling_vlm_conversion_options,
53
- smoldocling_vlm_mlx_conversion_options,
54
48
  )
55
49
  from docling.datamodel.settings import settings
50
+ from docling.datamodel.vlm_model_specs import (
51
+ GRANITE_VISION_OLLAMA,
52
+ GRANITE_VISION_TRANSFORMERS,
53
+ SMOLDOCLING_MLX,
54
+ SMOLDOCLING_TRANSFORMERS,
55
+ VlmModelType,
56
+ )
56
57
  from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
57
58
  from docling.models.factories import get_ocr_factory
58
59
  from docling.pipeline.vlm_pipeline import VlmPipeline
@@ -579,20 +580,16 @@ def convert( # noqa: C901
579
580
  )
580
581
 
581
582
  if vlm_model == VlmModelType.GRANITE_VISION:
582
- pipeline_options.vlm_options = granite_vision_vlm_conversion_options
583
+ pipeline_options.vlm_options = GRANITE_VISION_TRANSFORMERS
583
584
  elif vlm_model == VlmModelType.GRANITE_VISION_OLLAMA:
584
- pipeline_options.vlm_options = (
585
- granite_vision_vlm_ollama_conversion_options
586
- )
585
+ pipeline_options.vlm_options = GRANITE_VISION_OLLAMA
587
586
  elif vlm_model == VlmModelType.SMOLDOCLING:
588
- pipeline_options.vlm_options = smoldocling_vlm_conversion_options
587
+ pipeline_options.vlm_options = SMOLDOCLING_TRANSFORMERS
589
588
  if sys.platform == "darwin":
590
589
  try:
591
590
  import mlx_vlm
592
591
 
593
- pipeline_options.vlm_options = (
594
- smoldocling_vlm_mlx_conversion_options
595
- )
592
+ pipeline_options.vlm_options = SMOLDOCLING_MLX
596
593
  except ImportError:
597
594
  _log.warning(
598
595
  "To run SmolDocling faster, please install mlx-vlm:\n"
@@ -0,0 +1,68 @@
1
+ import logging
2
+ import os
3
+ import re
4
+ from enum import Enum
5
+ from typing import Any, Union
6
+
7
+ from pydantic import field_validator, model_validator
8
+ from pydantic_settings import BaseSettings, SettingsConfigDict
9
+
10
+ _log = logging.getLogger(__name__)
11
+
12
+
13
+ class AcceleratorDevice(str, Enum):
14
+ """Devices to run model inference"""
15
+
16
+ AUTO = "auto"
17
+ CPU = "cpu"
18
+ CUDA = "cuda"
19
+ MPS = "mps"
20
+
21
+
22
+ class AcceleratorOptions(BaseSettings):
23
+ model_config = SettingsConfigDict(
24
+ env_prefix="DOCLING_", env_nested_delimiter="_", populate_by_name=True
25
+ )
26
+
27
+ num_threads: int = 4
28
+ device: Union[str, AcceleratorDevice] = "auto"
29
+ cuda_use_flash_attention2: bool = False
30
+
31
+ @field_validator("device")
32
+ def validate_device(cls, value):
33
+ # "auto", "cpu", "cuda", "mps", or "cuda:N"
34
+ if value in {d.value for d in AcceleratorDevice} or re.match(
35
+ r"^cuda(:\d+)?$", value
36
+ ):
37
+ return value
38
+ raise ValueError(
39
+ "Invalid device option. Use 'auto', 'cpu', 'mps', 'cuda', or 'cuda:N'."
40
+ )
41
+
42
+ @model_validator(mode="before")
43
+ @classmethod
44
+ def check_alternative_envvars(cls, data: Any) -> Any:
45
+ r"""
46
+ Set num_threads from the "alternative" envvar OMP_NUM_THREADS.
47
+ The alternative envvar is used only if it is valid and the regular envvar is not set.
48
+
49
+ Notice: The standard pydantic settings mechanism with parameter "aliases" does not provide
50
+ the same functionality. In case the alias envvar is set and the user tries to override the
51
+ parameter in settings initialization, Pydantic treats the parameter provided in __init__()
52
+ as an extra input instead of simply overwriting the evvar value for that parameter.
53
+ """
54
+ if isinstance(data, dict):
55
+ input_num_threads = data.get("num_threads")
56
+ # Check if to set the num_threads from the alternative envvar
57
+ if input_num_threads is None:
58
+ docling_num_threads = os.getenv("DOCLING_NUM_THREADS")
59
+ omp_num_threads = os.getenv("OMP_NUM_THREADS")
60
+ if docling_num_threads is None and omp_num_threads is not None:
61
+ try:
62
+ data["num_threads"] = int(omp_num_threads)
63
+ except ValueError:
64
+ _log.error(
65
+ "Ignoring misformatted envvar OMP_NUM_THREADS '%s'",
66
+ omp_num_threads,
67
+ )
68
+ return data
@@ -13,11 +13,11 @@ from docling_core.types.doc import (
13
13
  TableCell,
14
14
  )
15
15
  from docling_core.types.doc.page import SegmentedPdfPage, TextCell
16
-
17
- # DO NOT REMOVE; explicitly exposed from this location
18
16
  from docling_core.types.io import (
19
17
  DocumentStream,
20
18
  )
19
+
20
+ # DO NOT REMOVE; explicitly exposed from this location
21
21
  from PIL.Image import Image
22
22
  from pydantic import BaseModel, ConfigDict, Field, computed_field
23
23
 
@@ -131,12 +131,6 @@ class ErrorItem(BaseModel):
131
131
  error_message: str
132
132
 
133
133
 
134
- # class Cell(BaseModel):
135
- # id: int
136
- # text: str
137
- # bbox: BoundingBox
138
-
139
-
140
134
  class Cluster(BaseModel):
141
135
  id: int
142
136
  label: DocItemLabel
@@ -158,8 +152,16 @@ class LayoutPrediction(BaseModel):
158
152
  clusters: List[Cluster] = []
159
153
 
160
154
 
155
+ class VlmPredictionToken(BaseModel):
156
+ text: str = ""
157
+ token: int = -1
158
+ logprob: float = -1
159
+
160
+
161
161
  class VlmPrediction(BaseModel):
162
162
  text: str = ""
163
+ generated_tokens: list[VlmPredictionToken] = []
164
+ generation_time: float = -1
163
165
 
164
166
 
165
167
  class ContainerElement(
@@ -1,6 +1,4 @@
1
1
  import logging
2
- import os
3
- import re
4
2
  from enum import Enum
5
3
  from pathlib import Path
6
4
  from typing import Any, ClassVar, Dict, List, Literal, Optional, Union
@@ -10,71 +8,26 @@ from pydantic import (
10
8
  BaseModel,
11
9
  ConfigDict,
12
10
  Field,
13
- field_validator,
14
- model_validator,
15
11
  )
16
- from pydantic_settings import BaseSettings, SettingsConfigDict
17
12
  from typing_extensions import deprecated
18
13
 
19
- _log = logging.getLogger(__name__)
20
-
21
-
22
- class AcceleratorDevice(str, Enum):
23
- """Devices to run model inference"""
24
-
25
- AUTO = "auto"
26
- CPU = "cpu"
27
- CUDA = "cuda"
28
- MPS = "mps"
29
-
30
-
31
- class AcceleratorOptions(BaseSettings):
32
- model_config = SettingsConfigDict(
33
- env_prefix="DOCLING_", env_nested_delimiter="_", populate_by_name=True
34
- )
14
+ # Import the following for backwards compatibility
15
+ from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
16
+ from docling.datamodel.pipeline_options_vlm_model import (
17
+ ApiVlmOptions,
18
+ InferenceFramework,
19
+ InlineVlmOptions,
20
+ ResponseFormat,
21
+ )
22
+ from docling.datamodel.vlm_model_specs import (
23
+ GRANITE_VISION_OLLAMA as granite_vision_vlm_ollama_conversion_options,
24
+ GRANITE_VISION_TRANSFORMERS as granite_vision_vlm_conversion_options,
25
+ SMOLDOCLING_MLX as smoldocling_vlm_mlx_conversion_options,
26
+ SMOLDOCLING_TRANSFORMERS as smoldocling_vlm_conversion_options,
27
+ VlmModelType,
28
+ )
35
29
 
36
- num_threads: int = 4
37
- device: Union[str, AcceleratorDevice] = "auto"
38
- cuda_use_flash_attention2: bool = False
39
-
40
- @field_validator("device")
41
- def validate_device(cls, value):
42
- # "auto", "cpu", "cuda", "mps", or "cuda:N"
43
- if value in {d.value for d in AcceleratorDevice} or re.match(
44
- r"^cuda(:\d+)?$", value
45
- ):
46
- return value
47
- raise ValueError(
48
- "Invalid device option. Use 'auto', 'cpu', 'mps', 'cuda', or 'cuda:N'."
49
- )
50
-
51
- @model_validator(mode="before")
52
- @classmethod
53
- def check_alternative_envvars(cls, data: Any) -> Any:
54
- r"""
55
- Set num_threads from the "alternative" envvar OMP_NUM_THREADS.
56
- The alternative envvar is used only if it is valid and the regular envvar is not set.
57
-
58
- Notice: The standard pydantic settings mechanism with parameter "aliases" does not provide
59
- the same functionality. In case the alias envvar is set and the user tries to override the
60
- parameter in settings initialization, Pydantic treats the parameter provided in __init__()
61
- as an extra input instead of simply overwriting the evvar value for that parameter.
62
- """
63
- if isinstance(data, dict):
64
- input_num_threads = data.get("num_threads")
65
- # Check if to set the num_threads from the alternative envvar
66
- if input_num_threads is None:
67
- docling_num_threads = os.getenv("DOCLING_NUM_THREADS")
68
- omp_num_threads = os.getenv("OMP_NUM_THREADS")
69
- if docling_num_threads is None and omp_num_threads is not None:
70
- try:
71
- data["num_threads"] = int(omp_num_threads)
72
- except ValueError:
73
- _log.error(
74
- "Ignoring misformatted envvar OMP_NUM_THREADS '%s'",
75
- omp_num_threads,
76
- )
77
- return data
30
+ _log = logging.getLogger(__name__)
78
31
 
79
32
 
80
33
  class BaseOptions(BaseModel):
@@ -121,24 +74,22 @@ class RapidOcrOptions(OcrOptions):
121
74
  lang: List[str] = [
122
75
  "english",
123
76
  "chinese",
124
- ] # However, language as a parameter is not supported by rapidocr yet and hence changing this options doesn't affect anything.
125
- # For more details on supported languages by RapidOCR visit https://rapidai.github.io/RapidOCRDocs/blog/2022/09/28/%E6%94%AF%E6%8C%81%E8%AF%86%E5%88%AB%E8%AF%AD%E8%A8%80/
77
+ ]
78
+ # However, language as a parameter is not supported by rapidocr yet
79
+ # and hence changing this options doesn't affect anything.
80
+
81
+ # For more details on supported languages by RapidOCR visit
82
+ # https://rapidai.github.io/RapidOCRDocs/blog/2022/09/28/%E6%94%AF%E6%8C%81%E8%AF%86%E5%88%AB%E8%AF%AD%E8%A8%80/
83
+
84
+ # For more details on the following options visit
85
+ # https://rapidai.github.io/RapidOCRDocs/install_usage/api/RapidOCR/
126
86
 
127
- # For more details on the following options visit https://rapidai.github.io/RapidOCRDocs/install_usage/api/RapidOCR/
128
87
  text_score: float = 0.5 # same default as rapidocr
129
88
 
130
89
  use_det: Optional[bool] = None # same default as rapidocr
131
90
  use_cls: Optional[bool] = None # same default as rapidocr
132
91
  use_rec: Optional[bool] = None # same default as rapidocr
133
92
 
134
- # class Device(Enum):
135
- # CPU = "CPU"
136
- # CUDA = "CUDA"
137
- # DIRECTML = "DIRECTML"
138
- # AUTO = "AUTO"
139
-
140
- # device: Device = Device.AUTO # Default value is AUTO
141
-
142
93
  print_verbose: bool = False # same default as rapidocr
143
94
 
144
95
  det_model_path: Optional[str] = None # same default as rapidocr
@@ -244,101 +195,18 @@ class PictureDescriptionVlmOptions(PictureDescriptionBaseOptions):
244
195
  return self.repo_id.replace("/", "--")
245
196
 
246
197
 
198
+ # SmolVLM
247
199
  smolvlm_picture_description = PictureDescriptionVlmOptions(
248
200
  repo_id="HuggingFaceTB/SmolVLM-256M-Instruct"
249
201
  )
250
- # phi_picture_description = PictureDescriptionVlmOptions(repo_id="microsoft/Phi-3-vision-128k-instruct")
202
+
203
+ # GraniteVision
251
204
  granite_picture_description = PictureDescriptionVlmOptions(
252
205
  repo_id="ibm-granite/granite-vision-3.1-2b-preview",
253
206
  prompt="What is shown in this image?",
254
207
  )
255
208
 
256
209
 
257
- class BaseVlmOptions(BaseModel):
258
- kind: str
259
- prompt: str
260
-
261
-
262
- class ResponseFormat(str, Enum):
263
- DOCTAGS = "doctags"
264
- MARKDOWN = "markdown"
265
-
266
-
267
- class InferenceFramework(str, Enum):
268
- MLX = "mlx"
269
- TRANSFORMERS = "transformers"
270
- OPENAI = "openai"
271
-
272
-
273
- class HuggingFaceVlmOptions(BaseVlmOptions):
274
- kind: Literal["hf_model_options"] = "hf_model_options"
275
-
276
- repo_id: str
277
- load_in_8bit: bool = True
278
- llm_int8_threshold: float = 6.0
279
- quantized: bool = False
280
-
281
- inference_framework: InferenceFramework
282
- response_format: ResponseFormat
283
-
284
- @property
285
- def repo_cache_folder(self) -> str:
286
- return self.repo_id.replace("/", "--")
287
-
288
-
289
- class ApiVlmOptions(BaseVlmOptions):
290
- kind: Literal["api_model_options"] = "api_model_options"
291
-
292
- url: AnyUrl = AnyUrl(
293
- "http://localhost:11434/v1/chat/completions"
294
- ) # Default to ollama
295
- headers: Dict[str, str] = {}
296
- params: Dict[str, Any] = {}
297
- scale: float = 2.0
298
- timeout: float = 60
299
- concurrency: int = 1
300
- response_format: ResponseFormat
301
-
302
-
303
- smoldocling_vlm_mlx_conversion_options = HuggingFaceVlmOptions(
304
- repo_id="ds4sd/SmolDocling-256M-preview-mlx-bf16",
305
- prompt="Convert this page to docling.",
306
- response_format=ResponseFormat.DOCTAGS,
307
- inference_framework=InferenceFramework.MLX,
308
- )
309
-
310
-
311
- smoldocling_vlm_conversion_options = HuggingFaceVlmOptions(
312
- repo_id="ds4sd/SmolDocling-256M-preview",
313
- prompt="Convert this page to docling.",
314
- response_format=ResponseFormat.DOCTAGS,
315
- inference_framework=InferenceFramework.TRANSFORMERS,
316
- )
317
-
318
- granite_vision_vlm_conversion_options = HuggingFaceVlmOptions(
319
- repo_id="ibm-granite/granite-vision-3.1-2b-preview",
320
- # prompt="OCR the full page to markdown.",
321
- prompt="OCR this image.",
322
- response_format=ResponseFormat.MARKDOWN,
323
- inference_framework=InferenceFramework.TRANSFORMERS,
324
- )
325
-
326
- granite_vision_vlm_ollama_conversion_options = ApiVlmOptions(
327
- url=AnyUrl("http://localhost:11434/v1/chat/completions"),
328
- params={"model": "granite3.2-vision:2b"},
329
- prompt="OCR the full page to markdown.",
330
- scale=1.0,
331
- timeout=120,
332
- response_format=ResponseFormat.MARKDOWN,
333
- )
334
-
335
-
336
- class VlmModelType(str, Enum):
337
- SMOLDOCLING = "smoldocling"
338
- GRANITE_VISION = "granite_vision"
339
- GRANITE_VISION_OLLAMA = "granite_vision_ollama"
340
-
341
-
342
210
  # Define an enum for the backend options
343
211
  class PdfBackend(str, Enum):
344
212
  """Enum of valid PDF backends."""
@@ -387,7 +255,7 @@ class VlmPipelineOptions(PaginatedPipelineOptions):
387
255
  False # (To be used with vlms, or other generative models)
388
256
  )
389
257
  # If True, text from backend will be used instead of generated text
390
- vlm_options: Union[HuggingFaceVlmOptions, ApiVlmOptions] = (
258
+ vlm_options: Union[InlineVlmOptions, ApiVlmOptions] = (
391
259
  smoldocling_vlm_conversion_options
392
260
  )
393
261
 
@@ -0,0 +1,81 @@
1
+ from enum import Enum
2
+ from typing import Any, Dict, List, Literal
3
+
4
+ from pydantic import AnyUrl, BaseModel
5
+ from typing_extensions import deprecated
6
+
7
+ from docling.datamodel.accelerator_options import AcceleratorDevice
8
+
9
+
10
+ class BaseVlmOptions(BaseModel):
11
+ kind: str
12
+ prompt: str
13
+
14
+
15
+ class ResponseFormat(str, Enum):
16
+ DOCTAGS = "doctags"
17
+ MARKDOWN = "markdown"
18
+ HTML = "html"
19
+
20
+
21
+ class InferenceFramework(str, Enum):
22
+ MLX = "mlx"
23
+ TRANSFORMERS = "transformers"
24
+
25
+
26
+ class TransformersModelType(str, Enum):
27
+ AUTOMODEL = "automodel"
28
+ AUTOMODEL_VISION2SEQ = "automodel-vision2seq"
29
+ AUTOMODEL_CAUSALLM = "automodel-causallm"
30
+
31
+
32
+ class InlineVlmOptions(BaseVlmOptions):
33
+ kind: Literal["inline_model_options"] = "inline_model_options"
34
+
35
+ repo_id: str
36
+ trust_remote_code: bool = False
37
+ load_in_8bit: bool = True
38
+ llm_int8_threshold: float = 6.0
39
+ quantized: bool = False
40
+
41
+ inference_framework: InferenceFramework
42
+ transformers_model_type: TransformersModelType = TransformersModelType.AUTOMODEL
43
+ response_format: ResponseFormat
44
+
45
+ supported_devices: List[AcceleratorDevice] = [
46
+ AcceleratorDevice.CPU,
47
+ AcceleratorDevice.CUDA,
48
+ AcceleratorDevice.MPS,
49
+ ]
50
+
51
+ scale: float = 2.0
52
+
53
+ temperature: float = 0.0
54
+ stop_strings: List[str] = []
55
+ extra_generation_config: Dict[str, Any] = {}
56
+
57
+ use_kv_cache: bool = True
58
+ max_new_tokens: int = 4096
59
+
60
+ @property
61
+ def repo_cache_folder(self) -> str:
62
+ return self.repo_id.replace("/", "--")
63
+
64
+
65
+ @deprecated("Use InlineVlmOptions instead.")
66
+ class HuggingFaceVlmOptions(InlineVlmOptions):
67
+ pass
68
+
69
+
70
+ class ApiVlmOptions(BaseVlmOptions):
71
+ kind: Literal["api_model_options"] = "api_model_options"
72
+
73
+ url: AnyUrl = AnyUrl(
74
+ "http://localhost:11434/v1/chat/completions"
75
+ ) # Default to ollama
76
+ headers: Dict[str, str] = {}
77
+ params: Dict[str, Any] = {}
78
+ scale: float = 2.0
79
+ timeout: float = 60
80
+ concurrency: int = 1
81
+ response_format: ResponseFormat