docling 2.69.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling might be problematic. Click here for more details.

Files changed (138) hide show
  1. docling/__init__.py +0 -0
  2. docling/backend/__init__.py +0 -0
  3. docling/backend/abstract_backend.py +84 -0
  4. docling/backend/asciidoc_backend.py +443 -0
  5. docling/backend/csv_backend.py +125 -0
  6. docling/backend/docling_parse_backend.py +237 -0
  7. docling/backend/docling_parse_v2_backend.py +276 -0
  8. docling/backend/docling_parse_v4_backend.py +260 -0
  9. docling/backend/docx/__init__.py +0 -0
  10. docling/backend/docx/drawingml/utils.py +131 -0
  11. docling/backend/docx/latex/__init__.py +0 -0
  12. docling/backend/docx/latex/latex_dict.py +274 -0
  13. docling/backend/docx/latex/omml.py +459 -0
  14. docling/backend/html_backend.py +1502 -0
  15. docling/backend/image_backend.py +188 -0
  16. docling/backend/json/__init__.py +0 -0
  17. docling/backend/json/docling_json_backend.py +58 -0
  18. docling/backend/md_backend.py +618 -0
  19. docling/backend/mets_gbs_backend.py +399 -0
  20. docling/backend/msexcel_backend.py +686 -0
  21. docling/backend/mspowerpoint_backend.py +398 -0
  22. docling/backend/msword_backend.py +1663 -0
  23. docling/backend/noop_backend.py +51 -0
  24. docling/backend/pdf_backend.py +82 -0
  25. docling/backend/pypdfium2_backend.py +417 -0
  26. docling/backend/webvtt_backend.py +572 -0
  27. docling/backend/xml/__init__.py +0 -0
  28. docling/backend/xml/jats_backend.py +819 -0
  29. docling/backend/xml/uspto_backend.py +1905 -0
  30. docling/chunking/__init__.py +12 -0
  31. docling/cli/__init__.py +0 -0
  32. docling/cli/main.py +974 -0
  33. docling/cli/models.py +196 -0
  34. docling/cli/tools.py +17 -0
  35. docling/datamodel/__init__.py +0 -0
  36. docling/datamodel/accelerator_options.py +69 -0
  37. docling/datamodel/asr_model_specs.py +494 -0
  38. docling/datamodel/backend_options.py +102 -0
  39. docling/datamodel/base_models.py +493 -0
  40. docling/datamodel/document.py +699 -0
  41. docling/datamodel/extraction.py +39 -0
  42. docling/datamodel/layout_model_specs.py +91 -0
  43. docling/datamodel/pipeline_options.py +457 -0
  44. docling/datamodel/pipeline_options_asr_model.py +78 -0
  45. docling/datamodel/pipeline_options_vlm_model.py +136 -0
  46. docling/datamodel/settings.py +65 -0
  47. docling/datamodel/vlm_model_specs.py +365 -0
  48. docling/document_converter.py +559 -0
  49. docling/document_extractor.py +327 -0
  50. docling/exceptions.py +10 -0
  51. docling/experimental/__init__.py +5 -0
  52. docling/experimental/datamodel/__init__.py +1 -0
  53. docling/experimental/datamodel/table_crops_layout_options.py +13 -0
  54. docling/experimental/datamodel/threaded_layout_vlm_pipeline_options.py +45 -0
  55. docling/experimental/models/__init__.py +3 -0
  56. docling/experimental/models/table_crops_layout_model.py +114 -0
  57. docling/experimental/pipeline/__init__.py +1 -0
  58. docling/experimental/pipeline/threaded_layout_vlm_pipeline.py +439 -0
  59. docling/models/__init__.py +0 -0
  60. docling/models/base_layout_model.py +39 -0
  61. docling/models/base_model.py +230 -0
  62. docling/models/base_ocr_model.py +241 -0
  63. docling/models/base_table_model.py +45 -0
  64. docling/models/extraction/__init__.py +0 -0
  65. docling/models/extraction/nuextract_transformers_model.py +305 -0
  66. docling/models/factories/__init__.py +47 -0
  67. docling/models/factories/base_factory.py +122 -0
  68. docling/models/factories/layout_factory.py +7 -0
  69. docling/models/factories/ocr_factory.py +11 -0
  70. docling/models/factories/picture_description_factory.py +11 -0
  71. docling/models/factories/table_factory.py +7 -0
  72. docling/models/picture_description_base_model.py +149 -0
  73. docling/models/plugins/__init__.py +0 -0
  74. docling/models/plugins/defaults.py +60 -0
  75. docling/models/stages/__init__.py +0 -0
  76. docling/models/stages/code_formula/__init__.py +0 -0
  77. docling/models/stages/code_formula/code_formula_model.py +342 -0
  78. docling/models/stages/layout/__init__.py +0 -0
  79. docling/models/stages/layout/layout_model.py +249 -0
  80. docling/models/stages/ocr/__init__.py +0 -0
  81. docling/models/stages/ocr/auto_ocr_model.py +132 -0
  82. docling/models/stages/ocr/easyocr_model.py +200 -0
  83. docling/models/stages/ocr/ocr_mac_model.py +145 -0
  84. docling/models/stages/ocr/rapid_ocr_model.py +328 -0
  85. docling/models/stages/ocr/tesseract_ocr_cli_model.py +331 -0
  86. docling/models/stages/ocr/tesseract_ocr_model.py +262 -0
  87. docling/models/stages/page_assemble/__init__.py +0 -0
  88. docling/models/stages/page_assemble/page_assemble_model.py +156 -0
  89. docling/models/stages/page_preprocessing/__init__.py +0 -0
  90. docling/models/stages/page_preprocessing/page_preprocessing_model.py +145 -0
  91. docling/models/stages/picture_classifier/__init__.py +0 -0
  92. docling/models/stages/picture_classifier/document_picture_classifier.py +246 -0
  93. docling/models/stages/picture_description/__init__.py +0 -0
  94. docling/models/stages/picture_description/picture_description_api_model.py +66 -0
  95. docling/models/stages/picture_description/picture_description_vlm_model.py +123 -0
  96. docling/models/stages/reading_order/__init__.py +0 -0
  97. docling/models/stages/reading_order/readingorder_model.py +431 -0
  98. docling/models/stages/table_structure/__init__.py +0 -0
  99. docling/models/stages/table_structure/table_structure_model.py +305 -0
  100. docling/models/utils/__init__.py +0 -0
  101. docling/models/utils/generation_utils.py +157 -0
  102. docling/models/utils/hf_model_download.py +45 -0
  103. docling/models/vlm_pipeline_models/__init__.py +1 -0
  104. docling/models/vlm_pipeline_models/api_vlm_model.py +180 -0
  105. docling/models/vlm_pipeline_models/hf_transformers_model.py +391 -0
  106. docling/models/vlm_pipeline_models/mlx_model.py +325 -0
  107. docling/models/vlm_pipeline_models/vllm_model.py +344 -0
  108. docling/pipeline/__init__.py +0 -0
  109. docling/pipeline/asr_pipeline.py +431 -0
  110. docling/pipeline/base_extraction_pipeline.py +72 -0
  111. docling/pipeline/base_pipeline.py +326 -0
  112. docling/pipeline/extraction_vlm_pipeline.py +207 -0
  113. docling/pipeline/legacy_standard_pdf_pipeline.py +262 -0
  114. docling/pipeline/simple_pipeline.py +55 -0
  115. docling/pipeline/standard_pdf_pipeline.py +859 -0
  116. docling/pipeline/threaded_standard_pdf_pipeline.py +5 -0
  117. docling/pipeline/vlm_pipeline.py +416 -0
  118. docling/py.typed +1 -0
  119. docling/utils/__init__.py +0 -0
  120. docling/utils/accelerator_utils.py +97 -0
  121. docling/utils/api_image_request.py +205 -0
  122. docling/utils/deepseekocr_utils.py +388 -0
  123. docling/utils/export.py +146 -0
  124. docling/utils/glm_utils.py +361 -0
  125. docling/utils/layout_postprocessor.py +683 -0
  126. docling/utils/locks.py +3 -0
  127. docling/utils/model_downloader.py +168 -0
  128. docling/utils/ocr_utils.py +69 -0
  129. docling/utils/orientation.py +65 -0
  130. docling/utils/profiling.py +65 -0
  131. docling/utils/utils.py +65 -0
  132. docling/utils/visualization.py +85 -0
  133. docling-2.69.0.dist-info/METADATA +237 -0
  134. docling-2.69.0.dist-info/RECORD +138 -0
  135. docling-2.69.0.dist-info/WHEEL +5 -0
  136. docling-2.69.0.dist-info/entry_points.txt +6 -0
  137. docling-2.69.0.dist-info/licenses/LICENSE +21 -0
  138. docling-2.69.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,237 @@
1
+ Metadata-Version: 2.4
2
+ Name: docling
3
+ Version: 2.69.0
4
+ Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
5
+ Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
+ License-Expression: MIT
7
+ Project-URL: homepage, https://github.com/docling-project/docling
8
+ Project-URL: repository, https://github.com/docling-project/docling
9
+ Project-URL: issues, https://github.com/docling-project/docling/issues
10
+ Project-URL: changelog, https://github.com/docling-project/docling/blob/main/CHANGELOG.md
11
+ Keywords: docling,convert,document,pdf,docx,html,markdown,layout model,segmentation,table structure,table former
12
+ Classifier: Operating System :: MacOS :: MacOS X
13
+ Classifier: Operating System :: POSIX :: Linux
14
+ Classifier: Operating System :: Microsoft :: Windows
15
+ Classifier: Development Status :: 5 - Production/Stable
16
+ Classifier: Intended Audience :: Developers
17
+ Classifier: Intended Audience :: Science/Research
18
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
19
+ Classifier: Programming Language :: Python :: 3
20
+ Classifier: Programming Language :: Python :: 3.9
21
+ Classifier: Programming Language :: Python :: 3.10
22
+ Classifier: Programming Language :: Python :: 3.11
23
+ Classifier: Programming Language :: Python :: 3.12
24
+ Classifier: Programming Language :: Python :: 3.13
25
+ Classifier: Programming Language :: Python :: 3.14
26
+ Requires-Python: <4.0,>=3.9
27
+ Description-Content-Type: text/markdown
28
+ License-File: LICENSE
29
+ Requires-Dist: pydantic<3.0.0,>=2.0.0
30
+ Requires-Dist: docling-core[chunking]<3.0.0,>=2.50.1
31
+ Requires-Dist: docling-parse<5.0.0,>=4.7.0
32
+ Requires-Dist: docling-ibm-models<4,>=3.9.1
33
+ Requires-Dist: filetype<2.0.0,>=1.2.0
34
+ Requires-Dist: pypdfium2!=4.30.1,<6.0.0,>=4.30.0
35
+ Requires-Dist: pydantic-settings<3.0.0,>=2.3.0
36
+ Requires-Dist: huggingface_hub<1,>=0.23
37
+ Requires-Dist: requests<3.0.0,>=2.32.2
38
+ Requires-Dist: ocrmac<2.0.0,>=1.0.0; sys_platform == "darwin"
39
+ Requires-Dist: rapidocr<4.0.0,>=3.3
40
+ Requires-Dist: certifi>=2024.7.4
41
+ Requires-Dist: rtree<2.0.0,>=1.3.0
42
+ Requires-Dist: typer<0.20.0,>=0.12.5
43
+ Requires-Dist: python-docx<2.0.0,>=1.1.2
44
+ Requires-Dist: python-pptx<2.0.0,>=1.0.2
45
+ Requires-Dist: beautifulsoup4<5.0.0,>=4.12.3
46
+ Requires-Dist: pandas<3.0.0,>=2.1.4
47
+ Requires-Dist: marko<3.0.0,>=2.1.2
48
+ Requires-Dist: openpyxl<4.0.0,>=3.1.5
49
+ Requires-Dist: lxml<7.0.0,>=4.0.0
50
+ Requires-Dist: pillow<12.0.0,>=10.0.0
51
+ Requires-Dist: tqdm<5.0.0,>=4.65.0
52
+ Requires-Dist: pluggy<2.0.0,>=1.0.0
53
+ Requires-Dist: pylatexenc<3.0,>=2.10
54
+ Requires-Dist: scipy<2.0.0,>=1.6.0
55
+ Requires-Dist: accelerate<2,>=1.0.0
56
+ Requires-Dist: polyfactory>=2.22.2
57
+ Provides-Extra: easyocr
58
+ Requires-Dist: easyocr<2.0,>=1.7; extra == "easyocr"
59
+ Provides-Extra: tesserocr
60
+ Requires-Dist: tesserocr<3.0.0,>=2.7.1; extra == "tesserocr"
61
+ Provides-Extra: ocrmac
62
+ Requires-Dist: ocrmac<2.0.0,>=1.0.0; sys_platform == "darwin" and extra == "ocrmac"
63
+ Provides-Extra: vlm
64
+ Requires-Dist: transformers<5.0.0,>=4.46.0; extra == "vlm"
65
+ Requires-Dist: accelerate<2.0.0,>=1.2.1; extra == "vlm"
66
+ Requires-Dist: mlx-vlm<1.0.0,>=0.3.0; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "vlm"
67
+ Requires-Dist: vllm<1.0.0,>=0.10.0; (python_version >= "3.10" and python_version < "3.14" and sys_platform == "linux" and platform_machine == "x86_64") and extra == "vlm"
68
+ Requires-Dist: qwen-vl-utils>=0.0.11; extra == "vlm"
69
+ Provides-Extra: rapidocr
70
+ Requires-Dist: rapidocr<4.0.0,>=3.3; extra == "rapidocr"
71
+ Requires-Dist: onnxruntime<2.0.0,>=1.7.0; python_version < "3.14" and extra == "rapidocr"
72
+ Provides-Extra: asr
73
+ Requires-Dist: mlx-whisper>=0.4.3; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "asr"
74
+ Requires-Dist: openai-whisper>=20250625; extra == "asr"
75
+ Dynamic: license-file
76
+
77
+ <p align="center">
78
+ <a href="https://github.com/docling-project/docling">
79
+ <img loading="lazy" alt="Docling" src="https://github.com/docling-project/docling/raw/main/docs/assets/docling_processing.png" width="100%"/>
80
+ </a>
81
+ </p>
82
+
83
+ # Docling
84
+
85
+ <p align="center">
86
+ <a href="https://trendshift.io/repositories/12132" target="_blank"><img src="https://trendshift.io/api/badge/repositories/12132" alt="DS4SD%2Fdocling | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
87
+ </p>
88
+
89
+ [![arXiv](https://img.shields.io/badge/arXiv-2408.09869-b31b1b.svg)](https://arxiv.org/abs/2408.09869)
90
+ [![Docs](https://img.shields.io/badge/docs-live-brightgreen)](https://docling-project.github.io/docling/)
91
+ [![PyPI version](https://img.shields.io/pypi/v/docling)](https://pypi.org/project/docling/)
92
+ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/docling)](https://pypi.org/project/docling/)
93
+ [![uv](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/uv/main/assets/badge/v0.json)](https://github.com/astral-sh/uv)
94
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
95
+ [![Pydantic v2](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/pydantic/pydantic/main/docs/badge/v2.json)](https://pydantic.dev)
96
+ [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
97
+ [![License MIT](https://img.shields.io/github/license/docling-project/docling)](https://opensource.org/licenses/MIT)
98
+ [![PyPI Downloads](https://static.pepy.tech/badge/docling/month)](https://pepy.tech/projects/docling)
99
+ [![Docling Actor](https://apify.com/actor-badge?actor=vancura/docling?fpr=docling)](https://apify.com/vancura/docling)
100
+ [![Chat with Dosu](https://dosu.dev/dosu-chat-badge.svg)](https://app.dosu.dev/097760a8-135e-4789-8234-90c8837d7f1c/ask?utm_source=github)
101
+ [![Discord](https://img.shields.io/discord/1399788921306746971?color=6A7EC2&logo=discord&logoColor=ffffff)](https://docling.ai/discord)
102
+ [![OpenSSF Best Practices](https://www.bestpractices.dev/projects/10101/badge)](https://www.bestpractices.dev/projects/10101)
103
+ [![LF AI & Data](https://img.shields.io/badge/LF%20AI%20%26%20Data-003778?logo=linuxfoundation&logoColor=fff&color=0094ff&labelColor=003778)](https://lfaidata.foundation/projects/)
104
+
105
+ Docling simplifies document processing, parsing diverse formats — including advanced PDF understanding — and providing seamless integrations with the gen AI ecosystem.
106
+
107
+ ## Features
108
+
109
+ * 🗂️ Parsing of [multiple document formats][supported_formats] incl. PDF, DOCX, PPTX, XLSX, HTML, WAV, MP3, VTT, images (PNG, TIFF, JPEG, ...), and more
110
+ * 📑 Advanced PDF understanding incl. page layout, reading order, table structure, code, formulas, image classification, and more
111
+ * 🧬 Unified, expressive [DoclingDocument][docling_document] representation format
112
+ * ↪️ Various [export formats][supported_formats] and options, including Markdown, HTML, [DocTags](https://arxiv.org/abs/2503.11576) and lossless JSON
113
+ * 🔒 Local execution capabilities for sensitive data and air-gapped environments
114
+ * 🤖 Plug-and-play [integrations][integrations] incl. LangChain, LlamaIndex, Crew AI & Haystack for agentic AI
115
+ * 🔍 Extensive OCR support for scanned PDFs and images
116
+ * 👓 Support of several Visual Language Models ([GraniteDocling](https://huggingface.co/ibm-granite/granite-docling-258M))
117
+ * 🎙️ Audio support with Automatic Speech Recognition (ASR) models
118
+ * 🔌 Connect to any agent using the [MCP server](https://docling-project.github.io/docling/usage/mcp/)
119
+ * 💻 Simple and convenient CLI
120
+
121
+ ### What's new
122
+ * 📤 Structured [information extraction][extraction] \[🧪 beta\]
123
+ * 📑 New layout model (**Heron**) by default, for faster PDF parsing
124
+ * 🔌 [MCP server](https://docling-project.github.io/docling/usage/mcp/) for agentic applications
125
+ * 💬 Parsing of Web Video Text Tracks (WebVTT) files
126
+
127
+ ### Coming soon
128
+
129
+ * 📝 Metadata extraction, including title, authors, references & language
130
+ * 📝 Chart understanding (Barchart, Piechart, LinePlot, etc.)
131
+ * 📝 Complex chemistry understanding (Molecular structures)
132
+
133
+ ## Installation
134
+
135
+ To use Docling, simply install `docling` from your package manager, e.g. pip:
136
+ ```bash
137
+ pip install docling
138
+ ```
139
+
140
+ Works on macOS, Linux, and Windows environments, on both x86_64 and arm64 architectures.
141
+
142
+ More [detailed installation instructions](https://docling-project.github.io/docling/installation/) are available in the docs.
143
+
144
+ ## Getting started
145
+
146
+ To convert individual documents with Python, use `convert()`, for example:
147
+
148
+ ```python
149
+ from docling.document_converter import DocumentConverter
150
+
151
+ source = "https://arxiv.org/pdf/2408.09869" # document per local path or URL
152
+ converter = DocumentConverter()
153
+ result = converter.convert(source)
154
+ print(result.document.export_to_markdown()) # output: "## Docling Technical Report[...]"
155
+ ```
156
+
157
+ More [advanced usage options](https://docling-project.github.io/docling/usage/advanced_options/) are available in
158
+ the docs.
159
+
160
+ ## CLI
161
+
162
+ Docling has a built-in CLI to run conversions.
163
+
164
+ ```bash
165
+ docling https://arxiv.org/pdf/2206.01062
166
+ ```
167
+
168
+ You can also use 🥚[GraniteDocling](https://huggingface.co/ibm-granite/granite-docling-258M) and other VLMs via Docling CLI:
169
+ ```bash
170
+ docling --pipeline vlm --vlm-model granite_docling https://arxiv.org/pdf/2206.01062
171
+ ```
172
+ This will use MLX acceleration on supported Apple Silicon hardware.
173
+
174
+ Read more [here](https://docling-project.github.io/docling/usage/).
175
+
176
+ ## Documentation
177
+
178
+ Check out Docling's [documentation](https://docling-project.github.io/docling/) for details on
179
+ installation, usage, concepts, recipes, extensions, and more.
180
+
181
+ ## Examples
182
+
183
+ Go hands-on with our [examples](https://docling-project.github.io/docling/examples/),
184
+ demonstrating how to address different application use cases with Docling.
185
+
186
+ ## Integrations
187
+
188
+ To further accelerate your AI application development, check out Docling's native
189
+ [integrations](https://docling-project.github.io/docling/integrations/) with popular frameworks
190
+ and tools.
191
+
192
+ ## Get help and support
193
+
194
+ Please feel free to connect with us using the [discussion section](https://github.com/docling-project/docling/discussions).
195
+
196
+ ## Technical report
197
+
198
+ For more details on Docling's inner workings, check out the [Docling Technical Report](https://arxiv.org/abs/2408.09869).
199
+
200
+ ## Contributing
201
+
202
+ Please read [Contributing to Docling](https://github.com/docling-project/docling/blob/main/CONTRIBUTING.md) for details.
203
+
204
+ ## References
205
+
206
+ If you use Docling in your projects, please consider citing the following:
207
+
208
+ ```bib
209
+ @techreport{Docling,
210
+ author = {Deep Search Team},
211
+ month = {8},
212
+ title = {Docling Technical Report},
213
+ url = {https://arxiv.org/abs/2408.09869},
214
+ eprint = {2408.09869},
215
+ doi = {10.48550/arXiv.2408.09869},
216
+ version = {1.0.0},
217
+ year = {2024}
218
+ }
219
+ ```
220
+
221
+ ## License
222
+
223
+ The Docling codebase is under the MIT license.
224
+ For individual model usage, please refer to the model licenses found in the original packages.
225
+
226
+ ## LF AI & Data
227
+
228
+ Docling is hosted as a project in the [LF AI & Data Foundation](https://lfaidata.foundation/projects/).
229
+
230
+ ### IBM ❤️ Open Source AI
231
+
232
+ The project was started by the AI for knowledge team at IBM Research Zurich.
233
+
234
+ [supported_formats]: https://docling-project.github.io/docling/usage/supported_formats/
235
+ [docling_document]: https://docling-project.github.io/docling/concepts/docling_document/
236
+ [integrations]: https://docling-project.github.io/docling/integrations/
237
+ [extraction]: https://docling-project.github.io/docling/examples/extraction/
@@ -0,0 +1,138 @@
1
+ docling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ docling/document_converter.py,sha256=KnSRAHsHYWaDFGTzsOhQ-1jg8CTGLzq-EaEBNztT8wA,21907
3
+ docling/document_extractor.py,sha256=ubGNST6TF-SF0ztxflgZ8N-edVCh8JhIMfzjjTIPIPU,12025
4
+ docling/exceptions.py,sha256=K1WnCS1leK2JtMB5ewZWKkb0EaijFgl-tRzrO9ntgPM,134
5
+ docling/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
6
+ docling/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ docling/backend/abstract_backend.py,sha256=_xKSjLpR-ia93Kz0dto0yyVsaeIqEepUhVEGo18MuWw,2169
8
+ docling/backend/asciidoc_backend.py,sha256=DR8AUTNvy_SCHkieMpqZXg_NLRTy4roEqa0V8sILPWk,14400
9
+ docling/backend/csv_backend.py,sha256=2g9famYG2W-ID9jEdZPxc6O8QGv1vWQfjN8pL-QMBE0,4536
10
+ docling/backend/docling_parse_backend.py,sha256=9rUo1vPxX6QLzGqF-2B2iEYglZg6YQ3Uea00XrLluTg,7918
11
+ docling/backend/docling_parse_v2_backend.py,sha256=3ckTfke8IICjaImlIzc3TRhG7KDuxDDba0AuCEcjA-M,9500
12
+ docling/backend/docling_parse_v4_backend.py,sha256=tBJR0BbKFOIDKSngjVDu0BrzTj7qUZAhFdRT8GvAJ18,8232
13
+ docling/backend/html_backend.py,sha256=Ba2BVRkH1IejZ7qiOrGV03QBdt55X3FjIB5IeNHWyO4,57570
14
+ docling/backend/image_backend.py,sha256=MDdYePfcV4DWfXBndbldCOw5ImXs7ja-1m1W4ZNcbXc,6512
15
+ docling/backend/md_backend.py,sha256=tTHTCPoIXqnNS9wEgHBXY51GivAc2yrCui55nB6boZU,23624
16
+ docling/backend/mets_gbs_backend.py,sha256=EA8sY6tbmGiysKGYPPZiNlK-i7Adn8bLTo-7Ym15hTU,12774
17
+ docling/backend/msexcel_backend.py,sha256=TN-8rv9SGaC7NejXHgmbikvvN3x6mIs0r7TKdLtvbFw,24564
18
+ docling/backend/mspowerpoint_backend.py,sha256=71W_iV31Rggqn9UcMzXmsZ3QKMRpsBT8fCwdjsIIKAs,15109
19
+ docling/backend/msword_backend.py,sha256=7ulVMcRHYtb4-EJs5lo1qLqF5sPY9qdt9Wx9_Rknq6Q,64861
20
+ docling/backend/noop_backend.py,sha256=EOPbD86FzZPX-K_DpNrJh0_lC0bZz--4DpG-OagDNGY,1688
21
+ docling/backend/pdf_backend.py,sha256=TE0sYp8tjcoq-S0jniheb9w0a_IpyhYFnahiYoOzYWo,2226
22
+ docling/backend/pypdfium2_backend.py,sha256=kFX5VV-YT2uqLxU8QUfOdNtwKq2nMLL2WNR9lxS2YOg,14858
23
+ docling/backend/webvtt_backend.py,sha256=9xPcfWVLuqhEAFrkv8aU36qHnSgjeINZAXT_C9C6XJA,19165
24
+ docling/backend/docx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
+ docling/backend/docx/drawingml/utils.py,sha256=E9Iq8_052eEV5L1IN3ZqFX9eBidH56DKNlh6Tk7Do0I,3640
26
+ docling/backend/docx/latex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
+ docling/backend/docx/latex/latex_dict.py,sha256=v_r_N1Ehs8olTAlpkHNcc9LNzhY3b65D-VQPaCcdZi8,6717
28
+ docling/backend/docx/latex/omml.py,sha256=1n1yKy4XqCFBmcLD2AAQBdopq5FThaO0wLpxJpSK7Mw,12519
29
+ docling/backend/json/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
30
+ docling/backend/json/docling_json_backend.py,sha256=LlFMVoZrrCfVwbDuRbNN4Xg96Lujh4xxrTBt9jGhY9I,1984
31
+ docling/backend/xml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
32
+ docling/backend/xml/jats_backend.py,sha256=_BWpQQg3SlsHAOOj0v2qRJoVqaQzL91GqN1tK9LxqWo,29463
33
+ docling/backend/xml/uspto_backend.py,sha256=gURSYFDo3bM7G6MSwNDp1X3fYY-W2iMSdIgkGJaY4CA,70904
34
+ docling/chunking/__init__.py,sha256=h83TDs0AuOV6oEPLAPrn9dpGKiU-2Vg6IRNo4cv6GDA,346
35
+ docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
+ docling/cli/main.py,sha256=6AntqkUI31bw3rNmB-j3q7AAzdKlEpIeuTljQJ93u1U,40632
37
+ docling/cli/models.py,sha256=qKYicq1-K2BlCmsfbc4hsm4MUqXR86HqEGbwEJOmHOM,6353
38
+ docling/cli/tools.py,sha256=QhtRxQG0TVrfsMqdv5i7J0_qQy1ZZyWYnHPwJl7b5oY,322
39
+ docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
+ docling/datamodel/accelerator_options.py,sha256=336BX_ZAyAsSZ0mXLepDsWxFHgvv84Xk0677F7T6Fc4,2541
41
+ docling/datamodel/asr_model_specs.py,sha256=gQJkW7DaSPiOuW_0QoI5OzR1_DQGRkw7yQlrVJ4hyo0,14473
42
+ docling/datamodel/backend_options.py,sha256=IYjVI6DZwI5aT97UyswUlJzEJ44Mce06AL_WWttaGus,3163
43
+ docling/datamodel/base_models.py,sha256=Oky1BbHXOX0t3osnTx3Gx7SgWeoowFcr58E4TKLbwLs,13592
44
+ docling/datamodel/document.py,sha256=02HFqnMlwkrkD4xsqdEQy78QwHEPC_ATdDC371OTmf4,25132
45
+ docling/datamodel/extraction.py,sha256=boyBRi8wFFr-ii0VEFvEYvk4we9-hd46DuOtbR_nThk,1338
46
+ docling/datamodel/layout_model_specs.py,sha256=5fSbuO1qjlBSTiNVHaNYs1lKIBEsoY_EHIHr6cAQlGI,2430
47
+ docling/datamodel/pipeline_options.py,sha256=Dy4jbEurd0iAx7rE5OMULvQFwUTD7MwwH6XB13YthGc,14126
48
+ docling/datamodel/pipeline_options_asr_model.py,sha256=fqmJrPi71z6YiblHWzitADXvmypQRxmtAkQTa1tS5Xg,2203
49
+ docling/datamodel/pipeline_options_vlm_model.py,sha256=VcKEM1u408BI5BM6da9vwR2yR3ZUgkrPDUZg2Pok69w,3862
50
+ docling/datamodel/settings.py,sha256=c0MTw6pO5be_BKxHKYl4SaBJAw_qL-aapxp-g5HHj1A,2084
51
+ docling/datamodel/vlm_model_specs.py,sha256=HIi74jiYHYKlKwVp-4_Ba6os4WYZNU5sUwmHc36JZxI,11625
52
+ docling/experimental/__init__.py,sha256=Bmrk6lyMjWASIUbEbLUgWnAWC8mOSNNRV6SGXaPmXG4,162
53
+ docling/experimental/datamodel/__init__.py,sha256=B8qy9Aaz3pL13tzhdbNHTTIvpJNBC_qJjoCCjzX0ldQ,38
54
+ docling/experimental/datamodel/table_crops_layout_options.py,sha256=8rlMVYmPe1nknWsMiuD_pNCWTU6DpbSboKFjNxGoy5U,385
55
+ docling/experimental/datamodel/threaded_layout_vlm_pipeline_options.py,sha256=WIte0vgKPJhMw1VxULuPNwIpBw847kNJOuWduHgGIpY,1656
56
+ docling/experimental/models/__init__.py,sha256=-aQHTrjZck1TM0K8xvCegyaQuRx6dBCruCkcjphwtEw,64
57
+ docling/experimental/models/table_crops_layout_model.py,sha256=E5kXZFPXvNwoSBjT842z99AfiytqBnEJJkF5ptabM9c,3667
58
+ docling/experimental/pipeline/__init__.py,sha256=OG6bPn1T8x9B1jt0s6Ie2mNTmZEJm8_Astl-st4Cfj0,37
59
+ docling/experimental/pipeline/threaded_layout_vlm_pipeline.py,sha256=Z2XVl6-aY5UBH7wPQeCrPOznIM4r-5hTjn-KF6PgjL0,18116
60
+ docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
61
+ docling/models/base_layout_model.py,sha256=oJ7BRGUmdeeMCWduRLIxDNu_VunpEm9DasFtxP7xAnc,1263
62
+ docling/models/base_model.py,sha256=wHUit7qtpW1gIyRJV5CSyni-VCHlD6ajbcxZeOOBeLM,7983
63
+ docling/models/base_ocr_model.py,sha256=4A5nbU0YQCSgmjnKybX1Az2ALlWBPjicMOMgEaSU24Q,8950
64
+ docling/models/base_table_model.py,sha256=wT1w0KwZ7IzuvOaDtxRqM3MLTyr89QjSAXpuAVgYvLE,1448
65
+ docling/models/picture_description_base_model.py,sha256=pI9C1aMOciolx4uJozIxQHloGqsDjKyVcn9dIHC2r2I,5002
66
+ docling/models/extraction/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
67
+ docling/models/extraction/nuextract_transformers_model.py,sha256=rTUhTp-coVGnjYZBYGs2nLxf6YGqXgHpZAlmOz2Ejr4,11036
68
+ docling/models/factories/__init__.py,sha256=d5IU5W7lrUfVvxMgbsJcUxvtpwW0ZZyetceNkaR2TtE,1632
69
+ docling/models/factories/base_factory.py,sha256=MfWIljMETi5aaVR-6qLTelW8u1gwDAQsOwg3fu7O4Qc,4028
70
+ docling/models/factories/layout_factory.py,sha256=wfT0NjFilFHWWul6qluwDa-LZWD9s6STXeG_9_dtv0A,277
71
+ docling/models/factories/ocr_factory.py,sha256=G5RkmkKvkl-ihpo6qSj8WC77VdlVSQ1s0ekwUX2ILts,316
72
+ docling/models/factories/picture_description_factory.py,sha256=Ru3-TnVVEKf5O07C_UpGf2HCOHc7j20AJzfficw3agM,385
73
+ docling/models/factories/table_factory.py,sha256=hkSc5dlFGmqKVSQokIjhbHZoFYYuYzoth-ZJpgVjDJM,309
74
+ docling/models/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
75
+ docling/models/plugins/defaults.py,sha256=t7Rn-nFAe0wH7GyL6QGr_vKNMwHpWFyvGVfY9vO6CRc,1691
76
+ docling/models/stages/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
77
+ docling/models/stages/code_formula/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
78
+ docling/models/stages/code_formula/code_formula_model.py,sha256=YrmLOcLzJf5JmaH090KLIIuVg6ntVtkjwW3_lIdaDQA,11443
79
+ docling/models/stages/layout/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
80
+ docling/models/stages/layout/layout_model.py,sha256=6hyg3FJpOfjQUmgiHq_Ny3zQCpzPSqtWse0FpqRMRxs,9630
81
+ docling/models/stages/ocr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
82
+ docling/models/stages/ocr/auto_ocr_model.py,sha256=I8M1WKEgBB_SHhVxLXPXjL0MQq7DZoYIQPjNSNHujEI,5221
83
+ docling/models/stages/ocr/easyocr_model.py,sha256=sCWIe2xUYU1uK8g4qkPXh0OkUX-rV6BRd4Deb_na9Y4,7899
84
+ docling/models/stages/ocr/ocr_mac_model.py,sha256=y-1DSFDbACHpEwNTfQwzN9ab8r5j5rBFNPtQ48BzsrA,5396
85
+ docling/models/stages/ocr/rapid_ocr_model.py,sha256=e1nE6xZd_MxyREnApotGkIykPnaxdF4bQgi697sd8Ms,14232
86
+ docling/models/stages/ocr/tesseract_ocr_cli_model.py,sha256=KuO4rXc-88C2-cAymvcr41TqFi3hNg4gerEzoI3Z6m4,13039
87
+ docling/models/stages/ocr/tesseract_ocr_model.py,sha256=hcHd7vyXaPIz_psO9392ihIC7ct0zSB35fIGCFFH9FE,11154
88
+ docling/models/stages/page_assemble/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
89
+ docling/models/stages/page_assemble/page_assemble_model.py,sha256=K2Jn0gbz8X8NQPomTwRCnhDdNZ-yd5OU1_TNWvkxt9w,6337
90
+ docling/models/stages/page_preprocessing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
91
+ docling/models/stages/page_preprocessing/page_preprocessing_model.py,sha256=EmusNexws5ZmR93js_saVU0BedqZ_HIHQeY7lcf52tI,5284
92
+ docling/models/stages/picture_classifier/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
93
+ docling/models/stages/picture_classifier/document_picture_classifier.py,sha256=fXKFbHNhOu9n9hvqMr2NN5MJZppmbk8fe2gMNNXuWZ0,8583
94
+ docling/models/stages/picture_description/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
95
+ docling/models/stages/picture_description/picture_description_api_model.py,sha256=EMvVYHds5C1Ee7SCu0Scindhzswlz3LhFDq0-3Ogqn0,2418
96
+ docling/models/stages/picture_description/picture_description_vlm_model.py,sha256=ieY_vyQWz-w_IU4cZZIE30SdtRQ5ssH_17G_6iosPvs,4329
97
+ docling/models/stages/reading_order/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
98
+ docling/models/stages/reading_order/readingorder_model.py,sha256=oa-S_QQhV6DJ5zOxYk1MCx4FNKjmdv4xHZYqsy9IAwM,17200
99
+ docling/models/stages/table_structure/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
100
+ docling/models/stages/table_structure/table_structure_model.py,sha256=I7I2J7guCVtaIo9O3hXXN31mof0FjSOE4chPomIybqY,11991
101
+ docling/models/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
102
+ docling/models/utils/generation_utils.py,sha256=0ZfMBMbolHAWjdbMza8FbD4_jQ4VY6ReUa4gqVLwMoU,5365
103
+ docling/models/utils/hf_model_download.py,sha256=VlKna9tLIVOGQkIRQBXfDimPIIyeRV7cFCbuOVmFQiU,1092
104
+ docling/models/vlm_pipeline_models/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
105
+ docling/models/vlm_pipeline_models/api_vlm_model.py,sha256=hesszuyWDac9DyDC8YiPJgGuwNuw9jpJNsK6YIHzPQw,7111
106
+ docling/models/vlm_pipeline_models/hf_transformers_model.py,sha256=kCpoaAGyGgGylLsb54Q3i_LS-mBKF3-r_9QduaGQv-M,15357
107
+ docling/models/vlm_pipeline_models/mlx_model.py,sha256=RSMUbvoEkIs74mvDghflRY5Q8bvE7xXgFenThU6b51E,13620
108
+ docling/models/vlm_pipeline_models/vllm_model.py,sha256=JiVQ_B7TzLKTyH-QjuGETrcUafoRfE85EvTXtkS16nE,11782
109
+ docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
110
+ docling/pipeline/asr_pipeline.py,sha256=rzEMHkbZfTmCwl4mjMa2bWRlVmkajC5nKBaY0bT7qj0,16020
111
+ docling/pipeline/base_extraction_pipeline.py,sha256=GYrEz83IXv-tdIHjtNWxMBNczFwL8SZyf9vnPJ3STaI,2627
112
+ docling/pipeline/base_pipeline.py,sha256=u6Ty_6bPwIYXT32U5h5wtrOhedWOgJ7ghz0PCWWb7Yg,13138
113
+ docling/pipeline/extraction_vlm_pipeline.py,sha256=oCKPm0MgwwpiWPRuPTC14kM7KkXrh6W4Ukb34mGhleo,8788
114
+ docling/pipeline/legacy_standard_pdf_pipeline.py,sha256=qLS5zYzPXrB09EltfXaxutBU4Q8mP40-itvB7HiMmkk,10905
115
+ docling/pipeline/simple_pipeline.py,sha256=FSL_ucDd9k0D9DjNKMUkyCULIU8a057dvWfLEPmAc2A,2287
116
+ docling/pipeline/standard_pdf_pipeline.py,sha256=YZTGxNN0i8LbIJ4wv7JEI8pNfOvJ0s8PbRyYB1zY6g0,36940
117
+ docling/pipeline/threaded_standard_pdf_pipeline.py,sha256=SO2ezB6QKpYKCJ1STctJNPoPyRQyf2jTksO1CRSukGc,200
118
+ docling/pipeline/vlm_pipeline.py,sha256=k9o9DfnoPlFvFU_NO-WXCYXBi3cIKqD_bmnBIQV0NXw,16488
119
+ docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
120
+ docling/utils/accelerator_utils.py,sha256=nkiLL3amdEzvwtKFBBeQv99ONTCV7NtyttfukXNp4EM,3461
121
+ docling/utils/api_image_request.py,sha256=2J6bMQEKTk-SODCWm1CWSyN9RKh-JB3hDsjzrxe3iBA,7024
122
+ docling/utils/deepseekocr_utils.py,sha256=TUerN2XGXOGodnAaRgXMZlKI57MmcFDB1yo3V0sQfow,13300
123
+ docling/utils/export.py,sha256=VwVUnYDk3mhGmISDbVm306fwpGNnoojouStBD4UajXI,4673
124
+ docling/utils/glm_utils.py,sha256=TKOWQqWAHsX_w4fvoAA7_2xCi_urhnp1DsmjY8_sk5w,12274
125
+ docling/utils/layout_postprocessor.py,sha256=bwDIhgUg5rKianzccGPTotTjqjkWtIQSoZwgKio8YC4,25124
126
+ docling/utils/locks.py,sha256=RzqQtD5UispgV71pGN_nU6GYfeN11BN0Sh_Dq9ycqGo,52
127
+ docling/utils/model_downloader.py,sha256=PkIPPrPso6ReUbGa6_fpIAKSm19uyfHyBs284EC31EI,5636
128
+ docling/utils/ocr_utils.py,sha256=nmresYyfin0raanpQc_GGeU3WoLsfExf6SEXNIQ7Djg,2325
129
+ docling/utils/orientation.py,sha256=jTyLxyT31FlOodZoBMlADHNQK2lAWKYVs5z7pXd_6Cg,1842
130
+ docling/utils/profiling.py,sha256=oFpOBP0ZTnnKb1Vlt4EmDhrYfnXFnpwu3o16Mcqmio0,1834
131
+ docling/utils/utils.py,sha256=kJtIYuzXeOyJHYlxmLAo7dGM5rEsDa1i84qEsUj1nio,1908
132
+ docling/utils/visualization.py,sha256=tY2ylE2aiQKkmzlSLnFW-HTfFyqUUMguW18ldd1PLfo,2868
133
+ docling-2.69.0.dist-info/licenses/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
134
+ docling-2.69.0.dist-info/METADATA,sha256=aBKCvBneOkcZYu5yoc5RzVK0kLfvXlR2oA88LQF4iLw,11696
135
+ docling-2.69.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
136
+ docling-2.69.0.dist-info/entry_points.txt,sha256=hzVlbeE0aMSTQ9S0-NTYN0Hmgsn6qL_EA2qX4UbkAuY,149
137
+ docling-2.69.0.dist-info/top_level.txt,sha256=vkIywP-USjFyYo1AIRQbWQQaL3xB5jf8vkCYdTIfNic,8
138
+ docling-2.69.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,6 @@
1
+ [console_scripts]
2
+ docling = docling.cli.main:app
3
+ docling-tools = docling.cli.tools:app
4
+
5
+ [docling]
6
+ docling_defaults = docling.models.plugins.defaults
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 International Business Machines
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ docling