docling 2.34.0__py3-none-any.whl → 2.36.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. docling/backend/xml/jats_backend.py +0 -0
  2. docling/cli/main.py +48 -18
  3. docling/datamodel/accelerator_options.py +68 -0
  4. docling/datamodel/base_models.py +10 -8
  5. docling/datamodel/document.py +7 -2
  6. docling/datamodel/pipeline_options.py +29 -161
  7. docling/datamodel/pipeline_options_vlm_model.py +81 -0
  8. docling/datamodel/vlm_model_specs.py +144 -0
  9. docling/document_converter.py +5 -0
  10. docling/models/api_vlm_model.py +1 -1
  11. docling/models/base_ocr_model.py +2 -1
  12. docling/models/code_formula_model.py +6 -11
  13. docling/models/document_picture_classifier.py +6 -11
  14. docling/models/easyocr_model.py +1 -2
  15. docling/models/layout_model.py +22 -17
  16. docling/models/ocr_mac_model.py +1 -1
  17. docling/models/page_preprocessing_model.py +11 -6
  18. docling/models/picture_description_api_model.py +1 -1
  19. docling/models/picture_description_base_model.py +1 -1
  20. docling/models/picture_description_vlm_model.py +7 -22
  21. docling/models/rapid_ocr_model.py +1 -2
  22. docling/models/table_structure_model.py +6 -12
  23. docling/models/tesseract_ocr_cli_model.py +1 -1
  24. docling/models/tesseract_ocr_model.py +1 -1
  25. docling/models/utils/__init__.py +0 -0
  26. docling/models/utils/hf_model_download.py +40 -0
  27. docling/models/vlm_models_inline/__init__.py +0 -0
  28. docling/models/vlm_models_inline/hf_transformers_model.py +194 -0
  29. docling/models/{hf_mlx_model.py → vlm_models_inline/mlx_model.py} +56 -44
  30. docling/pipeline/standard_pdf_pipeline.py +69 -57
  31. docling/pipeline/vlm_pipeline.py +228 -61
  32. docling/utils/accelerator_utils.py +17 -2
  33. docling/utils/model_downloader.py +13 -12
  34. {docling-2.34.0.dist-info → docling-2.36.0.dist-info}/METADATA +54 -55
  35. {docling-2.34.0.dist-info → docling-2.36.0.dist-info}/RECORD +48 -41
  36. {docling-2.34.0.dist-info → docling-2.36.0.dist-info}/WHEEL +2 -1
  37. docling-2.36.0.dist-info/entry_points.txt +6 -0
  38. docling-2.36.0.dist-info/top_level.txt +1 -0
  39. docling/models/hf_vlm_model.py +0 -182
  40. docling-2.34.0.dist-info/entry_points.txt +0 -7
  41. {docling-2.34.0.dist-info → docling-2.36.0.dist-info/licenses}/LICENSE +0 -0
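The headline change in this release is the reorganization of the VLM configuration: `HuggingFaceVlmOptions` and `docling/models/hf_vlm_model.py` are gone, replaced by `InlineVlmOptions` in the new `pipeline_options_vlm_model.py`, ready-made model presets in `vlm_model_specs.py`, and inline model implementations under `docling/models/vlm_models_inline/`. As a rough orientation before the hunks below, a minimal sketch of how the new layout might be driven from calling code (assuming the usual `DocumentConverter`/`PdfFormatOption` API and that `vlm_model_specs` exposes `InlineVlmOptions` presets such as `SMOLDOCLING_TRANSFORMERS`):

from docling.datamodel import vlm_model_specs
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import VlmPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.pipeline.vlm_pipeline import VlmPipeline

# Pick one of the new presets (e.g. SMOLDOCLING_MLX, SMOLDOCLING_TRANSFORMERS)
pipeline_options = VlmPipelineOptions(
    vlm_options=vlm_model_specs.SMOLDOCLING_TRANSFORMERS,
)

converter = DocumentConverter(
    format_options={
        InputFormat.PDF: PdfFormatOption(
            pipeline_cls=VlmPipeline,
            pipeline_options=pipeline_options,
        )
    }
)
result = converter.convert("report.pdf")  # hypothetical input file
print(result.document.export_to_markdown())

The presets bundle the repository id, inference framework, and response format, so the pipeline wiring shown in the hunks below stays an implementation detail.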
docling/pipeline/vlm_pipeline.py

@@ -1,29 +1,46 @@
  import logging
+ import re
  from io import BytesIO
  from pathlib import Path
  from typing import List, Optional, Union, cast

- from docling_core.types import DoclingDocument
- from docling_core.types.doc import BoundingBox, DocItem, ImageRef, PictureItem, TextItem
+ from docling_core.types.doc import (
+     BoundingBox,
+     DocItem,
+     DoclingDocument,
+     ImageRef,
+     PictureItem,
+     ProvenanceItem,
+     TextItem,
+ )
+ from docling_core.types.doc.base import (
+     BoundingBox,
+     Size,
+ )
  from docling_core.types.doc.document import DocTagsDocument
  from PIL import Image as PILImage

  from docling.backend.abstract_backend import AbstractDocumentBackend
+ from docling.backend.html_backend import HTMLDocumentBackend
  from docling.backend.md_backend import MarkdownDocumentBackend
  from docling.backend.pdf_backend import PdfDocumentBackend
  from docling.datamodel.base_models import InputFormat, Page
  from docling.datamodel.document import ConversionResult, InputDocument
  from docling.datamodel.pipeline_options import (
+     VlmPipelineOptions,
+ )
+ from docling.datamodel.pipeline_options_vlm_model import (
      ApiVlmOptions,
-     HuggingFaceVlmOptions,
      InferenceFramework,
+     InlineVlmOptions,
      ResponseFormat,
-     VlmPipelineOptions,
  )
  from docling.datamodel.settings import settings
  from docling.models.api_vlm_model import ApiVlmModel
- from docling.models.hf_mlx_model import HuggingFaceMlxModel
- from docling.models.hf_vlm_model import HuggingFaceVlmModel
+ from docling.models.vlm_models_inline.hf_transformers_model import (
+     HuggingFaceTransformersVlmModel,
+ )
+ from docling.models.vlm_models_inline.mlx_model import HuggingFaceMlxModel
  from docling.pipeline.base_pipeline import PaginatedPipeline
  from docling.utils.profiling import ProfilingScope, TimeRecorder

@@ -66,8 +83,8 @@ class VlmPipeline(PaginatedPipeline):
                      vlm_options=cast(ApiVlmOptions, self.pipeline_options.vlm_options),
                  ),
              ]
-         elif isinstance(self.pipeline_options.vlm_options, HuggingFaceVlmOptions):
-             vlm_options = cast(HuggingFaceVlmOptions, self.pipeline_options.vlm_options)
+         elif isinstance(self.pipeline_options.vlm_options, InlineVlmOptions):
+             vlm_options = cast(InlineVlmOptions, self.pipeline_options.vlm_options)
              if vlm_options.inference_framework == InferenceFramework.MLX:
                  self.build_pipe = [
                      HuggingFaceMlxModel(
@@ -77,15 +94,19 @@ class VlmPipeline(PaginatedPipeline):
                          vlm_options=vlm_options,
                      ),
                  ]
-             else:
+             elif vlm_options.inference_framework == InferenceFramework.TRANSFORMERS:
                  self.build_pipe = [
-                     HuggingFaceVlmModel(
+                     HuggingFaceTransformersVlmModel(
                          enabled=True,  # must be always enabled for this pipeline to make sense.
                          artifacts_path=artifacts_path,
                          accelerator_options=pipeline_options.accelerator_options,
                          vlm_options=vlm_options,
                      ),
                  ]
+             else:
+                 raise ValueError(
+                     f"Could not instantiate the right type of VLM pipeline: {vlm_options.inference_framework}"
+                 )

          self.enrichment_pipe = [
              # Other models working on `NodeItem` elements in the DoclingDocument
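The dispatch above now keys strictly on `InlineVlmOptions.inference_framework`: MLX goes to `HuggingFaceMlxModel`, TRANSFORMERS to the new `HuggingFaceTransformersVlmModel`, and anything else raises a `ValueError` instead of silently falling through to the transformers path. A small illustrative check of how the two SmolDocling presets are expected to map onto that dispatch (assuming the `vlm_model_specs` presets carry the matching `inference_framework` values):

from docling.datamodel.pipeline_options_vlm_model import InferenceFramework
from docling.datamodel.vlm_model_specs import SMOLDOCLING_MLX, SMOLDOCLING_TRANSFORMERS

# MLX preset -> HuggingFaceMlxModel, transformers preset -> HuggingFaceTransformersVlmModel
assert SMOLDOCLING_MLX.inference_framework == InferenceFramework.MLX
assert SMOLDOCLING_TRANSFORMERS.inference_framework == InferenceFramework.TRANSFORMERS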
@@ -116,49 +137,19 @@ class VlmPipeline(PaginatedPipeline):
                  self.pipeline_options.vlm_options.response_format
                  == ResponseFormat.DOCTAGS
              ):
-                 doctags_list = []
-                 image_list = []
-                 for page in conv_res.pages:
-                     predicted_doctags = ""
-                     img = PILImage.new("RGB", (1, 1), "rgb(255,255,255)")
-                     if page.predictions.vlm_response:
-                         predicted_doctags = page.predictions.vlm_response.text
-                     if page.image:
-                         img = page.image
-                     image_list.append(img)
-                     doctags_list.append(predicted_doctags)
-
-                 doctags_list_c = cast(List[Union[Path, str]], doctags_list)
-                 image_list_c = cast(List[Union[Path, PILImage.Image]], image_list)
-                 doctags_doc = DocTagsDocument.from_doctags_and_image_pairs(
-                     doctags_list_c, image_list_c
-                 )
-                 conv_res.document = DoclingDocument.load_from_doctags(doctags_doc)
-
-                 # If forced backend text, replace model predicted text with backend one
-                 if self.force_backend_text:
-                     scale = self.pipeline_options.images_scale
-                     for element, _level in conv_res.document.iterate_items():
-                         if not isinstance(element, TextItem) or len(element.prov) == 0:
-                             continue
-                         page_ix = element.prov[0].page_no - 1
-                         page = conv_res.pages[page_ix]
-                         if not page.size:
-                             continue
-                         crop_bbox = (
-                             element.prov[0]
-                             .bbox.scaled(scale=scale)
-                             .to_top_left_origin(page_height=page.size.height * scale)
-                         )
-                         txt = self.extract_text_from_backend(page, crop_bbox)
-                         element.text = txt
-                         element.orig = txt
+                 conv_res.document = self._turn_dt_into_doc(conv_res)
+
              elif (
                  self.pipeline_options.vlm_options.response_format
                  == ResponseFormat.MARKDOWN
              ):
                  conv_res.document = self._turn_md_into_doc(conv_res)

+             elif (
+                 self.pipeline_options.vlm_options.response_format == ResponseFormat.HTML
+             ):
+                 conv_res.document = self._turn_html_into_doc(conv_res)
+
              else:
                  raise RuntimeError(
                      f"Unsupported VLM response format {self.pipeline_options.vlm_options.response_format}"
@@ -192,23 +183,199 @@ class VlmPipeline(PaginatedPipeline):

          return conv_res

-     def _turn_md_into_doc(self, conv_res):
-         predicted_text = ""
-         for pg_idx, page in enumerate(conv_res.pages):
+     def _turn_dt_into_doc(self, conv_res) -> DoclingDocument:
+         doctags_list = []
+         image_list = []
+         for page in conv_res.pages:
+             predicted_doctags = ""
+             img = PILImage.new("RGB", (1, 1), "rgb(255,255,255)")
              if page.predictions.vlm_response:
-                 predicted_text += page.predictions.vlm_response.text + "\n\n"
-         response_bytes = BytesIO(predicted_text.encode("utf8"))
-         out_doc = InputDocument(
-             path_or_stream=response_bytes,
-             filename=conv_res.input.file.name,
-             format=InputFormat.MD,
-             backend=MarkdownDocumentBackend,
+                 predicted_doctags = page.predictions.vlm_response.text
+             if page.image:
+                 img = page.image
+             image_list.append(img)
+             doctags_list.append(predicted_doctags)
+
+         doctags_list_c = cast(List[Union[Path, str]], doctags_list)
+         image_list_c = cast(List[Union[Path, PILImage.Image]], image_list)
+         doctags_doc = DocTagsDocument.from_doctags_and_image_pairs(
+             doctags_list_c, image_list_c
          )
-         backend = MarkdownDocumentBackend(
-             in_doc=out_doc,
-             path_or_stream=response_bytes,
+         conv_res.document = DoclingDocument.load_from_doctags(
+             doctag_document=doctags_doc
          )
-         return backend.convert()
+
+         # If forced backend text, replace model predicted text with backend one
+         if page.size:
+             if self.force_backend_text:
+                 scale = self.pipeline_options.images_scale
+                 for element, _level in conv_res.document.iterate_items():
+                     if not isinstance(element, TextItem) or len(element.prov) == 0:
+                         continue
+                     crop_bbox = (
+                         element.prov[0]
+                         .bbox.scaled(scale=scale)
+                         .to_top_left_origin(page_height=page.size.height * scale)
+                     )
+                     txt = self.extract_text_from_backend(page, crop_bbox)
+                     element.text = txt
+                     element.orig = txt
+
+         return conv_res.document
+
+     def _turn_md_into_doc(self, conv_res):
+         def _extract_markdown_code(text):
+             """
+             Extracts text from markdown code blocks (enclosed in triple backticks).
+             If no code blocks are found, returns the original text.
+
+             Args:
+                 text (str): Input text that may contain markdown code blocks
+
+             Returns:
+                 str: Extracted code if code blocks exist, otherwise original text
+             """
+             # Regex pattern to match content between triple backticks
+             # This handles multiline content and optional language specifier
+             pattern = r"^```(?:\w*\n)?(.*?)```(\n)*$"
+
+             # Search with DOTALL flag to match across multiple lines
+             mtch = re.search(pattern, text, re.DOTALL)
+
+             if mtch:
+                 # Return only the content of the first capturing group
+                 return mtch.group(1)
+             else:
+                 # No code blocks found, return original text
+                 return text
+
+         for pg_idx, page in enumerate(conv_res.pages):
+             page_no = pg_idx + 1  # FIXME: might be incorrect
+
+             predicted_text = ""
+             if page.predictions.vlm_response:
+                 predicted_text = page.predictions.vlm_response.text + "\n\n"
+
+             predicted_text = _extract_markdown_code(text=predicted_text)
+
+             response_bytes = BytesIO(predicted_text.encode("utf8"))
+             out_doc = InputDocument(
+                 path_or_stream=response_bytes,
+                 filename=conv_res.input.file.name,
+                 format=InputFormat.MD,
+                 backend=MarkdownDocumentBackend,
+             )
+             backend = MarkdownDocumentBackend(
+                 in_doc=out_doc,
+                 path_or_stream=response_bytes,
+             )
+             page_doc = backend.convert()
+
+             if page.image is not None:
+                 pg_width = page.image.width
+                 pg_height = page.image.height
+             else:
+                 pg_width = 1
+                 pg_height = 1
+
+             conv_res.document.add_page(
+                 page_no=page_no,
+                 size=Size(width=pg_width, height=pg_height),
+                 image=ImageRef.from_pil(image=page.image, dpi=72)
+                 if page.image
+                 else None,
+             )
+
+             for item, level in page_doc.iterate_items():
+                 item.prov = [
+                     ProvenanceItem(
+                         page_no=pg_idx + 1,
+                         bbox=BoundingBox(
+                             t=0.0, b=0.0, l=0.0, r=0.0
+                         ),  # FIXME: would be nice not to have to "fake" it
+                         charspan=[0, 0],
+                     )
+                 ]
+                 conv_res.document.append_child_item(child=item)
+
+         return conv_res.document
+
+     def _turn_html_into_doc(self, conv_res):
+         def _extract_html_code(text):
+             """
+             Extracts text from markdown code blocks (enclosed in triple backticks).
+             If no code blocks are found, returns the original text.
+
+             Args:
+                 text (str): Input text that may contain markdown code blocks
+
+             Returns:
+                 str: Extracted code if code blocks exist, otherwise original text
+             """
+             # Regex pattern to match content between triple backticks
+             # This handles multiline content and optional language specifier
+             pattern = r"^```(?:\w*\n)?(.*?)```(\n)*$"
+
+             # Search with DOTALL flag to match across multiple lines
+             mtch = re.search(pattern, text, re.DOTALL)
+
+             if mtch:
+                 # Return only the content of the first capturing group
+                 return mtch.group(1)
+             else:
+                 # No code blocks found, return original text
+                 return text
+
+         for pg_idx, page in enumerate(conv_res.pages):
+             page_no = pg_idx + 1  # FIXME: might be incorrect
+
+             predicted_text = ""
+             if page.predictions.vlm_response:
+                 predicted_text = page.predictions.vlm_response.text + "\n\n"
+
+             predicted_text = _extract_html_code(text=predicted_text)
+
+             response_bytes = BytesIO(predicted_text.encode("utf8"))
+             out_doc = InputDocument(
+                 path_or_stream=response_bytes,
+                 filename=conv_res.input.file.name,
+                 format=InputFormat.MD,
+                 backend=HTMLDocumentBackend,
+             )
+             backend = HTMLDocumentBackend(
+                 in_doc=out_doc,
+                 path_or_stream=response_bytes,
+             )
+             page_doc = backend.convert()
+
+             if page.image is not None:
+                 pg_width = page.image.width
+                 pg_height = page.image.height
+             else:
+                 pg_width = 1
+                 pg_height = 1
+
+             conv_res.document.add_page(
+                 page_no=page_no,
+                 size=Size(width=pg_width, height=pg_height),
+                 image=ImageRef.from_pil(image=page.image, dpi=72)
+                 if page.image
+                 else None,
+             )
+
+             for item, level in page_doc.iterate_items():
+                 item.prov = [
+                     ProvenanceItem(
+                         page_no=pg_idx + 1,
+                         bbox=BoundingBox(
+                             t=0.0, b=0.0, l=0.0, r=0.0
+                         ),  # FIXME: would be nice not to have to "fake" it
+                         charspan=[0, 0],
+                     )
+                 ]
+                 conv_res.document.append_child_item(child=item)
+
+         return conv_res.document

      @classmethod
      def get_default_options(cls) -> VlmPipelineOptions:
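Both new per-page conversion helpers, `_turn_md_into_doc` and `_turn_html_into_doc`, first strip an optional surrounding triple-backtick fence (with an optional language tag) from the VLM response before feeding it to the Markdown or HTML backend. A quick standalone check of the regex they use:

import re

pattern = r"^```(?:\w*\n)?(.*?)```(\n)*$"

raw = "```markdown\n# Title\n\nSome body text.\n```\n"
mtch = re.search(pattern, raw, re.DOTALL)
print(mtch.group(1) if mtch else raw)
# -> "# Title\n\nSome body text.\n"  (fence and language tag removed)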
docling/utils/accelerator_utils.py

@@ -1,13 +1,16 @@
  import logging
+ from typing import List, Optional

  import torch

- from docling.datamodel.pipeline_options import AcceleratorDevice
+ from docling.datamodel.accelerator_options import AcceleratorDevice

  _log = logging.getLogger(__name__)


- def decide_device(accelerator_device: str) -> str:
+ def decide_device(
+     accelerator_device: str, supported_devices: Optional[List[AcceleratorDevice]] = None
+ ) -> str:
      r"""
      Resolve the device based on the acceleration options and the available devices in the system.

@@ -20,6 +23,18 @@ def decide_device(accelerator_device: str) -> str:
      has_cuda = torch.backends.cuda.is_built() and torch.cuda.is_available()
      has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()

+     if supported_devices is not None:
+         if has_cuda and AcceleratorDevice.CUDA not in supported_devices:
+             _log.info(
+                 f"Removing CUDA from available devices because it is not in {supported_devices=}"
+             )
+             has_cuda = False
+         if has_mps and AcceleratorDevice.MPS not in supported_devices:
+             _log.info(
+                 f"Removing MPS from available devices because it is not in {supported_devices=}"
+             )
+             has_mps = False
+
      if accelerator_device == AcceleratorDevice.AUTO.value:  # Handle 'auto'
          if has_cuda:
              device = "cuda:0"
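`decide_device` now accepts an optional `supported_devices` list so a model can declare which accelerators it actually runs on; CUDA and MPS are dropped from consideration before the "auto" resolution. A minimal sketch of a call (device names follow the existing AUTO/CPU/CUDA/MPS members of `AcceleratorDevice`):

from docling.datamodel.accelerator_options import AcceleratorDevice
from docling.utils.accelerator_utils import decide_device

# Resolve "auto", but restrict the choice to CPU and CUDA, e.g. for a model
# without MPS support; on an Apple Silicon machine this now yields "cpu".
device = decide_device(
    AcceleratorDevice.AUTO.value,
    supported_devices=[AcceleratorDevice.CPU, AcceleratorDevice.CUDA],
)
print(device)  # "cuda:0" when CUDA is available, otherwise "cpu"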
docling/utils/model_downloader.py

@@ -4,18 +4,20 @@ from typing import Optional

  from docling.datamodel.pipeline_options import (
      granite_picture_description,
-     smoldocling_vlm_conversion_options,
-     smoldocling_vlm_mlx_conversion_options,
      smolvlm_picture_description,
  )
  from docling.datamodel.settings import settings
+ from docling.datamodel.vlm_model_specs import (
+     SMOLDOCLING_MLX,
+     SMOLDOCLING_TRANSFORMERS,
+ )
  from docling.models.code_formula_model import CodeFormulaModel
  from docling.models.document_picture_classifier import DocumentPictureClassifier
  from docling.models.easyocr_model import EasyOcrModel
- from docling.models.hf_vlm_model import HuggingFaceVlmModel
  from docling.models.layout_model import LayoutModel
  from docling.models.picture_description_vlm_model import PictureDescriptionVlmModel
  from docling.models.table_structure_model import TableStructureModel
+ from docling.models.utils.hf_model_download import download_hf_model

  _log = logging.getLogger(__name__)

@@ -75,7 +77,7 @@ def download_models(

      if with_smolvlm:
          _log.info("Downloading SmolVlm model...")
-         PictureDescriptionVlmModel.download_models(
+         download_hf_model(
              repo_id=smolvlm_picture_description.repo_id,
              local_dir=output_dir / smolvlm_picture_description.repo_cache_folder,
              force=force,
@@ -84,26 +86,25 @@ def download_models(

      if with_smoldocling:
          _log.info("Downloading SmolDocling model...")
-         HuggingFaceVlmModel.download_models(
-             repo_id=smoldocling_vlm_conversion_options.repo_id,
-             local_dir=output_dir / smoldocling_vlm_conversion_options.repo_cache_folder,
+         download_hf_model(
+             repo_id=SMOLDOCLING_TRANSFORMERS.repo_id,
+             local_dir=output_dir / SMOLDOCLING_TRANSFORMERS.repo_cache_folder,
              force=force,
              progress=progress,
          )

      if with_smoldocling_mlx:
          _log.info("Downloading SmolDocling MLX model...")
-         HuggingFaceVlmModel.download_models(
-             repo_id=smoldocling_vlm_mlx_conversion_options.repo_id,
-             local_dir=output_dir
-             / smoldocling_vlm_mlx_conversion_options.repo_cache_folder,
+         download_hf_model(
+             repo_id=SMOLDOCLING_MLX.repo_id,
+             local_dir=output_dir / SMOLDOCLING_MLX.repo_cache_folder,
              force=force,
              progress=progress,
          )

      if with_granite_vision:
          _log.info("Downloading Granite Vision model...")
-         PictureDescriptionVlmModel.download_models(
+         download_hf_model(
              repo_id=granite_picture_description.repo_id,
              local_dir=output_dir / granite_picture_description.repo_cache_folder,
              force=force,
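Model prefetching now funnels through the shared `download_hf_model` helper instead of per-model `download_models` classmethods, with the SmolDocling entries taken from the new `vlm_model_specs` presets. A minimal sketch of calling the helper directly, mirroring the `with_smoldocling` branch above (the target directory is hypothetical):

from pathlib import Path

from docling.datamodel.vlm_model_specs import SMOLDOCLING_TRANSFORMERS
from docling.models.utils.hf_model_download import download_hf_model

output_dir = Path("./models")  # hypothetical local cache directory
download_hf_model(
    repo_id=SMOLDOCLING_TRANSFORMERS.repo_id,
    local_dir=output_dir / SMOLDOCLING_TRANSFORMERS.repo_cache_folder,
    force=False,      # same flags as the download_models() call above
    progress=True,
)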
{docling-2.34.0.dist-info → docling-2.36.0.dist-info}/METADATA

@@ -1,67 +1,68 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.4
  Name: docling
- Version: 2.34.0
+ Version: 2.36.0
  Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
- Home-page: https://github.com/docling-project/docling
- License: MIT
+ Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
+ License-Expression: MIT
+ Project-URL: homepage, https://github.com/docling-project/docling
+ Project-URL: repository, https://github.com/docling-project/docling
+ Project-URL: issues, https://github.com/docling-project/docling/issues
+ Project-URL: changelog, https://github.com/docling-project/docling/blob/main/CHANGELOG.md
  Keywords: docling,convert,document,pdf,docx,html,markdown,layout model,segmentation,table structure,table former
- Author: Christoph Auer
- Author-email: cau@zurich.ibm.com
- Requires-Python: >=3.9,<4.0
+ Classifier: Operating System :: MacOS :: MacOS X
+ Classifier: Operating System :: POSIX :: Linux
+ Classifier: Operating System :: Microsoft :: Windows
  Classifier: Development Status :: 5 - Production/Stable
  Classifier: Intended Audience :: Developers
  Classifier: Intended Audience :: Science/Research
- Classifier: License :: OSI Approved :: MIT License
- Classifier: Operating System :: MacOS :: MacOS X
- Classifier: Operating System :: POSIX :: Linux
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
  Classifier: Programming Language :: Python :: 3
  Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
  Classifier: Programming Language :: Python :: 3.13
- Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
- Provides-Extra: ocrmac
- Provides-Extra: rapidocr
+ Requires-Python: <4.0,>=3.9
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: pydantic<3.0.0,>=2.0.0
+ Requires-Dist: docling-core[chunking]<3.0.0,>=2.29.0
+ Requires-Dist: docling-ibm-models<4.0.0,>=3.4.4
+ Requires-Dist: docling-parse<5.0.0,>=4.0.0
+ Requires-Dist: filetype<2.0.0,>=1.2.0
+ Requires-Dist: pypdfium2<5.0.0,>=4.30.0
+ Requires-Dist: pydantic-settings<3.0.0,>=2.3.0
+ Requires-Dist: huggingface_hub<1,>=0.23
+ Requires-Dist: requests<3.0.0,>=2.32.2
+ Requires-Dist: easyocr<2.0,>=1.7
+ Requires-Dist: certifi>=2024.7.4
+ Requires-Dist: rtree<2.0.0,>=1.3.0
+ Requires-Dist: typer<0.16.0,>=0.12.5
+ Requires-Dist: python-docx<2.0.0,>=1.1.2
+ Requires-Dist: python-pptx<2.0.0,>=1.0.2
+ Requires-Dist: beautifulsoup4<5.0.0,>=4.12.3
+ Requires-Dist: pandas<3.0.0,>=2.1.4
+ Requires-Dist: marko<3.0.0,>=2.1.2
+ Requires-Dist: openpyxl<4.0.0,>=3.1.5
+ Requires-Dist: lxml<6.0.0,>=4.0.0
+ Requires-Dist: pillow<12.0.0,>=10.0.0
+ Requires-Dist: tqdm<5.0.0,>=4.65.0
+ Requires-Dist: pluggy<2.0.0,>=1.0.0
+ Requires-Dist: pylatexenc<3.0,>=2.10
+ Requires-Dist: click<8.2.0
+ Requires-Dist: scipy<2.0.0,>=1.6.0
  Provides-Extra: tesserocr
+ Requires-Dist: tesserocr<3.0.0,>=2.7.1; extra == "tesserocr"
+ Provides-Extra: ocrmac
+ Requires-Dist: ocrmac<2.0.0,>=1.0.0; sys_platform == "darwin" and extra == "ocrmac"
  Provides-Extra: vlm
- Requires-Dist: accelerate (>=1.2.1,<2.0.0) ; (sys_platform != "darwin" or platform_machine != "x86_64") and (extra == "vlm")
- Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
- Requires-Dist: certifi (>=2024.7.4)
- Requires-Dist: click (<8.2.0)
- Requires-Dist: docling-core[chunking] (>=2.29.0,<3.0.0)
- Requires-Dist: docling-ibm-models (>=3.4.0,<4.0.0)
- Requires-Dist: docling-parse (>=4.0.0,<5.0.0)
- Requires-Dist: easyocr (>=1.7,<2.0)
- Requires-Dist: filetype (>=1.2.0,<2.0.0)
- Requires-Dist: huggingface_hub (>=0.23,<1)
- Requires-Dist: lxml (>=4.0.0,<6.0.0)
- Requires-Dist: marko (>=2.1.2,<3.0.0)
- Requires-Dist: ocrmac (>=1.0.0,<2.0.0) ; (sys_platform == "darwin") and (extra == "ocrmac")
- Requires-Dist: onnxruntime (>=1.7.0,<1.20.0) ; (python_version < "3.10") and (extra == "rapidocr")
- Requires-Dist: onnxruntime (>=1.7.0,<2.0.0) ; (python_version >= "3.10") and (extra == "rapidocr")
- Requires-Dist: openpyxl (>=3.1.5,<4.0.0)
- Requires-Dist: pandas (>=2.1.4,<3.0.0)
- Requires-Dist: pillow (>=10.0.0,<12.0.0)
- Requires-Dist: pluggy (>=1.0.0,<2.0.0)
- Requires-Dist: pydantic (>=2.0.0,<3.0.0)
- Requires-Dist: pydantic-settings (>=2.3.0,<3.0.0)
- Requires-Dist: pylatexenc (>=2.10,<3.0)
- Requires-Dist: pypdfium2 (>=4.30.0,<5.0.0)
- Requires-Dist: python-docx (>=1.1.2,<2.0.0)
- Requires-Dist: python-pptx (>=1.0.2,<2.0.0)
- Requires-Dist: rapidocr-onnxruntime (>=1.4.0,<2.0.0) ; (python_version < "3.13") and (extra == "rapidocr")
- Requires-Dist: requests (>=2.32.2,<3.0.0)
- Requires-Dist: rtree (>=1.3.0,<2.0.0)
- Requires-Dist: scipy (>=1.6.0,<1.14.0) ; python_version < "3.10"
- Requires-Dist: scipy (>=1.6.0,<2.0.0) ; python_version >= "3.10"
- Requires-Dist: tesserocr (>=2.7.1,<3.0.0) ; extra == "tesserocr"
- Requires-Dist: tqdm (>=4.65.0,<5.0.0)
- Requires-Dist: transformers (>=4.42.0,<4.43.0) ; (sys_platform == "darwin" and platform_machine == "x86_64") and (extra == "vlm")
- Requires-Dist: transformers (>=4.46.0,<5.0.0) ; (sys_platform != "darwin" or platform_machine != "x86_64") and (extra == "vlm")
- Requires-Dist: typer (>=0.12.5,<0.16.0)
- Project-URL: Repository, https://github.com/docling-project/docling
- Description-Content-Type: text/markdown
+ Requires-Dist: transformers<5.0.0,>=4.46.0; extra == "vlm"
+ Requires-Dist: accelerate<2.0.0,>=1.2.1; extra == "vlm"
+ Requires-Dist: mlx-vlm>=0.1.22; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "vlm"
+ Provides-Extra: rapidocr
+ Requires-Dist: rapidocr-onnxruntime<2.0.0,>=1.4.0; python_version < "3.13" and extra == "rapidocr"
+ Requires-Dist: onnxruntime<2.0.0,>=1.7.0; extra == "rapidocr"
+ Dynamic: license-file

  <p align="center">
    <a href="https://github.com/docling-project/docling">
@@ -79,9 +80,8 @@ Description-Content-Type: text/markdown
  [![Docs](https://img.shields.io/badge/docs-live-brightgreen)](https://docling-project.github.io/docling/)
  [![PyPI version](https://img.shields.io/pypi/v/docling)](https://pypi.org/project/docling/)
  [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/docling)](https://pypi.org/project/docling/)
- [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)](https://python-poetry.org/)
- [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
- [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
+ [![uv](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/uv/main/assets/badge/v0.json)](https://github.com/astral-sh/uv)
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
  [![Pydantic v2](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/pydantic/pydantic/main/docs/badge/v2.json)](https://pydantic.dev)
  [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
  [![License MIT](https://img.shields.io/github/license/docling-project/docling)](https://opensource.org/licenses/MIT)
@@ -101,7 +101,7 @@ Docling simplifies document processing, parsing diverse formats — including ad
  * 🔒 Local execution capabilities for sensitive data and air-gapped environments
  * 🤖 Plug-and-play [integrations][integrations] incl. LangChain, LlamaIndex, Crew AI & Haystack for agentic AI
  * 🔍 Extensive OCR support for scanned PDFs and images
- * 🥚 Support of Visual Language Models ([SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview)) 🆕
+ * 🥚 Support of several Visual Language Models ([SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview))
  * 💻 Simple and convenient CLI

  ### Coming soon
@@ -214,4 +214,3 @@ The project was started by the AI for knowledge team at IBM Research Zurich.
  [supported_formats]: https://docling-project.github.io/docling/usage/supported_formats/
  [docling_document]: https://docling-project.github.io/docling/concepts/docling_document/
  [integrations]: https://docling-project.github.io/docling/integrations/
-