docling-2.69.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (138)
  1. docling/__init__.py +0 -0
  2. docling/backend/__init__.py +0 -0
  3. docling/backend/abstract_backend.py +84 -0
  4. docling/backend/asciidoc_backend.py +443 -0
  5. docling/backend/csv_backend.py +125 -0
  6. docling/backend/docling_parse_backend.py +237 -0
  7. docling/backend/docling_parse_v2_backend.py +276 -0
  8. docling/backend/docling_parse_v4_backend.py +260 -0
  9. docling/backend/docx/__init__.py +0 -0
  10. docling/backend/docx/drawingml/utils.py +131 -0
  11. docling/backend/docx/latex/__init__.py +0 -0
  12. docling/backend/docx/latex/latex_dict.py +274 -0
  13. docling/backend/docx/latex/omml.py +459 -0
  14. docling/backend/html_backend.py +1502 -0
  15. docling/backend/image_backend.py +188 -0
  16. docling/backend/json/__init__.py +0 -0
  17. docling/backend/json/docling_json_backend.py +58 -0
  18. docling/backend/md_backend.py +618 -0
  19. docling/backend/mets_gbs_backend.py +399 -0
  20. docling/backend/msexcel_backend.py +686 -0
  21. docling/backend/mspowerpoint_backend.py +398 -0
  22. docling/backend/msword_backend.py +1663 -0
  23. docling/backend/noop_backend.py +51 -0
  24. docling/backend/pdf_backend.py +82 -0
  25. docling/backend/pypdfium2_backend.py +417 -0
  26. docling/backend/webvtt_backend.py +572 -0
  27. docling/backend/xml/__init__.py +0 -0
  28. docling/backend/xml/jats_backend.py +819 -0
  29. docling/backend/xml/uspto_backend.py +1905 -0
  30. docling/chunking/__init__.py +12 -0
  31. docling/cli/__init__.py +0 -0
  32. docling/cli/main.py +974 -0
  33. docling/cli/models.py +196 -0
  34. docling/cli/tools.py +17 -0
  35. docling/datamodel/__init__.py +0 -0
  36. docling/datamodel/accelerator_options.py +69 -0
  37. docling/datamodel/asr_model_specs.py +494 -0
  38. docling/datamodel/backend_options.py +102 -0
  39. docling/datamodel/base_models.py +493 -0
  40. docling/datamodel/document.py +699 -0
  41. docling/datamodel/extraction.py +39 -0
  42. docling/datamodel/layout_model_specs.py +91 -0
  43. docling/datamodel/pipeline_options.py +457 -0
  44. docling/datamodel/pipeline_options_asr_model.py +78 -0
  45. docling/datamodel/pipeline_options_vlm_model.py +136 -0
  46. docling/datamodel/settings.py +65 -0
  47. docling/datamodel/vlm_model_specs.py +365 -0
  48. docling/document_converter.py +559 -0
  49. docling/document_extractor.py +327 -0
  50. docling/exceptions.py +10 -0
  51. docling/experimental/__init__.py +5 -0
  52. docling/experimental/datamodel/__init__.py +1 -0
  53. docling/experimental/datamodel/table_crops_layout_options.py +13 -0
  54. docling/experimental/datamodel/threaded_layout_vlm_pipeline_options.py +45 -0
  55. docling/experimental/models/__init__.py +3 -0
  56. docling/experimental/models/table_crops_layout_model.py +114 -0
  57. docling/experimental/pipeline/__init__.py +1 -0
  58. docling/experimental/pipeline/threaded_layout_vlm_pipeline.py +439 -0
  59. docling/models/__init__.py +0 -0
  60. docling/models/base_layout_model.py +39 -0
  61. docling/models/base_model.py +230 -0
  62. docling/models/base_ocr_model.py +241 -0
  63. docling/models/base_table_model.py +45 -0
  64. docling/models/extraction/__init__.py +0 -0
  65. docling/models/extraction/nuextract_transformers_model.py +305 -0
  66. docling/models/factories/__init__.py +47 -0
  67. docling/models/factories/base_factory.py +122 -0
  68. docling/models/factories/layout_factory.py +7 -0
  69. docling/models/factories/ocr_factory.py +11 -0
  70. docling/models/factories/picture_description_factory.py +11 -0
  71. docling/models/factories/table_factory.py +7 -0
  72. docling/models/picture_description_base_model.py +149 -0
  73. docling/models/plugins/__init__.py +0 -0
  74. docling/models/plugins/defaults.py +60 -0
  75. docling/models/stages/__init__.py +0 -0
  76. docling/models/stages/code_formula/__init__.py +0 -0
  77. docling/models/stages/code_formula/code_formula_model.py +342 -0
  78. docling/models/stages/layout/__init__.py +0 -0
  79. docling/models/stages/layout/layout_model.py +249 -0
  80. docling/models/stages/ocr/__init__.py +0 -0
  81. docling/models/stages/ocr/auto_ocr_model.py +132 -0
  82. docling/models/stages/ocr/easyocr_model.py +200 -0
  83. docling/models/stages/ocr/ocr_mac_model.py +145 -0
  84. docling/models/stages/ocr/rapid_ocr_model.py +328 -0
  85. docling/models/stages/ocr/tesseract_ocr_cli_model.py +331 -0
  86. docling/models/stages/ocr/tesseract_ocr_model.py +262 -0
  87. docling/models/stages/page_assemble/__init__.py +0 -0
  88. docling/models/stages/page_assemble/page_assemble_model.py +156 -0
  89. docling/models/stages/page_preprocessing/__init__.py +0 -0
  90. docling/models/stages/page_preprocessing/page_preprocessing_model.py +145 -0
  91. docling/models/stages/picture_classifier/__init__.py +0 -0
  92. docling/models/stages/picture_classifier/document_picture_classifier.py +246 -0
  93. docling/models/stages/picture_description/__init__.py +0 -0
  94. docling/models/stages/picture_description/picture_description_api_model.py +66 -0
  95. docling/models/stages/picture_description/picture_description_vlm_model.py +123 -0
  96. docling/models/stages/reading_order/__init__.py +0 -0
  97. docling/models/stages/reading_order/readingorder_model.py +431 -0
  98. docling/models/stages/table_structure/__init__.py +0 -0
  99. docling/models/stages/table_structure/table_structure_model.py +305 -0
  100. docling/models/utils/__init__.py +0 -0
  101. docling/models/utils/generation_utils.py +157 -0
  102. docling/models/utils/hf_model_download.py +45 -0
  103. docling/models/vlm_pipeline_models/__init__.py +1 -0
  104. docling/models/vlm_pipeline_models/api_vlm_model.py +180 -0
  105. docling/models/vlm_pipeline_models/hf_transformers_model.py +391 -0
  106. docling/models/vlm_pipeline_models/mlx_model.py +325 -0
  107. docling/models/vlm_pipeline_models/vllm_model.py +344 -0
  108. docling/pipeline/__init__.py +0 -0
  109. docling/pipeline/asr_pipeline.py +431 -0
  110. docling/pipeline/base_extraction_pipeline.py +72 -0
  111. docling/pipeline/base_pipeline.py +326 -0
  112. docling/pipeline/extraction_vlm_pipeline.py +207 -0
  113. docling/pipeline/legacy_standard_pdf_pipeline.py +262 -0
  114. docling/pipeline/simple_pipeline.py +55 -0
  115. docling/pipeline/standard_pdf_pipeline.py +859 -0
  116. docling/pipeline/threaded_standard_pdf_pipeline.py +5 -0
  117. docling/pipeline/vlm_pipeline.py +416 -0
  118. docling/py.typed +1 -0
  119. docling/utils/__init__.py +0 -0
  120. docling/utils/accelerator_utils.py +97 -0
  121. docling/utils/api_image_request.py +205 -0
  122. docling/utils/deepseekocr_utils.py +388 -0
  123. docling/utils/export.py +146 -0
  124. docling/utils/glm_utils.py +361 -0
  125. docling/utils/layout_postprocessor.py +683 -0
  126. docling/utils/locks.py +3 -0
  127. docling/utils/model_downloader.py +168 -0
  128. docling/utils/ocr_utils.py +69 -0
  129. docling/utils/orientation.py +65 -0
  130. docling/utils/profiling.py +65 -0
  131. docling/utils/utils.py +65 -0
  132. docling/utils/visualization.py +85 -0
  133. docling-2.69.0.dist-info/METADATA +237 -0
  134. docling-2.69.0.dist-info/RECORD +138 -0
  135. docling-2.69.0.dist-info/WHEEL +5 -0
  136. docling-2.69.0.dist-info/entry_points.txt +6 -0
  137. docling-2.69.0.dist-info/licenses/LICENSE +21 -0
  138. docling-2.69.0.dist-info/top_level.txt +1 -0
docling/pipeline/threaded_standard_pdf_pipeline.py ADDED
@@ -0,0 +1,5 @@
+ from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline
+
+
+ class ThreadedStandardPdfPipeline(StandardPdfPipeline):
+     """Backwards compatible import for ThreadedStandardPdfPipeline."""
docling/pipeline/vlm_pipeline.py ADDED
@@ -0,0 +1,416 @@
+ import logging
+ import re
+ from io import BytesIO
+ from pathlib import Path
+ from typing import List, Optional, Union, cast
+
+ from docling_core.types.doc import (
+     BoundingBox,
+     ContentLayer,
+     DocItem,
+     DoclingDocument,
+     ImageRef,
+     PictureItem,
+     ProvenanceItem,
+     TableCell,
+     TableData,
+     TextItem,
+ )
+ from docling_core.types.doc.base import (
+     BoundingBox,
+     Size,
+ )
+ from docling_core.types.doc.document import DocTagsDocument
+ from lxml import etree
+ from PIL import Image as PILImage
+
+ from docling.backend.abstract_backend import (
+     AbstractDocumentBackend,
+     DeclarativeDocumentBackend,
+ )
+ from docling.backend.html_backend import HTMLDocumentBackend
+ from docling.backend.md_backend import MarkdownDocumentBackend
+ from docling.backend.pdf_backend import PdfDocumentBackend
+ from docling.datamodel.base_models import InputFormat, Page
+ from docling.datamodel.document import ConversionResult, InputDocument
+ from docling.datamodel.pipeline_options import (
+     VlmPipelineOptions,
+ )
+ from docling.datamodel.pipeline_options_vlm_model import (
+     ApiVlmOptions,
+     InferenceFramework,
+     InlineVlmOptions,
+     ResponseFormat,
+ )
+ from docling.datamodel.settings import settings
+ from docling.models.vlm_pipeline_models.api_vlm_model import ApiVlmModel
+ from docling.models.vlm_pipeline_models.hf_transformers_model import (
+     HuggingFaceTransformersVlmModel,
+ )
+ from docling.models.vlm_pipeline_models.mlx_model import HuggingFaceMlxModel
+ from docling.pipeline.base_pipeline import PaginatedPipeline
+ from docling.utils.deepseekocr_utils import parse_deepseekocr_markdown
+ from docling.utils.profiling import ProfilingScope, TimeRecorder
+
+ _log = logging.getLogger(__name__)
+
+
+ class VlmPipeline(PaginatedPipeline):
+     def __init__(self, pipeline_options: VlmPipelineOptions):
+         super().__init__(pipeline_options)
+         self.keep_backend = True
+
+         self.pipeline_options: VlmPipelineOptions
+
+         # force_backend_text = False - use text that is coming from VLM response
+         # force_backend_text = True - get text from backend using bounding boxes predicted by SmolDocling doctags
+         self.force_backend_text = (
+             pipeline_options.force_backend_text
+             and pipeline_options.vlm_options.response_format == ResponseFormat.DOCTAGS
+         )
+
+         self.keep_images = self.pipeline_options.generate_page_images
+
+         if isinstance(pipeline_options.vlm_options, ApiVlmOptions):
+             self.build_pipe = [
+                 ApiVlmModel(
+                     enabled=True,  # must be always enabled for this pipeline to make sense.
+                     enable_remote_services=self.pipeline_options.enable_remote_services,
+                     vlm_options=cast(ApiVlmOptions, self.pipeline_options.vlm_options),
+                 ),
+             ]
+         elif isinstance(self.pipeline_options.vlm_options, InlineVlmOptions):
+             vlm_options = cast(InlineVlmOptions, self.pipeline_options.vlm_options)
+             if vlm_options.inference_framework == InferenceFramework.MLX:
+                 self.build_pipe = [
+                     HuggingFaceMlxModel(
+                         enabled=True,  # must be always enabled for this pipeline to make sense.
+                         artifacts_path=self.artifacts_path,
+                         accelerator_options=pipeline_options.accelerator_options,
+                         vlm_options=vlm_options,
+                     ),
+                 ]
+             elif vlm_options.inference_framework == InferenceFramework.TRANSFORMERS:
+                 self.build_pipe = [
+                     HuggingFaceTransformersVlmModel(
+                         enabled=True,  # must be always enabled for this pipeline to make sense.
+                         artifacts_path=self.artifacts_path,
+                         accelerator_options=pipeline_options.accelerator_options,
+                         vlm_options=vlm_options,
+                     ),
+                 ]
+             elif vlm_options.inference_framework == InferenceFramework.VLLM:
+                 from docling.models.vlm_pipeline_models.vllm_model import VllmVlmModel
+
+                 self.build_pipe = [
+                     VllmVlmModel(
+                         enabled=True,  # must be always enabled for this pipeline to make sense.
+                         artifacts_path=self.artifacts_path,
+                         accelerator_options=pipeline_options.accelerator_options,
+                         vlm_options=vlm_options,
+                     ),
+                 ]
+             else:
+                 raise ValueError(
+                     f"Could not instantiate the right type of VLM pipeline: {vlm_options.inference_framework}"
+                 )
+
+         self.enrichment_pipe = [
+             # Other models working on `NodeItem` elements in the DoclingDocument
+         ]
+
+     def initialize_page(self, conv_res: ConversionResult, page: Page) -> Page:
+         with TimeRecorder(conv_res, "page_init"):
+             images_scale = self.pipeline_options.images_scale
+             if images_scale is not None:
+                 page._default_image_scale = images_scale
+             page._backend = conv_res.input._backend.load_page(page.page_no)  # type: ignore
+             if page._backend is not None and page._backend.is_valid():
+                 page.size = page._backend.get_size()
+
+                 if self.force_backend_text:
+                     page.parsed_page = page._backend.get_segmented_page()
+
+         return page
+
+     def extract_text_from_backend(
+         self, page: Page, bbox: Union[BoundingBox, None]
+     ) -> str:
+         # Convert bounding box normalized to 0-100 into page coordinates for cropping
+         text = ""
+         if bbox:
+             if page.size:
+                 if page._backend:
+                     text = page._backend.get_text_in_rect(bbox)
+         return text
+
+     def _assemble_document(self, conv_res: ConversionResult) -> ConversionResult:
+         with TimeRecorder(conv_res, "doc_assemble", scope=ProfilingScope.DOCUMENT):
+             if (
+                 self.pipeline_options.vlm_options.response_format
+                 == ResponseFormat.DOCTAGS
+             ):
+                 conv_res.document = self._turn_dt_into_doc(conv_res)
+
+             elif (
+                 self.pipeline_options.vlm_options.response_format
+                 == ResponseFormat.DEEPSEEKOCR_MARKDOWN
+             ):
+                 conv_res.document = self._parse_deepseekocr_markdown(conv_res)
+
+             elif (
+                 self.pipeline_options.vlm_options.response_format
+                 == ResponseFormat.MARKDOWN
+             ):
+                 conv_res.document = self._convert_text_with_backend(
+                     conv_res, InputFormat.MD, MarkdownDocumentBackend
+                 )
+
+             elif (
+                 self.pipeline_options.vlm_options.response_format == ResponseFormat.HTML
+             ):
+                 conv_res.document = self._convert_text_with_backend(
+                     conv_res, InputFormat.HTML, HTMLDocumentBackend
+                 )
+
+             else:
+                 raise RuntimeError(
+                     f"Unsupported VLM response format {self.pipeline_options.vlm_options.response_format}"
+                 )
+
+             # Generate images of the requested element types
+             if self.pipeline_options.generate_picture_images:
+                 scale = self.pipeline_options.images_scale
+                 for element, _level in conv_res.document.iterate_items():
+                     if not isinstance(element, DocItem) or len(element.prov) == 0:
+                         continue
+                     if (
+                         isinstance(element, PictureItem)
+                         and self.pipeline_options.generate_picture_images
+                     ):
+                         page_ix = element.prov[0].page_no - 1
+                         page = conv_res.pages[page_ix]
+                         assert page.size is not None
+                         assert page.image is not None
+
+                         crop_bbox = (
+                             element.prov[0]
+                             .bbox.scaled(scale=scale)
+                             .to_top_left_origin(page_height=page.size.height * scale)
+                         )
+
+                         cropped_im = page.image.crop(crop_bbox.as_tuple())
+                         element.image = ImageRef.from_pil(
+                             cropped_im, dpi=int(72 * scale)
+                         )
+
+         return conv_res
+
+     def _turn_dt_into_doc(self, conv_res) -> DoclingDocument:
+         doctags_list = []
+         image_list = []
+         for page in conv_res.pages:
+             predicted_doctags = ""
+             img = PILImage.new("RGB", (1, 1), "rgb(255,255,255)")
+             if page.predictions.vlm_response:
+                 predicted_doctags = page.predictions.vlm_response.text
+             if page.image:
+                 img = page.image
+             image_list.append(img)
+             doctags_list.append(predicted_doctags)
+
+         doctags_list_c = cast(List[Union[Path, str]], doctags_list)
+         image_list_c = cast(List[Union[Path, PILImage.Image]], image_list)
+         doctags_doc = DocTagsDocument.from_doctags_and_image_pairs(
+             doctags_list_c, image_list_c
+         )
+         conv_res.document = DoclingDocument.load_from_doctags(
+             doctag_document=doctags_doc
+         )
+
+         # If forced backend text, replace model predicted text with backend one
+         if page.size:
+             if self.force_backend_text:
+                 scale = self.pipeline_options.images_scale
+                 for element, _level in conv_res.document.iterate_items():
+                     if not isinstance(element, TextItem) or len(element.prov) == 0:
+                         continue
+                     crop_bbox = (
+                         element.prov[0]
+                         .bbox.scaled(scale=scale)
+                         .to_top_left_origin(page_height=page.size.height * scale)
+                     )
+                     txt = self.extract_text_from_backend(page, crop_bbox)
+                     element.text = txt
+                     element.orig = txt
+
+         return conv_res.document
+
+     def _parse_deepseekocr_markdown(
+         self, conv_res: ConversionResult
+     ) -> DoclingDocument:
+         """Parse DeepSeek OCR markdown with label[[x1, y1, x2, y2]] format.
+
+         Labels supported:
+         - text: Standard body text
+         - title: Main document or section titles
+         - sub_title: Secondary headings or sub-headers
+         - table: Tabular data
+         - table_caption: Descriptive text for tables
+         - figure: Image-based elements or diagrams
+         - figure_caption: Titles or descriptions for figures/images
+         - header / footer: Content at top or bottom margins of pages
+         """
+         page_docs = []
+
+         for pg_idx, page in enumerate(conv_res.pages):
+             predicted_text = ""
+             if page.predictions.vlm_response:
+                 predicted_text = page.predictions.vlm_response.text
+
+             assert page.size is not None
+
+             # Parse single page using the utility function
+             # Pass vlm_options.scale to convert bboxes from scaled image coords to original PDF coords
+             page_doc = parse_deepseekocr_markdown(
+                 content=predicted_text,
+                 original_page_size=page.size,
+                 page_no=pg_idx + 1,
+                 filename=conv_res.input.file.name or "file",
+                 page_image=page.image,
+             )
+             page_docs.append(page_doc)
+
+         # Add page metadata and concatenate
+         return self._add_page_metadata_and_concatenate(page_docs, conv_res)
+
+     def _extract_code_block(self, text: str) -> str:
+         """
+         Extracts text from markdown code blocks (enclosed in triple backticks).
+         If no code blocks are found, returns the original text.
+
+         Args:
+             text (str): Input text that may contain markdown code blocks
+
+         Returns:
+             str: Extracted code if code blocks exist, otherwise original text
+         """
+         # Regex pattern to match content between triple backticks
+         # This handles multiline content and optional language specifier
+         pattern = r"^```(?:\w*\n)?(.*?)```(\n)*$"
+
+         # Search with DOTALL flag to match across multiple lines
+         mtch = re.search(pattern, text, re.DOTALL)
+
+         if mtch:
+             # Return only the content of the first capturing group
+             return mtch.group(1)
+         else:
+             # No code blocks found, return original text
+             return text
+
+     def _add_page_metadata_and_concatenate(
+         self,
+         page_docs: List[DoclingDocument],
+         conv_res: ConversionResult,
+     ) -> DoclingDocument:
+         """
+         Add page metadata to page documents and concatenate them.
+
+         Args:
+             page_docs: List of page documents to process
+             conv_res: Conversion result containing page information
+
+         Returns:
+             DoclingDocument: Concatenated document with page metadata
+         """
+         for pg_idx, (page_doc, page) in enumerate(zip(page_docs, conv_res.pages)):
+             # Add page metadata to the page document before concatenation
+             if page.image is not None:
+                 pg_width = page.image.width
+                 pg_height = page.image.height
+             else:
+                 pg_width = 1
+                 pg_height = 1
+
+             page_doc.add_page(
+                 page_no=pg_idx + 1,
+                 size=Size(width=pg_width, height=pg_height),
+                 image=ImageRef.from_pil(image=page.image, dpi=72)
+                 if page.image
+                 else None,
+             )
+
+         # Concatenate all page documents to preserve hierarchy
+         return DoclingDocument.concatenate(docs=page_docs)
+
+     def _convert_text_with_backend(
+         self,
+         conv_res: ConversionResult,
+         input_format: InputFormat,
+         backend_class: type[DeclarativeDocumentBackend],
+     ) -> DoclingDocument:
+         """
+         Convert text-based formats (Markdown, HTML) into DoclingDocument using a backend.
+
+         Args:
+             conv_res: The conversion result containing pages with VLM predictions
+             input_format: The format type (MD or HTML)
+             backend_class: The backend class to use for conversion
+
+         Returns:
+             DoclingDocument: The assembled document
+         """
+         page_docs = []
+
+         for pg_idx, page in enumerate(conv_res.pages):
+             predicted_text = ""
+             if page.predictions.vlm_response:
+                 predicted_text = page.predictions.vlm_response.text + "\n\n"
+
+             # Extract content from code blocks if present
+             predicted_text = self._extract_code_block(text=predicted_text)
+
+             # Convert text to document using specified backend
+             response_bytes = BytesIO(predicted_text.encode("utf8"))
+             out_doc = InputDocument(
+                 path_or_stream=response_bytes,
+                 filename=conv_res.input.file.name,
+                 format=input_format,
+                 backend=backend_class,
+             )
+             backend = backend_class(
+                 in_doc=out_doc,
+                 path_or_stream=response_bytes,
+             )
+             page_doc = backend.convert()
+
+             # Modify provenance in place for all items in the page document
+             for item, level in page_doc.iterate_items(
+                 with_groups=True,
+                 traverse_pictures=True,
+                 included_content_layers=set(ContentLayer),
+             ):
+                 if isinstance(item, DocItem):
+                     item.prov = [
+                         ProvenanceItem(
+                             page_no=pg_idx + 1,
+                             bbox=BoundingBox(
+                                 t=0.0, b=0.0, l=0.0, r=0.0
+                             ),  # FIXME: would be nice not to have to "fake" it
+                             charspan=[0, 0],
+                         )
+                     ]
+
+             page_docs.append(page_doc)
+
+         # Add page metadata and concatenate
+         return self._add_page_metadata_and_concatenate(page_docs, conv_res)
+
+     @classmethod
+     def get_default_options(cls) -> VlmPipelineOptions:
+         return VlmPipelineOptions()
+
+     @classmethod
+     def is_backend_supported(cls, backend: AbstractDocumentBackend):
+         return isinstance(backend, PdfDocumentBackend)
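
The new VlmPipeline is typically wired in through the converter's format options rather than instantiated directly. A minimal usage sketch, assuming the DocumentConverter and PdfFormatOption APIs shipped in this wheel (the input path is hypothetical):

from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import VlmPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.pipeline.vlm_pipeline import VlmPipeline

# Route PDF conversion through the VLM pipeline instead of the standard PDF pipeline
converter = DocumentConverter(
    format_options={
        InputFormat.PDF: PdfFormatOption(
            pipeline_cls=VlmPipeline,
            pipeline_options=VlmPipelineOptions(),
        )
    }
)

result = converter.convert("example.pdf")  # hypothetical input file
print(result.document.export_to_markdown())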
docling/py.typed ADDED
@@ -0,0 +1 @@
+
docling/utils/accelerator_utils.py ADDED
@@ -0,0 +1,97 @@
+ import logging
+ from typing import List, Optional
+
+ from docling.datamodel.accelerator_options import AcceleratorDevice
+
+ _log = logging.getLogger(__name__)
+
+
+ def decide_device(
+     accelerator_device: str, supported_devices: Optional[List[AcceleratorDevice]] = None
+ ) -> str:
+     r"""
+     Resolve the device based on the acceleration options and the available devices in the system.
+
+     Rules:
+     1. AUTO: Check for the best available device on the system.
+     2. User-defined: Check if the device actually exists, otherwise fall-back to CPU
+     """
+     import torch
+
+     device = "cpu"
+
+     has_cuda = torch.backends.cuda.is_built() and torch.cuda.is_available()
+     has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
+     has_xpu = hasattr(torch, "xpu") and torch.xpu.is_available()
+
+     if supported_devices is not None:
+         if has_cuda and AcceleratorDevice.CUDA not in supported_devices:
+             _log.info(
+                 f"Removing CUDA from available devices because it is not in {supported_devices=}"
+             )
+             has_cuda = False
+         if has_mps and AcceleratorDevice.MPS not in supported_devices:
+             _log.info(
+                 f"Removing MPS from available devices because it is not in {supported_devices=}"
+             )
+             has_mps = False
+         if has_xpu and AcceleratorDevice.XPU not in supported_devices:
+             _log.info(
+                 f"Removing XPU from available devices because it is not in {supported_devices=}"
+             )
+             has_xpu = False
+
+     if accelerator_device == AcceleratorDevice.AUTO.value:  # Handle 'auto'
+         if has_cuda:
+             device = "cuda:0"
+         elif has_mps:
+             device = "mps"
+         elif has_xpu:
+             device = "xpu"
+
+     elif accelerator_device.startswith("cuda"):
+         if has_cuda:
+             # if cuda device index specified, extract device id
+             parts = accelerator_device.split(":")
+             if len(parts) == 2 and parts[1].isdigit():
+                 # select cuda device's id
+                 cuda_index = int(parts[1])
+                 if cuda_index < torch.cuda.device_count():
+                     device = f"cuda:{cuda_index}"
+                 else:
+                     _log.warning(
+                         "CUDA device 'cuda:%d' is not available. Fall back to 'CPU'.",
+                         cuda_index,
+                     )
+             elif len(parts) == 1:  # just "cuda"
+                 device = "cuda:0"
+             else:
+                 _log.warning(
+                     "Invalid CUDA device format '%s'. Fall back to 'CPU'",
+                     accelerator_device,
+                 )
+         else:
+             _log.warning("CUDA is not available in the system. Fall back to 'CPU'")
+
+     elif accelerator_device == AcceleratorDevice.MPS.value:
+         if has_mps:
+             device = "mps"
+         else:
+             _log.warning("MPS is not available in the system. Fall back to 'CPU'")
+
+     elif accelerator_device == AcceleratorDevice.XPU.value:
+         if has_xpu:
+             device = "xpu"
+         else:
+             _log.warning("XPU is not available in the system. Fall back to 'CPU'")
+
+     elif accelerator_device == AcceleratorDevice.CPU.value:
+         device = "cpu"
+
+     else:
+         _log.warning(
+             "Unknown device option '%s'. Fall back to 'CPU'", accelerator_device
+         )
+
+     _log.info("Accelerator device: '%s'", device)
+     return device
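
decide_device can be called on its own to preview the resolution rules above; a minimal sketch against the function as added in this diff (the returned string depends on the host's hardware):

from docling.datamodel.accelerator_options import AcceleratorDevice
from docling.utils.accelerator_utils import decide_device

# AUTO picks the best available backend: "cuda:0", "mps", "xpu", or "cpu"
device = decide_device(AcceleratorDevice.AUTO.value)

# An explicit CUDA index resolves to "cpu" when that device is not available
device = decide_device("cuda:1")

# Restrict the search, e.g. for a model that has no CUDA support
device = decide_device(
    AcceleratorDevice.AUTO.value,
    supported_devices=[AcceleratorDevice.CPU, AcceleratorDevice.MPS],
)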