docling 2.69.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling might be problematic. Click here for more details.

Files changed (138) hide show
  1. docling/__init__.py +0 -0
  2. docling/backend/__init__.py +0 -0
  3. docling/backend/abstract_backend.py +84 -0
  4. docling/backend/asciidoc_backend.py +443 -0
  5. docling/backend/csv_backend.py +125 -0
  6. docling/backend/docling_parse_backend.py +237 -0
  7. docling/backend/docling_parse_v2_backend.py +276 -0
  8. docling/backend/docling_parse_v4_backend.py +260 -0
  9. docling/backend/docx/__init__.py +0 -0
  10. docling/backend/docx/drawingml/utils.py +131 -0
  11. docling/backend/docx/latex/__init__.py +0 -0
  12. docling/backend/docx/latex/latex_dict.py +274 -0
  13. docling/backend/docx/latex/omml.py +459 -0
  14. docling/backend/html_backend.py +1502 -0
  15. docling/backend/image_backend.py +188 -0
  16. docling/backend/json/__init__.py +0 -0
  17. docling/backend/json/docling_json_backend.py +58 -0
  18. docling/backend/md_backend.py +618 -0
  19. docling/backend/mets_gbs_backend.py +399 -0
  20. docling/backend/msexcel_backend.py +686 -0
  21. docling/backend/mspowerpoint_backend.py +398 -0
  22. docling/backend/msword_backend.py +1663 -0
  23. docling/backend/noop_backend.py +51 -0
  24. docling/backend/pdf_backend.py +82 -0
  25. docling/backend/pypdfium2_backend.py +417 -0
  26. docling/backend/webvtt_backend.py +572 -0
  27. docling/backend/xml/__init__.py +0 -0
  28. docling/backend/xml/jats_backend.py +819 -0
  29. docling/backend/xml/uspto_backend.py +1905 -0
  30. docling/chunking/__init__.py +12 -0
  31. docling/cli/__init__.py +0 -0
  32. docling/cli/main.py +974 -0
  33. docling/cli/models.py +196 -0
  34. docling/cli/tools.py +17 -0
  35. docling/datamodel/__init__.py +0 -0
  36. docling/datamodel/accelerator_options.py +69 -0
  37. docling/datamodel/asr_model_specs.py +494 -0
  38. docling/datamodel/backend_options.py +102 -0
  39. docling/datamodel/base_models.py +493 -0
  40. docling/datamodel/document.py +699 -0
  41. docling/datamodel/extraction.py +39 -0
  42. docling/datamodel/layout_model_specs.py +91 -0
  43. docling/datamodel/pipeline_options.py +457 -0
  44. docling/datamodel/pipeline_options_asr_model.py +78 -0
  45. docling/datamodel/pipeline_options_vlm_model.py +136 -0
  46. docling/datamodel/settings.py +65 -0
  47. docling/datamodel/vlm_model_specs.py +365 -0
  48. docling/document_converter.py +559 -0
  49. docling/document_extractor.py +327 -0
  50. docling/exceptions.py +10 -0
  51. docling/experimental/__init__.py +5 -0
  52. docling/experimental/datamodel/__init__.py +1 -0
  53. docling/experimental/datamodel/table_crops_layout_options.py +13 -0
  54. docling/experimental/datamodel/threaded_layout_vlm_pipeline_options.py +45 -0
  55. docling/experimental/models/__init__.py +3 -0
  56. docling/experimental/models/table_crops_layout_model.py +114 -0
  57. docling/experimental/pipeline/__init__.py +1 -0
  58. docling/experimental/pipeline/threaded_layout_vlm_pipeline.py +439 -0
  59. docling/models/__init__.py +0 -0
  60. docling/models/base_layout_model.py +39 -0
  61. docling/models/base_model.py +230 -0
  62. docling/models/base_ocr_model.py +241 -0
  63. docling/models/base_table_model.py +45 -0
  64. docling/models/extraction/__init__.py +0 -0
  65. docling/models/extraction/nuextract_transformers_model.py +305 -0
  66. docling/models/factories/__init__.py +47 -0
  67. docling/models/factories/base_factory.py +122 -0
  68. docling/models/factories/layout_factory.py +7 -0
  69. docling/models/factories/ocr_factory.py +11 -0
  70. docling/models/factories/picture_description_factory.py +11 -0
  71. docling/models/factories/table_factory.py +7 -0
  72. docling/models/picture_description_base_model.py +149 -0
  73. docling/models/plugins/__init__.py +0 -0
  74. docling/models/plugins/defaults.py +60 -0
  75. docling/models/stages/__init__.py +0 -0
  76. docling/models/stages/code_formula/__init__.py +0 -0
  77. docling/models/stages/code_formula/code_formula_model.py +342 -0
  78. docling/models/stages/layout/__init__.py +0 -0
  79. docling/models/stages/layout/layout_model.py +249 -0
  80. docling/models/stages/ocr/__init__.py +0 -0
  81. docling/models/stages/ocr/auto_ocr_model.py +132 -0
  82. docling/models/stages/ocr/easyocr_model.py +200 -0
  83. docling/models/stages/ocr/ocr_mac_model.py +145 -0
  84. docling/models/stages/ocr/rapid_ocr_model.py +328 -0
  85. docling/models/stages/ocr/tesseract_ocr_cli_model.py +331 -0
  86. docling/models/stages/ocr/tesseract_ocr_model.py +262 -0
  87. docling/models/stages/page_assemble/__init__.py +0 -0
  88. docling/models/stages/page_assemble/page_assemble_model.py +156 -0
  89. docling/models/stages/page_preprocessing/__init__.py +0 -0
  90. docling/models/stages/page_preprocessing/page_preprocessing_model.py +145 -0
  91. docling/models/stages/picture_classifier/__init__.py +0 -0
  92. docling/models/stages/picture_classifier/document_picture_classifier.py +246 -0
  93. docling/models/stages/picture_description/__init__.py +0 -0
  94. docling/models/stages/picture_description/picture_description_api_model.py +66 -0
  95. docling/models/stages/picture_description/picture_description_vlm_model.py +123 -0
  96. docling/models/stages/reading_order/__init__.py +0 -0
  97. docling/models/stages/reading_order/readingorder_model.py +431 -0
  98. docling/models/stages/table_structure/__init__.py +0 -0
  99. docling/models/stages/table_structure/table_structure_model.py +305 -0
  100. docling/models/utils/__init__.py +0 -0
  101. docling/models/utils/generation_utils.py +157 -0
  102. docling/models/utils/hf_model_download.py +45 -0
  103. docling/models/vlm_pipeline_models/__init__.py +1 -0
  104. docling/models/vlm_pipeline_models/api_vlm_model.py +180 -0
  105. docling/models/vlm_pipeline_models/hf_transformers_model.py +391 -0
  106. docling/models/vlm_pipeline_models/mlx_model.py +325 -0
  107. docling/models/vlm_pipeline_models/vllm_model.py +344 -0
  108. docling/pipeline/__init__.py +0 -0
  109. docling/pipeline/asr_pipeline.py +431 -0
  110. docling/pipeline/base_extraction_pipeline.py +72 -0
  111. docling/pipeline/base_pipeline.py +326 -0
  112. docling/pipeline/extraction_vlm_pipeline.py +207 -0
  113. docling/pipeline/legacy_standard_pdf_pipeline.py +262 -0
  114. docling/pipeline/simple_pipeline.py +55 -0
  115. docling/pipeline/standard_pdf_pipeline.py +859 -0
  116. docling/pipeline/threaded_standard_pdf_pipeline.py +5 -0
  117. docling/pipeline/vlm_pipeline.py +416 -0
  118. docling/py.typed +1 -0
  119. docling/utils/__init__.py +0 -0
  120. docling/utils/accelerator_utils.py +97 -0
  121. docling/utils/api_image_request.py +205 -0
  122. docling/utils/deepseekocr_utils.py +388 -0
  123. docling/utils/export.py +146 -0
  124. docling/utils/glm_utils.py +361 -0
  125. docling/utils/layout_postprocessor.py +683 -0
  126. docling/utils/locks.py +3 -0
  127. docling/utils/model_downloader.py +168 -0
  128. docling/utils/ocr_utils.py +69 -0
  129. docling/utils/orientation.py +65 -0
  130. docling/utils/profiling.py +65 -0
  131. docling/utils/utils.py +65 -0
  132. docling/utils/visualization.py +85 -0
  133. docling-2.69.0.dist-info/METADATA +237 -0
  134. docling-2.69.0.dist-info/RECORD +138 -0
  135. docling-2.69.0.dist-info/WHEEL +5 -0
  136. docling-2.69.0.dist-info/entry_points.txt +6 -0
  137. docling-2.69.0.dist-info/licenses/LICENSE +21 -0
  138. docling-2.69.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,439 @@
1
+ """Threaded Layout+VLM Pipeline
2
+ ================================
3
+ A specialized two-stage threaded pipeline that combines layout model preprocessing
4
+ with VLM processing. The layout model detects document elements and coordinates,
5
+ which are then injected into the VLM prompt for enhanced structured output.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import itertools
11
+ import logging
12
+ from pathlib import Path
13
+ from typing import TYPE_CHECKING, List, Optional, Union, cast
14
+
15
+ from docling_core.types.doc import DoclingDocument
16
+ from docling_core.types.doc.document import DocTagsDocument
17
+ from PIL import Image as PILImage
18
+
19
+ if TYPE_CHECKING:
20
+ from docling_core.types.doc.page import SegmentedPage
21
+
22
+ from docling.backend.abstract_backend import AbstractDocumentBackend
23
+ from docling.backend.pdf_backend import PdfDocumentBackend
24
+ from docling.datamodel.base_models import ConversionStatus, Page
25
+ from docling.datamodel.document import ConversionResult
26
+ from docling.datamodel.pipeline_options_vlm_model import (
27
+ ApiVlmOptions,
28
+ InferenceFramework,
29
+ InlineVlmOptions,
30
+ )
31
+ from docling.datamodel.settings import settings
32
+ from docling.experimental.datamodel.threaded_layout_vlm_pipeline_options import (
33
+ ThreadedLayoutVlmPipelineOptions,
34
+ )
35
+ from docling.models.base_model import BaseVlmPageModel
36
+ from docling.models.stages.layout.layout_model import LayoutModel
37
+ from docling.models.vlm_pipeline_models.api_vlm_model import ApiVlmModel
38
+ from docling.models.vlm_pipeline_models.hf_transformers_model import (
39
+ HuggingFaceTransformersVlmModel,
40
+ )
41
+ from docling.models.vlm_pipeline_models.mlx_model import HuggingFaceMlxModel
42
+ from docling.pipeline.base_pipeline import BasePipeline
43
+ from docling.pipeline.standard_pdf_pipeline import (
44
+ ProcessingResult,
45
+ RunContext,
46
+ ThreadedItem,
47
+ ThreadedPipelineStage,
48
+ ThreadedQueue,
49
+ )
50
+ from docling.utils.profiling import ProfilingScope, TimeRecorder
51
+
52
+ _log = logging.getLogger(__name__)
53
+
54
+
55
+ class ThreadedLayoutVlmPipeline(BasePipeline):
56
+ """Two-stage threaded pipeline: Layout Model → VLM Model."""
57
+
58
def __init__(self, pipeline_options: ThreadedLayoutVlmPipelineOptions) -> None:
    """Store the options and build the layout and VLM models.

    Args:
        pipeline_options: Options for this pipeline; passed to the base
            class and kept on the instance as ``pipeline_options``.
    """
    super().__init__(pipeline_options)
    self.pipeline_options: ThreadedLayoutVlmPipelineOptions = pipeline_options

    # Counter starting at 1; _build_document draws one value per call and
    # uses it as the run id attached to every queued item.
    self._run_seq = itertools.count(1)

    # Annotation only: the concrete VLM model is assigned in _init_models().
    self.vlm_model: BaseVlmPageModel

    self._init_models()
68
+
69
def _init_models(self) -> None:
    """Create the layout model and a layout-aware VLM model.

    The configured VLM options are re-instantiated as a local subclass whose
    ``build_prompt`` appends the detected layout clusters (as DocTags
    location elements) to the base prompt.

    Raises:
        ValueError: If the VLM options type or the inline inference
            framework is not one of the supported kinds.
    """
    options = self.pipeline_options
    artifacts_dir = self._resolve_artifacts_path()

    self.layout_model = LayoutModel(
        artifacts_path=artifacts_dir,
        accelerator_options=options.accelerator_options,
        options=options.layout_options,
    )

    base_vlm_options = options.vlm_options

    # Subclass the concrete options type so every configured field is kept
    # and only the prompt construction is overridden.
    class LayoutAwareVlmOptions(type(base_vlm_options)):  # type: ignore[misc]
        def build_prompt(
            self,
            page: Optional[SegmentedPage],
            *,
            _internal_page: Optional[Page] = None,
        ) -> str:
            base_prompt = self.prompt

            # Without the internal page there is no layout to inject.
            if _internal_page is None:
                return base_prompt

            if not _internal_page.size:
                _log.warning(
                    f"Page size not available for page {_internal_page.page_no}. Cannot enhance prompt with layout info."
                )
                return base_prompt

            augmented_prompt = base_prompt
            layout_prediction = _internal_page.predictions.layout
            if layout_prediction:
                from docling_core.types.doc.tokens import DocumentToken

                page_size = _internal_page.size
                layout_elements = []
                for cluster in layout_prediction.clusters:
                    # Tag name derived from the cluster's DocItemLabel.
                    tag_name = DocumentToken.create_token_name_from_doc_item_label(
                        label=cluster.label
                    )
                    location_tokens = DocumentToken.get_location(
                        bbox=cluster.bbox.as_tuple(),
                        page_w=page_size.width,
                        page_h=page_size.height,
                    )
                    layout_elements.append(
                        f"<{tag_name}>{location_tokens}</{tag_name}>"
                    )

                if layout_elements:
                    # One element per line, wrapped in a single <layout> tag.
                    augmented_prompt = (
                        base_prompt
                        + "<layout>"
                        + "\n".join(layout_elements)
                        + "</layout>"
                    )

            _log.debug("Enhanced Prompt with Layout Info: %s\n", augmented_prompt)
            return augmented_prompt

    vlm_options = LayoutAwareVlmOptions(**base_vlm_options.model_dump())

    if isinstance(base_vlm_options, ApiVlmOptions):
        self.vlm_model = ApiVlmModel(
            enabled=True,
            enable_remote_services=options.enable_remote_services,
            vlm_options=vlm_options,
        )
        return

    if not isinstance(base_vlm_options, InlineVlmOptions):
        raise ValueError(f"Unsupported VLM options type: {type(base_vlm_options)}")

    # All inline backends take the same constructor arguments, so pick the
    # class first and instantiate it once below.
    framework = vlm_options.inference_framework
    if framework == InferenceFramework.TRANSFORMERS:
        inline_model_cls = HuggingFaceTransformersVlmModel
    elif framework == InferenceFramework.MLX:
        inline_model_cls = HuggingFaceMlxModel
    elif framework == InferenceFramework.VLLM:
        # Imported lazily, only when the vLLM framework is selected.
        from docling.models.vlm_pipeline_models.vllm_model import VllmVlmModel

        inline_model_cls = VllmVlmModel
    else:
        raise ValueError(f"Unsupported VLM inference framework: {framework}")

    self.vlm_model = inline_model_cls(
        enabled=True,
        artifacts_path=artifacts_dir,
        accelerator_options=options.accelerator_options,
        vlm_options=vlm_options,
    )
179
+
180
+ def _resolve_artifacts_path(self) -> Optional[Path]:
181
+ """Resolve artifacts path from options or settings."""
182
+ if self.pipeline_options.artifacts_path:
183
+ p = Path(self.pipeline_options.artifacts_path).expanduser()
184
+ elif settings.artifacts_path:
185
+ p = Path(settings.artifacts_path).expanduser()
186
+ else:
187
+ return None
188
+ if not p.is_dir():
189
+ raise RuntimeError(
190
+ f"{p} does not exist or is not a directory containing the required models"
191
+ )
192
+ return p
193
+
194
def _create_run_ctx(self) -> RunContext:
    """Build the layout and VLM stages and chain them to an output queue.

    Returns:
        A RunContext holding both stages (layout first), the layout stage
        as entry point, and the queue that receives the VLM stage output.
    """
    opts = self.pipeline_options

    def make_stage(name, model, batch_size):
        # Both stages share the same batch timeout and queue capacity.
        return ThreadedPipelineStage(
            name=name,
            model=model,
            batch_size=batch_size,
            batch_timeout=opts.batch_timeout_seconds,
            queue_max_size=opts.queue_max_size,
        )

    layout_stage = make_stage("layout", self.layout_model, opts.layout_batch_size)
    # The VLM model is layout-aware through its options' build_prompt.
    vlm_stage = make_stage("vlm", self.vlm_model, opts.vlm_batch_size)

    # layout -> vlm -> output
    output_q = ThreadedQueue(opts.queue_max_size)
    layout_stage.add_output_queue(vlm_stage.input_queue)
    vlm_stage.add_output_queue(output_q)

    return RunContext(
        stages=[layout_stage, vlm_stage],
        first_stage=layout_stage,
        output_queue=output_q,
    )
225
+
226
def _build_document(self, conv_res: ConversionResult) -> ConversionResult:
    """Run every in-range page through the layout and VLM stages.

    Pages inside the configured page range are loaded from the PDF backend,
    fed to the first stage without blocking, and collected from the output
    queue until each page has either succeeded or failed. Stages are always
    stopped and the output queue closed, even on error.

    Args:
        conv_res: Conversion result whose input backend is a PDF backend.

    Returns:
        The same ``conv_res``, with pages and status updated.
    """
    run_id = next(self._run_seq)
    assert isinstance(conv_res.input._backend, PdfDocumentBackend)
    backend = conv_res.input._backend

    # Load the pages that fall inside the requested (1-based) page range.
    start_page, end_page = conv_res.input.limits.page_range
    images_scale = self.pipeline_options.images_scale
    pages: List[Page] = []
    for idx in range(conv_res.input.page_count):
        if not (start_page - 1 <= idx <= end_page - 1):
            continue
        page = Page(page_no=idx)
        if images_scale is not None:
            page._default_image_scale = images_scale
        page._backend = backend.load_page(idx)
        # Pages whose backend is missing or invalid are dropped here.
        if page._backend and page._backend.is_valid():
            page.size = page._backend.get_size()
            conv_res.pages.append(page)
            pages.append(page)

    if not pages:
        conv_res.status = ConversionStatus.FAILURE
        return conv_res

    total_pages = len(pages)
    ctx = self._create_run_ctx()
    for stage in ctx.stages:
        stage.start()

    proc = ProcessingResult(total_expected=total_pages)
    next_to_feed = 0
    drain_batch_size = 32
    input_q = ctx.first_stage.input_queue

    try:
        while proc.success_count + proc.failure_count < total_pages:
            # Feed as many pages as the input queue accepts right now
            # (timeout=0.0); try again on the next outer iteration.
            while next_to_feed < total_pages:
                pending = pages[next_to_feed]
                accepted = input_q.put(
                    ThreadedItem(
                        payload=pending,
                        run_id=run_id,
                        page_no=pending.page_no,
                        conv_res=conv_res,
                    ),
                    timeout=0.0,
                )
                if not accepted:
                    break
                next_to_feed += 1
                if next_to_feed == total_pages:
                    # Everything has been fed: close the input queue.
                    input_q.close()

            # Drain whatever results are available.
            results = ctx.output_queue.get_batch(drain_batch_size, timeout=0.05)
            for item in results:
                if item.run_id != run_id:
                    continue
                if item.is_failed or item.error:
                    proc.failed_pages.append(
                        (item.page_no, item.error or RuntimeError("unknown error"))
                    )
                    continue
                assert item.payload is not None
                proc.pages.append(item.payload)

            # Output closed with nothing left: record the pages that never
            # arrived as failures (page number -1) and stop waiting.
            if not results and ctx.output_queue.closed:
                missing = total_pages - (proc.success_count + proc.failure_count)
                if missing > 0:
                    proc.failed_pages.extend(
                        [(-1, RuntimeError("pipeline terminated early"))] * missing
                    )
                break
    finally:
        for stage in ctx.stages:
            stage.stop()
        ctx.output_queue.close()

    self._integrate_results(conv_res, proc)
    return conv_res
308
+
309
def _integrate_results(
    self, conv_res: ConversionResult, proc: ProcessingResult
) -> None:
    """Merge the processed pages into ``conv_res`` and set its status.

    Pages returned by the pipeline replace their originals; pages reported
    as failed (and not returned) are dropped and their resources released.

    Args:
        conv_res: Conversion result to update in place.
        proc: Outcome of the threaded run (processed and failed pages).
    """
    page_map = {p.page_no: p for p in proc.pages}

    # Built once so the membership tests below are O(1) per page instead of
    # rescanning proc.failed_pages for every page.
    failed_page_nos = {page_no for page_no, _ in proc.failed_pages}

    # Captured before conv_res.pages is rebuilt, for resource cleanup below.
    pages_to_remove = [p for p in conv_res.pages if p.page_no in failed_page_nos]

    # Keep a page if the pipeline returned it, or if it was never reported
    # as failed; prefer the returned instance when there is one.
    conv_res.pages = [
        page_map.get(p.page_no, p)
        for p in conv_res.pages
        if p.page_no in page_map or p.page_no not in failed_page_nos
    ]

    if proc.is_complete_failure:
        conv_res.status = ConversionStatus.FAILURE
    elif proc.is_partial_success:
        conv_res.status = ConversionStatus.PARTIAL_SUCCESS
    else:
        conv_res.status = ConversionStatus.SUCCESS

    # Release backend, cached images and parsed content of failed pages.
    for p in pages_to_remove:
        if p._backend is not None:
            p._backend.unload()
        p._image_cache = {}
        # parsed_page is optional; only clear it when present.
        if p.parsed_page is not None:
            del p.parsed_page
            p.parsed_page = None

    # Drop cached page images unless page images were requested.
    if not self.pipeline_options.generate_page_images:
        for p in conv_res.pages:
            p._image_cache = {}
349
+
350
def _assemble_document(self, conv_res: ConversionResult) -> ConversionResult:
    """Build ``conv_res.document`` from the VLM output and crop pictures.

    Args:
        conv_res: Conversion result whose pages carry VLM predictions.

    Returns:
        The same ``conv_res`` with ``document`` set.

    Raises:
        RuntimeError: If the configured VLM response format is not DOCTAGS.
    """
    from docling_core.types.doc import DocItem, ImageRef, PictureItem

    from docling.datamodel.pipeline_options_vlm_model import ResponseFormat

    with TimeRecorder(conv_res, "doc_assemble", scope=ProfilingScope.DOCUMENT):
        # Safety net: the options are expected to have validated the response
        # format already, so this should not trigger.
        response_format = self.pipeline_options.vlm_options.response_format
        if response_format != ResponseFormat.DOCTAGS:
            raise RuntimeError(
                f"Unsupported VLM response format {response_format}. Only DOCTAGS format is supported."
            )
        conv_res.document = self._turn_dt_into_doc(conv_res)

        if self.pipeline_options.generate_picture_images:
            # Look pages up by number: the kept pages may be non-contiguous.
            pages_by_no = {p.page_no: p for p in conv_res.pages}
            scale = self.pipeline_options.images_scale

            for element, _level in conv_res.document.iterate_items():
                if not isinstance(element, DocItem) or len(element.prov) == 0:
                    continue
                if not isinstance(element, PictureItem):
                    continue

                first_prov = element.prov[0]
                page_no = first_prov.page_no
                page = pages_by_no.get(page_no)
                if page is None:
                    _log.warning(
                        f"Page {page_no} not found in conversion result for picture element. Skipping image generation."
                    )
                    continue
                assert page.size is not None
                assert page.image is not None

                # Scale the box to image resolution and convert it to a
                # top-left origin before cropping.
                scaled_bbox = first_prov.bbox.scaled(scale=scale)
                crop_bbox = scaled_bbox.to_top_left_origin(
                    page_height=page.size.height * scale
                )

                cropped_im = page.image.crop(crop_bbox.as_tuple())
                element.image = ImageRef.from_pil(cropped_im, dpi=int(72 * scale))

    return conv_res
402
+
403
def _turn_dt_into_doc(self, conv_res: ConversionResult) -> DoclingDocument:
    """Convert the per-page DocTags responses into a DoclingDocument.

    Only pages that have both an image and a VLM response contribute.

    Args:
        conv_res: Conversion result holding the processed pages.

    Returns:
        The document loaded from the paired DocTags texts and page images.
    """
    doctags_per_page = []
    image_per_page = []
    for page in conv_res.pages:
        if not page.image:
            continue
        vlm_response = page.predictions.vlm_response
        if not vlm_response:
            continue
        image_per_page.append(page.image)
        doctags_per_page.append(vlm_response.text)

    # The casts only satisfy the type checker; the lists are passed as-is.
    doctags_doc = DocTagsDocument.from_doctags_and_image_pairs(
        cast(List[Union[Path, str]], doctags_per_page),
        cast(List[Union[Path, PILImage.Image]], image_per_page),
    )
    return DoclingDocument.load_from_doctags(doctag_document=doctags_doc)
422
+
423
@classmethod
def get_default_options(cls) -> ThreadedLayoutVlmPipelineOptions:
    """Return a ThreadedLayoutVlmPipelineOptions built with its defaults."""
    default_options = ThreadedLayoutVlmPipelineOptions()
    return default_options
426
+
427
@classmethod
def is_backend_supported(cls, backend: AbstractDocumentBackend) -> bool:
    """Return True only when ``backend`` is a PdfDocumentBackend instance."""
    supported = isinstance(backend, PdfDocumentBackend)
    return supported
430
+
431
+ def _determine_status(self, conv_res: ConversionResult) -> ConversionStatus:
432
+ return conv_res.status
433
+
434
+ def _unload(self, conv_res: ConversionResult) -> None:
435
+ for p in conv_res.pages:
436
+ if p._backend is not None:
437
+ p._backend.unload()
438
+ if conv_res.input._backend:
439
+ conv_res.input._backend.unload()
File without changes
@@ -0,0 +1,39 @@
1
+ from __future__ import annotations
2
+
3
+ from abc import ABC, abstractmethod
4
+ from collections.abc import Iterable, Sequence
5
+ from typing import Type
6
+
7
+ from docling.datamodel.base_models import LayoutPrediction, Page
8
+ from docling.datamodel.document import ConversionResult
9
+ from docling.datamodel.pipeline_options import BaseLayoutOptions
10
+ from docling.models.base_model import BaseModelWithOptions, BasePageModel
11
+
12
+
13
class BaseLayoutModel(BasePageModel, BaseModelWithOptions, ABC):
    """Common interface implemented by layout models."""

    @classmethod
    @abstractmethod
    def get_options_type(cls) -> Type[BaseLayoutOptions]:
        """Return the options class this layout model accepts."""

    @abstractmethod
    def predict_layout(
        self,
        conv_res: ConversionResult,
        pages: Sequence[Page],
    ) -> Sequence[LayoutPrediction]:
        """Return layout predictions for ``pages``.

        ``__call__`` pairs the results with ``pages`` positionally.
        """

    def __call__(
        self,
        conv_res: ConversionResult,
        page_batch: Iterable[Page],
    ) -> Iterable[Page]:
        """Attach a layout prediction to each page and yield the pages.

        This is a generator: the batch is materialized and
        ``predict_layout`` is called once, when iteration starts.
        """
        batch = list(page_batch)
        layouts = self.predict_layout(conv_res, batch)

        # zip stops at the shorter sequence, so pages without a matching
        # prediction are not yielded.
        for current_page, layout in zip(batch, layouts):
            current_page.predictions.layout = layout
            yield current_page