docling-2.69.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.
Files changed (138)
  1. docling/__init__.py +0 -0
  2. docling/backend/__init__.py +0 -0
  3. docling/backend/abstract_backend.py +84 -0
  4. docling/backend/asciidoc_backend.py +443 -0
  5. docling/backend/csv_backend.py +125 -0
  6. docling/backend/docling_parse_backend.py +237 -0
  7. docling/backend/docling_parse_v2_backend.py +276 -0
  8. docling/backend/docling_parse_v4_backend.py +260 -0
  9. docling/backend/docx/__init__.py +0 -0
  10. docling/backend/docx/drawingml/utils.py +131 -0
  11. docling/backend/docx/latex/__init__.py +0 -0
  12. docling/backend/docx/latex/latex_dict.py +274 -0
  13. docling/backend/docx/latex/omml.py +459 -0
  14. docling/backend/html_backend.py +1502 -0
  15. docling/backend/image_backend.py +188 -0
  16. docling/backend/json/__init__.py +0 -0
  17. docling/backend/json/docling_json_backend.py +58 -0
  18. docling/backend/md_backend.py +618 -0
  19. docling/backend/mets_gbs_backend.py +399 -0
  20. docling/backend/msexcel_backend.py +686 -0
  21. docling/backend/mspowerpoint_backend.py +398 -0
  22. docling/backend/msword_backend.py +1663 -0
  23. docling/backend/noop_backend.py +51 -0
  24. docling/backend/pdf_backend.py +82 -0
  25. docling/backend/pypdfium2_backend.py +417 -0
  26. docling/backend/webvtt_backend.py +572 -0
  27. docling/backend/xml/__init__.py +0 -0
  28. docling/backend/xml/jats_backend.py +819 -0
  29. docling/backend/xml/uspto_backend.py +1905 -0
  30. docling/chunking/__init__.py +12 -0
  31. docling/cli/__init__.py +0 -0
  32. docling/cli/main.py +974 -0
  33. docling/cli/models.py +196 -0
  34. docling/cli/tools.py +17 -0
  35. docling/datamodel/__init__.py +0 -0
  36. docling/datamodel/accelerator_options.py +69 -0
  37. docling/datamodel/asr_model_specs.py +494 -0
  38. docling/datamodel/backend_options.py +102 -0
  39. docling/datamodel/base_models.py +493 -0
  40. docling/datamodel/document.py +699 -0
  41. docling/datamodel/extraction.py +39 -0
  42. docling/datamodel/layout_model_specs.py +91 -0
  43. docling/datamodel/pipeline_options.py +457 -0
  44. docling/datamodel/pipeline_options_asr_model.py +78 -0
  45. docling/datamodel/pipeline_options_vlm_model.py +136 -0
  46. docling/datamodel/settings.py +65 -0
  47. docling/datamodel/vlm_model_specs.py +365 -0
  48. docling/document_converter.py +559 -0
  49. docling/document_extractor.py +327 -0
  50. docling/exceptions.py +10 -0
  51. docling/experimental/__init__.py +5 -0
  52. docling/experimental/datamodel/__init__.py +1 -0
  53. docling/experimental/datamodel/table_crops_layout_options.py +13 -0
  54. docling/experimental/datamodel/threaded_layout_vlm_pipeline_options.py +45 -0
  55. docling/experimental/models/__init__.py +3 -0
  56. docling/experimental/models/table_crops_layout_model.py +114 -0
  57. docling/experimental/pipeline/__init__.py +1 -0
  58. docling/experimental/pipeline/threaded_layout_vlm_pipeline.py +439 -0
  59. docling/models/__init__.py +0 -0
  60. docling/models/base_layout_model.py +39 -0
  61. docling/models/base_model.py +230 -0
  62. docling/models/base_ocr_model.py +241 -0
  63. docling/models/base_table_model.py +45 -0
  64. docling/models/extraction/__init__.py +0 -0
  65. docling/models/extraction/nuextract_transformers_model.py +305 -0
  66. docling/models/factories/__init__.py +47 -0
  67. docling/models/factories/base_factory.py +122 -0
  68. docling/models/factories/layout_factory.py +7 -0
  69. docling/models/factories/ocr_factory.py +11 -0
  70. docling/models/factories/picture_description_factory.py +11 -0
  71. docling/models/factories/table_factory.py +7 -0
  72. docling/models/picture_description_base_model.py +149 -0
  73. docling/models/plugins/__init__.py +0 -0
  74. docling/models/plugins/defaults.py +60 -0
  75. docling/models/stages/__init__.py +0 -0
  76. docling/models/stages/code_formula/__init__.py +0 -0
  77. docling/models/stages/code_formula/code_formula_model.py +342 -0
  78. docling/models/stages/layout/__init__.py +0 -0
  79. docling/models/stages/layout/layout_model.py +249 -0
  80. docling/models/stages/ocr/__init__.py +0 -0
  81. docling/models/stages/ocr/auto_ocr_model.py +132 -0
  82. docling/models/stages/ocr/easyocr_model.py +200 -0
  83. docling/models/stages/ocr/ocr_mac_model.py +145 -0
  84. docling/models/stages/ocr/rapid_ocr_model.py +328 -0
  85. docling/models/stages/ocr/tesseract_ocr_cli_model.py +331 -0
  86. docling/models/stages/ocr/tesseract_ocr_model.py +262 -0
  87. docling/models/stages/page_assemble/__init__.py +0 -0
  88. docling/models/stages/page_assemble/page_assemble_model.py +156 -0
  89. docling/models/stages/page_preprocessing/__init__.py +0 -0
  90. docling/models/stages/page_preprocessing/page_preprocessing_model.py +145 -0
  91. docling/models/stages/picture_classifier/__init__.py +0 -0
  92. docling/models/stages/picture_classifier/document_picture_classifier.py +246 -0
  93. docling/models/stages/picture_description/__init__.py +0 -0
  94. docling/models/stages/picture_description/picture_description_api_model.py +66 -0
  95. docling/models/stages/picture_description/picture_description_vlm_model.py +123 -0
  96. docling/models/stages/reading_order/__init__.py +0 -0
  97. docling/models/stages/reading_order/readingorder_model.py +431 -0
  98. docling/models/stages/table_structure/__init__.py +0 -0
  99. docling/models/stages/table_structure/table_structure_model.py +305 -0
  100. docling/models/utils/__init__.py +0 -0
  101. docling/models/utils/generation_utils.py +157 -0
  102. docling/models/utils/hf_model_download.py +45 -0
  103. docling/models/vlm_pipeline_models/__init__.py +1 -0
  104. docling/models/vlm_pipeline_models/api_vlm_model.py +180 -0
  105. docling/models/vlm_pipeline_models/hf_transformers_model.py +391 -0
  106. docling/models/vlm_pipeline_models/mlx_model.py +325 -0
  107. docling/models/vlm_pipeline_models/vllm_model.py +344 -0
  108. docling/pipeline/__init__.py +0 -0
  109. docling/pipeline/asr_pipeline.py +431 -0
  110. docling/pipeline/base_extraction_pipeline.py +72 -0
  111. docling/pipeline/base_pipeline.py +326 -0
  112. docling/pipeline/extraction_vlm_pipeline.py +207 -0
  113. docling/pipeline/legacy_standard_pdf_pipeline.py +262 -0
  114. docling/pipeline/simple_pipeline.py +55 -0
  115. docling/pipeline/standard_pdf_pipeline.py +859 -0
  116. docling/pipeline/threaded_standard_pdf_pipeline.py +5 -0
  117. docling/pipeline/vlm_pipeline.py +416 -0
  118. docling/py.typed +1 -0
  119. docling/utils/__init__.py +0 -0
  120. docling/utils/accelerator_utils.py +97 -0
  121. docling/utils/api_image_request.py +205 -0
  122. docling/utils/deepseekocr_utils.py +388 -0
  123. docling/utils/export.py +146 -0
  124. docling/utils/glm_utils.py +361 -0
  125. docling/utils/layout_postprocessor.py +683 -0
  126. docling/utils/locks.py +3 -0
  127. docling/utils/model_downloader.py +168 -0
  128. docling/utils/ocr_utils.py +69 -0
  129. docling/utils/orientation.py +65 -0
  130. docling/utils/profiling.py +65 -0
  131. docling/utils/utils.py +65 -0
  132. docling/utils/visualization.py +85 -0
  133. docling-2.69.0.dist-info/METADATA +237 -0
  134. docling-2.69.0.dist-info/RECORD +138 -0
  135. docling-2.69.0.dist-info/WHEEL +5 -0
  136. docling-2.69.0.dist-info/entry_points.txt +6 -0
  137. docling-2.69.0.dist-info/licenses/LICENSE +21 -0
  138. docling-2.69.0.dist-info/top_level.txt +1 -0
docling/datamodel/asr_model_specs.py
@@ -0,0 +1,494 @@
+ import logging
+ from enum import Enum
+
+ from pydantic import (
+     AnyUrl,
+ )
+
+ from docling.datamodel.accelerator_options import AcceleratorDevice
+ from docling.datamodel.pipeline_options_asr_model import (
+     # AsrResponseFormat,
+     # ApiAsrOptions,
+     InferenceAsrFramework,
+     InlineAsrMlxWhisperOptions,
+     InlineAsrNativeWhisperOptions,
+     TransformersModelType,
+ )
+
+ _log = logging.getLogger(__name__)
+
+
+ def _get_whisper_tiny_model():
+     """
+     Get the best Whisper Tiny model for the current hardware.
+
+     Automatically selects MLX Whisper Tiny for Apple Silicon (MPS) if available,
+     otherwise falls back to native Whisper Tiny.
+     """
+     # Check if MPS is available (Apple Silicon)
+     try:
+         import torch
+
+         has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
+     except ImportError:
+         has_mps = False
+
+     # Check if mlx-whisper is available
+     try:
+         import mlx_whisper  # type: ignore
+
+         has_mlx_whisper = True
+     except ImportError:
+         has_mlx_whisper = False
+
+     # Use MLX Whisper if both MPS and mlx-whisper are available
+     if has_mps and has_mlx_whisper:
+         return InlineAsrMlxWhisperOptions(
+             repo_id="mlx-community/whisper-tiny-mlx",
+             inference_framework=InferenceAsrFramework.MLX,
+             language="en",
+             task="transcribe",
+             word_timestamps=True,
+             no_speech_threshold=0.6,
+             logprob_threshold=-1.0,
+             compression_ratio_threshold=2.4,
+         )
+     else:
+         return InlineAsrNativeWhisperOptions(
+             repo_id="tiny",
+             inference_framework=InferenceAsrFramework.WHISPER,
+             verbose=True,
+             timestamps=True,
+             word_timestamps=True,
+             temperature=0.0,
+             max_new_tokens=256,
+             max_time_chunk=30.0,
+         )
+
+
+ # Create the model instance
+ WHISPER_TINY = _get_whisper_tiny_model()
+
+
+ def _get_whisper_small_model():
+     """
+     Get the best Whisper Small model for the current hardware.
+
+     Automatically selects MLX Whisper Small for Apple Silicon (MPS) if available,
+     otherwise falls back to native Whisper Small.
+     """
+     # Check if MPS is available (Apple Silicon)
+     try:
+         import torch
+
+         has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
+     except ImportError:
+         has_mps = False
+
+     # Check if mlx-whisper is available
+     try:
+         import mlx_whisper  # type: ignore
+
+         has_mlx_whisper = True
+     except ImportError:
+         has_mlx_whisper = False
+
+     # Use MLX Whisper if both MPS and mlx-whisper are available
+     if has_mps and has_mlx_whisper:
+         return InlineAsrMlxWhisperOptions(
+             repo_id="mlx-community/whisper-small-mlx",
+             inference_framework=InferenceAsrFramework.MLX,
+             language="en",
+             task="transcribe",
+             word_timestamps=True,
+             no_speech_threshold=0.6,
+             logprob_threshold=-1.0,
+             compression_ratio_threshold=2.4,
+         )
+     else:
+         return InlineAsrNativeWhisperOptions(
+             repo_id="small",
+             inference_framework=InferenceAsrFramework.WHISPER,
+             verbose=True,
+             timestamps=True,
+             word_timestamps=True,
+             temperature=0.0,
+             max_new_tokens=256,
+             max_time_chunk=30.0,
+         )
+
+
+ # Create the model instance
+ WHISPER_SMALL = _get_whisper_small_model()
+
+
+ def _get_whisper_medium_model():
+     """
+     Get the best Whisper Medium model for the current hardware.
+
+     Automatically selects MLX Whisper Medium for Apple Silicon (MPS) if available,
+     otherwise falls back to native Whisper Medium.
+     """
+     # Check if MPS is available (Apple Silicon)
+     try:
+         import torch
+
+         has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
+     except ImportError:
+         has_mps = False
+
+     # Check if mlx-whisper is available
+     try:
+         import mlx_whisper  # type: ignore
+
+         has_mlx_whisper = True
+     except ImportError:
+         has_mlx_whisper = False
+
+     # Use MLX Whisper if both MPS and mlx-whisper are available
+     if has_mps and has_mlx_whisper:
+         return InlineAsrMlxWhisperOptions(
+             repo_id="mlx-community/whisper-medium-mlx-8bit",
+             inference_framework=InferenceAsrFramework.MLX,
+             language="en",
+             task="transcribe",
+             word_timestamps=True,
+             no_speech_threshold=0.6,
+             logprob_threshold=-1.0,
+             compression_ratio_threshold=2.4,
+         )
+     else:
+         return InlineAsrNativeWhisperOptions(
+             repo_id="medium",
+             inference_framework=InferenceAsrFramework.WHISPER,
+             verbose=True,
+             timestamps=True,
+             word_timestamps=True,
+             temperature=0.0,
+             max_new_tokens=256,
+             max_time_chunk=30.0,
+         )
+
+
+ # Create the model instance
+ WHISPER_MEDIUM = _get_whisper_medium_model()
+
+
+ def _get_whisper_base_model():
+     """
+     Get the best Whisper Base model for the current hardware.
+
+     Automatically selects MLX Whisper Base for Apple Silicon (MPS) if available,
+     otherwise falls back to native Whisper Base.
+     """
+     # Check if MPS is available (Apple Silicon)
+     try:
+         import torch
+
+         has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
+     except ImportError:
+         has_mps = False
+
+     # Check if mlx-whisper is available
+     try:
+         import mlx_whisper  # type: ignore
+
+         has_mlx_whisper = True
+     except ImportError:
+         has_mlx_whisper = False
+
+     # Use MLX Whisper if both MPS and mlx-whisper are available
+     if has_mps and has_mlx_whisper:
+         return InlineAsrMlxWhisperOptions(
+             repo_id="mlx-community/whisper-base-mlx",
+             inference_framework=InferenceAsrFramework.MLX,
+             language="en",
+             task="transcribe",
+             word_timestamps=True,
+             no_speech_threshold=0.6,
+             logprob_threshold=-1.0,
+             compression_ratio_threshold=2.4,
+         )
+     else:
+         return InlineAsrNativeWhisperOptions(
+             repo_id="base",
+             inference_framework=InferenceAsrFramework.WHISPER,
+             verbose=True,
+             timestamps=True,
+             word_timestamps=True,
+             temperature=0.0,
+             max_new_tokens=256,
+             max_time_chunk=30.0,
+         )
+
+
+ # Create the model instance
+ WHISPER_BASE = _get_whisper_base_model()
+
+
+ def _get_whisper_large_model():
+     """
+     Get the best Whisper Large model for the current hardware.
+
+     Automatically selects MLX Whisper Large for Apple Silicon (MPS) if available,
+     otherwise falls back to native Whisper Large.
+     """
+     # Check if MPS is available (Apple Silicon)
+     try:
+         import torch
+
+         has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
+     except ImportError:
+         has_mps = False
+
+     # Check if mlx-whisper is available
+     try:
+         import mlx_whisper  # type: ignore
+
+         has_mlx_whisper = True
+     except ImportError:
+         has_mlx_whisper = False
+
+     # Use MLX Whisper if both MPS and mlx-whisper are available
+     if has_mps and has_mlx_whisper:
+         return InlineAsrMlxWhisperOptions(
+             repo_id="mlx-community/whisper-large-mlx-8bit",
+             inference_framework=InferenceAsrFramework.MLX,
+             language="en",
+             task="transcribe",
+             word_timestamps=True,
+             no_speech_threshold=0.6,
+             logprob_threshold=-1.0,
+             compression_ratio_threshold=2.4,
+         )
+     else:
+         return InlineAsrNativeWhisperOptions(
+             repo_id="large",
+             inference_framework=InferenceAsrFramework.WHISPER,
+             verbose=True,
+             timestamps=True,
+             word_timestamps=True,
+             temperature=0.0,
+             max_new_tokens=256,
+             max_time_chunk=30.0,
+         )
+
+
+ # Create the model instance
+ WHISPER_LARGE = _get_whisper_large_model()
+
+
+ def _get_whisper_turbo_model():
+     """
+     Get the best Whisper Turbo model for the current hardware.
+
+     Automatically selects MLX Whisper Turbo for Apple Silicon (MPS) if available,
+     otherwise falls back to native Whisper Turbo.
+     """
+     # Check if MPS is available (Apple Silicon)
+     try:
+         import torch
+
+         has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
+     except ImportError:
+         has_mps = False
+
+     # Check if mlx-whisper is available
+     try:
+         import mlx_whisper  # type: ignore
+
+         has_mlx_whisper = True
+     except ImportError:
+         has_mlx_whisper = False
+
+     # Use MLX Whisper if both MPS and mlx-whisper are available
+     if has_mps and has_mlx_whisper:
+         return InlineAsrMlxWhisperOptions(
+             repo_id="mlx-community/whisper-turbo",
+             inference_framework=InferenceAsrFramework.MLX,
+             language="en",
+             task="transcribe",
+             word_timestamps=True,
+             no_speech_threshold=0.6,
+             logprob_threshold=-1.0,
+             compression_ratio_threshold=2.4,
+         )
+     else:
+         return InlineAsrNativeWhisperOptions(
+             repo_id="turbo",
+             inference_framework=InferenceAsrFramework.WHISPER,
+             verbose=True,
+             timestamps=True,
+             word_timestamps=True,
+             temperature=0.0,
+             max_new_tokens=256,
+             max_time_chunk=30.0,
+         )
+
+
+ # Create the model instance
+ WHISPER_TURBO = _get_whisper_turbo_model()
+
+ # Explicit MLX Whisper model options for users who want to force MLX usage
+ WHISPER_TINY_MLX = InlineAsrMlxWhisperOptions(
+     repo_id="mlx-community/whisper-tiny-mlx",
+     inference_framework=InferenceAsrFramework.MLX,
+     language="en",
+     task="transcribe",
+     word_timestamps=True,
+     no_speech_threshold=0.6,
+     logprob_threshold=-1.0,
+     compression_ratio_threshold=2.4,
+ )
+
+ WHISPER_SMALL_MLX = InlineAsrMlxWhisperOptions(
+     repo_id="mlx-community/whisper-small-mlx",
+     inference_framework=InferenceAsrFramework.MLX,
+     language="en",
+     task="transcribe",
+     word_timestamps=True,
+     no_speech_threshold=0.6,
+     logprob_threshold=-1.0,
+     compression_ratio_threshold=2.4,
+ )
+
+ WHISPER_MEDIUM_MLX = InlineAsrMlxWhisperOptions(
+     repo_id="mlx-community/whisper-medium-mlx-8bit",
+     inference_framework=InferenceAsrFramework.MLX,
+     language="en",
+     task="transcribe",
+     word_timestamps=True,
+     no_speech_threshold=0.6,
+     logprob_threshold=-1.0,
+     compression_ratio_threshold=2.4,
+ )
+
+ WHISPER_BASE_MLX = InlineAsrMlxWhisperOptions(
+     repo_id="mlx-community/whisper-base-mlx",
+     inference_framework=InferenceAsrFramework.MLX,
+     language="en",
+     task="transcribe",
+     word_timestamps=True,
+     no_speech_threshold=0.6,
+     logprob_threshold=-1.0,
+     compression_ratio_threshold=2.4,
+ )
+
+ WHISPER_LARGE_MLX = InlineAsrMlxWhisperOptions(
+     repo_id="mlx-community/whisper-large-mlx-8bit",
+     inference_framework=InferenceAsrFramework.MLX,
+     language="en",
+     task="transcribe",
+     word_timestamps=True,
+     no_speech_threshold=0.6,
+     logprob_threshold=-1.0,
+     compression_ratio_threshold=2.4,
+ )
+
+ WHISPER_TURBO_MLX = InlineAsrMlxWhisperOptions(
+     repo_id="mlx-community/whisper-turbo",
+     inference_framework=InferenceAsrFramework.MLX,
+     language="en",
+     task="transcribe",
+     word_timestamps=True,
+     no_speech_threshold=0.6,
+     logprob_threshold=-1.0,
+     compression_ratio_threshold=2.4,
+ )
+
+ # Explicit Native Whisper model options for users who want to force native usage
+ WHISPER_TINY_NATIVE = InlineAsrNativeWhisperOptions(
+     repo_id="tiny",
+     inference_framework=InferenceAsrFramework.WHISPER,
+     verbose=True,
+     timestamps=True,
+     word_timestamps=True,
+     temperature=0.0,
+     max_new_tokens=256,
+     max_time_chunk=30.0,
+ )
+
+ WHISPER_SMALL_NATIVE = InlineAsrNativeWhisperOptions(
+     repo_id="small",
+     inference_framework=InferenceAsrFramework.WHISPER,
+     verbose=True,
+     timestamps=True,
+     word_timestamps=True,
+     temperature=0.0,
+     max_new_tokens=256,
+     max_time_chunk=30.0,
+ )
+
+ WHISPER_MEDIUM_NATIVE = InlineAsrNativeWhisperOptions(
+     repo_id="medium",
+     inference_framework=InferenceAsrFramework.WHISPER,
+     verbose=True,
+     timestamps=True,
+     word_timestamps=True,
+     temperature=0.0,
+     max_new_tokens=256,
+     max_time_chunk=30.0,
+ )
+
+ WHISPER_BASE_NATIVE = InlineAsrNativeWhisperOptions(
+     repo_id="base",
+     inference_framework=InferenceAsrFramework.WHISPER,
+     verbose=True,
+     timestamps=True,
+     word_timestamps=True,
+     temperature=0.0,
+     max_new_tokens=256,
+     max_time_chunk=30.0,
+ )
+
+ WHISPER_LARGE_NATIVE = InlineAsrNativeWhisperOptions(
+     repo_id="large",
+     inference_framework=InferenceAsrFramework.WHISPER,
+     verbose=True,
+     timestamps=True,
+     word_timestamps=True,
+     temperature=0.0,
+     max_new_tokens=256,
+     max_time_chunk=30.0,
+ )
+
+ WHISPER_TURBO_NATIVE = InlineAsrNativeWhisperOptions(
+     repo_id="turbo",
+     inference_framework=InferenceAsrFramework.WHISPER,
+     verbose=True,
+     timestamps=True,
+     word_timestamps=True,
+     temperature=0.0,
+     max_new_tokens=256,
+     max_time_chunk=30.0,
+ )
+
+ # Note: The main WHISPER_* models (WHISPER_TURBO, WHISPER_BASE, etc.) automatically
+ # select the best implementation (MLX on Apple Silicon, Native elsewhere).
+ # Use the explicit _MLX or _NATIVE variants if you need to force a specific implementation.
+
+
+ class AsrModelType(str, Enum):
+     # Auto-selecting models (choose best implementation for hardware)
+     WHISPER_TINY = "whisper_tiny"
+     WHISPER_SMALL = "whisper_small"
+     WHISPER_MEDIUM = "whisper_medium"
+     WHISPER_BASE = "whisper_base"
+     WHISPER_LARGE = "whisper_large"
+     WHISPER_TURBO = "whisper_turbo"
+
+     # Explicit MLX models (force MLX implementation)
+     WHISPER_TINY_MLX = "whisper_tiny_mlx"
+     WHISPER_SMALL_MLX = "whisper_small_mlx"
+     WHISPER_MEDIUM_MLX = "whisper_medium_mlx"
+     WHISPER_BASE_MLX = "whisper_base_mlx"
+     WHISPER_LARGE_MLX = "whisper_large_mlx"
+     WHISPER_TURBO_MLX = "whisper_turbo_mlx"
+
+     # Explicit Native models (force native implementation)
+     WHISPER_TINY_NATIVE = "whisper_tiny_native"
+     WHISPER_SMALL_NATIVE = "whisper_small_native"
+     WHISPER_MEDIUM_NATIVE = "whisper_medium_native"
+     WHISPER_BASE_NATIVE = "whisper_base_native"
+     WHISPER_LARGE_NATIVE = "whisper_large_native"
+     WHISPER_TURBO_NATIVE = "whisper_turbo_native"
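For context, these specs are consumed by docling's ASR pipeline. The sketch below shows one plausible way to select one of them, following the pattern in docling's public ASR examples; AsrPipelineOptions, AudioFormatOption, and AsrPipeline are assumed from those examples and are not part of this diff.

from docling.datamodel import asr_model_specs
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import AsrPipelineOptions
from docling.document_converter import AudioFormatOption, DocumentConverter
from docling.pipeline.asr_pipeline import AsrPipeline

# WHISPER_TURBO auto-selects MLX on Apple Silicon and native Whisper elsewhere;
# use WHISPER_TURBO_MLX / WHISPER_TURBO_NATIVE to force a specific implementation.
pipeline_options = AsrPipelineOptions()
pipeline_options.asr_options = asr_model_specs.WHISPER_TURBO

converter = DocumentConverter(
    format_options={
        InputFormat.AUDIO: AudioFormatOption(
            pipeline_cls=AsrPipeline,
            pipeline_options=pipeline_options,
        )
    }
)
result = converter.convert("speech.wav")  # input path is illustrative
print(result.document.export_to_markdown())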
docling/datamodel/backend_options.py
@@ -0,0 +1,102 @@
+ from pathlib import PurePath
+ from typing import Annotated, Literal, Optional, Union
+
+ from pydantic import AnyUrl, BaseModel, Field, SecretStr
+
+
+ class BaseBackendOptions(BaseModel):
+     """Common options for all declarative document backends."""
+
+     enable_remote_fetch: bool = Field(
+         False, description="Enable remote resource fetching."
+     )
+     enable_local_fetch: bool = Field(
+         False, description="Enable local resource fetching."
+     )
+
+
+ class DeclarativeBackendOptions(BaseBackendOptions):
+     """Default backend options for a declarative document backend."""
+
+     kind: Literal["declarative"] = Field("declarative", exclude=True, repr=False)
+
+
+ class HTMLBackendOptions(BaseBackendOptions):
+     """Options specific to the HTML backend.
+
+     This class can be extended to include options specific to HTML processing.
+     """
+
+     kind: Literal["html"] = Field("html", exclude=True, repr=False)
+     fetch_images: bool = Field(
+         False,
+         description=(
+             "Whether the backend should access remote or local resources to parse "
+             "images in an HTML document."
+         ),
+     )
+     source_uri: Optional[Union[AnyUrl, PurePath]] = Field(
+         None,
+         description=(
+             "The URI that originates the HTML document. If provided, the backend "
+             "will use it to resolve relative paths in the HTML document."
+         ),
+     )
+     add_title: bool = Field(
+         True, description="Add the HTML title tag as furniture in the DoclingDocument."
+     )
+     infer_furniture: bool = Field(
+         True, description="Infer all the content before the first header as furniture."
+     )
+
+
+ class MarkdownBackendOptions(BaseBackendOptions):
+     """Options specific to the Markdown backend."""
+
+     kind: Literal["md"] = Field("md", exclude=True, repr=False)
+     fetch_images: bool = Field(
+         False,
+         description=(
+             "Whether the backend should access remote or local resources to parse "
+             "images in the markdown document."
+         ),
+     )
+     source_uri: Optional[Union[AnyUrl, PurePath]] = Field(
+         None,
+         description=(
+             "The URI that originates the markdown document. If provided, the backend "
+             "will use it to resolve relative paths in the markdown document."
+         ),
+     )
+
+
+ class PdfBackendOptions(BaseBackendOptions):
+     """Backend options for pdf document backends."""
+
+     kind: Literal["pdf"] = Field("pdf", exclude=True, repr=False)
+     password: Optional[SecretStr] = None
+
+
+ class MsExcelBackendOptions(BaseBackendOptions):
+     """Options specific to the MS Excel backend."""
+
+     kind: Literal["xlsx"] = Field("xlsx", exclude=True, repr=False)
+     treat_singleton_as_text: bool = Field(
+         False,
+         description=(
+             "Whether to treat singleton cells (1x1 tables with empty neighboring "
+             "cells) as TextItem instead of TableItem."
+         ),
+     )
+
+
+ BackendOptions = Annotated[
+     Union[
+         DeclarativeBackendOptions,
+         HTMLBackendOptions,
+         MarkdownBackendOptions,
+         PdfBackendOptions,
+         MsExcelBackendOptions,
+     ],
+     Field(discriminator="kind"),
+ ]