docling-2.56.1-py3-none-any.whl → docling-2.58.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

@@ -4,7 +4,7 @@ import re
 import tempfile
 from io import BytesIO
 from pathlib import Path
-from typing import List, Optional, Union, cast
+from typing import TYPE_CHECKING, List, Optional, Union, cast
 
 from docling_core.types.doc import DoclingDocument, DocumentOrigin
 
@@ -32,6 +32,7 @@ from docling.datamodel.pipeline_options import (
     AsrPipelineOptions,
 )
 from docling.datamodel.pipeline_options_asr_model import (
+    InlineAsrMlxWhisperOptions,
     InlineAsrNativeWhisperOptions,
     # AsrResponseFormat,
     InlineAsrOptions,
@@ -228,22 +229,157 @@ class _NativeWhisperModel:
         return convo
 
 
+class _MlxWhisperModel:
+    def __init__(
+        self,
+        enabled: bool,
+        artifacts_path: Optional[Path],
+        accelerator_options: AcceleratorOptions,
+        asr_options: InlineAsrMlxWhisperOptions,
+    ):
+        """
+        Transcriber using MLX Whisper for Apple Silicon optimization.
+        """
+        self.enabled = enabled
+
+        _log.info(f"artifacts-path: {artifacts_path}")
+        _log.info(f"accelerator_options: {accelerator_options}")
+
+        if self.enabled:
+            try:
+                import mlx_whisper  # type: ignore
+            except ImportError:
+                raise ImportError(
+                    "mlx-whisper is not installed. Please install it via `pip install mlx-whisper` or do `uv sync --extra asr`."
+                )
+            self.asr_options = asr_options
+            self.mlx_whisper = mlx_whisper
+
+            self.device = decide_device(
+                accelerator_options.device,
+                supported_devices=asr_options.supported_devices,
+            )
+            _log.info(f"Available device for MLX Whisper: {self.device}")
+
+            self.model_name = asr_options.repo_id
+            _log.info(f"loading _MlxWhisperModel({self.model_name})")
+
+            # MLX Whisper models are loaded differently - they use HuggingFace repos
+            self.model_path = self.model_name
+
+            # Store MLX-specific options
+            self.language = asr_options.language
+            self.task = asr_options.task
+            self.word_timestamps = asr_options.word_timestamps
+            self.no_speech_threshold = asr_options.no_speech_threshold
+            self.logprob_threshold = asr_options.logprob_threshold
+            self.compression_ratio_threshold = asr_options.compression_ratio_threshold
+
+    def run(self, conv_res: ConversionResult) -> ConversionResult:
+        audio_path: Path = Path(conv_res.input.file).resolve()
+
+        try:
+            conversation = self.transcribe(audio_path)
+
+            # Ensure we have a proper DoclingDocument
+            origin = DocumentOrigin(
+                filename=conv_res.input.file.name or "audio.wav",
+                mimetype="audio/x-wav",
+                binary_hash=conv_res.input.document_hash,
+            )
+            conv_res.document = DoclingDocument(
+                name=conv_res.input.file.stem or "audio.wav", origin=origin
+            )
+
+            for citem in conversation:
+                conv_res.document.add_text(
+                    label=DocItemLabel.TEXT, text=citem.to_string()
+                )
+
+            conv_res.status = ConversionStatus.SUCCESS
+            return conv_res
+
+        except Exception as exc:
+            _log.error(f"MLX Audio transcription has an error: {exc}")
+
+            conv_res.status = ConversionStatus.FAILURE
+            return conv_res
+
+    def transcribe(self, fpath: Path) -> list[_ConversationItem]:
+        """
+        Transcribe audio using MLX Whisper.
+
+        Args:
+            fpath: Path to audio file
+
+        Returns:
+            List of conversation items with timestamps
+        """
+        result = self.mlx_whisper.transcribe(
+            str(fpath),
+            path_or_hf_repo=self.model_path,
+            language=self.language,
+            task=self.task,
+            word_timestamps=self.word_timestamps,
+            no_speech_threshold=self.no_speech_threshold,
+            logprob_threshold=self.logprob_threshold,
+            compression_ratio_threshold=self.compression_ratio_threshold,
+        )
+
+        convo: list[_ConversationItem] = []
+
+        # MLX Whisper returns segments similar to native Whisper
+        for segment in result.get("segments", []):
+            item = _ConversationItem(
+                start_time=segment.get("start"),
+                end_time=segment.get("end"),
+                text=segment.get("text", "").strip(),
+                words=[],
+            )
+
+            # Add word-level timestamps if available
+            if self.word_timestamps and "words" in segment:
+                item.words = []
+                for word_data in segment["words"]:
+                    item.words.append(
+                        _ConversationWord(
+                            start_time=word_data.get("start"),
+                            end_time=word_data.get("end"),
+                            text=word_data.get("word", ""),
+                        )
+                    )
+            convo.append(item)
+
+        return convo
+
+
 class AsrPipeline(BasePipeline):
     def __init__(self, pipeline_options: AsrPipelineOptions):
         super().__init__(pipeline_options)
         self.keep_backend = True
 
         self.pipeline_options: AsrPipelineOptions = pipeline_options
+        self._model: Union[_NativeWhisperModel, _MlxWhisperModel]
 
         if isinstance(self.pipeline_options.asr_options, InlineAsrNativeWhisperOptions):
-            asr_options: InlineAsrNativeWhisperOptions = (
+            native_asr_options: InlineAsrNativeWhisperOptions = (
                 self.pipeline_options.asr_options
             )
             self._model = _NativeWhisperModel(
                 enabled=True,  # must be always enabled for this pipeline to make sense.
                 artifacts_path=self.artifacts_path,
                 accelerator_options=pipeline_options.accelerator_options,
-                asr_options=asr_options,
+                asr_options=native_asr_options,
+            )
+        elif isinstance(self.pipeline_options.asr_options, InlineAsrMlxWhisperOptions):
+            mlx_asr_options: InlineAsrMlxWhisperOptions = (
+                self.pipeline_options.asr_options
+            )
+            self._model = _MlxWhisperModel(
+                enabled=True,  # must be always enabled for this pipeline to make sense.
+                artifacts_path=self.artifacts_path,
+                accelerator_options=pipeline_options.accelerator_options,
+                asr_options=mlx_asr_options,
             )
         else:
            _log.error(f"No model support for {self.pipeline_options.asr_options}")
@@ -6,6 +6,7 @@ from typing import List, Optional, Union, cast
 
 from docling_core.types.doc import (
     BoundingBox,
+    ContentLayer,
     DocItem,
     DoclingDocument,
     ImageRef,
@@ -251,9 +252,9 @@ class VlmPipeline(PaginatedPipeline):
             # No code blocks found, return original text
             return text
 
-        for pg_idx, page in enumerate(conv_res.pages):
-            page_no = pg_idx + 1  # FIXME: might be incorrect
+        page_docs = []
 
+        for pg_idx, page in enumerate(conv_res.pages):
             predicted_text = ""
             if page.predictions.vlm_response:
                 predicted_text = page.predictions.vlm_response.text + "\n\n"
@@ -273,6 +274,24 @@
             )
             page_doc = backend.convert()
 
+            # Modify provenance in place for all items in the page document
+            for item, level in page_doc.iterate_items(
+                with_groups=True,
+                traverse_pictures=True,
+                included_content_layers=set(ContentLayer),
+            ):
+                if isinstance(item, DocItem):
+                    item.prov = [
+                        ProvenanceItem(
+                            page_no=pg_idx + 1,
+                            bbox=BoundingBox(
+                                t=0.0, b=0.0, l=0.0, r=0.0
+                            ),  # FIXME: would be nice not to have to "fake" it
+                            charspan=[0, 0],
+                        )
+                    ]
+
+            # Add page metadata to the page document before concatenation
             if page.image is not None:
                 pg_width = page.image.width
                 pg_height = page.image.height
@@ -280,27 +299,18 @@
                 pg_width = 1
                 pg_height = 1
 
-            conv_res.document.add_page(
-                page_no=page_no,
+            page_doc.add_page(
+                page_no=pg_idx + 1,
                 size=Size(width=pg_width, height=pg_height),
                 image=ImageRef.from_pil(image=page.image, dpi=72)
                 if page.image
                 else None,
             )
 
-            for item, level in page_doc.iterate_items():
-                item.prov = [
-                    ProvenanceItem(
-                        page_no=pg_idx + 1,
-                        bbox=BoundingBox(
-                            t=0.0, b=0.0, l=0.0, r=0.0
-                        ),  # FIXME: would be nice not to have to "fake" it
-                        charspan=[0, 0],
-                    )
-                ]
-                conv_res.document.append_child_item(child=item)
+            page_docs.append(page_doc)
 
-        return conv_res.document
+        final_doc = DoclingDocument.concatenate(docs=page_docs)
+        return final_doc
 
     def _turn_html_into_doc(self, conv_res):
         def _extract_html_code(text):
@@ -328,9 +338,9 @@ class VlmPipeline(PaginatedPipeline):
             # No code blocks found, return original text
             return text
 
-        for pg_idx, page in enumerate(conv_res.pages):
-            page_no = pg_idx + 1  # FIXME: might be incorrect
+        page_docs = []
 
+        for pg_idx, page in enumerate(conv_res.pages):
             predicted_text = ""
             if page.predictions.vlm_response:
                 predicted_text = page.predictions.vlm_response.text + "\n\n"
@@ -341,7 +351,7 @@
             out_doc = InputDocument(
                 path_or_stream=response_bytes,
                 filename=conv_res.input.file.name,
-                format=InputFormat.MD,
+                format=InputFormat.HTML,
                 backend=HTMLDocumentBackend,
             )
             backend = HTMLDocumentBackend(
@@ -350,6 +360,24 @@
             )
             page_doc = backend.convert()
 
+            # Modify provenance in place for all items in the page document
+            for item, level in page_doc.iterate_items(
+                with_groups=True,
+                traverse_pictures=True,
+                included_content_layers=set(ContentLayer),
+            ):
+                if isinstance(item, DocItem):
+                    item.prov = [
+                        ProvenanceItem(
+                            page_no=pg_idx + 1,
+                            bbox=BoundingBox(
+                                t=0.0, b=0.0, l=0.0, r=0.0
+                            ),  # FIXME: would be nice not to have to "fake" it
+                            charspan=[0, 0],
+                        )
+                    ]
+
+            # Add page metadata to the page document before concatenation
             if page.image is not None:
                 pg_width = page.image.width
                 pg_height = page.image.height
@@ -357,27 +385,19 @@
                 pg_width = 1
                 pg_height = 1
 
-            conv_res.document.add_page(
-                page_no=page_no,
+            page_doc.add_page(
+                page_no=pg_idx + 1,
                 size=Size(width=pg_width, height=pg_height),
                 image=ImageRef.from_pil(image=page.image, dpi=72)
                 if page.image
                 else None,
             )
 
-            for item, level in page_doc.iterate_items():
-                item.prov = [
-                    ProvenanceItem(
-                        page_no=pg_idx + 1,
-                        bbox=BoundingBox(
-                            t=0.0, b=0.0, l=0.0, r=0.0
-                        ),  # FIXME: would be nice not to have to "fake" it
-                        charspan=[0, 0],
-                    )
-                ]
-                conv_res.document.append_child_item(child=item)
+            page_docs.append(page_doc)
 
-        return conv_res.document
+        # Concatenate all page documents to preserve hierarchy
+        final_doc = DoclingDocument.concatenate(docs=page_docs)
+        return final_doc
 
     @classmethod
     def get_default_options(cls) -> VlmPipelineOptions:
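
These hunks rework page assembly in both _turn_md_into_doc and _turn_html_into_doc: instead of re-parenting every backend-converted item onto conv_res.document with append_child_item, each page now becomes its own DoclingDocument (with the faked full-page provenance applied only to DocItem instances, traversing groups and pictures explicitly), and the per-page documents are merged at the end. The InputFormat.MD → InputFormat.HTML change also fixes an InputDocument that was labeled Markdown while using HTMLDocumentBackend. A small sketch of the merge step, assuming only that DoclingDocument.concatenate behaves as the diff uses it; the page contents are placeholders:

from docling_core.types.doc import DocItemLabel, DoclingDocument

# Build one standalone document per page (placeholder content).
page_docs: list[DoclingDocument] = []
for pg_idx in range(2):
    page_doc = DoclingDocument(name=f"page_{pg_idx + 1}")
    page_doc.add_text(label=DocItemLabel.TEXT, text=f"content of page {pg_idx + 1}")
    page_docs.append(page_doc)

# Unlike the old append_child_item loop, which re-parented items one by one,
# concatenate keeps each page's internal hierarchy intact in the merged result.
final_doc = DoclingDocument.concatenate(docs=page_docs)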
@@ -2,7 +2,7 @@ import base64
 import json
 import logging
 from io import BytesIO
-from typing import Dict, List, Optional
+from typing import Optional
 
 import requests
 from PIL import Image
@@ -19,7 +19,7 @@ def api_image_request(
     prompt: str,
     url: AnyUrl,
     timeout: float = 20,
-    headers: Optional[Dict[str, str]] = None,
+    headers: Optional[dict[str, str]] = None,
     **params,
 ) -> str:
     img_io = BytesIO()
@@ -69,8 +69,8 @@ def api_image_request_streaming(
     url: AnyUrl,
     *,
     timeout: float = 20,
-    headers: Optional[Dict[str, str]] = None,
-    generation_stoppers: List[GenerationStopper] = [],
+    headers: Optional[dict[str, str]] = None,
+    generation_stoppers: list[GenerationStopper] = [],
     **params,
 ) -> str:
     """
@@ -2,7 +2,6 @@ import bisect
 import logging
 import sys
 from collections import defaultdict
-from typing import Dict, List, Set, Tuple
 
 from docling_core.types.doc import DocItemLabel, Size
 from docling_core.types.doc.page import TextCell
@@ -39,7 +38,7 @@ class UnionFind:
             self.parent[root_y] = root_x
             self.rank[root_x] += 1
 
-    def get_groups(self) -> Dict[int, List[int]]:
+    def get_groups(self) -> dict[int, list[int]]:
         """Returns groups as {root: [elements]}."""
         groups = defaultdict(list)
         for elem in self.parent:
@@ -50,13 +49,13 @@
 class SpatialClusterIndex:
     """Efficient spatial indexing for clusters using R-tree and interval trees."""
 
-    def __init__(self, clusters: List[Cluster]):
+    def __init__(self, clusters: list[Cluster]):
         p = index.Property()
         p.dimension = 2
         self.spatial_index = index.Index(properties=p)
         self.x_intervals = IntervalTree()
         self.y_intervals = IntervalTree()
-        self.clusters_by_id: Dict[int, Cluster] = {}
+        self.clusters_by_id: dict[int, Cluster] = {}
 
         for cluster in clusters:
             self.add_cluster(cluster)
@@ -72,7 +71,7 @@ class SpatialClusterIndex:
         self.spatial_index.delete(cluster.id, cluster.bbox.as_tuple())
         del self.clusters_by_id[cluster.id]
 
-    def find_candidates(self, bbox: BoundingBox) -> Set[int]:
+    def find_candidates(self, bbox: BoundingBox) -> set[int]:
         """Find potential overlapping cluster IDs using all indexes."""
         spatial = set(self.spatial_index.intersection(bbox.as_tuple()))
         x_candidates = self.x_intervals.find_containing(
@@ -123,13 +122,13 @@ class IntervalTree:
     """Memory-efficient interval tree for 1D overlap queries."""
 
     def __init__(self):
-        self.intervals: List[Interval] = []  # Sorted by min_val
+        self.intervals: list[Interval] = []  # Sorted by min_val
 
     def insert(self, min_val: float, max_val: float, id: int):
         interval = Interval(min_val, max_val, id)
         bisect.insort(self.intervals, interval)
 
-    def find_containing(self, point: float) -> Set[int]:
+    def find_containing(self, point: float) -> set[int]:
         """Find all intervals containing the point."""
         pos = bisect.bisect_left(self.intervals, point)
         result = set()
@@ -196,7 +195,7 @@ class LayoutPostprocessor:
     }
 
     def __init__(
-        self, page: Page, clusters: List[Cluster], options: LayoutOptions
+        self, page: Page, clusters: list[Cluster], options: LayoutOptions
     ) -> None:
         """Initialize processor with page and clusters."""
 
@@ -219,7 +218,7 @@
             [c for c in self.special_clusters if c.label in self.WRAPPER_TYPES]
         )
 
-    def postprocess(self) -> Tuple[List[Cluster], List[TextCell]]:
+    def postprocess(self) -> tuple[list[Cluster], list[TextCell]]:
         """Main processing pipeline."""
         self.regular_clusters = self._process_regular_clusters()
         self.special_clusters = self._process_special_clusters()
@@ -254,7 +253,7 @@
 
         return final_clusters, self.cells
 
-    def _process_regular_clusters(self) -> List[Cluster]:
+    def _process_regular_clusters(self) -> list[Cluster]:
         """Process regular clusters with iterative refinement."""
         clusters = [
             c
@@ -311,7 +310,7 @@
 
         return clusters
 
-    def _process_special_clusters(self) -> List[Cluster]:
+    def _process_special_clusters(self) -> list[Cluster]:
         special_clusters = [
             c
             for c in self.special_clusters
@@ -381,7 +380,7 @@
 
         return picture_clusters + wrapper_clusters
 
-    def _handle_cross_type_overlaps(self, special_clusters) -> List[Cluster]:
+    def _handle_cross_type_overlaps(self, special_clusters) -> list[Cluster]:
         """Handle overlaps between regular and wrapper clusters before child assignment.
 
         In particular, KEY_VALUE_REGION proposals that are almost identical to a TABLE
@@ -454,7 +453,7 @@
 
     def _select_best_cluster_from_group(
         self,
-        group_clusters: List[Cluster],
+        group_clusters: list[Cluster],
         params: dict,
     ) -> Cluster:
         """Select best cluster from a group of overlapping clusters based on all rules."""
@@ -487,11 +486,11 @@
 
     def _remove_overlapping_clusters(
         self,
-        clusters: List[Cluster],
+        clusters: list[Cluster],
         cluster_type: str,
         overlap_threshold: float = 0.8,
         containment_threshold: float = 0.8,
-    ) -> List[Cluster]:
+    ) -> list[Cluster]:
         if not clusters:
             return []
 
@@ -544,7 +543,7 @@
 
     def _select_best_cluster(
         self,
-        clusters: List[Cluster],
+        clusters: list[Cluster],
         area_threshold: float,
         conf_threshold: float,
     ) -> Cluster:
@@ -572,7 +571,7 @@
 
         return current_best if current_best else clusters[0]
 
-    def _deduplicate_cells(self, cells: List[TextCell]) -> List[TextCell]:
+    def _deduplicate_cells(self, cells: list[TextCell]) -> list[TextCell]:
         """Ensure each cell appears only once, maintaining order of first appearance."""
         seen_ids = set()
         unique_cells = []
@@ -583,8 +582,8 @@
         return unique_cells
 
     def _assign_cells_to_clusters(
-        self, clusters: List[Cluster], min_overlap: float = 0.2
-    ) -> List[Cluster]:
+        self, clusters: list[Cluster], min_overlap: float = 0.2
+    ) -> list[Cluster]:
         """Assign cells to best overlapping cluster."""
         for cluster in clusters:
             cluster.cells = []
@@ -616,7 +615,7 @@
 
         return clusters
 
-    def _find_unassigned_cells(self, clusters: List[Cluster]) -> List[TextCell]:
+    def _find_unassigned_cells(self, clusters: list[Cluster]) -> list[TextCell]:
         """Find cells not assigned to any cluster."""
         assigned = {cell.index for cluster in clusters for cell in cluster.cells}
         return [
@@ -625,7 +624,7 @@
             if cell.index not in assigned and cell.text.strip()
         ]
 
-    def _adjust_cluster_bboxes(self, clusters: List[Cluster]) -> List[Cluster]:
+    def _adjust_cluster_bboxes(self, clusters: list[Cluster]) -> list[Cluster]:
         """Adjust cluster bounding boxes to contain their cells."""
         for cluster in clusters:
             if not cluster.cells:
@@ -651,13 +650,13 @@
 
         return clusters
 
-    def _sort_cells(self, cells: List[TextCell]) -> List[TextCell]:
+    def _sort_cells(self, cells: list[TextCell]) -> list[TextCell]:
         """Sort cells in native reading order."""
         return sorted(cells, key=lambda c: (c.index))
 
     def _sort_clusters(
-        self, clusters: List[Cluster], mode: str = "id"
-    ) -> List[Cluster]:
+        self, clusters: list[Cluster], mode: str = "id"
+    ) -> list[Cluster]:
         """Sort clusters in reading order (top-to-bottom, left-to-right)."""
         if mode == "id":  # sort in the order the cells are printed in the PDF.
             return sorted(
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docling
-Version: 2.56.1
+Version: 2.58.0
 Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
 Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
 License-Expression: MIT
@@ -27,7 +27,7 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: pydantic<3.0.0,>=2.0.0
 Requires-Dist: docling-core[chunking]<3.0.0,>=2.48.2
-Requires-Dist: docling-parse<5.0.0,>=4.4.0
+Requires-Dist: docling-parse<5.0.0,>=4.7.0
 Requires-Dist: docling-ibm-models<4,>=3.9.1
 Requires-Dist: filetype<2.0.0,>=1.2.0
 Requires-Dist: pypdfium2!=4.30.1,<5.0.0,>=4.30.0
@@ -69,6 +69,7 @@ Provides-Extra: rapidocr
 Requires-Dist: rapidocr<4.0.0,>=3.3; python_version < "3.14" and extra == "rapidocr"
 Requires-Dist: onnxruntime<2.0.0,>=1.7.0; extra == "rapidocr"
 Provides-Extra: asr
+Requires-Dist: mlx-whisper>=0.4.3; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "asr"
 Requires-Dist: openai-whisper>=20250625; extra == "asr"
 Dynamic: license-file
 
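The environment markers on the new mlx-whisper requirement confine it to Python 3.10+ on macOS arm64, so installing the asr extra (pip install "docling[asr]") pulls in mlx-whisper only on Apple Silicon; other platforms continue to receive openai-whisper alone.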
@@ -96,6 +97,7 @@ Dynamic: license-file
 [![PyPI Downloads](https://static.pepy.tech/badge/docling/month)](https://pepy.tech/projects/docling)
 [![Docling Actor](https://apify.com/actor-badge?actor=vancura/docling?fpr=docling)](https://apify.com/vancura/docling)
 [![Chat with Dosu](https://dosu.dev/dosu-chat-badge.svg)](https://app.dosu.dev/097760a8-135e-4789-8234-90c8837d7f1c/ask?utm_source=github)
+[![Discord](https://img.shields.io/discord/1399788921306746971?color=6A7EC2&logo=discord&logoColor=ffffff)](https://docling.ai/discord)
 [![OpenSSF Best Practices](https://www.bestpractices.dev/projects/10101/badge)](https://www.bestpractices.dev/projects/10101)
 [![LF AI & Data](https://img.shields.io/badge/LF%20AI%20%26%20Data-003778?logo=linuxfoundation&logoColor=fff&color=0094ff&labelColor=003778)](https://lfaidata.foundation/projects/)