docling 2.37.0__py3-none-any.whl → 2.38.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,253 @@
1
+ import logging
2
+ import os
3
+ import re
4
+ from io import BytesIO
5
+ from pathlib import Path
6
+ from typing import List, Optional, Union, cast
7
+
8
+ from docling_core.types.doc import DoclingDocument, DocumentOrigin
9
+
10
+ # import whisper # type: ignore
11
+ # import librosa
12
+ # import numpy as np
13
+ # import soundfile as sf # type: ignore
14
+ from docling_core.types.doc.labels import DocItemLabel
15
+ from pydantic import BaseModel, Field, validator
16
+
17
+ from docling.backend.abstract_backend import AbstractDocumentBackend
18
+ from docling.backend.noop_backend import NoOpBackend
19
+
20
+ # from pydub import AudioSegment # type: ignore
21
+ # from transformers import WhisperForConditionalGeneration, WhisperProcessor, pipeline
22
+ from docling.datamodel.accelerator_options import (
23
+ AcceleratorOptions,
24
+ )
25
+ from docling.datamodel.base_models import (
26
+ ConversionStatus,
27
+ FormatToMimeType,
28
+ )
29
+ from docling.datamodel.document import ConversionResult, InputDocument
30
+ from docling.datamodel.pipeline_options import (
31
+ AsrPipelineOptions,
32
+ )
33
+ from docling.datamodel.pipeline_options_asr_model import (
34
+ InlineAsrNativeWhisperOptions,
35
+ # AsrResponseFormat,
36
+ InlineAsrOptions,
37
+ )
38
+ from docling.datamodel.pipeline_options_vlm_model import (
39
+ InferenceFramework,
40
+ )
41
+ from docling.datamodel.settings import settings
42
+ from docling.pipeline.base_pipeline import BasePipeline
43
+ from docling.utils.accelerator_utils import decide_device
44
+ from docling.utils.profiling import ProfilingScope, TimeRecorder
45
+
46
+ _log = logging.getLogger(__name__)
47
+
48
+
49
class _ConversationWord(BaseModel):
    # A single transcribed word together with its optional time span.

    # The word text.
    text: str

    # Optional start of the word; no lower bound is declared for this field.
    start_time: Optional[float] = Field(
        default=None,
        description="Start time in seconds from video start",
    )

    # Optional end of the word; constrained to be >= 0 when provided.
    end_time: Optional[float] = Field(
        default=None,
        ge=0,
        description="End time in seconds from video start",
    )
57
+
58
+
59
class _ConversationItem(BaseModel):
    # One transcribed segment of a conversation: its text, an optional time
    # span, optional speaker information and optional per-word timings.
    #
    # Ordering and equality are both defined on ``start_time`` only, so two
    # items with different text but the same start time compare equal.
    # Because ``__eq__`` is overridden without ``__hash__``, Python leaves
    # instances unhashable.

    text: str
    start_time: Optional[float] = Field(
        None, description="Start time in seconds from video start"
    )
    end_time: Optional[float] = Field(
        None, ge=0, description="End time in seconds from video start"
    )
    speaker_id: Optional[int] = Field(None, description="Numeric speaker identifier")
    # NOTE(review): the description promises a "speaker-{speaker_id}" default,
    # but nothing in this class derives it -- the field simply stays None.
    speaker: Optional[str] = Field(
        None, description="Speaker name, defaults to speaker-{speaker_id}"
    )
    words: Optional[list[_ConversationWord]] = Field(
        None, description="Individual words with time-stamps"
    )

    def _start_sort_key(self) -> tuple[bool, float]:
        """Return a key that orders items by start time, tolerating ``None``.

        Items without a start time sort before every timed item; comparing
        ``None`` with a float directly would raise ``TypeError``.
        """
        if self.start_time is None:
            return (False, 0.0)
        return (True, self.start_time)

    def __lt__(self, other):
        """Order items by ``start_time``; untimed items come first."""
        if not isinstance(other, _ConversationItem):
            return NotImplemented
        return self._start_sort_key() < other._start_sort_key()

    def __eq__(self, other):
        """Two items are equal when their ``start_time`` values are equal."""
        if not isinstance(other, _ConversationItem):
            return NotImplemented
        return self.start_time == other.start_time

    def to_string(self) -> str:
        """Format the conversation entry as a string.

        The result is ``[time: START-END] [speaker:NAME] TEXT``. The time
        prefix appears only when both start and end are set, and the speaker
        prefix only when a speaker name is set.
        """
        parts: list[str] = []
        if (self.start_time is not None) and (self.end_time is not None):
            parts.append(f"[time: {self.start_time}-{self.end_time}] ")

        if self.speaker is not None:
            parts.append(f"[speaker:{self.speaker}] ")

        parts.append(self.text)
        return "".join(parts)
96
+
97
+
98
class _NativeWhisperModel:
    """Wrap a model loaded via ``whisper.load_model`` and turn its
    transcription segments into text items of a ``DoclingDocument``."""

    def __init__(
        self,
        enabled: bool,
        artifacts_path: Optional[Path],
        accelerator_options: AcceleratorOptions,
        asr_options: InlineAsrNativeWhisperOptions,
    ):
        """
        Transcriber using native Whisper.

        Args:
            enabled: When False, nothing is loaded and only ``self.enabled``
                is set; ``run``/``transcribe`` must not be called then.
            artifacts_path: Optional directory passed to whisper as
                ``download_root``; when None whisper's default is used.
            accelerator_options: Supplies the requested device.
            asr_options: Model name (``repo_id``), supported devices and
                transcription flags.

        Raises:
            ImportError: If ``enabled`` is True and ``whisper`` cannot be
                imported.
        """
        self.enabled = enabled

        _log.info(f"artifacts-path: {artifacts_path}")
        _log.info(f"accelerator_options: {accelerator_options}")

        if self.enabled:
            # Imported lazily so the dependency is only needed when ASR is used.
            try:
                import whisper  # type: ignore
            except ImportError as exc:
                raise ImportError(
                    "whisper is not installed. Please install it via `pip install openai-whisper` or do `uv sync --extra asr`."
                ) from exc
            self.asr_options = asr_options
            self.max_tokens = asr_options.max_new_tokens
            self.temperature = asr_options.temperature

            self.device = decide_device(
                accelerator_options.device,
                supported_devices=asr_options.supported_devices,
            )
            _log.info(f"Available device for Whisper: {self.device}")

            self.model_name = asr_options.repo_id
            _log.info(f"loading _NativeWhisperModel({self.model_name})")

            # Only pass ``download_root`` when an artifacts directory was given.
            load_kwargs = {"name": self.model_name, "device": self.device}
            if artifacts_path is not None:
                _log.info(f"loading {self.model_name} from {artifacts_path}")
                load_kwargs["download_root"] = str(artifacts_path)
            self.model = whisper.load_model(**load_kwargs)

            self.verbose = asr_options.verbose
            self.timestamps = asr_options.timestamps
            self.word_timestamps = asr_options.word_timestamps

    def run(self, conv_res: ConversionResult) -> ConversionResult:
        """Transcribe the input file of ``conv_res`` into its document.

        On success ``conv_res.document`` holds one TEXT item per transcribed
        segment and the status is SUCCESS. Any exception during transcription
        or document building is logged (with traceback) and the status is set
        to FAILURE instead of propagating.

        Returns:
            The same ``conv_res`` object, mutated in place.
        """
        audio_path: Path = Path(conv_res.input.file).resolve()

        try:
            conversation = self.transcribe(audio_path)

            # Ensure we have a proper DoclingDocument
            # NOTE(review): the mimetype is fixed to WAV regardless of the
            # actual input file type -- confirm whether that is intended.
            origin = DocumentOrigin(
                filename=conv_res.input.file.name or "audio.wav",
                mimetype="audio/x-wav",
                binary_hash=conv_res.input.document_hash,
            )
            conv_res.document = DoclingDocument(
                name=conv_res.input.file.stem or "audio.wav", origin=origin
            )

            for citem in conversation:
                conv_res.document.add_text(
                    label=DocItemLabel.TEXT, text=citem.to_string()
                )

            conv_res.status = ConversionStatus.SUCCESS
            return conv_res

        except Exception as exc:
            # Best-effort boundary: record the failure on the result rather
            # than raising, but keep the traceback in the log.
            _log.exception(f"Audio transcription has an error: {exc}")

        conv_res.status = ConversionStatus.FAILURE
        return conv_res

    def transcribe(self, fpath: Path) -> list[_ConversationItem]:
        """Run the loaded model on ``fpath`` and return one item per segment.

        Each entry of ``result["segments"]`` becomes a ``_ConversationItem``
        built from its ``start``, ``end`` and ``text`` keys. Per-word entries
        (``start``/``end``/``word``) are attached only when word timestamps
        are enabled and the segment carries a ``words`` key; otherwise
        ``words`` is an empty list.
        """
        result = self.model.transcribe(
            str(fpath), verbose=self.verbose, word_timestamps=self.word_timestamps
        )

        convo: list[_ConversationItem] = []
        for segment in result["segments"]:
            item = _ConversationItem(
                start_time=segment["start"],
                end_time=segment["end"],
                text=segment["text"],
                words=[],
            )
            if "words" in segment and self.word_timestamps:
                item.words = [
                    _ConversationWord(
                        start_time=word["start"],
                        end_time=word["end"],
                        text=word["word"],
                    )
                    for word in segment["words"]
                ]
            convo.append(item)

        return convo
202
+
203
+
204
class AsrPipeline(BasePipeline):
    """Pipeline that builds a document from an audio input by delegating to
    an ASR model (currently only ``_NativeWhisperModel``)."""

    def __init__(self, pipeline_options: AsrPipelineOptions):
        """Resolve the artifacts directory and instantiate the ASR model.

        Args:
            pipeline_options: ASR pipeline configuration; its
                ``artifacts_path`` takes precedence over
                ``settings.artifacts_path``.

        Raises:
            RuntimeError: If an artifacts path is configured but is not an
                existing directory.
        """
        super().__init__(pipeline_options)
        self.keep_backend = True

        self.pipeline_options: AsrPipelineOptions = pipeline_options

        artifacts_path: Optional[Path] = None
        if pipeline_options.artifacts_path is not None:
            artifacts_path = Path(pipeline_options.artifacts_path).expanduser()
        elif settings.artifacts_path is not None:
            artifacts_path = Path(settings.artifacts_path).expanduser()

        if artifacts_path is not None and not artifacts_path.is_dir():
            raise RuntimeError(
                f"The value of {artifacts_path=} is not valid. "
                "When defined, it must point to a folder containing all models required by the pipeline."
            )

        # Stays None when the configured ASR options are not supported, so
        # that _build_document can report a failure instead of hitting an
        # AttributeError on a missing attribute.
        self._model: Optional[_NativeWhisperModel] = None

        if isinstance(self.pipeline_options.asr_options, InlineAsrNativeWhisperOptions):
            asr_options: InlineAsrNativeWhisperOptions = (
                self.pipeline_options.asr_options
            )
            self._model = _NativeWhisperModel(
                enabled=True,  # must be always enabled for this pipeline to make sense.
                artifacts_path=artifacts_path,
                accelerator_options=pipeline_options.accelerator_options,
                asr_options=asr_options,
            )
        else:
            _log.error(f"No model support for {self.pipeline_options.asr_options}")

    def _determine_status(self, conv_res: ConversionResult) -> ConversionStatus:
        """Return the final status, preserving a FAILURE already recorded.

        ``_NativeWhisperModel.run`` sets ``conv_res.status`` to FAILURE when
        transcription raises; returning SUCCESS unconditionally would hide
        that. (Presumably the base pipeline assigns this return value back to
        ``conv_res.status`` -- confirm against ``BasePipeline``.)
        """
        if conv_res.status == ConversionStatus.FAILURE:
            return ConversionStatus.FAILURE
        return ConversionStatus.SUCCESS

    @classmethod
    def get_default_options(cls) -> AsrPipelineOptions:
        """Return a default-constructed ``AsrPipelineOptions``."""
        return AsrPipelineOptions()

    def _build_document(self, conv_res: ConversionResult) -> ConversionResult:
        """Run the ASR model on ``conv_res`` inside a ``doc_build`` timer.

        If no model could be created in ``__init__`` the result is marked as
        FAILURE and returned without transcription.
        """
        _log.info(f"start _build_document in AsrPipeline: {conv_res.input.file}")
        with TimeRecorder(conv_res, "doc_build", scope=ProfilingScope.DOCUMENT):
            if self._model is None:
                _log.error(
                    f"No ASR model available, cannot transcribe: {conv_res.input.file}"
                )
                conv_res.status = ConversionStatus.FAILURE
            else:
                self._model.run(conv_res=conv_res)

        return conv_res

    @classmethod
    def is_backend_supported(cls, backend: AbstractDocumentBackend) -> bool:
        """Return True only for ``NoOpBackend`` instances."""
        return isinstance(backend, NoOpBackend)
@@ -193,6 +193,17 @@ class PaginatedPipeline(BasePipeline): # TODO this is a bad name.
193
193
  )
194
194
  raise e
195
195
 
196
+ # Filter out uninitialized pages (those with size=None) that may remain
197
+ # after timeout or processing failures to prevent assertion errors downstream
198
+ initial_page_count = len(conv_res.pages)
199
+ conv_res.pages = [page for page in conv_res.pages if page.size is not None]
200
+
201
+ if len(conv_res.pages) < initial_page_count:
202
+ _log.info(
203
+ f"Filtered out {initial_page_count - len(conv_res.pages)} uninitialized pages "
204
+ f"due to timeout or processing failures"
205
+ )
206
+
196
207
  return conv_res
197
208
 
198
209
  def _unload(self, conv_res: ConversionResult) -> ConversionResult:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling
3
- Version: 2.37.0
3
+ Version: 2.38.1
4
4
  Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
5
5
  Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  License-Expression: MIT
@@ -61,6 +61,8 @@ Requires-Dist: mlx-vlm>=0.1.22; (python_version >= "3.10" and sys_platform == "d
61
61
  Provides-Extra: rapidocr
62
62
  Requires-Dist: rapidocr-onnxruntime<2.0.0,>=1.4.0; python_version < "3.13" and extra == "rapidocr"
63
63
  Requires-Dist: onnxruntime<2.0.0,>=1.7.0; extra == "rapidocr"
64
+ Provides-Extra: asr
65
+ Requires-Dist: openai-whisper>=20240930; extra == "asr"
64
66
  Dynamic: license-file
65
67
 
66
68
  <p align="center">
@@ -93,14 +95,15 @@ Docling simplifies document processing, parsing diverse formats — including ad
93
95
 
94
96
  ## Features
95
97
 
96
- * 🗂️ Parsing of [multiple document formats][supported_formats] incl. PDF, DOCX, XLSX, HTML, images, and more
98
+ * 🗂️ Parsing of [multiple document formats][supported_formats] incl. PDF, DOCX, PPTX, XLSX, HTML, WAV, MP3, images (PNG, TIFF, JPEG, ...), and more
97
99
  * 📑 Advanced PDF understanding incl. page layout, reading order, table structure, code, formulas, image classification, and more
98
100
  * 🧬 Unified, expressive [DoclingDocument][docling_document] representation format
99
- * ↪️ Various [export formats][supported_formats] and options, including Markdown, HTML, and lossless JSON
101
+ * ↪️ Various [export formats][supported_formats] and options, including Markdown, HTML, [DocTags](https://arxiv.org/abs/2503.11576) and lossless JSON
100
102
  * 🔒 Local execution capabilities for sensitive data and air-gapped environments
101
103
  * 🤖 Plug-and-play [integrations][integrations] incl. LangChain, LlamaIndex, Crew AI & Haystack for agentic AI
102
104
  * 🔍 Extensive OCR support for scanned PDFs and images
103
- * 🥚 Support of several Visual Language Models ([SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview))
105
+ * 👓 Support of several Visual Language Models ([SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview))
106
+ * 🎙️ Support for Audio with Automatic Speech Recognition (ASR) models
104
107
  * 💻 Simple and convenient CLI
105
108
 
106
109
  ### Coming soon
@@ -1,5 +1,5 @@
1
1
  docling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- docling/document_converter.py,sha256=bnUA9k1LCuCfNwCsneGQiGCvFdnX8W-vbpnu6U_fuuI,14003
2
+ docling/document_converter.py,sha256=3jWywP_TLy-1PMvjJBUlnTM9FNzpBLRCHYA1RKFvGR4,14333
3
3
  docling/exceptions.py,sha256=K1WnCS1leK2JtMB5ewZWKkb0EaijFgl-tRzrO9ntgPM,134
4
4
  docling/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
5
5
  docling/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -10,10 +10,11 @@ docling/backend/docling_parse_backend.py,sha256=9rUo1vPxX6QLzGqF-2B2iEYglZg6YQ3U
10
10
  docling/backend/docling_parse_v2_backend.py,sha256=3ckTfke8IICjaImlIzc3TRhG7KDuxDDba0AuCEcjA-M,9500
11
11
  docling/backend/docling_parse_v4_backend.py,sha256=7tQvpCwpYoq98PNszDkrXaFhy5eWmQqMP4RjWWPLPgw,6197
12
12
  docling/backend/html_backend.py,sha256=3K-l5SUAAyqISNEb7nPst_I51xzYOVOkgmwXh3lv9sw,21063
13
- docling/backend/md_backend.py,sha256=JkY1qTvQFXjKSZGfD-83d-fZelorUG_l6mpJdYGqvX8,17210
13
+ docling/backend/md_backend.py,sha256=kSQ7dn_IrAmt53kL_0Z5LnpE2fWif9RkBAGtqzgfQaM,20514
14
14
  docling/backend/msexcel_backend.py,sha256=3j0WQfqDpgPXdPMCguefdv7arcNVDedPD6gl54cmLn8,18110
15
15
  docling/backend/mspowerpoint_backend.py,sha256=0lsb8ZeQFxbDt7jZpSQyk5wYHYa3SP2T2y2dMI-o30o,15216
16
- docling/backend/msword_backend.py,sha256=GCwUnebgRgvHlF6z1RP8RUb1nhHheJ5bpiVeTfNGsBU,44694
16
+ docling/backend/msword_backend.py,sha256=xj009k1s7uzmNx3yGZZelsSgxa6ylaJ1yYHxYfHVLOo,44975
17
+ docling/backend/noop_backend.py,sha256=EOPbD86FzZPX-K_DpNrJh0_lC0bZz--4DpG-OagDNGY,1688
17
18
  docling/backend/pdf_backend.py,sha256=KE9TMuFO5WX-o5A_DAd4tEaLi4HMZ4XjKdpllItVkWM,2238
18
19
  docling/backend/pypdfium2_backend.py,sha256=8dVniLHgiTdJuDbYr66kPp6Ccv5ZDlqDMEbA2xIfS7U,13370
19
20
  docling/backend/docx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -27,20 +28,22 @@ docling/backend/xml/jats_backend.py,sha256=ghGi9bHjx3BvaOtmzLw86-wZy4UxpQPOPQL4e
27
28
  docling/backend/xml/uspto_backend.py,sha256=nyAMr5ht7dclxkVDwsKNeiOhLQrUtRLS8JdscB2AVJg,70924
28
29
  docling/chunking/__init__.py,sha256=h83TDs0AuOV6oEPLAPrn9dpGKiU-2Vg6IRNo4cv6GDA,346
29
30
  docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
30
- docling/cli/main.py,sha256=fDGjepShl6KO_BdA6qUNyNBoCjqZUKRnmmkzesGtvVU,27202
31
+ docling/cli/main.py,sha256=D2gEoArnQ2yQ9BesH9CkxZbYQyhZRGgjjNWYqmRRUtU,29617
31
32
  docling/cli/models.py,sha256=9yLGp6QRJGpR86U3SjmWAXDt3MvBaJLLY4xDVdsu3O8,4160
32
33
  docling/cli/tools.py,sha256=QhtRxQG0TVrfsMqdv5i7J0_qQy1ZZyWYnHPwJl7b5oY,322
33
34
  docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
35
  docling/datamodel/accelerator_options.py,sha256=wv6dOFTVAwr9onkE-0pfUqX_fDb6gX53iPPE6o8nKjI,2511
35
- docling/datamodel/base_models.py,sha256=iHkzAgWXPyvYwhqrcsgHTY1YoKoQZQO3eNvIjxagRp0,10818
36
- docling/datamodel/document.py,sha256=vPwiVU5zWCKbVYMq-TSmb7LTjijrqJq0FyAgDBa0XGA,16154
37
- docling/datamodel/pipeline_options.py,sha256=NCldcrDjmV_N1PUtK4FfpxVQaKj4f0IdSIbXf5nZYVY,9155
38
- docling/datamodel/pipeline_options_vlm_model.py,sha256=kivUljsC97CQGb7VEJ5nqC-d26q9Kj_2uRdInH1YTX4,2052
36
+ docling/datamodel/asr_model_specs.py,sha256=L7ETXsUKVbPsVcPLhEIMxQjd4UzMGZBVsy74CLsZBkU,2181
37
+ docling/datamodel/base_models.py,sha256=67o1ptOTT8tW7i-g6gM2JKEX_1CDbmKEMQ_B9ZYM2z0,11156
38
+ docling/datamodel/document.py,sha256=CA_dgt4V_phze5HXpfgfKNBKd1cPC1o3WE_IENX63EM,16252
39
+ docling/datamodel/pipeline_options.py,sha256=7mKv1IThXYpu3osggp_Y2h7E5C8nbxJLQXS7JJPMvYQ,9479
40
+ docling/datamodel/pipeline_options_asr_model.py,sha256=7X068xl-qpbyPxC7-TwX7Q6tLyZXGT5h1osZ_xLNLM0,1454
41
+ docling/datamodel/pipeline_options_vlm_model.py,sha256=rtDMVtKFZbgQD269w8FvHMXEhdRBrsA4rVYk6A-M-b4,2063
39
42
  docling/datamodel/settings.py,sha256=ajMz7Ao2m0ZGYkfArqTDDbiF89O408mtgeh06PUi0MA,1900
40
43
  docling/datamodel/vlm_model_specs.py,sha256=--jZexGeu-s_lWp7y_WwWEf6CD1J4XqADrS1-OY_pWM,4737
41
44
  docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
- docling/models/api_vlm_model.py,sha256=w3P1wOsr3JvZsawbK1Z4uwnD5ehUMbcKGkyhcX83Okc,2738
43
- docling/models/base_model.py,sha256=Zx_nByGYkubTvvYiQxwiB6P8lc7wOD4ZTC2QIw6vCEg,2950
45
+ docling/models/api_vlm_model.py,sha256=GDDJGAia4SJjK7JFxsZy5oEU-D8yQo8Kb3NvvPbTvT0,2820
46
+ docling/models/base_model.py,sha256=NNjIapqCruAEAWR-CCdsNgXc2QkwiPYAcaQ_ZYe1W28,2978
44
47
  docling/models/base_ocr_model.py,sha256=HtrefTq9Zy4UnUInMchPv0tbobiA7CQU5VUauKJD7IU,8006
45
48
  docling/models/code_formula_model.py,sha256=5uWh-eI-Ejmv3DujKJoKKgJBuvPLokt7AJ_ybt8VHEw,11373
46
49
  docling/models/document_picture_classifier.py,sha256=fkJLV7pMy3v6iNwOzVb6zdBU1dGtBM1ARHLIRPfoAG4,6124
@@ -53,7 +56,7 @@ docling/models/picture_description_api_model.py,sha256=o3EkV5aHW_6WzE_fdj_VRnNCr
53
56
  docling/models/picture_description_base_model.py,sha256=kLthLhdlgwhootQ4_xhhcAk6A-vso5-qcsFJ3TcYfO0,2991
54
57
  docling/models/picture_description_vlm_model.py,sha256=7LeCx9ZdPxsmWJ468OtxCdAkH48A1HD0iwH9cs_7-1Q,3800
55
58
  docling/models/rapid_ocr_model.py,sha256=AMdc66s_iWO4p6nQ0LNjQMUYVxrDSxMyLNPpjPYt6N8,5916
56
- docling/models/readingorder_model.py,sha256=46ZYGJrRIp2ueJAQPmqXHjEw-5LcNtVUECSd4yIcHnM,14582
59
+ docling/models/readingorder_model.py,sha256=QHb5fyiqmxU8lg4W5IzdukqHPh6V7rNw_57O4-z-Az4,14615
57
60
  docling/models/table_structure_model.py,sha256=dQf6u_zn5fHCkHzmTwYfCbRtZCBddsyAM0WNVBUUQzk,12473
58
61
  docling/models/tesseract_ocr_cli_model.py,sha256=qcM3-n7Z_dm1CGBhVUcNr2XT41iXnU32zk4RqKHBl9I,12775
59
62
  docling/models/tesseract_ocr_model.py,sha256=9DPAE7XP7smej7HYhr7mdwpuxSjAcv_GPrYZG3bb1RA,10587
@@ -66,10 +69,11 @@ docling/models/plugins/defaults.py,sha256=qslXGnRX07Z3GGttNriqaox0v0vXp4zs4KLurH
66
69
  docling/models/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
67
70
  docling/models/utils/hf_model_download.py,sha256=scBEfsM4yl7xPzqe7UtPvDh9RfQZQnuOhqQKilYBHls,984
68
71
  docling/models/vlm_models_inline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
69
- docling/models/vlm_models_inline/hf_transformers_model.py,sha256=4o1_G2__4opIl3J1HzujmdGyZaabqtEGgTmkraZYsXo,7343
70
- docling/models/vlm_models_inline/mlx_model.py,sha256=CFe1UNxQufZd5K4iaOW3HsplQBPb_1cENf3KIwWUSWw,5702
72
+ docling/models/vlm_models_inline/hf_transformers_model.py,sha256=w9_N4ccjmYYK5yYQou0LSMGaj6gs8l0hULvXbkfYXSQ,7425
73
+ docling/models/vlm_models_inline/mlx_model.py,sha256=qpyi6fGHm0vPqW2yeTsRBKOTTshNJ1LAPbH1SBDp8Y8,5784
71
74
  docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
72
- docling/pipeline/base_pipeline.py,sha256=DnuxAf7EQusdSRae0QUVth-0f2mSff8JZjX-2vazk00,8751
75
+ docling/pipeline/asr_pipeline.py,sha256=tQkhu9fXdkSuYIL22xzV2YRUlQh-9qktHBbs2qeXhJI,9070
76
+ docling/pipeline/base_pipeline.py,sha256=14yQrDjsojl4RgbBjKFSEfVBYR_sULZfBI1uDzFLi8Y,9331
73
77
  docling/pipeline/simple_pipeline.py,sha256=TXZOwR7hZRji462ZTIpte0VJjzbxvNVE8dbLFANDhSU,2253
74
78
  docling/pipeline/standard_pdf_pipeline.py,sha256=2Hqg2wnAXfbZbLUOQrRus8PMEuZ549jR1mfR86-CAB4,12659
75
79
  docling/pipeline/vlm_pipeline.py,sha256=IrjDbajCPmUPep_jATKNiABST4tQ8mvpkQz9mtBQ8qQ,15279
@@ -86,9 +90,9 @@ docling/utils/orientation.py,sha256=xXlOfowL54FKwjsTFrM7y3ogk1wChLNn_-u74tYIf1s,
86
90
  docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
87
91
  docling/utils/utils.py,sha256=kJtIYuzXeOyJHYlxmLAo7dGM5rEsDa1i84qEsUj1nio,1908
88
92
  docling/utils/visualization.py,sha256=tY2ylE2aiQKkmzlSLnFW-HTfFyqUUMguW18ldd1PLfo,2868
89
- docling-2.37.0.dist-info/licenses/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
90
- docling-2.37.0.dist-info/METADATA,sha256=MvNxmbh1_bNJ8Z2_GG3EoJHH2S-5rbOEBpM6x4LQeoA,10036
91
- docling-2.37.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
92
- docling-2.37.0.dist-info/entry_points.txt,sha256=hzVlbeE0aMSTQ9S0-NTYN0Hmgsn6qL_EA2qX4UbkAuY,149
93
- docling-2.37.0.dist-info/top_level.txt,sha256=vkIywP-USjFyYo1AIRQbWQQaL3xB5jf8vkCYdTIfNic,8
94
- docling-2.37.0.dist-info/RECORD,,
93
+ docling-2.38.1.dist-info/licenses/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
94
+ docling-2.38.1.dist-info/METADATA,sha256=14E9MwQXlyuB4nWa31ZTjW6vvv5p2eCs2xxVTE4-qT4,10273
95
+ docling-2.38.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
96
+ docling-2.38.1.dist-info/entry_points.txt,sha256=hzVlbeE0aMSTQ9S0-NTYN0Hmgsn6qL_EA2qX4UbkAuY,149
97
+ docling-2.38.1.dist-info/top_level.txt,sha256=vkIywP-USjFyYo1AIRQbWQQaL3xB5jf8vkCYdTIfNic,8
98
+ docling-2.38.1.dist-info/RECORD,,