docling 2.37.0__py3-none-any.whl → 2.38.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling/backend/md_backend.py +185 -80
- docling/backend/msword_backend.py +76 -63
- docling/backend/noop_backend.py +51 -0
- docling/cli/main.py +82 -14
- docling/datamodel/asr_model_specs.py +92 -0
- docling/datamodel/base_models.py +12 -2
- docling/datamodel/document.py +3 -1
- docling/datamodel/pipeline_options.py +13 -2
- docling/datamodel/pipeline_options_asr_model.py +57 -0
- docling/datamodel/pipeline_options_vlm_model.py +2 -3
- docling/document_converter.py +8 -0
- docling/models/api_vlm_model.py +3 -1
- docling/models/base_model.py +1 -1
- docling/models/readingorder_model.py +1 -1
- docling/models/vlm_models_inline/hf_transformers_model.py +3 -1
- docling/models/vlm_models_inline/mlx_model.py +3 -1
- docling/pipeline/asr_pipeline.py +253 -0
- docling/pipeline/base_pipeline.py +11 -0
- {docling-2.37.0.dist-info → docling-2.38.1.dist-info}/METADATA +7 -4
- {docling-2.37.0.dist-info → docling-2.38.1.dist-info}/RECORD +24 -20
- {docling-2.37.0.dist-info → docling-2.38.1.dist-info}/WHEEL +0 -0
- {docling-2.37.0.dist-info → docling-2.38.1.dist-info}/entry_points.txt +0 -0
- {docling-2.37.0.dist-info → docling-2.38.1.dist-info}/licenses/LICENSE +0 -0
- {docling-2.37.0.dist-info → docling-2.38.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,253 @@
|
|
1
|
+
import logging
|
2
|
+
import os
|
3
|
+
import re
|
4
|
+
from io import BytesIO
|
5
|
+
from pathlib import Path
|
6
|
+
from typing import List, Optional, Union, cast
|
7
|
+
|
8
|
+
from docling_core.types.doc import DoclingDocument, DocumentOrigin
|
9
|
+
|
10
|
+
# import whisper # type: ignore
|
11
|
+
# import librosa
|
12
|
+
# import numpy as np
|
13
|
+
# import soundfile as sf # type: ignore
|
14
|
+
from docling_core.types.doc.labels import DocItemLabel
|
15
|
+
from pydantic import BaseModel, Field, validator
|
16
|
+
|
17
|
+
from docling.backend.abstract_backend import AbstractDocumentBackend
|
18
|
+
from docling.backend.noop_backend import NoOpBackend
|
19
|
+
|
20
|
+
# from pydub import AudioSegment # type: ignore
|
21
|
+
# from transformers import WhisperForConditionalGeneration, WhisperProcessor, pipeline
|
22
|
+
from docling.datamodel.accelerator_options import (
|
23
|
+
AcceleratorOptions,
|
24
|
+
)
|
25
|
+
from docling.datamodel.base_models import (
|
26
|
+
ConversionStatus,
|
27
|
+
FormatToMimeType,
|
28
|
+
)
|
29
|
+
from docling.datamodel.document import ConversionResult, InputDocument
|
30
|
+
from docling.datamodel.pipeline_options import (
|
31
|
+
AsrPipelineOptions,
|
32
|
+
)
|
33
|
+
from docling.datamodel.pipeline_options_asr_model import (
|
34
|
+
InlineAsrNativeWhisperOptions,
|
35
|
+
# AsrResponseFormat,
|
36
|
+
InlineAsrOptions,
|
37
|
+
)
|
38
|
+
from docling.datamodel.pipeline_options_vlm_model import (
|
39
|
+
InferenceFramework,
|
40
|
+
)
|
41
|
+
from docling.datamodel.settings import settings
|
42
|
+
from docling.pipeline.base_pipeline import BasePipeline
|
43
|
+
from docling.utils.accelerator_utils import decide_device
|
44
|
+
from docling.utils.profiling import ProfilingScope, TimeRecorder
|
45
|
+
|
46
|
+
_log = logging.getLogger(__name__)
|
47
|
+
|
48
|
+
|
49
|
+
class _ConversationWord(BaseModel):
    """A single word of a transcript together with its optional time-stamps."""

    # Text of the word.
    text: str
    # Both time-stamps default to None; only end_time carries a ">= 0" bound.
    start_time: Optional[float] = Field(
        default=None, description="Start time in seconds from video start"
    )
    end_time: Optional[float] = Field(
        default=None, ge=0, description="End time in seconds from video start"
    )
|
57
|
+
|
58
|
+
|
59
|
+
class _ConversationItem(BaseModel):
    """One entry (segment) of a transcribed conversation.

    Ordering (``<``) and equality (``==``) look at ``start_time`` only, so two
    items with different text but the same start time compare equal.
    """

    # Text of the segment.
    text: str
    # Both time-stamps default to None; only end_time carries a ">= 0" bound.
    start_time: Optional[float] = Field(
        None, description="Start time in seconds from video start"
    )
    end_time: Optional[float] = Field(
        None, ge=0, description="End time in seconds from video start"
    )
    speaker_id: Optional[int] = Field(None, description="Numeric speaker identifier")
    speaker: Optional[str] = Field(
        None, description="Speaker name, defaults to speaker-{speaker_id}"
    )
    words: Optional[list[_ConversationWord]] = Field(
        None, description="Individual words with time-stamps"
    )

    def __lt__(self, other):
        """Order items by start time; items without a start time sort last.

        Returns ``NotImplemented`` for operands that are not
        ``_ConversationItem`` instances.
        """
        if not isinstance(other, _ConversationItem):
            return NotImplemented
        # ``None < float`` raises TypeError, and None is the default start
        # time, so handle missing values explicitly: an untimed item is never
        # "less than" anything, and a timed item is less than an untimed one.
        if self.start_time is None or other.start_time is None:
            return self.start_time is not None and other.start_time is None
        return self.start_time < other.start_time

    def __eq__(self, other):
        """Return True when both items have the same start time.

        Returns ``NotImplemented`` for operands that are not
        ``_ConversationItem`` instances.
        """
        if not isinstance(other, _ConversationItem):
            return NotImplemented
        return self.start_time == other.start_time

    def to_string(self) -> str:
        """Format the conversation entry as a string.

        The text is prefixed with ``[time: <start>-<end>] `` when both
        time-stamps are set, and with ``[speaker:<name>] `` when a speaker
        name is set.
        """
        prefix = ""
        if self.start_time is not None and self.end_time is not None:
            prefix += f"[time: {self.start_time}-{self.end_time}] "
        if self.speaker is not None:
            prefix += f"[speaker:{self.speaker}] "
        return prefix + self.text
|
96
|
+
|
97
|
+
|
98
|
+
class _NativeWhisperModel:
    """Speech-to-text transcriber built on the ``whisper`` package."""

    def __init__(
        self,
        enabled: bool,
        artifacts_path: Optional[Path],
        accelerator_options: AcceleratorOptions,
        asr_options: InlineAsrNativeWhisperOptions,
    ):
        """
        Transcriber using native Whisper.

        Args:
            enabled: When False nothing is loaded and none of the model
                attributes (``model``, ``device``, ...) are set.
            artifacts_path: Optional directory handed to ``whisper.load_model``
                as ``download_root``.
            accelerator_options: Supplies the requested device.
            asr_options: Model name (``repo_id``), supported devices and
                transcription settings.

        Raises:
            ImportError: If ``enabled`` is True and ``whisper`` cannot be
                imported.
        """
        self.enabled = enabled

        _log.info(f"artifacts-path: {artifacts_path}")
        _log.info(f"accelerator_options: {accelerator_options}")

        if self.enabled:
            # Imported lazily so the dependency is only needed when ASR is used.
            try:
                import whisper  # type: ignore
            except ImportError as exc:
                # Chain the original error so the real cause stays visible.
                raise ImportError(
                    "whisper is not installed. Please install it via `pip install openai-whisper` or do `uv sync --extra asr`."
                ) from exc
            self.asr_options = asr_options
            self.max_tokens = asr_options.max_new_tokens
            self.temperature = asr_options.temperature

            self.device = decide_device(
                accelerator_options.device,
                supported_devices=asr_options.supported_devices,
            )
            _log.info(f"Available device for Whisper: {self.device}")

            self.model_name = asr_options.repo_id
            _log.info(f"loading _NativeWhisperModel({self.model_name})")
            if artifacts_path is not None:
                _log.info(f"loading {self.model_name} from {artifacts_path}")
                self.model = whisper.load_model(
                    name=self.model_name,
                    device=self.device,
                    download_root=str(artifacts_path),
                )
            else:
                self.model = whisper.load_model(
                    name=self.model_name, device=self.device
                )

            self.verbose = asr_options.verbose
            self.timestamps = asr_options.timestamps
            self.word_timestamps = asr_options.word_timestamps

    def run(self, conv_res: ConversionResult) -> ConversionResult:
        """Transcribe the input file and store the result on ``conv_res``.

        On success ``conv_res.document`` is replaced by a new DoclingDocument
        holding one text item per conversation entry and the status is set to
        SUCCESS. Any exception is logged and turned into a FAILURE status
        instead of being propagated.

        Returns:
            The same ``conv_res`` object that was passed in.
        """
        audio_path: Path = Path(conv_res.input.file).resolve()

        try:
            conversation = self.transcribe(audio_path)

            # Ensure we have a proper DoclingDocument
            # NOTE(review): the mimetype is fixed to "audio/x-wav" whatever the
            # input file's actual format is - confirm this is intended.
            origin = DocumentOrigin(
                filename=conv_res.input.file.name or "audio.wav",
                mimetype="audio/x-wav",
                binary_hash=conv_res.input.document_hash,
            )
            conv_res.document = DoclingDocument(
                name=conv_res.input.file.stem or "audio.wav", origin=origin
            )

            for citem in conversation:
                conv_res.document.add_text(
                    label=DocItemLabel.TEXT, text=citem.to_string()
                )

            conv_res.status = ConversionStatus.SUCCESS
        except Exception as exc:
            # Deliberate catch-all: a failed transcription is reported through
            # the conversion status rather than by raising.
            _log.error(f"Audio transcription has an error: {exc}")
            conv_res.status = ConversionStatus.FAILURE

        return conv_res

    def transcribe(self, fpath: Path) -> list[_ConversationItem]:
        """Run the model on ``fpath`` and return one item per segment.

        Each item's ``words`` list is filled only when word time-stamps are
        enabled and the segment contains a "words" entry; otherwise it is an
        empty list.
        """
        result = self.model.transcribe(
            str(fpath), verbose=self.verbose, word_timestamps=self.word_timestamps
        )

        convo: list[_ConversationItem] = []
        for segment in result["segments"]:
            words: list[_ConversationWord] = []
            if self.word_timestamps and "words" in segment:
                words = [
                    _ConversationWord(
                        start_time=word["start"],
                        end_time=word["end"],
                        text=word["word"],
                    )
                    for word in segment["words"]
                ]
            convo.append(
                _ConversationItem(
                    start_time=segment["start"],
                    end_time=segment["end"],
                    text=segment["text"],
                    words=words,
                )
            )

        return convo
|
202
|
+
|
203
|
+
|
204
|
+
class AsrPipeline(BasePipeline):
    """Pipeline that builds a document by running an ASR model on the input."""

    def __init__(self, pipeline_options: AsrPipelineOptions):
        """Resolve the artifacts path and set up the ASR model.

        The artifacts path is taken from ``pipeline_options`` first and from
        the global ``settings`` second. Only ``InlineAsrNativeWhisperOptions``
        are supported; for any other options type an error is logged and no
        model is created.

        Raises:
            RuntimeError: If an artifacts path is configured but is not an
                existing directory.
        """
        super().__init__(pipeline_options)
        self.keep_backend = True

        self.pipeline_options: AsrPipelineOptions = pipeline_options
        # Stays None when the configured ASR options are not supported, so
        # that _build_document can report the problem clearly.
        self._model: Optional[_NativeWhisperModel] = None

        artifacts_path: Optional[Path] = None
        if pipeline_options.artifacts_path is not None:
            artifacts_path = Path(pipeline_options.artifacts_path).expanduser()
        elif settings.artifacts_path is not None:
            artifacts_path = Path(settings.artifacts_path).expanduser()

        if artifacts_path is not None and not artifacts_path.is_dir():
            raise RuntimeError(
                f"The value of {artifacts_path=} is not valid. "
                "When defined, it must point to a folder containing all models required by the pipeline."
            )

        if isinstance(self.pipeline_options.asr_options, InlineAsrNativeWhisperOptions):
            asr_options: InlineAsrNativeWhisperOptions = (
                self.pipeline_options.asr_options
            )
            self._model = _NativeWhisperModel(
                enabled=True,  # must be always enabled for this pipeline to make sense.
                artifacts_path=artifacts_path,
                accelerator_options=pipeline_options.accelerator_options,
                asr_options=asr_options,
            )
        else:
            _log.error(f"No model support for {self.pipeline_options.asr_options}")

    def _determine_status(self, conv_res: ConversionResult) -> ConversionStatus:
        """Return the final conversion status.

        The model's ``run`` marks ``conv_res`` as FAILURE when transcription
        raises, so an existing FAILURE is kept instead of being reported as
        SUCCESS. Every other state maps to SUCCESS.
        NOTE(review): presumably the base pipeline assigns this return value
        to ``conv_res.status`` - confirm against BasePipeline.
        """
        if conv_res.status == ConversionStatus.FAILURE:
            return ConversionStatus.FAILURE
        return ConversionStatus.SUCCESS

    @classmethod
    def get_default_options(cls) -> AsrPipelineOptions:
        """Return a default-constructed ``AsrPipelineOptions``."""
        return AsrPipelineOptions()

    def _build_document(self, conv_res: ConversionResult) -> ConversionResult:
        """Run the ASR model on ``conv_res`` inside a "doc_build" timer.

        Raises:
            RuntimeError: If no ASR model was created in ``__init__`` because
                the configured options are not supported.
        """
        _log.info(f"start _build_document in AsrPipeline: {conv_res.input.file}")
        if self._model is None:
            # Fail with an explicit message instead of an AttributeError.
            raise RuntimeError(
                f"No model support for {self.pipeline_options.asr_options}"
            )
        with TimeRecorder(conv_res, "doc_build", scope=ProfilingScope.DOCUMENT):
            self._model.run(conv_res=conv_res)

        return conv_res

    @classmethod
    def is_backend_supported(cls, backend: AbstractDocumentBackend) -> bool:
        """Return True only for ``NoOpBackend`` instances."""
        return isinstance(backend, NoOpBackend)
|
@@ -193,6 +193,17 @@ class PaginatedPipeline(BasePipeline): # TODO this is a bad name.
|
|
193
193
|
)
|
194
194
|
raise e
|
195
195
|
|
196
|
+
# Filter out uninitialized pages (those with size=None) that may remain
|
197
|
+
# after timeout or processing failures to prevent assertion errors downstream
|
198
|
+
initial_page_count = len(conv_res.pages)
|
199
|
+
conv_res.pages = [page for page in conv_res.pages if page.size is not None]
|
200
|
+
|
201
|
+
if len(conv_res.pages) < initial_page_count:
|
202
|
+
_log.info(
|
203
|
+
f"Filtered out {initial_page_count - len(conv_res.pages)} uninitialized pages "
|
204
|
+
f"due to timeout or processing failures"
|
205
|
+
)
|
206
|
+
|
196
207
|
return conv_res
|
197
208
|
|
198
209
|
def _unload(self, conv_res: ConversionResult) -> ConversionResult:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: docling
|
3
|
-
Version: 2.37.0
|
3
|
+
Version: 2.38.1
|
4
4
|
Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
|
5
5
|
Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
|
6
6
|
License-Expression: MIT
|
@@ -61,6 +61,8 @@ Requires-Dist: mlx-vlm>=0.1.22; (python_version >= "3.10" and sys_platform == "d
|
|
61
61
|
Provides-Extra: rapidocr
|
62
62
|
Requires-Dist: rapidocr-onnxruntime<2.0.0,>=1.4.0; python_version < "3.13" and extra == "rapidocr"
|
63
63
|
Requires-Dist: onnxruntime<2.0.0,>=1.7.0; extra == "rapidocr"
|
64
|
+
Provides-Extra: asr
|
65
|
+
Requires-Dist: openai-whisper>=20240930; extra == "asr"
|
64
66
|
Dynamic: license-file
|
65
67
|
|
66
68
|
<p align="center">
|
@@ -93,14 +95,15 @@ Docling simplifies document processing, parsing diverse formats — including ad
|
|
93
95
|
|
94
96
|
## Features
|
95
97
|
|
96
|
-
* 🗂️
|
98
|
+
* 🗂️ Parsing of [multiple document formats][supported_formats] incl. PDF, DOCX, PPTX, XLSX, HTML, WAV, MP3, images (PNG, TIFF, JPEG, ...), and more
|
97
99
|
* 📑 Advanced PDF understanding incl. page layout, reading order, table structure, code, formulas, image classification, and more
|
98
100
|
* 🧬 Unified, expressive [DoclingDocument][docling_document] representation format
|
99
|
-
* ↪️
|
101
|
+
* ↪️ Various [export formats][supported_formats] and options, including Markdown, HTML, [DocTags](https://arxiv.org/abs/2503.11576) and lossless JSON
|
100
102
|
* 🔒 Local execution capabilities for sensitive data and air-gapped environments
|
101
103
|
* 🤖 Plug-and-play [integrations][integrations] incl. LangChain, LlamaIndex, Crew AI & Haystack for agentic AI
|
102
104
|
* 🔍 Extensive OCR support for scanned PDFs and images
|
103
|
-
*
|
105
|
+
* 👓 Support of several Visual Language Models ([SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview))
|
106
|
+
* 🎙️ Support for Audio with Automatic Speech Recognition (ASR) models
|
104
107
|
* 💻 Simple and convenient CLI
|
105
108
|
|
106
109
|
### Coming soon
|
@@ -1,5 +1,5 @@
|
|
1
1
|
docling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
docling/document_converter.py,sha256=
|
2
|
+
docling/document_converter.py,sha256=3jWywP_TLy-1PMvjJBUlnTM9FNzpBLRCHYA1RKFvGR4,14333
|
3
3
|
docling/exceptions.py,sha256=K1WnCS1leK2JtMB5ewZWKkb0EaijFgl-tRzrO9ntgPM,134
|
4
4
|
docling/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
5
5
|
docling/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -10,10 +10,11 @@ docling/backend/docling_parse_backend.py,sha256=9rUo1vPxX6QLzGqF-2B2iEYglZg6YQ3U
|
|
10
10
|
docling/backend/docling_parse_v2_backend.py,sha256=3ckTfke8IICjaImlIzc3TRhG7KDuxDDba0AuCEcjA-M,9500
|
11
11
|
docling/backend/docling_parse_v4_backend.py,sha256=7tQvpCwpYoq98PNszDkrXaFhy5eWmQqMP4RjWWPLPgw,6197
|
12
12
|
docling/backend/html_backend.py,sha256=3K-l5SUAAyqISNEb7nPst_I51xzYOVOkgmwXh3lv9sw,21063
|
13
|
-
docling/backend/md_backend.py,sha256=
|
13
|
+
docling/backend/md_backend.py,sha256=kSQ7dn_IrAmt53kL_0Z5LnpE2fWif9RkBAGtqzgfQaM,20514
|
14
14
|
docling/backend/msexcel_backend.py,sha256=3j0WQfqDpgPXdPMCguefdv7arcNVDedPD6gl54cmLn8,18110
|
15
15
|
docling/backend/mspowerpoint_backend.py,sha256=0lsb8ZeQFxbDt7jZpSQyk5wYHYa3SP2T2y2dMI-o30o,15216
|
16
|
-
docling/backend/msword_backend.py,sha256=
|
16
|
+
docling/backend/msword_backend.py,sha256=xj009k1s7uzmNx3yGZZelsSgxa6ylaJ1yYHxYfHVLOo,44975
|
17
|
+
docling/backend/noop_backend.py,sha256=EOPbD86FzZPX-K_DpNrJh0_lC0bZz--4DpG-OagDNGY,1688
|
17
18
|
docling/backend/pdf_backend.py,sha256=KE9TMuFO5WX-o5A_DAd4tEaLi4HMZ4XjKdpllItVkWM,2238
|
18
19
|
docling/backend/pypdfium2_backend.py,sha256=8dVniLHgiTdJuDbYr66kPp6Ccv5ZDlqDMEbA2xIfS7U,13370
|
19
20
|
docling/backend/docx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -27,20 +28,22 @@ docling/backend/xml/jats_backend.py,sha256=ghGi9bHjx3BvaOtmzLw86-wZy4UxpQPOPQL4e
|
|
27
28
|
docling/backend/xml/uspto_backend.py,sha256=nyAMr5ht7dclxkVDwsKNeiOhLQrUtRLS8JdscB2AVJg,70924
|
28
29
|
docling/chunking/__init__.py,sha256=h83TDs0AuOV6oEPLAPrn9dpGKiU-2Vg6IRNo4cv6GDA,346
|
29
30
|
docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
30
|
-
docling/cli/main.py,sha256=
|
31
|
+
docling/cli/main.py,sha256=D2gEoArnQ2yQ9BesH9CkxZbYQyhZRGgjjNWYqmRRUtU,29617
|
31
32
|
docling/cli/models.py,sha256=9yLGp6QRJGpR86U3SjmWAXDt3MvBaJLLY4xDVdsu3O8,4160
|
32
33
|
docling/cli/tools.py,sha256=QhtRxQG0TVrfsMqdv5i7J0_qQy1ZZyWYnHPwJl7b5oY,322
|
33
34
|
docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
34
35
|
docling/datamodel/accelerator_options.py,sha256=wv6dOFTVAwr9onkE-0pfUqX_fDb6gX53iPPE6o8nKjI,2511
|
35
|
-
docling/datamodel/
|
36
|
-
docling/datamodel/
|
37
|
-
docling/datamodel/
|
38
|
-
docling/datamodel/
|
36
|
+
docling/datamodel/asr_model_specs.py,sha256=L7ETXsUKVbPsVcPLhEIMxQjd4UzMGZBVsy74CLsZBkU,2181
|
37
|
+
docling/datamodel/base_models.py,sha256=67o1ptOTT8tW7i-g6gM2JKEX_1CDbmKEMQ_B9ZYM2z0,11156
|
38
|
+
docling/datamodel/document.py,sha256=CA_dgt4V_phze5HXpfgfKNBKd1cPC1o3WE_IENX63EM,16252
|
39
|
+
docling/datamodel/pipeline_options.py,sha256=7mKv1IThXYpu3osggp_Y2h7E5C8nbxJLQXS7JJPMvYQ,9479
|
40
|
+
docling/datamodel/pipeline_options_asr_model.py,sha256=7X068xl-qpbyPxC7-TwX7Q6tLyZXGT5h1osZ_xLNLM0,1454
|
41
|
+
docling/datamodel/pipeline_options_vlm_model.py,sha256=rtDMVtKFZbgQD269w8FvHMXEhdRBrsA4rVYk6A-M-b4,2063
|
39
42
|
docling/datamodel/settings.py,sha256=ajMz7Ao2m0ZGYkfArqTDDbiF89O408mtgeh06PUi0MA,1900
|
40
43
|
docling/datamodel/vlm_model_specs.py,sha256=--jZexGeu-s_lWp7y_WwWEf6CD1J4XqADrS1-OY_pWM,4737
|
41
44
|
docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
42
|
-
docling/models/api_vlm_model.py,sha256=
|
43
|
-
docling/models/base_model.py,sha256=
|
45
|
+
docling/models/api_vlm_model.py,sha256=GDDJGAia4SJjK7JFxsZy5oEU-D8yQo8Kb3NvvPbTvT0,2820
|
46
|
+
docling/models/base_model.py,sha256=NNjIapqCruAEAWR-CCdsNgXc2QkwiPYAcaQ_ZYe1W28,2978
|
44
47
|
docling/models/base_ocr_model.py,sha256=HtrefTq9Zy4UnUInMchPv0tbobiA7CQU5VUauKJD7IU,8006
|
45
48
|
docling/models/code_formula_model.py,sha256=5uWh-eI-Ejmv3DujKJoKKgJBuvPLokt7AJ_ybt8VHEw,11373
|
46
49
|
docling/models/document_picture_classifier.py,sha256=fkJLV7pMy3v6iNwOzVb6zdBU1dGtBM1ARHLIRPfoAG4,6124
|
@@ -53,7 +56,7 @@ docling/models/picture_description_api_model.py,sha256=o3EkV5aHW_6WzE_fdj_VRnNCr
|
|
53
56
|
docling/models/picture_description_base_model.py,sha256=kLthLhdlgwhootQ4_xhhcAk6A-vso5-qcsFJ3TcYfO0,2991
|
54
57
|
docling/models/picture_description_vlm_model.py,sha256=7LeCx9ZdPxsmWJ468OtxCdAkH48A1HD0iwH9cs_7-1Q,3800
|
55
58
|
docling/models/rapid_ocr_model.py,sha256=AMdc66s_iWO4p6nQ0LNjQMUYVxrDSxMyLNPpjPYt6N8,5916
|
56
|
-
docling/models/readingorder_model.py,sha256=
|
59
|
+
docling/models/readingorder_model.py,sha256=QHb5fyiqmxU8lg4W5IzdukqHPh6V7rNw_57O4-z-Az4,14615
|
57
60
|
docling/models/table_structure_model.py,sha256=dQf6u_zn5fHCkHzmTwYfCbRtZCBddsyAM0WNVBUUQzk,12473
|
58
61
|
docling/models/tesseract_ocr_cli_model.py,sha256=qcM3-n7Z_dm1CGBhVUcNr2XT41iXnU32zk4RqKHBl9I,12775
|
59
62
|
docling/models/tesseract_ocr_model.py,sha256=9DPAE7XP7smej7HYhr7mdwpuxSjAcv_GPrYZG3bb1RA,10587
|
@@ -66,10 +69,11 @@ docling/models/plugins/defaults.py,sha256=qslXGnRX07Z3GGttNriqaox0v0vXp4zs4KLurH
|
|
66
69
|
docling/models/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
67
70
|
docling/models/utils/hf_model_download.py,sha256=scBEfsM4yl7xPzqe7UtPvDh9RfQZQnuOhqQKilYBHls,984
|
68
71
|
docling/models/vlm_models_inline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
69
|
-
docling/models/vlm_models_inline/hf_transformers_model.py,sha256=
|
70
|
-
docling/models/vlm_models_inline/mlx_model.py,sha256=
|
72
|
+
docling/models/vlm_models_inline/hf_transformers_model.py,sha256=w9_N4ccjmYYK5yYQou0LSMGaj6gs8l0hULvXbkfYXSQ,7425
|
73
|
+
docling/models/vlm_models_inline/mlx_model.py,sha256=qpyi6fGHm0vPqW2yeTsRBKOTTshNJ1LAPbH1SBDp8Y8,5784
|
71
74
|
docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
72
|
-
docling/pipeline/
|
75
|
+
docling/pipeline/asr_pipeline.py,sha256=tQkhu9fXdkSuYIL22xzV2YRUlQh-9qktHBbs2qeXhJI,9070
|
76
|
+
docling/pipeline/base_pipeline.py,sha256=14yQrDjsojl4RgbBjKFSEfVBYR_sULZfBI1uDzFLi8Y,9331
|
73
77
|
docling/pipeline/simple_pipeline.py,sha256=TXZOwR7hZRji462ZTIpte0VJjzbxvNVE8dbLFANDhSU,2253
|
74
78
|
docling/pipeline/standard_pdf_pipeline.py,sha256=2Hqg2wnAXfbZbLUOQrRus8PMEuZ549jR1mfR86-CAB4,12659
|
75
79
|
docling/pipeline/vlm_pipeline.py,sha256=IrjDbajCPmUPep_jATKNiABST4tQ8mvpkQz9mtBQ8qQ,15279
|
@@ -86,9 +90,9 @@ docling/utils/orientation.py,sha256=xXlOfowL54FKwjsTFrM7y3ogk1wChLNn_-u74tYIf1s,
|
|
86
90
|
docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
|
87
91
|
docling/utils/utils.py,sha256=kJtIYuzXeOyJHYlxmLAo7dGM5rEsDa1i84qEsUj1nio,1908
|
88
92
|
docling/utils/visualization.py,sha256=tY2ylE2aiQKkmzlSLnFW-HTfFyqUUMguW18ldd1PLfo,2868
|
89
|
-
docling-2.
|
90
|
-
docling-2.
|
91
|
-
docling-2.
|
92
|
-
docling-2.
|
93
|
-
docling-2.
|
94
|
-
docling-2.
|
93
|
+
docling-2.38.1.dist-info/licenses/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
|
94
|
+
docling-2.38.1.dist-info/METADATA,sha256=14E9MwQXlyuB4nWa31ZTjW6vvv5p2eCs2xxVTE4-qT4,10273
|
95
|
+
docling-2.38.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
96
|
+
docling-2.38.1.dist-info/entry_points.txt,sha256=hzVlbeE0aMSTQ9S0-NTYN0Hmgsn6qL_EA2qX4UbkAuY,149
|
97
|
+
docling-2.38.1.dist-info/top_level.txt,sha256=vkIywP-USjFyYo1AIRQbWQQaL3xB5jf8vkCYdTIfNic,8
|
98
|
+
docling-2.38.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|