docling-2.18.0-py3-none-any.whl → docling-2.20.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling/backend/md_backend.py +62 -46
- docling/backend/msword_backend.py +1 -1
- docling/cli/main.py +13 -0
- docling/cli/models.py +107 -0
- docling/cli/tools.py +17 -0
- docling/datamodel/pipeline_options.py +52 -2
- docling/datamodel/settings.py +2 -0
- docling/models/base_model.py +5 -2
- docling/models/code_formula_model.py +15 -9
- docling/models/document_picture_classifier.py +11 -8
- docling/models/easyocr_model.py +49 -4
- docling/models/layout_model.py +49 -3
- docling/models/picture_description_api_model.py +101 -0
- docling/models/picture_description_base_model.py +64 -0
- docling/models/picture_description_vlm_model.py +109 -0
- docling/models/table_structure_model.py +44 -2
- docling/pipeline/base_pipeline.py +1 -1
- docling/pipeline/standard_pdf_pipeline.py +66 -25
- docling/utils/model_downloader.py +84 -0
- docling/utils/utils.py +24 -0
- {docling-2.18.0.dist-info → docling-2.20.0.dist-info}/METADATA +8 -4
- {docling-2.18.0.dist-info → docling-2.20.0.dist-info}/RECORD +25 -19
- {docling-2.18.0.dist-info → docling-2.20.0.dist-info}/entry_points.txt +1 -0
- {docling-2.18.0.dist-info → docling-2.20.0.dist-info}/LICENSE +0 -0
- {docling-2.18.0.dist-info → docling-2.20.0.dist-info}/WHEEL +0 -0
@@ -1,5 +1,6 @@
|
|
1
1
|
import logging
|
2
2
|
import sys
|
3
|
+
import warnings
|
3
4
|
from pathlib import Path
|
4
5
|
from typing import Optional
|
5
6
|
|
@@ -13,10 +14,13 @@ from docling.datamodel.pipeline_options import (
|
|
13
14
|
EasyOcrOptions,
|
14
15
|
OcrMacOptions,
|
15
16
|
PdfPipelineOptions,
|
17
|
+
PictureDescriptionApiOptions,
|
18
|
+
PictureDescriptionVlmOptions,
|
16
19
|
RapidOcrOptions,
|
17
20
|
TesseractCliOcrOptions,
|
18
21
|
TesseractOcrOptions,
|
19
22
|
)
|
23
|
+
from docling.datamodel.settings import settings
|
20
24
|
from docling.models.base_ocr_model import BaseOcrModel
|
21
25
|
from docling.models.code_formula_model import CodeFormulaModel, CodeFormulaModelOptions
|
22
26
|
from docling.models.document_picture_classifier import (
|
@@ -32,28 +36,31 @@ from docling.models.page_preprocessing_model import (
|
|
32
36
|
PagePreprocessingModel,
|
33
37
|
PagePreprocessingOptions,
|
34
38
|
)
|
39
|
+
from docling.models.picture_description_api_model import PictureDescriptionApiModel
|
40
|
+
from docling.models.picture_description_base_model import PictureDescriptionBaseModel
|
41
|
+
from docling.models.picture_description_vlm_model import PictureDescriptionVlmModel
|
35
42
|
from docling.models.rapid_ocr_model import RapidOcrModel
|
36
43
|
from docling.models.table_structure_model import TableStructureModel
|
37
44
|
from docling.models.tesseract_ocr_cli_model import TesseractOcrCliModel
|
38
45
|
from docling.models.tesseract_ocr_model import TesseractOcrModel
|
39
46
|
from docling.pipeline.base_pipeline import PaginatedPipeline
|
47
|
+
from docling.utils.model_downloader import download_models
|
40
48
|
from docling.utils.profiling import ProfilingScope, TimeRecorder
|
41
49
|
|
42
50
|
_log = logging.getLogger(__name__)
|
43
51
|
|
44
52
|
|
45
53
|
class StandardPdfPipeline(PaginatedPipeline):
|
46
|
-
_layout_model_path =
|
47
|
-
_table_model_path =
|
54
|
+
_layout_model_path = LayoutModel._model_path
|
55
|
+
_table_model_path = TableStructureModel._model_path
|
48
56
|
|
49
57
|
def __init__(self, pipeline_options: PdfPipelineOptions):
|
50
58
|
super().__init__(pipeline_options)
|
51
59
|
self.pipeline_options: PdfPipelineOptions
|
52
60
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
self.artifacts_path = Path(pipeline_options.artifacts_path)
|
61
|
+
artifacts_path: Optional[Path] = None
|
62
|
+
if pipeline_options.artifacts_path is not None:
|
63
|
+
artifacts_path = Path(pipeline_options.artifacts_path).expanduser()
|
57
64
|
|
58
65
|
self.keep_images = (
|
59
66
|
self.pipeline_options.generate_page_images
|
@@ -63,7 +70,7 @@ class StandardPdfPipeline(PaginatedPipeline):
|
|
63
70
|
|
64
71
|
self.glm_model = GlmModel(options=GlmOptions())
|
65
72
|
|
66
|
-
if (ocr_model := self.get_ocr_model()) is None:
|
73
|
+
if (ocr_model := self.get_ocr_model(artifacts_path=artifacts_path)) is None:
|
67
74
|
raise RuntimeError(
|
68
75
|
f"The specified OCR kind is not supported: {pipeline_options.ocr_options.kind}."
|
69
76
|
)
|
@@ -79,15 +86,13 @@ class StandardPdfPipeline(PaginatedPipeline):
|
|
79
86
|
ocr_model,
|
80
87
|
# Layout model
|
81
88
|
LayoutModel(
|
82
|
-
artifacts_path=
|
83
|
-
/ StandardPdfPipeline._layout_model_path,
|
89
|
+
artifacts_path=artifacts_path,
|
84
90
|
accelerator_options=pipeline_options.accelerator_options,
|
85
91
|
),
|
86
92
|
# Table structure model
|
87
93
|
TableStructureModel(
|
88
94
|
enabled=pipeline_options.do_table_structure,
|
89
|
-
artifacts_path=
|
90
|
-
/ StandardPdfPipeline._table_model_path,
|
95
|
+
artifacts_path=artifacts_path,
|
91
96
|
options=pipeline_options.table_structure_options,
|
92
97
|
accelerator_options=pipeline_options.accelerator_options,
|
93
98
|
),
|
@@ -95,13 +100,22 @@ class StandardPdfPipeline(PaginatedPipeline):
|
|
95
100
|
PageAssembleModel(options=PageAssembleOptions()),
|
96
101
|
]
|
97
102
|
|
103
|
+
# Picture description model
|
104
|
+
if (
|
105
|
+
picture_description_model := self.get_picture_description_model(
|
106
|
+
artifacts_path=artifacts_path
|
107
|
+
)
|
108
|
+
) is None:
|
109
|
+
raise RuntimeError(
|
110
|
+
f"The specified picture description kind is not supported: {pipeline_options.picture_description_options.kind}."
|
111
|
+
)
|
112
|
+
|
98
113
|
self.enrichment_pipe = [
|
99
|
-
# Other models working on `NodeItem` elements in the DoclingDocument
|
100
114
|
# Code Formula Enrichment Model
|
101
115
|
CodeFormulaModel(
|
102
116
|
enabled=pipeline_options.do_code_enrichment
|
103
117
|
or pipeline_options.do_formula_enrichment,
|
104
|
-
artifacts_path=
|
118
|
+
artifacts_path=artifacts_path,
|
105
119
|
options=CodeFormulaModelOptions(
|
106
120
|
do_code_enrichment=pipeline_options.do_code_enrichment,
|
107
121
|
do_formula_enrichment=pipeline_options.do_formula_enrichment,
|
@@ -111,15 +125,18 @@ class StandardPdfPipeline(PaginatedPipeline):
|
|
111
125
|
# Document Picture Classifier
|
112
126
|
DocumentPictureClassifier(
|
113
127
|
enabled=pipeline_options.do_picture_classification,
|
114
|
-
artifacts_path=
|
128
|
+
artifacts_path=artifacts_path,
|
115
129
|
options=DocumentPictureClassifierOptions(),
|
116
130
|
accelerator_options=pipeline_options.accelerator_options,
|
117
131
|
),
|
132
|
+
# Document Picture description
|
133
|
+
picture_description_model,
|
118
134
|
]
|
119
135
|
|
120
136
|
if (
|
121
137
|
self.pipeline_options.do_formula_enrichment
|
122
138
|
or self.pipeline_options.do_code_enrichment
|
139
|
+
or self.pipeline_options.do_picture_description
|
123
140
|
):
|
124
141
|
self.keep_backend = True
|
125
142
|
|
@@ -127,23 +144,24 @@ class StandardPdfPipeline(PaginatedPipeline):
|
|
127
144
|
def download_models_hf(
|
128
145
|
local_dir: Optional[Path] = None, force: bool = False
|
129
146
|
) -> Path:
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
force_download=force,
|
137
|
-
local_dir=local_dir,
|
138
|
-
revision="v2.1.0",
|
147
|
+
warnings.warn(
|
148
|
+
"The usage of StandardPdfPipeline.download_models_hf() is deprecated "
|
149
|
+
"use instead the utility `docling-tools models download`, or "
|
150
|
+
"the upstream method docling.utils.models_downloader.download_all()",
|
151
|
+
DeprecationWarning,
|
152
|
+
stacklevel=3,
|
139
153
|
)
|
140
154
|
|
141
|
-
|
155
|
+
output_dir = download_models(output_dir=local_dir, force=force, progress=False)
|
156
|
+
return output_dir
|
142
157
|
|
143
|
-
def get_ocr_model(
|
158
|
+
def get_ocr_model(
|
159
|
+
self, artifacts_path: Optional[Path] = None
|
160
|
+
) -> Optional[BaseOcrModel]:
|
144
161
|
if isinstance(self.pipeline_options.ocr_options, EasyOcrOptions):
|
145
162
|
return EasyOcrModel(
|
146
163
|
enabled=self.pipeline_options.do_ocr,
|
164
|
+
artifacts_path=artifacts_path,
|
147
165
|
options=self.pipeline_options.ocr_options,
|
148
166
|
accelerator_options=self.pipeline_options.accelerator_options,
|
149
167
|
)
|
@@ -174,6 +192,29 @@ class StandardPdfPipeline(PaginatedPipeline):
|
|
174
192
|
)
|
175
193
|
return None
|
176
194
|
|
195
|
+
def get_picture_description_model(
    self, artifacts_path: Optional[Path] = None
) -> Optional[PictureDescriptionBaseModel]:
    """Build the picture description model matching the configured options.

    The concrete model is chosen from the type of
    ``self.pipeline_options.picture_description_options``:

    * ``PictureDescriptionApiOptions`` -> ``PictureDescriptionApiModel``
    * ``PictureDescriptionVlmOptions`` -> ``PictureDescriptionVlmModel``

    Args:
        artifacts_path: Forwarded to the VLM model only; the API model is
            constructed without it.

    Returns:
        The constructed model, or ``None`` when the options object is of
        neither supported type.
    """
    description_options = self.pipeline_options.picture_description_options

    # API-backed variant: takes only the enable flag and its options.
    if isinstance(description_options, PictureDescriptionApiOptions):
        return PictureDescriptionApiModel(
            enabled=self.pipeline_options.do_picture_description,
            options=description_options,
        )

    # VLM variant: additionally receives the artifacts path and the
    # accelerator options.
    if isinstance(description_options, PictureDescriptionVlmOptions):
        return PictureDescriptionVlmModel(
            enabled=self.pipeline_options.do_picture_description,
            artifacts_path=artifacts_path,
            options=description_options,
            accelerator_options=self.pipeline_options.accelerator_options,
        )

    # Unknown options kind.
    return None
|
217
|
+
|
177
218
|
def initialize_page(self, conv_res: ConversionResult, page: Page) -> Page:
|
178
219
|
with TimeRecorder(conv_res, "page_init"):
|
179
220
|
page._backend = conv_res.input._backend.load_page(page.page_no) # type: ignore
|
@@ -0,0 +1,84 @@
|
|
1
|
+
import logging
|
2
|
+
from pathlib import Path
|
3
|
+
from typing import Optional
|
4
|
+
|
5
|
+
from docling.datamodel.pipeline_options import smolvlm_picture_description
|
6
|
+
from docling.datamodel.settings import settings
|
7
|
+
from docling.models.code_formula_model import CodeFormulaModel
|
8
|
+
from docling.models.document_picture_classifier import DocumentPictureClassifier
|
9
|
+
from docling.models.easyocr_model import EasyOcrModel
|
10
|
+
from docling.models.layout_model import LayoutModel
|
11
|
+
from docling.models.picture_description_vlm_model import PictureDescriptionVlmModel
|
12
|
+
from docling.models.table_structure_model import TableStructureModel
|
13
|
+
|
14
|
+
_log = logging.getLogger(__name__)
|
15
|
+
|
16
|
+
|
17
|
+
def download_models(
    output_dir: Optional[Path] = None,
    *,
    force: bool = False,
    progress: bool = False,
    with_layout: bool = True,
    with_tableformer: bool = True,
    with_code_formula: bool = True,
    with_picture_classifier: bool = True,
    with_smolvlm: bool = True,
    with_easyocr: bool = True,
) -> Path:
    """Download the selected model artifacts into a single directory.

    Each enabled model is fetched through its own ``download_models``
    class method into a sub-folder of ``output_dir``.

    Args:
        output_dir: Target directory. Defaults to
            ``settings.cache_dir / "models"`` when ``None``. Strings and
            other path-like values are accepted and converted to ``Path``.
        force: Forwarded unchanged to every model's ``download_models``.
        progress: Forwarded unchanged to every model's ``download_models``.
        with_layout: Download the layout model.
        with_tableformer: Download the table structure model.
        with_code_formula: Download the code/formula model.
        with_picture_classifier: Download the document picture classifier.
        with_smolvlm: Download the SmolVLM picture description model.
        with_easyocr: Download the EasyOCR models.

    Returns:
        The directory the models were downloaded into.
    """
    if output_dir is None:
        output_dir = settings.cache_dir / "models"
    else:
        # Tolerate str / os.PathLike callers; a Path passes through unchanged.
        output_dir = Path(output_dir)

    # Make sure the folder exists
    output_dir.mkdir(exist_ok=True, parents=True)

    if with_layout:
        _log.info("Downloading layout model...")
        LayoutModel.download_models(
            local_dir=output_dir / LayoutModel._model_repo_folder,
            force=force,
            progress=progress,
        )

    if with_tableformer:
        _log.info("Downloading tableformer model...")
        TableStructureModel.download_models(
            local_dir=output_dir / TableStructureModel._model_repo_folder,
            force=force,
            progress=progress,
        )

    if with_picture_classifier:
        _log.info("Downloading picture classifier model...")
        DocumentPictureClassifier.download_models(
            local_dir=output_dir / DocumentPictureClassifier._model_repo_folder,
            force=force,
            progress=progress,
        )

    if with_code_formula:
        _log.info("Downloading code formula model...")
        CodeFormulaModel.download_models(
            local_dir=output_dir / CodeFormulaModel._model_repo_folder,
            force=force,
            progress=progress,
        )

    if with_smolvlm:
        _log.info("Downloading SmolVlm model...")
        # Unlike the other models, the repo id and cache folder come from the
        # predefined `smolvlm_picture_description` options object.
        PictureDescriptionVlmModel.download_models(
            repo_id=smolvlm_picture_description.repo_id,
            local_dir=output_dir / smolvlm_picture_description.repo_cache_folder,
            force=force,
            progress=progress,
        )

    if with_easyocr:
        _log.info("Downloading easyocr models...")
        EasyOcrModel.download_models(
            local_dir=output_dir / EasyOcrModel._model_repo_folder,
            force=force,
            progress=progress,
        )

    return output_dir
|
docling/utils/utils.py
CHANGED
@@ -4,6 +4,9 @@ from itertools import islice
|
|
4
4
|
from pathlib import Path
|
5
5
|
from typing import List, Union
|
6
6
|
|
7
|
+
import requests
|
8
|
+
from tqdm import tqdm
|
9
|
+
|
7
10
|
|
8
11
|
def chunkify(iterator, chunk_size):
|
9
12
|
"""Yield successive chunks of chunk_size from the iterable."""
|
@@ -39,3 +42,24 @@ def create_hash(string: str):
|
|
39
42
|
hasher.update(string.encode("utf-8"))
|
40
43
|
|
41
44
|
return hasher.hexdigest()
|
45
|
+
|
46
|
+
|
47
|
+
def download_url_with_progress(url: str, progress: bool = False) -> BytesIO:
    """Download ``url`` into an in-memory buffer.

    The response is streamed in 10 KiB chunks; a ``tqdm`` progress bar is
    shown when ``progress`` is true.

    Args:
        url: Address to fetch; redirects are followed.
        progress: Show a byte-count progress bar while downloading.

    Returns:
        A ``BytesIO`` holding the response body, positioned at offset 0.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    buf = BytesIO()
    with requests.get(url, stream=True, allow_redirects=True) as response:
        # Fail fast on an error status instead of buffering the error page
        # and handing it back as if it were the requested payload.
        response.raise_for_status()

        # A missing Content-Length header yields 0 here.
        total_size = int(response.headers.get("content-length", 0))

        # Context manager guarantees the bar is closed even if streaming fails.
        with tqdm(
            total=total_size,
            unit="B",
            unit_scale=True,
            unit_divisor=1024,
            disable=(not progress),
        ) as progress_bar:
            for chunk in response.iter_content(10 * 1024):
                buf.write(chunk)
                progress_bar.update(len(chunk))

    # Rewind so callers can read from the start.
    buf.seek(0)
    return buf
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: docling
|
3
|
-
Version: 2.
|
3
|
+
Version: 2.20.0
|
4
4
|
Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
|
5
5
|
Home-page: https://github.com/DS4SD/docling
|
6
6
|
License: MIT
|
@@ -24,12 +24,13 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
24
24
|
Provides-Extra: ocrmac
|
25
25
|
Provides-Extra: rapidocr
|
26
26
|
Provides-Extra: tesserocr
|
27
|
-
|
27
|
+
Provides-Extra: vlm
|
28
|
+
Requires-Dist: beautifulsoup4 (>=4.12.3,<4.13.0)
|
28
29
|
Requires-Dist: certifi (>=2024.7.4)
|
29
30
|
Requires-Dist: deepsearch-glm (>=1.0.0,<2.0.0)
|
30
|
-
Requires-Dist: docling-core[chunking] (>=2.17.
|
31
|
+
Requires-Dist: docling-core[chunking] (>=2.17.2,<3.0.0)
|
31
32
|
Requires-Dist: docling-ibm-models (>=3.3.0,<4.0.0)
|
32
|
-
Requires-Dist: docling-parse (>=3.
|
33
|
+
Requires-Dist: docling-parse (>=3.3.0,<4.0.0)
|
33
34
|
Requires-Dist: easyocr (>=1.7,<2.0)
|
34
35
|
Requires-Dist: filetype (>=1.2.0,<2.0.0)
|
35
36
|
Requires-Dist: huggingface_hub (>=0.23,<1)
|
@@ -52,6 +53,9 @@ Requires-Dist: rtree (>=1.3.0,<2.0.0)
|
|
52
53
|
Requires-Dist: scipy (>=1.6.0,<1.14.0) ; python_version < "3.10"
|
53
54
|
Requires-Dist: scipy (>=1.6.0,<2.0.0) ; python_version >= "3.10"
|
54
55
|
Requires-Dist: tesserocr (>=2.7.1,<3.0.0) ; extra == "tesserocr"
|
56
|
+
Requires-Dist: tqdm (>=4.65.0,<5.0.0)
|
57
|
+
Requires-Dist: transformers (>=4.42.0,<4.43.0) ; (sys_platform == "darwin" and platform_machine == "x86_64") and (extra == "vlm")
|
58
|
+
Requires-Dist: transformers (>=4.46.0,<5.0.0) ; (sys_platform != "darwin" or platform_machine != "x86_64") and (extra == "vlm")
|
55
59
|
Requires-Dist: typer (>=0.12.5,<0.13.0)
|
56
60
|
Project-URL: Repository, https://github.com/DS4SD/docling
|
57
61
|
Description-Content-Type: text/markdown
|
@@ -7,10 +7,10 @@ docling/backend/docling_parse_v2_backend.py,sha256=IpwrBrtLGwNRl5AYO-o3NjEfNRsAk
|
|
7
7
|
docling/backend/html_backend.py,sha256=YTPLZiEEEuGaP6G62skK3wXJ0KftuqBCl8erNXeJyoE,15893
|
8
8
|
docling/backend/json/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
9
|
docling/backend/json/docling_json_backend.py,sha256=LlFMVoZrrCfVwbDuRbNN4Xg96Lujh4xxrTBt9jGhY9I,1984
|
10
|
-
docling/backend/md_backend.py,sha256=
|
10
|
+
docling/backend/md_backend.py,sha256=NaVfcnEH-5bwVovjn76EobF6B6Wm8AhaTZ4E8k0TUPo,16826
|
11
11
|
docling/backend/msexcel_backend.py,sha256=lyJc4ShJGAN2ZfNTTuhdYTF-44cZsGyn_8Djstp3IEU,12700
|
12
12
|
docling/backend/mspowerpoint_backend.py,sha256=esAyaaQe17BQFweGAGJHvImKETefY0BpvfpUSECC49w,16424
|
13
|
-
docling/backend/msword_backend.py,sha256=
|
13
|
+
docling/backend/msword_backend.py,sha256=V4miLIcOH8DDlSCm25F_DALBW60Uf9JoSS0TB4yrQBw,20591
|
14
14
|
docling/backend/pdf_backend.py,sha256=17Pr8dWsD1C4FYUprrwMM9trDGW-JYLjrcScx1Ul4io,2048
|
15
15
|
docling/backend/pypdfium2_backend.py,sha256=QSPfp903ZtSpoNqPmcIek0HmvETrJ1kkwrdxnF5pjS0,9014
|
16
16
|
docling/backend/xml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -18,45 +18,51 @@ docling/backend/xml/pubmed_backend.py,sha256=LMnpowjnxa5SydfNC00Ll840BYraL8dCJu-
|
|
18
18
|
docling/backend/xml/uspto_backend.py,sha256=a5GxWLj2SUR5Of8TWJinhef1gKyaQSjHPVXvGiN8yG8,70324
|
19
19
|
docling/chunking/__init__.py,sha256=h83TDs0AuOV6oEPLAPrn9dpGKiU-2Vg6IRNo4cv6GDA,346
|
20
20
|
docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
21
|
-
docling/cli/main.py,sha256=
|
21
|
+
docling/cli/main.py,sha256=T9Tibi1z7YaU8jyl0HrN3h4y0cALvFVg3-4sanWd35M,16590
|
22
|
+
docling/cli/models.py,sha256=Z4IEuaXE9el5PuI6_6mR4D5Sn3y8WZzBtoIJPi6jL_s,3188
|
23
|
+
docling/cli/tools.py,sha256=QhtRxQG0TVrfsMqdv5i7J0_qQy1ZZyWYnHPwJl7b5oY,322
|
22
24
|
docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
23
25
|
docling/datamodel/base_models.py,sha256=vewP1X99qfAwiUsiC2m8CBDGiQPsGyp_WkKJHYPoYn4,7026
|
24
26
|
docling/datamodel/document.py,sha256=HkmvQKW3QSx3tAqPTnXiJbD_y1EVwR-WE3n6Gq8g1NY,13428
|
25
|
-
docling/datamodel/pipeline_options.py,sha256=
|
26
|
-
docling/datamodel/settings.py,sha256=
|
27
|
+
docling/datamodel/pipeline_options.py,sha256=SRlZYIXdmpFO_vyziOonttZvftRQMiFvltxNlolzYW8,9642
|
28
|
+
docling/datamodel/settings.py,sha256=pJi9OBqZQhsNi7RwJWQFRDKGhm3u679iN76psA3VtsY,1817
|
27
29
|
docling/document_converter.py,sha256=qaldb7Thqk59RdE-RTGtj1M7l5UzaBdnxIvGoQ7lTeo,12876
|
28
30
|
docling/exceptions.py,sha256=-FoP46rFJgz_jn5uDv2V052udEEg8gckk6uhoItchXc,85
|
29
31
|
docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
30
|
-
docling/models/base_model.py,sha256=
|
32
|
+
docling/models/base_model.py,sha256=q_lKeQ0FT70idXlZ3JgyAv8dA8J3bZWBSDBkqTzy0lo,2679
|
31
33
|
docling/models/base_ocr_model.py,sha256=YiUMvdjnHw9SHjnfJKT5INrPMoIGEf_Z2OApfl_VRTE,6919
|
32
|
-
docling/models/code_formula_model.py,sha256=
|
33
|
-
docling/models/document_picture_classifier.py,sha256=
|
34
|
+
docling/models/code_formula_model.py,sha256=6grbRPWaLljadheT5s4omdT6hmXfin4gJU17csWvhjY,8611
|
35
|
+
docling/models/document_picture_classifier.py,sha256=6I_j6fG5fnhIV6rqN31LYikNTZyg5isXrVs0GIqHDaY,6235
|
34
36
|
docling/models/ds_glm_model.py,sha256=CkhsP0cEWwm4wb1g3cLFriVGpVtELiUK3REDMkPwAMw,13028
|
35
|
-
docling/models/easyocr_model.py,sha256=
|
36
|
-
docling/models/layout_model.py,sha256=
|
37
|
+
docling/models/easyocr_model.py,sha256=ePg1exAXeOzkBRBT-6PBSmqKFmnNFkCEd4HNDsGVgLM,6860
|
38
|
+
docling/models/layout_model.py,sha256=7fQWipGV1HDrvbP4uOKa9QAicQl89jp7lailQmbFL3w,7804
|
37
39
|
docling/models/ocr_mac_model.py,sha256=bLP14UUmZcSzjDe-HLj-mtksTuBmsCTg2C1wCxUpan0,4502
|
38
40
|
docling/models/page_assemble_model.py,sha256=c5KLKwkUIdW0JcDHizWsqrpb5x_3DK28x82Q8o-3VJM,5968
|
39
41
|
docling/models/page_preprocessing_model.py,sha256=1gVrZjObKxAvXkkKvXnIFApPOggzgiTFPtt1CGbMbSs,2763
|
42
|
+
docling/models/picture_description_api_model.py,sha256=Tw4B92_5Fa5FKZeGmgUWmSFMAw5D58uzQM2XRTIXOXI,3003
|
43
|
+
docling/models/picture_description_base_model.py,sha256=rZLIW1_CaRAw_EP3zuI8ktC0ZxwO7yubhh2RkaC_8e8,1910
|
44
|
+
docling/models/picture_description_vlm_model.py,sha256=a2vYUdlcA0--_8neY0tTiU8reCf29NCbVMKwWdMy2QQ,3653
|
40
45
|
docling/models/rapid_ocr_model.py,sha256=2HXmurNRPP6qyqn7U5h9NQIs8zi0TMHf56CpcKQk0fU,5038
|
41
|
-
docling/models/table_structure_model.py,sha256=
|
46
|
+
docling/models/table_structure_model.py,sha256=UIqWlw_9JNfGsO86c00rPb4GCg-yNliKEwyhCqlsZbM,11225
|
42
47
|
docling/models/tesseract_ocr_cli_model.py,sha256=b2Is5x2gZLS6mQWnKe0y7p6UU6hRTHDfoH4D2RQ5mx0,9310
|
43
48
|
docling/models/tesseract_ocr_model.py,sha256=BN85u-4a-xzUY7Iw21Ib8L8kx4mgbDGiUtxBelLiJm8,8513
|
44
49
|
docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
45
|
-
docling/pipeline/base_pipeline.py,sha256=
|
50
|
+
docling/pipeline/base_pipeline.py,sha256=9ABK-Cr235bxE5vweoIA5rgBZV_EF8qFxAqLI27H_Pg,8749
|
46
51
|
docling/pipeline/simple_pipeline.py,sha256=mZqANqUtAOFAyqQEShErQnAUz6tJFOl6zVoazEDJ_wE,2254
|
47
|
-
docling/pipeline/standard_pdf_pipeline.py,sha256=
|
52
|
+
docling/pipeline/standard_pdf_pipeline.py,sha256=UKB9COXmVGvUhTVwsKsQTjNKi86TRdGLJjknwNZPwNo,12366
|
48
53
|
docling/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
49
54
|
docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
50
55
|
docling/utils/accelerator_utils.py,sha256=ZjULCn-qhxqx3frF-rJmAlWdzqgUMxH5utLHbSPev80,1367
|
51
56
|
docling/utils/export.py,sha256=KyGF1BVDHPFfHVXZc8vegsWlFfOgGPP2YckWpTadyI8,4694
|
52
57
|
docling/utils/glm_utils.py,sha256=uyCoFTX9FbS1Ke0aSlkdzGLUt08dZfkgriWadkyLiiA,11856
|
53
58
|
docling/utils/layout_postprocessor.py,sha256=urRzeF9PrKiMBvA6DdHHwyLxG06CMhelgJeV5B1l6l0,24258
|
59
|
+
docling/utils/model_downloader.py,sha256=XK3ozGXyQcNPvrSsevTwR9VnY41JWovlsGk_ZBnu6FU,2787
|
54
60
|
docling/utils/ocr_utils.py,sha256=F7iOOjqolUcImUzir4qjDQd4QWSO3s6JC4WRn3U7uY4,263
|
55
61
|
docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
|
56
|
-
docling/utils/utils.py,sha256=
|
62
|
+
docling/utils/utils.py,sha256=0ozCk7zUkYzxRVmYoIB2zA1lqjQOuaQzxfGuf1wmKW4,1866
|
57
63
|
docling/utils/visualization.py,sha256=4pn-80fVuE04ken7hUg5Ar47ndRSL9MWBgdHM-1g1zU,2735
|
58
|
-
docling-2.
|
59
|
-
docling-2.
|
60
|
-
docling-2.
|
61
|
-
docling-2.
|
62
|
-
docling-2.
|
64
|
+
docling-2.20.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
|
65
|
+
docling-2.20.0.dist-info/METADATA,sha256=9g0XmFk8hxdswqwT5jWnrUPKswGA26JDlyV5hqJ_tCc,8720
|
66
|
+
docling-2.20.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
67
|
+
docling-2.20.0.dist-info/entry_points.txt,sha256=cFrINXsORijdm2EWJzf1m9_rDxH9G9W1fP385-9atY4,84
|
68
|
+
docling-2.20.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|