docling 2.17.0__py3-none-any.whl → 2.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling/backend/html_backend.py +18 -18
- docling/backend/md_backend.py +144 -75
- docling/backend/mspowerpoint_backend.py +39 -27
- docling/backend/msword_backend.py +173 -131
- docling/cli/main.py +8 -0
- docling/cli/models.py +105 -0
- docling/cli/tools.py +17 -0
- docling/datamodel/document.py +2 -0
- docling/datamodel/settings.py +18 -1
- docling/document_converter.py +12 -2
- docling/models/base_model.py +3 -0
- docling/models/code_formula_model.py +15 -9
- docling/models/document_picture_classifier.py +11 -8
- docling/models/easyocr_model.py +50 -3
- docling/models/layout_model.py +49 -3
- docling/models/table_structure_model.py +53 -7
- docling/pipeline/base_pipeline.py +4 -2
- docling/pipeline/standard_pdf_pipeline.py +25 -24
- docling/utils/glm_utils.py +4 -0
- docling/utils/model_downloader.py +72 -0
- docling/utils/utils.py +24 -0
- {docling-2.17.0.dist-info → docling-2.19.0.dist-info}/METADATA +11 -5
- {docling-2.17.0.dist-info → docling-2.19.0.dist-info}/RECORD +26 -23
- {docling-2.17.0.dist-info → docling-2.19.0.dist-info}/WHEEL +1 -1
- {docling-2.17.0.dist-info → docling-2.19.0.dist-info}/entry_points.txt +1 -0
- {docling-2.17.0.dist-info → docling-2.19.0.dist-info}/LICENSE +0 -0
@@ -1,5 +1,6 @@
|
|
1
1
|
import logging
|
2
2
|
import sys
|
3
|
+
import warnings
|
3
4
|
from pathlib import Path
|
4
5
|
from typing import Optional
|
5
6
|
|
@@ -17,6 +18,7 @@ from docling.datamodel.pipeline_options import (
|
|
17
18
|
TesseractCliOcrOptions,
|
18
19
|
TesseractOcrOptions,
|
19
20
|
)
|
21
|
+
from docling.datamodel.settings import settings
|
20
22
|
from docling.models.base_ocr_model import BaseOcrModel
|
21
23
|
from docling.models.code_formula_model import CodeFormulaModel, CodeFormulaModelOptions
|
22
24
|
from docling.models.document_picture_classifier import (
|
@@ -37,23 +39,23 @@ from docling.models.table_structure_model import TableStructureModel
|
|
37
39
|
from docling.models.tesseract_ocr_cli_model import TesseractOcrCliModel
|
38
40
|
from docling.models.tesseract_ocr_model import TesseractOcrModel
|
39
41
|
from docling.pipeline.base_pipeline import PaginatedPipeline
|
42
|
+
from docling.utils.model_downloader import download_models
|
40
43
|
from docling.utils.profiling import ProfilingScope, TimeRecorder
|
41
44
|
|
42
45
|
_log = logging.getLogger(__name__)
|
43
46
|
|
44
47
|
|
45
48
|
class StandardPdfPipeline(PaginatedPipeline):
|
46
|
-
_layout_model_path =
|
47
|
-
_table_model_path =
|
49
|
+
_layout_model_path = LayoutModel._model_path
|
50
|
+
_table_model_path = TableStructureModel._model_path
|
48
51
|
|
49
52
|
def __init__(self, pipeline_options: PdfPipelineOptions):
|
50
53
|
super().__init__(pipeline_options)
|
51
54
|
self.pipeline_options: PdfPipelineOptions
|
52
55
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
self.artifacts_path = Path(pipeline_options.artifacts_path)
|
56
|
+
artifacts_path: Optional[Path] = None
|
57
|
+
if pipeline_options.artifacts_path is not None:
|
58
|
+
artifacts_path = Path(pipeline_options.artifacts_path).expanduser()
|
57
59
|
|
58
60
|
self.keep_images = (
|
59
61
|
self.pipeline_options.generate_page_images
|
@@ -63,7 +65,7 @@ class StandardPdfPipeline(PaginatedPipeline):
|
|
63
65
|
|
64
66
|
self.glm_model = GlmModel(options=GlmOptions())
|
65
67
|
|
66
|
-
if (ocr_model := self.get_ocr_model()) is None:
|
68
|
+
if (ocr_model := self.get_ocr_model(artifacts_path=artifacts_path)) is None:
|
67
69
|
raise RuntimeError(
|
68
70
|
f"The specified OCR kind is not supported: {pipeline_options.ocr_options.kind}."
|
69
71
|
)
|
@@ -79,15 +81,13 @@ class StandardPdfPipeline(PaginatedPipeline):
|
|
79
81
|
ocr_model,
|
80
82
|
# Layout model
|
81
83
|
LayoutModel(
|
82
|
-
artifacts_path=
|
83
|
-
/ StandardPdfPipeline._layout_model_path,
|
84
|
+
artifacts_path=artifacts_path,
|
84
85
|
accelerator_options=pipeline_options.accelerator_options,
|
85
86
|
),
|
86
87
|
# Table structure model
|
87
88
|
TableStructureModel(
|
88
89
|
enabled=pipeline_options.do_table_structure,
|
89
|
-
artifacts_path=
|
90
|
-
/ StandardPdfPipeline._table_model_path,
|
90
|
+
artifacts_path=artifacts_path,
|
91
91
|
options=pipeline_options.table_structure_options,
|
92
92
|
accelerator_options=pipeline_options.accelerator_options,
|
93
93
|
),
|
@@ -101,7 +101,7 @@ class StandardPdfPipeline(PaginatedPipeline):
|
|
101
101
|
CodeFormulaModel(
|
102
102
|
enabled=pipeline_options.do_code_enrichment
|
103
103
|
or pipeline_options.do_formula_enrichment,
|
104
|
-
artifacts_path=
|
104
|
+
artifacts_path=artifacts_path,
|
105
105
|
options=CodeFormulaModelOptions(
|
106
106
|
do_code_enrichment=pipeline_options.do_code_enrichment,
|
107
107
|
do_formula_enrichment=pipeline_options.do_formula_enrichment,
|
@@ -111,7 +111,7 @@ class StandardPdfPipeline(PaginatedPipeline):
|
|
111
111
|
# Document Picture Classifier
|
112
112
|
DocumentPictureClassifier(
|
113
113
|
enabled=pipeline_options.do_picture_classification,
|
114
|
-
artifacts_path=
|
114
|
+
artifacts_path=artifacts_path,
|
115
115
|
options=DocumentPictureClassifierOptions(),
|
116
116
|
accelerator_options=pipeline_options.accelerator_options,
|
117
117
|
),
|
@@ -127,23 +127,24 @@ class StandardPdfPipeline(PaginatedPipeline):
|
|
127
127
|
def download_models_hf(
|
128
128
|
local_dir: Optional[Path] = None, force: bool = False
|
129
129
|
) -> Path:
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
force_download=force,
|
137
|
-
local_dir=local_dir,
|
138
|
-
revision="v2.1.0",
|
130
|
+
warnings.warn(
|
131
|
+
"The usage of StandardPdfPipeline.download_models_hf() is deprecated "
|
132
|
+
"use instead the utility `docling-tools models download`, or "
|
133
|
+
"the upstream method docling.utils.models_downloader.download_all()",
|
134
|
+
DeprecationWarning,
|
135
|
+
stacklevel=3,
|
139
136
|
)
|
140
137
|
|
141
|
-
|
138
|
+
output_dir = download_models(output_dir=local_dir, force=force, progress=False)
|
139
|
+
return output_dir
|
142
140
|
|
143
|
-
def get_ocr_model(
|
141
|
+
def get_ocr_model(
|
142
|
+
self, artifacts_path: Optional[Path] = None
|
143
|
+
) -> Optional[BaseOcrModel]:
|
144
144
|
if isinstance(self.pipeline_options.ocr_options, EasyOcrOptions):
|
145
145
|
return EasyOcrModel(
|
146
146
|
enabled=self.pipeline_options.do_ocr,
|
147
|
+
artifacts_path=artifacts_path,
|
147
148
|
options=self.pipeline_options.ocr_options,
|
148
149
|
accelerator_options=self.pipeline_options.accelerator_options,
|
149
150
|
)
|
docling/utils/glm_utils.py
CHANGED
@@ -307,6 +307,10 @@ def to_docling_document(doc_glm, update_name_label=False) -> DoclingDocument:
|
|
307
307
|
current_list = None
|
308
308
|
|
309
309
|
doc.add_code(text=text, prov=prov)
|
310
|
+
elif label == DocItemLabel.FORMULA:
|
311
|
+
current_list = None
|
312
|
+
|
313
|
+
doc.add_text(label=DocItemLabel.FORMULA, text="", orig=text, prov=prov)
|
310
314
|
else:
|
311
315
|
current_list = None
|
312
316
|
|
@@ -0,0 +1,72 @@
|
|
1
|
+
import logging
|
2
|
+
from pathlib import Path
|
3
|
+
from typing import Optional
|
4
|
+
|
5
|
+
from docling.datamodel.settings import settings
|
6
|
+
from docling.models.code_formula_model import CodeFormulaModel
|
7
|
+
from docling.models.document_picture_classifier import DocumentPictureClassifier
|
8
|
+
from docling.models.easyocr_model import EasyOcrModel
|
9
|
+
from docling.models.layout_model import LayoutModel
|
10
|
+
from docling.models.table_structure_model import TableStructureModel
|
11
|
+
|
12
|
+
_log = logging.getLogger(__name__)
|
13
|
+
|
14
|
+
|
15
|
+
def download_models(
|
16
|
+
output_dir: Optional[Path] = None,
|
17
|
+
*,
|
18
|
+
force: bool = False,
|
19
|
+
progress: bool = False,
|
20
|
+
with_layout: bool = True,
|
21
|
+
with_tableformer: bool = True,
|
22
|
+
with_code_formula: bool = True,
|
23
|
+
with_picture_classifier: bool = True,
|
24
|
+
with_easyocr: bool = True,
|
25
|
+
):
|
26
|
+
if output_dir is None:
|
27
|
+
output_dir = settings.cache_dir / "models"
|
28
|
+
|
29
|
+
# Make sure the folder exists
|
30
|
+
output_dir.mkdir(exist_ok=True, parents=True)
|
31
|
+
|
32
|
+
if with_layout:
|
33
|
+
_log.info(f"Downloading layout model...")
|
34
|
+
LayoutModel.download_models(
|
35
|
+
local_dir=output_dir / LayoutModel._model_repo_folder,
|
36
|
+
force=force,
|
37
|
+
progress=progress,
|
38
|
+
)
|
39
|
+
|
40
|
+
if with_tableformer:
|
41
|
+
_log.info(f"Downloading tableformer model...")
|
42
|
+
TableStructureModel.download_models(
|
43
|
+
local_dir=output_dir / TableStructureModel._model_repo_folder,
|
44
|
+
force=force,
|
45
|
+
progress=progress,
|
46
|
+
)
|
47
|
+
|
48
|
+
if with_picture_classifier:
|
49
|
+
_log.info(f"Downloading picture classifier model...")
|
50
|
+
DocumentPictureClassifier.download_models(
|
51
|
+
local_dir=output_dir / DocumentPictureClassifier._model_repo_folder,
|
52
|
+
force=force,
|
53
|
+
progress=progress,
|
54
|
+
)
|
55
|
+
|
56
|
+
if with_code_formula:
|
57
|
+
_log.info(f"Downloading code formula model...")
|
58
|
+
CodeFormulaModel.download_models(
|
59
|
+
local_dir=output_dir / CodeFormulaModel._model_repo_folder,
|
60
|
+
force=force,
|
61
|
+
progress=progress,
|
62
|
+
)
|
63
|
+
|
64
|
+
if with_easyocr:
|
65
|
+
_log.info(f"Downloading easyocr models...")
|
66
|
+
EasyOcrModel.download_models(
|
67
|
+
local_dir=output_dir / EasyOcrModel._model_repo_folder,
|
68
|
+
force=force,
|
69
|
+
progress=progress,
|
70
|
+
)
|
71
|
+
|
72
|
+
return output_dir
|
docling/utils/utils.py
CHANGED
@@ -4,6 +4,9 @@ from itertools import islice
|
|
4
4
|
from pathlib import Path
|
5
5
|
from typing import List, Union
|
6
6
|
|
7
|
+
import requests
|
8
|
+
from tqdm import tqdm
|
9
|
+
|
7
10
|
|
8
11
|
def chunkify(iterator, chunk_size):
|
9
12
|
"""Yield successive chunks of chunk_size from the iterable."""
|
@@ -39,3 +42,24 @@ def create_hash(string: str):
|
|
39
42
|
hasher.update(string.encode("utf-8"))
|
40
43
|
|
41
44
|
return hasher.hexdigest()
|
45
|
+
|
46
|
+
|
47
|
+
def download_url_with_progress(url: str, progress: bool = False) -> BytesIO:
|
48
|
+
buf = BytesIO()
|
49
|
+
with requests.get(url, stream=True, allow_redirects=True) as response:
|
50
|
+
total_size = int(response.headers.get("content-length", 0))
|
51
|
+
progress_bar = tqdm(
|
52
|
+
total=total_size,
|
53
|
+
unit="B",
|
54
|
+
unit_scale=True,
|
55
|
+
unit_divisor=1024,
|
56
|
+
disable=(not progress),
|
57
|
+
)
|
58
|
+
|
59
|
+
for chunk in response.iter_content(10 * 1024):
|
60
|
+
buf.write(chunk)
|
61
|
+
progress_bar.update(len(chunk))
|
62
|
+
progress_bar.close()
|
63
|
+
|
64
|
+
buf.seek(0)
|
65
|
+
return buf
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: docling
|
3
|
-
Version: 2.
|
3
|
+
Version: 2.19.0
|
4
4
|
Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
|
5
5
|
Home-page: https://github.com/DS4SD/docling
|
6
6
|
License: MIT
|
@@ -19,16 +19,17 @@ Classifier: Programming Language :: Python :: 3.9
|
|
19
19
|
Classifier: Programming Language :: Python :: 3.10
|
20
20
|
Classifier: Programming Language :: Python :: 3.11
|
21
21
|
Classifier: Programming Language :: Python :: 3.12
|
22
|
+
Classifier: Programming Language :: Python :: 3.13
|
22
23
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
23
24
|
Provides-Extra: ocrmac
|
24
25
|
Provides-Extra: rapidocr
|
25
26
|
Provides-Extra: tesserocr
|
26
|
-
Requires-Dist: beautifulsoup4 (>=4.12.3,<
|
27
|
+
Requires-Dist: beautifulsoup4 (>=4.12.3,<4.13.0)
|
27
28
|
Requires-Dist: certifi (>=2024.7.4)
|
28
29
|
Requires-Dist: deepsearch-glm (>=1.0.0,<2.0.0)
|
29
|
-
Requires-Dist: docling-core[chunking] (>=2.
|
30
|
+
Requires-Dist: docling-core[chunking] (>=2.17.2,<3.0.0)
|
30
31
|
Requires-Dist: docling-ibm-models (>=3.3.0,<4.0.0)
|
31
|
-
Requires-Dist: docling-parse (>=3.
|
32
|
+
Requires-Dist: docling-parse (>=3.3.0,<4.0.0)
|
32
33
|
Requires-Dist: easyocr (>=1.7,<2.0)
|
33
34
|
Requires-Dist: filetype (>=1.2.0,<2.0.0)
|
34
35
|
Requires-Dist: huggingface_hub (>=0.23,<1)
|
@@ -48,8 +49,10 @@ Requires-Dist: python-pptx (>=1.0.2,<2.0.0)
|
|
48
49
|
Requires-Dist: rapidocr-onnxruntime (>=1.4.0,<2.0.0) ; (python_version < "3.13") and (extra == "rapidocr")
|
49
50
|
Requires-Dist: requests (>=2.32.2,<3.0.0)
|
50
51
|
Requires-Dist: rtree (>=1.3.0,<2.0.0)
|
51
|
-
Requires-Dist: scipy (>=1.6.0,<
|
52
|
+
Requires-Dist: scipy (>=1.6.0,<1.14.0) ; python_version < "3.10"
|
53
|
+
Requires-Dist: scipy (>=1.6.0,<2.0.0) ; python_version >= "3.10"
|
52
54
|
Requires-Dist: tesserocr (>=2.7.1,<3.0.0) ; extra == "tesserocr"
|
55
|
+
Requires-Dist: tqdm (>=4.65.0,<5.0.0)
|
53
56
|
Requires-Dist: typer (>=0.12.5,<0.13.0)
|
54
57
|
Project-URL: Repository, https://github.com/DS4SD/docling
|
55
58
|
Description-Content-Type: text/markdown
|
@@ -94,6 +97,9 @@ Docling simplifies document processing, parsing diverse formats — including ad
|
|
94
97
|
### Coming soon
|
95
98
|
|
96
99
|
* 📝 Metadata extraction, including title, authors, references & language
|
100
|
+
* 📝 Inclusion of Visual Language Models ([SmolDocling](https://huggingface.co/blog/smolervlm#smoldocling))
|
101
|
+
* 📝 Chart understanding (Barchart, Piechart, LinePlot, etc)
|
102
|
+
* 📝 Complex chemistry understanding (Molecular structures)
|
97
103
|
|
98
104
|
## Installation
|
99
105
|
|
@@ -4,13 +4,13 @@ docling/backend/abstract_backend.py,sha256=1lNxzwDTn303aXduPDVmTyXn-5ZIoWMLYqNxA
|
|
4
4
|
docling/backend/asciidoc_backend.py,sha256=zyHxlG_BvlLwvpdNca3P6aopxOJZw8wbDFkJQQknNXk,14050
|
5
5
|
docling/backend/docling_parse_backend.py,sha256=hEEJibI1oJS0LAnFoIs6gMshS3bCqGtVxHnDNvBGZuA,7649
|
6
6
|
docling/backend/docling_parse_v2_backend.py,sha256=IpwrBrtLGwNRl5AYO-o3NjEfNRsAkuMhzvDt2HXb9Ko,8655
|
7
|
-
docling/backend/html_backend.py,sha256=
|
7
|
+
docling/backend/html_backend.py,sha256=YTPLZiEEEuGaP6G62skK3wXJ0KftuqBCl8erNXeJyoE,15893
|
8
8
|
docling/backend/json/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
9
|
docling/backend/json/docling_json_backend.py,sha256=LlFMVoZrrCfVwbDuRbNN4Xg96Lujh4xxrTBt9jGhY9I,1984
|
10
|
-
docling/backend/md_backend.py,sha256=
|
10
|
+
docling/backend/md_backend.py,sha256=NaVfcnEH-5bwVovjn76EobF6B6Wm8AhaTZ4E8k0TUPo,16826
|
11
11
|
docling/backend/msexcel_backend.py,sha256=lyJc4ShJGAN2ZfNTTuhdYTF-44cZsGyn_8Djstp3IEU,12700
|
12
|
-
docling/backend/mspowerpoint_backend.py,sha256=
|
13
|
-
docling/backend/msword_backend.py,sha256=
|
12
|
+
docling/backend/mspowerpoint_backend.py,sha256=esAyaaQe17BQFweGAGJHvImKETefY0BpvfpUSECC49w,16424
|
13
|
+
docling/backend/msword_backend.py,sha256=V4miLIcOH8DDlSCm25F_DALBW60Uf9JoSS0TB4yrQBw,20591
|
14
14
|
docling/backend/pdf_backend.py,sha256=17Pr8dWsD1C4FYUprrwMM9trDGW-JYLjrcScx1Ul4io,2048
|
15
15
|
docling/backend/pypdfium2_backend.py,sha256=QSPfp903ZtSpoNqPmcIek0HmvETrJ1kkwrdxnF5pjS0,9014
|
16
16
|
docling/backend/xml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -18,45 +18,48 @@ docling/backend/xml/pubmed_backend.py,sha256=LMnpowjnxa5SydfNC00Ll840BYraL8dCJu-
|
|
18
18
|
docling/backend/xml/uspto_backend.py,sha256=a5GxWLj2SUR5Of8TWJinhef1gKyaQSjHPVXvGiN8yG8,70324
|
19
19
|
docling/chunking/__init__.py,sha256=h83TDs0AuOV6oEPLAPrn9dpGKiU-2Vg6IRNo4cv6GDA,346
|
20
20
|
docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
21
|
-
docling/cli/main.py,sha256=
|
21
|
+
docling/cli/main.py,sha256=qShZI1f7WWn5T16YtFTeYY1CUucNjyGefIekCWvkAqc,16366
|
22
|
+
docling/cli/models.py,sha256=cjP13QZfgHZWPVJw3kJvSszJdDrRVWx-sJipZRfHEuQ,3102
|
23
|
+
docling/cli/tools.py,sha256=QhtRxQG0TVrfsMqdv5i7J0_qQy1ZZyWYnHPwJl7b5oY,322
|
22
24
|
docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
23
25
|
docling/datamodel/base_models.py,sha256=vewP1X99qfAwiUsiC2m8CBDGiQPsGyp_WkKJHYPoYn4,7026
|
24
|
-
docling/datamodel/document.py,sha256=
|
26
|
+
docling/datamodel/document.py,sha256=HkmvQKW3QSx3tAqPTnXiJbD_y1EVwR-WE3n6Gq8g1NY,13428
|
25
27
|
docling/datamodel/pipeline_options.py,sha256=f9-VQFgOdahyclGQgH_T8ZYBopkWsF_fbWbxo39ux3g,7888
|
26
|
-
docling/datamodel/settings.py,sha256=
|
27
|
-
docling/document_converter.py,sha256=
|
28
|
+
docling/datamodel/settings.py,sha256=pJi9OBqZQhsNi7RwJWQFRDKGhm3u679iN76psA3VtsY,1817
|
29
|
+
docling/document_converter.py,sha256=qaldb7Thqk59RdE-RTGtj1M7l5UzaBdnxIvGoQ7lTeo,12876
|
28
30
|
docling/exceptions.py,sha256=-FoP46rFJgz_jn5uDv2V052udEEg8gckk6uhoItchXc,85
|
29
31
|
docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
30
|
-
docling/models/base_model.py,sha256=
|
32
|
+
docling/models/base_model.py,sha256=IIf_PA933bdwHst3g_MOC4oiYQcSCIVOnxnCnN1NxEQ,2681
|
31
33
|
docling/models/base_ocr_model.py,sha256=YiUMvdjnHw9SHjnfJKT5INrPMoIGEf_Z2OApfl_VRTE,6919
|
32
|
-
docling/models/code_formula_model.py,sha256=
|
33
|
-
docling/models/document_picture_classifier.py,sha256=
|
34
|
+
docling/models/code_formula_model.py,sha256=6grbRPWaLljadheT5s4omdT6hmXfin4gJU17csWvhjY,8611
|
35
|
+
docling/models/document_picture_classifier.py,sha256=6I_j6fG5fnhIV6rqN31LYikNTZyg5isXrVs0GIqHDaY,6235
|
34
36
|
docling/models/ds_glm_model.py,sha256=CkhsP0cEWwm4wb1g3cLFriVGpVtELiUK3REDMkPwAMw,13028
|
35
|
-
docling/models/easyocr_model.py,sha256=
|
36
|
-
docling/models/layout_model.py,sha256=
|
37
|
+
docling/models/easyocr_model.py,sha256=dDy0iaR4KUrq7eFIQclMqUYap1B06PG4nC6RMlGYhSw,6886
|
38
|
+
docling/models/layout_model.py,sha256=7fQWipGV1HDrvbP4uOKa9QAicQl89jp7lailQmbFL3w,7804
|
37
39
|
docling/models/ocr_mac_model.py,sha256=bLP14UUmZcSzjDe-HLj-mtksTuBmsCTg2C1wCxUpan0,4502
|
38
40
|
docling/models/page_assemble_model.py,sha256=c5KLKwkUIdW0JcDHizWsqrpb5x_3DK28x82Q8o-3VJM,5968
|
39
41
|
docling/models/page_preprocessing_model.py,sha256=1gVrZjObKxAvXkkKvXnIFApPOggzgiTFPtt1CGbMbSs,2763
|
40
42
|
docling/models/rapid_ocr_model.py,sha256=2HXmurNRPP6qyqn7U5h9NQIs8zi0TMHf56CpcKQk0fU,5038
|
41
|
-
docling/models/table_structure_model.py,sha256=
|
43
|
+
docling/models/table_structure_model.py,sha256=UIqWlw_9JNfGsO86c00rPb4GCg-yNliKEwyhCqlsZbM,11225
|
42
44
|
docling/models/tesseract_ocr_cli_model.py,sha256=b2Is5x2gZLS6mQWnKe0y7p6UU6hRTHDfoH4D2RQ5mx0,9310
|
43
45
|
docling/models/tesseract_ocr_model.py,sha256=BN85u-4a-xzUY7Iw21Ib8L8kx4mgbDGiUtxBelLiJm8,8513
|
44
46
|
docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
45
|
-
docling/pipeline/base_pipeline.py,sha256=
|
47
|
+
docling/pipeline/base_pipeline.py,sha256=9ABK-Cr235bxE5vweoIA5rgBZV_EF8qFxAqLI27H_Pg,8749
|
46
48
|
docling/pipeline/simple_pipeline.py,sha256=mZqANqUtAOFAyqQEShErQnAUz6tJFOl6zVoazEDJ_wE,2254
|
47
|
-
docling/pipeline/standard_pdf_pipeline.py,sha256=
|
49
|
+
docling/pipeline/standard_pdf_pipeline.py,sha256=xVGLYmh677hKBSRCoHYAVn7drmowba2QGI8f-eEC5gs,10624
|
48
50
|
docling/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
49
51
|
docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
50
52
|
docling/utils/accelerator_utils.py,sha256=ZjULCn-qhxqx3frF-rJmAlWdzqgUMxH5utLHbSPev80,1367
|
51
53
|
docling/utils/export.py,sha256=KyGF1BVDHPFfHVXZc8vegsWlFfOgGPP2YckWpTadyI8,4694
|
52
|
-
docling/utils/glm_utils.py,sha256=
|
54
|
+
docling/utils/glm_utils.py,sha256=uyCoFTX9FbS1Ke0aSlkdzGLUt08dZfkgriWadkyLiiA,11856
|
53
55
|
docling/utils/layout_postprocessor.py,sha256=urRzeF9PrKiMBvA6DdHHwyLxG06CMhelgJeV5B1l6l0,24258
|
56
|
+
docling/utils/model_downloader.py,sha256=5jChSE88byGj7LvGNnB01qBw6n9ODJjnAS66PobRSJc,2267
|
54
57
|
docling/utils/ocr_utils.py,sha256=F7iOOjqolUcImUzir4qjDQd4QWSO3s6JC4WRn3U7uY4,263
|
55
58
|
docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
|
56
|
-
docling/utils/utils.py,sha256=
|
59
|
+
docling/utils/utils.py,sha256=0ozCk7zUkYzxRVmYoIB2zA1lqjQOuaQzxfGuf1wmKW4,1866
|
57
60
|
docling/utils/visualization.py,sha256=4pn-80fVuE04ken7hUg5Ar47ndRSL9MWBgdHM-1g1zU,2735
|
58
|
-
docling-2.
|
59
|
-
docling-2.
|
60
|
-
docling-2.
|
61
|
-
docling-2.
|
62
|
-
docling-2.
|
61
|
+
docling-2.19.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
|
62
|
+
docling-2.19.0.dist-info/METADATA,sha256=deXdwXb0i_n3pyEDbVGNQNw4APYoUVtXnkHmC-frXWI,8442
|
63
|
+
docling-2.19.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
64
|
+
docling-2.19.0.dist-info/entry_points.txt,sha256=cFrINXsORijdm2EWJzf1m9_rDxH9G9W1fP385-9atY4,84
|
65
|
+
docling-2.19.0.dist-info/RECORD,,
|
File without changes
|