docling 2.18.0__py3-none-any.whl → 2.20.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,6 @@
1
1
  import logging
2
2
  import sys
3
+ import warnings
3
4
  from pathlib import Path
4
5
  from typing import Optional
5
6
 
@@ -13,10 +14,13 @@ from docling.datamodel.pipeline_options import (
13
14
  EasyOcrOptions,
14
15
  OcrMacOptions,
15
16
  PdfPipelineOptions,
17
+ PictureDescriptionApiOptions,
18
+ PictureDescriptionVlmOptions,
16
19
  RapidOcrOptions,
17
20
  TesseractCliOcrOptions,
18
21
  TesseractOcrOptions,
19
22
  )
23
+ from docling.datamodel.settings import settings
20
24
  from docling.models.base_ocr_model import BaseOcrModel
21
25
  from docling.models.code_formula_model import CodeFormulaModel, CodeFormulaModelOptions
22
26
  from docling.models.document_picture_classifier import (
@@ -32,28 +36,31 @@ from docling.models.page_preprocessing_model import (
32
36
  PagePreprocessingModel,
33
37
  PagePreprocessingOptions,
34
38
  )
39
+ from docling.models.picture_description_api_model import PictureDescriptionApiModel
40
+ from docling.models.picture_description_base_model import PictureDescriptionBaseModel
41
+ from docling.models.picture_description_vlm_model import PictureDescriptionVlmModel
35
42
  from docling.models.rapid_ocr_model import RapidOcrModel
36
43
  from docling.models.table_structure_model import TableStructureModel
37
44
  from docling.models.tesseract_ocr_cli_model import TesseractOcrCliModel
38
45
  from docling.models.tesseract_ocr_model import TesseractOcrModel
39
46
  from docling.pipeline.base_pipeline import PaginatedPipeline
47
+ from docling.utils.model_downloader import download_models
40
48
  from docling.utils.profiling import ProfilingScope, TimeRecorder
41
49
 
42
50
  _log = logging.getLogger(__name__)
43
51
 
44
52
 
45
53
  class StandardPdfPipeline(PaginatedPipeline):
46
- _layout_model_path = "model_artifacts/layout"
47
- _table_model_path = "model_artifacts/tableformer"
54
+ _layout_model_path = LayoutModel._model_path
55
+ _table_model_path = TableStructureModel._model_path
48
56
 
49
57
  def __init__(self, pipeline_options: PdfPipelineOptions):
50
58
  super().__init__(pipeline_options)
51
59
  self.pipeline_options: PdfPipelineOptions
52
60
 
53
- if pipeline_options.artifacts_path is None:
54
- self.artifacts_path = self.download_models_hf()
55
- else:
56
- self.artifacts_path = Path(pipeline_options.artifacts_path)
61
+ artifacts_path: Optional[Path] = None
62
+ if pipeline_options.artifacts_path is not None:
63
+ artifacts_path = Path(pipeline_options.artifacts_path).expanduser()
57
64
 
58
65
  self.keep_images = (
59
66
  self.pipeline_options.generate_page_images
@@ -63,7 +70,7 @@ class StandardPdfPipeline(PaginatedPipeline):
63
70
 
64
71
  self.glm_model = GlmModel(options=GlmOptions())
65
72
 
66
- if (ocr_model := self.get_ocr_model()) is None:
73
+ if (ocr_model := self.get_ocr_model(artifacts_path=artifacts_path)) is None:
67
74
  raise RuntimeError(
68
75
  f"The specified OCR kind is not supported: {pipeline_options.ocr_options.kind}."
69
76
  )
@@ -79,15 +86,13 @@ class StandardPdfPipeline(PaginatedPipeline):
79
86
  ocr_model,
80
87
  # Layout model
81
88
  LayoutModel(
82
- artifacts_path=self.artifacts_path
83
- / StandardPdfPipeline._layout_model_path,
89
+ artifacts_path=artifacts_path,
84
90
  accelerator_options=pipeline_options.accelerator_options,
85
91
  ),
86
92
  # Table structure model
87
93
  TableStructureModel(
88
94
  enabled=pipeline_options.do_table_structure,
89
- artifacts_path=self.artifacts_path
90
- / StandardPdfPipeline._table_model_path,
95
+ artifacts_path=artifacts_path,
91
96
  options=pipeline_options.table_structure_options,
92
97
  accelerator_options=pipeline_options.accelerator_options,
93
98
  ),
@@ -95,13 +100,22 @@ class StandardPdfPipeline(PaginatedPipeline):
95
100
  PageAssembleModel(options=PageAssembleOptions()),
96
101
  ]
97
102
 
103
+ # Picture description model
104
+ if (
105
+ picture_description_model := self.get_picture_description_model(
106
+ artifacts_path=artifacts_path
107
+ )
108
+ ) is None:
109
+ raise RuntimeError(
110
+ f"The specified picture description kind is not supported: {pipeline_options.picture_description_options.kind}."
111
+ )
112
+
98
113
  self.enrichment_pipe = [
99
- # Other models working on `NodeItem` elements in the DoclingDocument
100
114
  # Code Formula Enrichment Model
101
115
  CodeFormulaModel(
102
116
  enabled=pipeline_options.do_code_enrichment
103
117
  or pipeline_options.do_formula_enrichment,
104
- artifacts_path=pipeline_options.artifacts_path,
118
+ artifacts_path=artifacts_path,
105
119
  options=CodeFormulaModelOptions(
106
120
  do_code_enrichment=pipeline_options.do_code_enrichment,
107
121
  do_formula_enrichment=pipeline_options.do_formula_enrichment,
@@ -111,15 +125,18 @@ class StandardPdfPipeline(PaginatedPipeline):
111
125
  # Document Picture Classifier
112
126
  DocumentPictureClassifier(
113
127
  enabled=pipeline_options.do_picture_classification,
114
- artifacts_path=pipeline_options.artifacts_path,
128
+ artifacts_path=artifacts_path,
115
129
  options=DocumentPictureClassifierOptions(),
116
130
  accelerator_options=pipeline_options.accelerator_options,
117
131
  ),
132
+ # Document Picture description
133
+ picture_description_model,
118
134
  ]
119
135
 
120
136
  if (
121
137
  self.pipeline_options.do_formula_enrichment
122
138
  or self.pipeline_options.do_code_enrichment
139
+ or self.pipeline_options.do_picture_description
123
140
  ):
124
141
  self.keep_backend = True
125
142
 
@@ -127,23 +144,24 @@ class StandardPdfPipeline(PaginatedPipeline):
127
144
def download_models_hf(
    local_dir: Optional[Path] = None, force: bool = False
) -> Path:
    """Deprecated shim that downloads the model artifacts.

    Emits a ``DeprecationWarning`` and then forwards to ``download_models``
    (imported from ``docling.utils.model_downloader``) with progress
    reporting switched off.

    Args:
        local_dir: Target directory, forwarded as ``output_dir``. ``None`` is
            passed through unchanged.
        force: Forwarded as ``force``.

    Returns:
        The value returned by ``download_models``.
    """
    # The message must name the helper that is actually imported by this
    # module, so users following the hint land on an existing function.
    warnings.warn(
        "The usage of StandardPdfPipeline.download_models_hf() is deprecated, "
        "use instead the utility `docling-tools models download`, or "
        "the upstream method docling.utils.model_downloader.download_models()",
        DeprecationWarning,
        stacklevel=3,
    )

    return download_models(output_dir=local_dir, force=force, progress=False)
142
157
 
143
- def get_ocr_model(self) -> Optional[BaseOcrModel]:
158
+ def get_ocr_model(
159
+ self, artifacts_path: Optional[Path] = None
160
+ ) -> Optional[BaseOcrModel]:
144
161
  if isinstance(self.pipeline_options.ocr_options, EasyOcrOptions):
145
162
  return EasyOcrModel(
146
163
  enabled=self.pipeline_options.do_ocr,
164
+ artifacts_path=artifacts_path,
147
165
  options=self.pipeline_options.ocr_options,
148
166
  accelerator_options=self.pipeline_options.accelerator_options,
149
167
  )
@@ -174,6 +192,29 @@ class StandardPdfPipeline(PaginatedPipeline):
174
192
  )
175
193
  return None
176
194
 
195
def get_picture_description_model(
    self, artifacts_path: Optional[Path] = None
) -> Optional[PictureDescriptionBaseModel]:
    """Build the picture description model matching the configured options.

    Args:
        artifacts_path: Optional artifacts location; only handed to the
            VLM-backed model.

    Returns:
        A ``PictureDescriptionApiModel`` or ``PictureDescriptionVlmModel``
        depending on the type of
        ``pipeline_options.picture_description_options``, or ``None`` when
        the options are of neither type.
    """
    pipeline_opts = self.pipeline_options
    description_opts = pipeline_opts.picture_description_options

    # API-backed description: takes no artifacts path or accelerator options.
    if isinstance(description_opts, PictureDescriptionApiOptions):
        return PictureDescriptionApiModel(
            enabled=pipeline_opts.do_picture_description,
            options=description_opts,
        )

    # VLM-backed description.
    if isinstance(description_opts, PictureDescriptionVlmOptions):
        return PictureDescriptionVlmModel(
            enabled=pipeline_opts.do_picture_description,
            artifacts_path=artifacts_path,
            options=description_opts,
            accelerator_options=pipeline_opts.accelerator_options,
        )

    # Unknown option type: the caller decides how to report it.
    return None
217
+
177
218
  def initialize_page(self, conv_res: ConversionResult, page: Page) -> Page:
178
219
  with TimeRecorder(conv_res, "page_init"):
179
220
  page._backend = conv_res.input._backend.load_page(page.page_no) # type: ignore
@@ -0,0 +1,84 @@
1
+ import logging
2
+ from pathlib import Path
3
+ from typing import Optional
4
+
5
+ from docling.datamodel.pipeline_options import smolvlm_picture_description
6
+ from docling.datamodel.settings import settings
7
+ from docling.models.code_formula_model import CodeFormulaModel
8
+ from docling.models.document_picture_classifier import DocumentPictureClassifier
9
+ from docling.models.easyocr_model import EasyOcrModel
10
+ from docling.models.layout_model import LayoutModel
11
+ from docling.models.picture_description_vlm_model import PictureDescriptionVlmModel
12
+ from docling.models.table_structure_model import TableStructureModel
13
+
14
+ _log = logging.getLogger(__name__)
15
+
16
+
17
def download_models(
    output_dir: Optional[Path] = None,
    *,
    force: bool = False,
    progress: bool = False,
    with_layout: bool = True,
    with_tableformer: bool = True,
    with_code_formula: bool = True,
    with_picture_classifier: bool = True,
    with_smolvlm: bool = True,
    with_easyocr: bool = True,
) -> Path:
    """Download the selected model artifacts below one output directory.

    Each enabled ``with_*`` flag triggers the ``download_models`` classmethod
    of the corresponding model class, targeting a sub-folder of
    ``output_dir``.

    Args:
        output_dir: Root folder for the downloads. Defaults to
            ``settings.cache_dir / "models"`` when ``None``. The folder is
            created (including parents) if it does not exist.
        force: Forwarded to every per-model download call.
        progress: Forwarded to every per-model download call.
        with_layout: Download the layout model.
        with_tableformer: Download the tableformer model.
        with_code_formula: Download the code formula model.
        with_picture_classifier: Download the picture classifier model.
        with_smolvlm: Download the SmolVlm picture description model.
        with_easyocr: Download the easyocr models.

    Returns:
        The root directory the models were downloaded into.
    """
    if output_dir is None:
        output_dir = settings.cache_dir / "models"
    else:
        # Accept plain strings as well; a Path passes through unchanged.
        output_dir = Path(output_dir)

    # Make sure the folder exists
    output_dir.mkdir(exist_ok=True, parents=True)

    if with_layout:
        _log.info("Downloading layout model...")
        LayoutModel.download_models(
            local_dir=output_dir / LayoutModel._model_repo_folder,
            force=force,
            progress=progress,
        )

    if with_tableformer:
        _log.info("Downloading tableformer model...")
        TableStructureModel.download_models(
            local_dir=output_dir / TableStructureModel._model_repo_folder,
            force=force,
            progress=progress,
        )

    if with_picture_classifier:
        _log.info("Downloading picture classifier model...")
        DocumentPictureClassifier.download_models(
            local_dir=output_dir / DocumentPictureClassifier._model_repo_folder,
            force=force,
            progress=progress,
        )

    if with_code_formula:
        _log.info("Downloading code formula model...")
        CodeFormulaModel.download_models(
            local_dir=output_dir / CodeFormulaModel._model_repo_folder,
            force=force,
            progress=progress,
        )

    if with_smolvlm:
        _log.info("Downloading SmolVlm model...")
        # Unlike the other models, the repository id is passed explicitly,
        # taken from the predefined smolvlm options.
        PictureDescriptionVlmModel.download_models(
            repo_id=smolvlm_picture_description.repo_id,
            local_dir=output_dir / smolvlm_picture_description.repo_cache_folder,
            force=force,
            progress=progress,
        )

    if with_easyocr:
        _log.info("Downloading easyocr models...")
        EasyOcrModel.download_models(
            local_dir=output_dir / EasyOcrModel._model_repo_folder,
            force=force,
            progress=progress,
        )

    return output_dir
docling/utils/utils.py CHANGED
@@ -4,6 +4,9 @@ from itertools import islice
4
4
  from pathlib import Path
5
5
  from typing import List, Union
6
6
 
7
+ import requests
8
+ from tqdm import tqdm
9
+
7
10
 
8
11
  def chunkify(iterator, chunk_size):
9
12
  """Yield successive chunks of chunk_size from the iterable."""
@@ -39,3 +42,24 @@ def create_hash(string: str):
39
42
  hasher.update(string.encode("utf-8"))
40
43
 
41
44
  return hasher.hexdigest()
45
+
46
+
47
def download_url_with_progress(url: str, progress: bool = False) -> BytesIO:
    """Download ``url`` into memory and return the content as a buffer.

    The response is streamed in 10 KiB chunks; redirects are followed.

    Args:
        url: Address to fetch.
        progress: When true, display a tqdm progress bar while streaming.

    Returns:
        A ``BytesIO`` containing the response body, rewound to offset 0.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
    """
    buf = BytesIO()
    with requests.get(url, stream=True, allow_redirects=True) as response:
        # Fail loudly on an error status instead of handing the error page
        # back to the caller as if it were the requested payload.
        response.raise_for_status()

        # Falls back to 0 when the server sends no Content-Length header.
        total_size = int(response.headers.get("content-length", 0))

        # The context manager closes the bar even if streaming raises.
        with tqdm(
            total=total_size,
            unit="B",
            unit_scale=True,
            unit_divisor=1024,
            disable=(not progress),
        ) as progress_bar:
            for chunk in response.iter_content(10 * 1024):
                buf.write(chunk)
                progress_bar.update(len(chunk))

    buf.seek(0)
    return buf
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling
3
- Version: 2.18.0
3
+ Version: 2.20.0
4
4
  Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
5
5
  Home-page: https://github.com/DS4SD/docling
6
6
  License: MIT
@@ -24,12 +24,13 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
24
24
  Provides-Extra: ocrmac
25
25
  Provides-Extra: rapidocr
26
26
  Provides-Extra: tesserocr
27
- Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
27
+ Provides-Extra: vlm
28
+ Requires-Dist: beautifulsoup4 (>=4.12.3,<4.13.0)
28
29
  Requires-Dist: certifi (>=2024.7.4)
29
30
  Requires-Dist: deepsearch-glm (>=1.0.0,<2.0.0)
30
- Requires-Dist: docling-core[chunking] (>=2.17.0,<3.0.0)
31
+ Requires-Dist: docling-core[chunking] (>=2.17.2,<3.0.0)
31
32
  Requires-Dist: docling-ibm-models (>=3.3.0,<4.0.0)
32
- Requires-Dist: docling-parse (>=3.1.0,<4.0.0)
33
+ Requires-Dist: docling-parse (>=3.3.0,<4.0.0)
33
34
  Requires-Dist: easyocr (>=1.7,<2.0)
34
35
  Requires-Dist: filetype (>=1.2.0,<2.0.0)
35
36
  Requires-Dist: huggingface_hub (>=0.23,<1)
@@ -52,6 +53,9 @@ Requires-Dist: rtree (>=1.3.0,<2.0.0)
52
53
  Requires-Dist: scipy (>=1.6.0,<1.14.0) ; python_version < "3.10"
53
54
  Requires-Dist: scipy (>=1.6.0,<2.0.0) ; python_version >= "3.10"
54
55
  Requires-Dist: tesserocr (>=2.7.1,<3.0.0) ; extra == "tesserocr"
56
+ Requires-Dist: tqdm (>=4.65.0,<5.0.0)
57
+ Requires-Dist: transformers (>=4.42.0,<4.43.0) ; (sys_platform == "darwin" and platform_machine == "x86_64") and (extra == "vlm")
58
+ Requires-Dist: transformers (>=4.46.0,<5.0.0) ; (sys_platform != "darwin" or platform_machine != "x86_64") and (extra == "vlm")
55
59
  Requires-Dist: typer (>=0.12.5,<0.13.0)
56
60
  Project-URL: Repository, https://github.com/DS4SD/docling
57
61
  Description-Content-Type: text/markdown
@@ -7,10 +7,10 @@ docling/backend/docling_parse_v2_backend.py,sha256=IpwrBrtLGwNRl5AYO-o3NjEfNRsAk
7
7
  docling/backend/html_backend.py,sha256=YTPLZiEEEuGaP6G62skK3wXJ0KftuqBCl8erNXeJyoE,15893
8
8
  docling/backend/json/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  docling/backend/json/docling_json_backend.py,sha256=LlFMVoZrrCfVwbDuRbNN4Xg96Lujh4xxrTBt9jGhY9I,1984
10
- docling/backend/md_backend.py,sha256=d7XAFHzFO9qhrCJA3raWEmZ8WXSYyy3KOE57oMeqKGc,16502
10
+ docling/backend/md_backend.py,sha256=NaVfcnEH-5bwVovjn76EobF6B6Wm8AhaTZ4E8k0TUPo,16826
11
11
  docling/backend/msexcel_backend.py,sha256=lyJc4ShJGAN2ZfNTTuhdYTF-44cZsGyn_8Djstp3IEU,12700
12
12
  docling/backend/mspowerpoint_backend.py,sha256=esAyaaQe17BQFweGAGJHvImKETefY0BpvfpUSECC49w,16424
13
- docling/backend/msword_backend.py,sha256=0iR1l3eLplPv3CPT7iGwQb50LIVf3C32KZFzwAkARrc,20573
13
+ docling/backend/msword_backend.py,sha256=V4miLIcOH8DDlSCm25F_DALBW60Uf9JoSS0TB4yrQBw,20591
14
14
  docling/backend/pdf_backend.py,sha256=17Pr8dWsD1C4FYUprrwMM9trDGW-JYLjrcScx1Ul4io,2048
15
15
  docling/backend/pypdfium2_backend.py,sha256=QSPfp903ZtSpoNqPmcIek0HmvETrJ1kkwrdxnF5pjS0,9014
16
16
  docling/backend/xml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -18,45 +18,51 @@ docling/backend/xml/pubmed_backend.py,sha256=LMnpowjnxa5SydfNC00Ll840BYraL8dCJu-
18
18
  docling/backend/xml/uspto_backend.py,sha256=a5GxWLj2SUR5Of8TWJinhef1gKyaQSjHPVXvGiN8yG8,70324
19
19
  docling/chunking/__init__.py,sha256=h83TDs0AuOV6oEPLAPrn9dpGKiU-2Vg6IRNo4cv6GDA,346
20
20
  docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
- docling/cli/main.py,sha256=K5C2yQIoM40_W3YU8a7SmneY-hWbNp_JOFPLk0NPcDI,16098
21
+ docling/cli/main.py,sha256=T9Tibi1z7YaU8jyl0HrN3h4y0cALvFVg3-4sanWd35M,16590
22
+ docling/cli/models.py,sha256=Z4IEuaXE9el5PuI6_6mR4D5Sn3y8WZzBtoIJPi6jL_s,3188
23
+ docling/cli/tools.py,sha256=QhtRxQG0TVrfsMqdv5i7J0_qQy1ZZyWYnHPwJl7b5oY,322
22
24
  docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
25
  docling/datamodel/base_models.py,sha256=vewP1X99qfAwiUsiC2m8CBDGiQPsGyp_WkKJHYPoYn4,7026
24
26
  docling/datamodel/document.py,sha256=HkmvQKW3QSx3tAqPTnXiJbD_y1EVwR-WE3n6Gq8g1NY,13428
25
- docling/datamodel/pipeline_options.py,sha256=f9-VQFgOdahyclGQgH_T8ZYBopkWsF_fbWbxo39ux3g,7888
26
- docling/datamodel/settings.py,sha256=uN9jeXMwx--tJb-DFU7nr77g0Iou13YAVDzsymTvbHg,1759
27
+ docling/datamodel/pipeline_options.py,sha256=SRlZYIXdmpFO_vyziOonttZvftRQMiFvltxNlolzYW8,9642
28
+ docling/datamodel/settings.py,sha256=pJi9OBqZQhsNi7RwJWQFRDKGhm3u679iN76psA3VtsY,1817
27
29
  docling/document_converter.py,sha256=qaldb7Thqk59RdE-RTGtj1M7l5UzaBdnxIvGoQ7lTeo,12876
28
30
  docling/exceptions.py,sha256=-FoP46rFJgz_jn5uDv2V052udEEg8gckk6uhoItchXc,85
29
31
  docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
30
- docling/models/base_model.py,sha256=H5X-exVaAN-XMTzxpgUc-rwH-D8Uk7-VuZtq2soNGXI,2567
32
+ docling/models/base_model.py,sha256=q_lKeQ0FT70idXlZ3JgyAv8dA8J3bZWBSDBkqTzy0lo,2679
31
33
  docling/models/base_ocr_model.py,sha256=YiUMvdjnHw9SHjnfJKT5INrPMoIGEf_Z2OApfl_VRTE,6919
32
- docling/models/code_formula_model.py,sha256=bOIKJvckZ0QpnDZ-CDiYv-CvuGvaGzJgp2PiYAidKBQ,8422
33
- docling/models/document_picture_classifier.py,sha256=RLB80ueqWZ86hdXtTKmSynCU13nT-T10vUp2sky9110,6078
34
+ docling/models/code_formula_model.py,sha256=6grbRPWaLljadheT5s4omdT6hmXfin4gJU17csWvhjY,8611
35
+ docling/models/document_picture_classifier.py,sha256=6I_j6fG5fnhIV6rqN31LYikNTZyg5isXrVs0GIqHDaY,6235
34
36
  docling/models/ds_glm_model.py,sha256=CkhsP0cEWwm4wb1g3cLFriVGpVtELiUK3REDMkPwAMw,13028
35
- docling/models/easyocr_model.py,sha256=Kakb20ioBxDmNsIqoGvSSs_vbqAWN3QQNHYtEi-eErg,4990
36
- docling/models/layout_model.py,sha256=3Fw7OM6g0j7NgItKsQOgFOCd1q6lp1DacN_db7f6QCw,6090
37
+ docling/models/easyocr_model.py,sha256=ePg1exAXeOzkBRBT-6PBSmqKFmnNFkCEd4HNDsGVgLM,6860
38
+ docling/models/layout_model.py,sha256=7fQWipGV1HDrvbP4uOKa9QAicQl89jp7lailQmbFL3w,7804
37
39
  docling/models/ocr_mac_model.py,sha256=bLP14UUmZcSzjDe-HLj-mtksTuBmsCTg2C1wCxUpan0,4502
38
40
  docling/models/page_assemble_model.py,sha256=c5KLKwkUIdW0JcDHizWsqrpb5x_3DK28x82Q8o-3VJM,5968
39
41
  docling/models/page_preprocessing_model.py,sha256=1gVrZjObKxAvXkkKvXnIFApPOggzgiTFPtt1CGbMbSs,2763
42
+ docling/models/picture_description_api_model.py,sha256=Tw4B92_5Fa5FKZeGmgUWmSFMAw5D58uzQM2XRTIXOXI,3003
43
+ docling/models/picture_description_base_model.py,sha256=rZLIW1_CaRAw_EP3zuI8ktC0ZxwO7yubhh2RkaC_8e8,1910
44
+ docling/models/picture_description_vlm_model.py,sha256=a2vYUdlcA0--_8neY0tTiU8reCf29NCbVMKwWdMy2QQ,3653
40
45
  docling/models/rapid_ocr_model.py,sha256=2HXmurNRPP6qyqn7U5h9NQIs8zi0TMHf56CpcKQk0fU,5038
41
- docling/models/table_structure_model.py,sha256=qZgoBrBh7H-RJGCTtaRGcj79g2WzZiUBTPnHqJZ-bLA,9557
46
+ docling/models/table_structure_model.py,sha256=UIqWlw_9JNfGsO86c00rPb4GCg-yNliKEwyhCqlsZbM,11225
42
47
  docling/models/tesseract_ocr_cli_model.py,sha256=b2Is5x2gZLS6mQWnKe0y7p6UU6hRTHDfoH4D2RQ5mx0,9310
43
48
  docling/models/tesseract_ocr_model.py,sha256=BN85u-4a-xzUY7Iw21Ib8L8kx4mgbDGiUtxBelLiJm8,8513
44
49
  docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
45
- docling/pipeline/base_pipeline.py,sha256=lK8PQiydWJ9M16kIVL7U1A2iryTRFrN5WSucVo2ohFQ,8757
50
+ docling/pipeline/base_pipeline.py,sha256=9ABK-Cr235bxE5vweoIA5rgBZV_EF8qFxAqLI27H_Pg,8749
46
51
  docling/pipeline/simple_pipeline.py,sha256=mZqANqUtAOFAyqQEShErQnAUz6tJFOl6zVoazEDJ_wE,2254
47
- docling/pipeline/standard_pdf_pipeline.py,sha256=Qefg1JSiFwipypi8TZPJ50WgXTLjwkC0wvYAl02RM2o,10480
52
+ docling/pipeline/standard_pdf_pipeline.py,sha256=UKB9COXmVGvUhTVwsKsQTjNKi86TRdGLJjknwNZPwNo,12366
48
53
  docling/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
49
54
  docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
50
55
  docling/utils/accelerator_utils.py,sha256=ZjULCn-qhxqx3frF-rJmAlWdzqgUMxH5utLHbSPev80,1367
51
56
  docling/utils/export.py,sha256=KyGF1BVDHPFfHVXZc8vegsWlFfOgGPP2YckWpTadyI8,4694
52
57
  docling/utils/glm_utils.py,sha256=uyCoFTX9FbS1Ke0aSlkdzGLUt08dZfkgriWadkyLiiA,11856
53
58
  docling/utils/layout_postprocessor.py,sha256=urRzeF9PrKiMBvA6DdHHwyLxG06CMhelgJeV5B1l6l0,24258
59
+ docling/utils/model_downloader.py,sha256=XK3ozGXyQcNPvrSsevTwR9VnY41JWovlsGk_ZBnu6FU,2787
54
60
  docling/utils/ocr_utils.py,sha256=F7iOOjqolUcImUzir4qjDQd4QWSO3s6JC4WRn3U7uY4,263
55
61
  docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
56
- docling/utils/utils.py,sha256=llhXSbIDNZ1MHOwBEfLHBAoJIAYI7QlPIonlI1jLUJ0,1208
62
+ docling/utils/utils.py,sha256=0ozCk7zUkYzxRVmYoIB2zA1lqjQOuaQzxfGuf1wmKW4,1866
57
63
  docling/utils/visualization.py,sha256=4pn-80fVuE04ken7hUg5Ar47ndRSL9MWBgdHM-1g1zU,2735
58
- docling-2.18.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
59
- docling-2.18.0.dist-info/METADATA,sha256=rBP1Z7m0HMpC-HjR360i2JNuIA9lqknRPjUab1mtVic,8403
60
- docling-2.18.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
61
- docling-2.18.0.dist-info/entry_points.txt,sha256=VOSzV77znM52dz5ysaDuJ0ijl1cnfrh1ZPg8od5OcTs,48
62
- docling-2.18.0.dist-info/RECORD,,
64
+ docling-2.20.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
65
+ docling-2.20.0.dist-info/METADATA,sha256=9g0XmFk8hxdswqwT5jWnrUPKswGA26JDlyV5hqJ_tCc,8720
66
+ docling-2.20.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
67
+ docling-2.20.0.dist-info/entry_points.txt,sha256=cFrINXsORijdm2EWJzf1m9_rDxH9G9W1fP385-9atY4,84
68
+ docling-2.20.0.dist-info/RECORD,,
@@ -1,3 +1,4 @@
1
1
  [console_scripts]
2
2
  docling=docling.cli.main:app
3
+ docling-tools=docling.cli.tools:app
3
4