docling 2.3.0__py3-none-any.whl → 2.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling/document_converter.py +8 -4
- {docling-2.3.0.dist-info → docling-2.3.1.dist-info}/METADATA +4 -8
- {docling-2.3.0.dist-info → docling-2.3.1.dist-info}/RECORD +6 -6
- {docling-2.3.0.dist-info → docling-2.3.1.dist-info}/LICENSE +0 -0
- {docling-2.3.0.dist-info → docling-2.3.1.dist-info}/WHEEL +0 -0
- {docling-2.3.0.dist-info → docling-2.3.1.dist-info}/entry_points.txt +0 -0
docling/document_converter.py
CHANGED
@@ -139,6 +139,10 @@ class DocumentConverter:
|
|
139
139
|
|
140
140
|
self.initialized_pipelines: Dict[Type[BasePipeline], BasePipeline] = {}
|
141
141
|
|
142
|
+
def initialize_pipeline(self, format: InputFormat):
|
143
|
+
"""Initialize the conversion pipeline for the selected format."""
|
144
|
+
self._get_pipeline(doc_format=format)
|
145
|
+
|
142
146
|
@validate_call(config=ConfigDict(strict=True))
|
143
147
|
def convert(
|
144
148
|
self,
|
@@ -219,13 +223,13 @@ class DocumentConverter:
|
|
219
223
|
else:
|
220
224
|
_log.info(f"Skipped a document. We lost {elapsed:.2f} sec.")
|
221
225
|
|
222
|
-
def _get_pipeline(self,
|
226
|
+
def _get_pipeline(self, doc_format: InputFormat) -> Optional[BasePipeline]:
|
223
227
|
assert self.format_to_options is not None
|
224
228
|
|
225
|
-
fopt = self.format_to_options.get(
|
229
|
+
fopt = self.format_to_options.get(doc_format)
|
226
230
|
|
227
231
|
if fopt is None:
|
228
|
-
raise RuntimeError(f"Could not get pipeline for
|
232
|
+
raise RuntimeError(f"Could not get pipeline for {doc_format}")
|
229
233
|
else:
|
230
234
|
pipeline_class = fopt.pipeline_cls
|
231
235
|
pipeline_options = fopt.pipeline_options
|
@@ -256,7 +260,7 @@ class DocumentConverter:
|
|
256
260
|
self, in_doc: InputDocument, raises_on_error: bool
|
257
261
|
) -> ConversionResult:
|
258
262
|
if in_doc.valid:
|
259
|
-
pipeline = self._get_pipeline(in_doc)
|
263
|
+
pipeline = self._get_pipeline(in_doc.format)
|
260
264
|
if pipeline is None: # Can't find a default pipeline. Should this raise?
|
261
265
|
if raises_on_error:
|
262
266
|
raise RuntimeError(
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: docling
|
3
|
-
Version: 2.3.0
|
3
|
+
Version: 2.3.1
|
4
4
|
Summary: Docling PDF conversion package
|
5
5
|
Home-page: https://github.com/DS4SD/docling
|
6
6
|
License: MIT
|
@@ -23,9 +23,9 @@ Provides-Extra: tesserocr
|
|
23
23
|
Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
|
24
24
|
Requires-Dist: certifi (>=2024.7.4)
|
25
25
|
Requires-Dist: deepsearch-glm (>=0.26.1,<0.27.0)
|
26
|
-
Requires-Dist: docling-core (>=2.
|
27
|
-
Requires-Dist: docling-ibm-models (>=2.0.
|
28
|
-
Requires-Dist: docling-parse (>=2.0.
|
26
|
+
Requires-Dist: docling-core (>=2.3.0,<3.0.0)
|
27
|
+
Requires-Dist: docling-ibm-models (>=2.0.3,<3.0.0)
|
28
|
+
Requires-Dist: docling-parse (>=2.0.2,<3.0.0)
|
29
29
|
Requires-Dist: easyocr (>=1.7,<2.0)
|
30
30
|
Requires-Dist: filetype (>=1.2.0,<2.0.0)
|
31
31
|
Requires-Dist: huggingface_hub (>=0.23,<1)
|
@@ -41,10 +41,6 @@ Requires-Dist: requests (>=2.32.3,<3.0.0)
|
|
41
41
|
Requires-Dist: rtree (>=1.3.0,<2.0.0)
|
42
42
|
Requires-Dist: scipy (>=1.14.1,<2.0.0)
|
43
43
|
Requires-Dist: tesserocr (>=2.7.1,<3.0.0) ; extra == "tesserocr"
|
44
|
-
Requires-Dist: torch (>=2.2.2,<2.3.0) ; sys_platform == "darwin" and platform_machine == "x86_64"
|
45
|
-
Requires-Dist: torch (>=2.2.2,<3.0.0) ; sys_platform != "darwin" or platform_machine != "x86_64"
|
46
|
-
Requires-Dist: torchvision (>=0,<1) ; sys_platform != "darwin" or platform_machine != "x86_64"
|
47
|
-
Requires-Dist: torchvision (>=0.17.2,<0.18.0) ; sys_platform == "darwin" and platform_machine == "x86_64"
|
48
44
|
Requires-Dist: typer (>=0.12.5,<0.13.0)
|
49
45
|
Project-URL: Repository, https://github.com/DS4SD/docling
|
50
46
|
Description-Content-Type: text/markdown
|
@@ -17,7 +17,7 @@ docling/datamodel/base_models.py,sha256=fmkS6iTxGZCTtNCo2zsgMmBC11Ogf2Ht-mNIlZ9G
|
|
17
17
|
docling/datamodel/document.py,sha256=9dQf_J18X_MEWs-Mg3Ed6BykFPJ79ETmkkxcssY-vYo,20698
|
18
18
|
docling/datamodel/pipeline_options.py,sha256=WNjluKC-Ww63ifkGMHwws8zIDHnOS1z5Hw7_j3S0qao,2446
|
19
19
|
docling/datamodel/settings.py,sha256=2-sYEnKLV_giGygUlBtiBd4CJYN5T9-3BdL6NpWkUYw,1155
|
20
|
-
docling/document_converter.py,sha256=
|
20
|
+
docling/document_converter.py,sha256=U52_rZQDm2wzrnsuUrvsfX2MnmOWFFhjBzfS8tEvt6Y,10595
|
21
21
|
docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
22
|
docling/models/base_model.py,sha256=Yq_-FmUhqhE20vXYG3WiQXDRTIPjik1CyuEZ8iYTGAY,701
|
23
23
|
docling/models/base_ocr_model.py,sha256=Ti0glL-_DVRfmP3MpywYVmkNf5RP6qhRg_UKzJuV1Dc,5663
|
@@ -38,8 +38,8 @@ docling/utils/export.py,sha256=KyGF1BVDHPFfHVXZc8vegsWlFfOgGPP2YckWpTadyI8,4694
|
|
38
38
|
docling/utils/layout_utils.py,sha256=vlN0rc8i0ayRGn3WnaG-pdmqEL00KKGl2zez3Gj-hrk,32074
|
39
39
|
docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
|
40
40
|
docling/utils/utils.py,sha256=llhXSbIDNZ1MHOwBEfLHBAoJIAYI7QlPIonlI1jLUJ0,1208
|
41
|
-
docling-2.3.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
|
42
|
-
docling-2.3.
|
43
|
-
docling-2.3.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
44
|
-
docling-2.3.0.dist-info/entry_points.txt,sha256=VOSzV77znM52dz5ysaDuJ0ijl1cnfrh1ZPg8od5OcTs,48
|
45
|
-
docling-2.3.0.dist-info/RECORD,,
|
41
|
+
docling-2.3.1.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
|
42
|
+
docling-2.3.1.dist-info/METADATA,sha256=_FXRLyE1Uy6RobmdLb244I5vg7KEi3ogolWBQvDOvx4,5977
|
43
|
+
docling-2.3.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
44
|
+
docling-2.3.1.dist-info/entry_points.txt,sha256=VOSzV77znM52dz5ysaDuJ0ijl1cnfrh1ZPg8od5OcTs,48
|
45
|
+
docling-2.3.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|