docling 2.11.0__py3-none-any.whl → 2.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
docling/cli/main.py CHANGED
@@ -26,6 +26,8 @@ from docling.datamodel.base_models import (
26
26
  )
27
27
  from docling.datamodel.document import ConversionResult
28
28
  from docling.datamodel.pipeline_options import (
29
+ AcceleratorDevice,
30
+ AcceleratorOptions,
29
31
  EasyOcrOptions,
30
32
  OcrEngine,
31
33
  OcrMacOptions,
@@ -257,6 +259,10 @@ def convert(
257
259
  help="The timeout for processing each document, in seconds.",
258
260
  ),
259
261
  ] = None,
262
+ num_threads: Annotated[int, typer.Option(..., help="Number of threads")] = 4,
263
+ device: Annotated[
264
+ AcceleratorDevice, typer.Option(..., help="Accelerator device")
265
+ ] = AcceleratorDevice.AUTO,
260
266
  ):
261
267
  if verbose == 0:
262
268
  logging.basicConfig(level=logging.WARNING)
@@ -336,7 +342,9 @@ def convert(
336
342
  if ocr_lang_list is not None:
337
343
  ocr_options.lang = ocr_lang_list
338
344
 
345
+ accelerator_options = AcceleratorOptions(num_threads=num_threads, device=device)
339
346
  pipeline_options = PdfPipelineOptions(
347
+ accelerator_options=accelerator_options,
340
348
  do_ocr=ocr,
341
349
  ocr_options=ocr_options,
342
350
  do_table_structure=True,
@@ -1,8 +1,66 @@
1
+ import logging
2
+ import os
3
+ import warnings
1
4
  from enum import Enum
2
5
  from pathlib import Path
3
- from typing import List, Literal, Optional, Union
6
+ from typing import Annotated, Any, Dict, List, Literal, Optional, Tuple, Type, Union
4
7
 
5
- from pydantic import BaseModel, ConfigDict, Field
8
+ from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
9
+ from pydantic_settings import (
10
+ BaseSettings,
11
+ PydanticBaseSettingsSource,
12
+ SettingsConfigDict,
13
+ )
14
+ from typing_extensions import deprecated
15
+
16
+ _log = logging.getLogger(__name__)
17
+
18
+
19
+ class AcceleratorDevice(str, Enum):
20
+ """Devices to run model inference"""
21
+
22
+ AUTO = "auto"
23
+ CPU = "cpu"
24
+ CUDA = "cuda"
25
+ MPS = "mps"
26
+
27
+
28
+ class AcceleratorOptions(BaseSettings):
29
+ model_config = SettingsConfigDict(
30
+ env_prefix="DOCLING_", env_nested_delimiter="_", populate_by_name=True
31
+ )
32
+
33
+ num_threads: int = 4
34
+ device: AcceleratorDevice = AcceleratorDevice.AUTO
35
+
36
+ @model_validator(mode="before")
37
+ @classmethod
38
+ def check_alternative_envvars(cls, data: Any) -> Any:
39
+ r"""
40
+ Set num_threads from the "alternative" envvar OMP_NUM_THREADS.
41
+ The alternative envvar is used only if it is valid and the regular envvar is not set.
42
+
43
+ Notice: The standard pydantic settings mechanism with parameter "aliases" does not provide
44
+ the same functionality. In case the alias envvar is set and the user tries to override the
45
+ parameter in settings initialization, Pydantic treats the parameter provided in __init__()
46
+ as an extra input instead of simply overwriting the envvar value for that parameter.
47
+ """
48
+ if isinstance(data, dict):
49
+ input_num_threads = data.get("num_threads")
50
+
51
+ # Check whether num_threads should be set from the alternative envvar
52
+ if input_num_threads is None:
53
+ docling_num_threads = os.getenv("DOCLING_NUM_THREADS")
54
+ omp_num_threads = os.getenv("OMP_NUM_THREADS")
55
+ if docling_num_threads is None and omp_num_threads is not None:
56
+ try:
57
+ data["num_threads"] = int(omp_num_threads)
58
+ except ValueError:
59
+ _log.error(
60
+ "Ignoring misformatted envvar OMP_NUM_THREADS '%s'",
61
+ omp_num_threads,
62
+ )
63
+ return data
6
64
 
7
65
 
8
66
  class TableFormerMode(str, Enum):
@@ -78,9 +136,11 @@ class EasyOcrOptions(OcrOptions):
78
136
 
79
137
  kind: Literal["easyocr"] = "easyocr"
80
138
  lang: List[str] = ["fr", "de", "es", "en"]
81
- use_gpu: bool = True # same default as easyocr.Reader
139
+
140
+ use_gpu: Optional[bool] = None
141
+
82
142
  model_storage_directory: Optional[str] = None
83
- download_enabled: bool = True # same default as easyocr.Reader
143
+ download_enabled: bool = True
84
144
 
85
145
  model_config = ConfigDict(
86
146
  extra="forbid",
@@ -153,6 +213,7 @@ class PipelineOptions(BaseModel):
153
213
  True # This default will be set to False on a future version of docling
154
214
  )
155
215
  document_timeout: Optional[float] = None
216
+ accelerator_options: AcceleratorOptions = AcceleratorOptions()
156
217
 
157
218
 
158
219
  class PdfPipelineOptions(PipelineOptions):
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ import warnings
2
3
  from typing import Iterable
3
4
 
4
5
  import numpy
@@ -7,16 +8,26 @@ from docling_core.types.doc import BoundingBox, CoordOrigin
7
8
 
8
9
  from docling.datamodel.base_models import Cell, OcrCell, Page
9
10
  from docling.datamodel.document import ConversionResult
10
- from docling.datamodel.pipeline_options import EasyOcrOptions
11
+ from docling.datamodel.pipeline_options import (
12
+ AcceleratorDevice,
13
+ AcceleratorOptions,
14
+ EasyOcrOptions,
15
+ )
11
16
  from docling.datamodel.settings import settings
12
17
  from docling.models.base_ocr_model import BaseOcrModel
18
+ from docling.utils.accelerator_utils import decide_device
13
19
  from docling.utils.profiling import TimeRecorder
14
20
 
15
21
  _log = logging.getLogger(__name__)
16
22
 
17
23
 
18
24
  class EasyOcrModel(BaseOcrModel):
19
- def __init__(self, enabled: bool, options: EasyOcrOptions):
25
+ def __init__(
26
+ self,
27
+ enabled: bool,
28
+ options: EasyOcrOptions,
29
+ accelerator_options: AcceleratorOptions,
30
+ ):
20
31
  super().__init__(enabled=enabled, options=options)
21
32
  self.options: EasyOcrOptions
22
33
 
@@ -31,11 +42,32 @@ class EasyOcrModel(BaseOcrModel):
31
42
  "Alternatively, Docling has support for other OCR engines. See the documentation."
32
43
  )
33
44
 
45
+ if self.options.use_gpu is None:
46
+ device = decide_device(accelerator_options.device)
47
+ # Enable easyocr GPU if running on CUDA or MPS
48
+ use_gpu = any(
49
+ [
50
+ device.startswith(x)
51
+ for x in [
52
+ AcceleratorDevice.CUDA.value,
53
+ AcceleratorDevice.MPS.value,
54
+ ]
55
+ ]
56
+ )
57
+ else:
58
+ warnings.warn(
59
+ "Deprecated field. Better to set the `accelerator_options.device` in `pipeline_options`. "
60
+ "When `use_gpu and accelerator_options.device == AcceleratorDevice.CUDA` the GPU is used "
61
+ "to run EasyOCR. Otherwise, EasyOCR runs in CPU."
62
+ )
63
+ use_gpu = self.options.use_gpu
64
+
34
65
  self.reader = easyocr.Reader(
35
66
  lang_list=self.options.lang,
36
- gpu=self.options.use_gpu,
67
+ gpu=use_gpu,
37
68
  model_storage_directory=self.options.model_storage_directory,
38
69
  download_enabled=self.options.download_enabled,
70
+ verbose=False,
39
71
  )
40
72
 
41
73
  def __call__(
@@ -9,6 +9,7 @@ from docling_core.types.doc import CoordOrigin, DocItemLabel
9
9
  from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor
10
10
  from PIL import ImageDraw
11
11
 
12
+ import docling.utils.layout_utils as lu
12
13
  from docling.datamodel.base_models import (
13
14
  BoundingBox,
14
15
  Cell,
@@ -17,9 +18,10 @@ from docling.datamodel.base_models import (
17
18
  Page,
18
19
  )
19
20
  from docling.datamodel.document import ConversionResult
21
+ from docling.datamodel.pipeline_options import AcceleratorDevice, AcceleratorOptions
20
22
  from docling.datamodel.settings import settings
21
23
  from docling.models.base_model import BasePageModel
22
- from docling.utils import layout_utils as lu
24
+ from docling.utils.accelerator_utils import decide_device
23
25
  from docling.utils.profiling import TimeRecorder
24
26
 
25
27
  _log = logging.getLogger(__name__)
@@ -46,8 +48,16 @@ class LayoutModel(BasePageModel):
46
48
  FIGURE_LABEL = DocItemLabel.PICTURE
47
49
  FORMULA_LABEL = DocItemLabel.FORMULA
48
50
 
49
- def __init__(self, artifacts_path: Path):
50
- self.layout_predictor = LayoutPredictor(artifacts_path) # TODO temporary
51
+ def __init__(self, artifacts_path: Path, accelerator_options: AcceleratorOptions):
52
+ device = decide_device(accelerator_options.device)
53
+
54
+ self.layout_predictor = LayoutPredictor(
55
+ artifact_path=str(artifacts_path),
56
+ device=device,
57
+ num_threads=accelerator_options.num_threads,
58
+ base_threshold=0.6,
59
+ blacklist_classes={"Form", "Key-Value Region"},
60
+ )
51
61
 
52
62
  def postprocess(self, clusters_in: List[Cluster], cells: List[Cell], page_height):
53
63
  MIN_INTERSECTION = 0.2
@@ -6,16 +6,26 @@ from docling_core.types.doc import BoundingBox, CoordOrigin
6
6
 
7
7
  from docling.datamodel.base_models import OcrCell, Page
8
8
  from docling.datamodel.document import ConversionResult
9
- from docling.datamodel.pipeline_options import RapidOcrOptions
9
+ from docling.datamodel.pipeline_options import (
10
+ AcceleratorDevice,
11
+ AcceleratorOptions,
12
+ RapidOcrOptions,
13
+ )
10
14
  from docling.datamodel.settings import settings
11
15
  from docling.models.base_ocr_model import BaseOcrModel
16
+ from docling.utils.accelerator_utils import decide_device
12
17
  from docling.utils.profiling import TimeRecorder
13
18
 
14
19
  _log = logging.getLogger(__name__)
15
20
 
16
21
 
17
22
  class RapidOcrModel(BaseOcrModel):
18
- def __init__(self, enabled: bool, options: RapidOcrOptions):
23
+ def __init__(
24
+ self,
25
+ enabled: bool,
26
+ options: RapidOcrOptions,
27
+ accelerator_options: AcceleratorOptions,
28
+ ):
19
29
  super().__init__(enabled=enabled, options=options)
20
30
  self.options: RapidOcrOptions
21
31
 
@@ -30,52 +40,21 @@ class RapidOcrModel(BaseOcrModel):
30
40
  "Alternatively, Docling has support for other OCR engines. See the documentation."
31
41
  )
32
42
 
33
- # This configuration option will be revamped while introducing device settings for all models.
34
- # For the moment we will default to auto and let onnx-runtime pick the best.
35
- cls_use_cuda = True
36
- rec_use_cuda = True
37
- det_use_cuda = True
38
- det_use_dml = True
39
- cls_use_dml = True
40
- rec_use_dml = True
41
-
42
- # # Same as Defaults in RapidOCR
43
- # cls_use_cuda = False
44
- # rec_use_cuda = False
45
- # det_use_cuda = False
46
- # det_use_dml = False
47
- # cls_use_dml = False
48
- # rec_use_dml = False
49
-
50
- # # If we set everything to true onnx-runtime would automatically choose the fastest accelerator
51
- # if self.options.device == self.options.Device.AUTO:
52
- # cls_use_cuda = True
53
- # rec_use_cuda = True
54
- # det_use_cuda = True
55
- # det_use_dml = True
56
- # cls_use_dml = True
57
- # rec_use_dml = True
58
-
59
- # # If we set use_cuda to true onnx would use the cuda device available in runtime if no cuda device is available it would run on CPU.
60
- # elif self.options.device == self.options.Device.CUDA:
61
- # cls_use_cuda = True
62
- # rec_use_cuda = True
63
- # det_use_cuda = True
64
-
65
- # # If we set use_dml to true onnx would use the dml device available in runtime if no dml device is available it would work on CPU.
66
- # elif self.options.device == self.options.Device.DIRECTML:
67
- # det_use_dml = True
68
- # cls_use_dml = True
69
- # rec_use_dml = True
43
+ # Decide the accelerator devices
44
+ device = decide_device(accelerator_options.device)
45
+ use_cuda = str(AcceleratorDevice.CUDA.value).lower() in device
46
+ use_dml = accelerator_options.device == AcceleratorDevice.AUTO
47
+ intra_op_num_threads = accelerator_options.num_threads
70
48
 
71
49
  self.reader = RapidOCR(
72
50
  text_score=self.options.text_score,
73
- cls_use_cuda=cls_use_cuda,
74
- rec_use_cuda=rec_use_cuda,
75
- det_use_cuda=det_use_cuda,
76
- det_use_dml=det_use_dml,
77
- cls_use_dml=cls_use_dml,
78
- rec_use_dml=rec_use_dml,
51
+ cls_use_cuda=use_cuda,
52
+ rec_use_cuda=use_cuda,
53
+ det_use_cuda=use_cuda,
54
+ det_use_dml=use_dml,
55
+ cls_use_dml=use_dml,
56
+ rec_use_dml=use_dml,
57
+ intra_op_num_threads=intra_op_num_threads,
79
58
  print_verbose=self.options.print_verbose,
80
59
  det_model_path=self.options.det_model_path,
81
60
  cls_model_path=self.options.cls_model_path,
@@ -9,15 +9,25 @@ from PIL import ImageDraw
9
9
 
10
10
  from docling.datamodel.base_models import Page, Table, TableStructurePrediction
11
11
  from docling.datamodel.document import ConversionResult
12
- from docling.datamodel.pipeline_options import TableFormerMode, TableStructureOptions
12
+ from docling.datamodel.pipeline_options import (
13
+ AcceleratorDevice,
14
+ AcceleratorOptions,
15
+ TableFormerMode,
16
+ TableStructureOptions,
17
+ )
13
18
  from docling.datamodel.settings import settings
14
19
  from docling.models.base_model import BasePageModel
20
+ from docling.utils.accelerator_utils import decide_device
15
21
  from docling.utils.profiling import TimeRecorder
16
22
 
17
23
 
18
24
  class TableStructureModel(BasePageModel):
19
25
  def __init__(
20
- self, enabled: bool, artifacts_path: Path, options: TableStructureOptions
26
+ self,
27
+ enabled: bool,
28
+ artifacts_path: Path,
29
+ options: TableStructureOptions,
30
+ accelerator_options: AcceleratorOptions,
21
31
  ):
22
32
  self.options = options
23
33
  self.do_cell_matching = self.options.do_cell_matching
@@ -26,16 +36,26 @@ class TableStructureModel(BasePageModel):
26
36
  self.enabled = enabled
27
37
  if self.enabled:
28
38
  if self.mode == TableFormerMode.ACCURATE:
29
- artifacts_path = artifacts_path / "fat"
39
+ artifacts_path = artifacts_path / "accurate"
40
+ else:
41
+ artifacts_path = artifacts_path / "fast"
30
42
 
31
43
  # Third Party
32
44
  import docling_ibm_models.tableformer.common as c
33
45
 
46
+ device = decide_device(accelerator_options.device)
47
+
48
+ # Disable MPS here, until we know why it makes things slower.
49
+ if device == AcceleratorDevice.MPS.value:
50
+ device = AcceleratorDevice.CPU.value
51
+
34
52
  self.tm_config = c.read_config(f"{artifacts_path}/tm_config.json")
35
53
  self.tm_config["model"]["save_dir"] = artifacts_path
36
54
  self.tm_model_type = self.tm_config["model"]["type"]
37
55
 
38
- self.tf_predictor = TFPredictor(self.tm_config)
56
+ self.tf_predictor = TFPredictor(
57
+ self.tm_config, device, accelerator_options.num_threads
58
+ )
39
59
  self.scale = 2.0 # Scale up table input images to 144 dpi
40
60
 
41
61
  def draw_table_and_cells(
@@ -38,7 +38,7 @@ _log = logging.getLogger(__name__)
38
38
 
39
39
 
40
40
  class StandardPdfPipeline(PaginatedPipeline):
41
- _layout_model_path = "model_artifacts/layout/beehive_v0.0.5_pt"
41
+ _layout_model_path = "model_artifacts/layout"
42
42
  _table_model_path = "model_artifacts/tableformer"
43
43
 
44
44
  def __init__(self, pipeline_options: PdfPipelineOptions):
@@ -75,7 +75,8 @@ class StandardPdfPipeline(PaginatedPipeline):
75
75
  # Layout model
76
76
  LayoutModel(
77
77
  artifacts_path=self.artifacts_path
78
- / StandardPdfPipeline._layout_model_path
78
+ / StandardPdfPipeline._layout_model_path,
79
+ accelerator_options=pipeline_options.accelerator_options,
79
80
  ),
80
81
  # Table structure model
81
82
  TableStructureModel(
@@ -83,6 +84,7 @@ class StandardPdfPipeline(PaginatedPipeline):
83
84
  artifacts_path=self.artifacts_path
84
85
  / StandardPdfPipeline._table_model_path,
85
86
  options=pipeline_options.table_structure_options,
87
+ accelerator_options=pipeline_options.accelerator_options,
86
88
  ),
87
89
  # Page assemble
88
90
  PageAssembleModel(options=PageAssembleOptions(keep_images=keep_images)),
@@ -104,7 +106,7 @@ class StandardPdfPipeline(PaginatedPipeline):
104
106
  repo_id="ds4sd/docling-models",
105
107
  force_download=force,
106
108
  local_dir=local_dir,
107
- revision="v2.0.1",
109
+ revision="v2.1.0",
108
110
  )
109
111
 
110
112
  return Path(download_path)
@@ -114,6 +116,7 @@ class StandardPdfPipeline(PaginatedPipeline):
114
116
  return EasyOcrModel(
115
117
  enabled=self.pipeline_options.do_ocr,
116
118
  options=self.pipeline_options.ocr_options,
119
+ accelerator_options=self.pipeline_options.accelerator_options,
117
120
  )
118
121
  elif isinstance(self.pipeline_options.ocr_options, TesseractCliOcrOptions):
119
122
  return TesseractOcrCliModel(
@@ -129,6 +132,7 @@ class StandardPdfPipeline(PaginatedPipeline):
129
132
  return RapidOcrModel(
130
133
  enabled=self.pipeline_options.do_ocr,
131
134
  options=self.pipeline_options.ocr_options,
135
+ accelerator_options=self.pipeline_options.accelerator_options,
132
136
  )
133
137
  elif isinstance(self.pipeline_options.ocr_options, OcrMacOptions):
134
138
  if "darwin" != sys.platform:
@@ -0,0 +1,42 @@
1
+ import logging
2
+
3
+ import torch
4
+
5
+ from docling.datamodel.pipeline_options import AcceleratorDevice
6
+
7
+ _log = logging.getLogger(__name__)
8
+
9
+
10
+ def decide_device(accelerator_device: AcceleratorDevice) -> str:
11
+ r"""
12
+ Resolve the device based on the acceleration options and the available devices in the system
13
+ Rules:
14
+ 1. AUTO: Check for the best available device on the system.
15
+ 2. User-defined: Check if the device actually exists, otherwise fall back to CPU.
16
+ """
17
+ cuda_index = 0
18
+ device = "cpu"
19
+
20
+ has_cuda = torch.backends.cuda.is_built() and torch.cuda.is_available()
21
+ has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
22
+
23
+ if accelerator_device == AcceleratorDevice.AUTO:
24
+ if has_cuda:
25
+ device = f"cuda:{cuda_index}"
26
+ elif has_mps:
27
+ device = "mps"
28
+
29
+ else:
30
+ if accelerator_device == AcceleratorDevice.CUDA:
31
+ if has_cuda:
32
+ device = f"cuda:{cuda_index}"
33
+ else:
34
+ _log.warning("CUDA is not available in the system. Fall back to 'CPU'")
35
+ elif accelerator_device == AcceleratorDevice.MPS:
36
+ if has_mps:
37
+ device = "mps"
38
+ else:
39
+ _log.warning("MPS is not available in the system. Fall back to 'CPU'")
40
+
41
+ _log.info("Accelerator device: '%s'", device)
42
+ return device
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling
3
- Version: 2.11.0
3
+ Version: 2.12.0
4
4
  Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
5
5
  Home-page: https://github.com/DS4SD/docling
6
6
  License: MIT
@@ -27,7 +27,7 @@ Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
27
27
  Requires-Dist: certifi (>=2024.7.4)
28
28
  Requires-Dist: deepsearch-glm (>=1.0.0,<2.0.0)
29
29
  Requires-Dist: docling-core[chunking] (>=2.9.0,<3.0.0)
30
- Requires-Dist: docling-ibm-models (>=2.0.6,<3.0.0)
30
+ Requires-Dist: docling-ibm-models (>=3.1.0,<4.0.0)
31
31
  Requires-Dist: docling-parse (>=3.0.0,<4.0.0)
32
32
  Requires-Dist: easyocr (>=1.7,<2.0)
33
33
  Requires-Dist: filetype (>=1.2.0,<2.0.0)
@@ -13,11 +13,11 @@ docling/backend/pdf_backend.py,sha256=unnw7QiRE1VXg6Pj-eYrtnFGrp5SSYiI324OlFxyv6
13
13
  docling/backend/pypdfium2_backend.py,sha256=B4bfv-dfzlWiKTfF8LN5fto_99YBu8A2c1_XIVwRUWI,8996
14
14
  docling/chunking/__init__.py,sha256=h83TDs0AuOV6oEPLAPrn9dpGKiU-2Vg6IRNo4cv6GDA,346
15
15
  docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
- docling/cli/main.py,sha256=FFDUDADvK7QNW7xCs6dlsC7Bt_BMyrKdbZewKTEjm54,14624
16
+ docling/cli/main.py,sha256=SdavhL0VTApK9JrKz0Pc1IYdnQhK-0OOaGT8zlTiN5c,15022
17
17
  docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
18
  docling/datamodel/base_models.py,sha256=vwy59eDrkzCSaay24RlUvx4zEyuaUukOdOhw3622u2I,5616
19
19
  docling/datamodel/document.py,sha256=GNlTsgKgDqdqv2dfhpYmnqymxDQWWWC8HgE8uAta8V4,10265
20
- docling/datamodel/pipeline_options.py,sha256=1ouWNE5VhZolrWMb4RE6s_AxgNFr3_3PMtxB_YQ391A,5495
20
+ docling/datamodel/pipeline_options.py,sha256=AJxnc3lHAlomkXcm-g68wylrKp1_2dttO1HQBNXleME,7649
21
21
  docling/datamodel/settings.py,sha256=JK8lZPBjUx2kD2q-Qpg-o3vOElADMcyQbRUL0EHZ7us,1263
22
22
  docling/document_converter.py,sha256=Iz5eerBWFPVJoXAMlXEivRQX2VLBiUkA07BL4NNbaEs,11583
23
23
  docling/exceptions.py,sha256=-FoP46rFJgz_jn5uDv2V052udEEg8gckk6uhoItchXc,85
@@ -25,28 +25,29 @@ docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
25
  docling/models/base_model.py,sha256=Yq_-FmUhqhE20vXYG3WiQXDRTIPjik1CyuEZ8iYTGAY,701
26
26
  docling/models/base_ocr_model.py,sha256=rGSpBF4dByITcsBaRIgvFKpiu0CrhmZS_PHIo686Dw0,6428
27
27
  docling/models/ds_glm_model.py,sha256=YJkGxV46wh7G2Wr4vVzt9b8oewkUDPWpvI6AEaZDrs0,11872
28
- docling/models/easyocr_model.py,sha256=c2m4x9dZpSc-cMgeEdFBRVBlB78uMGlYD8Q_2gzRuMU,3734
29
- docling/models/layout_model.py,sha256=ZvbTSyxvXB5yLHNEti0Wv3trz0vwGuHySI5TCdApb0U,14011
28
+ docling/models/easyocr_model.py,sha256=q9GWMRte-D7sleSb5tnTReWsx4vOeqnMEaguxDFdFms,4856
29
+ docling/models/layout_model.py,sha256=v7EvFYFtFVMa-UeXCR644sk6mbX9EvEVG5jRoDli7II,14450
30
30
  docling/models/ocr_mac_model.py,sha256=bLP14UUmZcSzjDe-HLj-mtksTuBmsCTg2C1wCxUpan0,4502
31
31
  docling/models/page_assemble_model.py,sha256=kSGNiRKhmzkpFH7xCiT3rulMsgJmUXFa6Th_eB-cLEk,7103
32
32
  docling/models/page_preprocessing_model.py,sha256=1gVrZjObKxAvXkkKvXnIFApPOggzgiTFPtt1CGbMbSs,2763
33
- docling/models/rapid_ocr_model.py,sha256=ui152cerv9b9OeWyyyefs8qMLwYn0qsE2DFE_gHmaCM,6124
34
- docling/models/table_structure_model.py,sha256=-ANSQpiN2avt3B9sbi7dHcoULUJbMBalAR5xxlrM7To,8421
33
+ docling/models/rapid_ocr_model.py,sha256=LOIvczJs3_db2o8mtrKk-pIXgC-xqWqRLu2cjA3wvy4,4980
34
+ docling/models/table_structure_model.py,sha256=g5u42ptUEtqPfFATAEBtDDjkNcEIzIBhunoT8DpYra8,9010
35
35
  docling/models/tesseract_ocr_cli_model.py,sha256=aKQBaty4cYu6zG_C5uy6Zm3eeRQo5fxIierbKixa2kc,6622
36
36
  docling/models/tesseract_ocr_model.py,sha256=RDf6iV1q-oXaGfZXv0bW6SqjHNKQvBUDlUsOkuz0neY,6095
37
37
  docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
38
  docling/pipeline/base_pipeline.py,sha256=hVvtk5E4DVZdl_SyNs_pYRUjN9C8PABhpVaeN5Z_fAY,7885
39
39
  docling/pipeline/simple_pipeline.py,sha256=mZqANqUtAOFAyqQEShErQnAUz6tJFOl6zVoazEDJ_wE,2254
40
- docling/pipeline/standard_pdf_pipeline.py,sha256=B1q8xt3Dfecpi8s8DrcfPzdATh8TYgL43FDzzcS4vEA,8885
40
+ docling/pipeline/standard_pdf_pipeline.py,sha256=iXjVLy-9q82jrU_0AZTkbz3ccrqz4WiRLYD-epxG5BQ,9174
41
41
  docling/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
42
42
  docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
43
+ docling/utils/accelerator_utils.py,sha256=ZjULCn-qhxqx3frF-rJmAlWdzqgUMxH5utLHbSPev80,1367
43
44
  docling/utils/export.py,sha256=KyGF1BVDHPFfHVXZc8vegsWlFfOgGPP2YckWpTadyI8,4694
44
45
  docling/utils/glm_utils.py,sha256=H1O_tDiRksMgw45rY9LhK6GjcZSOq5IyoGurGjoo-Ac,11211
45
46
  docling/utils/layout_utils.py,sha256=vlN0rc8i0ayRGn3WnaG-pdmqEL00KKGl2zez3Gj-hrk,32074
46
47
  docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
47
48
  docling/utils/utils.py,sha256=llhXSbIDNZ1MHOwBEfLHBAoJIAYI7QlPIonlI1jLUJ0,1208
48
- docling-2.11.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
49
- docling-2.11.0.dist-info/METADATA,sha256=ajUVy5CuNDUp0x9tMCqO2px2M-ia-Vs7frIyb0_HxMo,7731
50
- docling-2.11.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
51
- docling-2.11.0.dist-info/entry_points.txt,sha256=VOSzV77znM52dz5ysaDuJ0ijl1cnfrh1ZPg8od5OcTs,48
52
- docling-2.11.0.dist-info/RECORD,,
49
+ docling-2.12.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
50
+ docling-2.12.0.dist-info/METADATA,sha256=tltJX40w5aC-5oNy7FrMxhRuJ42YFY2fUcUXu9vpo14,7731
51
+ docling-2.12.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
52
+ docling-2.12.0.dist-info/entry_points.txt,sha256=VOSzV77znM52dz5ysaDuJ0ijl1cnfrh1ZPg8od5OcTs,48
53
+ docling-2.12.0.dist-info/RECORD,,