docling-ibm-models 1.3.3__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -7,7 +7,8 @@ from collections.abc import Iterable
7
7
  from typing import Union
8
8
 
9
9
  import numpy as np
10
- import onnxruntime as ort
10
+ import torch
11
+ import torchvision.transforms as T
11
12
  from PIL import Image
12
13
 
13
14
  MODEL_CHECKPOINT_FN = "model.pt"
@@ -16,14 +17,14 @@ DEFAULT_NUM_THREADS = 4
16
17
 
17
18
  class LayoutPredictor:
18
19
  r"""
19
- Document layout prediction using ONNX
20
+ Document layout prediction using torch
20
21
  """
21
22
 
22
23
  def __init__(
23
24
  self, artifact_path: str, num_threads: int = None, use_cpu_only: bool = False
24
25
  ):
25
26
  r"""
26
- Provide the artifact path that contains the LayoutModel ONNX file
27
+ Provide the artifact path that contains the LayoutModel file
27
28
 
28
29
  The number of threads is decided, in the following order, by:
29
30
  1. The init method parameter `num_threads`, if it is set.
@@ -38,13 +39,13 @@ class LayoutPredictor:
38
39
 
39
40
  Parameters
40
41
  ----------
41
- artifact_path: Path for the model ONNX file.
42
+ artifact_path: Path for the model torch file.
42
43
  num_threads: (Optional) Number of threads to run the inference.
43
44
  use_cpu_only: (Optional) If True, it forces CPU as the execution provider.
44
45
 
45
46
  Raises
46
47
  ------
47
- FileNotFoundError when the model's ONNX file is missing
48
+ FileNotFoundError when the model's torch file is missing
48
49
  """
49
50
  # Initialize classes map:
50
51
  self._classes_map = {
@@ -75,46 +76,27 @@ class LayoutPredictor:
75
76
  self._threshold = 0.6 # Score threshold
76
77
  self._image_size = 640
77
78
  self._size = np.asarray([[self._image_size, self._image_size]], dtype=np.int64)
79
+ self._use_cpu_only = use_cpu_only or ("USE_CPU_ONLY" in os.environ)
80
+
81
+ # Model file
82
+ self._torch_fn = os.path.join(artifact_path, MODEL_CHECKPOINT_FN)
83
+ if not os.path.isfile(self._torch_fn):
84
+ raise FileNotFoundError("Missing torch file: {}".format(self._torch_fn))
78
85
 
79
86
  # Get env vars
80
- self._use_cpu_only = use_cpu_only or ("USE_CPU_ONLY" in os.environ)
81
87
  if num_threads is None:
82
88
  num_threads = int(os.environ.get("OMP_NUM_THREADS", DEFAULT_NUM_THREADS))
83
89
  self._num_threads = num_threads
84
90
 
85
- # Decide the execution providers
86
- if (
87
- not self._use_cpu_only
88
- and "CUDAExecutionProvider" in ort.get_available_providers()
89
- ):
90
- providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
91
- else:
92
- providers = ["CPUExecutionProvider"]
93
- self._providers = providers
94
-
95
- # Model ONNX file
96
- self._onnx_fn = os.path.join(artifact_path, MODEL_CHECKPOINT_FN)
97
- if not os.path.isfile(self._onnx_fn):
98
- raise FileNotFoundError("Missing ONNX file: {}".format(self._onnx_fn))
99
-
100
- # ONNX options
101
- self._options = ort.SessionOptions()
102
- self._options.intra_op_num_threads = self._num_threads
103
- self.sess = ort.InferenceSession(
104
- self._onnx_fn,
105
- sess_options=self._options,
106
- providers=self._providers,
107
- )
91
+ self.model = torch.jit.load(self._torch_fn)
108
92
 
109
93
  def info(self) -> dict:
110
94
  r"""
111
95
  Get information about the configuration of LayoutPredictor
112
96
  """
113
97
  info = {
114
- "onnx_file": self._onnx_fn,
115
- "intra_op_num_threads": self._num_threads,
98
+ "torch_file": self._torch_fn,
116
99
  "use_cpu_only": self._use_cpu_only,
117
- "providers": self._providers,
118
100
  "image_size": self._image_size,
119
101
  "threshold": self._threshold,
120
102
  }
@@ -147,33 +129,35 @@ class LayoutPredictor:
147
129
  raise TypeError("Not supported input image format")
148
130
 
149
131
  w, h = page_img.size
150
- page_img = page_img.resize((self._image_size, self._image_size))
151
- page_data = np.array(page_img, dtype=np.uint8) / np.float32(255.0)
152
- page_data = np.expand_dims(np.transpose(page_data, axes=[2, 0, 1]), axis=0)
132
+ orig_size = torch.tensor([w, h])[None]
153
133
 
154
- # Predict
155
- labels, boxes, scores = self.sess.run(
156
- output_names=None,
157
- input_feed={
158
- "images": page_data,
159
- "orig_target_sizes": self._size,
160
- },
134
+ transforms = T.Compose(
135
+ [
136
+ T.Resize((640, 640)),
137
+ T.ToTensor(),
138
+ ]
161
139
  )
140
+ img = transforms(page_img)[None]
141
+ # Predict
142
+ with torch.no_grad():
143
+ labels, boxes, scores = self.model(img, orig_size)
162
144
 
163
145
  # Yield output
164
146
  for label_idx, box, score in zip(labels[0], boxes[0], scores[0]):
165
147
  # Filter out blacklisted classes
166
- label = self._classes_map[label_idx]
148
+ label_idx = int(label_idx.item())
149
+ score = float(score.item())
150
+ label = self._classes_map[label_idx + 1]
167
151
  if label in self._black_classes:
168
152
  continue
169
153
 
170
154
  # Check against threshold
171
155
  if score > self._threshold:
172
156
  yield {
173
- "l": box[0] / self._image_size * w,
174
- "t": box[1] / self._image_size * h,
175
- "r": box[2] / self._image_size * w,
176
- "b": box[3] / self._image_size * h,
157
+ "l": box[0],
158
+ "t": box[1],
159
+ "r": box[2],
160
+ "b": box[3],
177
161
  "label": label,
178
162
  "confidence": score,
179
163
  }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling-ibm-models
3
- Version: 1.3.3
3
+ Version: 1.4.0
4
4
  Summary: This package contains the AI models used by the Docling PDF conversion package
5
5
  License: MIT
6
6
  Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
@@ -24,7 +24,6 @@ Requires-Dist: jsonlines (>=3.1.0,<4.0.0)
24
24
  Requires-Dist: lxml (>=4.9.1,<5.0.0)
25
25
  Requires-Dist: mean_average_precision (>=2021.4.26.0,<2022.0.0.0)
26
26
  Requires-Dist: numpy (>=1.24.4,<2.0.0)
27
- Requires-Dist: onnxruntime (>=1.16.2,<2.0.0)
28
27
  Requires-Dist: opencv-python-headless (>=4.6.0.66,<5.0.0.0)
29
28
  Requires-Dist: torch (>=2.2.2,<2.3.0) ; sys_platform == "darwin" and platform_machine == "x86_64"
30
29
  Requires-Dist: torch (>=2.2.2,<3.0.0) ; sys_platform != "darwin" or platform_machine != "x86_64"
@@ -1,4 +1,4 @@
1
- docling_ibm_models/layoutmodel/layout_predictor.py,sha256=JHZbh6HyA2fLqaN0p9Lv3Y9P9dgkeHUqQI-JyyetocE,6042
1
+ docling_ibm_models/layoutmodel/layout_predictor.py,sha256=IDIT8UVzj-U3spWe4nWNxKZqcHkY58teiX_quRu1e0M,5253
2
2
  docling_ibm_models/tableformer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  docling_ibm_models/tableformer/common.py,sha256=2zgGZBFf4fXytEaXrZR2NU6FWdX2kxO0DHlGZmuvpNQ,3230
4
4
  docling_ibm_models/tableformer/data_management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -22,7 +22,7 @@ docling_ibm_models/tableformer/utils/app_profiler.py,sha256=Pb7o1zcikKXh7ninaNt4
22
22
  docling_ibm_models/tableformer/utils/mem_monitor.py,sha256=ycZ07fUBVVKKLTVGF54jGPDM2aTkKuZWk1kMbOS0wwQ,6353
23
23
  docling_ibm_models/tableformer/utils/torch_utils.py,sha256=uN0rK9mSXy1ewBnBnILrWebJhhVU4N-XJZBqNiLJwlQ,8893
24
24
  docling_ibm_models/tableformer/utils/utils.py,sha256=8Bxf1rEn977lFbY9NX0r5xh9PvxIRipQZX_EZW92XfA,10980
25
- docling_ibm_models-1.3.3.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
26
- docling_ibm_models-1.3.3.dist-info/METADATA,sha256=jnXUHlZVDGc0ozdZaL_79WyQe9eW4pgT9gH9ya04CO8,7088
27
- docling_ibm_models-1.3.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
28
- docling_ibm_models-1.3.3.dist-info/RECORD,,
25
+ docling_ibm_models-1.4.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
26
+ docling_ibm_models-1.4.0.dist-info/METADATA,sha256=mlcfKB6GKPyvWSxfC8nztOnhjxO3BquVbbQPLdV0LQw,7043
27
+ docling_ibm_models-1.4.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
28
+ docling_ibm_models-1.4.0.dist-info/RECORD,,