docling-ibm-models 2.0.8__py3-none-any.whl → 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling_ibm_models/layoutmodel/layout_predictor.py +74 -66
- docling_ibm_models/tableformer/data_management/tf_predictor.py +30 -35
- {docling_ibm_models-2.0.8.dist-info → docling_ibm_models-3.0.0.dist-info}/METADATA +3 -1
- {docling_ibm_models-2.0.8.dist-info → docling_ibm_models-3.0.0.dist-info}/RECORD +6 -6
- {docling_ibm_models-2.0.8.dist-info → docling_ibm_models-3.0.0.dist-info}/LICENSE +0 -0
- {docling_ibm_models-2.0.8.dist-info → docling_ibm_models-3.0.0.dist-info}/WHEEL +0 -0
@@ -2,6 +2,7 @@
|
|
2
2
|
# Copyright IBM Corp. 2024 - 2024
|
3
3
|
# SPDX-License-Identifier: MIT
|
4
4
|
#
|
5
|
+
import logging
|
5
6
|
import os
|
6
7
|
from collections.abc import Iterable
|
7
8
|
from typing import Union
|
@@ -10,38 +11,30 @@ import numpy as np
|
|
10
11
|
import torch
|
11
12
|
import torchvision.transforms as T
|
12
13
|
from PIL import Image
|
14
|
+
from transformers import RTDetrForObjectDetection, RTDetrImageProcessor
|
13
15
|
|
14
|
-
|
15
|
-
DEFAULT_NUM_THREADS = 4
|
16
|
+
_log = logging.getLogger(__name__)
|
16
17
|
|
17
18
|
|
18
19
|
class LayoutPredictor:
|
19
|
-
|
20
|
-
Document layout prediction using
|
20
|
+
"""
|
21
|
+
Document layout prediction using safe tensors
|
21
22
|
"""
|
22
23
|
|
23
24
|
def __init__(
|
24
|
-
self,
|
25
|
+
self,
|
26
|
+
artifact_path: str,
|
27
|
+
device: str = "cpu",
|
28
|
+
num_threads: int = 4,
|
25
29
|
):
|
26
|
-
|
30
|
+
"""
|
27
31
|
Provide the artifact path that contains the LayoutModel file
|
28
32
|
|
29
|
-
The number of threads is decided, in the following order, by:
|
30
|
-
1. The init method parameter `num_threads`, if it is set.
|
31
|
-
2. The envvar "OMP_NUM_THREADS", if it is set.
|
32
|
-
3. The default value DEFAULT_NUM_THREADS.
|
33
|
-
|
34
|
-
The execution provided is decided, in the following order:
|
35
|
-
1. If the init method parameter `cpu_only` is True or the envvar "USE_CPU_ONLY" is set,
|
36
|
-
it uses the "CPUExecutionProvider".
|
37
|
-
3. Otherwise if the "CUDAExecutionProvider" is present, use:
|
38
|
-
["CUDAExecutionProvider", "CPUExecutionProvider"]:
|
39
|
-
|
40
33
|
Parameters
|
41
34
|
----------
|
42
35
|
artifact_path: Path for the model torch file.
|
43
|
-
|
44
|
-
|
36
|
+
device: (Optional) device to run the inference.
|
37
|
+
num_threads: (Optional) Number of threads to run the inference if device = 'cpu'
|
45
38
|
|
46
39
|
Raises
|
47
40
|
------
|
@@ -70,40 +63,51 @@ class LayoutPredictor:
|
|
70
63
|
}
|
71
64
|
|
72
65
|
# Blacklisted classes
|
73
|
-
self._black_classes = set(["Form", "Key-Value Region"])
|
66
|
+
self._black_classes = set() # ["Form", "Key-Value Region"])
|
74
67
|
|
75
68
|
# Set basic params
|
76
|
-
self._threshold = 0.
|
69
|
+
self._threshold = 0.3 # Score threshold
|
77
70
|
self._image_size = 640
|
78
71
|
self._size = np.asarray([[self._image_size, self._image_size]], dtype=np.int64)
|
79
|
-
self._use_cpu_only = use_cpu_only or ("USE_CPU_ONLY" in os.environ)
|
80
72
|
|
81
|
-
#
|
82
|
-
self.
|
83
|
-
if not os.path.isfile(self._torch_fn):
|
84
|
-
raise FileNotFoundError("Missing torch file: {}".format(self._torch_fn))
|
85
|
-
|
86
|
-
# Get env vars
|
87
|
-
if num_threads is None:
|
88
|
-
num_threads = int(os.environ.get("OMP_NUM_THREADS", DEFAULT_NUM_THREADS))
|
73
|
+
# Set number of threads for CPU
|
74
|
+
self._device = torch.device(device)
|
89
75
|
self._num_threads = num_threads
|
76
|
+
if device == "cpu":
|
77
|
+
torch.set_num_threads(self._num_threads)
|
78
|
+
|
79
|
+
# Model file and configurations
|
80
|
+
self._st_fn = os.path.join(artifact_path, "model.safetensors")
|
81
|
+
if not os.path.isfile(self._st_fn):
|
82
|
+
raise FileNotFoundError("Missing safe tensors file: {}".format(self._st_fn))
|
90
83
|
|
91
|
-
|
84
|
+
# Load model and move to device
|
85
|
+
processor_config = os.path.join(artifact_path, "preprocessor_config.json")
|
86
|
+
model_config = os.path.join(artifact_path, "config.json")
|
87
|
+
self._image_processor = RTDetrImageProcessor.from_json_file(processor_config)
|
88
|
+
self._model = RTDetrForObjectDetection.from_pretrained(
|
89
|
+
artifact_path, config=model_config
|
90
|
+
).to(self._device)
|
91
|
+
self._model.eval()
|
92
|
+
|
93
|
+
_log.debug("LayoutPredictor settings: {}".format(self.info()))
|
92
94
|
|
93
95
|
def info(self) -> dict:
|
94
|
-
|
96
|
+
"""
|
95
97
|
Get information about the configuration of LayoutPredictor
|
96
98
|
"""
|
97
99
|
info = {
|
98
|
-
"
|
99
|
-
"
|
100
|
+
"safe_tensors_file": self._st_fn,
|
101
|
+
"device": self._device.type,
|
102
|
+
"num_threads": self._num_threads,
|
100
103
|
"image_size": self._image_size,
|
101
104
|
"threshold": self._threshold,
|
102
105
|
}
|
103
106
|
return info
|
104
107
|
|
108
|
+
@torch.inference_mode()
|
105
109
|
def predict(self, orig_img: Union[Image.Image, np.ndarray]) -> Iterable[dict]:
|
106
|
-
|
110
|
+
"""
|
107
111
|
Predict bounding boxes for a given image.
|
108
112
|
The origin (0, 0) is the top-left corner and the predicted bbox coords are provided as:
|
109
113
|
[left, top, right, bottom]
|
@@ -128,40 +132,44 @@ class LayoutPredictor:
|
|
128
132
|
else:
|
129
133
|
raise TypeError("Not supported input image format")
|
130
134
|
|
135
|
+
resize = {"height": self._image_size, "width": self._image_size}
|
136
|
+
inputs = self._image_processor(
|
137
|
+
images=page_img,
|
138
|
+
return_tensors="pt",
|
139
|
+
size=resize,
|
140
|
+
).to(self._device)
|
141
|
+
outputs = self._model(**inputs)
|
142
|
+
results = self._image_processor.post_process_object_detection(
|
143
|
+
outputs,
|
144
|
+
target_sizes=torch.tensor([page_img.size[::-1]]),
|
145
|
+
threshold=self._threshold,
|
146
|
+
)
|
147
|
+
|
131
148
|
w, h = page_img.size
|
132
|
-
orig_size = torch.tensor([w, h])[None]
|
133
149
|
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
with torch.no_grad():
|
143
|
-
labels, boxes, scores = self.model(img, orig_size)
|
150
|
+
result = results[0]
|
151
|
+
for score, label_id, box in zip(
|
152
|
+
result["scores"], result["labels"], result["boxes"]
|
153
|
+
):
|
154
|
+
score = float(score.item())
|
155
|
+
|
156
|
+
label_id = int(label_id.item()) + 1 # Advance the label_id
|
157
|
+
label_str = self._classes_map[label_id]
|
144
158
|
|
145
|
-
# Yield output
|
146
|
-
for label_idx, box, score in zip(labels[0], boxes[0], scores[0]):
|
147
159
|
# Filter out blacklisted classes
|
148
|
-
|
149
|
-
score = float(score.item())
|
150
|
-
label = self._classes_map[label_idx + 1]
|
151
|
-
if label in self._black_classes:
|
160
|
+
if label_str in self._black_classes:
|
152
161
|
continue
|
153
162
|
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
}
|
163
|
+
bbox_float = [float(b.item()) for b in box]
|
164
|
+
l = min(w, max(0, bbox_float[0]))
|
165
|
+
t = min(h, max(0, bbox_float[1]))
|
166
|
+
r = min(w, max(0, bbox_float[2]))
|
167
|
+
b = min(h, max(0, bbox_float[3]))
|
168
|
+
yield {
|
169
|
+
"l": l,
|
170
|
+
"t": t,
|
171
|
+
"r": r,
|
172
|
+
"b": b,
|
173
|
+
"label": label_str,
|
174
|
+
"confidence": score,
|
175
|
+
}
|
@@ -2,14 +2,17 @@
|
|
2
2
|
# Copyright IBM Corp. 2024 - 2024
|
3
3
|
# SPDX-License-Identifier: MIT
|
4
4
|
#
|
5
|
+
import glob
|
5
6
|
import json
|
6
7
|
import logging
|
7
8
|
import os
|
8
9
|
from itertools import groupby
|
10
|
+
from pathlib import Path
|
9
11
|
|
10
12
|
import cv2
|
11
13
|
import numpy as np
|
12
14
|
import torch
|
15
|
+
from safetensors.torch import load_model
|
13
16
|
|
14
17
|
import docling_ibm_models.tableformer.common as c
|
15
18
|
import docling_ibm_models.tableformer.data_management.transforms as T
|
@@ -82,45 +85,27 @@ def otsl_sqr_chk(rs_list, logdebug):
|
|
82
85
|
return isSquare
|
83
86
|
|
84
87
|
|
85
|
-
def decide_device(config: dict) -> str:
|
86
|
-
r"""
|
87
|
-
Decide the inference device based on the "predict.device_mode" parameter
|
88
|
-
"""
|
89
|
-
device_mode = config["predict"].get("device_mode", "cpu")
|
90
|
-
num_gpus = torch.cuda.device_count()
|
91
|
-
|
92
|
-
if device_mode == "auto":
|
93
|
-
device = "cuda:0" if num_gpus > 0 else "cpu"
|
94
|
-
elif device_mode in ["gpu", "cuda"]:
|
95
|
-
device = "cuda:0"
|
96
|
-
else:
|
97
|
-
device = "cpu"
|
98
|
-
return device
|
99
|
-
|
100
|
-
|
101
88
|
class TFPredictor:
|
102
89
|
r"""
|
103
90
|
Table predictions for the in-memory Docling API
|
104
91
|
"""
|
105
92
|
|
106
|
-
def __init__(self, config, num_threads: int =
|
93
|
+
def __init__(self, config, device: str = "cpu", num_threads: int = 4):
|
107
94
|
r"""
|
108
|
-
The number of threads is decided, in the following order, by:
|
109
|
-
1. The init method parameter `num_threads`, if it is set.
|
110
|
-
2. The envvar "OMP_NUM_THREADS", if it is set.
|
111
|
-
3. The default value 4.
|
112
|
-
|
113
95
|
Parameters
|
114
96
|
----------
|
115
|
-
config : dict
|
116
|
-
|
97
|
+
config : dict Parameters configuration
|
98
|
+
device: (Optional) torch device to run the inference.
|
99
|
+
num_threads: (Optional) Number of threads to run the inference if device = 'cpu'
|
100
|
+
|
117
101
|
Raises
|
118
102
|
------
|
119
103
|
ValueError
|
120
104
|
When the model cannot be found
|
121
105
|
"""
|
122
|
-
self._device =
|
123
|
-
self.
|
106
|
+
# self._device = torch.device(device)
|
107
|
+
self._device = device
|
108
|
+
self._log().info("Running on device: {}".format(device))
|
124
109
|
|
125
110
|
self._config = config
|
126
111
|
self.enable_post_process = True
|
@@ -133,11 +118,10 @@ class TFPredictor:
|
|
133
118
|
|
134
119
|
self._init_word_map()
|
135
120
|
|
136
|
-
# Set the number of
|
137
|
-
if
|
138
|
-
|
139
|
-
|
140
|
-
torch.set_num_threads(num_threads)
|
121
|
+
# Set the number of threads
|
122
|
+
if device == "cpu":
|
123
|
+
self._num_threads = num_threads
|
124
|
+
torch.set_num_threads(self._num_threads)
|
141
125
|
|
142
126
|
# Load the model
|
143
127
|
self._model = self._load_model()
|
@@ -202,10 +186,21 @@ class TFPredictor:
|
|
202
186
|
if self._model_type == "TableModel02":
|
203
187
|
self._remove_padding = True
|
204
188
|
|
205
|
-
# Load model from
|
206
|
-
|
207
|
-
|
208
|
-
|
189
|
+
# Load model from safetensors
|
190
|
+
save_dir = self._config["model"]["save_dir"]
|
191
|
+
models_fn = glob.glob(f"{save_dir}/tableformer_*.safetensors")
|
192
|
+
if not models_fn:
|
193
|
+
err_msg = "Not able to find a model file for {}".format(self._model_type)
|
194
|
+
self._log().error(err_msg)
|
195
|
+
raise ValueError(err_msg)
|
196
|
+
model_fn = models_fn[
|
197
|
+
0
|
198
|
+
] # Take the first tableformer safetensors file inside the save_dir
|
199
|
+
missing, unexpected = load_model(model, model_fn, device=self._device)
|
200
|
+
if missing or unexpected:
|
201
|
+
err_msg = "Not able to load the model weights for {}".format(
|
202
|
+
self._model_type
|
203
|
+
)
|
209
204
|
self._log().error(err_msg)
|
210
205
|
raise ValueError(err_msg)
|
211
206
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: docling-ibm-models
|
3
|
-
Version:
|
3
|
+
Version: 3.0.0
|
4
4
|
Summary: This package contains the AI models used by the Docling PDF conversion package
|
5
5
|
License: MIT
|
6
6
|
Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
|
@@ -25,9 +25,11 @@ Requires-Dist: huggingface_hub (>=0.23,<1)
|
|
25
25
|
Requires-Dist: jsonlines (>=3.1.0,<4.0.0)
|
26
26
|
Requires-Dist: numpy (>=1.24.4,<3.0.0)
|
27
27
|
Requires-Dist: opencv-python-headless (>=4.6.0.66,<5.0.0.0)
|
28
|
+
Requires-Dist: safetensors[torch] (>=0.4.3,<1)
|
28
29
|
Requires-Dist: torch (>=2.2.2,<3.0.0)
|
29
30
|
Requires-Dist: torchvision (>=0,<1)
|
30
31
|
Requires-Dist: tqdm (>=4.64.0,<5.0.0)
|
32
|
+
Requires-Dist: transformers (>=4.42.0,<5.0.0)
|
31
33
|
Description-Content-Type: text/markdown
|
32
34
|
|
33
35
|
[PyPI version](https://pypi.org/project/docling-ibm-models/)
|
@@ -1,11 +1,11 @@
|
|
1
|
-
docling_ibm_models/layoutmodel/layout_predictor.py,sha256=
|
1
|
+
docling_ibm_models/layoutmodel/layout_predictor.py,sha256=aVLRsKSR_DBiCyTPCQipvkHdcmUYSPOvhp0KVuuVttM,5567
|
2
2
|
docling_ibm_models/tableformer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
3
|
docling_ibm_models/tableformer/common.py,sha256=2zgGZBFf4fXytEaXrZR2NU6FWdX2kxO0DHlGZmuvpNQ,3230
|
4
4
|
docling_ibm_models/tableformer/data_management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
5
|
docling_ibm_models/tableformer/data_management/functional.py,sha256=kJntHEXFz2SP7obEcHyjAqZNZC9qh-U75MwUJALLADI,3143
|
6
6
|
docling_ibm_models/tableformer/data_management/matching_post_processor.py,sha256=meSM0jLWNLS8P95QjN6pEp095jFEbKdl9KKfRY1ocy0,58046
|
7
7
|
docling_ibm_models/tableformer/data_management/tf_cell_matcher.py,sha256=IdZTaWIRhPpyEwzZgCmviZnYacR6kbcUqBvx7ilmkKY,21250
|
8
|
-
docling_ibm_models/tableformer/data_management/tf_predictor.py,sha256=
|
8
|
+
docling_ibm_models/tableformer/data_management/tf_predictor.py,sha256=BHd6KdAX0-b9TbX01m0872MO10zWDMValyf4UTIRkAU,39008
|
9
9
|
docling_ibm_models/tableformer/data_management/transforms.py,sha256=NNaz_7GI7FCVmu_rJuenqH5VfzRSljJHUHpNQQ8Mq3Q,2983
|
10
10
|
docling_ibm_models/tableformer/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
11
11
|
docling_ibm_models/tableformer/models/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -22,7 +22,7 @@ docling_ibm_models/tableformer/utils/app_profiler.py,sha256=Pb7o1zcikKXh7ninaNt4
|
|
22
22
|
docling_ibm_models/tableformer/utils/mem_monitor.py,sha256=ycZ07fUBVVKKLTVGF54jGPDM2aTkKuZWk1kMbOS0wwQ,6353
|
23
23
|
docling_ibm_models/tableformer/utils/torch_utils.py,sha256=uN0rK9mSXy1ewBnBnILrWebJhhVU4N-XJZBqNiLJwlQ,8893
|
24
24
|
docling_ibm_models/tableformer/utils/utils.py,sha256=8Bxf1rEn977lFbY9NX0r5xh9PvxIRipQZX_EZW92XfA,10980
|
25
|
-
docling_ibm_models-
|
26
|
-
docling_ibm_models-
|
27
|
-
docling_ibm_models-
|
28
|
-
docling_ibm_models-
|
25
|
+
docling_ibm_models-3.0.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
|
26
|
+
docling_ibm_models-3.0.0.dist-info/METADATA,sha256=Od5o3mMpPJnIh7jaWbI1iqnwayYizM53ALYrqEN5UHQ,7023
|
27
|
+
docling_ibm_models-3.0.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
28
|
+
docling_ibm_models-3.0.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|