doc-page-extractor 0.2.0__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- doc_page_extractor/__init__.py +5 -15
- doc_page_extractor/check_env.py +40 -0
- doc_page_extractor/extractor.py +88 -215
- doc_page_extractor/model.py +97 -0
- doc_page_extractor/parser.py +51 -0
- doc_page_extractor/plot.py +52 -79
- doc_page_extractor/redacter.py +111 -0
- doc_page_extractor-1.0.2.dist-info/METADATA +120 -0
- doc_page_extractor-1.0.2.dist-info/RECORD +11 -0
- {doc_page_extractor-0.2.0.dist-info → doc_page_extractor-1.0.2.dist-info}/WHEEL +1 -2
- doc_page_extractor-1.0.2.dist-info/licenses/LICENSE +21 -0
- doc_page_extractor/clipper.py +0 -119
- doc_page_extractor/downloader.py +0 -16
- doc_page_extractor/latex.py +0 -31
- doc_page_extractor/layout_order.py +0 -237
- doc_page_extractor/layoutreader.py +0 -126
- doc_page_extractor/models.py +0 -92
- doc_page_extractor/ocr.py +0 -200
- doc_page_extractor/ocr_corrector.py +0 -126
- doc_page_extractor/onnxocr/__init__.py +0 -1
- doc_page_extractor/onnxocr/cls_postprocess.py +0 -26
- doc_page_extractor/onnxocr/db_postprocess.py +0 -246
- doc_page_extractor/onnxocr/imaug.py +0 -32
- doc_page_extractor/onnxocr/operators.py +0 -187
- doc_page_extractor/onnxocr/predict_base.py +0 -57
- doc_page_extractor/onnxocr/predict_cls.py +0 -109
- doc_page_extractor/onnxocr/predict_det.py +0 -139
- doc_page_extractor/onnxocr/predict_rec.py +0 -344
- doc_page_extractor/onnxocr/predict_system.py +0 -97
- doc_page_extractor/onnxocr/rec_postprocess.py +0 -896
- doc_page_extractor/onnxocr/utils.py +0 -71
- doc_page_extractor/overlap.py +0 -167
- doc_page_extractor/raw_optimizer.py +0 -104
- doc_page_extractor/rectangle.py +0 -72
- doc_page_extractor/rotation.py +0 -158
- doc_page_extractor/struct_eqtable/__init__.py +0 -49
- doc_page_extractor/struct_eqtable/internvl/__init__.py +0 -2
- doc_page_extractor/struct_eqtable/internvl/conversation.py +0 -394
- doc_page_extractor/struct_eqtable/internvl/internvl.py +0 -198
- doc_page_extractor/struct_eqtable/internvl/internvl_lmdeploy.py +0 -81
- doc_page_extractor/struct_eqtable/pix2s/__init__.py +0 -3
- doc_page_extractor/struct_eqtable/pix2s/pix2s.py +0 -76
- doc_page_extractor/struct_eqtable/pix2s/pix2s_trt.py +0 -1047
- doc_page_extractor/table.py +0 -70
- doc_page_extractor/types.py +0 -91
- doc_page_extractor/utils.py +0 -32
- doc_page_extractor-0.2.0.dist-info/METADATA +0 -85
- doc_page_extractor-0.2.0.dist-info/RECORD +0 -45
- doc_page_extractor-0.2.0.dist-info/licenses/LICENSE +0 -661
- doc_page_extractor-0.2.0.dist-info/top_level.txt +0 -2
- tests/__init__.py +0 -0
- tests/test_history_bus.py +0 -55
doc_page_extractor/onnxocr/db_postprocess.py
@@ -1,246 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-This code is refered from:
-https://github.com/WenmuZhou/DBNet.pytorch/blob/master/post_processing/seg_detector_representer.py
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-import cv2
-# import paddle
-from shapely.geometry import Polygon
-import pyclipper
-
-
-class DBPostProcess(object):
-    """
-    The post process for Differentiable Binarization (DB).
-    """
-
-    def __init__(self,
-                 thresh=0.3,
-                 box_thresh=0.7,
-                 max_candidates=1000,
-                 unclip_ratio=2.0,
-                 use_dilation=False,
-                 score_mode="fast",
-                 box_type='quad',
-                 **kwargs):
-        self.thresh = thresh
-        self.box_thresh = box_thresh
-        self.max_candidates = max_candidates
-        self.unclip_ratio = unclip_ratio
-        self.min_size = 3
-        self.score_mode = score_mode
-        self.box_type = box_type
-        assert score_mode in [
-            "slow", "fast"
-        ], "Score mode must be in [slow, fast] but got: {}".format(score_mode)
-
-        self.dilation_kernel = None if not use_dilation else np.array(
-            [[1, 1], [1, 1]])
-
-    def polygons_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
-        '''
-        _bitmap: single map with shape (1, H, W),
-            whose values are binarized as {0, 1}
-        '''
-
-        bitmap = _bitmap
-        height, width = bitmap.shape
-
-        boxes = []
-        scores = []
-
-        contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8),
-                                       cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
-
-        for contour in contours[:self.max_candidates]:
-            epsilon = 0.002 * cv2.arcLength(contour, True)
-            approx = cv2.approxPolyDP(contour, epsilon, True)
-            points = approx.reshape((-1, 2))
-            if points.shape[0] < 4:
-                continue
-
-            score = self.box_score_fast(pred, points.reshape(-1, 2))
-            if self.box_thresh > score:
-                continue
-
-            if points.shape[0] > 2:
-                box = self.unclip(points, self.unclip_ratio)
-                if len(box) > 1:
-                    continue
-            else:
-                continue
-            box = box.reshape(-1, 2)
-
-            _, sside = self.get_mini_boxes(box.reshape((-1, 1, 2)))
-            if sside < self.min_size + 2:
-                continue
-
-            box = np.array(box)
-            box[:, 0] = np.clip(
-                np.round(box[:, 0] / width * dest_width), 0, dest_width)
-            box[:, 1] = np.clip(
-                np.round(box[:, 1] / height * dest_height), 0, dest_height)
-            boxes.append(box.tolist())
-            scores.append(score)
-        return boxes, scores
-
-    def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
-        '''
-        _bitmap: single map with shape (1, H, W),
-            whose values are binarized as {0, 1}
-        '''
-
-        bitmap = _bitmap
-        height, width = bitmap.shape
-
-        outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST,
-                                cv2.CHAIN_APPROX_SIMPLE)
-        if len(outs) == 3:
-            img, contours, _ = outs[0], outs[1], outs[2]
-        elif len(outs) == 2:
-            contours, _ = outs[0], outs[1]
-
-        num_contours = min(len(contours), self.max_candidates)
-
-        boxes = []
-        scores = []
-        for index in range(num_contours):
-            contour = contours[index]
-            points, sside = self.get_mini_boxes(contour)
-            if sside < self.min_size:
-                continue
-            points = np.array(points)
-            if self.score_mode == "fast":
-                score = self.box_score_fast(pred, points.reshape(-1, 2))
-            else:
-                score = self.box_score_slow(pred, contour)
-            if self.box_thresh > score:
-                continue
-
-            box = self.unclip(points, self.unclip_ratio).reshape(-1, 1, 2)
-            box, sside = self.get_mini_boxes(box)
-            if sside < self.min_size + 2:
-                continue
-            box = np.array(box)
-
-            box[:, 0] = np.clip(
-                np.round(box[:, 0] / width * dest_width), 0, dest_width)
-            box[:, 1] = np.clip(
-                np.round(box[:, 1] / height * dest_height), 0, dest_height)
-            boxes.append(box.astype("int32"))
-            scores.append(score)
-        return np.array(boxes, dtype="int32"), scores
-
-    def unclip(self, box, unclip_ratio):
-        poly = Polygon(box)
-        distance = poly.area * unclip_ratio / poly.length
-        offset = pyclipper.PyclipperOffset()
-        offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
-        expanded = np.array(offset.Execute(distance))
-        return expanded
-
-    def get_mini_boxes(self, contour):
-        bounding_box = cv2.minAreaRect(contour)
-        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])
-
-        index_1, index_2, index_3, index_4 = 0, 1, 2, 3
-        if points[1][1] > points[0][1]:
-            index_1 = 0
-            index_4 = 1
-        else:
-            index_1 = 1
-            index_4 = 0
-        if points[3][1] > points[2][1]:
-            index_2 = 2
-            index_3 = 3
-        else:
-            index_2 = 3
-            index_3 = 2
-
-        box = [
-            points[index_1], points[index_2], points[index_3], points[index_4]
-        ]
-        return box, min(bounding_box[1])
-
-    def box_score_fast(self, bitmap, _box):
-        '''
-        box_score_fast: use bbox mean score as the mean score
-        '''
-        h, w = bitmap.shape[:2]
-        box = _box.copy()
-        xmin = np.clip(np.floor(box[:, 0].min()).astype("int32"), 0, w - 1)
-        xmax = np.clip(np.ceil(box[:, 0].max()).astype("int32"), 0, w - 1)
-        ymin = np.clip(np.floor(box[:, 1].min()).astype("int32"), 0, h - 1)
-        ymax = np.clip(np.ceil(box[:, 1].max()).astype("int32"), 0, h - 1)
-
-        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
-        box[:, 0] = box[:, 0] - xmin
-        box[:, 1] = box[:, 1] - ymin
-        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype("int32"), 1)
-        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]
-
-    def box_score_slow(self, bitmap, contour):
-        '''
-        box_score_slow: use polyon mean score as the mean score
-        '''
-        h, w = bitmap.shape[:2]
-        contour = contour.copy()
-        contour = np.reshape(contour, (-1, 2))
-
-        xmin = np.clip(np.min(contour[:, 0]), 0, w - 1)
-        xmax = np.clip(np.max(contour[:, 0]), 0, w - 1)
-        ymin = np.clip(np.min(contour[:, 1]), 0, h - 1)
-        ymax = np.clip(np.max(contour[:, 1]), 0, h - 1)
-
-        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
-
-        contour[:, 0] = contour[:, 0] - xmin
-        contour[:, 1] = contour[:, 1] - ymin
-
-        cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype("int32"), 1)
-        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]
-
-    def __call__(self, outs_dict, shape_list):
-        pred = outs_dict['maps']
-        # if isinstance(pred, paddle.Tensor):
-        #     pred = pred.numpy()
-        pred = pred[:, 0, :, :]
-        segmentation = pred > self.thresh
-
-        boxes_batch = []
-        for batch_index in range(pred.shape[0]):
-            src_h, src_w, ratio_h, ratio_w = shape_list[batch_index]
-            if self.dilation_kernel is not None:
-                mask = cv2.dilate(
-                    np.array(segmentation[batch_index]).astype(np.uint8),
-                    self.dilation_kernel)
-            else:
-                mask = segmentation[batch_index]
-            if self.box_type == 'poly':
-                boxes, scores = self.polygons_from_bitmap(pred[batch_index],
-                                                          mask, src_w, src_h)
-            elif self.box_type == 'quad':
-                boxes, scores = self.boxes_from_bitmap(pred[batch_index], mask,
-                                                       src_w, src_h)
-            else:
-                raise ValueError("box_type can only be one of ['quad', 'poly']")
-
-            boxes_batch.append({'points': boxes})
-        return boxes_batch
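The deleted module above is PaddleOCR's DB post-processing. For orientation, here is a minimal sketch (not part of the package) of how its `__call__` was driven, assuming the class is importable and using a synthetic probability map; `outs_dict['maps']` must be (batch, 1, H, W) and each `shape_list` row is [src_h, src_w, ratio_h, ratio_w]:

import numpy as np

post = DBPostProcess(thresh=0.3, box_thresh=0.6, box_type='quad')
prob_map = np.random.rand(1, 1, 640, 640).astype(np.float32)  # stand-in for the detector's 'maps' output
shape_list = np.array([[1080, 1920, 640 / 1080, 640 / 1920]])  # [src_h, src_w, ratio_h, ratio_w]
result = post({'maps': prob_map}, shape_list)
# result[0]['points']: int32 array of quadrilaterals, shape (num_boxes, 4, 2),
# already rescaled to source-image pixels (possibly empty for noise input).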
doc_page_extractor/onnxocr/imaug.py
@@ -1,32 +0,0 @@
-from .operators import *
-
-
-def transform(data, ops=None):
-    """transform"""
-    if ops is None:
-        ops = []
-    for op in ops:
-        data = op(data)
-        if data is None:
-            return None
-    return data
-
-
-def create_operators(op_param_list, global_config=None):
-    """
-    create operators based on the config
-
-    Args:
-        params(list): a dict list, used to create some operators
-    """
-    assert isinstance(op_param_list, list), "operator config should be a list"
-    ops = []
-    for operator in op_param_list:
-        assert isinstance(operator, dict) and len(operator) == 1, "yaml format error"
-        op_name = list(operator)[0]
-        param = {} if operator[op_name] is None else operator[op_name]
-        if global_config is not None:
-            param.update(global_config)
-        op = eval(op_name)(**param)
-        ops.append(op)
-    return ops
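`create_operators` instantiates each class named in a YAML-style config via `eval`, so the operator classes must be in scope (hence the `from .operators import *`). An illustrative pairing with `transform`, assuming the operators from the next hunk are available and `raw_bgr_image` is a hypothetical HxWx3 uint8 array:

op_config = [
    {'DetResizeForTest': {'limit_side_len': 960, 'limit_type': 'max'}},
    {'NormalizeImage': {'scale': '1./255.', 'mean': [0.485, 0.456, 0.406],
                        'std': [0.229, 0.224, 0.225], 'order': 'hwc'}},
    {'ToCHWImage': None},
    {'KeepKeys': {'keep_keys': ['image', 'shape']}},
]
ops = create_operators(op_config)                        # eval()s each class name with its params
img, shape = transform({'image': raw_bgr_image}, ops)    # KeepKeys turns the dict into [image, shape]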
doc_page_extractor/onnxocr/operators.py
@@ -1,187 +0,0 @@
-import numpy as np
-import cv2
-import sys
-import math
-
-
-class NormalizeImage(object):
-    """ normalize image such as substract mean, divide std
-    """
-
-    def __init__(self, scale=None, mean=None, std=None, order='chw', **kwargs):
-        if isinstance(scale, str):
-            scale = eval(scale)
-        self.scale = np.float32(scale if scale is not None else 1.0 / 255.0)
-        mean = mean if mean is not None else [0.485, 0.456, 0.406]
-        std = std if std is not None else [0.229, 0.224, 0.225]
-
-        shape = (3, 1, 1) if order == 'chw' else (1, 1, 3)
-        self.mean = np.array(mean).reshape(shape).astype('float32')
-        self.std = np.array(std).reshape(shape).astype('float32')
-
-    def __call__(self, data):
-        img = data['image']
-        from PIL import Image
-        if isinstance(img, Image.Image):
-            img = np.array(img)
-        assert isinstance(img,
-                          np.ndarray), "invalid input 'img' in NormalizeImage"
-        data['image'] = (
-            img.astype('float32') * self.scale - self.mean) / self.std
-        return data
-
-
-class DetResizeForTest(object):
-    def __init__(self, **kwargs):
-        super(DetResizeForTest, self).__init__()
-        self.resize_type = 0
-        self.keep_ratio = False
-        if 'image_shape' in kwargs:
-            self.image_shape = kwargs['image_shape']
-            self.resize_type = 1
-            if 'keep_ratio' in kwargs:
-                self.keep_ratio = kwargs['keep_ratio']
-        elif 'limit_side_len' in kwargs:
-            self.limit_side_len = kwargs['limit_side_len']
-            self.limit_type = kwargs.get('limit_type', 'min')
-        elif 'resize_long' in kwargs:
-            self.resize_type = 2
-            self.resize_long = kwargs.get('resize_long', 960)
-        else:
-            self.limit_side_len = 736
-            self.limit_type = 'min'
-
-    def __call__(self, data):
-        img = data['image']
-        src_h, src_w, _ = img.shape
-        if sum([src_h, src_w]) < 64:
-            img = self.image_padding(img)
-
-        if self.resize_type == 0:
-            # img, shape = self.resize_image_type0(img)
-            img, [ratio_h, ratio_w] = self.resize_image_type0(img)
-        elif self.resize_type == 2:
-            img, [ratio_h, ratio_w] = self.resize_image_type2(img)
-        else:
-            # img, shape = self.resize_image_type1(img)
-            img, [ratio_h, ratio_w] = self.resize_image_type1(img)
-        data['image'] = img
-        data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w])
-        return data
-
-    def image_padding(self, im, value=0):
-        h, w, c = im.shape
-        im_pad = np.zeros((max(32, h), max(32, w), c), np.uint8) + value
-        im_pad[:h, :w, :] = im
-        return im_pad
-
-    def resize_image_type1(self, img):
-        resize_h, resize_w = self.image_shape
-        ori_h, ori_w = img.shape[:2]  # (h, w, c)
-        if self.keep_ratio is True:
-            resize_w = ori_w * resize_h / ori_h
-            N = math.ceil(resize_w / 32)
-            resize_w = N * 32
-        ratio_h = float(resize_h) / ori_h
-        ratio_w = float(resize_w) / ori_w
-        img = cv2.resize(img, (int(resize_w), int(resize_h)))
-        # return img, np.array([ori_h, ori_w])
-        return img, [ratio_h, ratio_w]
-
-    def resize_image_type0(self, img):
-        """
-        resize image to a size multiple of 32 which is required by the network
-        args:
-            img(array): array with shape [h, w, c]
-        return(tuple):
-            img, (ratio_h, ratio_w)
-        """
-        limit_side_len = self.limit_side_len
-        h, w, c = img.shape
-
-        # limit the max side
-        if self.limit_type == 'max':
-            if max(h, w) > limit_side_len:
-                if h > w:
-                    ratio = float(limit_side_len) / h
-                else:
-                    ratio = float(limit_side_len) / w
-            else:
-                ratio = 1.
-        elif self.limit_type == 'min':
-            if min(h, w) < limit_side_len:
-                if h < w:
-                    ratio = float(limit_side_len) / h
-                else:
-                    ratio = float(limit_side_len) / w
-            else:
-                ratio = 1.
-        elif self.limit_type == 'resize_long':
-            ratio = float(limit_side_len) / max(h, w)
-        else:
-            raise Exception('not support limit type, image ')
-        resize_h = int(h * ratio)
-        resize_w = int(w * ratio)
-
-        resize_h = max(int(round(resize_h / 32) * 32), 32)
-        resize_w = max(int(round(resize_w / 32) * 32), 32)
-
-        try:
-            if int(resize_w) <= 0 or int(resize_h) <= 0:
-                return None, (None, None)
-            img = cv2.resize(img, (int(resize_w), int(resize_h)))
-        except:
-            print(img.shape, resize_w, resize_h)
-            sys.exit(0)
-        ratio_h = resize_h / float(h)
-        ratio_w = resize_w / float(w)
-        return img, [ratio_h, ratio_w]
-
-    def resize_image_type2(self, img):
-        h, w, _ = img.shape
-
-        resize_w = w
-        resize_h = h
-
-        if resize_h > resize_w:
-            ratio = float(self.resize_long) / resize_h
-        else:
-            ratio = float(self.resize_long) / resize_w
-
-        resize_h = int(resize_h * ratio)
-        resize_w = int(resize_w * ratio)
-
-        max_stride = 128
-        resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
-        resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
-        img = cv2.resize(img, (int(resize_w), int(resize_h)))
-        ratio_h = resize_h / float(h)
-        ratio_w = resize_w / float(w)
-
-        return img, [ratio_h, ratio_w]
-
-
-class ToCHWImage(object):
-    """ convert hwc image to chw image
-    """
-
-    def __init__(self, **kwargs):
-        pass
-
-    def __call__(self, data):
-        img = data['image']
-        from PIL import Image
-        if isinstance(img, Image.Image):
-            img = np.array(img)
-        data['image'] = img.transpose((2, 0, 1))
-        return data
-
-
-class KeepKeys(object):
-    def __init__(self, keep_keys, **kwargs):
-        self.keep_keys = keep_keys
-
-    def __call__(self, data):
-        data_list = []
-        for key in self.keep_keys:
-            data_list.append(data[key])
-        return data_list
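A worked example (with assumed inputs) of `resize_image_type0`'s multiple-of-32 rounding: for a 1080x1920 frame with `limit_type='max'` and `limit_side_len=960`, the ratio is 960/1920 = 0.5, giving 540x960, which rounds to 544x960 (round(540/32) = 17, 17*32 = 544), so ratio_h = 544/1080 ≈ 0.5037 and ratio_w = 0.5:

import numpy as np

resizer = DetResizeForTest(limit_side_len=960, limit_type='max')
frame = np.zeros((1080, 1920, 3), dtype=np.uint8)  # hypothetical input frame
out = resizer({'image': frame})
print(out['image'].shape)  # (544, 960, 3)
print(out['shape'])        # [1080, 1920, ~0.5037, 0.5]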
doc_page_extractor/onnxocr/predict_base.py
@@ -1,57 +0,0 @@
-class PredictBase(object):
-    def __init__(self):
-        self._onnxruntime = None
-
-    @property
-    def onnxruntime(self):
-        if self._onnxruntime is None:
-            import onnxruntime
-            self._onnxruntime = onnxruntime
-        return self._onnxruntime
-
-    def get_onnx_session(self, model_dir, use_gpu):
-        # use GPU
-        if use_gpu:
-            providers = providers=['CUDAExecutionProvider']
-        else:
-            providers = providers = ['CPUExecutionProvider']
-
-        onnx_session = self.onnxruntime.InferenceSession(model_dir, None, providers=providers)
-
-        # print("providers:", onnxruntime.get_device())
-        return onnx_session
-
-
-    def get_output_name(self, onnx_session):
-        """
-        output_name = onnx_session.get_outputs()[0].name
-        :param onnx_session:
-        :return:
-        """
-        output_name = []
-        for node in onnx_session.get_outputs():
-            output_name.append(node.name)
-        return output_name
-
-    def get_input_name(self, onnx_session):
-        """
-        input_name = onnx_session.get_inputs()[0].name
-        :param onnx_session:
-        :return:
-        """
-        input_name = []
-        for node in onnx_session.get_inputs():
-            input_name.append(node.name)
-        return input_name
-
-    def get_input_feed(self, input_name, image_numpy):
-        """
-        input_feed={self.input_name: image_numpy}
-        :param input_name:
-        :param image_numpy:
-        :return:
-        """
-        input_feed = {}
-        for name in input_name:
-            input_feed[name] = image_numpy
-        return input_feed
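A sketch of the pattern the subclasses followed (the model path and `batch_tensor` below are placeholders, not names from the package): discover the graph's input/output names, build the feed dict, then run the session.

predictor = PredictBase()
session = predictor.get_onnx_session("det_model.onnx", use_gpu=False)  # hypothetical path
input_names = predictor.get_input_name(session)     # names reported by the ONNX graph
output_names = predictor.get_output_name(session)
feed = predictor.get_input_feed(input_names, batch_tensor)  # batch_tensor: np.float32 NCHW array
outputs = session.run(output_names, input_feed=feed)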
doc_page_extractor/onnxocr/predict_cls.py
@@ -1,109 +0,0 @@
-import cv2
-import copy
-import numpy as np
-import math
-
-from .cls_postprocess import ClsPostProcess
-from .predict_base import PredictBase
-
-
-class TextClassifier(PredictBase):
-    def __init__(self, args):
-        super().__init__()
-        self.cls_image_shape = args.cls_image_shape
-        self.cls_batch_num = args.cls_batch_num
-        self.cls_thresh = args.cls_thresh
-        self.postprocess_op = ClsPostProcess(label_list=args.label_list)
-        self._args = args
-
-        # initialize the model
-        self._cls_onnx_session = None
-        self._cls_input_name = None
-        self._cls_output_name = None
-
-    @property
-    def cls_onnx_session(self):
-        if self._cls_onnx_session is None:
-            self._cls_onnx_session = self.get_onnx_session(self._args.cls_model_dir, self._args.use_gpu)
-        return self._cls_onnx_session
-
-    @property
-    def cls_input_name(self):
-        if self._cls_input_name is None:
-            self._cls_input_name = self.get_input_name(self.cls_onnx_session)
-        return self._cls_input_name
-
-    @property
-    def cls_output_name(self):
-        if self._cls_output_name is None:
-            self._cls_output_name = self.get_output_name(self.cls_onnx_session)
-        return self._cls_output_name
-
-    def resize_norm_img(self, img):
-        imgC, imgH, imgW = self.cls_image_shape
-        h = img.shape[0]
-        w = img.shape[1]
-        ratio = w / float(h)
-        if math.ceil(imgH * ratio) > imgW:
-            resized_w = imgW
-        else:
-            resized_w = int(math.ceil(imgH * ratio))
-        resized_image = cv2.resize(img, (resized_w, imgH))
-        resized_image = resized_image.astype("float32")
-        if self.cls_image_shape[0] == 1:
-            resized_image = resized_image / 255
-            resized_image = resized_image[np.newaxis, :]
-        else:
-            resized_image = resized_image.transpose((2, 0, 1)) / 255
-        resized_image -= 0.5
-        resized_image /= 0.5
-        padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
-        padding_im[:, :, 0:resized_w] = resized_image
-        return padding_im
-
-    def __call__(self, img_list):
-        img_list = copy.deepcopy(img_list)
-        img_num = len(img_list)
-        # Calculate the aspect ratio of all text bars
-        width_list = []
-        for img in img_list:
-            width_list.append(img.shape[1] / float(img.shape[0]))
-        # Sorting can speed up the cls process
-        indices = np.argsort(np.array(width_list))
-
-        cls_res = [["", 0.0]] * img_num
-        batch_num = self.cls_batch_num
-
-        for beg_img_no in range(0, img_num, batch_num):
-
-            end_img_no = min(img_num, beg_img_no + batch_num)
-            norm_img_batch = []
-            max_wh_ratio = 0
-
-            for ino in range(beg_img_no, end_img_no):
-                h, w = img_list[indices[ino]].shape[0:2]
-                wh_ratio = w * 1.0 / h
-                max_wh_ratio = max(max_wh_ratio, wh_ratio)
-            for ino in range(beg_img_no, end_img_no):
-                norm_img = self.resize_norm_img(img_list[indices[ino]])
-                norm_img = norm_img[np.newaxis, :]
-                norm_img_batch.append(norm_img)
-            norm_img_batch = np.concatenate(norm_img_batch)
-            norm_img_batch = norm_img_batch.copy()
-
-            input_feed = self.get_input_feed(self.cls_input_name, norm_img_batch)
-            outputs = self.cls_onnx_session.run(
-                self.cls_output_name, input_feed=input_feed
-            )
-
-            prob_out = outputs[0]
-
-            cls_result = self.postprocess_op(prob_out)
-            for rno in range(len(cls_result)):
-                label, score = cls_result[rno]
-                cls_res[indices[beg_img_no + rno]] = [label, score]
-                if "180" in label and score > self.cls_thresh:
-                    img_list[indices[beg_img_no + rno]] = cv2.rotate(
-                        img_list[indices[beg_img_no + rno]], 1
-                    )
-        return img_list, cls_res
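An illustrative call, assuming the deleted onnxocr modules are importable; the `args` object is a stand-in mirroring only the attributes the class reads, the model path is hypothetical, and `text_crops` is a hypothetical list of HxWx3 uint8 crops:

from types import SimpleNamespace

args = SimpleNamespace(
    cls_image_shape=[3, 48, 192],
    cls_batch_num=6,
    cls_thresh=0.9,
    label_list=['0', '180'],
    cls_model_dir='cls_model.onnx',  # hypothetical path
    use_gpu=False,
)
classifier = TextClassifier(args)
rotated_crops, cls_res = classifier(text_crops)
# cls_res[i] == [label, score]; crops labelled '180' above cls_thresh come back
# rotated 180 degrees (cv2.rotate(..., 1) is cv2.ROTATE_180).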