doc-page-extractor 0.1.1__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. doc_page_extractor/__init__.py +5 -14
  2. doc_page_extractor/check_env.py +40 -0
  3. doc_page_extractor/extractor.py +87 -212
  4. doc_page_extractor/model.py +97 -0
  5. doc_page_extractor/parser.py +51 -0
  6. doc_page_extractor/plot.py +52 -79
  7. doc_page_extractor/redacter.py +111 -0
  8. doc_page_extractor-1.0.2.dist-info/METADATA +120 -0
  9. doc_page_extractor-1.0.2.dist-info/RECORD +11 -0
  10. {doc_page_extractor-0.1.1.dist-info → doc_page_extractor-1.0.2.dist-info}/WHEEL +1 -2
  11. doc_page_extractor-1.0.2.dist-info/licenses/LICENSE +21 -0
  12. doc_page_extractor/clipper.py +0 -119
  13. doc_page_extractor/downloader.py +0 -16
  14. doc_page_extractor/latex.py +0 -57
  15. doc_page_extractor/layout_order.py +0 -240
  16. doc_page_extractor/layoutreader.py +0 -126
  17. doc_page_extractor/ocr.py +0 -175
  18. doc_page_extractor/ocr_corrector.py +0 -126
  19. doc_page_extractor/onnxocr/__init__.py +0 -1
  20. doc_page_extractor/onnxocr/cls_postprocess.py +0 -26
  21. doc_page_extractor/onnxocr/db_postprocess.py +0 -246
  22. doc_page_extractor/onnxocr/imaug.py +0 -32
  23. doc_page_extractor/onnxocr/operators.py +0 -187
  24. doc_page_extractor/onnxocr/predict_base.py +0 -52
  25. doc_page_extractor/onnxocr/predict_cls.py +0 -89
  26. doc_page_extractor/onnxocr/predict_det.py +0 -120
  27. doc_page_extractor/onnxocr/predict_rec.py +0 -321
  28. doc_page_extractor/onnxocr/predict_system.py +0 -97
  29. doc_page_extractor/onnxocr/rec_postprocess.py +0 -896
  30. doc_page_extractor/onnxocr/utils.py +0 -71
  31. doc_page_extractor/overlap.py +0 -167
  32. doc_page_extractor/raw_optimizer.py +0 -104
  33. doc_page_extractor/rectangle.py +0 -72
  34. doc_page_extractor/rotation.py +0 -158
  35. doc_page_extractor/struct_eqtable/__init__.py +0 -49
  36. doc_page_extractor/struct_eqtable/internvl/__init__.py +0 -2
  37. doc_page_extractor/struct_eqtable/internvl/conversation.py +0 -394
  38. doc_page_extractor/struct_eqtable/internvl/internvl.py +0 -198
  39. doc_page_extractor/struct_eqtable/internvl/internvl_lmdeploy.py +0 -81
  40. doc_page_extractor/struct_eqtable/pix2s/__init__.py +0 -3
  41. doc_page_extractor/struct_eqtable/pix2s/pix2s.py +0 -76
  42. doc_page_extractor/struct_eqtable/pix2s/pix2s_trt.py +0 -1047
  43. doc_page_extractor/table.py +0 -71
  44. doc_page_extractor/types.py +0 -67
  45. doc_page_extractor/utils.py +0 -32
  46. doc_page_extractor-0.1.1.dist-info/METADATA +0 -84
  47. doc_page_extractor-0.1.1.dist-info/RECORD +0 -44
  48. doc_page_extractor-0.1.1.dist-info/licenses/LICENSE +0 -661
  49. doc_page_extractor-0.1.1.dist-info/top_level.txt +0 -2
  50. tests/__init__.py +0 -0
  51. tests/test_history_bus.py +0 -55
@@ -1,246 +0,0 @@
1
- # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- """
15
- This code is referred from:
16
- https://github.com/WenmuZhou/DBNet.pytorch/blob/master/post_processing/seg_detector_representer.py
17
- """
18
- from __future__ import absolute_import
19
- from __future__ import division
20
- from __future__ import print_function
21
-
22
- import numpy as np
23
- import cv2
24
- # import paddle
25
- from shapely.geometry import Polygon
26
- import pyclipper
27
-
28
-
29
class DBPostProcess(object):
    """
    The post process for Differentiable Binarization (DB).

    Thresholds the predicted probability map, extracts contours from the
    resulting binary map and turns them into scored text regions, either
    four-point boxes ('quad') or polygons ('poly'), rescaled to the size of
    the source image.
    """

    def __init__(self,
                 thresh=0.3,
                 box_thresh=0.7,
                 max_candidates=1000,
                 unclip_ratio=2.0,
                 use_dilation=False,
                 score_mode="fast",
                 box_type='quad',
                 **kwargs):
        """
        Args:
            thresh: pixels of the probability map above this value are text.
            box_thresh: candidates whose mean score is below this are dropped.
            max_candidates: at most this many contours are examined per image.
            unclip_ratio: expansion factor handed to `unclip`.
            use_dilation: when true, the binary map is dilated with a 2x2
                kernel of ones before contour extraction.
            score_mode: "fast" scores the minimum-area box, "slow" scores the
                contour itself (only consulted by `boxes_from_bitmap`).
            box_type: 'quad' or 'poly'; validated when the instance is called.
            **kwargs: accepted and ignored.
        """
        self.thresh = thresh
        self.box_thresh = box_thresh
        self.max_candidates = max_candidates
        self.unclip_ratio = unclip_ratio
        self.min_size = 3
        self.score_mode = score_mode
        self.box_type = box_type
        assert score_mode in [
            "slow", "fast"
        ], "Score mode must be in [slow, fast] but got: {}".format(score_mode)

        self.dilation_kernel = None if not use_dilation else np.array(
            [[1, 1], [1, 1]])

    def polygons_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
        '''
        Extract scored polygons from a binarized map.

        pred: probability map used for scoring
        _bitmap: single binarized map with shape (H, W),
            whose values are binarized as {0, 1}
        dest_width, dest_height: size the coordinates are rescaled to

        return: (list of polygons as nested point lists, list of scores)
        '''

        bitmap = _bitmap
        height, width = bitmap.shape

        boxes = []
        scores = []

        # findContours returns (contours, hierarchy) or, on OpenCV 3,
        # (image, contours, hierarchy); contours is always second to last.
        contours = cv2.findContours((bitmap * 255).astype(np.uint8),
                                    cv2.RETR_LIST,
                                    cv2.CHAIN_APPROX_SIMPLE)[-2]

        for contour in contours[:self.max_candidates]:
            epsilon = 0.002 * cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, epsilon, True)
            points = approx.reshape((-1, 2))
            if points.shape[0] < 4:
                continue

            score = self.box_score_fast(pred, points)
            if self.box_thresh > score:
                continue

            # Keep the candidate only when the offset produced exactly one
            # path: several paths are ambiguous and none leaves nothing to use.
            box = self.unclip(points, self.unclip_ratio)
            if len(box) != 1:
                continue
            box = box.reshape(-1, 2)

            _, sside = self.get_mini_boxes(box.reshape((-1, 1, 2)))
            if sside < self.min_size + 2:
                continue

            box = np.array(box)
            # Rescale from map coordinates to destination image coordinates.
            box[:, 0] = np.clip(
                np.round(box[:, 0] / width * dest_width), 0, dest_width)
            box[:, 1] = np.clip(
                np.round(box[:, 1] / height * dest_height), 0, dest_height)
            boxes.append(box.tolist())
            scores.append(score)
        return boxes, scores

    def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
        '''
        Extract scored four-point boxes from a binarized map.

        pred: probability map used for scoring
        _bitmap: single binarized map with shape (H, W),
            whose values are binarized as {0, 1}
        dest_width, dest_height: size the coordinates are rescaled to

        return: (int32 array of the kept boxes, list of scores)
        '''

        bitmap = _bitmap
        height, width = bitmap.shape

        # findContours returns (contours, hierarchy) or, on OpenCV 3,
        # (image, contours, hierarchy); contours is always second to last.
        contours = cv2.findContours((bitmap * 255).astype(np.uint8),
                                    cv2.RETR_LIST,
                                    cv2.CHAIN_APPROX_SIMPLE)[-2]

        boxes = []
        scores = []
        for contour in contours[:self.max_candidates]:
            points, sside = self.get_mini_boxes(contour)
            if sside < self.min_size:
                continue
            points = np.array(points)
            if self.score_mode == "fast":
                score = self.box_score_fast(pred, points.reshape(-1, 2))
            else:
                score = self.box_score_slow(pred, contour)
            if self.box_thresh > score:
                continue

            box = self.unclip(points, self.unclip_ratio).reshape(-1, 1, 2)
            box, sside = self.get_mini_boxes(box)
            if sside < self.min_size + 2:
                continue
            box = np.array(box)

            # Rescale from map coordinates to destination image coordinates.
            box[:, 0] = np.clip(
                np.round(box[:, 0] / width * dest_width), 0, dest_width)
            box[:, 1] = np.clip(
                np.round(box[:, 1] / height * dest_height), 0, dest_height)
            boxes.append(box.astype("int32"))
            scores.append(score)
        return np.array(boxes, dtype="int32"), scores

    def unclip(self, box, unclip_ratio):
        '''
        Offset the polygon `box` by area * unclip_ratio / perimeter using
        pyclipper with round joins, and return the resulting paths as an array.
        '''
        poly = Polygon(box)
        distance = poly.area * unclip_ratio / poly.length
        offset = pyclipper.PyclipperOffset()
        offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
        expanded = np.array(offset.Execute(distance))
        return expanded

    def get_mini_boxes(self, contour):
        '''
        Return the four corners of the minimum-area rectangle around
        `contour` in a fixed order, together with the rectangle's shorter side.
        '''
        bounding_box = cv2.minAreaRect(contour)
        # Sort corners by x so the first two are the left pair and the last
        # two the right pair; each pair is then ordered by y below.
        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])

        if points[1][1] > points[0][1]:
            index_1, index_4 = 0, 1
        else:
            index_1, index_4 = 1, 0
        if points[3][1] > points[2][1]:
            index_2, index_3 = 2, 3
        else:
            index_2, index_3 = 3, 2

        box = [
            points[index_1], points[index_2], points[index_3], points[index_4]
        ]
        return box, min(bounding_box[1])

    def box_score_fast(self, bitmap, _box):
        '''
        box_score_fast: use bbox mean score as the mean score
        '''
        h, w = bitmap.shape[:2]
        box = _box.copy()
        xmin = np.clip(np.floor(box[:, 0].min()).astype("int32"), 0, w - 1)
        xmax = np.clip(np.ceil(box[:, 0].max()).astype("int32"), 0, w - 1)
        ymin = np.clip(np.floor(box[:, 1].min()).astype("int32"), 0, h - 1)
        ymax = np.clip(np.ceil(box[:, 1].max()).astype("int32"), 0, h - 1)

        # Rasterise the box into a mask local to its bounding rectangle and
        # average the score map under that mask.
        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
        box[:, 0] = box[:, 0] - xmin
        box[:, 1] = box[:, 1] - ymin
        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype("int32"), 1)
        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]

    def box_score_slow(self, bitmap, contour):
        '''
        box_score_slow: use polygon mean score as the mean score
        '''
        h, w = bitmap.shape[:2]
        contour = contour.copy()
        contour = np.reshape(contour, (-1, 2))

        xmin = np.clip(np.min(contour[:, 0]), 0, w - 1)
        xmax = np.clip(np.max(contour[:, 0]), 0, w - 1)
        ymin = np.clip(np.min(contour[:, 1]), 0, h - 1)
        ymax = np.clip(np.max(contour[:, 1]), 0, h - 1)

        # Rasterise the contour into a mask local to its bounding rectangle
        # and average the score map under that mask.
        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)

        contour[:, 0] = contour[:, 0] - xmin
        contour[:, 1] = contour[:, 1] - ymin

        cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype("int32"), 1)
        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]

    def __call__(self, outs_dict, shape_list):
        '''
        outs_dict: dict whose 'maps' entry is indexed as [batch, 0, :, :]
        shape_list: per-image (src_h, src_w, ratio_h, ratio_w)

        return: one {'points': boxes} dict per image in the batch
        raises: ValueError when box_type is neither 'quad' nor 'poly'
        '''
        pred = outs_dict['maps']
        pred = pred[:, 0, :, :]
        segmentation = pred > self.thresh

        boxes_batch = []
        for batch_index in range(pred.shape[0]):
            # Only the source size is needed here; the ratios are ignored.
            src_h, src_w, _, _ = shape_list[batch_index]
            if self.dilation_kernel is not None:
                mask = cv2.dilate(
                    np.array(segmentation[batch_index]).astype(np.uint8),
                    self.dilation_kernel)
            else:
                mask = segmentation[batch_index]
            if self.box_type == 'poly':
                boxes, scores = self.polygons_from_bitmap(pred[batch_index],
                                                          mask, src_w, src_h)
            elif self.box_type == 'quad':
                boxes, scores = self.boxes_from_bitmap(pred[batch_index], mask,
                                                       src_w, src_h)
            else:
                raise ValueError("box_type can only be one of ['quad', 'poly']")

            boxes_batch.append({'points': boxes})
        return boxes_batch
@@ -1,32 +0,0 @@
1
- from .operators import *
2
-
3
-
4
def transform(data, ops=None):
    """Run `data` through each operator in turn; give up with None as soon
    as an operator returns None."""
    pipeline = [] if ops is None else ops
    for step in pipeline:
        data = step(data)
        if data is None:
            return None
    return data
13
-
14
-
15
def create_operators(op_param_list, global_config=None):
    """
    create operators based on the config

    Args:
        op_param_list(list): a dict list, used to create some operators; each
            dict has exactly one key, the operator name, mapped to that
            operator's keyword arguments (or None for no arguments)
        global_config(dict): optional keyword arguments merged into every
            operator's arguments, overriding per-operator values

    Returns:
        list: the instantiated operators, in the order they were configured
    """
    assert isinstance(op_param_list, list), "operator config should be a list"
    ops = []
    for operator in op_param_list:
        assert isinstance(operator, dict) and len(operator) == 1, "yaml format error"
        op_name = list(operator)[0]
        # Work on a copy so merging global_config never leaks into the
        # caller's configuration (which may be reused for another pipeline).
        param = {} if operator[op_name] is None else dict(operator[op_name])
        if global_config is not None:
            param.update(global_config)
        # SECURITY: eval() executes op_name as an arbitrary expression in this
        # module's namespace; only pass configuration from a trusted source.
        op = eval(op_name)(**param)
        ops.append(op)
    return ops
@@ -1,187 +0,0 @@
1
- import numpy as np
2
- import cv2
3
- import sys
4
- import math
5
-
6
-
7
class NormalizeImage(object):
    """ normalize image such as subtract mean, divide std
    """

    def __init__(self, scale=None, mean=None, std=None, order='chw', **kwargs):
        """
        Args:
            scale: multiplier applied before mean/std; a number or a string
                expression such as "1./255."; defaults to 1/255.
            mean: per-channel mean, defaults to [0.485, 0.456, 0.406].
            std: per-channel std, defaults to [0.229, 0.224, 0.225].
            order: 'chw' broadcasts mean/std as (3, 1, 1); any other value
                broadcasts them as (1, 1, 3).
            **kwargs: accepted and ignored.
        """
        if isinstance(scale, str):
            # SECURITY: eval() executes the string as an arbitrary expression;
            # only pass configuration from a trusted source.
            scale = eval(scale)
        self.scale = np.float32(scale if scale is not None else 1.0 / 255.0)
        mean = mean if mean is not None else [0.485, 0.456, 0.406]
        std = std if std is not None else [0.229, 0.224, 0.225]

        shape = (3, 1, 1) if order == 'chw' else (1, 1, 3)
        self.mean = np.array(mean).reshape(shape).astype('float32')
        self.std = np.array(std).reshape(shape).astype('float32')

    def __call__(self, data):
        """Replace data['image'] with (image * scale - mean) / std as float32
        and return the same dict."""
        img = data['image']
        if not isinstance(img, np.ndarray):
            # Pillow is only needed to convert PIL images, so it is imported
            # lazily instead of on every call.
            from PIL import Image
            if isinstance(img, Image.Image):
                img = np.array(img)
        assert isinstance(img,
                          np.ndarray), "invalid input 'img' in NormalizeImage"
        data['image'] = (
            img.astype('float32') * self.scale - self.mean) / self.std
        return data
32
-
33
-
34
class DetResizeForTest(object):
    """
    Resize an image for text detection and record the resize ratios.

    The resize strategy is chosen from the keyword arguments:
      * 'image_shape'    -> resize to that (h, w), optionally keeping the
                            aspect ratio ('keep_ratio')        (type 1)
      * 'limit_side_len' -> bound one side, see 'limit_type'   (type 0)
      * 'resize_long'    -> scale the longer side to that size (type 2)
      * none of these    -> limit_side_len=736, limit_type='min' (type 0)
    """

    def __init__(self, **kwargs):
        super(DetResizeForTest, self).__init__()
        self.resize_type = 0
        self.keep_ratio = False
        if 'image_shape' in kwargs:
            self.image_shape = kwargs['image_shape']
            self.resize_type = 1
            if 'keep_ratio' in kwargs:
                self.keep_ratio = kwargs['keep_ratio']
        elif 'limit_side_len' in kwargs:
            self.limit_side_len = kwargs['limit_side_len']
            self.limit_type = kwargs.get('limit_type', 'min')
        elif 'resize_long' in kwargs:
            self.resize_type = 2
            self.resize_long = kwargs.get('resize_long', 960)
        else:
            self.limit_side_len = 736
            self.limit_type = 'min'

    def __call__(self, data):
        """Resize data['image'] and store [src_h, src_w, ratio_h, ratio_w]
        in data['shape']; returns the same dict."""
        img = data['image']
        src_h, src_w, _ = img.shape
        # Very small images are padded first; src_h/src_w keep the
        # pre-padding size.
        if src_h + src_w < 64:
            img = self.image_padding(img)

        if self.resize_type == 0:
            img, (ratio_h, ratio_w) = self.resize_image_type0(img)
        elif self.resize_type == 2:
            img, (ratio_h, ratio_w) = self.resize_image_type2(img)
        else:
            img, (ratio_h, ratio_w) = self.resize_image_type1(img)
        data['image'] = img
        data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w])
        return data

    def image_padding(self, im, value=0):
        """Pad `im` at the bottom/right with `value` so each side is at
        least 32 pixels."""
        h, w, c = im.shape
        im_pad = np.zeros((max(32, h), max(32, w), c), np.uint8) + value
        im_pad[:h, :w, :] = im
        return im_pad

    def resize_image_type1(self, img):
        """Resize to self.image_shape; with keep_ratio the width follows the
        aspect ratio, rounded up to a multiple of 32.

        return: img, [ratio_h, ratio_w]
        """
        resize_h, resize_w = self.image_shape
        ori_h, ori_w = img.shape[:2]  # (h, w, c)
        if self.keep_ratio is True:
            resize_w = ori_w * resize_h / ori_h
            N = math.ceil(resize_w / 32)
            resize_w = N * 32
        ratio_h = float(resize_h) / ori_h
        ratio_w = float(resize_w) / ori_w
        img = cv2.resize(img, (int(resize_w), int(resize_h)))
        return img, [ratio_h, ratio_w]

    def resize_image_type0(self, img):
        """
        resize image to a size multiple of 32 which is required by the network
        args:
            img(array): array with shape [h, w, c]
        return(tuple):
            img, [ratio_h, ratio_w]
        raises:
            Exception: when self.limit_type is not 'max', 'min' or
                'resize_long'
            RuntimeError: when the underlying resize fails
        """
        limit_side_len = self.limit_side_len
        h, w = img.shape[:2]

        if self.limit_type == 'max':
            # limit the max side
            if max(h, w) > limit_side_len:
                if h > w:
                    ratio = float(limit_side_len) / h
                else:
                    ratio = float(limit_side_len) / w
            else:
                ratio = 1.
        elif self.limit_type == 'min':
            # enforce a minimum on the min side
            if min(h, w) < limit_side_len:
                if h < w:
                    ratio = float(limit_side_len) / h
                else:
                    ratio = float(limit_side_len) / w
            else:
                ratio = 1.
        elif self.limit_type == 'resize_long':
            ratio = float(limit_side_len) / max(h, w)
        else:
            raise Exception('not support limit type, image ')
        resize_h = int(h * ratio)
        resize_w = int(w * ratio)

        # Snap to the nearest multiple of 32, never below 32, so both sides
        # are always positive from here on.
        resize_h = max(int(round(resize_h / 32) * 32), 32)
        resize_w = max(int(round(resize_w / 32) * 32), 32)

        try:
            img = cv2.resize(img, (int(resize_w), int(resize_h)))
        except Exception as exc:
            # Surface the failure to the caller instead of terminating the
            # whole process with a success exit code.
            raise RuntimeError(
                "failed to resize image of shape {} to (w={}, h={})".format(
                    img.shape, resize_w, resize_h)) from exc
        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)
        return img, [ratio_h, ratio_w]

    def resize_image_type2(self, img):
        """Scale the longer side to self.resize_long, then round both sides
        up to a multiple of 128.

        return: img, [ratio_h, ratio_w]
        """
        h, w, _ = img.shape

        if h > w:
            ratio = float(self.resize_long) / h
        else:
            ratio = float(self.resize_long) / w

        resize_h = int(h * ratio)
        resize_w = int(w * ratio)

        max_stride = 128
        resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
        resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
        img = cv2.resize(img, (int(resize_w), int(resize_h)))
        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)

        return img, [ratio_h, ratio_w]
162
-
163
class ToCHWImage(object):
    """ convert hwc image to chw image
    """

    def __init__(self, **kwargs):
        # No configuration; keyword arguments are accepted and ignored.
        pass

    def __call__(self, data):
        """Replace data['image'] with its (2, 0, 1) transpose and return the
        same dict; PIL images are converted to arrays first."""
        img = data['image']
        if not isinstance(img, np.ndarray):
            # Pillow is only needed to convert PIL images, so it is imported
            # lazily instead of on every call.
            from PIL import Image
            if isinstance(img, Image.Image):
                img = np.array(img)
        data['image'] = img.transpose((2, 0, 1))
        return data
177
-
178
-
179
class KeepKeys(object):
    """Reduce a data dict to a list holding only the configured keys' values."""

    def __init__(self, keep_keys, **kwargs):
        self.keep_keys = keep_keys

    def __call__(self, data):
        # Values come out in the order the keys were configured.
        return [data[wanted] for wanted in self.keep_keys]
@@ -1,52 +0,0 @@
1
- import onnxruntime
2
-
3
class PredictBase(object):
    """Shared helpers for ONNX predictors: session creation and lookup of
    the model's input/output names."""

    def __init__(self):
        pass

    def get_onnx_session(self, model_dir, use_gpu):
        """
        Create an onnxruntime inference session for the model at `model_dir`.
        :param model_dir: passed to onnxruntime.InferenceSession as the model
        :param use_gpu: select the CUDA execution provider instead of the CPU one
        :return: the created session
        """
        if use_gpu:
            providers = ['CUDAExecutionProvider']
        else:
            providers = ['CPUExecutionProvider']

        onnx_session = onnxruntime.InferenceSession(model_dir, None, providers=providers)
        return onnx_session

    def get_output_name(self, onnx_session):
        """
        Collect the names of all outputs of the session.
        :param onnx_session:
        :return: list of output names
        """
        return [node.name for node in onnx_session.get_outputs()]

    def get_input_name(self, onnx_session):
        """
        Collect the names of all inputs of the session.
        :param onnx_session:
        :return: list of input names
        """
        return [node.name for node in onnx_session.get_inputs()]

    def get_input_feed(self, input_name, image_numpy):
        """
        Build the feed dict, mapping every input name to the same array.
        :param input_name: iterable of input names
        :param image_numpy: value fed to each input
        :return: dict of input name -> image_numpy
        """
        return {name: image_numpy for name in input_name}
@@ -1,89 +0,0 @@
1
- import cv2
2
- import copy
3
- import numpy as np
4
- import math
5
-
6
- from .cls_postprocess import ClsPostProcess
7
- from .predict_base import PredictBase
8
-
9
-
10
class TextClassifier(PredictBase):
    """Classify the orientation of cropped text images with an ONNX model
    and rotate those labelled as 180 degrees."""

    def __init__(self, args):
        """
        :param args: object providing cls_image_shape, cls_batch_num,
            cls_thresh, label_list, cls_model_dir and use_gpu
        """
        self.cls_image_shape = args.cls_image_shape
        self.cls_batch_num = args.cls_batch_num
        self.cls_thresh = args.cls_thresh
        self.postprocess_op = ClsPostProcess(label_list=args.label_list)

        # Initialise the model session and cache its input/output names.
        self.cls_onnx_session = self.get_onnx_session(args.cls_model_dir, args.use_gpu)
        self.cls_input_name = self.get_input_name(self.cls_onnx_session)
        self.cls_output_name = self.get_output_name(self.cls_onnx_session)

    def resize_norm_img(self, img):
        """Resize `img` to the classifier height (width capped at the
        classifier width), scale values to [-1, 1] and right-pad with zeros
        to the full (C, H, W) input shape."""
        imgC, imgH, imgW = self.cls_image_shape
        h = img.shape[0]
        w = img.shape[1]
        ratio = w / float(h)
        # Keep the aspect ratio unless that would exceed the input width.
        resized_w = min(imgW, int(math.ceil(imgH * ratio)))
        resized_image = cv2.resize(img, (resized_w, imgH))
        resized_image = resized_image.astype("float32")
        if self.cls_image_shape[0] == 1:
            resized_image = resized_image / 255
            resized_image = resized_image[np.newaxis, :]
        else:
            resized_image = resized_image.transpose((2, 0, 1)) / 255
        resized_image -= 0.5
        resized_image /= 0.5
        padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
        padding_im[:, :, 0:resized_w] = resized_image
        return padding_im

    def __call__(self, img_list):
        """
        :param img_list: list of image arrays (not modified; a deep copy is used)
        :return: (images with 180-degree ones rotated, [label, score] per
            image), both in the input order
        """
        img_list = copy.deepcopy(img_list)
        img_num = len(img_list)
        # Calculate the aspect ratio of all text bars
        width_list = [img.shape[1] / float(img.shape[0]) for img in img_list]
        # Sorting can speed up the cls process
        indices = np.argsort(np.array(width_list))

        # One independent placeholder per image (no aliased inner lists).
        cls_res = [["", 0.0] for _ in range(img_num)]
        batch_num = self.cls_batch_num

        for beg_img_no in range(0, img_num, batch_num):
            end_img_no = min(img_num, beg_img_no + batch_num)

            norm_img_batch = np.concatenate([
                self.resize_norm_img(img_list[indices[ino]])[np.newaxis, :]
                for ino in range(beg_img_no, end_img_no)
            ])

            input_feed = self.get_input_feed(self.cls_input_name, norm_img_batch)
            outputs = self.cls_onnx_session.run(
                self.cls_output_name, input_feed=input_feed
            )

            prob_out = outputs[0]

            cls_result = self.postprocess_op(prob_out)
            for rno, (label, score) in enumerate(cls_result):
                # Map the position in the sorted batch back to the input index.
                target = indices[beg_img_no + rno]
                cls_res[target] = [label, score]
                if "180" in label and score > self.cls_thresh:
                    img_list[target] = cv2.rotate(
                        img_list[target], cv2.ROTATE_180
                    )
        return img_list, cls_res