doc-page-extractor 0.2.0__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. doc_page_extractor/__init__.py +5 -15
  2. doc_page_extractor/check_env.py +40 -0
  3. doc_page_extractor/extractor.py +88 -215
  4. doc_page_extractor/model.py +97 -0
  5. doc_page_extractor/parser.py +51 -0
  6. doc_page_extractor/plot.py +52 -79
  7. doc_page_extractor/redacter.py +111 -0
  8. doc_page_extractor-1.0.2.dist-info/METADATA +120 -0
  9. doc_page_extractor-1.0.2.dist-info/RECORD +11 -0
  10. {doc_page_extractor-0.2.0.dist-info → doc_page_extractor-1.0.2.dist-info}/WHEEL +1 -2
  11. doc_page_extractor-1.0.2.dist-info/licenses/LICENSE +21 -0
  12. doc_page_extractor/clipper.py +0 -119
  13. doc_page_extractor/downloader.py +0 -16
  14. doc_page_extractor/latex.py +0 -31
  15. doc_page_extractor/layout_order.py +0 -237
  16. doc_page_extractor/layoutreader.py +0 -126
  17. doc_page_extractor/models.py +0 -92
  18. doc_page_extractor/ocr.py +0 -200
  19. doc_page_extractor/ocr_corrector.py +0 -126
  20. doc_page_extractor/onnxocr/__init__.py +0 -1
  21. doc_page_extractor/onnxocr/cls_postprocess.py +0 -26
  22. doc_page_extractor/onnxocr/db_postprocess.py +0 -246
  23. doc_page_extractor/onnxocr/imaug.py +0 -32
  24. doc_page_extractor/onnxocr/operators.py +0 -187
  25. doc_page_extractor/onnxocr/predict_base.py +0 -57
  26. doc_page_extractor/onnxocr/predict_cls.py +0 -109
  27. doc_page_extractor/onnxocr/predict_det.py +0 -139
  28. doc_page_extractor/onnxocr/predict_rec.py +0 -344
  29. doc_page_extractor/onnxocr/predict_system.py +0 -97
  30. doc_page_extractor/onnxocr/rec_postprocess.py +0 -896
  31. doc_page_extractor/onnxocr/utils.py +0 -71
  32. doc_page_extractor/overlap.py +0 -167
  33. doc_page_extractor/raw_optimizer.py +0 -104
  34. doc_page_extractor/rectangle.py +0 -72
  35. doc_page_extractor/rotation.py +0 -158
  36. doc_page_extractor/struct_eqtable/__init__.py +0 -49
  37. doc_page_extractor/struct_eqtable/internvl/__init__.py +0 -2
  38. doc_page_extractor/struct_eqtable/internvl/conversation.py +0 -394
  39. doc_page_extractor/struct_eqtable/internvl/internvl.py +0 -198
  40. doc_page_extractor/struct_eqtable/internvl/internvl_lmdeploy.py +0 -81
  41. doc_page_extractor/struct_eqtable/pix2s/__init__.py +0 -3
  42. doc_page_extractor/struct_eqtable/pix2s/pix2s.py +0 -76
  43. doc_page_extractor/struct_eqtable/pix2s/pix2s_trt.py +0 -1047
  44. doc_page_extractor/table.py +0 -70
  45. doc_page_extractor/types.py +0 -91
  46. doc_page_extractor/utils.py +0 -32
  47. doc_page_extractor-0.2.0.dist-info/METADATA +0 -85
  48. doc_page_extractor-0.2.0.dist-info/RECORD +0 -45
  49. doc_page_extractor-0.2.0.dist-info/licenses/LICENSE +0 -661
  50. doc_page_extractor-0.2.0.dist-info/top_level.txt +0 -2
  51. tests/__init__.py +0 -0
  52. tests/test_history_bus.py +0 -55
@@ -1,246 +0,0 @@
1
- # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- """
15
- This code is adapted from:
16
- https://github.com/WenmuZhou/DBNet.pytorch/blob/master/post_processing/seg_detector_representer.py
17
- """
18
- from __future__ import absolute_import
19
- from __future__ import division
20
- from __future__ import print_function
21
-
22
- import numpy as np
23
- import cv2
24
- # import paddle
25
- from shapely.geometry import Polygon
26
- import pyclipper
27
-
28
-
29
class DBPostProcess(object):
    """
    The post process for Differentiable Binarization (DB).

    Converts the probability map produced by a DB text detector into text
    regions, either as minimum-area quadrilaterals (``box_type='quad'``) or
    as free-form polygons (``box_type='poly'``), rescaled to the size of the
    source image.
    """

    def __init__(self,
                 thresh=0.3,
                 box_thresh=0.7,
                 max_candidates=1000,
                 unclip_ratio=2.0,
                 use_dilation=False,
                 score_mode="fast",
                 box_type='quad',
                 **kwargs):
        """
        Args:
            thresh: probability above which a pixel counts as text.
            box_thresh: minimum mean score a candidate must reach to be kept.
            max_candidates: maximum number of contours examined per image.
            unclip_ratio: expansion factor applied to every kept region.
            use_dilation: dilate the binary map with a 2x2 kernel of ones
                before extracting contours.
            score_mode: "fast" scores the candidate box, "slow" scores the
                raw contour (only honoured for ``box_type='quad'``).
            box_type: 'quad' or 'poly'; validated when the instance is called.
            **kwargs: ignored, accepted so extra config keys do not break
                construction.
        """
        self.thresh = thresh
        self.box_thresh = box_thresh
        self.max_candidates = max_candidates
        self.unclip_ratio = unclip_ratio
        # Candidates whose shortest side (in map pixels) is below this are dropped.
        self.min_size = 3
        self.score_mode = score_mode
        self.box_type = box_type
        assert score_mode in [
            "slow", "fast"
        ], "Score mode must be in [slow, fast] but got: {}".format(score_mode)

        self.dilation_kernel = None if not use_dilation else np.array(
            [[1, 1], [1, 1]])

    @staticmethod
    def _find_contours(bitmap):
        """Return the contours of a {0, 1} bitmap on both OpenCV 3.x and 4.x."""
        outs = cv2.findContours((bitmap * 255).astype(np.uint8),
                                cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        # OpenCV 3.x returns (image, contours, hierarchy) while 4.x returns
        # (contours, hierarchy); the contours are second-to-last in both.
        return outs[-2]

    @staticmethod
    def _expand_paths(box, unclip_ratio):
        """Offset ``box`` outwards and return the raw list of resulting paths."""
        poly = Polygon(box)
        distance = poly.area * unclip_ratio / poly.length
        offset = pyclipper.PyclipperOffset()
        offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
        return offset.Execute(distance)

    def polygons_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
        '''
        Extract text regions as polygons.

        pred: probability map the scores are read from.
        _bitmap: single 2-D map (H, W) whose values are binarized as {0, 1}.
        dest_width, dest_height: size the polygon coordinates are scaled to.

        Returns (boxes, scores): a list of polygons (each a list of [x, y])
        and the matching list of mean scores.
        '''

        bitmap = _bitmap
        height, width = bitmap.shape

        boxes = []
        scores = []

        contours = self._find_contours(bitmap)

        for contour in contours[:self.max_candidates]:
            epsilon = 0.002 * cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, epsilon, True)
            points = approx.reshape((-1, 2))
            if points.shape[0] < 4:
                continue

            score = self.box_score_fast(pred, points.reshape(-1, 2))
            if self.box_thresh > score:
                continue

            # Work on the raw path list: offsetting may yield zero or several
            # paths of different lengths, which cannot be packed into one
            # rectangular array. Only a single resulting polygon is usable.
            paths = self._expand_paths(points, self.unclip_ratio)
            if len(paths) != 1:
                continue
            box = np.array(paths[0]).reshape(-1, 2)

            _, sside = self.get_mini_boxes(box.reshape((-1, 1, 2)))
            if sside < self.min_size + 2:
                continue

            # Rescale from map coordinates to destination coordinates.
            box[:, 0] = np.clip(
                np.round(box[:, 0] / width * dest_width), 0, dest_width)
            box[:, 1] = np.clip(
                np.round(box[:, 1] / height * dest_height), 0, dest_height)
            boxes.append(box.tolist())
            scores.append(score)
        return boxes, scores

    def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
        '''
        Extract text regions as minimum-area quadrilaterals.

        pred: probability map the scores are read from.
        _bitmap: single 2-D map (H, W) whose values are binarized as {0, 1}.
        dest_width, dest_height: size the box coordinates are scaled to.

        Returns (boxes, scores): an int32 array of 4-point boxes and the
        matching list of mean scores.
        '''

        bitmap = _bitmap
        height, width = bitmap.shape

        contours = self._find_contours(bitmap)
        num_contours = min(len(contours), self.max_candidates)

        boxes = []
        scores = []
        for index in range(num_contours):
            contour = contours[index]
            points, sside = self.get_mini_boxes(contour)
            if sside < self.min_size:
                continue
            points = np.array(points)
            if self.score_mode == "fast":
                score = self.box_score_fast(pred, points.reshape(-1, 2))
            else:
                score = self.box_score_slow(pred, contour)
            if self.box_thresh > score:
                continue

            box = self.unclip(points, self.unclip_ratio).reshape(-1, 1, 2)
            box, sside = self.get_mini_boxes(box)
            if sside < self.min_size + 2:
                continue
            box = np.array(box)

            # Rescale from map coordinates to destination coordinates.
            box[:, 0] = np.clip(
                np.round(box[:, 0] / width * dest_width), 0, dest_width)
            box[:, 1] = np.clip(
                np.round(box[:, 1] / height * dest_height), 0, dest_height)
            boxes.append(box.astype("int32"))
            scores.append(score)
        return np.array(boxes, dtype="int32"), scores

    def unclip(self, box, unclip_ratio):
        """
        Expand ``box`` outwards by ``area * unclip_ratio / perimeter``.

        Returns the offset result packed into a NumPy array.
        """
        return np.array(self._expand_paths(box, unclip_ratio))

    def get_mini_boxes(self, contour):
        """
        Fit the minimum-area rectangle around ``contour``.

        Returns (box, short_side): the four corners in a fixed order and the
        length of the rectangle's shorter side.
        """
        bounding_box = cv2.minAreaRect(contour)
        # Sort by x so the first two corners are the left pair and the last
        # two the right pair; each pair is then ordered by y below.
        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])

        if points[1][1] > points[0][1]:
            index_1, index_4 = 0, 1
        else:
            index_1, index_4 = 1, 0
        if points[3][1] > points[2][1]:
            index_2, index_3 = 2, 3
        else:
            index_2, index_3 = 3, 2

        box = [
            points[index_1], points[index_2], points[index_3], points[index_4]
        ]
        return box, min(bounding_box[1])

    def box_score_fast(self, bitmap, _box):
        '''
        box_score_fast: use bbox mean score as the mean score

        The polygon ``_box`` is rasterised inside its axis-aligned bounding
        rectangle and the mean of ``bitmap`` under that mask is returned.
        '''
        h, w = bitmap.shape[:2]
        box = _box.copy()
        xmin = np.clip(np.floor(box[:, 0].min()).astype("int32"), 0, w - 1)
        xmax = np.clip(np.ceil(box[:, 0].max()).astype("int32"), 0, w - 1)
        ymin = np.clip(np.floor(box[:, 1].min()).astype("int32"), 0, h - 1)
        ymax = np.clip(np.ceil(box[:, 1].max()).astype("int32"), 0, h - 1)

        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
        # Shift the polygon into the coordinate frame of the cropped mask.
        box[:, 0] = box[:, 0] - xmin
        box[:, 1] = box[:, 1] - ymin
        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype("int32"), 1)
        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]

    def box_score_slow(self, bitmap, contour):
        '''
        box_score_slow: use polygon mean score as the mean score

        Same as box_score_fast but the mask is filled from the raw contour.
        '''
        h, w = bitmap.shape[:2]
        contour = contour.copy()
        contour = np.reshape(contour, (-1, 2))

        xmin = np.clip(np.min(contour[:, 0]), 0, w - 1)
        xmax = np.clip(np.max(contour[:, 0]), 0, w - 1)
        ymin = np.clip(np.min(contour[:, 1]), 0, h - 1)
        ymax = np.clip(np.max(contour[:, 1]), 0, h - 1)

        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)

        # Shift the contour into the coordinate frame of the cropped mask.
        contour[:, 0] = contour[:, 0] - xmin
        contour[:, 1] = contour[:, 1] - ymin

        cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype("int32"), 1)
        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]

    def __call__(self, outs_dict, shape_list):
        """
        Post-process a batch of detector outputs.

        Args:
            outs_dict: dict whose 'maps' entry is indexed as
                ``[batch, channel, row, col]``; channel 0 is used.
            shape_list: per-image ``(src_h, src_w, ratio_h, ratio_w)``.

        Returns:
            One ``{'points': boxes}`` dict per image in the batch.

        Raises:
            ValueError: if ``box_type`` is neither 'quad' nor 'poly'.
        """
        pred = outs_dict['maps']
        pred = pred[:, 0, :, :]
        segmentation = pred > self.thresh

        boxes_batch = []
        for batch_index in range(pred.shape[0]):
            src_h, src_w, ratio_h, ratio_w = shape_list[batch_index]
            if self.dilation_kernel is not None:
                mask = cv2.dilate(
                    np.array(segmentation[batch_index]).astype(np.uint8),
                    self.dilation_kernel)
            else:
                mask = segmentation[batch_index]
            if self.box_type == 'poly':
                boxes, scores = self.polygons_from_bitmap(pred[batch_index],
                                                          mask, src_w, src_h)
            elif self.box_type == 'quad':
                boxes, scores = self.boxes_from_bitmap(pred[batch_index], mask,
                                                       src_w, src_h)
            else:
                raise ValueError("box_type can only be one of ['quad', 'poly']")

            boxes_batch.append({'points': boxes})
        return boxes_batch
@@ -1,32 +0,0 @@
1
- from .operators import *
2
-
3
-
4
def transform(data, ops=None):
    """Pipe ``data`` through every operator in ``ops``, in order.

    Each operator receives the previous operator's result. As soon as one of
    them returns ``None`` the pipeline stops and ``None`` is returned. With
    no operators, ``data`` is returned unchanged.
    """
    pipeline = [] if ops is None else ops
    for apply_op in pipeline:
        data = apply_op(data)
        if data is None:
            return None
    return data
13
-
14
-
15
def create_operators(op_param_list, global_config=None):
    """
    create operators based on the config

    Args:
        op_param_list(list): a list of single-key dicts, each mapping an
            operator name to its keyword arguments (or to None for none)
        global_config(dict|None): extra keyword arguments merged into every
            operator's arguments, overriding keys of the same name

    Returns:
        list: the instantiated operators, in config order
    """
    assert isinstance(op_param_list, list), "operator config should be a list"
    ops = []
    for operator in op_param_list:
        assert isinstance(operator, dict) and len(operator) == 1, "yaml format error"
        op_name = list(operator)[0]
        # Work on a copy: merging global_config must not leak into the
        # caller's config, which may be reused to build operators again.
        param = {} if operator[op_name] is None else dict(operator[op_name])
        if global_config is not None:
            param.update(global_config)
        # NOTE(review): the operator name is resolved with eval(), so the
        # config must come from a trusted source.
        op = eval(op_name)(**param)
        ops.append(op)
    return ops
@@ -1,187 +0,0 @@
1
- import numpy as np
2
- import cv2
3
- import sys
4
- import math
5
-
6
-
7
class NormalizeImage(object):
    """ normalize image: multiply by a scale, subtract the mean, divide by std
    """

    def __init__(self, scale=None, mean=None, std=None, order='chw', **kwargs):
        # NOTE(review): a string scale (such as "1./255.") is evaluated with
        # eval(), so it must come from a trusted config.
        if isinstance(scale, str):
            scale = eval(scale)
        if scale is None:
            scale = 1.0 / 255.0
        self.scale = np.float32(scale)

        if mean is None:
            mean = [0.485, 0.456, 0.406]
        if std is None:
            std = [0.229, 0.224, 0.225]

        # Shape the statistics so they broadcast along the channel axis.
        stats_shape = (1, 1, 3) if order != 'chw' else (3, 1, 1)
        self.mean = np.array(mean).reshape(stats_shape).astype('float32')
        self.std = np.array(std).reshape(stats_shape).astype('float32')

    def __call__(self, data):
        image = data['image']
        from PIL import Image
        if isinstance(image, Image.Image):
            image = np.array(image)
        assert isinstance(image,
                          np.ndarray), "invalid input 'img' in NormalizeImage"
        scaled = image.astype('float32') * self.scale
        data['image'] = (scaled - self.mean) / self.std
        return data
32
-
33
-
34
class DetResizeForTest(object):
    """Resize an image for the text detector at inference time.

    The resize strategy is chosen from the keyword arguments:

    * ``image_shape`` (optionally ``keep_ratio``): resize to a fixed
      ``(height, width)`` -- resize type 1.
    * ``limit_side_len`` (optionally ``limit_type``): bound one side and
      round both sides to a multiple of 32 -- resize type 0.
    * ``resize_long``: scale the longer side and round both sides up to a
      multiple of 128 -- resize type 2.
    * none of the above: resize type 0 with ``limit_side_len=736`` and
      ``limit_type='min'``.
    """

    def __init__(self, **kwargs):
        super(DetResizeForTest, self).__init__()
        self.resize_type = 0
        self.keep_ratio = False
        if 'image_shape' in kwargs:
            self.image_shape = kwargs['image_shape']
            self.resize_type = 1
            if 'keep_ratio' in kwargs:
                self.keep_ratio = kwargs['keep_ratio']
        elif 'limit_side_len' in kwargs:
            self.limit_side_len = kwargs['limit_side_len']
            self.limit_type = kwargs.get('limit_type', 'min')
        elif 'resize_long' in kwargs:
            self.resize_type = 2
            self.resize_long = kwargs.get('resize_long', 960)
        else:
            self.limit_side_len = 736
            self.limit_type = 'min'

    def __call__(self, data):
        """Resize ``data['image']`` in place and record the scaling.

        Stores the resized image back under 'image' and
        ``[src_h, src_w, ratio_h, ratio_w]`` under 'shape', where the source
        size is taken before any padding. Returns ``data``.
        """
        img = data['image']
        src_h, src_w, _ = img.shape
        # Very small inputs are zero-padded up to at least 32x32 first.
        if sum([src_h, src_w]) < 64:
            img = self.image_padding(img)

        if self.resize_type == 0:
            img, [ratio_h, ratio_w] = self.resize_image_type0(img)
        elif self.resize_type == 2:
            img, [ratio_h, ratio_w] = self.resize_image_type2(img)
        else:
            img, [ratio_h, ratio_w] = self.resize_image_type1(img)
        data['image'] = img
        data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w])
        return data

    def image_padding(self, im, value=0):
        """Pad ``im`` at the bottom/right with ``value`` to at least 32x32."""
        h, w, c = im.shape
        im_pad = np.zeros((max(32, h), max(32, w), c), np.uint8) + value
        im_pad[:h, :w, :] = im
        return im_pad

    def resize_image_type1(self, img):
        """Resize to ``self.image_shape``.

        With ``keep_ratio`` the width follows the aspect ratio instead and is
        rounded up to a multiple of 32. Returns ``img, [ratio_h, ratio_w]``.
        """
        resize_h, resize_w = self.image_shape
        ori_h, ori_w = img.shape[:2]  # (h, w, c)
        if self.keep_ratio is True:
            resize_w = ori_w * resize_h / ori_h
            N = math.ceil(resize_w / 32)
            resize_w = N * 32
        ratio_h = float(resize_h) / ori_h
        ratio_w = float(resize_w) / ori_w
        img = cv2.resize(img, (int(resize_w), int(resize_h)))
        return img, [ratio_h, ratio_w]

    def resize_image_type0(self, img):
        """
        resize image to a size multiple of 32 which is required by the network
        args:
            img(array): array with shape [h, w, c]
        return(tuple):
            img, [ratio_h, ratio_w]
        raises:
            Exception: if ``limit_type`` is not 'max', 'min' or 'resize_long'
            RuntimeError: if the underlying resize call fails
        """
        limit_side_len = self.limit_side_len
        h, w, c = img.shape

        if self.limit_type == 'max':
            # limit the max side
            if max(h, w) > limit_side_len:
                if h > w:
                    ratio = float(limit_side_len) / h
                else:
                    ratio = float(limit_side_len) / w
            else:
                ratio = 1.
        elif self.limit_type == 'min':
            # enlarge until the min side reaches the limit
            if min(h, w) < limit_side_len:
                if h < w:
                    ratio = float(limit_side_len) / h
                else:
                    ratio = float(limit_side_len) / w
            else:
                ratio = 1.
        elif self.limit_type == 'resize_long':
            ratio = float(limit_side_len) / max(h, w)
        else:
            raise Exception('not support limit type, image ')
        resize_h = int(h * ratio)
        resize_w = int(w * ratio)

        # Round to the nearest multiple of 32, never going below 32.
        resize_h = max(int(round(resize_h / 32) * 32), 32)
        resize_w = max(int(round(resize_w / 32) * 32), 32)

        try:
            resized = cv2.resize(img, (int(resize_w), int(resize_h)))
        except Exception as err:
            # Surface the failure to the caller with the offending sizes
            # instead of terminating the whole process.
            raise RuntimeError(
                "failed to resize image of shape {} to (w={}, h={})".format(
                    img.shape, resize_w, resize_h)) from err
        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)
        return resized, [ratio_h, ratio_w]

    def resize_image_type2(self, img):
        """Scale the longer side to ``self.resize_long``.

        Both sides are then rounded up to a multiple of 128. Returns
        ``img, [ratio_h, ratio_w]``.
        """
        h, w, _ = img.shape

        resize_w = w
        resize_h = h

        if resize_h > resize_w:
            ratio = float(self.resize_long) / resize_h
        else:
            ratio = float(self.resize_long) / resize_w

        resize_h = int(resize_h * ratio)
        resize_w = int(resize_w * ratio)

        max_stride = 128
        resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
        resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
        img = cv2.resize(img, (int(resize_w), int(resize_h)))
        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)

        return img, [ratio_h, ratio_w]
162
-
163
class ToCHWImage(object):
    """ move the channel axis of an (h, w, c) image to the front: (c, h, w)
    """

    def __init__(self, **kwargs):
        # No configuration is needed; keyword arguments are accepted and ignored.
        pass

    def __call__(self, data):
        image = data['image']
        from PIL import Image
        if isinstance(image, Image.Image):
            # PIL images are converted to arrays before the axes are permuted.
            image = np.array(image)
        chw_axes = (2, 0, 1)
        data['image'] = image.transpose(chw_axes)
        return data
177
-
178
-
179
class KeepKeys(object):
    """Select a fixed set of entries from a data dict, as an ordered list."""

    def __init__(self, keep_keys, **kwargs):
        # Keys to extract, in the order the values should be returned.
        self.keep_keys = keep_keys

    def __call__(self, data):
        # A missing key raises KeyError, exactly like plain dict indexing.
        return [data[wanted] for wanted in self.keep_keys]
@@ -1,57 +0,0 @@
1
class PredictBase(object):
    """Shared helpers for predictors that run a model through onnxruntime."""

    def __init__(self):
        # onnxruntime is imported on first access, not at construction time.
        self._onnxruntime = None

    @property
    def onnxruntime(self):
        """The onnxruntime module, imported on first access and then cached."""
        if self._onnxruntime is None:
            import onnxruntime
            self._onnxruntime = onnxruntime
        return self._onnxruntime

    def get_onnx_session(self, model_dir, use_gpu):
        """
        Create an inference session for the model at ``model_dir``.

        :param model_dir: passed to InferenceSession as the model location
        :param use_gpu: use the CUDA execution provider if true, else the CPU one
        :return: the new onnxruntime InferenceSession
        """
        # Use the GPU when requested.
        provider = 'CUDAExecutionProvider' if use_gpu else 'CPUExecutionProvider'
        providers = [provider]

        onnx_session = self.onnxruntime.InferenceSession(model_dir, None, providers=providers)
        return onnx_session

    def get_output_name(self, onnx_session):
        """
        Collect the name of every output of the session.

        :param onnx_session:
        :return: list of output names, in session order
        """
        return [node.name for node in onnx_session.get_outputs()]

    def get_input_name(self, onnx_session):
        """
        Collect the name of every input of the session.

        :param onnx_session:
        :return: list of input names, in session order
        """
        return [node.name for node in onnx_session.get_inputs()]

    def get_input_feed(self, input_name, image_numpy):
        """
        Build the feed dict that maps every input name to the same array.

        :param input_name: iterable of input names
        :param image_numpy: value fed to each input
        :return: dict of input name -> image_numpy
        """
        return {name: image_numpy for name in input_name}
@@ -1,109 +0,0 @@
1
- import cv2
2
- import copy
3
- import numpy as np
4
- import math
5
-
6
- from .cls_postprocess import ClsPostProcess
7
- from .predict_base import PredictBase
8
-
9
-
10
class TextClassifier(PredictBase):
    """Classify the orientation of cropped text images with an ONNX model.

    Images labelled as rotated by 180 degrees with a score above
    ``cls_thresh`` are rotated back. The ONNX session and its input/output
    names are created lazily on first use.
    """

    def __init__(self, args):
        """
        Args:
            args: settings object providing ``cls_image_shape``,
                ``cls_batch_num``, ``cls_thresh``, ``label_list`` and, for
                the lazily created session, ``cls_model_dir`` and ``use_gpu``.
        """
        super().__init__()
        self.cls_image_shape = args.cls_image_shape
        self.cls_batch_num = args.cls_batch_num
        self.cls_thresh = args.cls_thresh
        self.postprocess_op = ClsPostProcess(label_list=args.label_list)
        self._args = args

        # Model state, filled in lazily by the properties below.
        self._cls_onnx_session = None
        self._cls_input_name = None
        self._cls_output_name = None

    @property
    def cls_onnx_session(self):
        """The classifier's inference session, created on first access."""
        if self._cls_onnx_session is None:
            self._cls_onnx_session = self.get_onnx_session(self._args.cls_model_dir, self._args.use_gpu)
        return self._cls_onnx_session

    @property
    def cls_input_name(self):
        """Input names of the classifier session, resolved on first access."""
        if self._cls_input_name is None:
            self._cls_input_name = self.get_input_name(self.cls_onnx_session)
        return self._cls_input_name

    @property
    def cls_output_name(self):
        """Output names of the classifier session, resolved on first access."""
        if self._cls_output_name is None:
            self._cls_output_name = self.get_output_name(self.cls_onnx_session)
        return self._cls_output_name

    def resize_norm_img(self, img):
        """Resize ``img`` to the model height, normalise it and pad the width.

        The width follows the aspect ratio but is capped at the model width.
        Pixel values are mapped from [0, 255] to [-1, 1] and the result is
        zero-padded on the right to ``cls_image_shape``.
        """
        imgC, imgH, imgW = self.cls_image_shape
        h = img.shape[0]
        w = img.shape[1]
        ratio = w / float(h)
        if math.ceil(imgH * ratio) > imgW:
            resized_w = imgW
        else:
            resized_w = int(math.ceil(imgH * ratio))
        resized_image = cv2.resize(img, (resized_w, imgH))
        resized_image = resized_image.astype("float32")
        if self.cls_image_shape[0] == 1:
            # Single-channel model: add the channel axis in front.
            resized_image = resized_image / 255
            resized_image = resized_image[np.newaxis, :]
        else:
            # Multi-channel model: move channels first (HWC -> CHW).
            resized_image = resized_image.transpose((2, 0, 1)) / 255
        resized_image -= 0.5
        resized_image /= 0.5
        padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
        padding_im[:, :, 0:resized_w] = resized_image
        return padding_im

    def __call__(self, img_list):
        """Classify every image and undo detected 180-degree rotations.

        Args:
            img_list: list of image arrays; it is deep-copied, so the
                caller's images are never modified.

        Returns:
            (img_list, cls_res): the (possibly rotated) copies and one
            ``[label, score]`` pair per image, in input order.
        """
        img_list = copy.deepcopy(img_list)
        img_num = len(img_list)
        # Calculate the aspect ratio of all text bars
        width_list = [img.shape[1] / float(img.shape[0]) for img in img_list]
        # Sorting can speed up the cls process
        indices = np.argsort(np.array(width_list))

        # One independent placeholder per image (no shared list object).
        cls_res = [["", 0.0] for _ in range(img_num)]
        batch_num = self.cls_batch_num

        for beg_img_no in range(0, img_num, batch_num):
            end_img_no = min(img_num, beg_img_no + batch_num)

            norm_img_batch = np.concatenate([
                self.resize_norm_img(img_list[indices[ino]])[np.newaxis, :]
                for ino in range(beg_img_no, end_img_no)
            ])

            input_feed = self.get_input_feed(self.cls_input_name, norm_img_batch)
            outputs = self.cls_onnx_session.run(
                self.cls_output_name, input_feed=input_feed
            )

            prob_out = outputs[0]

            cls_result = self.postprocess_op(prob_out)
            for rno, (label, score) in enumerate(cls_result):
                # Map the position in the sorted batch back to the input order.
                target = indices[beg_img_no + rno]
                cls_res[target] = [label, score]
                if "180" in label and score > self.cls_thresh:
                    img_list[target] = cv2.rotate(
                        img_list[target], cv2.ROTATE_180
                    )
        return img_list, cls_res