magic-pdf 1.2.2__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registries. It is provided for informational purposes only.
Files changed (102)
  1. magic_pdf/data/batch_build_dataset.py +156 -0
  2. magic_pdf/data/dataset.py +56 -25
  3. magic_pdf/data/utils.py +108 -9
  4. magic_pdf/dict2md/ocr_mkcontent.py +4 -3
  5. magic_pdf/libs/pdf_image_tools.py +11 -6
  6. magic_pdf/libs/performance_stats.py +12 -1
  7. magic_pdf/libs/version.py +1 -1
  8. magic_pdf/model/batch_analyze.py +175 -201
  9. magic_pdf/model/doc_analyze_by_custom_model.py +142 -92
  10. magic_pdf/model/pdf_extract_kit.py +5 -38
  11. magic_pdf/model/sub_modules/language_detection/utils.py +2 -4
  12. magic_pdf/model/sub_modules/language_detection/yolov11/YOLOv11.py +24 -19
  13. magic_pdf/model/sub_modules/layout/doclayout_yolo/DocLayoutYOLO.py +3 -1
  14. magic_pdf/model/sub_modules/mfd/yolov8/YOLOv8.py +3 -1
  15. magic_pdf/model/sub_modules/mfr/unimernet/Unimernet.py +31 -102
  16. magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/__init__.py +13 -0
  17. magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/modeling_unimernet.py +189 -0
  18. magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/__init__.py +8 -0
  19. magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/configuration_unimer_mbart.py +163 -0
  20. magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/modeling_unimer_mbart.py +2351 -0
  21. magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/__init__.py +9 -0
  22. magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/configuration_unimer_swin.py +132 -0
  23. magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/image_processing_unimer_swin.py +132 -0
  24. magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/modeling_unimer_swin.py +1084 -0
  25. magic_pdf/model/sub_modules/model_init.py +50 -37
  26. magic_pdf/model/sub_modules/model_utils.py +18 -12
  27. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/__init__.py +1 -0
  28. magic_pdf/model/sub_modules/ocr/{paddleocr → paddleocr2pytorch}/ocr_utils.py +102 -97
  29. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorch_paddle.py +193 -0
  30. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/base_ocr_v20.py +39 -0
  31. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/__init__.py +8 -0
  32. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/__init__.py +48 -0
  33. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/operators.py +418 -0
  34. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/__init__.py +25 -0
  35. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/base_model.py +105 -0
  36. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/__init__.py +62 -0
  37. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/det_mobilenet_v3.py +269 -0
  38. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_hgnet.py +290 -0
  39. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_lcnetv3.py +516 -0
  40. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mobilenet_v3.py +136 -0
  41. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mv1_enhance.py +234 -0
  42. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_svtrnet.py +638 -0
  43. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/common.py +76 -0
  44. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/__init__.py +43 -0
  45. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/cls_head.py +23 -0
  46. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/det_db_head.py +109 -0
  47. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_ctc_head.py +54 -0
  48. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_multi_head.py +58 -0
  49. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/__init__.py +29 -0
  50. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/db_fpn.py +456 -0
  51. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/intracl.py +117 -0
  52. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/rnn.py +228 -0
  53. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/__init__.py +33 -0
  54. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/cls_postprocess.py +20 -0
  55. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/db_postprocess.py +179 -0
  56. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/rec_postprocess.py +690 -0
  57. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/__init__.py +0 -0
  58. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/arch_config.yaml +383 -0
  59. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/arabic_dict.txt +162 -0
  60. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/chinese_cht_dict.txt +8421 -0
  61. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/cyrillic_dict.txt +163 -0
  62. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/devanagari_dict.txt +167 -0
  63. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/en_dict.txt +95 -0
  64. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/japan_dict.txt +4399 -0
  65. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ka_dict.txt +153 -0
  66. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/korean_dict.txt +3688 -0
  67. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/latin_dict.txt +185 -0
  68. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocr_keys_v1.txt +6623 -0
  69. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ta_dict.txt +128 -0
  70. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/te_dict.txt +151 -0
  71. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/models_config.yml +49 -0
  72. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/__init__.py +1 -0
  73. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/__init__.py +1 -0
  74. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_cls.py +106 -0
  75. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_det.py +217 -0
  76. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_rec.py +440 -0
  77. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_system.py +104 -0
  78. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/pytorchocr_utility.py +227 -0
  79. magic_pdf/model/sub_modules/table/rapidtable/rapid_table.py +15 -19
  80. magic_pdf/pdf_parse_union_core_v2.py +112 -74
  81. magic_pdf/pre_proc/ocr_dict_merge.py +9 -1
  82. magic_pdf/pre_proc/ocr_span_list_modify.py +51 -0
  83. magic_pdf/resources/model_config/model_configs.yaml +1 -1
  84. magic_pdf/resources/slanet_plus/slanet-plus.onnx +0 -0
  85. magic_pdf/tools/cli.py +30 -12
  86. magic_pdf/tools/common.py +90 -12
  87. {magic_pdf-1.2.2.dist-info → magic_pdf-1.3.1.dist-info}/METADATA +92 -59
  88. magic_pdf-1.3.1.dist-info/RECORD +203 -0
  89. {magic_pdf-1.2.2.dist-info → magic_pdf-1.3.1.dist-info}/WHEEL +1 -1
  90. magic_pdf/model/sub_modules/ocr/paddleocr/ppocr_273_mod.py +0 -204
  91. magic_pdf/model/sub_modules/ocr/paddleocr/ppocr_291_mod.py +0 -213
  92. magic_pdf/model/sub_modules/table/structeqtable/struct_eqtable.py +0 -37
  93. magic_pdf/model/sub_modules/table/tablemaster/tablemaster_paddle.py +0 -71
  94. magic_pdf/resources/model_config/UniMERNet/demo.yaml +0 -46
  95. magic_pdf/resources/model_config/layoutlmv3/layoutlmv3_base_inference.yaml +0 -351
  96. magic_pdf-1.2.2.dist-info/RECORD +0 -147
  97. /magic_pdf/model/sub_modules/{ocr/paddleocr/__init__.py → mfr/unimernet/unimernet_hf/unimer_mbart/tokenization_unimer_mbart.py} +0 -0
  98. /magic_pdf/model/sub_modules/{table/structeqtable → ocr/paddleocr2pytorch/pytorchocr}/__init__.py +0 -0
  99. /magic_pdf/model/sub_modules/{table/tablemaster → ocr/paddleocr2pytorch/pytorchocr/modeling}/__init__.py +0 -0
  100. {magic_pdf-1.2.2.dist-info → magic_pdf-1.3.1.dist-info}/LICENSE.md +0 -0
  101. {magic_pdf-1.2.2.dist-info → magic_pdf-1.3.1.dist-info}/entry_points.txt +0 -0
  102. {magic_pdf-1.2.2.dist-info → magic_pdf-1.3.1.dist-info}/top_level.txt +0 -0
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_lcnetv3.py
@@ -0,0 +1,516 @@
+ # copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from __future__ import absolute_import, division, print_function
+
+ import torch
+ import torch.nn.functional as F
+ from torch import nn
+
+ from ..common import Activation
+
+ NET_CONFIG_det = {
+     "blocks2":
+     # k, in_c, out_c, s, use_se
+     [[3, 16, 32, 1, False]],
+     "blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]],
+     "blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]],
+     "blocks5": [
+         [3, 128, 256, 2, False],
+         [5, 256, 256, 1, False],
+         [5, 256, 256, 1, False],
+         [5, 256, 256, 1, False],
+         [5, 256, 256, 1, False],
+     ],
+     "blocks6": [
+         [5, 256, 512, 2, True],
+         [5, 512, 512, 1, True],
+         [5, 512, 512, 1, False],
+         [5, 512, 512, 1, False],
+     ],
+ }
+
+ NET_CONFIG_rec = {
+     "blocks2":
+     # k, in_c, out_c, s, use_se
+     [[3, 16, 32, 1, False]],
+     "blocks3": [[3, 32, 64, 1, False], [3, 64, 64, 1, False]],
+     "blocks4": [[3, 64, 128, (2, 1), False], [3, 128, 128, 1, False]],
+     "blocks5": [
+         [3, 128, 256, (1, 2), False],
+         [5, 256, 256, 1, False],
+         [5, 256, 256, 1, False],
+         [5, 256, 256, 1, False],
+         [5, 256, 256, 1, False],
+     ],
+     "blocks6": [
+         [5, 256, 512, (2, 1), True],
+         [5, 512, 512, 1, True],
+         [5, 512, 512, (2, 1), False],
+         [5, 512, 512, 1, False],
+     ],
+ }
+
+
+ def make_divisible(v, divisor=16, min_value=None):
+     if min_value is None:
+         min_value = divisor
+     new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
+     if new_v < 0.9 * v:
+         new_v += divisor
+     return new_v
+
+
+ class LearnableAffineBlock(nn.Module):
+     def __init__(self, scale_value=1.0, bias_value=0.0, lr_mult=1.0, lab_lr=0.1):
+         super().__init__()
+         self.scale = nn.Parameter(torch.Tensor([scale_value]))
+         self.bias = nn.Parameter(torch.Tensor([bias_value]))
+
+     def forward(self, x):
+         return self.scale * x + self.bias
+
+
+ class ConvBNLayer(nn.Module):
+     def __init__(
+         self, in_channels, out_channels, kernel_size, stride, groups=1, lr_mult=1.0
+     ):
+         super().__init__()
+         self.conv = nn.Conv2d(
+             in_channels=in_channels,
+             out_channels=out_channels,
+             kernel_size=kernel_size,
+             stride=stride,
+             padding=(kernel_size - 1) // 2,
+             groups=groups,
+             bias=False,
+         )
+
+         self.bn = nn.BatchNorm2d(
+             out_channels,
+         )
+
+     def forward(self, x):
+         x = self.conv(x)
+         x = self.bn(x)
+         return x
+
+
+ class Act(nn.Module):
+     def __init__(self, act="hswish", lr_mult=1.0, lab_lr=0.1):
+         super().__init__()
+         if act == "hswish":
+             self.act = nn.Hardswish(inplace=True)
+         else:
+             assert act == "relu"
+             self.act = Activation(act)
+         self.lab = LearnableAffineBlock(lr_mult=lr_mult, lab_lr=lab_lr)
+
+     def forward(self, x):
+         return self.lab(self.act(x))
+
+
+ class LearnableRepLayer(nn.Module):
+     def __init__(
+         self,
+         in_channels,
+         out_channels,
+         kernel_size,
+         stride=1,
+         groups=1,
+         num_conv_branches=1,
+         lr_mult=1.0,
+         lab_lr=0.1,
+     ):
+         super().__init__()
+         self.is_repped = False
+         self.groups = groups
+         self.stride = stride
+         self.kernel_size = kernel_size
+         self.in_channels = in_channels
+         self.out_channels = out_channels
+         self.num_conv_branches = num_conv_branches
+         self.padding = (kernel_size - 1) // 2
+
+         self.identity = (
+             nn.BatchNorm2d(
+                 num_features=in_channels,
+             )
+             if out_channels == in_channels and stride == 1
+             else None
+         )
+
+         self.conv_kxk = nn.ModuleList(
+             [
+                 ConvBNLayer(
+                     in_channels,
+                     out_channels,
+                     kernel_size,
+                     stride,
+                     groups=groups,
+                     lr_mult=lr_mult,
+                 )
+                 for _ in range(self.num_conv_branches)
+             ]
+         )
+
+         self.conv_1x1 = (
+             ConvBNLayer(
+                 in_channels, out_channels, 1, stride, groups=groups, lr_mult=lr_mult
+             )
+             if kernel_size > 1
+             else None
+         )
+
+         self.lab = LearnableAffineBlock(lr_mult=lr_mult, lab_lr=lab_lr)
+         self.act = Act(lr_mult=lr_mult, lab_lr=lab_lr)
+
+     def forward(self, x):
+         # for export
+         if self.is_repped:
+             out = self.lab(self.reparam_conv(x))
+             if self.stride != 2:
+                 out = self.act(out)
+             return out
+
+         out = 0
+         if self.identity is not None:
+             out += self.identity(x)
+
+         if self.conv_1x1 is not None:
+             out += self.conv_1x1(x)
+
+         for conv in self.conv_kxk:
+             out += conv(x)
+
+         out = self.lab(out)
+         if self.stride != 2:
+             out = self.act(out)
+         return out
+
+     def rep(self):
+         if self.is_repped:
+             return
+         kernel, bias = self._get_kernel_bias()
+         self.reparam_conv = nn.Conv2d(
+             in_channels=self.in_channels,
+             out_channels=self.out_channels,
+             kernel_size=self.kernel_size,
+             stride=self.stride,
+             padding=self.padding,
+             groups=self.groups,
+         )
+         self.reparam_conv.weight.data = kernel
+         self.reparam_conv.bias.data = bias
+         self.is_repped = True
+
+     def _pad_kernel_1x1_to_kxk(self, kernel1x1, pad):
+         if not isinstance(kernel1x1, torch.Tensor):
+             return 0
+         else:
+             return nn.functional.pad(kernel1x1, [pad, pad, pad, pad])
+
+     def _get_kernel_bias(self):
+         kernel_conv_1x1, bias_conv_1x1 = self._fuse_bn_tensor(self.conv_1x1)
+         kernel_conv_1x1 = self._pad_kernel_1x1_to_kxk(
+             kernel_conv_1x1, self.kernel_size // 2
+         )
+
+         kernel_identity, bias_identity = self._fuse_bn_tensor(self.identity)
+
+         kernel_conv_kxk = 0
+         bias_conv_kxk = 0
+         for conv in self.conv_kxk:
+             kernel, bias = self._fuse_bn_tensor(conv)
+             kernel_conv_kxk += kernel
+             bias_conv_kxk += bias
+
+         kernel_reparam = kernel_conv_kxk + kernel_conv_1x1 + kernel_identity
+         bias_reparam = bias_conv_kxk + bias_conv_1x1 + bias_identity
+         return kernel_reparam, bias_reparam
+
+     def _fuse_bn_tensor(self, branch):
+         if not branch:
+             return 0, 0
+         elif isinstance(branch, ConvBNLayer):
+             kernel = branch.conv.weight
+             running_mean = branch.bn._mean
+             running_var = branch.bn._variance
+             gamma = branch.bn.weight
+             beta = branch.bn.bias
+             eps = branch.bn._epsilon
+         else:
+             assert isinstance(branch, nn.BatchNorm2d)
+             if not hasattr(self, "id_tensor"):
+                 input_dim = self.in_channels // self.groups
+                 kernel_value = torch.zeros(
+                     (self.in_channels, input_dim, self.kernel_size, self.kernel_size),
+                     dtype=branch.weight.dtype,
+                 )
+                 for i in range(self.in_channels):
+                     kernel_value[
+                         i, i % input_dim, self.kernel_size // 2, self.kernel_size // 2
+                     ] = 1
+                 self.id_tensor = kernel_value
+             kernel = self.id_tensor
+             running_mean = branch._mean
+             running_var = branch._variance
+             gamma = branch.weight
+             beta = branch.bias
+             eps = branch._epsilon
+         std = (running_var + eps).sqrt()
+         t = (gamma / std).reshape((-1, 1, 1, 1))
+         return kernel * t, beta - running_mean * gamma / std
+
+
+ class SELayer(nn.Module):
+     def __init__(self, channel, reduction=4, lr_mult=1.0):
+         super().__init__()
+         self.avg_pool = nn.AdaptiveAvgPool2d(1)
+         self.conv1 = nn.Conv2d(
+             in_channels=channel,
+             out_channels=channel // reduction,
+             kernel_size=1,
+             stride=1,
+             padding=0,
+         )
+         self.relu = nn.ReLU()
+         self.conv2 = nn.Conv2d(
+             in_channels=channel // reduction,
+             out_channels=channel,
+             kernel_size=1,
+             stride=1,
+             padding=0,
+         )
+         self.hardsigmoid = nn.Hardsigmoid(inplace=True)
+
+     def forward(self, x):
+         identity = x
+         x = self.avg_pool(x)
+         x = self.conv1(x)
+         x = self.relu(x)
+         x = self.conv2(x)
+         x = self.hardsigmoid(x)
+         x = identity * x
+         return x
+
+
+ class LCNetV3Block(nn.Module):
+     def __init__(
+         self,
+         in_channels,
+         out_channels,
+         stride,
+         dw_size,
+         use_se=False,
+         conv_kxk_num=4,
+         lr_mult=1.0,
+         lab_lr=0.1,
+     ):
+         super().__init__()
+         self.use_se = use_se
+         self.dw_conv = LearnableRepLayer(
+             in_channels=in_channels,
+             out_channels=in_channels,
+             kernel_size=dw_size,
+             stride=stride,
+             groups=in_channels,
+             num_conv_branches=conv_kxk_num,
+             lr_mult=lr_mult,
+             lab_lr=lab_lr,
+         )
+         if use_se:
+             self.se = SELayer(in_channels, lr_mult=lr_mult)
+         self.pw_conv = LearnableRepLayer(
+             in_channels=in_channels,
+             out_channels=out_channels,
+             kernel_size=1,
+             stride=1,
+             num_conv_branches=conv_kxk_num,
+             lr_mult=lr_mult,
+             lab_lr=lab_lr,
+         )
+
+     def forward(self, x):
+         x = self.dw_conv(x)
+         if self.use_se:
+             x = self.se(x)
+         x = self.pw_conv(x)
+         return x
+
+
+ class PPLCNetV3(nn.Module):
+     def __init__(
+         self,
+         scale=1.0,
+         conv_kxk_num=4,
+         lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
+         lab_lr=0.1,
+         det=False,
+         **kwargs
+     ):
+         super().__init__()
+         self.scale = scale
+         self.lr_mult_list = lr_mult_list
+         self.det = det
+
+         self.net_config = NET_CONFIG_det if self.det else NET_CONFIG_rec
+
+         assert isinstance(
+             self.lr_mult_list, (list, tuple)
+         ), "lr_mult_list should be in (list, tuple) but got {}".format(
+             type(self.lr_mult_list)
+         )
+         assert (
+             len(self.lr_mult_list) == 6
+         ), "lr_mult_list length should be 6 but got {}".format(len(self.lr_mult_list))
+
+         self.conv1 = ConvBNLayer(
+             in_channels=3,
+             out_channels=make_divisible(16 * scale),
+             kernel_size=3,
+             stride=2,
+             lr_mult=self.lr_mult_list[0],
+         )
+
+         self.blocks2 = nn.Sequential(
+             *[
+                 LCNetV3Block(
+                     in_channels=make_divisible(in_c * scale),
+                     out_channels=make_divisible(out_c * scale),
+                     dw_size=k,
+                     stride=s,
+                     use_se=se,
+                     conv_kxk_num=conv_kxk_num,
+                     lr_mult=self.lr_mult_list[1],
+                     lab_lr=lab_lr,
+                 )
+                 for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks2"])
+             ]
+         )
+
+         self.blocks3 = nn.Sequential(
+             *[
+                 LCNetV3Block(
+                     in_channels=make_divisible(in_c * scale),
+                     out_channels=make_divisible(out_c * scale),
+                     dw_size=k,
+                     stride=s,
+                     use_se=se,
+                     conv_kxk_num=conv_kxk_num,
+                     lr_mult=self.lr_mult_list[2],
+                     lab_lr=lab_lr,
+                 )
+                 for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks3"])
+             ]
+         )
+
+         self.blocks4 = nn.Sequential(
+             *[
+                 LCNetV3Block(
+                     in_channels=make_divisible(in_c * scale),
+                     out_channels=make_divisible(out_c * scale),
+                     dw_size=k,
+                     stride=s,
+                     use_se=se,
+                     conv_kxk_num=conv_kxk_num,
+                     lr_mult=self.lr_mult_list[3],
+                     lab_lr=lab_lr,
+                 )
+                 for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks4"])
+             ]
+         )
+
+         self.blocks5 = nn.Sequential(
+             *[
+                 LCNetV3Block(
+                     in_channels=make_divisible(in_c * scale),
+                     out_channels=make_divisible(out_c * scale),
+                     dw_size=k,
+                     stride=s,
+                     use_se=se,
+                     conv_kxk_num=conv_kxk_num,
+                     lr_mult=self.lr_mult_list[4],
+                     lab_lr=lab_lr,
+                 )
+                 for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks5"])
+             ]
+         )
+
+         self.blocks6 = nn.Sequential(
+             *[
+                 LCNetV3Block(
+                     in_channels=make_divisible(in_c * scale),
+                     out_channels=make_divisible(out_c * scale),
+                     dw_size=k,
+                     stride=s,
+                     use_se=se,
+                     conv_kxk_num=conv_kxk_num,
+                     lr_mult=self.lr_mult_list[5],
+                     lab_lr=lab_lr,
+                 )
+                 for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks6"])
+             ]
+         )
+         self.out_channels = make_divisible(512 * scale)
+
+         if self.det:
+             mv_c = [16, 24, 56, 480]
+             self.out_channels = [
+                 make_divisible(self.net_config["blocks3"][-1][2] * scale),
+                 make_divisible(self.net_config["blocks4"][-1][2] * scale),
+                 make_divisible(self.net_config["blocks5"][-1][2] * scale),
+                 make_divisible(self.net_config["blocks6"][-1][2] * scale),
+             ]
+
+             self.layer_list = nn.ModuleList(
+                 [
+                     nn.Conv2d(self.out_channels[0], int(mv_c[0] * scale), 1, 1, 0),
+                     nn.Conv2d(self.out_channels[1], int(mv_c[1] * scale), 1, 1, 0),
+                     nn.Conv2d(self.out_channels[2], int(mv_c[2] * scale), 1, 1, 0),
+                     nn.Conv2d(self.out_channels[3], int(mv_c[3] * scale), 1, 1, 0),
+                 ]
+             )
+             self.out_channels = [
+                 int(mv_c[0] * scale),
+                 int(mv_c[1] * scale),
+                 int(mv_c[2] * scale),
+                 int(mv_c[3] * scale),
+             ]
+
+     def forward(self, x):
+         out_list = []
+         x = self.conv1(x)
+         x = self.blocks2(x)
+         x = self.blocks3(x)
+         out_list.append(x)
+         x = self.blocks4(x)
+         out_list.append(x)
+         x = self.blocks5(x)
+         out_list.append(x)
+         x = self.blocks6(x)
+         out_list.append(x)
+
+         if self.det:
+             out_list[0] = self.layer_list[0](out_list[0])
+             out_list[1] = self.layer_list[1](out_list[1])
+             out_list[2] = self.layer_list[2](out_list[2])
+             out_list[3] = self.layer_list[3](out_list[3])
+             return out_list
+
+         if self.training:
+             x = F.adaptive_avg_pool2d(x, [1, 40])
+         else:
+             x = F.avg_pool2d(x, [3, 2])
+         return x
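For orientation, here is a minimal smoke-test sketch of the PPLCNetV3 backbone added above. It is hypothetical, not part of the diff, and assumes the file's package context so that the relative import `from ..common import Activation` resolves:

import torch

# Hypothetical smoke test for the PPLCNetV3 backbone above; not shipped in the wheel.
det_backbone = PPLCNetV3(scale=0.75, det=True)   # det=True selects NET_CONFIG_det
rec_backbone = PPLCNetV3(scale=0.95, det=False)  # det=False selects NET_CONFIG_rec

page = torch.randn(1, 3, 640, 640)               # dummy detection input
feats = det_backbone(page)
print([f.shape for f in feats])                  # four maps at strides 4/8/16/32,
                                                 # i.e. 160/80/40/20 pixels here

rec_backbone.eval()                              # eval path pools with F.avg_pool2d(x, [3, 2])
line = torch.randn(1, 3, 48, 320)                # dummy text-line crop
print(rec_backbone(line).shape)                  # torch.Size([1, 480, 1, 40]) for scale=0.95

The (2, 1) and (1, 2) stride tuples in NET_CONFIG_rec downsample height faster than width, so a 48-pixel-high line collapses to a 1x40 feature sequence suited to a CTC-style recognition head.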
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mobilenet_v3.py
@@ -0,0 +1,136 @@
+ from torch import nn
+
+ from .det_mobilenet_v3 import ConvBNLayer, ResidualUnit, make_divisible
+
+
+ class MobileNetV3(nn.Module):
+     def __init__(
+         self,
+         in_channels=3,
+         model_name="small",
+         scale=0.5,
+         large_stride=None,
+         small_stride=None,
+         **kwargs
+     ):
+         super(MobileNetV3, self).__init__()
+         if small_stride is None:
+             small_stride = [2, 2, 2, 2]
+         if large_stride is None:
+             large_stride = [1, 2, 2, 2]
+
+         assert isinstance(
+             large_stride, list
+         ), "large_stride type must " "be list but got {}".format(type(large_stride))
+         assert isinstance(
+             small_stride, list
+         ), "small_stride type must " "be list but got {}".format(type(small_stride))
+         assert (
+             len(large_stride) == 4
+         ), "large_stride length must be " "4 but got {}".format(len(large_stride))
+         assert (
+             len(small_stride) == 4
+         ), "small_stride length must be " "4 but got {}".format(len(small_stride))
+
+         if model_name == "large":
+             cfg = [
+                 # k, exp, c, se, nl, s,
+                 [3, 16, 16, False, "relu", large_stride[0]],
+                 [3, 64, 24, False, "relu", (large_stride[1], 1)],
+                 [3, 72, 24, False, "relu", 1],
+                 [5, 72, 40, True, "relu", (large_stride[2], 1)],
+                 [5, 120, 40, True, "relu", 1],
+                 [5, 120, 40, True, "relu", 1],
+                 [3, 240, 80, False, "hard_swish", 1],
+                 [3, 200, 80, False, "hard_swish", 1],
+                 [3, 184, 80, False, "hard_swish", 1],
+                 [3, 184, 80, False, "hard_swish", 1],
+                 [3, 480, 112, True, "hard_swish", 1],
+                 [3, 672, 112, True, "hard_swish", 1],
+                 [5, 672, 160, True, "hard_swish", (large_stride[3], 1)],
+                 [5, 960, 160, True, "hard_swish", 1],
+                 [5, 960, 160, True, "hard_swish", 1],
+             ]
+             cls_ch_squeeze = 960
+         elif model_name == "small":
+             cfg = [
+                 # k, exp, c, se, nl, s,
+                 [3, 16, 16, True, "relu", (small_stride[0], 1)],
+                 [3, 72, 24, False, "relu", (small_stride[1], 1)],
+                 [3, 88, 24, False, "relu", 1],
+                 [5, 96, 40, True, "hard_swish", (small_stride[2], 1)],
+                 [5, 240, 40, True, "hard_swish", 1],
+                 [5, 240, 40, True, "hard_swish", 1],
+                 [5, 120, 48, True, "hard_swish", 1],
+                 [5, 144, 48, True, "hard_swish", 1],
+                 [5, 288, 96, True, "hard_swish", (small_stride[3], 1)],
+                 [5, 576, 96, True, "hard_swish", 1],
+                 [5, 576, 96, True, "hard_swish", 1],
+             ]
+             cls_ch_squeeze = 576
+         else:
+             raise NotImplementedError(
+                 "mode[" + model_name + "_model] is not implemented!"
+             )
+
+         supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
+         assert (
+             scale in supported_scale
+         ), "supported scales are {} but input scale is {}".format(
+             supported_scale, scale
+         )
+
+         inplanes = 16
+         # conv1
+         self.conv1 = ConvBNLayer(
+             in_channels=in_channels,
+             out_channels=make_divisible(inplanes * scale),
+             kernel_size=3,
+             stride=2,
+             padding=1,
+             groups=1,
+             if_act=True,
+             act="hard_swish",
+             name="conv1",
+         )
+         i = 0
+         block_list = []
+         inplanes = make_divisible(inplanes * scale)
+         for k, exp, c, se, nl, s in cfg:
+             block_list.append(
+                 ResidualUnit(
+                     in_channels=inplanes,
+                     mid_channels=make_divisible(scale * exp),
+                     out_channels=make_divisible(scale * c),
+                     kernel_size=k,
+                     stride=s,
+                     use_se=se,
+                     act=nl,
+                     name="conv" + str(i + 2),
+                 )
+             )
+             inplanes = make_divisible(scale * c)
+             i += 1
+         self.blocks = nn.Sequential(*block_list)
+
+         self.conv2 = ConvBNLayer(
+             in_channels=inplanes,
+             out_channels=make_divisible(scale * cls_ch_squeeze),
+             kernel_size=1,
+             stride=1,
+             padding=0,
+             groups=1,
+             if_act=True,
+             act="hard_swish",
+             name="conv_last",
+         )
+
+         self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
+         self.out_channels = make_divisible(scale * cls_ch_squeeze)
+
+     def forward(self, x):
+         x = self.conv1(x)
+         x = self.blocks(x)
+         x = self.conv2(x)
+         x = self.pool(x)
+         return x
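Similarly, a hypothetical smoke test for this recognition MobileNetV3 variant (not part of the diff; it assumes that `ConvBNLayer`, `ResidualUnit`, and `make_divisible` from the sibling `det_mobilenet_v3` module behave as in the upstream PaddleOCR2Pytorch port):

import torch

# Hypothetical smoke test; not shipped in the wheel.
backbone = MobileNetV3(in_channels=3, model_name="small", scale=0.5)
print(backbone.out_channels)          # make_divisible(0.5 * 576) == 288

line = torch.randn(1, 3, 48, 320)     # dummy text-line crop, height 48
feat = backbone(line)
print(feat.shape)                     # torch.Size([1, 288, 1, 80]): the (stride, 1) tuples
                                      # collapse height to 1, while width is halved once by
                                      # conv1 and once more by the final 2x2 max pool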