magic-pdf 1.3.11__py3-none-any.whl → 1.3.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- magic_pdf/data/utils.py +4 -4
- magic_pdf/dict2md/ocr_mkcontent.py +36 -22
- magic_pdf/libs/version.py +1 -1
- magic_pdf/model/batch_analyze.py +14 -1
- magic_pdf/model/doc_analyze_by_custom_model.py +1 -1
- magic_pdf/model/sub_modules/model_utils.py +4 -4
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/__init__.py +2 -1
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_pphgnetv2.py +810 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/rnn.py +18 -5
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/arch_config.yaml +68 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocrv5_dict.txt +18383 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/models_config.yml +8 -0
- {magic_pdf-1.3.11.dist-info → magic_pdf-1.3.12.dist-info}/METADATA +15 -1
- {magic_pdf-1.3.11.dist-info → magic_pdf-1.3.12.dist-info}/RECORD +18 -16
- {magic_pdf-1.3.11.dist-info → magic_pdf-1.3.12.dist-info}/LICENSE.md +0 -0
- {magic_pdf-1.3.11.dist-info → magic_pdf-1.3.12.dist-info}/WHEEL +0 -0
- {magic_pdf-1.3.11.dist-info → magic_pdf-1.3.12.dist-info}/entry_points.txt +0 -0
- {magic_pdf-1.3.11.dist-info → magic_pdf-1.3.12.dist-info}/top_level.txt +0 -0
@@ -9,14 +9,27 @@ class Im2Seq(nn.Module):
|
|
9
9
|
super().__init__()
|
10
10
|
self.out_channels = in_channels
|
11
11
|
|
12
|
+
# def forward(self, x):
|
13
|
+
# B, C, H, W = x.shape
|
14
|
+
# # assert H == 1
|
15
|
+
# x = x.squeeze(dim=2)
|
16
|
+
# # x = x.transpose([0, 2, 1]) # paddle (NTC)(batch, width, channels)
|
17
|
+
# x = x.permute(0, 2, 1)
|
18
|
+
# return x
|
19
|
+
|
12
20
|
def forward(self, x):
|
13
21
|
B, C, H, W = x.shape
|
14
|
-
#
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
22
|
+
# 处理四维张量,将空间维度展平为序列
|
23
|
+
if H == 1:
|
24
|
+
# 原来的处理逻辑,适用于H=1的情况
|
25
|
+
x = x.squeeze(dim=2)
|
26
|
+
x = x.permute(0, 2, 1) # (B, W, C)
|
27
|
+
else:
|
28
|
+
# 处理H不为1的情况
|
29
|
+
x = x.permute(0, 2, 3, 1) # (B, H, W, C)
|
30
|
+
x = x.reshape(B, H * W, C) # (B, H*W, C)
|
19
31
|
|
32
|
+
return x
|
20
33
|
|
21
34
|
class EncoderWithRNN_(nn.Module):
|
22
35
|
def __init__(self, in_channels, hidden_size):
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/arch_config.yaml
CHANGED
@@ -104,6 +104,22 @@ ch_PP-OCRv4_det_infer:
|
|
104
104
|
name: DBHead
|
105
105
|
k: 50
|
106
106
|
|
107
|
+
ch_PP-OCRv5_det_infer:
|
108
|
+
model_type: det
|
109
|
+
algorithm: DB
|
110
|
+
Transform: null
|
111
|
+
Backbone:
|
112
|
+
name: PPLCNetV3
|
113
|
+
scale: 0.75
|
114
|
+
det: True
|
115
|
+
Neck:
|
116
|
+
name: RSEFPN
|
117
|
+
out_channels: 96
|
118
|
+
shortcut: True
|
119
|
+
Head:
|
120
|
+
name: DBHead
|
121
|
+
k: 50
|
122
|
+
|
107
123
|
ch_PP-OCRv4_det_server_infer:
|
108
124
|
model_type: det
|
109
125
|
algorithm: DB
|
@@ -196,6 +212,58 @@ ch_PP-OCRv4_rec_server_doc_infer:
|
|
196
212
|
nrtr_dim: 384
|
197
213
|
max_text_length: 25
|
198
214
|
|
215
|
+
ch_PP-OCRv5_rec_server_infer:
|
216
|
+
model_type: rec
|
217
|
+
algorithm: SVTR_HGNet
|
218
|
+
Transform:
|
219
|
+
Backbone:
|
220
|
+
name: PPHGNetV2_B4
|
221
|
+
text_rec: True
|
222
|
+
Head:
|
223
|
+
name: MultiHead
|
224
|
+
out_channels_list:
|
225
|
+
CTCLabelDecode: 18385
|
226
|
+
head_list:
|
227
|
+
- CTCHead:
|
228
|
+
Neck:
|
229
|
+
name: svtr
|
230
|
+
dims: 120
|
231
|
+
depth: 2
|
232
|
+
hidden_dims: 120
|
233
|
+
kernel_size: [ 1, 3 ]
|
234
|
+
use_guide: True
|
235
|
+
Head:
|
236
|
+
fc_decay: 0.00001
|
237
|
+
- NRTRHead:
|
238
|
+
nrtr_dim: 384
|
239
|
+
max_text_length: 25
|
240
|
+
|
241
|
+
ch_PP-OCRv5_rec_infer:
|
242
|
+
model_type: rec
|
243
|
+
algorithm: SVTR_HGNet
|
244
|
+
Transform:
|
245
|
+
Backbone:
|
246
|
+
name: PPLCNetV3
|
247
|
+
scale: 0.95
|
248
|
+
Head:
|
249
|
+
name: MultiHead
|
250
|
+
out_channels_list:
|
251
|
+
CTCLabelDecode: 18385
|
252
|
+
head_list:
|
253
|
+
- CTCHead:
|
254
|
+
Neck:
|
255
|
+
name: svtr
|
256
|
+
dims: 120
|
257
|
+
depth: 2
|
258
|
+
hidden_dims: 120
|
259
|
+
kernel_size: [ 1, 3 ]
|
260
|
+
use_guide: True
|
261
|
+
Head:
|
262
|
+
fc_decay: 0.00001
|
263
|
+
- NRTRHead:
|
264
|
+
nrtr_dim: 384
|
265
|
+
max_text_length: 25
|
266
|
+
|
199
267
|
chinese_cht_PP-OCRv3_rec_infer:
|
200
268
|
model_type: rec
|
201
269
|
algorithm: SVTR
|