magic-pdf 1.3.11__py3-none-any.whl → 1.3.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,14 +9,27 @@ class Im2Seq(nn.Module):
9
9
  super().__init__()
10
10
  self.out_channels = in_channels
11
11
 
12
+ # def forward(self, x):
13
+ # B, C, H, W = x.shape
14
+ # # assert H == 1
15
+ # x = x.squeeze(dim=2)
16
+ # # x = x.transpose([0, 2, 1]) # paddle (NTC)(batch, width, channels)
17
+ # x = x.permute(0, 2, 1)
18
+ # return x
19
+
12
20
  def forward(self, x):
13
21
  B, C, H, W = x.shape
14
- # assert H == 1
15
- x = x.squeeze(dim=2)
16
- # x = x.transpose([0, 2, 1]) # paddle (NTC)(batch, width, channels)
17
- x = x.permute(0, 2, 1)
18
- return x
22
+ # 处理四维张量,将空间维度展平为序列
23
+ if H == 1:
24
+ # 原来的处理逻辑,适用于H=1的情况
25
+ x = x.squeeze(dim=2)
26
+ x = x.permute(0, 2, 1) # (B, W, C)
27
+ else:
28
+ # 处理H不为1的情况
29
+ x = x.permute(0, 2, 3, 1) # (B, H, W, C)
30
+ x = x.reshape(B, H * W, C) # (B, H*W, C)
19
31
 
32
+ return x
20
33
 
21
34
  class EncoderWithRNN_(nn.Module):
22
35
  def __init__(self, in_channels, hidden_size):
@@ -104,6 +104,22 @@ ch_PP-OCRv4_det_infer:
104
104
  name: DBHead
105
105
  k: 50
106
106
 
107
+ ch_PP-OCRv5_det_infer:
108
+ model_type: det
109
+ algorithm: DB
110
+ Transform: null
111
+ Backbone:
112
+ name: PPLCNetV3
113
+ scale: 0.75
114
+ det: True
115
+ Neck:
116
+ name: RSEFPN
117
+ out_channels: 96
118
+ shortcut: True
119
+ Head:
120
+ name: DBHead
121
+ k: 50
122
+
107
123
  ch_PP-OCRv4_det_server_infer:
108
124
  model_type: det
109
125
  algorithm: DB
@@ -196,6 +212,58 @@ ch_PP-OCRv4_rec_server_doc_infer:
196
212
  nrtr_dim: 384
197
213
  max_text_length: 25
198
214
 
215
+ ch_PP-OCRv5_rec_server_infer:
216
+ model_type: rec
217
+ algorithm: SVTR_HGNet
218
+ Transform:
219
+ Backbone:
220
+ name: PPHGNetV2_B4
221
+ text_rec: True
222
+ Head:
223
+ name: MultiHead
224
+ out_channels_list:
225
+ CTCLabelDecode: 18385
226
+ head_list:
227
+ - CTCHead:
228
+ Neck:
229
+ name: svtr
230
+ dims: 120
231
+ depth: 2
232
+ hidden_dims: 120
233
+ kernel_size: [ 1, 3 ]
234
+ use_guide: True
235
+ Head:
236
+ fc_decay: 0.00001
237
+ - NRTRHead:
238
+ nrtr_dim: 384
239
+ max_text_length: 25
240
+
241
+ ch_PP-OCRv5_rec_infer:
242
+ model_type: rec
243
+ algorithm: SVTR_HGNet
244
+ Transform:
245
+ Backbone:
246
+ name: PPLCNetV3
247
+ scale: 0.95
248
+ Head:
249
+ name: MultiHead
250
+ out_channels_list:
251
+ CTCLabelDecode: 18385
252
+ head_list:
253
+ - CTCHead:
254
+ Neck:
255
+ name: svtr
256
+ dims: 120
257
+ depth: 2
258
+ hidden_dims: 120
259
+ kernel_size: [ 1, 3 ]
260
+ use_guide: True
261
+ Head:
262
+ fc_decay: 0.00001
263
+ - NRTRHead:
264
+ nrtr_dim: 384
265
+ max_text_length: 25
266
+
199
267
  chinese_cht_PP-OCRv3_rec_infer:
200
268
  model_type: rec
201
269
  algorithm: SVTR