mineru 2.5.3__py3-none-any.whl → 2.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. mineru/backend/pipeline/model_init.py +25 -3
  2. mineru/backend/pipeline/model_json_to_middle_json.py +2 -2
  3. mineru/backend/pipeline/model_list.py +0 -1
  4. mineru/backend/utils.py +24 -0
  5. mineru/backend/vlm/model_output_to_middle_json.py +2 -2
  6. mineru/backend/vlm/{custom_logits_processors.py → utils.py} +36 -2
  7. mineru/backend/vlm/vlm_analyze.py +43 -50
  8. mineru/backend/vlm/vlm_magic_model.py +155 -1
  9. mineru/cli/common.py +26 -23
  10. mineru/cli/fast_api.py +2 -8
  11. mineru/cli/gradio_app.py +104 -13
  12. mineru/cli/models_download.py +1 -0
  13. mineru/model/mfr/pp_formulanet_plus_m/predict_formula.py +152 -0
  14. mineru/model/mfr/pp_formulanet_plus_m/processors.py +657 -0
  15. mineru/model/mfr/unimernet/unimernet_hf/modeling_unimernet.py +1 -326
  16. mineru/model/mfr/utils.py +338 -0
  17. mineru/model/ocr/paddleocr2pytorch/pytorch_paddle.py +103 -16
  18. mineru/model/table/rec/unet_table/main.py +1 -1
  19. mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/data/imaug/operators.py +5 -5
  20. mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/backbones/__init__.py +2 -1
  21. mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/backbones/rec_lcnetv3.py +7 -7
  22. mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/backbones/rec_pphgnetv2.py +2 -2
  23. mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/heads/__init__.py +2 -0
  24. mineru/model/utils/pytorchocr/modeling/heads/rec_ppformulanet_head.py +1383 -0
  25. mineru/model/utils/pytorchocr/modeling/heads/rec_unimernet_head.py +2631 -0
  26. mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/postprocess/rec_postprocess.py +25 -28
  27. mineru/model/utils/pytorchocr/utils/__init__.py +0 -0
  28. mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/arch_config.yaml +130 -0
  29. mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_arabic_dict.txt +747 -0
  30. mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_cyrillic_dict.txt +850 -0
  31. mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_devanagari_dict.txt +568 -0
  32. mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_ta_dict.txt +513 -0
  33. mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_te_dict.txt +540 -0
  34. mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/models_config.yml +15 -15
  35. mineru/model/utils/pytorchocr/utils/resources/pp_formulanet_arch_config.yaml +24 -0
  36. mineru/model/utils/tools/infer/__init__.py +1 -0
  37. mineru/model/{ocr/paddleocr2pytorch → utils}/tools/infer/predict_det.py +6 -3
  38. mineru/model/{ocr/paddleocr2pytorch → utils}/tools/infer/predict_rec.py +16 -25
  39. mineru/model/vlm_vllm_model/server.py +4 -1
  40. mineru/resources/header.html +2 -2
  41. mineru/utils/enum_class.py +1 -0
  42. mineru/utils/guess_suffix_or_lang.py +9 -1
  43. mineru/utils/llm_aided.py +4 -2
  44. mineru/utils/ocr_utils.py +16 -0
  45. mineru/utils/table_merge.py +102 -13
  46. mineru/version.py +1 -1
  47. {mineru-2.5.3.dist-info → mineru-2.6.0.dist-info}/METADATA +33 -6
  48. mineru-2.6.0.dist-info/RECORD +195 -0
  49. mineru-2.5.3.dist-info/RECORD +0 -181
  50. /mineru/model/{ocr/paddleocr2pytorch/pytorchocr → mfr/pp_formulanet_plus_m}/__init__.py +0 -0
  51. /mineru/model/{ocr/paddleocr2pytorch/tools/infer → utils}/__init__.py +0 -0
  52. /mineru/model/{ocr/paddleocr2pytorch/pytorchocr/modeling → utils/pytorchocr}/__init__.py +0 -0
  53. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/base_ocr_v20.py +0 -0
  54. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/data/__init__.py +0 -0
  55. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/data/imaug/__init__.py +0 -0
  56. /mineru/model/{ocr/paddleocr2pytorch/pytorchocr/utils → utils/pytorchocr/modeling}/__init__.py +0 -0
  57. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/architectures/__init__.py +0 -0
  58. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/architectures/base_model.py +0 -0
  59. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/backbones/det_mobilenet_v3.py +0 -0
  60. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/backbones/rec_donut_swin.py +0 -0
  61. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/backbones/rec_hgnet.py +0 -0
  62. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/backbones/rec_mobilenet_v3.py +0 -0
  63. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/backbones/rec_mv1_enhance.py +0 -0
  64. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/backbones/rec_svtrnet.py +0 -0
  65. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/common.py +0 -0
  66. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/heads/cls_head.py +0 -0
  67. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/heads/det_db_head.py +0 -0
  68. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/heads/rec_ctc_head.py +0 -0
  69. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/heads/rec_multi_head.py +0 -0
  70. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/necks/__init__.py +0 -0
  71. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/necks/db_fpn.py +0 -0
  72. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/necks/intracl.py +0 -0
  73. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/necks/rnn.py +0 -0
  74. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/postprocess/__init__.py +0 -0
  75. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/postprocess/cls_postprocess.py +0 -0
  76. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/postprocess/db_postprocess.py +0 -0
  77. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/arabic_dict.txt +0 -0
  78. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/chinese_cht_dict.txt +0 -0
  79. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/cyrillic_dict.txt +0 -0
  80. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/devanagari_dict.txt +0 -0
  81. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/en_dict.txt +0 -0
  82. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/japan_dict.txt +0 -0
  83. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ka_dict.txt +0 -0
  84. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/korean_dict.txt +0 -0
  85. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/latin_dict.txt +0 -0
  86. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ppocr_keys_v1.txt +0 -0
  87. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ppocrv4_doc_dict.txt +0 -0
  88. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ppocrv5_dict.txt +0 -0
  89. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ppocrv5_el_dict.txt +0 -0
  90. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ppocrv5_en_dict.txt +0 -0
  91. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ppocrv5_eslav_dict.txt +0 -0
  92. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ppocrv5_korean_dict.txt +0 -0
  93. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ppocrv5_latin_dict.txt +0 -0
  94. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ppocrv5_th_dict.txt +0 -0
  95. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ta_dict.txt +0 -0
  96. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/te_dict.txt +0 -0
  97. /mineru/model/{ocr/paddleocr2pytorch → utils}/tools/__init__.py +0 -0
  98. /mineru/model/{ocr/paddleocr2pytorch → utils}/tools/infer/predict_cls.py +0 -0
  99. /mineru/model/{ocr/paddleocr2pytorch → utils}/tools/infer/predict_system.py +0 -0
  100. /mineru/model/{ocr/paddleocr2pytorch → utils}/tools/infer/pytorchocr_utility.py +0 -0
  101. {mineru-2.5.3.dist-info → mineru-2.6.0.dist-info}/WHEEL +0 -0
  102. {mineru-2.5.3.dist-info → mineru-2.6.0.dist-info}/entry_points.txt +0 -0
  103. {mineru-2.5.3.dist-info → mineru-2.6.0.dist-info}/licenses/LICENSE.md +0 -0
  104. {mineru-2.5.3.dist-info → mineru-2.6.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,540 @@
1
+ !
2
+ "
3
+ #
4
+ $
5
+ %
6
+ &
7
+ '
8
+ (
9
+ )
10
+ *
11
+ +
12
+ ,
13
+ -
14
+ .
15
+ /
16
+ 0
17
+ 1
18
+ 2
19
+ 3
20
+ 4
21
+ 5
22
+ 6
23
+ 7
24
+ 8
25
+ 9
26
+ :
27
+ ;
28
+ <
29
+ =
30
+ >
31
+ ?
32
+ A
33
+ B
34
+ C
35
+ D
36
+ E
37
+ F
38
+ G
39
+ H
40
+ I
41
+ J
42
+ K
43
+ L
44
+ M
45
+ N
46
+ O
47
+ P
48
+ Q
49
+ R
50
+ S
51
+ T
52
+ U
53
+ V
54
+ W
55
+ X
56
+ Y
57
+ Z
58
+ [
59
+ ]
60
+ _
61
+ `
62
+ a
63
+ b
64
+ c
65
+ d
66
+ e
67
+ f
68
+ g
69
+ h
70
+ i
71
+ j
72
+ k
73
+ l
74
+ m
75
+ n
76
+ o
77
+ p
78
+ q
79
+ r
80
+ s
81
+ t
82
+ u
83
+ v
84
+ w
85
+ x
86
+ y
87
+ z
88
+ ©
89
+
90
+ {
91
+ }
92
+ \
93
+ |
94
+ @
95
+ ^
96
+ ~
97
+ ÷
98
+
99
+
100
+
101
+ ·
102
+ ±
103
+
104
+
105
+
106
+
107
+
108
+
109
+
110
+
111
+
112
+
113
+
114
+
115
+
116
+
117
+
118
+
119
+
120
+
121
+
122
+
123
+
124
+
125
+
126
+
127
+
128
+
129
+
130
+
131
+
132
+
133
+
134
+
135
+
136
+
137
+
138
+
139
+
140
+
141
+
142
+
143
+
144
+
145
+
146
+
147
+ ®
148
+
149
+ Ω
150
+
151
+
152
+
153
+
154
+
155
+
156
+
157
+ ¢
158
+
159
+ £
160
+ ¥
161
+
162
+
163
+
164
+
165
+
166
+
167
+
168
+
169
+
170
+
171
+
172
+
173
+
174
+
175
+
176
+
177
+
178
+
179
+
180
+
181
+
182
+
183
+
184
+
185
+
186
+
187
+
188
+
189
+
190
+
191
+
192
+
193
+
194
+
195
+
196
+
197
+
198
+
199
+
200
+
201
+
202
+
203
+
204
+
205
+
206
+
207
+
208
+
209
+
210
+
211
+
212
+
213
+
214
+
215
+
216
+
217
+
218
+
219
+
220
+
221
+
222
+
223
+
224
+
225
+
226
+
227
+
228
+ 𝑢
229
+
230
+
231
+
232
+ 𝜓
233
+
234
+
235
+ ƒ
236
+
237
+
238
+
239
+
240
+
241
+
242
+ À
243
+ Á
244
+ Â
245
+ Ã
246
+ Ä
247
+ Å
248
+ Æ
249
+ Ç
250
+ È
251
+ É
252
+ Ê
253
+ Ë
254
+ Ì
255
+ Í
256
+ Î
257
+ Ï
258
+ Ð
259
+ Ñ
260
+ Ò
261
+ Ó
262
+ Ô
263
+ Õ
264
+ Ö
265
+ Ø
266
+ Ù
267
+ Ú
268
+ Û
269
+ Ü
270
+ Ý
271
+ Þ
272
+ à
273
+ á
274
+ â
275
+ ã
276
+ ä
277
+ å
278
+ æ
279
+ ç
280
+ è
281
+ é
282
+ ê
283
+ ë
284
+ ì
285
+ í
286
+ î
287
+ ï
288
+ ð
289
+ ñ
290
+ ò
291
+ ó
292
+ ô
293
+ õ
294
+ ö
295
+ ø
296
+ ù
297
+ ú
298
+ û
299
+ ü
300
+ ý
301
+ þ
302
+ ÿ
303
+ ¡
304
+ ¤
305
+ ¦
306
+ §
307
+ ¨
308
+ ª
309
+ «
310
+ ¬
311
+ ¯
312
+ °
313
+ ²
314
+ ³
315
+ ´
316
+ µ
317
+
318
+ ¸
319
+ ¹
320
+ º
321
+ »
322
+ ¼
323
+ ½
324
+ ¾
325
+ ¿
326
+ ×
327
+
328
+
329
+
330
+
331
+
332
+
333
+
334
+
335
+
336
+
337
+
338
+
339
+
340
+
341
+
342
+
343
+
344
+
345
+
346
+
347
+
348
+
349
+
350
+
351
+
352
+
353
+
354
+
355
+
356
+
357
+
358
+
359
+
360
+
361
+
362
+
363
+
364
+
365
+
366
+
367
+
368
+
369
+
370
+
371
+
372
+
373
+
374
+
375
+
376
+
377
+
378
+
379
+
380
+
381
+
382
+
383
+
384
+
385
+
386
+
387
+
388
+
389
+
390
+
391
+
392
+
393
+ Α
394
+ α
395
+ Β
396
+ β
397
+ Γ
398
+ γ
399
+ Δ
400
+ δ
401
+ Ε
402
+ ε
403
+ Ζ
404
+ ζ
405
+ Η
406
+ η
407
+ Θ
408
+ θ
409
+ Ι
410
+ ι
411
+ Κ
412
+ κ
413
+ Λ
414
+ λ
415
+ Μ
416
+ μ
417
+ Ν
418
+ ν
419
+ Ξ
420
+ ξ
421
+ Ο
422
+ ο
423
+ Π
424
+ π
425
+ Ρ
426
+ ρ
427
+ Σ
428
+ σ
429
+ ς
430
+ Τ
431
+ τ
432
+ Υ
433
+ υ
434
+ Φ
435
+ φ
436
+ Χ
437
+ χ
438
+ Ψ
439
+ ψ
440
+ ω
441
+
442
+
443
+
444
+
445
+
446
+
447
+
448
+
449
+
450
+
451
+
452
+
453
+
454
+
455
+
456
+
457
+
458
+
459
+
460
+
461
+
462
+
463
+
464
+
465
+
466
+
467
+
468
+
469
+
470
+
471
+
472
+
473
+
474
+
475
+
476
+
477
+
478
+
479
+
480
+
481
+
482
+
483
+
484
+
485
+
486
+
487
+
488
+
489
+
490
+
491
+
492
+
493
+
494
+
495
+
496
+
497
+
498
+
499
+ ి
500
+
501
+
502
+
503
+
504
+
505
+
506
+
507
+
508
+
509
+
510
+
511
+
512
+
513
+
514
+
515
+
516
+
517
+
518
+
519
+
520
+
521
+
522
+
523
+
524
+
525
+
526
+
527
+
528
+
529
+
530
+
531
+
532
+
533
+
534
+
535
+
536
+
537
+
538
+
539
+
540
+ ౿
@@ -36,13 +36,13 @@ lang:
36
36
  rec: ch_PP-OCRv5_rec_server_infer.pth
37
37
  dict: ppocrv5_dict.txt
38
38
  ta:
39
- det: Multilingual_PP-OCRv3_det_infer.pth
40
- rec: ta_PP-OCRv3_rec_infer.pth
41
- dict: ta_dict.txt
39
+ det: ch_PP-OCRv5_det_infer.pth
40
+ rec: ta_PP-OCRv5_rec_infer.pth
41
+ dict: ppocrv5_ta_dict.txt
42
42
  te:
43
- det: Multilingual_PP-OCRv3_det_infer.pth
44
- rec: te_PP-OCRv3_rec_infer.pth
45
- dict: te_dict.txt
43
+ det: ch_PP-OCRv5_det_infer.pth
44
+ rec: te_PP-OCRv5_rec_infer.pth
45
+ dict: ppocrv5_te_dict.txt
46
46
  ka:
47
47
  det: Multilingual_PP-OCRv3_det_infer.pth
48
48
  rec: ka_PP-OCRv3_rec_infer.pth
@@ -52,17 +52,17 @@ lang:
52
52
  rec: latin_PP-OCRv5_rec_infer.pth
53
53
  dict: ppocrv5_latin_dict.txt
54
54
  arabic:
55
- det: Multilingual_PP-OCRv3_det_infer.pth
56
- rec: arabic_PP-OCRv3_rec_infer.pth
57
- dict: arabic_dict.txt
55
+ det: ch_PP-OCRv5_det_infer.pth
56
+ rec: arabic_PP-OCRv5_rec_infer.pth
57
+ dict: ppocrv5_arabic_dict.txt
58
58
  cyrillic:
59
- det: Multilingual_PP-OCRv3_det_infer.pth
60
- rec: cyrillic_PP-OCRv3_rec_infer.pth
61
- dict: cyrillic_dict.txt
59
+ det: ch_PP-OCRv5_det_infer.pth
60
+ rec: cyrillic_PP-OCRv5_rec_infer.pth
61
+ dict: ppocrv5_cyrillic_dict.txt
62
62
  devanagari:
63
- det: Multilingual_PP-OCRv3_det_infer.pth
64
- rec: devanagari_PP-OCRv3_rec_infer.pth
65
- dict: devanagari_dict.txt
63
+ det: ch_PP-OCRv5_det_infer.pth
64
+ rec: devanagari_PP-OCRv5_rec_infer.pth
65
+ dict: ppocrv5_devanagari_dict.txt
66
66
  east_slavic:
67
67
  det: ch_PP-OCRv5_det_infer.pth
68
68
  rec: eslav_PP-OCRv5_rec_infer.pth
@@ -0,0 +1,24 @@
1
+ Architecture:
2
+ model_type: rec
3
+ algorithm: PP-FormulaNet_plus-M
4
+ in_channels: 3
5
+ Transform:
6
+ Backbone:
7
+ name: PPHGNetV2_B6_Formula
8
+ class_num: 1024
9
+
10
+ Head:
11
+ name: PPFormulaNet_Head
12
+ max_new_tokens: 2560
13
+ decoder_start_token_id: 0
14
+ decoder_ffn_dim: 2048
15
+ decoder_hidden_size: 512
16
+ decoder_layers: 6
17
+ temperature: 0.2
18
+ do_sample: False
19
+ top_p: 0.95
20
+ encoder_hidden_size: 2048
21
+ is_export: False
22
+ length_aware: False
23
+ use_parallel: False
24
+ parallel_step: 0
@@ -0,0 +1 @@
1
+ # Copyright (c) Opendatalab. All rights reserved.
@@ -116,6 +116,9 @@ class TextDetector(BaseOCRV20):
116
116
  self.load_pytorch_weights(self.weights_path)
117
117
  self.net.eval()
118
118
  self.net.to(self.device)
119
+ for module in self.net.modules():
120
+ if hasattr(module, 'rep'):
121
+ module.rep()
119
122
 
120
123
  def _batch_process_same_size(self, img_list):
121
124
  """
@@ -293,7 +296,7 @@ class TextDetector(BaseOCRV20):
293
296
  return dt_boxes
294
297
 
295
298
  def __call__(self, img):
296
- ori_im = img.copy()
299
+ ori_shape = img.shape
297
300
  data = {'image': img}
298
301
  data = transform(data, self.preprocess_op)
299
302
  img, shape_list = data
@@ -331,9 +334,9 @@ class TextDetector(BaseOCRV20):
331
334
  if (self.det_algorithm == "SAST" and
332
335
  self.det_sast_polygon) or (self.det_algorithm in ["PSE", "FCE"] and
333
336
  self.postprocess_op.box_type == 'poly'):
334
- dt_boxes = self.filter_tag_det_res_only_clip(dt_boxes, ori_im.shape)
337
+ dt_boxes = self.filter_tag_det_res_only_clip(dt_boxes, ori_shape)
335
338
  else:
336
- dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape)
339
+ dt_boxes = self.filter_tag_det_res(dt_boxes, ori_shape)
337
340
 
338
341
  elapse = time.time() - starttime
339
342
  return dt_boxes, elapse
@@ -9,6 +9,7 @@ from tqdm import tqdm
9
9
  from ...pytorchocr.base_ocr_v20 import BaseOCRV20
10
10
  from . import pytorchocr_utility as utility
11
11
  from ...pytorchocr.postprocess import build_post_process
12
+ from ...pytorchocr.modeling.backbones.rec_hgnet import ConvBNAct
12
13
 
13
14
 
14
15
  class TextRecognizer(BaseOCRV20):
@@ -93,6 +94,12 @@ class TextRecognizer(BaseOCRV20):
93
94
  self.load_state_dict(weights)
94
95
  self.net.eval()
95
96
  self.net.to(self.device)
97
+ for module in self.net.modules():
98
+ if isinstance(module, ConvBNAct):
99
+ if module.use_act:
100
+ torch.quantization.fuse_modules(module, ['conv', 'bn', 'act'], inplace=True)
101
+ else:
102
+ torch.quantization.fuse_modules(module, ['conv', 'bn'], inplace=True)
96
103
 
97
104
  def resize_norm_img(self, img, max_wh_ratio):
98
105
  imgC, imgH, imgW = self.rec_image_shape
@@ -125,23 +132,15 @@ class TextRecognizer(BaseOCRV20):
125
132
 
126
133
  assert imgC == img.shape[2]
127
134
  max_wh_ratio = max(max_wh_ratio, imgW / imgH)
128
- imgW = int((imgH * max_wh_ratio))
135
+ imgW = int(imgH * max_wh_ratio)
129
136
  imgW = max(min(imgW, self.limited_max_width), self.limited_min_width)
130
137
  h, w = img.shape[:2]
131
138
  ratio = w / float(h)
132
- ratio_imgH = math.ceil(imgH * ratio)
133
- ratio_imgH = max(ratio_imgH, self.limited_min_width)
134
- if ratio_imgH > imgW:
135
- resized_w = imgW
136
- else:
137
- resized_w = int(ratio_imgH)
138
- resized_image = cv2.resize(img, (resized_w, imgH))
139
- resized_image = resized_image.astype('float32')
140
- resized_image = resized_image.transpose((2, 0, 1)) / 255
141
- resized_image -= 0.5
142
- resized_image /= 0.5
139
+ ratio_imgH = max(math.ceil(imgH * ratio), self.limited_min_width)
140
+ resized_w = min(imgW, int(ratio_imgH))
141
+ resized_image = cv2.resize(img, (resized_w, imgH)) /127.5 - 1
143
142
  padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
144
- padding_im[:, :, 0:resized_w] = resized_image
143
+ padding_im[:, :, 0:resized_w] = resized_image.transpose((2, 0, 1))
145
144
  return padding_im
146
145
 
147
146
  def resize_norm_img_svtr(self, img, image_shape):
@@ -307,12 +306,7 @@ class TextRecognizer(BaseOCRV20):
307
306
  for beg_img_no in range(0, img_num, batch_num):
308
307
  end_img_no = min(img_num, beg_img_no + batch_num)
309
308
  norm_img_batch = []
310
- max_wh_ratio = 0
311
- for ino in range(beg_img_no, end_img_no):
312
- # h, w = img_list[ino].shape[0:2]
313
- h, w = img_list[indices[ino]].shape[0:2]
314
- wh_ratio = w * 1.0 / h
315
- max_wh_ratio = max(max_wh_ratio, wh_ratio)
309
+ max_wh_ratio = width_list[indices[end_img_no - 1]]
316
310
  for ino in range(beg_img_no, end_img_no):
317
311
  if self.rec_algorithm == "SAR":
318
312
  norm_img, _, _, valid_ratio = self.resize_norm_img_sar(
@@ -420,14 +414,11 @@ class TextRecognizer(BaseOCRV20):
420
414
  with torch.no_grad():
421
415
  inp = torch.from_numpy(norm_img_batch)
422
416
  inp = inp.to(self.device)
423
- prob_out = self.net(inp)
417
+ preds = self.net(inp)
424
418
 
425
- if isinstance(prob_out, list):
426
- preds = [v.cpu().numpy() for v in prob_out]
427
- else:
428
- preds = prob_out.cpu().numpy()
419
+ with torch.no_grad():
420
+ rec_result = self.postprocess_op(preds)
429
421
 
430
- rec_result = self.postprocess_op(preds)
431
422
  for rno in range(len(rec_result)):
432
423
  rec_res[indices[beg_img_no + rno]] = rec_result[rno]
433
424
  elapse += time.time() - starttime