mineru 2.5.4__py3-none-any.whl → 2.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. mineru/backend/pipeline/model_init.py +25 -3
  2. mineru/backend/pipeline/model_json_to_middle_json.py +2 -2
  3. mineru/backend/pipeline/model_list.py +0 -1
  4. mineru/backend/utils.py +24 -0
  5. mineru/backend/vlm/model_output_to_middle_json.py +2 -2
  6. mineru/backend/vlm/{custom_logits_processors.py → utils.py} +36 -2
  7. mineru/backend/vlm/vlm_analyze.py +43 -50
  8. mineru/backend/vlm/vlm_magic_model.py +155 -1
  9. mineru/cli/common.py +25 -22
  10. mineru/cli/fast_api.py +2 -8
  11. mineru/cli/gradio_app.py +96 -9
  12. mineru/cli/models_download.py +1 -0
  13. mineru/model/mfr/pp_formulanet_plus_m/predict_formula.py +152 -0
  14. mineru/model/mfr/pp_formulanet_plus_m/processors.py +657 -0
  15. mineru/model/mfr/unimernet/unimernet_hf/modeling_unimernet.py +1 -326
  16. mineru/model/mfr/utils.py +338 -0
  17. mineru/model/ocr/paddleocr2pytorch/pytorch_paddle.py +103 -16
  18. mineru/model/table/rec/unet_table/main.py +1 -1
  19. mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/data/imaug/operators.py +5 -5
  20. mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/backbones/__init__.py +2 -1
  21. mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/backbones/rec_lcnetv3.py +7 -7
  22. mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/backbones/rec_pphgnetv2.py +2 -2
  23. mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/heads/__init__.py +2 -0
  24. mineru/model/utils/pytorchocr/modeling/heads/rec_ppformulanet_head.py +1383 -0
  25. mineru/model/utils/pytorchocr/modeling/heads/rec_unimernet_head.py +2631 -0
  26. mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/postprocess/rec_postprocess.py +25 -28
  27. mineru/model/utils/pytorchocr/utils/__init__.py +0 -0
  28. mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/arch_config.yaml +130 -0
  29. mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_arabic_dict.txt +747 -0
  30. mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_cyrillic_dict.txt +850 -0
  31. mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_devanagari_dict.txt +568 -0
  32. mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_ta_dict.txt +513 -0
  33. mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_te_dict.txt +540 -0
  34. mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/models_config.yml +15 -15
  35. mineru/model/utils/pytorchocr/utils/resources/pp_formulanet_arch_config.yaml +24 -0
  36. mineru/model/utils/tools/infer/__init__.py +1 -0
  37. mineru/model/{ocr/paddleocr2pytorch → utils}/tools/infer/predict_det.py +6 -3
  38. mineru/model/{ocr/paddleocr2pytorch → utils}/tools/infer/predict_rec.py +16 -25
  39. mineru/model/vlm_vllm_model/server.py +4 -1
  40. mineru/resources/header.html +2 -2
  41. mineru/utils/enum_class.py +1 -0
  42. mineru/utils/llm_aided.py +4 -2
  43. mineru/utils/ocr_utils.py +16 -0
  44. mineru/utils/table_merge.py +102 -13
  45. mineru/version.py +1 -1
  46. {mineru-2.5.4.dist-info → mineru-2.6.0.dist-info}/METADATA +32 -8
  47. mineru-2.6.0.dist-info/RECORD +195 -0
  48. mineru-2.5.4.dist-info/RECORD +0 -181
  49. /mineru/model/{ocr/paddleocr2pytorch/pytorchocr → mfr/pp_formulanet_plus_m}/__init__.py +0 -0
  50. /mineru/model/{ocr/paddleocr2pytorch/tools/infer → utils}/__init__.py +0 -0
  51. /mineru/model/{ocr/paddleocr2pytorch/pytorchocr/modeling → utils/pytorchocr}/__init__.py +0 -0
  52. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/base_ocr_v20.py +0 -0
  53. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/data/__init__.py +0 -0
  54. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/data/imaug/__init__.py +0 -0
  55. /mineru/model/{ocr/paddleocr2pytorch/pytorchocr/utils → utils/pytorchocr/modeling}/__init__.py +0 -0
  56. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/architectures/__init__.py +0 -0
  57. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/architectures/base_model.py +0 -0
  58. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/backbones/det_mobilenet_v3.py +0 -0
  59. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/backbones/rec_donut_swin.py +0 -0
  60. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/backbones/rec_hgnet.py +0 -0
  61. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/backbones/rec_mobilenet_v3.py +0 -0
  62. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/backbones/rec_mv1_enhance.py +0 -0
  63. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/backbones/rec_svtrnet.py +0 -0
  64. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/common.py +0 -0
  65. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/heads/cls_head.py +0 -0
  66. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/heads/det_db_head.py +0 -0
  67. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/heads/rec_ctc_head.py +0 -0
  68. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/heads/rec_multi_head.py +0 -0
  69. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/necks/__init__.py +0 -0
  70. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/necks/db_fpn.py +0 -0
  71. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/necks/intracl.py +0 -0
  72. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/necks/rnn.py +0 -0
  73. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/postprocess/__init__.py +0 -0
  74. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/postprocess/cls_postprocess.py +0 -0
  75. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/postprocess/db_postprocess.py +0 -0
  76. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/arabic_dict.txt +0 -0
  77. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/chinese_cht_dict.txt +0 -0
  78. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/cyrillic_dict.txt +0 -0
  79. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/devanagari_dict.txt +0 -0
  80. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/en_dict.txt +0 -0
  81. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/japan_dict.txt +0 -0
  82. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ka_dict.txt +0 -0
  83. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/korean_dict.txt +0 -0
  84. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/latin_dict.txt +0 -0
  85. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ppocr_keys_v1.txt +0 -0
  86. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ppocrv4_doc_dict.txt +0 -0
  87. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ppocrv5_dict.txt +0 -0
  88. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ppocrv5_el_dict.txt +0 -0
  89. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ppocrv5_en_dict.txt +0 -0
  90. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ppocrv5_eslav_dict.txt +0 -0
  91. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ppocrv5_korean_dict.txt +0 -0
  92. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ppocrv5_latin_dict.txt +0 -0
  93. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ppocrv5_th_dict.txt +0 -0
  94. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ta_dict.txt +0 -0
  95. /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/te_dict.txt +0 -0
  96. /mineru/model/{ocr/paddleocr2pytorch → utils}/tools/__init__.py +0 -0
  97. /mineru/model/{ocr/paddleocr2pytorch → utils}/tools/infer/predict_cls.py +0 -0
  98. /mineru/model/{ocr/paddleocr2pytorch → utils}/tools/infer/predict_system.py +0 -0
  99. /mineru/model/{ocr/paddleocr2pytorch → utils}/tools/infer/pytorchocr_utility.py +0 -0
  100. {mineru-2.5.4.dist-info → mineru-2.6.0.dist-info}/WHEEL +0 -0
  101. {mineru-2.5.4.dist-info → mineru-2.6.0.dist-info}/entry_points.txt +0 -0
  102. {mineru-2.5.4.dist-info → mineru-2.6.0.dist-info}/licenses/LICENSE.md +0 -0
  103. {mineru-2.5.4.dist-info → mineru-2.6.0.dist-info}/top_level.txt +0 -0
@@ -47,7 +47,7 @@ class BaseRecLabelDecode(object):
47
47
  self.dict = {}
48
48
  for i, char in enumerate(dict_character):
49
49
  self.dict[char] = i
50
- self.character = dict_character
50
+ self.character = np.array(dict_character)
51
51
 
52
52
  def pred_reverse(self, pred):
53
53
  pred_re = []
@@ -143,27 +143,27 @@ class BaseRecLabelDecode(object):
143
143
  ):
144
144
  """ convert text-index into text-label. """
145
145
  result_list = []
146
- ignored_tokens = self.get_ignored_tokens()
147
- batch_size = len(text_index)
146
+ batch_size = text_index.shape[0]
147
+ blank_word = self.get_ignored_tokens()[0]
148
148
  for batch_idx in range(batch_size):
149
- char_list = []
150
- conf_list = []
151
- for idx in range(len(text_index[batch_idx])):
152
- if text_index[batch_idx][idx] in ignored_tokens:
153
- continue
154
- if is_remove_duplicate:
155
- # only for predict
156
- if idx > 0 and text_index[batch_idx][idx - 1] == text_index[
157
- batch_idx][idx]:
158
- continue
159
- char_list.append(self.character[int(text_index[batch_idx][
160
- idx])])
161
- if text_prob is not None:
162
- conf_list.append(text_prob[batch_idx][idx])
163
- else:
164
- conf_list.append(1)
165
- text = ''.join(char_list)
166
- result_list.append((text, np.mean(conf_list)))
149
+ probs = None if text_prob is None else np.array(text_prob[batch_idx])
150
+ sequence = text_index[batch_idx]
151
+
152
+ final_mask = sequence != blank_word
153
+ if is_remove_duplicate:
154
+ duplicate_mask = np.insert(sequence[1:] != sequence[:-1], 0, True)
155
+ final_mask &= duplicate_mask
156
+
157
+ sequence = sequence[final_mask]
158
+ probs = None if probs is None else probs[final_mask]
159
+ text = "".join(self.character[sequence])
160
+
161
+ if text_prob is not None and probs is not None and len(probs) > 0:
162
+ mean_conf = np.mean(probs)
163
+ else:
164
+ # 如果没有提供概率或最终结果为空,则默认置信度为1.0
165
+ mean_conf = 1.0
166
+ result_list.append((text, mean_conf))
167
167
  return result_list
168
168
 
169
169
  def get_ignored_tokens(self):
@@ -181,13 +181,10 @@ class CTCLabelDecode(BaseRecLabelDecode):
181
181
  use_space_char)
182
182
 
183
183
  def __call__(self, preds, label=None, return_word_box=False, *args, **kwargs):
184
- if isinstance(preds, torch.Tensor):
185
- preds = preds.numpy()
186
- preds_idx = preds.argmax(axis=2)
187
- preds_prob = preds.max(axis=2)
184
+ preds_prob, preds_idx = preds.max(axis=2)
188
185
  text = self.decode(
189
- preds_idx,
190
- preds_prob,
186
+ preds_idx.cpu().numpy(),
187
+ preds_prob.float().cpu().numpy(),
191
188
  is_remove_duplicate=True,
192
189
  return_word_box=return_word_box,
193
190
  )
@@ -199,7 +196,7 @@ class CTCLabelDecode(BaseRecLabelDecode):
199
196
 
200
197
  if label is None:
201
198
  return text
202
- label = self.decode(label)
199
+ label = self.decode(label.cpu().numpy())
203
200
  return text, label
204
201
 
205
202
  def add_special_char(self, dict_character):
File without changes
@@ -631,6 +631,136 @@ en_PP-OCRv5_rec_infer:
631
631
  name: MultiHead
632
632
  out_channels_list:
633
633
  CTCLabelDecode: 438
634
+ head_list:
635
+ - CTCHead:
636
+ Neck:
637
+ name: svtr
638
+ dims: 120
639
+ depth: 2
640
+ hidden_dims: 120
641
+ kernel_size: [ 1, 3 ]
642
+ use_guide: True
643
+ Head:
644
+ fc_decay: 0.00001
645
+ - NRTRHead:
646
+ nrtr_dim: 384
647
+ max_text_length: 25
648
+
649
+ arabic_PP-OCRv5_rec_infer:
650
+ model_type: rec
651
+ algorithm: SVTR_LCNet
652
+ Transform:
653
+ Backbone:
654
+ name: PPLCNetV3
655
+ scale: 0.95
656
+ Head:
657
+ name: MultiHead
658
+ out_channels_list:
659
+ CTCLabelDecode: 749
660
+ head_list:
661
+ - CTCHead:
662
+ Neck:
663
+ name: svtr
664
+ dims: 120
665
+ depth: 2
666
+ hidden_dims: 120
667
+ kernel_size: [ 1, 3 ]
668
+ use_guide: True
669
+ Head:
670
+ fc_decay: 0.00001
671
+ - NRTRHead:
672
+ nrtr_dim: 384
673
+ max_text_length: 25
674
+
675
+ cyrillic_PP-OCRv5_rec_infer:
676
+ model_type: rec
677
+ algorithm: SVTR_LCNet
678
+ Transform:
679
+ Backbone:
680
+ name: PPLCNetV3
681
+ scale: 0.95
682
+ Head:
683
+ name: MultiHead
684
+ out_channels_list:
685
+ CTCLabelDecode: 852
686
+ head_list:
687
+ - CTCHead:
688
+ Neck:
689
+ name: svtr
690
+ dims: 120
691
+ depth: 2
692
+ hidden_dims: 120
693
+ kernel_size: [ 1, 3 ]
694
+ use_guide: True
695
+ Head:
696
+ fc_decay: 0.00001
697
+ - NRTRHead:
698
+ nrtr_dim: 384
699
+ max_text_length: 25
700
+
701
+ devanagari_PP-OCRv5_rec_infer:
702
+ model_type: rec
703
+ algorithm: SVTR_LCNet
704
+ Transform:
705
+ Backbone:
706
+ name: PPLCNetV3
707
+ scale: 0.95
708
+ Head:
709
+ name: MultiHead
710
+ out_channels_list:
711
+ CTCLabelDecode: 570
712
+ head_list:
713
+ - CTCHead:
714
+ Neck:
715
+ name: svtr
716
+ dims: 120
717
+ depth: 2
718
+ hidden_dims: 120
719
+ kernel_size: [ 1, 3 ]
720
+ use_guide: True
721
+ Head:
722
+ fc_decay: 0.00001
723
+ - NRTRHead:
724
+ nrtr_dim: 384
725
+ max_text_length: 25
726
+
727
+ ta_PP-OCRv5_rec_infer:
728
+ model_type: rec
729
+ algorithm: SVTR_LCNet
730
+ Transform:
731
+ Backbone:
732
+ name: PPLCNetV3
733
+ scale: 0.95
734
+ Head:
735
+ name: MultiHead
736
+ out_channels_list:
737
+ CTCLabelDecode: 515
738
+ head_list:
739
+ - CTCHead:
740
+ Neck:
741
+ name: svtr
742
+ dims: 120
743
+ depth: 2
744
+ hidden_dims: 120
745
+ kernel_size: [ 1, 3 ]
746
+ use_guide: True
747
+ Head:
748
+ fc_decay: 0.00001
749
+ - NRTRHead:
750
+ nrtr_dim: 384
751
+ max_text_length: 25
752
+
753
+ te_PP-OCRv5_rec_infer:
754
+ model_type: rec
755
+ algorithm: SVTR_LCNet
756
+ Transform:
757
+ Backbone:
758
+ name: PPLCNetV3
759
+ scale: 0.95
760
+ Head:
761
+ name: MultiHead
762
+ out_channels_list:
763
+ CTCLabelDecode: 542
634
764
  head_list:
635
765
  - CTCHead:
636
766
  Neck: