openocr-python 0.0.9__py3-none-any.whl → 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. openocr/__init__.py +35 -1
  2. openocr/configs/dataset/rec/evaluation.yaml +41 -0
  3. openocr/configs/dataset/rec/ltb.yaml +9 -0
  4. openocr/configs/dataset/rec/mjsynth.yaml +11 -0
  5. openocr/configs/dataset/rec/openvino.yaml +25 -0
  6. openocr/configs/dataset/rec/ost.yaml +17 -0
  7. openocr/configs/dataset/rec/synthtext.yaml +7 -0
  8. openocr/configs/dataset/rec/test.yaml +77 -0
  9. openocr/configs/dataset/rec/textocr.yaml +13 -0
  10. openocr/configs/dataset/rec/textocr_horizontal.yaml +13 -0
  11. openocr/configs/dataset/rec/union14m_b.yaml +47 -0
  12. openocr/configs/dataset/rec/union14m_l_filtered.yaml +35 -0
  13. openocr/configs/rec/cmer/cmer.yml +127 -0
  14. openocr/configs/rec/mdiff4str/svtrv2_mdiffdecoder_base.yml +152 -0
  15. openocr/configs/rec/mdiff4str/svtrv2_mdiffdecoder_small.yml +152 -0
  16. openocr/configs/rec/unirec/focalsvtr_ardecoder_unirec.yml +114 -0
  17. openocr/configs/rec/unirec/opendoc_pipeline.yml +105 -0
  18. openocr/demo_gradio.py +28 -8
  19. openocr/demo_opendoc.py +572 -0
  20. openocr/demo_unirec.py +392 -0
  21. openocr/opendet/losses/__init__.py +5 -7
  22. openocr/opendet/preprocess/crop_resize.py +2 -1
  23. openocr/openocr.py +685 -0
  24. openocr/openrec/losses/__init__.py +8 -3
  25. openocr/openrec/losses/cmer_loss.py +12 -0
  26. openocr/openrec/losses/mdiff_loss.py +11 -0
  27. openocr/openrec/losses/unirec_loss.py +12 -0
  28. openocr/openrec/metrics/__init__.py +4 -1
  29. openocr/openrec/metrics/rec_metric_cmer.py +328 -0
  30. openocr/openrec/modeling/cmer_modeling/modeling_cmer.py +643 -0
  31. openocr/openrec/modeling/decoders/__init__.py +1 -0
  32. openocr/openrec/modeling/decoders/ctc_decoder.py +1 -1
  33. openocr/openrec/modeling/decoders/dan_decoder.py +4 -4
  34. openocr/openrec/modeling/decoders/dptr_parseq_clip_b_decoder.py +1563 -1398
  35. openocr/openrec/modeling/decoders/mdiff_decoder.py +587 -0
  36. openocr/openrec/modeling/decoders/smtr_decoder.py +99 -48
  37. openocr/openrec/modeling/unirec_modeling/configuration_unirec.py +166 -0
  38. openocr/openrec/modeling/unirec_modeling/modeling_unirec.py +433 -0
  39. openocr/openrec/optimizer/__init__.py +4 -3
  40. openocr/openrec/optimizer/lr.py +49 -0
  41. openocr/openrec/postprocess/__init__.py +2 -0
  42. openocr/openrec/postprocess/abinet_postprocess.py +1 -1
  43. openocr/openrec/postprocess/ar_postprocess.py +1 -1
  44. openocr/openrec/postprocess/cmer_postprocess.py +86 -0
  45. openocr/openrec/postprocess/cppd_postprocess.py +1 -1
  46. openocr/openrec/postprocess/igtr_postprocess.py +1 -1
  47. openocr/openrec/postprocess/lister_postprocess.py +1 -1
  48. openocr/openrec/postprocess/mgp_postprocess.py +1 -1
  49. openocr/openrec/postprocess/nrtr_postprocess.py +2 -2
  50. openocr/openrec/postprocess/smtr_postprocess.py +1 -1
  51. openocr/openrec/postprocess/srn_postprocess.py +1 -1
  52. openocr/openrec/postprocess/unirec_postprocess.py +58 -0
  53. openocr/openrec/postprocess/visionlan_postprocess.py +1 -1
  54. openocr/openrec/preprocess/__init__.py +5 -0
  55. openocr/openrec/preprocess/ce_label_encode.py +1 -1
  56. openocr/openrec/preprocess/cmer_label_encode.py +1025 -0
  57. openocr/openrec/preprocess/ctc_label_encode.py +1 -1
  58. openocr/openrec/preprocess/dptr_label_encode.py +177 -157
  59. openocr/openrec/preprocess/igtr_label_encode.py +4 -2
  60. openocr/openrec/preprocess/mdiff_label_encode.py +312 -0
  61. openocr/openrec/preprocess/rec_aug.py +128 -2
  62. openocr/openrec/preprocess/resize.py +57 -0
  63. openocr/openrec/preprocess/unirec_label_encode.py +62 -0
  64. openocr/tools/data/__init__.py +78 -55
  65. openocr/tools/data/cmer_web_dataset.py +310 -0
  66. openocr/tools/data/native_size_dataset.py +753 -0
  67. openocr/tools/data/native_size_sampler.py +158 -0
  68. openocr/tools/data/ratio_dataset_tvresize.py +2 -0
  69. openocr/tools/data/ratio_sampler.py +2 -1
  70. openocr/tools/download/download_dataset.py +38 -0
  71. openocr/tools/download/utils.py +28 -0
  72. openocr/tools/download_example_images.py +236 -0
  73. openocr/tools/engine/trainer.py +155 -39
  74. openocr/tools/eval_rec_all_ch.py +2 -2
  75. openocr/tools/infer_det.py +20 -2
  76. openocr/tools/infer_doc.py +898 -0
  77. openocr/tools/infer_doc_onnx.py +1172 -0
  78. openocr/tools/infer_e2e.py +27 -10
  79. openocr/tools/infer_rec.py +64 -15
  80. openocr/tools/infer_unirec_onnx.py +730 -0
  81. openocr/tools/to_markdown.py +468 -0
  82. openocr/tools/utils/ckpt.py +17 -5
  83. openocr/tools/utils/opendoc_onnx_utils/utils.py +1052 -0
  84. openocr_python-0.1.0.dev0.dist-info/METADATA +324 -0
  85. {openocr_python-0.0.9.dist-info → openocr_python-0.1.0.dev0.dist-info}/RECORD +89 -45
  86. {openocr_python-0.0.9.dist-info → openocr_python-0.1.0.dev0.dist-info}/WHEEL +1 -1
  87. openocr_python-0.1.0.dev0.dist-info/entry_points.txt +2 -0
  88. openocr_python-0.0.9.dist-info/METADATA +0 -149
  89. /openocr_python-0.0.9.dist-info/LICENCE → /openocr_python-0.1.0.dev0.dist-info/licenses/LICENSE +0 -0
  90. {openocr_python-0.0.9.dist-info → openocr_python-0.1.0.dev0.dist-info}/top_level.txt +0 -0
openocr/__init__.py CHANGED
@@ -8,4 +8,38 @@ __dir__ = os.path.dirname(os.path.abspath(__file__))
8
8
  sys.path.append(__dir__)
9
9
  sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..')))
10
10
 
11
- from tools.infer_e2e import OpenOCR, OpenDetector, OpenRecognizer
11
+ # from .tools.infer_e2e import OpenOCRE2E, OpenDetector, OpenRecognizer
12
+ # from .tools.infer_unirec_onnx import UniRecONNX
13
+ # from .tools.infer_doc_onnx import OpenDocONNX
14
+ from .openocr import OpenOCR, main
15
+
16
+ __version__ = '0.1.0.dev'
17
+
18
+ # Lazy import for demo interfaces to avoid initialization on import
19
+ def launch_openocr_demo(*args, **kwargs):
20
+ """Launch Gradio OCR demo"""
21
+ from .demo_gradio import launch_demo
22
+ return launch_demo(*args, **kwargs)
23
+
24
+ def launch_unirec_demo(*args, **kwargs):
25
+ """Launch UniRec demo"""
26
+ from .demo_unirec import launch_demo
27
+ return launch_demo(*args, **kwargs)
28
+
29
+ def launch_opendoc_demo(*args, **kwargs):
30
+ """Launch OpenDoc demo"""
31
+ from .demo_opendoc import launch_demo
32
+ return launch_demo(*args, **kwargs)
33
+
34
+ __all__ = [
35
+ 'OpenOCRE2E',
36
+ 'OpenDetector',
37
+ 'OpenRecognizer',
38
+ 'UniRecONNX',
39
+ 'OpenDocONNX',
40
+ 'OpenOCR',
41
+ 'main',
42
+ 'launch_openocr_demo',
43
+ 'launch_unirec_demo',
44
+ 'launch_opendoc_demo',
45
+ ]
@@ -0,0 +1,41 @@
1
+ root: ../evaluation
2
+ task: str
3
+ download_links:
4
+ # IC15_1811
5
+ - https://drive.usercontent.google.com/download?id=1eGY0kXNV1qVxeUpoGzs-ioUO-ky7msH6&authuser=0&confirm=t
6
+ - https://drive.usercontent.google.com/download?id=1BWv7aLoLAT7avY326gXP3GJF48UZpuBC&authuser=0&confirm=t
7
+ # SVT
8
+ - https://drive.usercontent.google.com/download?id=1ecEZ4cJ7dIbTCZRltE0s5KzUotQWagH-&authuser=0&confirm=t
9
+ - https://drive.usercontent.google.com/download?id=1OygBP7i9R-3Pwi6WodCcW31J8CUMugOJ&authuser=0&confirm=t
10
+ # IIIT5k
11
+ - https://drive.usercontent.google.com/download?id=1PJ9_IvIGZTS5hHdGLnpKuYKZcCO8jE0E&authuser=0&confirm=t
12
+ - https://drive.usercontent.google.com/download?id=10P3MixSBt1v8k8_6aFfziC33Z5IlM6Uf&authuser=0&confirm=t
13
+ # IC13_857
14
+ - https://drive.usercontent.google.com/download?id=1-wMHOFBXJaOaY-UD00nDn6qw2s_8R4Vd&authuser=0&confirm=t
15
+ - https://drive.usercontent.google.com/download?id=1J1QCFtOFxFKiLJIgTqZ6eRo9Y5QGqHpA&authuser=0&confirm=t
16
+ # SVTP
17
+ - https://drive.usercontent.google.com/download?id=1kckwfZkdaHG8k_FW5IIJKUaYZkF21Hza&authuser=0&confirm=t
18
+ - https://drive.usercontent.google.com/download?id=1x61lm_ea7lvIdxNPMG-jy-5W0MxtdH0N&authuser=0&confirm=t
19
+ # CUTE80
20
+ - https://drive.usercontent.google.com/download?id=1Zv_91c81tinLy5Je89HPr-5wUSnqXKIB&authuser=0&confirm=t
21
+ - https://drive.usercontent.google.com/download?id=1OuJ6QoJ9AlyNHIM9j2WedAPxTnac7kyY&authuser=0&confirm=t
22
+ filenames:
23
+ # IC15_1811
24
+ - ../evaluation/IC15_1811/data.mdb
25
+ - ../evaluation/IC15_1811/lock.mdb
26
+ # SVT
27
+ - ../evaluation/SVT/data.mdb
28
+ - ../evaluation/SVT/lock.mdb
29
+ # IIIT5k
30
+ - ../evaluation/IIIT5k/data.mdb
31
+ - ../evaluation/IIIT5k/lock.mdb
32
+ # IC13_857
33
+ - ../evaluation/IC13_857/data.mdb
34
+ - ../evaluation/IC13_857/lock.mdb
35
+ # SVTP
36
+ - ../evaluation/SVTP/data.mdb
37
+ - ../evaluation/SVTP/lock.mdb
38
+ # CUTE80
39
+ - ../evaluation/CUTE80/data.mdb
40
+ - ../evaluation/CUTE80/lock.mdb
41
+ check_validity: true
@@ -0,0 +1,9 @@
1
+ root: ../ltb
2
+ task: str
3
+ download_links:
4
+ - https://drive.usercontent.google.com/download?id=16AEA1YGTsyVB44uEjKi4ZUV1snjCYBr4&authuser=0&confirm=t
5
+ - https://drive.usercontent.google.com/download?id=1xU4OStrOaI23bPG4flWAPWn2YrQe2bmY&authuser=0&confirm=t
6
+ filenames:
7
+ - ../ltb/data.mdb
8
+ - ../ltb/lock.mdb
9
+ check_validity: true
@@ -0,0 +1,11 @@
1
+ root: ../synth
2
+ task: str
3
+ download_links:
4
+ - https://drive.usercontent.google.com/download?id=1FIoplSFZ-BKQoRDHDXsVMKa844e-K8PD&authuser=0&confirm=t
5
+ - https://drive.usercontent.google.com/download?id=1eckTvaeRtlTZvbO2orrVz-cIuIk6i87K&authuser=0&confirm=t
6
+ - https://drive.usercontent.google.com/download?id=1PBXTf-2PnmEvJBsqzJqxxRwzhAZGTiMG&authuser=0&confirm=t
7
+ filenames:
8
+ - ../synth/MJ_train.zip
9
+ - ../synth/MJ_val.zip
10
+ - ../synth/MJ_test.zip
11
+ check_validity: true
@@ -0,0 +1,25 @@
1
+ root: ../OpenVINO
2
+ task: str
3
+ download_links:
4
+ # train_1
5
+ - https://drive.usercontent.google.com/download?id=1q23QAIRTyG0t-bBm4aAwRwiqB6VUfphw&authuser=0&confirm=
6
+ # train_2
7
+ - https://drive.usercontent.google.com/download?id=1AtbaJljM68cbZqi5lcM92d9VkQUCbSqI&authuser=0&confirm=
8
+ # train_5
9
+ - https://drive.usercontent.google.com/download?id=1dejstYnJ8_sESuO_uvwi__jT1B8gPxf3&authuser=0&confirm=t
10
+ # train_f
11
+ - https://drive.usercontent.google.com/download?id=1C4akchTc7-yi1OS_sJ3KP693UKcnecke&authuser=0&confirm=t
12
+ # validation
13
+ - https://drive.usercontent.google.com/download?id=17TRzSQhuK_juAxAv3KmX0y13pQP2cz6R&authuser=0&confirm=t
14
+ filenames:
15
+ # train_1
16
+ - ../OpenVINO/train_1.zip
17
+ # train_2
18
+ - ../OpenVINO/train_2.zip
19
+ # train_5
20
+ - ../OpenVINO/train_5.zip
21
+ # train_f
22
+ - ../OpenVINO/train_f.zip
23
+ # validation
24
+ - ../OpenVINO/validation.zip
25
+ check_validity: true
@@ -0,0 +1,17 @@
1
+ root: ../OST
2
+ task: str
3
+ download_links:
4
+ # OST heavy
5
+ - https://drive.usercontent.google.com/download?id=1RGpIFbD_SRlrzZFBoVF_LGvetNx1-5pg&authuser=0&confirm=t
6
+ - https://drive.usercontent.google.com/download?id=1Th4MfDf44k0EBpIqCLqVoGRu6G-FP1hq&authuser=0&confirm=t
7
+ # OST weak
8
+ - https://drive.usercontent.google.com/download?id=1z5CTDJucUnvALG12Q4UXk1DDKJDd8WJn&authuser=0&confirm=t
9
+ - https://drive.usercontent.google.com/download?id=1V17TTkX3sjpV7v0km_F2SDCK0tL3k_ls&authuser=0&confirm=t
10
+ filenames:
11
+ # OST heavy
12
+ - ../OST/heavy/data.mdb
13
+ - ../OST/heavy/lock.mdb
14
+ # OST weak
15
+ - ../OST/weak/data.mdb
16
+ - ../OST/weak/lock.mdb
17
+ check_validity: true
@@ -0,0 +1,7 @@
1
+ root: ../synth
2
+ task: str
3
+ download_links:
4
+ - https://drive.usercontent.google.com/download?id=1T-enqkq6_l2HqrsV3da_h0oJ7CUKu_oc&authuser=0&confirm=t
5
+ filenames:
6
+ - ../synth/ST.zip
7
+ check_validity: true
@@ -0,0 +1,77 @@
1
+ root: ../test
2
+ task: str
3
+ download_links:
4
+ # IC13_857
5
+ - https://drive.usercontent.google.com/download?id=1PZSCbe6_DI8MlCqCRWXGT2PP92_frIXq&authuser=0&confirm=t
6
+ - https://drive.usercontent.google.com/download?id=1qkN7NDg0zUHxUiZHAeEatDTqlsgpFWp3&authuser=0&confirm=t
7
+ # IC15_2077
8
+ - https://drive.usercontent.google.com/download?id=1dFkY3DNbr-Mepn3TWBiA9COEJ63fGFcp&authuser=0&confirm=t
9
+ - https://drive.usercontent.google.com/download?id=1UvVwLNZ3tS1YdTBa8MulPzjeVezKaDro&authuser=0&confirm=t
10
+ # SVTP
11
+ - https://drive.usercontent.google.com/download?id=1aofeerilxJ7J3S7QxuCEXbmXTpz8Xshx&authuser=0&confirm=t
12
+ - https://drive.usercontent.google.com/download?id=1rJ1KoO4K_VUxEAUN_bMgBGzK8_JZAAno&authuser=0&confirm=t
13
+ # IIIT5k
14
+ - https://drive.usercontent.google.com/download?id=1XFO2M1Kbgwv3-iTNTmhQXAEjNmKYOeoT&authuser=0&confirm=t
15
+ - https://drive.usercontent.google.com/download?id=1stwK2hFsyaV7HHsEG9EYgnUQebNb2_nG&authuser=0&confirm=t
16
+ # COCOv1.4
17
+ - https://drive.usercontent.google.com/download?id=1Se2QSGS19xx7Gfy-SUdX9mlAOr2eYsfA&authuser=0&confirm=t
18
+ - https://drive.usercontent.google.com/download?id=1xvekFi389QfkH7yS0JIVV0QzjhUspjDv&authuser=0&confirm=t
19
+ # IC15_1811
20
+ - https://drive.usercontent.google.com/download?id=1pHsw8wrThD9EGEE6AusQLZozefSj4iyR&authuser=0&confirm=t
21
+ - https://drive.usercontent.google.com/download?id=1TXZ1qHuKAksaAlvd3qMv4IHKnN-IJW9a&authuser=0&confirm=t
22
+ # Uber
23
+ - https://drive.usercontent.google.com/download?id=1L2j6BZeLTGQ1FIl8HB_D3AFiWLltGV5r&authuser=0&confirm=t
24
+ - https://drive.usercontent.google.com/download?id=12DUj28yzLWxFO_gfMfSjTkRujYD5MNEE&authuser=0&confirm=t
25
+ # IC13_1095
26
+ - https://drive.usercontent.google.com/download?id=1fu8onMt3Z6fDLNAiHcm-sQ2qCXduE-FU&authuser=0&confirm=t
27
+ - https://drive.usercontent.google.com/download?id=1OQAZtLj8U2Cl4L0ErGFsz6vGIVTTWasD&authuser=0&confirm=t
28
+ # IC13_1015
29
+ - https://drive.usercontent.google.com/download?id=1mbsfuvWB282HYfn9tbqcj1nUDkLXcSNB&authuser=0&confirm=t
30
+ - https://drive.usercontent.google.com/download?id=1QGogU_hV-oN7iY2POutdD2LDcmK6plnV&authuser=0&confirm=t
31
+ # ArT
32
+ - https://drive.usercontent.google.com/download?id=1-53knSy-uTSngCG7wyBngVyTuTCmdnWl&authuser=0&confirm=t
33
+ - https://drive.usercontent.google.com/download?id=172EsSaf7BVaB1ORtohi-Jc_8SuUKZGGf&authuser=0&confirm=t
34
+ # SVT
35
+ - https://drive.usercontent.google.com/download?id=1p7aVUr9Yr7c4X4YUBvk2-YP28rraHjn9&authuser=0&confirm=t
36
+ - https://drive.usercontent.google.com/download?id=1ALmhvSleZ0yf-lcdbQPP3M9Zc3oqnXij&authuser=0&confirm=t
37
+ # CUTE80
38
+ - https://drive.usercontent.google.com/download?id=1Ujr4axHKnu54P2rIGUhkjdM6XlhDYrI_&authuser=0&confirm=t
39
+ - https://drive.usercontent.google.com/download?id=1DvZi9L3MqjO2zRUyCg3YvP4qMAt2bsme&authuser=0&confirm=t
40
+ filenames:
41
+ # IC13_857
42
+ - ../test/IC13_857/data.mdb
43
+ - ../test/IC13_857/lock.mdb
44
+ # IC15_2077
45
+ - ../test/IC15_2077/data.mdb
46
+ - ../test/IC15_2077/lock.mdb
47
+ # SVTP
48
+ - ../test/SVTP/data.mdb
49
+ - ../test/SVTP/lock.mdb
50
+ # IIIT5k
51
+ - ../test/IIIT5k/data.mdb
52
+ - ../test/IIIT5k/lock.mdb
53
+ # COCOv1.4
54
+ - ../test/COCOv1.4/data.mdb
55
+ - ../test/COCOv1.4/lock.mdb
56
+ # IC15_1811
57
+ - ../test/IC15_1811/data.mdb
58
+ - ../test/IC15_1811/lock.mdb
59
+ # Uber
60
+ - ../test/Uber/data.mdb
61
+ - ../test/Uber/lock.mdb
62
+ # IC13_1095
63
+ - ../test/IC13_1095/data.mdb
64
+ - ../test/IC13_1095/lock.mdb
65
+ # IC13_1015
66
+ - ../test/IC13_1015/data.mdb
67
+ - ../test/IC13_1015/lock.mdb
68
+ # ArT
69
+ - ../test/ArT/data.mdb
70
+ - ../test/ArT/lock.mdb
71
+ # SVT
72
+ - ../test/SVT/data.mdb
73
+ - ../test/SVT/lock.mdb
74
+ # CUTE80
75
+ - ../test/CUTE80/data.mdb
76
+ - ../test/CUTE80/lock.mdb
77
+ check_validity: true
@@ -0,0 +1,13 @@
1
+ root: ../TextOCR
2
+ task: str
3
+ download_links:
4
+ # train
5
+ - https://drive.usercontent.google.com/download?id=1jVjJFno4pnsU0Cp_kn4MIXQrChmELy92&authuser=0&confirm=
6
+ # val
7
+ - https://drive.usercontent.google.com/download?id=1ubIRu01MXIek6OvInu-XjaIbw6277-vw&authuser=0&confirm=t
8
+ filenames:
9
+ # train
10
+ - ../TextOCR/train.zip
11
+ # val
12
+ - ../TextOCR/val.zip
13
+ check_validity: true
@@ -0,0 +1,13 @@
1
+ root: ../TextOCR_horizontal
2
+ task: str
3
+ download_links:
4
+ # train
5
+ - https://drive.usercontent.google.com/download?id=1sWH6J11xbjQb8SH7fdG_8mIKVI81ZQy5&authuser=0&confirm=
6
+ # val
7
+ - https://drive.usercontent.google.com/download?id=1gIE-AU2o-5hvg288-bjphO6UkI5AEQ2d&authuser=0&confirm=t
8
+ filenames:
9
+ # train
10
+ - ../TextOCR_horizontal/train.zip
11
+ # val
12
+ - ../TextOCR_horizontal/val.zip
13
+ check_validity: true
@@ -0,0 +1,47 @@
1
+ root: ../u14m
2
+ task: str
3
+ download_links:
4
+ # artistic
5
+ - https://drive.usercontent.google.com/download?id=1Je2DTuFHnkXDI99yDnm9Anl5naWaCQwd&authuser=0&confirm=t
6
+ - https://drive.usercontent.google.com/download?id=1xtT_Q0juBJUIvAG55qBxoVNNTECd2usZ&authuser=0&confirm=t
7
+ # contextless
8
+ - https://drive.usercontent.google.com/download?id=1_0OzyzWhZOmGrHkayFTVrzhrQrNRDRPR&authuser=0&confirm=t
9
+ - https://drive.usercontent.google.com/download?id=1PPgC42y3xoM9bR0HQFbDYbcT3PzMdD_y&authuser=0&confirm=t
10
+ # salient
11
+ - https://drive.usercontent.google.com/download?id=1tHLMYBmTqRnxvFOTT3dfLfQiundqFWfd&authuser=0&confirm=t
12
+ - https://drive.usercontent.google.com/download?id=13NQgpAtCK0kh9M5E2pAUmKKEp6Qu5Xwj&authuser=0&confirm=t
13
+ # multi_words
14
+ - https://drive.usercontent.google.com/download?id=1IlnDKX3V_Vp9gsDGFB0xoqsVLH1vtxUI&authuser=0&confirm=t
15
+ - https://drive.usercontent.google.com/download?id=1mFFjC7C0CwevvkwFU9YeVbZBdps_3Qpb&authuser=0&confirm=t
16
+ # curve
17
+ - https://drive.usercontent.google.com/download?id=1MxhMd85cmhUtI2lmtXhZQuFk7lav0_fw&authuser=0&confirm=t
18
+ - https://drive.usercontent.google.com/download?id=1N03g-4e-kJG2mRvlM0c5TrwWAkd-iG-Q&authuser=0&confirm=t
19
+ # general
20
+ - https://drive.usercontent.google.com/download?id=1Oqt7OaycP466NWoDmoJ3FqS8YP3YRgvu&authuser=0&confirm=t
21
+ - https://drive.usercontent.google.com/download?id=1K0MrX5eYNt8IIGFHXCwg0_oI5OF5PPFO&authuser=0&confirm=t
22
+ # multi_oriented
23
+ - https://drive.usercontent.google.com/download?id=1TKZFcZPVk0ThqfF-AGhJk_OCLg0ykKbv&authuser=0&confirm=t
24
+ - https://drive.usercontent.google.com/download?id=1PAoLMUWuR7O2-7XRoKkNzQcSiznErQzD&authuser=0&confirm=t
25
+ filenames:
26
+ # artistic
27
+ - ../u14m/artistic/data.mdb
28
+ - ../u14m/artistic/lock.mdb
29
+ # contextless
30
+ - ../u14m/contextless/data.mdb
31
+ - ../u14m/contextless/lock.mdb
32
+ # salient
33
+ - ../u14m/salient/data.mdb
34
+ - ../u14m/salient/lock.mdb
35
+ # multi_words
36
+ - ../u14m/multi_words/data.mdb
37
+ - ../u14m/multi_words/lock.mdb
38
+ # curve
39
+ - ../u14m/curve/data.mdb
40
+ - ../u14m/curve/lock.mdb
41
+ # general
42
+ - ../u14m/general/data.mdb
43
+ - ../u14m/general/lock.mdb
44
+ # multi_oriented
45
+ - ../u14m/multi_oriented/data.mdb
46
+ - ../u14m/multi_oriented/lock.mdb
47
+ check_validity: true
@@ -0,0 +1,35 @@
1
+ root: ../Union14M-L-LMDB-Filtered
2
+ task: str
3
+ download_links:
4
+ # train_challenging
5
+ - https://drive.usercontent.google.com/download?id=1etwzBgGHjsFsb0sygsaRnKbanW2PMe07&authuser=0&confirm=t
6
+ - https://drive.usercontent.google.com/download?id=1ly6FJfPjItwGlVQ-ifTrzzM3rVu3Ezhr&authuser=0&confirm=t
7
+ # train_easy
8
+ - https://drive.usercontent.google.com/download?id=1_zeNluTnywIaa5h3PN-Ah9tKyByypot7&authuser=0&confirm=t
9
+ - https://drive.usercontent.google.com/download?id=1caYLeQHDidXgVBDi9IWXbO1gg__DYq9a&authuser=0&confirm=t
10
+ # train_hard
11
+ - https://drive.usercontent.google.com/download?id=1eP6s2xyYPZX9gykvWA4VSOc3Fqul_UB_&authuser=0&confirm=t
12
+ - https://drive.usercontent.google.com/download?id=1-ZlCvocX8P5uVRclUXp_5DNGLDzd16EO&authuser=0&confirm=t
13
+ # train_medium
14
+ - https://drive.usercontent.google.com/download?id=1s_CoaLNJEr-UxHYiqZ5jOcliMCFiRUUy&authuser=0&confirm=t
15
+ - https://drive.usercontent.google.com/download?id=1Wpj6WVpZ5Ily77kVwfQ18CiZBzkgmEnF&authuser=0&confirm=t
16
+ # train_normal
17
+ - https://drive.usercontent.google.com/download?id=1jPt44arlAswl9cXZjzmVcdpptdTPpJ3I&authuser=0&confirm=t
18
+ - https://drive.usercontent.google.com/download?id=1Rfc5kE03AzOUv7B_eYcBhUV8KMQ2MZ1m&authuser=0&confirm=t
19
+ filenames:
20
+ # train_challenging
21
+ - ../Union14M-L-LMDB-Filtered/train_challenging/data.mdb
22
+ - ../Union14M-L-LMDB-Filtered/train_challenging/lock.mdb
23
+ # train_easy
24
+ - ../Union14M-L-LMDB-Filtered/train_easy/data.mdb
25
+ - ../Union14M-L-LMDB-Filtered/train_easy/lock.mdb
26
+ # train_hard
27
+ - ../Union14M-L-LMDB-Filtered/train_hard/data.mdb
28
+ - ../Union14M-L-LMDB-Filtered/train_hard/lock.mdb
29
+ # train_medium
30
+ - ../Union14M-L-LMDB-Filtered/train_medium/data.mdb
31
+ - ../Union14M-L-LMDB-Filtered/train_medium/lock.mdb
32
+ # train_normal
33
+ - ../Union14M-L-LMDB-Filtered/train_normal/data.mdb
34
+ - ../Union14M-L-LMDB-Filtered/train_normal/lock.mdb
35
+ check_validity: true
@@ -0,0 +1,127 @@
1
+ Global:
2
+ device: gpu
3
+ epoch_num: 1
4
+ log_smooth_window: 20
5
+ print_batch_step: 10
6
+ output_dir: ./output/cmer
7
+ save_epoch_step: [0, 1]
8
+ save_batch_step: [30000, 10000]
9
+ eval_epoch_step: [200, 1]
10
+ eval_batch_step: [100000000, 2000]
11
+ total_iter_steps: 1000000
12
+ cal_metric_during_train: False
13
+ # pretrained_model: /path/to/pretrained/model.pth
14
+ checkpoints:
15
+ resume_from_iter: False
16
+ use_tensorboard: false
17
+ infer_img: ./cmer_test_image
18
+ max_text_length: &max_text_length 1024
19
+ use_space_char: &use_space_char True
20
+ save_res_path: ./output/cmer/infer_results.txt
21
+ use_amp: True
22
+ use_ema: False
23
+ use_transformers: True
24
+ grad_clip_val: 1.0
25
+
26
+ Optimizer:
27
+ name: AdamW
28
+ lr: 3.0e-5
29
+ weight_decay: 0.05
30
+ filter_bias_and_bn: True
31
+ fused: True
32
+
33
+ LRScheduler:
34
+ name: WarmupCosineLR
35
+ warmup_steps: 5000
36
+ eta_min: 3.0e-7
37
+
38
+
39
+ Architecture:
40
+ model_type: rec
41
+ algorithm: CMER
42
+ in_channels: 3
43
+ Transform:
44
+ Encoder:
45
+ Decoder:
46
+ out_channels: -1
47
+ vision_config:
48
+ num_layers: 8
49
+ num_heads: 16
50
+ hidden_dim: 1024
51
+ down_sample_ratio: 32
52
+ decoder_config:
53
+ attention_bias: false
54
+ attention_dropout: 0.0
55
+ bos_token_id: 2
56
+ decoder_layers: 8
57
+ eos_token_id: 3
58
+ head_dim: 16
59
+ hidden_act: "gelu"
60
+ hidden_size: 1024
61
+ initializer_range: 0.02
62
+ intermediate_size: 3072
63
+ layer_types: null
64
+ max_position_embeddings: 4096
65
+ max_window_layers: 28
66
+ num_attention_heads: 16
67
+ num_key_value_heads: 16
68
+ pad_token_id: 1
69
+ rms_norm_eps: 1.0e-06
70
+ rope_scaling: null
71
+ rope_theta: 100000.0
72
+ sliding_window: 4096
73
+ tie_word_embeddings: false
74
+ use_cache: true
75
+ use_sliding_window: true
76
+ vocab_size: 23948
77
+
78
+ Loss:
79
+ name: CMERLoss
80
+
81
+ PostProcess:
82
+ name: CMERLabelDecode
83
+
84
+ Train:
85
+ dataset:
86
+ name: CMERWebDataSet
87
+ data_dir: "/path/to/MER-17M"
88
+ max_text_length: 1024
89
+ processor: CMERProcessor
90
+ processor_source: openrec.preprocess.cmer_label_encode
91
+ transforms:
92
+ - DecodeImagePIL: # load image
93
+ img_mode: RGB
94
+ - CMERProcessor:
95
+
96
+ loader:
97
+ batch_size_per_card: 16
98
+ drop_last: True
99
+ shuffle: False
100
+ num_workers: 8
101
+ pin_memory: True
102
+
103
+
104
+ # not used
105
+ Eval:
106
+ dataset:
107
+ name: CMERWebDataSet
108
+ data_dir: "/path/to/MER-17M"
109
+ processor: CMERProcessor
110
+ processor_source: openrec.preprocess.cmer_label_encode
111
+ transforms:
112
+ - DecodeImagePIL: # load image
113
+ img_mode: RGB
114
+ - CMERProcessor:
115
+
116
+ loader:
117
+ batch_size_per_card: 8
118
+ drop_last: False
119
+ shuffle: False
120
+ num_workers: 8
121
+ pin_memory: False
122
+
123
+
124
+ Metric:
125
+ name: "CMERMetric"
126
+ args:
127
+ main_indicator: bleu
@@ -0,0 +1,152 @@
1
+ Global:
2
+ device: gpu
3
+ epoch_num: 40
4
+ log_smooth_window: 20
5
+ print_batch_step: 10
6
+ output_dir: ./output/rec/u14m_filter/svtrv2_mdiffdecoder_base/
7
+ save_epoch_step: [30, 1]
8
+ # evaluation is run every 500 iterations
9
+ eval_batch_step: [0, 500]
10
+ eval_epoch_step: [0, 1]
11
+ cal_metric_during_train: False
12
+ pretrained_model:
13
+ checkpoints:
14
+ use_tensorboard: false
15
+ infer_img:
16
+ # for data or label process
17
+ character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt # 96en
18
+ # ./tools/utils/ppocr_keys_v1.txt # ch
19
+ max_text_length: &max_text_length 25
20
+ use_space_char: &use_space_char False
21
+ save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_mdiffdecoder_base.txt
22
+ use_amp: True
23
+ grad_clip_val: 20.0
24
+
25
+ Optimizer:
26
+ name: AdamW
27
+ lr: 0.0005 # for 8gpus bs128/gpu
28
+ weight_decay: 0.05
29
+ filter_bias_and_bn: True
30
+
31
+ LRScheduler:
32
+ name: OneCycleLR
33
+ warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
34
+ cycle_momentum: False
35
+
36
+ Architecture:
37
+ model_type: rec
38
+ algorithm: MDiff4STR
39
+ in_channels: 3
40
+ Transform:
41
+ Encoder:
42
+ name: SVTRv2LNConvTwo33
43
+ use_pos_embed: False
44
+ dims: [128, 256, 384]
45
+ depths: [6, 6, 6]
46
+ num_heads: [4, 8, 12]
47
+ mixer: [['Conv','Conv','Conv','Conv','Conv','Conv'],['Conv','Conv','FGlobal','Global','Global','Global'],['Global','Global','Global','Global','Global','Global']]
48
+ local_k: [[5, 5], [5, 5], [-1, -1]]
49
+ sub_k: [[1, 1], [2, 1], [-1, -1]]
50
+ last_stage: false
51
+ feat2d: False
52
+ Decoder:
53
+ name: MDiffDecoder
54
+ num_decoder_layers: 6
55
+ nhead: 6
56
+ max_len: *max_text_length
57
+ parallel_decoding: False
58
+ autoregressive_decoding: False
59
+ low_confidence_decoding: False
60
+ random_mask_decoding: False
61
+ semi_autoregressive_decoding: False
62
+ cloze_mask_decoding: False
63
+ sampler_step: 3
64
+ sample_k: &sample_k 3
65
+ temperature: 1.0
66
+
67
+ Loss:
68
+ name: MDiffLoss
69
+
70
+ PostProcess:
71
+ name: ARLabelDecode
72
+ character_dict_path: *character_dict_path
73
+ use_space_char: *use_space_char
74
+
75
+ Metric:
76
+ name: RecMetric
77
+ main_indicator: acc
78
+ is_filter: True
79
+
80
+ Train:
81
+ dataset:
82
+ name: RatioDataSetTVResize
83
+ ds_width: True
84
+ padding: false
85
+ data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_train_challenging',
86
+ '../Union14M-L-LMDB-Filtered/filter_train_hard',
87
+ '../Union14M-L-LMDB-Filtered/filter_train_medium',
88
+ '../Union14M-L-LMDB-Filtered/filter_train_normal',
89
+ '../Union14M-L-LMDB-Filtered/filter_train_easy',
90
+ ]
91
+ transforms:
92
+ - DecodeImagePIL: # load image
93
+ img_mode: RGB
94
+ - PARSeqAugPIL:
95
+ - MDiffLabelEncode: # Class handling label
96
+ character_dict_path: *character_dict_path
97
+ use_space_char: *use_space_char
98
+ max_text_length: *max_text_length
99
+ sample_num: *sample_k
100
+ - KeepKeys:
101
+ keep_keys: ['image', 'label', 'reflect_ids', 'noisy_batch', 'masked_indices', 'p_mask', 'length'] # dataloader will return list in this order
102
+ sampler:
103
+ name: RatioSampler
104
+ scales: [[128, 32]] # w, h
105
+ # divided_factor: ensures the width and height dimensions can be divided by the downsampling multiple
106
+ first_bs: &bs 256
107
+ fix_bs: false
108
+ divided_factor: [4, 16] # w, h
109
+ is_training: True
110
+ loader:
111
+ shuffle: True
112
+ batch_size_per_card: *bs
113
+ drop_last: True
114
+ max_ratio: &max_ratio 4
115
+ num_workers: 4
116
+
117
+ Eval:
118
+ dataset:
119
+ name: RatioDataSetTVResize
120
+ ds_width: True
121
+ padding: False
122
+ data_dir_list: [
123
+ '../evaluation/CUTE80',
124
+ '../evaluation/IC13_857',
125
+ '../evaluation/IC15_1811',
126
+ '../evaluation/IIIT5k_3000',
127
+ '../evaluation/SVT',
128
+ '../evaluation/SVTP',
129
+ ]
130
+ transforms:
131
+ - DecodeImagePIL: # load image
132
+ img_mode: RGB
133
+ - ARLabelEncode: # Class handling label
134
+ character_dict_path: *character_dict_path
135
+ use_space_char: *use_space_char
136
+ max_text_length: *max_text_length
137
+ - KeepKeys:
138
+ keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
139
+ sampler:
140
+ name: RatioSampler
141
+ scales: [[128, 32]] # w, h
142
+ # divided_factor: ensures the width and height dimensions can be divided by the downsampling multiple
143
+ first_bs: *bs
144
+ fix_bs: false
145
+ divided_factor: [4, 16] # w, h
146
+ is_training: False
147
+ loader:
148
+ shuffle: False
149
+ drop_last: False
150
+ batch_size_per_card: *bs
151
+ max_ratio: *max_ratio
152
+ num_workers: 4