magic-pdf 1.2.2__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- magic_pdf/data/batch_build_dataset.py +156 -0
- magic_pdf/data/dataset.py +44 -24
- magic_pdf/data/utils.py +108 -9
- magic_pdf/dict2md/ocr_mkcontent.py +4 -3
- magic_pdf/libs/pdf_image_tools.py +11 -6
- magic_pdf/libs/performance_stats.py +12 -1
- magic_pdf/libs/version.py +1 -1
- magic_pdf/model/batch_analyze.py +175 -201
- magic_pdf/model/doc_analyze_by_custom_model.py +137 -92
- magic_pdf/model/pdf_extract_kit.py +5 -38
- magic_pdf/model/sub_modules/language_detection/utils.py +2 -4
- magic_pdf/model/sub_modules/language_detection/yolov11/YOLOv11.py +24 -19
- magic_pdf/model/sub_modules/layout/doclayout_yolo/DocLayoutYOLO.py +3 -1
- magic_pdf/model/sub_modules/mfd/yolov8/YOLOv8.py +3 -1
- magic_pdf/model/sub_modules/mfr/unimernet/Unimernet.py +31 -102
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/__init__.py +13 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/modeling_unimernet.py +189 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/__init__.py +8 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/configuration_unimer_mbart.py +163 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/modeling_unimer_mbart.py +2351 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/__init__.py +9 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/configuration_unimer_swin.py +132 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/image_processing_unimer_swin.py +132 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/modeling_unimer_swin.py +1084 -0
- magic_pdf/model/sub_modules/model_init.py +50 -37
- magic_pdf/model/sub_modules/model_utils.py +17 -11
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/__init__.py +1 -0
- magic_pdf/model/sub_modules/ocr/{paddleocr → paddleocr2pytorch}/ocr_utils.py +102 -97
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorch_paddle.py +193 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/base_ocr_v20.py +39 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/__init__.py +8 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/__init__.py +48 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/operators.py +418 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/__init__.py +25 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/base_model.py +105 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/__init__.py +62 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/det_mobilenet_v3.py +269 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_hgnet.py +290 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_lcnetv3.py +516 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mobilenet_v3.py +136 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mv1_enhance.py +234 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_svtrnet.py +638 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/common.py +76 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/__init__.py +43 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/cls_head.py +23 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/det_db_head.py +109 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_ctc_head.py +54 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_multi_head.py +58 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/__init__.py +29 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/db_fpn.py +456 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/intracl.py +117 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/rnn.py +228 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/__init__.py +33 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/cls_postprocess.py +20 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/db_postprocess.py +179 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/rec_postprocess.py +690 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/__init__.py +0 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/arch_config.yaml +383 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/arabic_dict.txt +162 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/chinese_cht_dict.txt +8421 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/cyrillic_dict.txt +163 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/devanagari_dict.txt +167 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/en_dict.txt +95 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/japan_dict.txt +4399 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ka_dict.txt +153 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/korean_dict.txt +3688 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/latin_dict.txt +185 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocr_keys_v1.txt +6623 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ta_dict.txt +128 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/te_dict.txt +151 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/models_config.yml +49 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/__init__.py +1 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/__init__.py +1 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_cls.py +106 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_det.py +217 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_rec.py +440 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_system.py +104 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/pytorchocr_utility.py +227 -0
- magic_pdf/model/sub_modules/table/rapidtable/rapid_table.py +10 -18
- magic_pdf/pdf_parse_union_core_v2.py +112 -74
- magic_pdf/pre_proc/ocr_dict_merge.py +9 -1
- magic_pdf/pre_proc/ocr_span_list_modify.py +51 -0
- magic_pdf/resources/model_config/model_configs.yaml +1 -1
- magic_pdf/tools/cli.py +30 -12
- magic_pdf/tools/common.py +90 -12
- {magic_pdf-1.2.2.dist-info → magic_pdf-1.3.0.dist-info}/METADATA +50 -40
- magic_pdf-1.3.0.dist-info/RECORD +202 -0
- magic_pdf/model/sub_modules/ocr/paddleocr/ppocr_273_mod.py +0 -204
- magic_pdf/model/sub_modules/ocr/paddleocr/ppocr_291_mod.py +0 -213
- magic_pdf/model/sub_modules/table/structeqtable/struct_eqtable.py +0 -37
- magic_pdf/model/sub_modules/table/tablemaster/tablemaster_paddle.py +0 -71
- magic_pdf/resources/model_config/UniMERNet/demo.yaml +0 -46
- magic_pdf/resources/model_config/layoutlmv3/layoutlmv3_base_inference.yaml +0 -351
- magic_pdf-1.2.2.dist-info/RECORD +0 -147
- /magic_pdf/model/sub_modules/{ocr/paddleocr/__init__.py → mfr/unimernet/unimernet_hf/unimer_mbart/tokenization_unimer_mbart.py} +0 -0
- /magic_pdf/model/sub_modules/{table/structeqtable → ocr/paddleocr2pytorch/pytorchocr}/__init__.py +0 -0
- /magic_pdf/model/sub_modules/{table/tablemaster → ocr/paddleocr2pytorch/pytorchocr/modeling}/__init__.py +0 -0
- {magic_pdf-1.2.2.dist-info → magic_pdf-1.3.0.dist-info}/LICENSE.md +0 -0
- {magic_pdf-1.2.2.dist-info → magic_pdf-1.3.0.dist-info}/WHEEL +0 -0
- {magic_pdf-1.2.2.dist-info → magic_pdf-1.3.0.dist-info}/entry_points.txt +0 -0
- {magic_pdf-1.2.2.dist-info → magic_pdf-1.3.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,202 @@
|
|
1
|
+
magic_pdf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
+
magic_pdf/pdf_parse_union_core_v2.py,sha256=qRLsYctJxthgo2zv56WDV8dU8dmaykRdsHOFiBcDoKM,40385
|
3
|
+
magic_pdf/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
+
magic_pdf/config/constants.py,sha256=fXGzANULnJWLPxwYp3BEFWx-rnorzpySMx63ffyEyq4,1272
|
5
|
+
magic_pdf/config/drop_reason.py,sha256=CqjMzBE96Qo8OeFvhhhItY8WhyqsKhE3DmyJLoQZNCc,2248
|
6
|
+
magic_pdf/config/drop_tag.py,sha256=CjveyzhAsHm_bfXB7ZZNKruw1NR-WdKD8Hz6OhQdG0A,680
|
7
|
+
magic_pdf/config/enums.py,sha256=CImYuw4sbKpq9zrj6zrrEvtdoGkjxDt8S8ByNVDpypU,89
|
8
|
+
magic_pdf/config/exceptions.py,sha256=2tsJxYUebVeimyYBGQkc9Nd1kIakTmWmz3SDcfJWy54,784
|
9
|
+
magic_pdf/config/make_content_config.py,sha256=J2eJIhVHBPGwX18zVQomQUOxs8LcfeGLxLgdBEeRvLg,248
|
10
|
+
magic_pdf/config/model_block_type.py,sha256=y5ie2ZLvo-h8OdVk8HOEha6qK0OJFtLmtOhYjrV680g,166
|
11
|
+
magic_pdf/config/ocr_content_type.py,sha256=e_7RBTdShaWvWhMO2SFou7GM521elMH_Jtn5usbHWdY,890
|
12
|
+
magic_pdf/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
|
+
magic_pdf/data/batch_build_dataset.py,sha256=rS4f50hBc7IvSqa_Gd84E_tSYpQ66BMaeZkCPd5Ajxw,4601
|
14
|
+
magic_pdf/data/dataset.py,sha256=pSz2Ukj9-bEKyg06Wsl9jBhCWeX_0moBtaKujQuYwqI,11830
|
15
|
+
magic_pdf/data/read_api.py,sha256=_faBnYE3iU_EiQLNFjVM6a8IQtOGAcSQNYBZsTSN1d8,5225
|
16
|
+
magic_pdf/data/schemas.py,sha256=oIUTBzK8Wq8Wuy8A_uilWAbVhucRvOs9_f3lSKYgcmQ,664
|
17
|
+
magic_pdf/data/utils.py,sha256=dNWIJECPXaakKocI4z5Tq6vhDDSnR-bVWQV7DO2w_A8,5335
|
18
|
+
magic_pdf/data/data_reader_writer/__init__.py,sha256=QtevUaeSivv9dQKi3Tomfn4Z0E4To0cB8qXTnglxaHc,705
|
19
|
+
magic_pdf/data/data_reader_writer/base.py,sha256=nqmAcdHOXMOJO6RAT3ILligDFaw8Op0STyCw5yOzAbI,1706
|
20
|
+
magic_pdf/data/data_reader_writer/filebase.py,sha256=VbNAxLyo0Io0j7iprJERt_TqxzHAtA7cUyPIaJstToU,2146
|
21
|
+
magic_pdf/data/data_reader_writer/multi_bucket_s3.py,sha256=4pEJ8PPd3nX7sccHobCs0mbDM8BiqDP_sAEz7CIvpNI,5938
|
22
|
+
magic_pdf/data/data_reader_writer/s3.py,sha256=9Oy1cNuXMwG1e8PgZ7AR-pn_MqHAhkgAGnyEZCYoYAA,2408
|
23
|
+
magic_pdf/data/io/__init__.py,sha256=WKaIlu8i5AWYxFCGNJcorAfMnlUQDOF8CX07Ycfnu2c,294
|
24
|
+
magic_pdf/data/io/base.py,sha256=SqNQqe30ZvoVvg7GVv-hLMCjN6yBgDyQQWeLgGsTfhQ,1118
|
25
|
+
magic_pdf/data/io/http.py,sha256=XlKB0DNf4a_uUnfgcclvaaOtmE7lmddx0DnK8A-emAM,958
|
26
|
+
magic_pdf/data/io/s3.py,sha256=hyA7sbNriQy64xd_uyJ7acN_oneQ1Pdmoc7_xcvkue8,3606
|
27
|
+
magic_pdf/dict2md/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
28
|
+
magic_pdf/dict2md/ocr_mkcontent.py,sha256=U4DqKfD4dJ2S5Z8NEAGhuLYkEOIeC-BWuArMbwi7BJs,13784
|
29
|
+
magic_pdf/filter/__init__.py,sha256=_7lSez_myu4b6cdzPpQ-NfREuqeBSq_QdyBPKVLyq2U,1505
|
30
|
+
magic_pdf/filter/pdf_classify_by_type.py,sha256=YNYXamxYgEiSujwilCNHOtrwpgJGDiQ597qJfardDVc,42354
|
31
|
+
magic_pdf/filter/pdf_meta_scan.py,sha256=eOuM0-JgaXvHolSgepGoNDJDmv_uITWLQpH_0MfnVQw,17478
|
32
|
+
magic_pdf/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
33
|
+
magic_pdf/integrations/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
34
|
+
magic_pdf/integrations/rag/api.py,sha256=t38wvIBzLje4_JzTP3dewMLqV-tQJ-A3B92Sj2oyrfs,2507
|
35
|
+
magic_pdf/integrations/rag/type.py,sha256=Z_1g_ZIOCsb7-FmZBudReIXj8nzGrgj_BygCalhJdmk,3193
|
36
|
+
magic_pdf/integrations/rag/utils.py,sha256=DCb-UhC8TElb6Eq7_6NmmETreKEk5DVE18hNL8sTEBk,11762
|
37
|
+
magic_pdf/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
38
|
+
magic_pdf/libs/boxbase.py,sha256=DKZXhwpJd-HE2_Du7NmkeeYW5gG-iwX3GeSWL7rYGv0,16956
|
39
|
+
magic_pdf/libs/clean_memory.py,sha256=OsQexCjmBO2i-Hv-0uYQfn72dbUWR8sTW81nG2zlNQQ,479
|
40
|
+
magic_pdf/libs/commons.py,sha256=xD0fGA16KNB5rhbl4zRrOqdrNHYwaRablT_s9W2ZTbw,1174
|
41
|
+
magic_pdf/libs/config_reader.py,sha256=9GXK7jtDyA_jxXKWZAQ69rB02v5UW4mOmo1IaOYOkW0,4374
|
42
|
+
magic_pdf/libs/convert_utils.py,sha256=Ov-lsfCLBPz_15iSJXIslBNmrSf_E_1g_XDWJy8NgO8,143
|
43
|
+
magic_pdf/libs/coordinate_transform.py,sha256=Bbop2cP2uz2ZG0U0gwd7J6EKkgABq5Rv03qf2LMPw80,429
|
44
|
+
magic_pdf/libs/draw_bbox.py,sha256=hpUmpPiQVu7UgWQa3M49dS22G6A9gcG2jpq4dQjTjzA,18331
|
45
|
+
magic_pdf/libs/hash_utils.py,sha256=VEKK9WfFoZgrPfi8kfITjLpr8Ahufs8tXh9R1Y5lAL8,404
|
46
|
+
magic_pdf/libs/json_compressor.py,sha256=6-KCu0lb5ksmyqWtQGb4QqmP-FjRb5dP7P-Hevcn68g,875
|
47
|
+
magic_pdf/libs/language.py,sha256=7RT3mxSa7jdpoC5ySd7ZddHA7TO7UsnmDOWiYZAxuyg,1433
|
48
|
+
magic_pdf/libs/local_math.py,sha256=tqljQOgqh3fZc146HYhO88JXJaiXMVwArBkk_CSGICc,177
|
49
|
+
magic_pdf/libs/markdown_utils.py,sha256=86v2BmsSV4NkoRZrH4uQD1youJhYFF3vIKr_vDeg3z0,270
|
50
|
+
magic_pdf/libs/path_utils.py,sha256=Hykw_l5CU736b2egHV9P7B-qh3QNKO4nZSGCbsi0Z8E,1043
|
51
|
+
magic_pdf/libs/pdf_check.py,sha256=7GWWvDR6g_rj_fE6XJlbTq5AFVX11ngRIzT0N18F214,3396
|
52
|
+
magic_pdf/libs/pdf_image_tools.py,sha256=_au7plmKKctpPKozBumSKgP8689q4vH1mU8VMLO0IbM,2260
|
53
|
+
magic_pdf/libs/performance_stats.py,sha256=DW-c6nUTUnWKGTONRKfpucsYZm1ake016F9K7jJwbik,2136
|
54
|
+
magic_pdf/libs/safe_filename.py,sha256=ckwcM_eqoysTb5id8czp-tXq2G9da0-l3pshZDCHQtE,236
|
55
|
+
magic_pdf/libs/version.py,sha256=F5mW07pSyGrqDNY2Ehr-UpDzpBtN-FsYU0QGZWf6PJE,22
|
56
|
+
magic_pdf/model/__init__.py,sha256=sa-dO2k-TLy25I2gRrzjm_cQeYfzMf-pLwBJHkIxGo0,51
|
57
|
+
magic_pdf/model/batch_analyze.py,sha256=iE8WzD13edqyPcqarKJfgUxPxudXGNRck_FDtsPGYdg,11212
|
58
|
+
magic_pdf/model/doc_analyze_by_custom_model.py,sha256=cnfjCv9fTGYOgjyRBVQWHc57qbbK7PsoLKFLEQhIcho,10057
|
59
|
+
magic_pdf/model/magic_model.py,sha256=yZKWo_wRck_-YLyFGRiUHGar8sV1Y6458BFLbyBAt74,30682
|
60
|
+
magic_pdf/model/model_list.py,sha256=aqfEJlEfbib3D3ISrxc0Coh6SbffYh8Yq2FlQN35_zA,213
|
61
|
+
magic_pdf/model/pdf_extract_kit.py,sha256=C3sKqRkoD20Ldmo-cqGn1zRldEL-l5NYqcFvd05_fGU,10845
|
62
|
+
magic_pdf/model/pp_structure_v2.py,sha256=NcqFWL4nUtjl82MFak8HX_8V3i4Aw_fK4dATrIp5uGs,3840
|
63
|
+
magic_pdf/model/sub_modules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
64
|
+
magic_pdf/model/sub_modules/model_init.py,sha256=e2t95kxiuU47luOHByDokbQ2uob6oQhEA4b6UGVfMjY,8303
|
65
|
+
magic_pdf/model/sub_modules/model_utils.py,sha256=f8ceLYSHsNkTaAVUokYhAgsaW7hpQa8KwLseMXuKTF4,2638
|
66
|
+
magic_pdf/model/sub_modules/language_detection/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
67
|
+
magic_pdf/model/sub_modules/language_detection/utils.py,sha256=Q__v6DdNJztt8GhVSuSB0txahVq-aj8RLhWn2VScx4w,3047
|
68
|
+
magic_pdf/model/sub_modules/language_detection/yolov11/YOLOv11.py,sha256=7T8eFl8zlZe6F0j0jSB3jrwOapDft320JQ1fuWxpvAY,5230
|
69
|
+
magic_pdf/model/sub_modules/language_detection/yolov11/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
70
|
+
magic_pdf/model/sub_modules/layout/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
71
|
+
magic_pdf/model/sub_modules/layout/doclayout_yolo/DocLayoutYOLO.py,sha256=c-OmqY9DOgkIWqiDrNGe0-E5RS6-diwnPPsN5pbPw-s,2346
|
72
|
+
magic_pdf/model/sub_modules/layout/doclayout_yolo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
73
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
74
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/backbone.py,sha256=1cvSCczgvwOLdvzWyqttoYPMHsXmnzI3w9abJ1bAXoM,7106
|
75
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/beit.py,sha256=e-INve6bpEx_0FM5wYbQcEcelc79tzDlCljTVHaGt1w,30450
|
76
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/deit.py,sha256=Qyn5UWutZ-0GJczexCh-oMMSXtav_g3ovumMFJp8Om4,17000
|
77
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/model_init.py,sha256=PhWqqRwgSSmXTaUlLIjGqnBUNjzxwYDKgMzKjnxNy1k,4528
|
78
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/rcnn_vl.py,sha256=nI4G6AeLRmjavNhs5S2USKh0ozn-ftMuW0F0m_eVy3c,6649
|
79
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/visualizer.py,sha256=H6UYeCCbaN2gbDjGthTkKkPoyWxfE3azRjsR7fVBwnw,49797
|
80
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/__init__.py,sha256=C4N9gXJr7is7uznvQefQ7dOhlzEhdp86Lgh-7p0Y-08,186
|
81
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/__init__.py,sha256=W7V62JOh12NdMZj2H1sde3Il0AqW2VKplmHEsLle6tg,76
|
82
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/cord.py,sha256=jR_lRZxy8SeEvTK3FdlXmQHF0kefJf7ZqwM_8pvyI5E,8153
|
83
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/data_collator.py,sha256=M2TE47BprHSuQJYcoMeWOSpqkr_nh8VK6t2l26XWmxg,6279
|
84
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/funsd.py,sha256=Ez9tMeruHncJlkKQ7iRGBB9Pk1uWtgxlGeqs-sOmIG0,5214
|
85
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/image_utils.py,sha256=vuNOMzYw_h7jmaD2XUqkGlrjDEPB7XUts16GRICBmG4,10334
|
86
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/xfund.py,sha256=6jLKyc_4VhbHY4YEzBXm5RkPdsd9ldnUGXFZBLiJ-_s,8270
|
87
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/__init__.py,sha256=d5bm3Rx-jTrgfJDWrzD7t5R5CdHfug9dCNvUEneIYW4,190
|
88
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/__init__.py,sha256=a04w_C0B4P9jF-3I_tXCj3fLmfFQR5XSKGbhgGm--pM,1216
|
89
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/configuration_layoutlmv3.py,sha256=CJBcAmmLeRFVMN1YjWefoUW7hk0KXek0Eb_tergKl4Y,2150
|
90
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/modeling_layoutlmv3.py,sha256=mdo8tO-DrJcv0Lbk9Pp98n3NQXYOnFFyXQWjU7t35kA,54633
|
91
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3.py,sha256=diKlrfxYjKAmYrUgjYdx-FXLh-swShC3tl-EBX1b3oI,1197
|
92
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3_fast.py,sha256=0lxiG69_fGpSSBYA9CBLnDa_qqa1rInZ0pJpqBwZ0Yw,1372
|
93
|
+
magic_pdf/model/sub_modules/mfd/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
94
|
+
magic_pdf/model/sub_modules/mfd/yolov8/YOLOv8.py,sha256=Wm6jh70l_q_P_LQxa_pmLbg9OnHZyEKF1Dfln7Y2c8w,1114
|
95
|
+
magic_pdf/model/sub_modules/mfd/yolov8/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
96
|
+
magic_pdf/model/sub_modules/mfr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
97
|
+
magic_pdf/model/sub_modules/mfr/unimernet/Unimernet.py,sha256=hq48TP1Ac8Y4FlK7GamnU-WZTQfdZotxBKuFhOIjrcM,5349
|
98
|
+
magic_pdf/model/sub_modules/mfr/unimernet/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
99
|
+
magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/__init__.py,sha256=kHcISG8GS4TWJW34SCJCei1jxo6HxvO00aC0dqyNFgI,413
|
100
|
+
magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/modeling_unimernet.py,sha256=F_rwlFytWOwOntNhhZoUqFAyXgzvq_HVjKyBizwlnjo,7913
|
101
|
+
magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/__init__.py,sha256=9T2rBpyGX5YFQYj89-mWujRokOuz4xgNreBuegcg1_c,228
|
102
|
+
magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/configuration_unimer_mbart.py,sha256=hwRar3pqN_cVs3TRTNSuhB4wacBncfJ-qvaTajRb0xc,7934
|
103
|
+
magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/modeling_unimer_mbart.py,sha256=Y5nwqDRzSb8zAbY1U0HGJGVtFggJW1zATP8RdnasNcA,113605
|
104
|
+
magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/tokenization_unimer_mbart.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
105
|
+
magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/__init__.py,sha256=8_1DKwDCDUBkeHYiJJ6MZnodZBsatHbqhygh11s9eEA,267
|
106
|
+
magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/configuration_unimer_swin.py,sha256=OX3eRUKBnKCXtxJOG3sdNoB1IV-Z7efgWU-gaclYOGA,5780
|
107
|
+
magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/image_processing_unimer_swin.py,sha256=tlcCxOJVRus_35lCz4faMVZ8ulAjGxK5Yu9Y_IeHsDY,4406
|
108
|
+
magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/modeling_unimer_swin.py,sha256=Q_fdmFHUBtEoAfWp9aowdwTCE2MIFMOPbYjoSyXK2iU,48929
|
109
|
+
magic_pdf/model/sub_modules/ocr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
110
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
111
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/ocr_utils.py,sha256=3qxu0lAjqzZQ2Ci-C_wz_YSakyq_5-KnckA3-5bICTM,12589
|
112
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorch_paddle.py,sha256=FaPo02L1IJKybGYfydsohOiHstJIL8d5UKzGck2tYvk,7283
|
113
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
114
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/base_ocr_v20.py,sha256=5bI7MAu65r-vn28krwdJ6pjZMkEvWjspE7EQaTsRERw,1319
|
115
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/__init__.py,sha256=YYu3c-W4fgEErxxDM98uQ3oWwPEh-6w75LY4zcj4VtM,199
|
116
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/__init__.py,sha256=c4H0gXPRweQ0wMFnkrCLTR6MrtG-e4kUinxwq2G1V9U,1480
|
117
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/operators.py,sha256=uwS8t-6hVIBag3jJd3yiBM4DW_dEiynp22_WFmVppjA,14205
|
118
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
119
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/common.py,sha256=3r2jTvPYQS4IgTvIqR4l6bBVwR7jn-87rSmpv3tlqxI,2294
|
120
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/__init__.py,sha256=DCA9FS4mE5oCHDlBhUrkYLdxFeQIbhPj4P8oJ_gRZD8,832
|
121
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/base_model.py,sha256=RhV2Dm-os08kCFylT57zRu72Hq_RJdFy3xQe1MPaCuU,3588
|
122
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/__init__.py,sha256=TOLvLNeJhnAOus0D5jAq2TM67vLrtbzYlThTOhc-idc,1960
|
123
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/det_mobilenet_v3.py,sha256=r0gWnA1Xmt0Zw4FQLx7kf-WWwZd_26PfNzhM05drcuE,8334
|
124
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_hgnet.py,sha256=UsIbzqN_koyGoSh1TA9r27SggpHbeKS3HmmS-A2Aw04,8341
|
125
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_lcnetv3.py,sha256=3PNQG9B1cHe1hAg0NhcFR8p87rZnmH0jTBcfid-ZnKQ,15995
|
126
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mobilenet_v3.py,sha256=mJmE6xGpjHZH2Vaw16LlIlqRFFm9R9yRsSJEa3Yn3nw,4822
|
127
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mv1_enhance.py,sha256=K4p9KFYNmltV3y3QsxHIASNxoqlGtxgAoCxeFofyCmw,6726
|
128
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_svtrnet.py,sha256=AIaUZ3IWBkRz2pWmanBjS0QdJcYnimMSV4MWofNpQcg,20222
|
129
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/__init__.py,sha256=7vl3hyn6Ug_DNtqdfUL1Hz9scA_ptch2FCDzNddpQgU,1282
|
130
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/cls_head.py,sha256=puIy5GlUtAKer6eS4HWKu07PzRd-HlDAqIz5WqjBHaA,596
|
131
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/det_db_head.py,sha256=-k8bpuGQw_xIVDsumrfimOxg0O-oP2MOAyDJTjU70Ro,3633
|
132
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_ctc_head.py,sha256=ywyk5RJgUITdXvrUZk2yBSWKsaZIqnTofdFbuQUtwjU,1311
|
133
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_multi_head.py,sha256=K40SMA8tAVWu-3fwgfh3jGWeVFAdVnMyHjeZeI9OO7Q,2016
|
134
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/__init__.py,sha256=634L1y-QWv5P8opNiSmKvQEx3Uskc20RG8DYiCdbl8U,1030
|
135
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/db_fpn.py,sha256=TLF2pSyvRC0oPzL0eVyNlg3W6Zvfr4J8fD1nziVB7uI,14146
|
136
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/intracl.py,sha256=w2QdwdI9BpiW92VS4mqL31sVERIbY53TfbD5Q6okiaY,3410
|
137
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/rnn.py,sha256=olZwnKQexVlMx0gJi0FVYAm38TxNn5BM6F-OrdHKEgk,7019
|
138
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/__init__.py,sha256=iC1Ol6CTxRWZBUyQ_5IVMR6kIurv9WJPOWWo7NAuZBA,1183
|
139
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/cls_postprocess.py,sha256=1VVWXT_b1vhGb7PGvqyfUQ3Ip7LupH62vPva98GtjTA,685
|
140
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/db_postprocess.py,sha256=EAc-cmhXtxLfFA6h5C871hIcDfXsjPDISiVCKwxh-qM,6339
|
141
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/rec_postprocess.py,sha256=HiHNr4bhW5U1j4pYoyi8fPOaFsn8TUc4nSB6q8chfV4,26899
|
142
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
143
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/arch_config.yaml,sha256=eEzg5D5L3MHFL4H02gZnxdDiqtSCUzZDnt5pqDAmgCI,6980
|
144
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/models_config.yml,sha256=GOtAGMAretviqDXak409PPav7qHYMDBwSs9wxlSANRA,1388
|
145
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/arabic_dict.txt,sha256=xbaXD14RWk0Vpc7fAHpephuszp1j-Qi3IWC4VrFKu70,407
|
146
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/chinese_cht_dict.txt,sha256=gyVR_uHy-8l1CHctgevcjboSwA3pejXHHJ3fQ92sGoM,33443
|
147
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/cyrillic_dict.txt,sha256=NpqCxsjEeXhKXXJkSLg7Hq-1_vCkEppeqjkpYl3c0TI,410
|
148
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/devanagari_dict.txt,sha256=tfG-bYu_8aGfuWxdTKlqQjOAI0u30s4OB7WDittNGOo,508
|
149
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/en_dict.txt,sha256=VmLfnS0D8OjKDTsGSdasurkEtqFLPTUhRjxxw3xmjOM,190
|
150
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/japan_dict.txt,sha256=Hc_LQe7JBXapRbMITyKt4RztUG4k8Uh5JFsHFpjzCOg,17332
|
151
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ka_dict.txt,sha256=-tP3ZZQyde7CE0pvvJtSeFQmZBEE1OfbOhWdxz80Hd4,452
|
152
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/korean_dict.txt,sha256=qh_ciuj3zUCg7E7bRy6wQh4RQn5sz-6ZFUQHQsGLCiA,14480
|
153
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/latin_dict.txt,sha256=jm1ONil4jDXDH35TAofWFHtUm7eiZb1nCLsoETRCniw,468
|
154
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocr_keys_v1.txt,sha256=KLI2KtSrLcOHaapy_rU146nds_0qdYWgWSDmOTsdx_c,26249
|
155
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ta_dict.txt,sha256=6T5pSBSv2f8ekYtvS7Qmf7TGWpNE7l10ZPkTW5DAonA,352
|
156
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/te_dict.txt,sha256=7plGpg13AZd0dOiYg2lKTKIOqjhoojM0v3lA3NAI8Pk,429
|
157
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/__init__.py,sha256=xEqR65Z8YOzOLorLjK0LCHos2zX-tCuxSrxndjU00hE,49
|
158
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
159
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_cls.py,sha256=8RmKl1vejnZl65caHZNV2ta6hMsg5B_LE-FuqCO8T8A,4225
|
160
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_det.py,sha256=cRBKE0blzryj3Ar6yM0FKKgxmZdgMc44NDNl1S2wiRs,9136
|
161
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_rec.py,sha256=_fLTWjEmDZwXC-zzPT37PHO-nNlEvafemo2CyPJS7_w,19216
|
162
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_system.py,sha256=hkegkn6hq2v2zqHVAP615-k-fkTS8swRYSbZeoqmSI8,3822
|
163
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/pytorchocr_utility.py,sha256=i1PFN-_kefJUUZ4Vk7igs1TU8gfErTDlDXY6-8Uaurw,9323
|
164
|
+
magic_pdf/model/sub_modules/reading_oreder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
165
|
+
magic_pdf/model/sub_modules/reading_oreder/layoutreader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
166
|
+
magic_pdf/model/sub_modules/reading_oreder/layoutreader/helpers.py,sha256=IVUFcNMDF3-kio-BIxjppHnWS3eHPqvvNihIw2fbIFM,4372
|
167
|
+
magic_pdf/model/sub_modules/reading_oreder/layoutreader/xycut.py,sha256=ezNSq_Y4UXiztB58hbXJsjTJlOBqWIjuW5A2uLSaZSo,7349
|
168
|
+
magic_pdf/model/sub_modules/table/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
169
|
+
magic_pdf/model/sub_modules/table/table_utils.py,sha256=B9BC4f5EEjlt2ldYxrIC8Wic2Tz3t3gTJeEyK3ggrOU,282
|
170
|
+
magic_pdf/model/sub_modules/table/rapidtable/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
171
|
+
magic_pdf/model/sub_modules/table/rapidtable/rapid_table.py,sha256=Vh1rWxj6t9PrXw7yhTNdJvTHy2wSVITTbGG2fD0dOZg,2515
|
172
|
+
magic_pdf/operators/__init__.py,sha256=liU2-WYUvsQ1G4PYBppyvokS9z5IjrnlVMtoBAC1REI,2630
|
173
|
+
magic_pdf/operators/models.py,sha256=mRqbCVrxxaUVDpEBAsXaK7EL1M-goICkE1W0FYgewio,5305
|
174
|
+
magic_pdf/operators/pipes.py,sha256=XgBgisKQd_ruW-3Tw4v5LhqloZUHgn2aFcpi_q8LbCs,6767
|
175
|
+
magic_pdf/post_proc/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
176
|
+
magic_pdf/post_proc/llm_aided.py,sha256=yzhu2cCpUZjdwf3v0swYDgSs9VWIfMAoXepYIP1EMZs,6367
|
177
|
+
magic_pdf/post_proc/para_split_v3.py,sha256=SPN_VVGvFX5KpFMGw9OzgoE-kTZq-FF036i0cIImGH8,16975
|
178
|
+
magic_pdf/pre_proc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
179
|
+
magic_pdf/pre_proc/construct_page_dict.py,sha256=OFmq5XRKi6fYIo-lmGlL-NB16Sf0egzsfEx-fT2uYrc,660
|
180
|
+
magic_pdf/pre_proc/cut_image.py,sha256=NDzbxwD7z7Tb4uAxL4KR6LzURFdN1Tzr4nPvj-VmEqc,1225
|
181
|
+
magic_pdf/pre_proc/ocr_detect_all_bboxes.py,sha256=nt88ttXCEI_1ihAF7HU15SQjwM69V-iJmk-L_nyzA6o,9328
|
182
|
+
magic_pdf/pre_proc/ocr_dict_merge.py,sha256=PscKGF0uJIjMxZRM69FLUs1SZO_wOswDQQV1f0M2xAo,5627
|
183
|
+
magic_pdf/pre_proc/ocr_span_list_modify.py,sha256=bs5RLvk4kIyx9_Hqq0FU3AGPPxE8Sxs97Uwlf1sBryM,4725
|
184
|
+
magic_pdf/pre_proc/remove_bbox_overlap.py,sha256=mcdxAh4P56NZ3Ij8h3vW8qC_SrszfXflVWuWUuUiTNg,3089
|
185
|
+
magic_pdf/resources/fasttext-langdetect/lid.176.ftz,sha256=jzRyz-hzintgmejpmcPL-uDc0VaWqsfXc4qAOdtgPoM,938013
|
186
|
+
magic_pdf/resources/model_config/model_configs.yaml,sha256=SoyoDmXYQX4ltKclG6ZcpdSA8dX5auSgILBvaA0wPkg,325
|
187
|
+
magic_pdf/resources/yolov11-langdetect/yolo_v11_ft.pt,sha256=dV4lcudF8wimEbAooYbvISvFhrXjp9i0rMRqv9VW6hY,3204667
|
188
|
+
magic_pdf/spark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
189
|
+
magic_pdf/spark/spark_api.py,sha256=BYO6zlRW0cEnIUB3ZzNQTu_LsPHEVitqiUN7gy3x_wo,1124
|
190
|
+
magic_pdf/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
191
|
+
magic_pdf/tools/cli.py,sha256=_oa-M5Hcopa5RZudVzrEip2W8pa9422Lmat7tMBJO5M,5171
|
192
|
+
magic_pdf/tools/cli_dev.py,sha256=3RbubfTIagWoFYdu8wSDanr-BJDjFGeDet55jTy7He0,3948
|
193
|
+
magic_pdf/tools/common.py,sha256=LoUz6Y36_U2odZqzBNKXngFNa6plf01U7_5jlDAFXaQ,12313
|
194
|
+
magic_pdf/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
195
|
+
magic_pdf/utils/annotations.py,sha256=82ou3uELNbQWa9hOFFkVt0gsIskAKf5msCv5J2IJ5V0,211
|
196
|
+
magic_pdf/utils/office_to_pdf.py,sha256=7aj-Ls2v8saD-Rgu_t3FIc-J3Ka9wnmiEH5zY-H1Vxs,729
|
197
|
+
magic_pdf-1.3.0.dist-info/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
|
198
|
+
magic_pdf-1.3.0.dist-info/METADATA,sha256=Z4WBVqCxPlHu7TC2pLGhrCfVpp0Im11NBZNowFQsMyM,41909
|
199
|
+
magic_pdf-1.3.0.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
200
|
+
magic_pdf-1.3.0.dist-info/entry_points.txt,sha256=wXwYke3j8fqDQTocUspL-CqDUEv3Tfcwp09fM8dZAhA,98
|
201
|
+
magic_pdf-1.3.0.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
|
202
|
+
magic_pdf-1.3.0.dist-info/RECORD,,
|
@@ -1,204 +0,0 @@
|
|
1
|
-
import copy
|
2
|
-
import platform
|
3
|
-
import time
|
4
|
-
import cv2
|
5
|
-
import numpy as np
|
6
|
-
import torch
|
7
|
-
|
8
|
-
from paddleocr import PaddleOCR
|
9
|
-
from ppocr.utils.logging import get_logger
|
10
|
-
from ppocr.utils.utility import alpha_to_color, binarize_img
|
11
|
-
from tools.infer.predict_system import sorted_boxes
|
12
|
-
from tools.infer.utility import get_rotate_crop_image, get_minarea_rect_crop
|
13
|
-
|
14
|
-
from magic_pdf.model.sub_modules.ocr.paddleocr.ocr_utils import update_det_boxes, merge_det_boxes, check_img, \
|
15
|
-
ONNXModelSingleton
|
16
|
-
|
17
|
-
logger = get_logger()
|
18
|
-
|
19
|
-
|
20
|
-
class ModifiedPaddleOCR(PaddleOCR):
|
21
|
-
def __init__(self, *args, **kwargs):
|
22
|
-
|
23
|
-
super().__init__(*args, **kwargs)
|
24
|
-
self.lang = kwargs.get('lang', 'ch')
|
25
|
-
# 在cpu架构为arm且不支持cuda时调用onnx、
|
26
|
-
if not torch.cuda.is_available() and platform.machine() in ['arm64', 'aarch64']:
|
27
|
-
self.use_onnx = True
|
28
|
-
onnx_model_manager = ONNXModelSingleton()
|
29
|
-
self.additional_ocr = onnx_model_manager.get_onnx_model(**kwargs)
|
30
|
-
else:
|
31
|
-
self.use_onnx = False
|
32
|
-
|
33
|
-
def ocr(self,
|
34
|
-
img,
|
35
|
-
det=True,
|
36
|
-
rec=True,
|
37
|
-
cls=True,
|
38
|
-
bin=False,
|
39
|
-
inv=False,
|
40
|
-
alpha_color=(255, 255, 255),
|
41
|
-
mfd_res=None,
|
42
|
-
):
|
43
|
-
"""
|
44
|
-
OCR with PaddleOCR
|
45
|
-
args:
|
46
|
-
img: img for OCR, support ndarray, img_path and list or ndarray
|
47
|
-
det: use text detection or not. If False, only rec will be exec. Default is True
|
48
|
-
rec: use text recognition or not. If False, only det will be exec. Default is True
|
49
|
-
cls: use angle classifier or not. Default is True. If True, the text with rotation of 180 degrees can be recognized. If no text is rotated by 180 degrees, use cls=False to get better performance. Text with rotation of 90 or 270 degrees can be recognized even if cls=False.
|
50
|
-
bin: binarize image to black and white. Default is False.
|
51
|
-
inv: invert image colors. Default is False.
|
52
|
-
alpha_color: set RGB color Tuple for transparent parts replacement. Default is pure white.
|
53
|
-
"""
|
54
|
-
assert isinstance(img, (np.ndarray, list, str, bytes))
|
55
|
-
if isinstance(img, list) and det == True:
|
56
|
-
logger.error('When input a list of images, det must be false')
|
57
|
-
exit(0)
|
58
|
-
if cls == True and self.use_angle_cls == False:
|
59
|
-
pass
|
60
|
-
# logger.warning(
|
61
|
-
# 'Since the angle classifier is not initialized, it will not be used during the forward process'
|
62
|
-
# )
|
63
|
-
|
64
|
-
img = check_img(img)
|
65
|
-
# for infer pdf file
|
66
|
-
if isinstance(img, list):
|
67
|
-
if self.page_num > len(img) or self.page_num == 0:
|
68
|
-
self.page_num = len(img)
|
69
|
-
imgs = img[:self.page_num]
|
70
|
-
else:
|
71
|
-
imgs = [img]
|
72
|
-
|
73
|
-
def preprocess_image(_image):
|
74
|
-
_image = alpha_to_color(_image, alpha_color)
|
75
|
-
if inv:
|
76
|
-
_image = cv2.bitwise_not(_image)
|
77
|
-
if bin:
|
78
|
-
_image = binarize_img(_image)
|
79
|
-
return _image
|
80
|
-
|
81
|
-
if det and rec:
|
82
|
-
ocr_res = []
|
83
|
-
for img in imgs:
|
84
|
-
img = preprocess_image(img)
|
85
|
-
dt_boxes, rec_res, _ = self.__call__(img, cls, mfd_res=mfd_res)
|
86
|
-
if not dt_boxes and not rec_res:
|
87
|
-
ocr_res.append(None)
|
88
|
-
continue
|
89
|
-
tmp_res = [[box.tolist(), res]
|
90
|
-
for box, res in zip(dt_boxes, rec_res)]
|
91
|
-
ocr_res.append(tmp_res)
|
92
|
-
return ocr_res
|
93
|
-
elif det and not rec:
|
94
|
-
ocr_res = []
|
95
|
-
for img in imgs:
|
96
|
-
img = preprocess_image(img)
|
97
|
-
if self.lang in ['ch'] and self.use_onnx:
|
98
|
-
dt_boxes, elapse = self.additional_ocr.text_detector(img)
|
99
|
-
else:
|
100
|
-
dt_boxes, elapse = self.text_detector(img)
|
101
|
-
if dt_boxes is None:
|
102
|
-
ocr_res.append(None)
|
103
|
-
continue
|
104
|
-
dt_boxes = sorted_boxes(dt_boxes)
|
105
|
-
# merge_det_boxes 和 update_det_boxes 都会把poly转成bbox再转回poly,因此需要过滤所有倾斜程度较大的文本框
|
106
|
-
dt_boxes = merge_det_boxes(dt_boxes)
|
107
|
-
if mfd_res:
|
108
|
-
bef = time.time()
|
109
|
-
dt_boxes = update_det_boxes(dt_boxes, mfd_res)
|
110
|
-
aft = time.time()
|
111
|
-
logger.debug("split text box by formula, new dt_boxes num : {}, elapsed : {}".format(
|
112
|
-
len(dt_boxes), aft - bef))
|
113
|
-
tmp_res = [box.tolist() for box in dt_boxes]
|
114
|
-
ocr_res.append(tmp_res)
|
115
|
-
return ocr_res
|
116
|
-
else:
|
117
|
-
ocr_res = []
|
118
|
-
cls_res = []
|
119
|
-
for img in imgs:
|
120
|
-
if not isinstance(img, list):
|
121
|
-
img = preprocess_image(img)
|
122
|
-
img = [img]
|
123
|
-
if self.use_angle_cls and cls:
|
124
|
-
img, cls_res_tmp, elapse = self.text_classifier(img)
|
125
|
-
if not rec:
|
126
|
-
cls_res.append(cls_res_tmp)
|
127
|
-
if self.lang in ['ch'] and self.use_onnx:
|
128
|
-
rec_res, elapse = self.additional_ocr.text_recognizer(img)
|
129
|
-
else:
|
130
|
-
rec_res, elapse = self.text_recognizer(img)
|
131
|
-
ocr_res.append(rec_res)
|
132
|
-
if not rec:
|
133
|
-
return cls_res
|
134
|
-
return ocr_res
|
135
|
-
|
136
|
-
def __call__(self, img, cls=True, mfd_res=None):
|
137
|
-
time_dict = {'det': 0, 'rec': 0, 'cls': 0, 'all': 0}
|
138
|
-
|
139
|
-
if img is None:
|
140
|
-
logger.debug("no valid image provided")
|
141
|
-
return None, None, time_dict
|
142
|
-
|
143
|
-
start = time.time()
|
144
|
-
ori_im = img.copy()
|
145
|
-
if self.lang in ['ch'] and self.use_onnx:
|
146
|
-
dt_boxes, elapse = self.additional_ocr.text_detector(img)
|
147
|
-
else:
|
148
|
-
dt_boxes, elapse = self.text_detector(img)
|
149
|
-
time_dict['det'] = elapse
|
150
|
-
|
151
|
-
if dt_boxes is None:
|
152
|
-
logger.debug("no dt_boxes found, elapsed : {}".format(elapse))
|
153
|
-
end = time.time()
|
154
|
-
time_dict['all'] = end - start
|
155
|
-
return None, None, time_dict
|
156
|
-
else:
|
157
|
-
logger.debug("dt_boxes num : {}, elapsed : {}".format(
|
158
|
-
len(dt_boxes), elapse))
|
159
|
-
img_crop_list = []
|
160
|
-
|
161
|
-
dt_boxes = sorted_boxes(dt_boxes)
|
162
|
-
|
163
|
-
# merge_det_boxes 和 update_det_boxes 都会把poly转成bbox再转回poly,因此需要过滤所有倾斜程度较大的文本框
|
164
|
-
dt_boxes = merge_det_boxes(dt_boxes)
|
165
|
-
|
166
|
-
if mfd_res:
|
167
|
-
bef = time.time()
|
168
|
-
dt_boxes = update_det_boxes(dt_boxes, mfd_res)
|
169
|
-
aft = time.time()
|
170
|
-
logger.debug("split text box by formula, new dt_boxes num : {}, elapsed : {}".format(
|
171
|
-
len(dt_boxes), aft - bef))
|
172
|
-
|
173
|
-
for bno in range(len(dt_boxes)):
|
174
|
-
tmp_box = copy.deepcopy(dt_boxes[bno])
|
175
|
-
if self.args.det_box_type == "quad":
|
176
|
-
img_crop = get_rotate_crop_image(ori_im, tmp_box)
|
177
|
-
else:
|
178
|
-
img_crop = get_minarea_rect_crop(ori_im, tmp_box)
|
179
|
-
img_crop_list.append(img_crop)
|
180
|
-
if self.use_angle_cls and cls:
|
181
|
-
img_crop_list, angle_list, elapse = self.text_classifier(
|
182
|
-
img_crop_list)
|
183
|
-
time_dict['cls'] = elapse
|
184
|
-
logger.debug("cls num : {}, elapsed : {}".format(
|
185
|
-
len(img_crop_list), elapse))
|
186
|
-
if self.lang in ['ch'] and self.use_onnx:
|
187
|
-
rec_res, elapse = self.additional_ocr.text_recognizer(img_crop_list)
|
188
|
-
else:
|
189
|
-
rec_res, elapse = self.text_recognizer(img_crop_list)
|
190
|
-
time_dict['rec'] = elapse
|
191
|
-
logger.debug("rec_res num : {}, elapsed : {}".format(
|
192
|
-
len(rec_res), elapse))
|
193
|
-
if self.args.save_crop_res:
|
194
|
-
self.draw_crop_rec_res(self.args.crop_res_save_dir, img_crop_list,
|
195
|
-
rec_res)
|
196
|
-
filter_boxes, filter_rec_res = [], []
|
197
|
-
for box, rec_result in zip(dt_boxes, rec_res):
|
198
|
-
text, score = rec_result
|
199
|
-
if score >= self.drop_score:
|
200
|
-
filter_boxes.append(box)
|
201
|
-
filter_rec_res.append(rec_result)
|
202
|
-
end = time.time()
|
203
|
-
time_dict['all'] = end - start
|
204
|
-
return filter_boxes, filter_rec_res, time_dict
|
@@ -1,213 +0,0 @@
|
|
1
|
-
import copy
|
2
|
-
import time
|
3
|
-
|
4
|
-
|
5
|
-
import cv2
|
6
|
-
import numpy as np
|
7
|
-
from paddleocr import PaddleOCR
|
8
|
-
from paddleocr.paddleocr import check_img, logger
|
9
|
-
from paddleocr.ppocr.utils.utility import alpha_to_color, binarize_img
|
10
|
-
from paddleocr.tools.infer.predict_system import sorted_boxes
|
11
|
-
from paddleocr.tools.infer.utility import slice_generator, merge_fragmented, get_rotate_crop_image, \
|
12
|
-
get_minarea_rect_crop
|
13
|
-
|
14
|
-
from magic_pdf.model.sub_modules.ocr.paddleocr.ocr_utils import update_det_boxes
|
15
|
-
|
16
|
-
|
17
|
-
class ModifiedPaddleOCR(PaddleOCR):
|
18
|
-
|
19
|
-
def ocr(
|
20
|
-
self,
|
21
|
-
img,
|
22
|
-
det=True,
|
23
|
-
rec=True,
|
24
|
-
cls=True,
|
25
|
-
bin=False,
|
26
|
-
inv=False,
|
27
|
-
alpha_color=(255, 255, 255),
|
28
|
-
slice={},
|
29
|
-
mfd_res=None,
|
30
|
-
):
|
31
|
-
"""
|
32
|
-
OCR with PaddleOCR
|
33
|
-
|
34
|
-
Args:
|
35
|
-
img: Image for OCR. It can be an ndarray, img_path, or a list of ndarrays.
|
36
|
-
det: Use text detection or not. If False, only text recognition will be executed. Default is True.
|
37
|
-
rec: Use text recognition or not. If False, only text detection will be executed. Default is True.
|
38
|
-
cls: Use angle classifier or not. Default is True. If True, the text with a rotation of 180 degrees can be recognized. If no text is rotated by 180 degrees, use cls=False to get better performance.
|
39
|
-
bin: Binarize image to black and white. Default is False.
|
40
|
-
inv: Invert image colors. Default is False.
|
41
|
-
alpha_color: Set RGB color Tuple for transparent parts replacement. Default is pure white.
|
42
|
-
slice: Use sliding window inference for large images. Both det and rec must be True. Requires int values for slice["horizontal_stride"], slice["vertical_stride"], slice["merge_x_thres"], slice["merge_y_thres"] (See doc/doc_en/slice_en.md). Default is {}.
|
43
|
-
|
44
|
-
Returns:
|
45
|
-
If both det and rec are True, returns a list of OCR results for each image. Each OCR result is a list of bounding boxes and recognized text for each detected text region.
|
46
|
-
If det is True and rec is False, returns a list of detected bounding boxes for each image.
|
47
|
-
If det is False and rec is True, returns a list of recognized text for each image.
|
48
|
-
If both det and rec are False, returns a list of angle classification results for each image.
|
49
|
-
|
50
|
-
Raises:
|
51
|
-
AssertionError: If the input image is not of type ndarray, list, str, or bytes.
|
52
|
-
SystemExit: If det is True and the input is a list of images.
|
53
|
-
|
54
|
-
Note:
|
55
|
-
- If the angle classifier is not initialized (use_angle_cls=False), it will not be used during the forward process.
|
56
|
-
- For PDF files, if the input is a list of images and the page_num is specified, only the first page_num images will be processed.
|
57
|
-
- The preprocess_image function is used to preprocess the input image by applying alpha color replacement, inversion, and binarization if specified.
|
58
|
-
"""
|
59
|
-
assert isinstance(img, (np.ndarray, list, str, bytes))
|
60
|
-
if isinstance(img, list) and det == True:
|
61
|
-
logger.error("When input a list of images, det must be false")
|
62
|
-
exit(0)
|
63
|
-
if cls == True and self.use_angle_cls == False:
|
64
|
-
logger.warning(
|
65
|
-
"Since the angle classifier is not initialized, it will not be used during the forward process"
|
66
|
-
)
|
67
|
-
|
68
|
-
img, flag_gif, flag_pdf = check_img(img, alpha_color)
|
69
|
-
# for infer pdf file
|
70
|
-
if isinstance(img, list) and flag_pdf:
|
71
|
-
if self.page_num > len(img) or self.page_num == 0:
|
72
|
-
imgs = img
|
73
|
-
else:
|
74
|
-
imgs = img[: self.page_num]
|
75
|
-
else:
|
76
|
-
imgs = [img]
|
77
|
-
|
78
|
-
def preprocess_image(_image):
|
79
|
-
_image = alpha_to_color(_image, alpha_color)
|
80
|
-
if inv:
|
81
|
-
_image = cv2.bitwise_not(_image)
|
82
|
-
if bin:
|
83
|
-
_image = binarize_img(_image)
|
84
|
-
return _image
|
85
|
-
|
86
|
-
if det and rec:
|
87
|
-
ocr_res = []
|
88
|
-
for img in imgs:
|
89
|
-
img = preprocess_image(img)
|
90
|
-
dt_boxes, rec_res, _ = self.__call__(img, cls, slice, mfd_res=mfd_res)
|
91
|
-
if not dt_boxes and not rec_res:
|
92
|
-
ocr_res.append(None)
|
93
|
-
continue
|
94
|
-
tmp_res = [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)]
|
95
|
-
ocr_res.append(tmp_res)
|
96
|
-
return ocr_res
|
97
|
-
elif det and not rec:
|
98
|
-
ocr_res = []
|
99
|
-
for img in imgs:
|
100
|
-
img = preprocess_image(img)
|
101
|
-
dt_boxes, elapse = self.text_detector(img)
|
102
|
-
if dt_boxes.size == 0:
|
103
|
-
ocr_res.append(None)
|
104
|
-
continue
|
105
|
-
tmp_res = [box.tolist() for box in dt_boxes]
|
106
|
-
ocr_res.append(tmp_res)
|
107
|
-
return ocr_res
|
108
|
-
else:
|
109
|
-
ocr_res = []
|
110
|
-
cls_res = []
|
111
|
-
for img in imgs:
|
112
|
-
if not isinstance(img, list):
|
113
|
-
img = preprocess_image(img)
|
114
|
-
img = [img]
|
115
|
-
if self.use_angle_cls and cls:
|
116
|
-
img, cls_res_tmp, elapse = self.text_classifier(img)
|
117
|
-
if not rec:
|
118
|
-
cls_res.append(cls_res_tmp)
|
119
|
-
rec_res, elapse = self.text_recognizer(img)
|
120
|
-
ocr_res.append(rec_res)
|
121
|
-
if not rec:
|
122
|
-
return cls_res
|
123
|
-
return ocr_res
|
124
|
-
|
125
|
-
def __call__(self, img, cls=True, slice={}, mfd_res=None):
|
126
|
-
time_dict = {"det": 0, "rec": 0, "cls": 0, "all": 0}
|
127
|
-
|
128
|
-
if img is None:
|
129
|
-
logger.debug("no valid image provided")
|
130
|
-
return None, None, time_dict
|
131
|
-
|
132
|
-
start = time.time()
|
133
|
-
ori_im = img.copy()
|
134
|
-
if slice:
|
135
|
-
slice_gen = slice_generator(
|
136
|
-
img,
|
137
|
-
horizontal_stride=slice["horizontal_stride"],
|
138
|
-
vertical_stride=slice["vertical_stride"],
|
139
|
-
)
|
140
|
-
elapsed = []
|
141
|
-
dt_slice_boxes = []
|
142
|
-
for slice_crop, v_start, h_start in slice_gen:
|
143
|
-
dt_boxes, elapse = self.text_detector(slice_crop, use_slice=True)
|
144
|
-
if dt_boxes.size:
|
145
|
-
dt_boxes[:, :, 0] += h_start
|
146
|
-
dt_boxes[:, :, 1] += v_start
|
147
|
-
dt_slice_boxes.append(dt_boxes)
|
148
|
-
elapsed.append(elapse)
|
149
|
-
dt_boxes = np.concatenate(dt_slice_boxes)
|
150
|
-
|
151
|
-
dt_boxes = merge_fragmented(
|
152
|
-
boxes=dt_boxes,
|
153
|
-
x_threshold=slice["merge_x_thres"],
|
154
|
-
y_threshold=slice["merge_y_thres"],
|
155
|
-
)
|
156
|
-
elapse = sum(elapsed)
|
157
|
-
else:
|
158
|
-
dt_boxes, elapse = self.text_detector(img)
|
159
|
-
|
160
|
-
time_dict["det"] = elapse
|
161
|
-
|
162
|
-
if dt_boxes is None:
|
163
|
-
logger.debug("no dt_boxes found, elapsed : {}".format(elapse))
|
164
|
-
end = time.time()
|
165
|
-
time_dict["all"] = end - start
|
166
|
-
return None, None, time_dict
|
167
|
-
else:
|
168
|
-
logger.debug(
|
169
|
-
"dt_boxes num : {}, elapsed : {}".format(len(dt_boxes), elapse)
|
170
|
-
)
|
171
|
-
img_crop_list = []
|
172
|
-
|
173
|
-
dt_boxes = sorted_boxes(dt_boxes)
|
174
|
-
|
175
|
-
if mfd_res:
|
176
|
-
bef = time.time()
|
177
|
-
dt_boxes = update_det_boxes(dt_boxes, mfd_res)
|
178
|
-
aft = time.time()
|
179
|
-
logger.debug("split text box by formula, new dt_boxes num : {}, elapsed : {}".format(
|
180
|
-
len(dt_boxes), aft - bef))
|
181
|
-
|
182
|
-
for bno in range(len(dt_boxes)):
|
183
|
-
tmp_box = copy.deepcopy(dt_boxes[bno])
|
184
|
-
if self.args.det_box_type == "quad":
|
185
|
-
img_crop = get_rotate_crop_image(ori_im, tmp_box)
|
186
|
-
else:
|
187
|
-
img_crop = get_minarea_rect_crop(ori_im, tmp_box)
|
188
|
-
img_crop_list.append(img_crop)
|
189
|
-
if self.use_angle_cls and cls:
|
190
|
-
img_crop_list, angle_list, elapse = self.text_classifier(img_crop_list)
|
191
|
-
time_dict["cls"] = elapse
|
192
|
-
logger.debug(
|
193
|
-
"cls num : {}, elapsed : {}".format(len(img_crop_list), elapse)
|
194
|
-
)
|
195
|
-
if len(img_crop_list) > 1000:
|
196
|
-
logger.debug(
|
197
|
-
f"rec crops num: {len(img_crop_list)}, time and memory cost may be large."
|
198
|
-
)
|
199
|
-
|
200
|
-
rec_res, elapse = self.text_recognizer(img_crop_list)
|
201
|
-
time_dict["rec"] = elapse
|
202
|
-
logger.debug("rec_res num : {}, elapsed : {}".format(len(rec_res), elapse))
|
203
|
-
if self.args.save_crop_res:
|
204
|
-
self.draw_crop_rec_res(self.args.crop_res_save_dir, img_crop_list, rec_res)
|
205
|
-
filter_boxes, filter_rec_res = [], []
|
206
|
-
for box, rec_result in zip(dt_boxes, rec_res):
|
207
|
-
text, score = rec_result[0], rec_result[1]
|
208
|
-
if score >= self.drop_score:
|
209
|
-
filter_boxes.append(box)
|
210
|
-
filter_rec_res.append(rec_result)
|
211
|
-
end = time.time()
|
212
|
-
time_dict["all"] = end - start
|
213
|
-
return filter_boxes, filter_rec_res, time_dict
|