magic-pdf 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- magic_pdf/data/batch_build_dataset.py +156 -0
- magic_pdf/data/dataset.py +44 -24
- magic_pdf/data/utils.py +108 -9
- magic_pdf/dict2md/ocr_mkcontent.py +4 -3
- magic_pdf/libs/pdf_image_tools.py +11 -6
- magic_pdf/libs/performance_stats.py +12 -1
- magic_pdf/libs/version.py +1 -1
- magic_pdf/model/batch_analyze.py +175 -201
- magic_pdf/model/doc_analyze_by_custom_model.py +137 -92
- magic_pdf/model/pdf_extract_kit.py +5 -38
- magic_pdf/model/sub_modules/language_detection/utils.py +2 -4
- magic_pdf/model/sub_modules/language_detection/yolov11/YOLOv11.py +24 -19
- magic_pdf/model/sub_modules/layout/doclayout_yolo/DocLayoutYOLO.py +3 -1
- magic_pdf/model/sub_modules/mfd/yolov8/YOLOv8.py +3 -1
- magic_pdf/model/sub_modules/mfr/unimernet/Unimernet.py +31 -102
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/__init__.py +13 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/modeling_unimernet.py +189 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/__init__.py +8 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/configuration_unimer_mbart.py +163 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/modeling_unimer_mbart.py +2351 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/__init__.py +9 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/configuration_unimer_swin.py +132 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/image_processing_unimer_swin.py +132 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/modeling_unimer_swin.py +1084 -0
- magic_pdf/model/sub_modules/model_init.py +50 -37
- magic_pdf/model/sub_modules/model_utils.py +17 -11
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/__init__.py +1 -0
- magic_pdf/model/sub_modules/ocr/{paddleocr → paddleocr2pytorch}/ocr_utils.py +102 -97
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorch_paddle.py +193 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/base_ocr_v20.py +39 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/__init__.py +8 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/__init__.py +48 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/operators.py +418 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/__init__.py +25 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/base_model.py +105 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/__init__.py +62 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/det_mobilenet_v3.py +269 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_hgnet.py +290 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_lcnetv3.py +516 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mobilenet_v3.py +136 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mv1_enhance.py +234 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_svtrnet.py +638 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/common.py +76 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/__init__.py +43 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/cls_head.py +23 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/det_db_head.py +109 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_ctc_head.py +54 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_multi_head.py +58 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/__init__.py +29 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/db_fpn.py +456 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/intracl.py +117 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/rnn.py +228 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/__init__.py +33 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/cls_postprocess.py +20 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/db_postprocess.py +179 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/rec_postprocess.py +690 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/__init__.py +0 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/arch_config.yaml +383 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/arabic_dict.txt +162 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/chinese_cht_dict.txt +8421 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/cyrillic_dict.txt +163 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/devanagari_dict.txt +167 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/en_dict.txt +95 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/japan_dict.txt +4399 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ka_dict.txt +153 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/korean_dict.txt +3688 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/latin_dict.txt +185 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocr_keys_v1.txt +6623 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ta_dict.txt +128 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/te_dict.txt +151 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/models_config.yml +49 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/__init__.py +1 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/__init__.py +1 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_cls.py +106 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_det.py +217 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_rec.py +440 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_system.py +104 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/pytorchocr_utility.py +227 -0
- magic_pdf/model/sub_modules/table/rapidtable/rapid_table.py +10 -18
- magic_pdf/pdf_parse_union_core_v2.py +112 -74
- magic_pdf/post_proc/para_split_v3.py +16 -13
- magic_pdf/pre_proc/ocr_dict_merge.py +9 -1
- magic_pdf/pre_proc/ocr_span_list_modify.py +51 -0
- magic_pdf/resources/model_config/model_configs.yaml +1 -1
- magic_pdf/tools/cli.py +30 -12
- magic_pdf/tools/common.py +90 -12
- {magic_pdf-1.2.1.dist-info → magic_pdf-1.3.0.dist-info}/METADATA +51 -41
- magic_pdf-1.3.0.dist-info/RECORD +202 -0
- magic_pdf/model/sub_modules/ocr/paddleocr/ppocr_273_mod.py +0 -204
- magic_pdf/model/sub_modules/ocr/paddleocr/ppocr_291_mod.py +0 -213
- magic_pdf/model/sub_modules/table/structeqtable/struct_eqtable.py +0 -37
- magic_pdf/model/sub_modules/table/tablemaster/tablemaster_paddle.py +0 -71
- magic_pdf/resources/model_config/UniMERNet/demo.yaml +0 -46
- magic_pdf/resources/model_config/layoutlmv3/layoutlmv3_base_inference.yaml +0 -351
- magic_pdf-1.2.1.dist-info/RECORD +0 -147
- /magic_pdf/model/sub_modules/{ocr/paddleocr/__init__.py → mfr/unimernet/unimernet_hf/unimer_mbart/tokenization_unimer_mbart.py} +0 -0
- /magic_pdf/model/sub_modules/{table/structeqtable → ocr/paddleocr2pytorch/pytorchocr}/__init__.py +0 -0
- /magic_pdf/model/sub_modules/{table/tablemaster → ocr/paddleocr2pytorch/pytorchocr/modeling}/__init__.py +0 -0
- {magic_pdf-1.2.1.dist-info → magic_pdf-1.3.0.dist-info}/LICENSE.md +0 -0
- {magic_pdf-1.2.1.dist-info → magic_pdf-1.3.0.dist-info}/WHEEL +0 -0
- {magic_pdf-1.2.1.dist-info → magic_pdf-1.3.0.dist-info}/entry_points.txt +0 -0
- {magic_pdf-1.2.1.dist-info → magic_pdf-1.3.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,202 @@
|
|
1
|
+
magic_pdf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
+
magic_pdf/pdf_parse_union_core_v2.py,sha256=qRLsYctJxthgo2zv56WDV8dU8dmaykRdsHOFiBcDoKM,40385
|
3
|
+
magic_pdf/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
+
magic_pdf/config/constants.py,sha256=fXGzANULnJWLPxwYp3BEFWx-rnorzpySMx63ffyEyq4,1272
|
5
|
+
magic_pdf/config/drop_reason.py,sha256=CqjMzBE96Qo8OeFvhhhItY8WhyqsKhE3DmyJLoQZNCc,2248
|
6
|
+
magic_pdf/config/drop_tag.py,sha256=CjveyzhAsHm_bfXB7ZZNKruw1NR-WdKD8Hz6OhQdG0A,680
|
7
|
+
magic_pdf/config/enums.py,sha256=CImYuw4sbKpq9zrj6zrrEvtdoGkjxDt8S8ByNVDpypU,89
|
8
|
+
magic_pdf/config/exceptions.py,sha256=2tsJxYUebVeimyYBGQkc9Nd1kIakTmWmz3SDcfJWy54,784
|
9
|
+
magic_pdf/config/make_content_config.py,sha256=J2eJIhVHBPGwX18zVQomQUOxs8LcfeGLxLgdBEeRvLg,248
|
10
|
+
magic_pdf/config/model_block_type.py,sha256=y5ie2ZLvo-h8OdVk8HOEha6qK0OJFtLmtOhYjrV680g,166
|
11
|
+
magic_pdf/config/ocr_content_type.py,sha256=e_7RBTdShaWvWhMO2SFou7GM521elMH_Jtn5usbHWdY,890
|
12
|
+
magic_pdf/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
|
+
magic_pdf/data/batch_build_dataset.py,sha256=rS4f50hBc7IvSqa_Gd84E_tSYpQ66BMaeZkCPd5Ajxw,4601
|
14
|
+
magic_pdf/data/dataset.py,sha256=pSz2Ukj9-bEKyg06Wsl9jBhCWeX_0moBtaKujQuYwqI,11830
|
15
|
+
magic_pdf/data/read_api.py,sha256=_faBnYE3iU_EiQLNFjVM6a8IQtOGAcSQNYBZsTSN1d8,5225
|
16
|
+
magic_pdf/data/schemas.py,sha256=oIUTBzK8Wq8Wuy8A_uilWAbVhucRvOs9_f3lSKYgcmQ,664
|
17
|
+
magic_pdf/data/utils.py,sha256=dNWIJECPXaakKocI4z5Tq6vhDDSnR-bVWQV7DO2w_A8,5335
|
18
|
+
magic_pdf/data/data_reader_writer/__init__.py,sha256=QtevUaeSivv9dQKi3Tomfn4Z0E4To0cB8qXTnglxaHc,705
|
19
|
+
magic_pdf/data/data_reader_writer/base.py,sha256=nqmAcdHOXMOJO6RAT3ILligDFaw8Op0STyCw5yOzAbI,1706
|
20
|
+
magic_pdf/data/data_reader_writer/filebase.py,sha256=VbNAxLyo0Io0j7iprJERt_TqxzHAtA7cUyPIaJstToU,2146
|
21
|
+
magic_pdf/data/data_reader_writer/multi_bucket_s3.py,sha256=4pEJ8PPd3nX7sccHobCs0mbDM8BiqDP_sAEz7CIvpNI,5938
|
22
|
+
magic_pdf/data/data_reader_writer/s3.py,sha256=9Oy1cNuXMwG1e8PgZ7AR-pn_MqHAhkgAGnyEZCYoYAA,2408
|
23
|
+
magic_pdf/data/io/__init__.py,sha256=WKaIlu8i5AWYxFCGNJcorAfMnlUQDOF8CX07Ycfnu2c,294
|
24
|
+
magic_pdf/data/io/base.py,sha256=SqNQqe30ZvoVvg7GVv-hLMCjN6yBgDyQQWeLgGsTfhQ,1118
|
25
|
+
magic_pdf/data/io/http.py,sha256=XlKB0DNf4a_uUnfgcclvaaOtmE7lmddx0DnK8A-emAM,958
|
26
|
+
magic_pdf/data/io/s3.py,sha256=hyA7sbNriQy64xd_uyJ7acN_oneQ1Pdmoc7_xcvkue8,3606
|
27
|
+
magic_pdf/dict2md/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
28
|
+
magic_pdf/dict2md/ocr_mkcontent.py,sha256=U4DqKfD4dJ2S5Z8NEAGhuLYkEOIeC-BWuArMbwi7BJs,13784
|
29
|
+
magic_pdf/filter/__init__.py,sha256=_7lSez_myu4b6cdzPpQ-NfREuqeBSq_QdyBPKVLyq2U,1505
|
30
|
+
magic_pdf/filter/pdf_classify_by_type.py,sha256=YNYXamxYgEiSujwilCNHOtrwpgJGDiQ597qJfardDVc,42354
|
31
|
+
magic_pdf/filter/pdf_meta_scan.py,sha256=eOuM0-JgaXvHolSgepGoNDJDmv_uITWLQpH_0MfnVQw,17478
|
32
|
+
magic_pdf/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
33
|
+
magic_pdf/integrations/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
34
|
+
magic_pdf/integrations/rag/api.py,sha256=t38wvIBzLje4_JzTP3dewMLqV-tQJ-A3B92Sj2oyrfs,2507
|
35
|
+
magic_pdf/integrations/rag/type.py,sha256=Z_1g_ZIOCsb7-FmZBudReIXj8nzGrgj_BygCalhJdmk,3193
|
36
|
+
magic_pdf/integrations/rag/utils.py,sha256=DCb-UhC8TElb6Eq7_6NmmETreKEk5DVE18hNL8sTEBk,11762
|
37
|
+
magic_pdf/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
38
|
+
magic_pdf/libs/boxbase.py,sha256=DKZXhwpJd-HE2_Du7NmkeeYW5gG-iwX3GeSWL7rYGv0,16956
|
39
|
+
magic_pdf/libs/clean_memory.py,sha256=OsQexCjmBO2i-Hv-0uYQfn72dbUWR8sTW81nG2zlNQQ,479
|
40
|
+
magic_pdf/libs/commons.py,sha256=xD0fGA16KNB5rhbl4zRrOqdrNHYwaRablT_s9W2ZTbw,1174
|
41
|
+
magic_pdf/libs/config_reader.py,sha256=9GXK7jtDyA_jxXKWZAQ69rB02v5UW4mOmo1IaOYOkW0,4374
|
42
|
+
magic_pdf/libs/convert_utils.py,sha256=Ov-lsfCLBPz_15iSJXIslBNmrSf_E_1g_XDWJy8NgO8,143
|
43
|
+
magic_pdf/libs/coordinate_transform.py,sha256=Bbop2cP2uz2ZG0U0gwd7J6EKkgABq5Rv03qf2LMPw80,429
|
44
|
+
magic_pdf/libs/draw_bbox.py,sha256=hpUmpPiQVu7UgWQa3M49dS22G6A9gcG2jpq4dQjTjzA,18331
|
45
|
+
magic_pdf/libs/hash_utils.py,sha256=VEKK9WfFoZgrPfi8kfITjLpr8Ahufs8tXh9R1Y5lAL8,404
|
46
|
+
magic_pdf/libs/json_compressor.py,sha256=6-KCu0lb5ksmyqWtQGb4QqmP-FjRb5dP7P-Hevcn68g,875
|
47
|
+
magic_pdf/libs/language.py,sha256=7RT3mxSa7jdpoC5ySd7ZddHA7TO7UsnmDOWiYZAxuyg,1433
|
48
|
+
magic_pdf/libs/local_math.py,sha256=tqljQOgqh3fZc146HYhO88JXJaiXMVwArBkk_CSGICc,177
|
49
|
+
magic_pdf/libs/markdown_utils.py,sha256=86v2BmsSV4NkoRZrH4uQD1youJhYFF3vIKr_vDeg3z0,270
|
50
|
+
magic_pdf/libs/path_utils.py,sha256=Hykw_l5CU736b2egHV9P7B-qh3QNKO4nZSGCbsi0Z8E,1043
|
51
|
+
magic_pdf/libs/pdf_check.py,sha256=7GWWvDR6g_rj_fE6XJlbTq5AFVX11ngRIzT0N18F214,3396
|
52
|
+
magic_pdf/libs/pdf_image_tools.py,sha256=_au7plmKKctpPKozBumSKgP8689q4vH1mU8VMLO0IbM,2260
|
53
|
+
magic_pdf/libs/performance_stats.py,sha256=DW-c6nUTUnWKGTONRKfpucsYZm1ake016F9K7jJwbik,2136
|
54
|
+
magic_pdf/libs/safe_filename.py,sha256=ckwcM_eqoysTb5id8czp-tXq2G9da0-l3pshZDCHQtE,236
|
55
|
+
magic_pdf/libs/version.py,sha256=F5mW07pSyGrqDNY2Ehr-UpDzpBtN-FsYU0QGZWf6PJE,22
|
56
|
+
magic_pdf/model/__init__.py,sha256=sa-dO2k-TLy25I2gRrzjm_cQeYfzMf-pLwBJHkIxGo0,51
|
57
|
+
magic_pdf/model/batch_analyze.py,sha256=iE8WzD13edqyPcqarKJfgUxPxudXGNRck_FDtsPGYdg,11212
|
58
|
+
magic_pdf/model/doc_analyze_by_custom_model.py,sha256=cnfjCv9fTGYOgjyRBVQWHc57qbbK7PsoLKFLEQhIcho,10057
|
59
|
+
magic_pdf/model/magic_model.py,sha256=yZKWo_wRck_-YLyFGRiUHGar8sV1Y6458BFLbyBAt74,30682
|
60
|
+
magic_pdf/model/model_list.py,sha256=aqfEJlEfbib3D3ISrxc0Coh6SbffYh8Yq2FlQN35_zA,213
|
61
|
+
magic_pdf/model/pdf_extract_kit.py,sha256=C3sKqRkoD20Ldmo-cqGn1zRldEL-l5NYqcFvd05_fGU,10845
|
62
|
+
magic_pdf/model/pp_structure_v2.py,sha256=NcqFWL4nUtjl82MFak8HX_8V3i4Aw_fK4dATrIp5uGs,3840
|
63
|
+
magic_pdf/model/sub_modules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
64
|
+
magic_pdf/model/sub_modules/model_init.py,sha256=e2t95kxiuU47luOHByDokbQ2uob6oQhEA4b6UGVfMjY,8303
|
65
|
+
magic_pdf/model/sub_modules/model_utils.py,sha256=f8ceLYSHsNkTaAVUokYhAgsaW7hpQa8KwLseMXuKTF4,2638
|
66
|
+
magic_pdf/model/sub_modules/language_detection/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
67
|
+
magic_pdf/model/sub_modules/language_detection/utils.py,sha256=Q__v6DdNJztt8GhVSuSB0txahVq-aj8RLhWn2VScx4w,3047
|
68
|
+
magic_pdf/model/sub_modules/language_detection/yolov11/YOLOv11.py,sha256=7T8eFl8zlZe6F0j0jSB3jrwOapDft320JQ1fuWxpvAY,5230
|
69
|
+
magic_pdf/model/sub_modules/language_detection/yolov11/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
70
|
+
magic_pdf/model/sub_modules/layout/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
71
|
+
magic_pdf/model/sub_modules/layout/doclayout_yolo/DocLayoutYOLO.py,sha256=c-OmqY9DOgkIWqiDrNGe0-E5RS6-diwnPPsN5pbPw-s,2346
|
72
|
+
magic_pdf/model/sub_modules/layout/doclayout_yolo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
73
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
74
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/backbone.py,sha256=1cvSCczgvwOLdvzWyqttoYPMHsXmnzI3w9abJ1bAXoM,7106
|
75
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/beit.py,sha256=e-INve6bpEx_0FM5wYbQcEcelc79tzDlCljTVHaGt1w,30450
|
76
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/deit.py,sha256=Qyn5UWutZ-0GJczexCh-oMMSXtav_g3ovumMFJp8Om4,17000
|
77
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/model_init.py,sha256=PhWqqRwgSSmXTaUlLIjGqnBUNjzxwYDKgMzKjnxNy1k,4528
|
78
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/rcnn_vl.py,sha256=nI4G6AeLRmjavNhs5S2USKh0ozn-ftMuW0F0m_eVy3c,6649
|
79
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/visualizer.py,sha256=H6UYeCCbaN2gbDjGthTkKkPoyWxfE3azRjsR7fVBwnw,49797
|
80
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/__init__.py,sha256=C4N9gXJr7is7uznvQefQ7dOhlzEhdp86Lgh-7p0Y-08,186
|
81
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/__init__.py,sha256=W7V62JOh12NdMZj2H1sde3Il0AqW2VKplmHEsLle6tg,76
|
82
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/cord.py,sha256=jR_lRZxy8SeEvTK3FdlXmQHF0kefJf7ZqwM_8pvyI5E,8153
|
83
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/data_collator.py,sha256=M2TE47BprHSuQJYcoMeWOSpqkr_nh8VK6t2l26XWmxg,6279
|
84
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/funsd.py,sha256=Ez9tMeruHncJlkKQ7iRGBB9Pk1uWtgxlGeqs-sOmIG0,5214
|
85
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/image_utils.py,sha256=vuNOMzYw_h7jmaD2XUqkGlrjDEPB7XUts16GRICBmG4,10334
|
86
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/xfund.py,sha256=6jLKyc_4VhbHY4YEzBXm5RkPdsd9ldnUGXFZBLiJ-_s,8270
|
87
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/__init__.py,sha256=d5bm3Rx-jTrgfJDWrzD7t5R5CdHfug9dCNvUEneIYW4,190
|
88
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/__init__.py,sha256=a04w_C0B4P9jF-3I_tXCj3fLmfFQR5XSKGbhgGm--pM,1216
|
89
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/configuration_layoutlmv3.py,sha256=CJBcAmmLeRFVMN1YjWefoUW7hk0KXek0Eb_tergKl4Y,2150
|
90
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/modeling_layoutlmv3.py,sha256=mdo8tO-DrJcv0Lbk9Pp98n3NQXYOnFFyXQWjU7t35kA,54633
|
91
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3.py,sha256=diKlrfxYjKAmYrUgjYdx-FXLh-swShC3tl-EBX1b3oI,1197
|
92
|
+
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3_fast.py,sha256=0lxiG69_fGpSSBYA9CBLnDa_qqa1rInZ0pJpqBwZ0Yw,1372
|
93
|
+
magic_pdf/model/sub_modules/mfd/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
94
|
+
magic_pdf/model/sub_modules/mfd/yolov8/YOLOv8.py,sha256=Wm6jh70l_q_P_LQxa_pmLbg9OnHZyEKF1Dfln7Y2c8w,1114
|
95
|
+
magic_pdf/model/sub_modules/mfd/yolov8/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
96
|
+
magic_pdf/model/sub_modules/mfr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
97
|
+
magic_pdf/model/sub_modules/mfr/unimernet/Unimernet.py,sha256=hq48TP1Ac8Y4FlK7GamnU-WZTQfdZotxBKuFhOIjrcM,5349
|
98
|
+
magic_pdf/model/sub_modules/mfr/unimernet/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
99
|
+
magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/__init__.py,sha256=kHcISG8GS4TWJW34SCJCei1jxo6HxvO00aC0dqyNFgI,413
|
100
|
+
magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/modeling_unimernet.py,sha256=F_rwlFytWOwOntNhhZoUqFAyXgzvq_HVjKyBizwlnjo,7913
|
101
|
+
magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/__init__.py,sha256=9T2rBpyGX5YFQYj89-mWujRokOuz4xgNreBuegcg1_c,228
|
102
|
+
magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/configuration_unimer_mbart.py,sha256=hwRar3pqN_cVs3TRTNSuhB4wacBncfJ-qvaTajRb0xc,7934
|
103
|
+
magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/modeling_unimer_mbart.py,sha256=Y5nwqDRzSb8zAbY1U0HGJGVtFggJW1zATP8RdnasNcA,113605
|
104
|
+
magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/tokenization_unimer_mbart.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
105
|
+
magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/__init__.py,sha256=8_1DKwDCDUBkeHYiJJ6MZnodZBsatHbqhygh11s9eEA,267
|
106
|
+
magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/configuration_unimer_swin.py,sha256=OX3eRUKBnKCXtxJOG3sdNoB1IV-Z7efgWU-gaclYOGA,5780
|
107
|
+
magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/image_processing_unimer_swin.py,sha256=tlcCxOJVRus_35lCz4faMVZ8ulAjGxK5Yu9Y_IeHsDY,4406
|
108
|
+
magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/modeling_unimer_swin.py,sha256=Q_fdmFHUBtEoAfWp9aowdwTCE2MIFMOPbYjoSyXK2iU,48929
|
109
|
+
magic_pdf/model/sub_modules/ocr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
110
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
111
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/ocr_utils.py,sha256=3qxu0lAjqzZQ2Ci-C_wz_YSakyq_5-KnckA3-5bICTM,12589
|
112
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorch_paddle.py,sha256=FaPo02L1IJKybGYfydsohOiHstJIL8d5UKzGck2tYvk,7283
|
113
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
114
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/base_ocr_v20.py,sha256=5bI7MAu65r-vn28krwdJ6pjZMkEvWjspE7EQaTsRERw,1319
|
115
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/__init__.py,sha256=YYu3c-W4fgEErxxDM98uQ3oWwPEh-6w75LY4zcj4VtM,199
|
116
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/__init__.py,sha256=c4H0gXPRweQ0wMFnkrCLTR6MrtG-e4kUinxwq2G1V9U,1480
|
117
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/operators.py,sha256=uwS8t-6hVIBag3jJd3yiBM4DW_dEiynp22_WFmVppjA,14205
|
118
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
119
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/common.py,sha256=3r2jTvPYQS4IgTvIqR4l6bBVwR7jn-87rSmpv3tlqxI,2294
|
120
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/__init__.py,sha256=DCA9FS4mE5oCHDlBhUrkYLdxFeQIbhPj4P8oJ_gRZD8,832
|
121
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/base_model.py,sha256=RhV2Dm-os08kCFylT57zRu72Hq_RJdFy3xQe1MPaCuU,3588
|
122
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/__init__.py,sha256=TOLvLNeJhnAOus0D5jAq2TM67vLrtbzYlThTOhc-idc,1960
|
123
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/det_mobilenet_v3.py,sha256=r0gWnA1Xmt0Zw4FQLx7kf-WWwZd_26PfNzhM05drcuE,8334
|
124
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_hgnet.py,sha256=UsIbzqN_koyGoSh1TA9r27SggpHbeKS3HmmS-A2Aw04,8341
|
125
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_lcnetv3.py,sha256=3PNQG9B1cHe1hAg0NhcFR8p87rZnmH0jTBcfid-ZnKQ,15995
|
126
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mobilenet_v3.py,sha256=mJmE6xGpjHZH2Vaw16LlIlqRFFm9R9yRsSJEa3Yn3nw,4822
|
127
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mv1_enhance.py,sha256=K4p9KFYNmltV3y3QsxHIASNxoqlGtxgAoCxeFofyCmw,6726
|
128
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_svtrnet.py,sha256=AIaUZ3IWBkRz2pWmanBjS0QdJcYnimMSV4MWofNpQcg,20222
|
129
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/__init__.py,sha256=7vl3hyn6Ug_DNtqdfUL1Hz9scA_ptch2FCDzNddpQgU,1282
|
130
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/cls_head.py,sha256=puIy5GlUtAKer6eS4HWKu07PzRd-HlDAqIz5WqjBHaA,596
|
131
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/det_db_head.py,sha256=-k8bpuGQw_xIVDsumrfimOxg0O-oP2MOAyDJTjU70Ro,3633
|
132
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_ctc_head.py,sha256=ywyk5RJgUITdXvrUZk2yBSWKsaZIqnTofdFbuQUtwjU,1311
|
133
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_multi_head.py,sha256=K40SMA8tAVWu-3fwgfh3jGWeVFAdVnMyHjeZeI9OO7Q,2016
|
134
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/__init__.py,sha256=634L1y-QWv5P8opNiSmKvQEx3Uskc20RG8DYiCdbl8U,1030
|
135
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/db_fpn.py,sha256=TLF2pSyvRC0oPzL0eVyNlg3W6Zvfr4J8fD1nziVB7uI,14146
|
136
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/intracl.py,sha256=w2QdwdI9BpiW92VS4mqL31sVERIbY53TfbD5Q6okiaY,3410
|
137
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/rnn.py,sha256=olZwnKQexVlMx0gJi0FVYAm38TxNn5BM6F-OrdHKEgk,7019
|
138
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/__init__.py,sha256=iC1Ol6CTxRWZBUyQ_5IVMR6kIurv9WJPOWWo7NAuZBA,1183
|
139
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/cls_postprocess.py,sha256=1VVWXT_b1vhGb7PGvqyfUQ3Ip7LupH62vPva98GtjTA,685
|
140
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/db_postprocess.py,sha256=EAc-cmhXtxLfFA6h5C871hIcDfXsjPDISiVCKwxh-qM,6339
|
141
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/rec_postprocess.py,sha256=HiHNr4bhW5U1j4pYoyi8fPOaFsn8TUc4nSB6q8chfV4,26899
|
142
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
143
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/arch_config.yaml,sha256=eEzg5D5L3MHFL4H02gZnxdDiqtSCUzZDnt5pqDAmgCI,6980
|
144
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/models_config.yml,sha256=GOtAGMAretviqDXak409PPav7qHYMDBwSs9wxlSANRA,1388
|
145
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/arabic_dict.txt,sha256=xbaXD14RWk0Vpc7fAHpephuszp1j-Qi3IWC4VrFKu70,407
|
146
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/chinese_cht_dict.txt,sha256=gyVR_uHy-8l1CHctgevcjboSwA3pejXHHJ3fQ92sGoM,33443
|
147
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/cyrillic_dict.txt,sha256=NpqCxsjEeXhKXXJkSLg7Hq-1_vCkEppeqjkpYl3c0TI,410
|
148
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/devanagari_dict.txt,sha256=tfG-bYu_8aGfuWxdTKlqQjOAI0u30s4OB7WDittNGOo,508
|
149
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/en_dict.txt,sha256=VmLfnS0D8OjKDTsGSdasurkEtqFLPTUhRjxxw3xmjOM,190
|
150
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/japan_dict.txt,sha256=Hc_LQe7JBXapRbMITyKt4RztUG4k8Uh5JFsHFpjzCOg,17332
|
151
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ka_dict.txt,sha256=-tP3ZZQyde7CE0pvvJtSeFQmZBEE1OfbOhWdxz80Hd4,452
|
152
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/korean_dict.txt,sha256=qh_ciuj3zUCg7E7bRy6wQh4RQn5sz-6ZFUQHQsGLCiA,14480
|
153
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/latin_dict.txt,sha256=jm1ONil4jDXDH35TAofWFHtUm7eiZb1nCLsoETRCniw,468
|
154
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocr_keys_v1.txt,sha256=KLI2KtSrLcOHaapy_rU146nds_0qdYWgWSDmOTsdx_c,26249
|
155
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ta_dict.txt,sha256=6T5pSBSv2f8ekYtvS7Qmf7TGWpNE7l10ZPkTW5DAonA,352
|
156
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/te_dict.txt,sha256=7plGpg13AZd0dOiYg2lKTKIOqjhoojM0v3lA3NAI8Pk,429
|
157
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/__init__.py,sha256=xEqR65Z8YOzOLorLjK0LCHos2zX-tCuxSrxndjU00hE,49
|
158
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
159
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_cls.py,sha256=8RmKl1vejnZl65caHZNV2ta6hMsg5B_LE-FuqCO8T8A,4225
|
160
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_det.py,sha256=cRBKE0blzryj3Ar6yM0FKKgxmZdgMc44NDNl1S2wiRs,9136
|
161
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_rec.py,sha256=_fLTWjEmDZwXC-zzPT37PHO-nNlEvafemo2CyPJS7_w,19216
|
162
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_system.py,sha256=hkegkn6hq2v2zqHVAP615-k-fkTS8swRYSbZeoqmSI8,3822
|
163
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/pytorchocr_utility.py,sha256=i1PFN-_kefJUUZ4Vk7igs1TU8gfErTDlDXY6-8Uaurw,9323
|
164
|
+
magic_pdf/model/sub_modules/reading_oreder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
165
|
+
magic_pdf/model/sub_modules/reading_oreder/layoutreader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
166
|
+
magic_pdf/model/sub_modules/reading_oreder/layoutreader/helpers.py,sha256=IVUFcNMDF3-kio-BIxjppHnWS3eHPqvvNihIw2fbIFM,4372
|
167
|
+
magic_pdf/model/sub_modules/reading_oreder/layoutreader/xycut.py,sha256=ezNSq_Y4UXiztB58hbXJsjTJlOBqWIjuW5A2uLSaZSo,7349
|
168
|
+
magic_pdf/model/sub_modules/table/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
169
|
+
magic_pdf/model/sub_modules/table/table_utils.py,sha256=B9BC4f5EEjlt2ldYxrIC8Wic2Tz3t3gTJeEyK3ggrOU,282
|
170
|
+
magic_pdf/model/sub_modules/table/rapidtable/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
171
|
+
magic_pdf/model/sub_modules/table/rapidtable/rapid_table.py,sha256=Vh1rWxj6t9PrXw7yhTNdJvTHy2wSVITTbGG2fD0dOZg,2515
|
172
|
+
magic_pdf/operators/__init__.py,sha256=liU2-WYUvsQ1G4PYBppyvokS9z5IjrnlVMtoBAC1REI,2630
|
173
|
+
magic_pdf/operators/models.py,sha256=mRqbCVrxxaUVDpEBAsXaK7EL1M-goICkE1W0FYgewio,5305
|
174
|
+
magic_pdf/operators/pipes.py,sha256=XgBgisKQd_ruW-3Tw4v5LhqloZUHgn2aFcpi_q8LbCs,6767
|
175
|
+
magic_pdf/post_proc/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
176
|
+
magic_pdf/post_proc/llm_aided.py,sha256=yzhu2cCpUZjdwf3v0swYDgSs9VWIfMAoXepYIP1EMZs,6367
|
177
|
+
magic_pdf/post_proc/para_split_v3.py,sha256=SPN_VVGvFX5KpFMGw9OzgoE-kTZq-FF036i0cIImGH8,16975
|
178
|
+
magic_pdf/pre_proc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
179
|
+
magic_pdf/pre_proc/construct_page_dict.py,sha256=OFmq5XRKi6fYIo-lmGlL-NB16Sf0egzsfEx-fT2uYrc,660
|
180
|
+
magic_pdf/pre_proc/cut_image.py,sha256=NDzbxwD7z7Tb4uAxL4KR6LzURFdN1Tzr4nPvj-VmEqc,1225
|
181
|
+
magic_pdf/pre_proc/ocr_detect_all_bboxes.py,sha256=nt88ttXCEI_1ihAF7HU15SQjwM69V-iJmk-L_nyzA6o,9328
|
182
|
+
magic_pdf/pre_proc/ocr_dict_merge.py,sha256=PscKGF0uJIjMxZRM69FLUs1SZO_wOswDQQV1f0M2xAo,5627
|
183
|
+
magic_pdf/pre_proc/ocr_span_list_modify.py,sha256=bs5RLvk4kIyx9_Hqq0FU3AGPPxE8Sxs97Uwlf1sBryM,4725
|
184
|
+
magic_pdf/pre_proc/remove_bbox_overlap.py,sha256=mcdxAh4P56NZ3Ij8h3vW8qC_SrszfXflVWuWUuUiTNg,3089
|
185
|
+
magic_pdf/resources/fasttext-langdetect/lid.176.ftz,sha256=jzRyz-hzintgmejpmcPL-uDc0VaWqsfXc4qAOdtgPoM,938013
|
186
|
+
magic_pdf/resources/model_config/model_configs.yaml,sha256=SoyoDmXYQX4ltKclG6ZcpdSA8dX5auSgILBvaA0wPkg,325
|
187
|
+
magic_pdf/resources/yolov11-langdetect/yolo_v11_ft.pt,sha256=dV4lcudF8wimEbAooYbvISvFhrXjp9i0rMRqv9VW6hY,3204667
|
188
|
+
magic_pdf/spark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
189
|
+
magic_pdf/spark/spark_api.py,sha256=BYO6zlRW0cEnIUB3ZzNQTu_LsPHEVitqiUN7gy3x_wo,1124
|
190
|
+
magic_pdf/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
191
|
+
magic_pdf/tools/cli.py,sha256=_oa-M5Hcopa5RZudVzrEip2W8pa9422Lmat7tMBJO5M,5171
|
192
|
+
magic_pdf/tools/cli_dev.py,sha256=3RbubfTIagWoFYdu8wSDanr-BJDjFGeDet55jTy7He0,3948
|
193
|
+
magic_pdf/tools/common.py,sha256=LoUz6Y36_U2odZqzBNKXngFNa6plf01U7_5jlDAFXaQ,12313
|
194
|
+
magic_pdf/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
195
|
+
magic_pdf/utils/annotations.py,sha256=82ou3uELNbQWa9hOFFkVt0gsIskAKf5msCv5J2IJ5V0,211
|
196
|
+
magic_pdf/utils/office_to_pdf.py,sha256=7aj-Ls2v8saD-Rgu_t3FIc-J3Ka9wnmiEH5zY-H1Vxs,729
|
197
|
+
magic_pdf-1.3.0.dist-info/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
|
198
|
+
magic_pdf-1.3.0.dist-info/METADATA,sha256=Z4WBVqCxPlHu7TC2pLGhrCfVpp0Im11NBZNowFQsMyM,41909
|
199
|
+
magic_pdf-1.3.0.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
200
|
+
magic_pdf-1.3.0.dist-info/entry_points.txt,sha256=wXwYke3j8fqDQTocUspL-CqDUEv3Tfcwp09fM8dZAhA,98
|
201
|
+
magic_pdf-1.3.0.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
|
202
|
+
magic_pdf-1.3.0.dist-info/RECORD,,
|
@@ -1,204 +0,0 @@
|
|
1
|
-
import copy
|
2
|
-
import platform
|
3
|
-
import time
|
4
|
-
import cv2
|
5
|
-
import numpy as np
|
6
|
-
import torch
|
7
|
-
|
8
|
-
from paddleocr import PaddleOCR
|
9
|
-
from ppocr.utils.logging import get_logger
|
10
|
-
from ppocr.utils.utility import alpha_to_color, binarize_img
|
11
|
-
from tools.infer.predict_system import sorted_boxes
|
12
|
-
from tools.infer.utility import get_rotate_crop_image, get_minarea_rect_crop
|
13
|
-
|
14
|
-
from magic_pdf.model.sub_modules.ocr.paddleocr.ocr_utils import update_det_boxes, merge_det_boxes, check_img, \
|
15
|
-
ONNXModelSingleton
|
16
|
-
|
17
|
-
# Module-level logger obtained from ppocr.utils.logging.get_logger (imported above).
logger = get_logger()
|
18
|
-
|
19
|
-
|
20
|
-
class ModifiedPaddleOCR(PaddleOCR):
    """PaddleOCR subclass with formula-aware box splitting and an optional ONNX backend.

    Compared with the stock pipeline visible here, detected boxes are passed
    through ``merge_det_boxes`` and, when ``mfd_res`` is supplied,
    ``update_det_boxes``. When CUDA is unavailable on an ARM machine and the
    language is ``'ch'``, detection and recognition are delegated to the model
    returned by ``ONNXModelSingleton().get_onnx_model(**kwargs)``.
    """

    def __init__(self, *args, **kwargs):
        """Initialise the parent OCR engine and decide whether to use ONNX.

        Args:
            *args: Forwarded unchanged to ``PaddleOCR.__init__``.
            **kwargs: Forwarded unchanged to ``PaddleOCR.__init__``; ``lang``
                (default ``'ch'``) is also remembered on the instance, and the
                full mapping is passed to ``get_onnx_model`` when ONNX is used.
        """
        super().__init__(*args, **kwargs)
        self.lang = kwargs.get('lang', 'ch')
        # Use the ONNX models when the CPU architecture is ARM and CUDA is not supported.
        if not torch.cuda.is_available() and platform.machine() in ['arm64', 'aarch64']:
            self.use_onnx = True
            onnx_model_manager = ONNXModelSingleton()
            self.additional_ocr = onnx_model_manager.get_onnx_model(**kwargs)
        else:
            self.use_onnx = False

    def _detect_text(self, image):
        """Run text detection with the ONNX model for 'ch' when enabled, else the default one."""
        if self.lang in ['ch'] and self.use_onnx:
            return self.additional_ocr.text_detector(image)
        return self.text_detector(image)

    def _recognize_text(self, images):
        """Run text recognition with the ONNX model for 'ch' when enabled, else the default one."""
        if self.lang in ['ch'] and self.use_onnx:
            return self.additional_ocr.text_recognizer(images)
        return self.text_recognizer(images)

    def _refine_det_boxes(self, dt_boxes, mfd_res):
        """Sort and merge detected boxes, then split them by ``mfd_res`` when it is given."""
        dt_boxes = sorted_boxes(dt_boxes)
        # merge_det_boxes and update_det_boxes both convert polys to bboxes and back
        # to polys, so all heavily skewed text boxes need to be filtered out.
        dt_boxes = merge_det_boxes(dt_boxes)
        if mfd_res:
            bef = time.time()
            dt_boxes = update_det_boxes(dt_boxes, mfd_res)
            aft = time.time()
            logger.debug("split text box by formula, new dt_boxes num : {}, elapsed : {}".format(
                len(dt_boxes), aft - bef))
        return dt_boxes

    def ocr(self,
            img,
            det=True,
            rec=True,
            cls=True,
            bin=False,
            inv=False,
            alpha_color=(255, 255, 255),
            mfd_res=None,
            ):
        """
        OCR with PaddleOCR
        args:
            img: img for OCR, support ndarray, img_path and list or ndarray
            det: use text detection or not. If False, only rec will be exec. Default is True
            rec: use text recognition or not. If False, only det will be exec. Default is True
            cls: use angle classifier or not. Default is True. If True, the text with rotation of 180 degrees can be recognized. If no text is rotated by 180 degrees, use cls=False to get better performance. Text with rotation of 90 or 270 degrees can be recognized even if cls=False.
            bin: binarize image to black and white. Default is False.
            inv: invert image colors. Default is False.
            alpha_color: set RGB color Tuple for transparent parts replacement. Default is pure white.
            mfd_res: optional value forwarded to update_det_boxes to split text
                boxes by formula; ignored when falsy. (Its exact structure is
                defined by update_det_boxes, which is not visible here.)
        returns:
            det and rec: one entry per image, either None or a list of
                [box_as_list, rec_result] pairs.
            det only: one entry per image, either None or a list of boxes as lists.
            rec only: one recognizer result per image.
            neither: the angle-classifier results collected per image.
        raises:
            AssertionError: if img is not an ndarray, list, str or bytes.
            SystemExit: if img is a list while det is enabled.
        """
        assert isinstance(img, (np.ndarray, list, str, bytes))
        if isinstance(img, list) and det:
            logger.error('When input a list of images, det must be false')
            # Raise SystemExit directly: the ``exit`` builtin is injected by the
            # ``site`` module, is not guaranteed to exist, and closes sys.stdin.
            raise SystemExit(0)
        # NOTE: when cls is requested but the angle classifier is not initialised,
        # the classifier is silently skipped (the upstream warning is disabled).

        img = check_img(img)
        # for infer pdf file
        if isinstance(img, list):
            # Keep the page limit local. Writing it back to self.page_num would
            # make one short input permanently truncate every later, longer input.
            page_limit = self.page_num
            if page_limit > len(img) or page_limit == 0:
                page_limit = len(img)
            imgs = img[:page_limit]
        else:
            imgs = [img]

        def preprocess_image(_image):
            _image = alpha_to_color(_image, alpha_color)
            if inv:
                _image = cv2.bitwise_not(_image)
            if bin:
                _image = binarize_img(_image)
            return _image

        if det and rec:
            ocr_res = []
            for image in imgs:
                image = preprocess_image(image)
                dt_boxes, rec_res, _ = self.__call__(image, cls, mfd_res=mfd_res)
                if not dt_boxes and not rec_res:
                    ocr_res.append(None)
                    continue
                ocr_res.append([[box.tolist(), res]
                                for box, res in zip(dt_boxes, rec_res)])
            return ocr_res

        if det and not rec:
            ocr_res = []
            for image in imgs:
                image = preprocess_image(image)
                dt_boxes, _elapse = self._detect_text(image)
                if dt_boxes is None:
                    ocr_res.append(None)
                    continue
                dt_boxes = self._refine_det_boxes(dt_boxes, mfd_res)
                ocr_res.append([box.tolist() for box in dt_boxes])
            return ocr_res

        # det is disabled: classify (optionally) and recognise each input as-is.
        ocr_res = []
        cls_res = []
        for image in imgs:
            if not isinstance(image, list):
                image = preprocess_image(image)
                image = [image]
            if self.use_angle_cls and cls:
                image, cls_res_tmp, _elapse = self.text_classifier(image)
                if not rec:
                    cls_res.append(cls_res_tmp)
            rec_res, _elapse = self._recognize_text(image)
            ocr_res.append(rec_res)
        if not rec:
            return cls_res
        return ocr_res

    def __call__(self, img, cls=True, mfd_res=None):
        """Detect, crop, optionally classify, and recognise text in a single image.

        Args:
            img: Image to process; ``None`` short-circuits with empty results.
            cls: Run the angle classifier on the crops when it is initialised.
            mfd_res: Optional value forwarded to ``update_det_boxes``.

        Returns:
            ``(boxes, rec_results, time_dict)``. ``boxes`` and ``rec_results``
            are ``None`` when there is no image or the detector returns ``None``;
            otherwise they hold only the entries whose score is at least
            ``self.drop_score``. ``time_dict`` has keys ``det``, ``rec``,
            ``cls`` and ``all``.
        """
        time_dict = {'det': 0, 'rec': 0, 'cls': 0, 'all': 0}

        if img is None:
            logger.debug("no valid image provided")
            return None, None, time_dict

        start = time.time()
        # Crops are taken from a copy so the caller's image is not the crop source.
        ori_im = img.copy()
        dt_boxes, elapse = self._detect_text(img)
        time_dict['det'] = elapse

        if dt_boxes is None:
            logger.debug("no dt_boxes found, elapsed : {}".format(elapse))
            time_dict['all'] = time.time() - start
            return None, None, time_dict

        logger.debug("dt_boxes num : {}, elapsed : {}".format(
            len(dt_boxes), elapse))

        dt_boxes = self._refine_det_boxes(dt_boxes, mfd_res)

        img_crop_list = []
        for det_box in dt_boxes:
            tmp_box = copy.deepcopy(det_box)
            if self.args.det_box_type == "quad":
                img_crop = get_rotate_crop_image(ori_im, tmp_box)
            else:
                img_crop = get_minarea_rect_crop(ori_im, tmp_box)
            img_crop_list.append(img_crop)

        if self.use_angle_cls and cls:
            img_crop_list, _angle_list, elapse = self.text_classifier(
                img_crop_list)
            time_dict['cls'] = elapse
            logger.debug("cls num : {}, elapsed : {}".format(
                len(img_crop_list), elapse))

        rec_res, elapse = self._recognize_text(img_crop_list)
        time_dict['rec'] = elapse
        logger.debug("rec_res num : {}, elapsed : {}".format(
            len(rec_res), elapse))
        if self.args.save_crop_res:
            self.draw_crop_rec_res(self.args.crop_res_save_dir, img_crop_list,
                                   rec_res)

        # Keep only the results whose recognition score reaches drop_score.
        filter_boxes, filter_rec_res = [], []
        for box, rec_result in zip(dt_boxes, rec_res):
            _text, score = rec_result
            if score >= self.drop_score:
                filter_boxes.append(box)
                filter_rec_res.append(rec_result)
        time_dict['all'] = time.time() - start
        return filter_boxes, filter_rec_res, time_dict
|
@@ -1,213 +0,0 @@
|
|
1
|
-
import copy
|
2
|
-
import time
|
3
|
-
|
4
|
-
|
5
|
-
import cv2
|
6
|
-
import numpy as np
|
7
|
-
from paddleocr import PaddleOCR
|
8
|
-
from paddleocr.paddleocr import check_img, logger
|
9
|
-
from paddleocr.ppocr.utils.utility import alpha_to_color, binarize_img
|
10
|
-
from paddleocr.tools.infer.predict_system import sorted_boxes
|
11
|
-
from paddleocr.tools.infer.utility import slice_generator, merge_fragmented, get_rotate_crop_image, \
|
12
|
-
get_minarea_rect_crop
|
13
|
-
|
14
|
-
from magic_pdf.model.sub_modules.ocr.paddleocr.ocr_utils import update_det_boxes
|
15
|
-
|
16
|
-
|
17
|
-
class ModifiedPaddleOCR(PaddleOCR):
    """PaddleOCR subclass whose detection boxes can be split by ``mfd_res``.

    ``ocr`` and ``__call__`` accept an extra ``mfd_res`` argument; when it is
    truthy, the sorted detection boxes are passed through ``update_det_boxes``
    before cropping and recognition.
    """

    def ocr(
        self,
        img,
        det=True,
        rec=True,
        cls=True,
        bin=False,
        inv=False,
        alpha_color=(255, 255, 255),
        slice=None,
        mfd_res=None,
    ):
        """
        OCR with PaddleOCR

        Args:
            img: Image for OCR. It can be an ndarray, img_path, or a list of ndarrays.
            det: Use text detection or not. If False, only text recognition will be executed. Default is True.
            rec: Use text recognition or not. If False, only text detection will be executed. Default is True.
            cls: Use angle classifier or not. Default is True. If True, the text with a rotation of 180 degrees can be recognized. If no text is rotated by 180 degrees, use cls=False to get better performance.
            bin: Binarize image to black and white. Default is False.
            inv: Invert image colors. Default is False.
            alpha_color: Set RGB color Tuple for transparent parts replacement. Default is pure white.
            slice: Use sliding window inference for large images. Both det and rec must be True. Requires int values for slice["horizontal_stride"], slice["vertical_stride"], slice["merge_x_thres"], slice["merge_y_thres"] (See doc/doc_en/slice_en.md). Default is None, which (like an empty dict) disables slicing.
            mfd_res: Optional value forwarded to update_det_boxes to split text boxes by formula when both det and rec are True; ignored when falsy.

        Returns:
            If both det and rec are True, returns a list of OCR results for each image. Each OCR result is a list of bounding boxes and recognized text for each detected text region.
            If det is True and rec is False, returns a list of detected bounding boxes for each image.
            If det is False and rec is True, returns a list of recognized text for each image.
            If both det and rec are False, returns a list of angle classification results for each image.

        Raises:
            AssertionError: If the input image is not of type ndarray, list, str, or bytes.
            SystemExit: If det is True and the input is a list of images.

        Note:
            - If the angle classifier is not initialized (use_angle_cls=False), it will not be used during the forward process.
            - For PDF files, if the input is a list of images and the page_num is specified, only the first page_num images will be processed.
            - The preprocess_image function is used to preprocess the input image by applying alpha color replacement, inversion, and binarization if specified.
        """
        assert isinstance(img, (np.ndarray, list, str, bytes))
        if isinstance(img, list) and det:
            logger.error("When input a list of images, det must be false")
            # Raise SystemExit directly: the ``exit`` builtin is injected by the
            # ``site`` module, is not guaranteed to exist, and closes sys.stdin.
            raise SystemExit(0)
        if cls and not self.use_angle_cls:
            logger.warning(
                "Since the angle classifier is not initialized, it will not be used during the forward process"
            )

        img, _flag_gif, flag_pdf = check_img(img, alpha_color)
        # for infer pdf file
        if isinstance(img, list) and flag_pdf:
            if self.page_num > len(img) or self.page_num == 0:
                imgs = img
            else:
                imgs = img[: self.page_num]
        else:
            imgs = [img]

        def preprocess_image(_image):
            _image = alpha_to_color(_image, alpha_color)
            if inv:
                _image = cv2.bitwise_not(_image)
            if bin:
                _image = binarize_img(_image)
            return _image

        if det and rec:
            ocr_res = []
            for image in imgs:
                image = preprocess_image(image)
                dt_boxes, rec_res, _ = self.__call__(image, cls, slice, mfd_res=mfd_res)
                if not dt_boxes and not rec_res:
                    ocr_res.append(None)
                    continue
                ocr_res.append(
                    [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)]
                )
            return ocr_res

        if det and not rec:
            ocr_res = []
            for image in imgs:
                image = preprocess_image(image)
                dt_boxes, _elapse = self.text_detector(image)
                # __call__ treats None as "no boxes"; do the same here instead of
                # failing on ``None.size``.
                if dt_boxes is None or dt_boxes.size == 0:
                    ocr_res.append(None)
                    continue
                ocr_res.append([box.tolist() for box in dt_boxes])
            return ocr_res

        # det is disabled: classify (optionally) and recognise each input as-is.
        ocr_res = []
        cls_res = []
        for image in imgs:
            if not isinstance(image, list):
                image = preprocess_image(image)
                image = [image]
            if self.use_angle_cls and cls:
                image, cls_res_tmp, _elapse = self.text_classifier(image)
                if not rec:
                    cls_res.append(cls_res_tmp)
            rec_res, _elapse = self.text_recognizer(image)
            ocr_res.append(rec_res)
        if not rec:
            return cls_res
        return ocr_res

    def __call__(self, img, cls=True, slice=None, mfd_res=None):
        """Detect, crop, optionally classify, and recognise text in a single image.

        Args:
            img: Image to process; ``None`` short-circuits with empty results.
            cls: Run the angle classifier on the crops when it is initialised.
            slice: Optional mapping with ``horizontal_stride``,
                ``vertical_stride``, ``merge_x_thres`` and ``merge_y_thres``;
                when truthy, detection runs per slice and the results are merged.
            mfd_res: Optional value forwarded to ``update_det_boxes``.

        Returns:
            ``(boxes, rec_results, time_dict)``. ``boxes`` and ``rec_results``
            are ``None`` when there is no image, the detector returns ``None``,
            or sliced detection finds nothing; otherwise they hold only the
            entries whose score is at least ``self.drop_score``. ``time_dict``
            has keys ``det``, ``rec``, ``cls`` and ``all``.
        """
        time_dict = {"det": 0, "rec": 0, "cls": 0, "all": 0}

        if img is None:
            logger.debug("no valid image provided")
            return None, None, time_dict

        start = time.time()
        # Crops are taken from a copy so the caller's image is not the crop source.
        ori_im = img.copy()
        if slice:
            slice_gen = slice_generator(
                img,
                horizontal_stride=slice["horizontal_stride"],
                vertical_stride=slice["vertical_stride"],
            )
            elapsed = []
            dt_slice_boxes = []
            for slice_crop, v_start, h_start in slice_gen:
                slice_boxes, slice_elapse = self.text_detector(slice_crop, use_slice=True)
                if slice_boxes.size:
                    # Shift slice-local coordinates back into the full image.
                    slice_boxes[:, :, 0] += h_start
                    slice_boxes[:, :, 1] += v_start
                    dt_slice_boxes.append(slice_boxes)
                    elapsed.append(slice_elapse)
            elapse = sum(elapsed)
            if dt_slice_boxes:
                dt_boxes = merge_fragmented(
                    boxes=np.concatenate(dt_slice_boxes),
                    x_threshold=slice["merge_x_thres"],
                    y_threshold=slice["merge_y_thres"],
                )
            else:
                # np.concatenate raises ValueError on an empty list; report
                # "no boxes" through the regular None path below instead.
                dt_boxes = None
        else:
            dt_boxes, elapse = self.text_detector(img)

        time_dict["det"] = elapse

        if dt_boxes is None:
            logger.debug("no dt_boxes found, elapsed : {}".format(elapse))
            time_dict["all"] = time.time() - start
            return None, None, time_dict

        logger.debug(
            "dt_boxes num : {}, elapsed : {}".format(len(dt_boxes), elapse)
        )

        dt_boxes = sorted_boxes(dt_boxes)

        if mfd_res:
            bef = time.time()
            dt_boxes = update_det_boxes(dt_boxes, mfd_res)
            aft = time.time()
            logger.debug("split text box by formula, new dt_boxes num : {}, elapsed : {}".format(
                len(dt_boxes), aft - bef))

        img_crop_list = []
        for det_box in dt_boxes:
            tmp_box = copy.deepcopy(det_box)
            if self.args.det_box_type == "quad":
                img_crop = get_rotate_crop_image(ori_im, tmp_box)
            else:
                img_crop = get_minarea_rect_crop(ori_im, tmp_box)
            img_crop_list.append(img_crop)

        if self.use_angle_cls and cls:
            img_crop_list, _angle_list, elapse = self.text_classifier(img_crop_list)
            time_dict["cls"] = elapse
            logger.debug(
                "cls num : {}, elapsed : {}".format(len(img_crop_list), elapse)
            )
        if len(img_crop_list) > 1000:
            logger.debug(
                f"rec crops num: {len(img_crop_list)}, time and memory cost may be large."
            )

        rec_res, elapse = self.text_recognizer(img_crop_list)
        time_dict["rec"] = elapse
        logger.debug("rec_res num : {}, elapsed : {}".format(len(rec_res), elapse))
        if self.args.save_crop_res:
            self.draw_crop_rec_res(self.args.crop_res_save_dir, img_crop_list, rec_res)

        # Keep only the results whose recognition score reaches drop_score.
        filter_boxes, filter_rec_res = [], []
        for box, rec_result in zip(dt_boxes, rec_res):
            score = rec_result[1]
            if score >= self.drop_score:
                filter_boxes.append(box)
                filter_rec_res.append(rec_result)
        time_dict["all"] = time.time() - start
        return filter_boxes, filter_rec_res, time_dict
|