magic-pdf 0.7.1__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- magic_pdf/dict2md/ocr_mkcontent.py +130 -76
- magic_pdf/integrations/__init__.py +0 -0
- magic_pdf/integrations/rag/__init__.py +0 -0
- magic_pdf/integrations/rag/api.py +82 -0
- magic_pdf/integrations/rag/type.py +82 -0
- magic_pdf/integrations/rag/utils.py +285 -0
- magic_pdf/layout/layout_sort.py +472 -283
- magic_pdf/libs/boxbase.py +188 -149
- magic_pdf/libs/draw_bbox.py +113 -87
- magic_pdf/libs/ocr_content_type.py +21 -18
- magic_pdf/libs/version.py +1 -1
- magic_pdf/model/doc_analyze_by_custom_model.py +14 -2
- magic_pdf/model/magic_model.py +283 -166
- magic_pdf/model/model_list.py +8 -0
- magic_pdf/model/pdf_extract_kit.py +105 -15
- magic_pdf/model/pek_sub_modules/self_modify.py +84 -0
- magic_pdf/para/para_split_v2.py +26 -27
- magic_pdf/pdf_parse_union_core.py +34 -6
- magic_pdf/pipe/AbsPipe.py +4 -1
- magic_pdf/pipe/OCRPipe.py +7 -4
- magic_pdf/pipe/TXTPipe.py +7 -4
- magic_pdf/pipe/UNIPipe.py +11 -6
- magic_pdf/pre_proc/ocr_detect_all_bboxes.py +12 -3
- magic_pdf/pre_proc/ocr_dict_merge.py +60 -59
- magic_pdf/tools/cli.py +56 -29
- magic_pdf/tools/cli_dev.py +61 -64
- magic_pdf/tools/common.py +57 -37
- magic_pdf/user_api.py +17 -9
- {magic_pdf-0.7.1.dist-info → magic_pdf-0.8.1.dist-info}/METADATA +72 -27
- {magic_pdf-0.7.1.dist-info → magic_pdf-0.8.1.dist-info}/RECORD +34 -29
- {magic_pdf-0.7.1.dist-info → magic_pdf-0.8.1.dist-info}/LICENSE.md +0 -0
- {magic_pdf-0.7.1.dist-info → magic_pdf-0.8.1.dist-info}/WHEEL +0 -0
- {magic_pdf-0.7.1.dist-info → magic_pdf-0.8.1.dist-info}/entry_points.txt +0 -0
- {magic_pdf-0.7.1.dist-info → magic_pdf-0.8.1.dist-info}/top_level.txt +0 -0
@@ -1,32 +1,37 @@
|
|
1
1
|
magic_pdf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
2
|
magic_pdf/pdf_parse_by_ocr.py,sha256=IWnSWt1Z-d35xRqspzdLR2iUtma_SAu4W7K4kEk8SHc,638
|
3
3
|
magic_pdf/pdf_parse_by_txt.py,sha256=KUSH7Gh83CZmdyWw59pqDskwyJ2Kg-jU-9fnQGJQEs4,537
|
4
|
-
magic_pdf/pdf_parse_union_core.py,sha256=
|
5
|
-
magic_pdf/user_api.py,sha256=
|
4
|
+
magic_pdf/pdf_parse_union_core.py,sha256=AGIrP7ahc6Ycku0PxAlbjZhwqsdJ8iuRPIn-PFASKWY,11772
|
5
|
+
magic_pdf/user_api.py,sha256=gzmUHOYlmqPtuw1eZ3Qe2VZCG29v403oN0DURbEsyS8,3417
|
6
6
|
magic_pdf/dict2md/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
7
|
magic_pdf/dict2md/mkcontent.py,sha256=rWUY-2opd0jeowEUEVOV_uWcKum1Q7ng4nOoT6-ka_s,17459
|
8
|
-
magic_pdf/dict2md/ocr_mkcontent.py,sha256=
|
8
|
+
magic_pdf/dict2md/ocr_mkcontent.py,sha256=YILNQm8Nq1VMpgKTGF_fRNEzkrEg1aQKHxWI0lVQucI,18565
|
9
9
|
magic_pdf/filter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
10
|
magic_pdf/filter/pdf_classify_by_type.py,sha256=spmDO-f2ihAl1d6-EP-j271Yi50oyu6mw4X2kRd_m0s,42320
|
11
11
|
magic_pdf/filter/pdf_meta_scan.py,sha256=5R2XDiBZw0xd4ugbDxuyk6fztGlT5jFsGN85hLvo-hQ,17390
|
12
|
+
magic_pdf/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
|
+
magic_pdf/integrations/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
14
|
+
magic_pdf/integrations/rag/api.py,sha256=t38wvIBzLje4_JzTP3dewMLqV-tQJ-A3B92Sj2oyrfs,2507
|
15
|
+
magic_pdf/integrations/rag/type.py,sha256=Z_1g_ZIOCsb7-FmZBudReIXj8nzGrgj_BygCalhJdmk,3193
|
16
|
+
magic_pdf/integrations/rag/utils.py,sha256=UX_EySxi-WA1nwFLq6IpVQQ7mMAkMl257oEELaqpSzc,11833
|
12
17
|
magic_pdf/layout/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
18
|
magic_pdf/layout/bbox_sort.py,sha256=PzzaBf6MC_AZ-ZWGU0Kg-KIsw874l_gML73mM3hE4Ps,30807
|
14
19
|
magic_pdf/layout/layout_det_utils.py,sha256=NCYBTvsrULE3Cue53aMD1MfXTmOL9Xy0nivl6ku2cls,9137
|
15
|
-
magic_pdf/layout/layout_sort.py,sha256=
|
20
|
+
magic_pdf/layout/layout_sort.py,sha256=jtacQVcxnuYAksvEqtS0DH-v6U8qyjX-jmyZgDJ-egA,37005
|
16
21
|
magic_pdf/layout/layout_spiler_recog.py,sha256=QjBSgB-a7J2yjUR1eaCs9ZD7URtiRnV6W934hpAeuC4,3067
|
17
22
|
magic_pdf/layout/mcol_sort.py,sha256=ADnLisBJBHXDKYChcf2lzTb_TC_vZ4q89_CSN8mwEJc,11331
|
18
23
|
magic_pdf/libs/Constants.py,sha256=rdJVadmgN0UlIB-xcMQ9j7Qk9q1Qahxt3KEY-vL7hSU,774
|
19
24
|
magic_pdf/libs/MakeContentConfig.py,sha256=UDZPpsv8q4DqTy8h0vRtrT2kHqWiVI205VnVhlUEQc0,206
|
20
25
|
magic_pdf/libs/ModelBlockTypeEnum.py,sha256=kalXPbo5ya6hKhhBHPGlHl1yjWOURoXZWQM3rVUyPsY,164
|
21
26
|
magic_pdf/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
|
-
magic_pdf/libs/boxbase.py,sha256=
|
27
|
+
magic_pdf/libs/boxbase.py,sha256=YGIVYWBHyBSopcTxd5e3FVw6QQUnRMzyYgFJmcEl4Hc,15842
|
23
28
|
magic_pdf/libs/calc_span_stats.py,sha256=5vnU27DcbkFDRSAoLqAmX0KQ3I9ehWkEgh_t9hxg_zI,10147
|
24
29
|
magic_pdf/libs/commons.py,sha256=6Zu9-OyamyCNDY7qj0SxR-rux-ggj9im3CVPtC4ubB8,7108
|
25
30
|
magic_pdf/libs/config_reader.py,sha256=dPx6JJJuCw9AzNgKtrTG1elmfdeN6gDhgFK9r15-NsE,2505
|
26
31
|
magic_pdf/libs/convert_utils.py,sha256=Ov-lsfCLBPz_15iSJXIslBNmrSf_E_1g_XDWJy8NgO8,143
|
27
32
|
magic_pdf/libs/coordinate_transform.py,sha256=Bbop2cP2uz2ZG0U0gwd7J6EKkgABq5Rv03qf2LMPw80,429
|
28
33
|
magic_pdf/libs/detect_language_from_model.py,sha256=Uln8F9qs8EJOw4EgI7KRlaU3lD_mK8KMTlADLFtz8fk,816
|
29
|
-
magic_pdf/libs/draw_bbox.py,sha256=
|
34
|
+
magic_pdf/libs/draw_bbox.py,sha256=sbpN4LJ19k5Fh4NsY79mrLEc1-9XIoyoZJb9hL9A5b4,13703
|
30
35
|
magic_pdf/libs/drop_reason.py,sha256=IfjPSrPLMmVziqjOXPep7r_ioQKFRahDgbOW1SD-Tuw,2148
|
31
36
|
magic_pdf/libs/drop_tag.py,sha256=bZDg3bIVWvBT1Ec1icwj5WLOkt5-hI6eRYZ2tX9_a74,673
|
32
37
|
magic_pdf/libs/hash_utils.py,sha256=VEKK9WfFoZgrPfi8kfITjLpr8Ahufs8tXh9R1Y5lAL8,404
|
@@ -35,24 +40,24 @@ magic_pdf/libs/language.py,sha256=Hj5-lrGoNExxdHLbkcNG-c27U4AjJ9AZPdZblaNSehU,10
|
|
35
40
|
magic_pdf/libs/local_math.py,sha256=tqljQOgqh3fZc146HYhO88JXJaiXMVwArBkk_CSGICc,177
|
36
41
|
magic_pdf/libs/markdown_utils.py,sha256=cLxLXjRhrNp_wCHvtglrGA_FVdrvfd1KULeTtj1p18w,944
|
37
42
|
magic_pdf/libs/nlp_utils.py,sha256=-X9W3-Ns5ZdDYFvyyEq6i6P2b5hCATaFEZeOjwNOH9M,6901
|
38
|
-
magic_pdf/libs/ocr_content_type.py,sha256=
|
43
|
+
magic_pdf/libs/ocr_content_type.py,sha256=xpPF5Unhiw9hPVlgsvPtFPpi7ReJo5dvZevluE3lfPc,852
|
39
44
|
magic_pdf/libs/path_utils.py,sha256=Hykw_l5CU736b2egHV9P7B-qh3QNKO4nZSGCbsi0Z8E,1043
|
40
45
|
magic_pdf/libs/pdf_check.py,sha256=MAe8wzwT0qvPf_I72wEZG7k1g4haNHS7oUtLqkB5rlE,2145
|
41
46
|
magic_pdf/libs/pdf_image_tools.py,sha256=CAd01giTKr_UJz1_QtDOARG9G9z69GFpzRZwcWSfLtE,1282
|
42
47
|
magic_pdf/libs/safe_filename.py,sha256=ckwcM_eqoysTb5id8czp-tXq2G9da0-l3pshZDCHQtE,236
|
43
48
|
magic_pdf/libs/textbase.py,sha256=SC1Frhz3Fb7V7n2SFRBsl7Bmg0JZdlvZskq0lfW1vIk,732
|
44
|
-
magic_pdf/libs/version.py,sha256=
|
49
|
+
magic_pdf/libs/version.py,sha256=Ocl79hbbH8_jdr5dGC90VR1cAvZc05Rc0tkZttUnMjo,22
|
45
50
|
magic_pdf/libs/vis_utils.py,sha256=hTOTEakKV0pGMbk0tbRkVI_tku7A3dGc96ynObZ4kwI,10207
|
46
51
|
magic_pdf/model/__init__.py,sha256=1QcfMKET0xQhSaZMjNQHi_TjzSSDR6PI5mjkmaXHPe8,52
|
47
|
-
magic_pdf/model/doc_analyze_by_custom_model.py,sha256=
|
48
|
-
magic_pdf/model/magic_model.py,sha256=
|
49
|
-
magic_pdf/model/model_list.py,sha256=
|
50
|
-
magic_pdf/model/pdf_extract_kit.py,sha256=
|
52
|
+
magic_pdf/model/doc_analyze_by_custom_model.py,sha256=Ht1zZAB4WV3b-AWdRV5m5fuidlD6ZhNkvJM0j9i1l_E,4809
|
53
|
+
magic_pdf/model/magic_model.py,sha256=afSd9D0S31uEAseY98rJrL9BybBMeSMm5jbeWoWuWWo,30694
|
54
|
+
magic_pdf/model/model_list.py,sha256=tJ9jtMB93HGx8Rmt8wmQSDFXZBUIPQrwaaYsep4luTM,183
|
55
|
+
magic_pdf/model/pdf_extract_kit.py,sha256=Bdxqo3AGXs0VByFVj6ZEOm4T6wXTZwkZsRRFtxasNQM,17901
|
51
56
|
magic_pdf/model/ppTableModel.py,sha256=wWiui9VOjkKYlNX-viPqsWpzgkNJ-9_S2Se-j4oyLqU,2687
|
52
57
|
magic_pdf/model/pp_structure_v2.py,sha256=1sn8IJK0d5ZmqJ2XFt9FdaSdI0RQf-iwNAWBrVrIeuc,2872
|
53
58
|
magic_pdf/model/pek_sub_modules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
54
59
|
magic_pdf/model/pek_sub_modules/post_process.py,sha256=HzRxV2sVR3Qo8XKYEHhT6tae-bYTb6dnAfGP6gfVNaM,1135
|
55
|
-
magic_pdf/model/pek_sub_modules/self_modify.py,sha256=
|
60
|
+
magic_pdf/model/pek_sub_modules/self_modify.py,sha256=NGUr8t4bXSeh38hwrfs6qxhf0IW-f3J96bnrg1xw8BA,14281
|
56
61
|
magic_pdf/model/pek_sub_modules/layoutlmv3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
57
62
|
magic_pdf/model/pek_sub_modules/layoutlmv3/backbone.py,sha256=1cvSCczgvwOLdvzWyqttoYPMHsXmnzI3w9abJ1bAXoM,7106
|
58
63
|
magic_pdf/model/pek_sub_modules/layoutlmv3/beit.py,sha256=e-INve6bpEx_0FM5wYbQcEcelc79tzDlCljTVHaGt1w,30450
|
@@ -85,14 +90,14 @@ magic_pdf/para/exceptions.py,sha256=kpjGxrSZ-drNmoKlmuQ0asTjI8cKKKWsdDDBoDHQP9M,
|
|
85
90
|
magic_pdf/para/layout_match_processor.py,sha256=yr4FEO7GJ502udShqGRqIJQ_FQxoa0aG_mhmWd8nLwI,1554
|
86
91
|
magic_pdf/para/para_pipeline.py,sha256=zLaCHI9jLi1UPzh0lHP44mUjpKVTHS0gE_5YrkjVqEY,11796
|
87
92
|
magic_pdf/para/para_split.py,sha256=-UJM2jREW_2h3ZlJAU7dRD8bK3CMGKuhJrfgqv3Auvk,31310
|
88
|
-
magic_pdf/para/para_split_v2.py,sha256=
|
93
|
+
magic_pdf/para/para_split_v2.py,sha256=ZIiLzpvVL364x1zcEG9IbT6ARJ-6JnWLIVrsDmf4w1M,36878
|
89
94
|
magic_pdf/para/raw_processor.py,sha256=mHxD9FrdOSXH7NqM41s55URyCyuyACvm9kKtowkIb3k,6317
|
90
95
|
magic_pdf/para/stats.py,sha256=-6Pf9Y8jkP1uJOYWiHUjw9Lb-Fb9GY7MHr_ok7x2GX0,9731
|
91
96
|
magic_pdf/para/title_processor.py,sha256=pYZv9vEkIjAtCz8jIUtl9AVUy_ib5SdAZmMVoZtsMRI,38593
|
92
|
-
magic_pdf/pipe/AbsPipe.py,sha256=
|
93
|
-
magic_pdf/pipe/OCRPipe.py,sha256=
|
94
|
-
magic_pdf/pipe/TXTPipe.py,sha256=
|
95
|
-
magic_pdf/pipe/UNIPipe.py,sha256=
|
97
|
+
magic_pdf/pipe/AbsPipe.py,sha256=btbCoH30x5ECzOwoYdMn-MQQPhMOB6IaoDriC0M5oe8,4265
|
98
|
+
magic_pdf/pipe/OCRPipe.py,sha256=rCff7CPMLDGDzF4SfaLKeUk-6PNXVkzWRo96hS7XqSM,1605
|
99
|
+
magic_pdf/pipe/TXTPipe.py,sha256=AiO6yAq8XpT66jP3bMaNH2L97y3jRr10xQFbx5VzrxE,1664
|
100
|
+
magic_pdf/pipe/UNIPipe.py,sha256=X8paSZTvxgKHxUcVLB2LFMoPhdk215zHKXSNOSGOjeg,4101
|
96
101
|
magic_pdf/pipe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
97
102
|
magic_pdf/post_proc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
98
103
|
magic_pdf/post_proc/detect_para.py,sha256=5LX86ueHQGOV9CNimAxqZH4R3KTi78leum1de_Na0pw,126181
|
@@ -114,9 +119,9 @@ magic_pdf/pre_proc/equations_replace.py,sha256=fXj7ZV7F3YtkDYrAhE9g5tHk4_3pVUyLb
|
|
114
119
|
magic_pdf/pre_proc/fix_image.py,sha256=5MOfkXc8abfIp49g-68vll40wwTUZ5tcQ2gtsJuFmvs,11486
|
115
120
|
magic_pdf/pre_proc/fix_table.py,sha256=20sqJe27fAXcL7_C0qQ9mpsggmH37WuX-wPYWyRgACA,13227
|
116
121
|
magic_pdf/pre_proc/main_text_font.py,sha256=1gkjvPuBdKC4oVFkLvnRm2zghsLtVlfAEMKXouyVonM,1048
|
117
|
-
magic_pdf/pre_proc/ocr_detect_all_bboxes.py,sha256=
|
122
|
+
magic_pdf/pre_proc/ocr_detect_all_bboxes.py,sha256=vFT3bsqQ2_GvcYeFS4J4UQCxxwgEUFk5x3TxId6m8BE,7110
|
118
123
|
magic_pdf/pre_proc/ocr_detect_layout.py,sha256=DW0_HXzmcbW22cXKIYFsyZNFh8mEjSHXIFVjXndJsvQ,5878
|
119
|
-
magic_pdf/pre_proc/ocr_dict_merge.py,sha256=
|
124
|
+
magic_pdf/pre_proc/ocr_dict_merge.py,sha256=8t81fXGHdVuJ5-AEKdyx3Q7JH3OMSNZ-GgsYWHl_VjE,13118
|
120
125
|
magic_pdf/pre_proc/ocr_span_list_modify.py,sha256=jqBheXF8EuYCfS9tn6typr-aE57nfMoeBC36J5GjpbQ,11519
|
121
126
|
magic_pdf/pre_proc/pdf_pre_filter.py,sha256=FIMwe8Lei9LI2RmkqiaSyTHV5b7ViADbpyBwgVwZH-c,2687
|
122
127
|
magic_pdf/pre_proc/post_layout_split.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -138,12 +143,12 @@ magic_pdf/rw/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
138
143
|
magic_pdf/spark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
139
144
|
magic_pdf/spark/spark_api.py,sha256=eSLXTjMYW5Ya41VMIApRVfji1ZxEZXdH9ZdsL6fy5Kw,1131
|
140
145
|
magic_pdf/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
141
|
-
magic_pdf/tools/cli.py,sha256=
|
142
|
-
magic_pdf/tools/cli_dev.py,sha256=
|
143
|
-
magic_pdf/tools/common.py,sha256=
|
144
|
-
magic_pdf-0.
|
145
|
-
magic_pdf-0.
|
146
|
-
magic_pdf-0.
|
147
|
-
magic_pdf-0.
|
148
|
-
magic_pdf-0.
|
149
|
-
magic_pdf-0.
|
146
|
+
magic_pdf/tools/cli.py,sha256=tUeJhGudJIrCDMNQDRTWtNGE_4E0TWpKTuEkf5y_2uk,2734
|
147
|
+
magic_pdf/tools/cli_dev.py,sha256=3e5eyCQEt_EujXZu5fUAWr_W-YQQVqS9pB0Qgw7t1D8,4122
|
148
|
+
magic_pdf/tools/common.py,sha256=2KsqN0rNcuyt9B3vAoF-HeeiwdJbO3iO5VE1zBZ1VCw,4859
|
149
|
+
magic_pdf-0.8.1.dist-info/LICENSE.md,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
|
150
|
+
magic_pdf-0.8.1.dist-info/METADATA,sha256=fWU35cJWoz62IWXDGIC3PsTDIz_vgV_orr1eEjvjjaQ,34142
|
151
|
+
magic_pdf-0.8.1.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
152
|
+
magic_pdf-0.8.1.dist-info/entry_points.txt,sha256=wXwYke3j8fqDQTocUspL-CqDUEv3Tfcwp09fM8dZAhA,98
|
153
|
+
magic_pdf-0.8.1.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
|
154
|
+
magic_pdf-0.8.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|