magic-pdf 0.5.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- magic_pdf/__init__.py +0 -0
- magic_pdf/cli/__init__.py +0 -0
- magic_pdf/cli/magicpdf.py +294 -0
- magic_pdf/dict2md/__init__.py +0 -0
- magic_pdf/dict2md/mkcontent.py +397 -0
- magic_pdf/dict2md/ocr_mkcontent.py +356 -0
- magic_pdf/filter/__init__.py +0 -0
- magic_pdf/filter/pdf_classify_by_type.py +381 -0
- magic_pdf/filter/pdf_meta_scan.py +368 -0
- magic_pdf/layout/__init__.py +0 -0
- magic_pdf/layout/bbox_sort.py +681 -0
- magic_pdf/layout/layout_det_utils.py +182 -0
- magic_pdf/layout/layout_sort.py +732 -0
- magic_pdf/layout/layout_spiler_recog.py +101 -0
- magic_pdf/layout/mcol_sort.py +336 -0
- magic_pdf/libs/Constants.py +11 -0
- magic_pdf/libs/MakeContentConfig.py +10 -0
- magic_pdf/libs/ModelBlockTypeEnum.py +9 -0
- magic_pdf/libs/__init__.py +0 -0
- magic_pdf/libs/boxbase.py +408 -0
- magic_pdf/libs/calc_span_stats.py +239 -0
- magic_pdf/libs/commons.py +204 -0
- magic_pdf/libs/config_reader.py +63 -0
- magic_pdf/libs/convert_utils.py +5 -0
- magic_pdf/libs/coordinate_transform.py +9 -0
- magic_pdf/libs/detect_language_from_model.py +21 -0
- magic_pdf/libs/draw_bbox.py +227 -0
- magic_pdf/libs/drop_reason.py +27 -0
- magic_pdf/libs/drop_tag.py +19 -0
- magic_pdf/libs/hash_utils.py +15 -0
- magic_pdf/libs/json_compressor.py +27 -0
- magic_pdf/libs/language.py +31 -0
- magic_pdf/libs/markdown_utils.py +31 -0
- magic_pdf/libs/math.py +9 -0
- magic_pdf/libs/nlp_utils.py +203 -0
- magic_pdf/libs/ocr_content_type.py +21 -0
- magic_pdf/libs/path_utils.py +23 -0
- magic_pdf/libs/pdf_image_tools.py +33 -0
- magic_pdf/libs/safe_filename.py +11 -0
- magic_pdf/libs/textbase.py +33 -0
- magic_pdf/libs/version.py +1 -0
- magic_pdf/libs/vis_utils.py +308 -0
- magic_pdf/model/__init__.py +0 -0
- magic_pdf/model/doc_analyze_by_360layout.py +8 -0
- magic_pdf/model/doc_analyze_by_pp_structurev2.py +125 -0
- magic_pdf/model/magic_model.py +632 -0
- magic_pdf/para/__init__.py +0 -0
- magic_pdf/para/block_continuation_processor.py +562 -0
- magic_pdf/para/block_termination_processor.py +480 -0
- magic_pdf/para/commons.py +222 -0
- magic_pdf/para/denoise.py +246 -0
- magic_pdf/para/draw.py +121 -0
- magic_pdf/para/exceptions.py +198 -0
- magic_pdf/para/layout_match_processor.py +40 -0
- magic_pdf/para/para_pipeline.py +297 -0
- magic_pdf/para/para_split.py +644 -0
- magic_pdf/para/para_split_v2.py +772 -0
- magic_pdf/para/raw_processor.py +207 -0
- magic_pdf/para/stats.py +268 -0
- magic_pdf/para/title_processor.py +1014 -0
- magic_pdf/pdf_parse_by_ocr.py +219 -0
- magic_pdf/pdf_parse_by_ocr_v2.py +17 -0
- magic_pdf/pdf_parse_by_txt.py +410 -0
- magic_pdf/pdf_parse_by_txt_v2.py +56 -0
- magic_pdf/pdf_parse_for_train.py +685 -0
- magic_pdf/pdf_parse_union_core.py +241 -0
- magic_pdf/pipe/AbsPipe.py +112 -0
- magic_pdf/pipe/OCRPipe.py +28 -0
- magic_pdf/pipe/TXTPipe.py +29 -0
- magic_pdf/pipe/UNIPipe.py +83 -0
- magic_pdf/pipe/__init__.py +0 -0
- magic_pdf/post_proc/__init__.py +0 -0
- magic_pdf/post_proc/detect_para.py +3472 -0
- magic_pdf/post_proc/pdf_post_filter.py +67 -0
- magic_pdf/post_proc/remove_footnote.py +153 -0
- magic_pdf/pre_proc/__init__.py +0 -0
- magic_pdf/pre_proc/citationmarker_remove.py +157 -0
- magic_pdf/pre_proc/construct_page_dict.py +72 -0
- magic_pdf/pre_proc/cut_image.py +71 -0
- magic_pdf/pre_proc/detect_equation.py +134 -0
- magic_pdf/pre_proc/detect_footer_by_model.py +64 -0
- magic_pdf/pre_proc/detect_footer_header_by_statistics.py +284 -0
- magic_pdf/pre_proc/detect_footnote.py +170 -0
- magic_pdf/pre_proc/detect_header.py +64 -0
- magic_pdf/pre_proc/detect_images.py +647 -0
- magic_pdf/pre_proc/detect_page_number.py +64 -0
- magic_pdf/pre_proc/detect_tables.py +62 -0
- magic_pdf/pre_proc/equations_replace.py +559 -0
- magic_pdf/pre_proc/fix_image.py +244 -0
- magic_pdf/pre_proc/fix_table.py +270 -0
- magic_pdf/pre_proc/main_text_font.py +23 -0
- magic_pdf/pre_proc/ocr_detect_all_bboxes.py +115 -0
- magic_pdf/pre_proc/ocr_detect_layout.py +133 -0
- magic_pdf/pre_proc/ocr_dict_merge.py +336 -0
- magic_pdf/pre_proc/ocr_span_list_modify.py +258 -0
- magic_pdf/pre_proc/pdf_pre_filter.py +74 -0
- magic_pdf/pre_proc/post_layout_split.py +0 -0
- magic_pdf/pre_proc/remove_bbox_overlap.py +98 -0
- magic_pdf/pre_proc/remove_colored_strip_bbox.py +79 -0
- magic_pdf/pre_proc/remove_footer_header.py +117 -0
- magic_pdf/pre_proc/remove_rotate_bbox.py +188 -0
- magic_pdf/pre_proc/resolve_bbox_conflict.py +191 -0
- magic_pdf/pre_proc/solve_line_alien.py +29 -0
- magic_pdf/pre_proc/statistics.py +12 -0
- magic_pdf/rw/AbsReaderWriter.py +34 -0
- magic_pdf/rw/DiskReaderWriter.py +66 -0
- magic_pdf/rw/S3ReaderWriter.py +107 -0
- magic_pdf/rw/__init__.py +0 -0
- magic_pdf/spark/__init__.py +0 -0
- magic_pdf/spark/spark_api.py +51 -0
- magic_pdf/train_utils/__init__.py +0 -0
- magic_pdf/train_utils/convert_to_train_format.py +65 -0
- magic_pdf/train_utils/extract_caption.py +59 -0
- magic_pdf/train_utils/remove_footer_header.py +159 -0
- magic_pdf/train_utils/vis_utils.py +327 -0
- magic_pdf/user_api.py +136 -0
- magic_pdf-0.5.4.dist-info/LICENSE.md +661 -0
- magic_pdf-0.5.4.dist-info/METADATA +24 -0
- magic_pdf-0.5.4.dist-info/RECORD +121 -0
- magic_pdf-0.5.4.dist-info/WHEEL +5 -0
- magic_pdf-0.5.4.dist-info/top_level.txt +1 -0
@@ -0,0 +1,24 @@
|
|
1
|
+
Metadata-Version: 2.1
|
2
|
+
Name: magic-pdf
|
3
|
+
Version: 0.5.4
|
4
|
+
Requires-Python: >=3.9
|
5
|
+
License-File: LICENSE.md
|
6
|
+
Requires-Dist: boto3 >=1.28.43
|
7
|
+
Requires-Dist: Brotli >=1.1.0
|
8
|
+
Requires-Dist: click >=8.1.7
|
9
|
+
Requires-Dist: Distance >=0.1.3
|
10
|
+
Requires-Dist: PyMuPDF >=1.24.5
|
11
|
+
Requires-Dist: loguru >=0.6.0
|
12
|
+
Requires-Dist: matplotlib >=3.8.3
|
13
|
+
Requires-Dist: numpy >=1.21.6
|
14
|
+
Requires-Dist: pandas >=1.3.5
|
15
|
+
Requires-Dist: fast-langdetect >=0.1.1
|
16
|
+
Requires-Dist: regex >=2023.12.25
|
17
|
+
Requires-Dist: termcolor >=2.4.0
|
18
|
+
Requires-Dist: wordninja >=2.0.0
|
19
|
+
Requires-Dist: scikit-learn >=1.0.2
|
20
|
+
Requires-Dist: nltk ==3.8.1
|
21
|
+
Requires-Dist: s3pathlib >=2.1.1
|
22
|
+
Requires-Dist: paddlepaddle
|
23
|
+
Requires-Dist: paddleocr
|
24
|
+
|
@@ -0,0 +1,121 @@
|
|
1
|
+
magic_pdf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
+
magic_pdf/pdf_parse_by_ocr.py,sha256=jYEfc4P6o4f7mZqfPFITs3NWlBxe4v1gwLh1yu2VDIQ,8950
|
3
|
+
magic_pdf/pdf_parse_by_ocr_v2.py,sha256=1XVRwmcGRtxMMRYgqkJcctD4tLjCprgpGQsHpeVQktM,637
|
4
|
+
magic_pdf/pdf_parse_by_txt.py,sha256=5_kdfvDkv_XwDove2AW7SopGysYLJ1-tsOQy2yuII1Y,21932
|
5
|
+
magic_pdf/pdf_parse_by_txt_v2.py,sha256=mGadyYamoCNGNsKOQM1uXQR65zMUKyL24yURGHADmVs,1908
|
6
|
+
magic_pdf/pdf_parse_for_train.py,sha256=Oby61DMjJ716Jj_ri7lwXfv2Chus0pbBR2RPXrmBW08,28661
|
7
|
+
magic_pdf/pdf_parse_union_core.py,sha256=a67iQuEfuslAEF-wQplGZKXUuz5mT3HiCyvuR52E6Gw,10584
|
8
|
+
magic_pdf/user_api.py,sha256=qaPK7A_VG10tPjHgmURDVOmVkeilCVnRqBbL27LaF7Q,4694
|
9
|
+
magic_pdf/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
|
+
magic_pdf/cli/magicpdf.py,sha256=rTGrXc0hWv-nCqE--QRm1jwiGHaxnxUKf9jlAYWyXoQ,10885
|
11
|
+
magic_pdf/dict2md/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
12
|
+
magic_pdf/dict2md/mkcontent.py,sha256=rWUY-2opd0jeowEUEVOV_uWcKum1Q7ng4nOoT6-ka_s,17459
|
13
|
+
magic_pdf/dict2md/ocr_mkcontent.py,sha256=Y0nFbCX7zSVLq-vQqJvR8azumd0003ixrk5wy0vIJxU,15068
|
14
|
+
magic_pdf/filter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
15
|
+
magic_pdf/filter/pdf_classify_by_type.py,sha256=c11aGI-I5zhLdZQnRLeTVPcmoK14ahr8aP7hTHl8_kM,41990
|
16
|
+
magic_pdf/filter/pdf_meta_scan.py,sha256=KLih7jfVqABhdeZ9tAu9-WZm0W0wX-PKCws4mFBGtYk,17001
|
17
|
+
magic_pdf/layout/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
18
|
+
magic_pdf/layout/bbox_sort.py,sha256=PzzaBf6MC_AZ-ZWGU0Kg-KIsw874l_gML73mM3hE4Ps,30807
|
19
|
+
magic_pdf/layout/layout_det_utils.py,sha256=NCYBTvsrULE3Cue53aMD1MfXTmOL9Xy0nivl6ku2cls,9137
|
20
|
+
magic_pdf/layout/layout_sort.py,sha256=ovqRX1xcRA7E7s8VvsI7ZNbaNSElJe07bApCh5hxwIE,33533
|
21
|
+
magic_pdf/layout/layout_spiler_recog.py,sha256=QjBSgB-a7J2yjUR1eaCs9ZD7URtiRnV6W934hpAeuC4,3067
|
22
|
+
magic_pdf/layout/mcol_sort.py,sha256=ADnLisBJBHXDKYChcf2lzTb_TC_vZ4q89_CSN8mwEJc,11331
|
23
|
+
magic_pdf/libs/Constants.py,sha256=AwQw5aK7JkWjerEyq5vxxMTHH1Gvku8K9NS8xjHKimI,189
|
24
|
+
magic_pdf/libs/MakeContentConfig.py,sha256=UDZPpsv8q4DqTy8h0vRtrT2kHqWiVI205VnVhlUEQc0,206
|
25
|
+
magic_pdf/libs/ModelBlockTypeEnum.py,sha256=kalXPbo5ya6hKhhBHPGlHl1yjWOURoXZWQM3rVUyPsY,164
|
26
|
+
magic_pdf/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
27
|
+
magic_pdf/libs/boxbase.py,sha256=MvD0DypR4sTEF3T2RrI_yJ8mPDUBYHAqAaau2mnBSxY,15343
|
28
|
+
magic_pdf/libs/calc_span_stats.py,sha256=5vnU27DcbkFDRSAoLqAmX0KQ3I9ehWkEgh_t9hxg_zI,10147
|
29
|
+
magic_pdf/libs/commons.py,sha256=6Zu9-OyamyCNDY7qj0SxR-rux-ggj9im3CVPtC4ubB8,7108
|
30
|
+
magic_pdf/libs/config_reader.py,sha256=ADe9DknbSBb3-JQlQJix-fkVDPIQCkytl4mKdXnIraA,1607
|
31
|
+
magic_pdf/libs/convert_utils.py,sha256=Ov-lsfCLBPz_15iSJXIslBNmrSf_E_1g_XDWJy8NgO8,143
|
32
|
+
magic_pdf/libs/coordinate_transform.py,sha256=Bbop2cP2uz2ZG0U0gwd7J6EKkgABq5Rv03qf2LMPw80,429
|
33
|
+
magic_pdf/libs/detect_language_from_model.py,sha256=Uln8F9qs8EJOw4EgI7KRlaU3lD_mK8KMTlADLFtz8fk,816
|
34
|
+
magic_pdf/libs/draw_bbox.py,sha256=90FDAYN3dxgN07_xRzdUgnDAyEswpl9VCXaDo_SMZkA,9449
|
35
|
+
magic_pdf/libs/drop_reason.py,sha256=IfjPSrPLMmVziqjOXPep7r_ioQKFRahDgbOW1SD-Tuw,2148
|
36
|
+
magic_pdf/libs/drop_tag.py,sha256=bZDg3bIVWvBT1Ec1icwj5WLOkt5-hI6eRYZ2tX9_a74,673
|
37
|
+
magic_pdf/libs/hash_utils.py,sha256=VEKK9WfFoZgrPfi8kfITjLpr8Ahufs8tXh9R1Y5lAL8,404
|
38
|
+
magic_pdf/libs/json_compressor.py,sha256=6-KCu0lb5ksmyqWtQGb4QqmP-FjRb5dP7P-Hevcn68g,875
|
39
|
+
magic_pdf/libs/language.py,sha256=klymhpJyFSc9ukUPwIQmCx1DwGMuXueosaFjmMzETQw,812
|
40
|
+
magic_pdf/libs/markdown_utils.py,sha256=cLxLXjRhrNp_wCHvtglrGA_FVdrvfd1KULeTtj1p18w,944
|
41
|
+
magic_pdf/libs/math.py,sha256=tqljQOgqh3fZc146HYhO88JXJaiXMVwArBkk_CSGICc,177
|
42
|
+
magic_pdf/libs/nlp_utils.py,sha256=-X9W3-Ns5ZdDYFvyyEq6i6P2b5hCATaFEZeOjwNOH9M,6901
|
43
|
+
magic_pdf/libs/ocr_content_type.py,sha256=DiGTYppd6WlibwCAeVpIy3NHCQkglfIAQsJ_ffu5BPw,526
|
44
|
+
magic_pdf/libs/path_utils.py,sha256=YYh8a0K8KiUhFDd_S1oLohL8n8fcSos4iMj74YEJ57s,538
|
45
|
+
magic_pdf/libs/pdf_image_tools.py,sha256=CAd01giTKr_UJz1_QtDOARG9G9z69GFpzRZwcWSfLtE,1282
|
46
|
+
magic_pdf/libs/safe_filename.py,sha256=ckwcM_eqoysTb5id8czp-tXq2G9da0-l3pshZDCHQtE,236
|
47
|
+
magic_pdf/libs/textbase.py,sha256=SC1Frhz3Fb7V7n2SFRBsl7Bmg0JZdlvZskq0lfW1vIk,732
|
48
|
+
magic_pdf/libs/version.py,sha256=DITpct-LrdIsTgwx2NgH5Ghx5y8Xgz1YMimy1ZV5RTY,22
|
49
|
+
magic_pdf/libs/vis_utils.py,sha256=hTOTEakKV0pGMbk0tbRkVI_tku7A3dGc96ynObZ4kwI,10207
|
50
|
+
magic_pdf/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
51
|
+
magic_pdf/model/doc_analyze_by_360layout.py,sha256=GbchKPJRVcrxvwNXMnR4vt8lOLPauTWMl-43ayyhX7U,221
|
52
|
+
magic_pdf/model/doc_analyze_by_pp_structurev2.py,sha256=ry2sLGt10ShgvHZvhpf_QA0QGG9kXRdoAsYmxLNcPWE,4082
|
53
|
+
magic_pdf/model/magic_model.py,sha256=2H6Gz1mg0f0YCvz-TLIWrAWXCQLgZftBXJNRPlSIjwc,25077
|
54
|
+
magic_pdf/para/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
55
|
+
magic_pdf/para/block_continuation_processor.py,sha256=IkReB5hirjm0OAirNzQQpnWe2f2bdP3Hug3Ef8qTRDA,22749
|
56
|
+
magic_pdf/para/block_termination_processor.py,sha256=YU3ZYqJy9e3OQmOuQYZrR6AUpmAlQ0mhj0PgZZPZ_fM,17957
|
57
|
+
magic_pdf/para/commons.py,sha256=VdJ8SY9qJTtcRyx8HH-PFeZSJwL4Tsf50197RD_-dwc,5414
|
58
|
+
magic_pdf/para/denoise.py,sha256=J7dM2KNnbdzAd2A3agB04U6L1GL9RrhAs-MLrq-_Ftg,10443
|
59
|
+
magic_pdf/para/draw.py,sha256=KyWc03do_WuBKQ028HYzepYwbIkel9ID0uqRhuPVOHc,5643
|
60
|
+
magic_pdf/para/exceptions.py,sha256=kpjGxrSZ-drNmoKlmuQ0asTjI8cKKKWsdDDBoDHQP9M,4978
|
61
|
+
magic_pdf/para/layout_match_processor.py,sha256=yr4FEO7GJ502udShqGRqIJQ_FQxoa0aG_mhmWd8nLwI,1554
|
62
|
+
magic_pdf/para/para_pipeline.py,sha256=zLaCHI9jLi1UPzh0lHP44mUjpKVTHS0gE_5YrkjVqEY,11796
|
63
|
+
magic_pdf/para/para_split.py,sha256=-UJM2jREW_2h3ZlJAU7dRD8bK3CMGKuhJrfgqv3Auvk,31310
|
64
|
+
magic_pdf/para/para_split_v2.py,sha256=a04dsUFE3JD4DA9e2DULJgbKrcqWuCfK58de1p-T3Io,36610
|
65
|
+
magic_pdf/para/raw_processor.py,sha256=mHxD9FrdOSXH7NqM41s55URyCyuyACvm9kKtowkIb3k,6317
|
66
|
+
magic_pdf/para/stats.py,sha256=-6Pf9Y8jkP1uJOYWiHUjw9Lb-Fb9GY7MHr_ok7x2GX0,9731
|
67
|
+
magic_pdf/para/title_processor.py,sha256=pYZv9vEkIjAtCz8jIUtl9AVUy_ib5SdAZmMVoZtsMRI,38593
|
68
|
+
magic_pdf/pipe/AbsPipe.py,sha256=jUngTfYeVeltp03QwTcZvmBYghTgA5Gd7SdZSsFUr0o,3932
|
69
|
+
magic_pdf/pipe/OCRPipe.py,sha256=av5-mJ-tYSdV13v1DbVruf1rUZ72YeAxTkN-x52mSfo,1198
|
70
|
+
magic_pdf/pipe/TXTPipe.py,sha256=-RSHV-CJfod4bvOghd3XsaQ1iMiSPkrW5R5j4VSTJW4,1257
|
71
|
+
magic_pdf/pipe/UNIPipe.py,sha256=FRnB2L1swqhsT8gGjeV4NCx9_ACtcRKGeUQcTQp_uRM,3509
|
72
|
+
magic_pdf/pipe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
73
|
+
magic_pdf/post_proc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
74
|
+
magic_pdf/post_proc/detect_para.py,sha256=5LX86ueHQGOV9CNimAxqZH4R3KTi78leum1de_Na0pw,126181
|
75
|
+
magic_pdf/post_proc/pdf_post_filter.py,sha256=FeZceyjGG_UvBrBoa51Ohge5edQzCoJtZTaocidKCHg,2530
|
76
|
+
magic_pdf/post_proc/remove_footnote.py,sha256=701P7xRu6gzLaEHfb2xkYpLZI4CwK2FAo7Ggho4bOTI,7596
|
77
|
+
magic_pdf/pre_proc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
78
|
+
magic_pdf/pre_proc/citationmarker_remove.py,sha256=81iFoNLhZ2vOZi1g2zBH0S2UEfyZdhBY43qKKdxr6mo,6521
|
79
|
+
magic_pdf/pre_proc/construct_page_dict.py,sha256=lp3zBmInlWYYIcGC1-NSqT9s44AjDvlnWxDPeZoBVSY,3043
|
80
|
+
magic_pdf/pre_proc/cut_image.py,sha256=bbeELTg2-SFyHkVEnGAL_7S6k8hyy1xtDSoFmXDQDOA,2768
|
81
|
+
magic_pdf/pre_proc/detect_equation.py,sha256=9omDHKTI8QO9Qd46eVFHWhZeMmTNx7XDuWRgjXI-KFA,6627
|
82
|
+
magic_pdf/pre_proc/detect_footer_by_model.py,sha256=_EghAM_zWBcqVY8XBkbSoprKqKUa0mlN1U8YNWxNNLI,2848
|
83
|
+
magic_pdf/pre_proc/detect_footer_header_by_statistics.py,sha256=924soXZ51QVpitPgVgnwbC7BqOZI30j5hGW5zP86y-w,11250
|
84
|
+
magic_pdf/pre_proc/detect_footnote.py,sha256=UxFuTCRwXdAv3wKCgRQJJVt12hM9O9oPTwzPAChQXoM,8309
|
85
|
+
magic_pdf/pre_proc/detect_header.py,sha256=KOmRehgKMuMqNa_2weXkdNSiRVWMFgLMQE4e1itbY7g,2848
|
86
|
+
magic_pdf/pre_proc/detect_images.py,sha256=8DwGGTb5IjxqADZDTc_ngwJrTYXxK2qpRqI2FBoPr00,30432
|
87
|
+
magic_pdf/pre_proc/detect_page_number.py,sha256=qvYrBbCtBbREvw-MySL_p7byCRvcm1fkLJ5ZB4TP8OM,2848
|
88
|
+
magic_pdf/pre_proc/detect_tables.py,sha256=srJzgLVeVuOsqnESqfdJfVukTF84K8qmI5mgFX_BZGs,2800
|
89
|
+
magic_pdf/pre_proc/equations_replace.py,sha256=rpEHD5IvavRgUSus0tLjKcIv2faStp_Ii9vf09HVUtI,20384
|
90
|
+
magic_pdf/pre_proc/fix_image.py,sha256=5MOfkXc8abfIp49g-68vll40wwTUZ5tcQ2gtsJuFmvs,11486
|
91
|
+
magic_pdf/pre_proc/fix_table.py,sha256=20sqJe27fAXcL7_C0qQ9mpsggmH37WuX-wPYWyRgACA,13227
|
92
|
+
magic_pdf/pre_proc/main_text_font.py,sha256=1gkjvPuBdKC4oVFkLvnRm2zghsLtVlfAEMKXouyVonM,1048
|
93
|
+
magic_pdf/pre_proc/ocr_detect_all_bboxes.py,sha256=K5VHG_hNzqR6va0TB24SONMS7NGYBrbY4pxgEijLNlk,5305
|
94
|
+
magic_pdf/pre_proc/ocr_detect_layout.py,sha256=DW0_HXzmcbW22cXKIYFsyZNFh8mEjSHXIFVjXndJsvQ,5878
|
95
|
+
magic_pdf/pre_proc/ocr_dict_merge.py,sha256=lz1viJJkjZAklKZYmkWP39YBnv5BoWVr3fv6mmNr33E,12620
|
96
|
+
magic_pdf/pre_proc/ocr_span_list_modify.py,sha256=nIkbEcV0EBBQbwzvfnjDPseHxVzYxFRQmtel3GnXdPo,11290
|
97
|
+
magic_pdf/pre_proc/pdf_pre_filter.py,sha256=FIMwe8Lei9LI2RmkqiaSyTHV5b7ViADbpyBwgVwZH-c,2687
|
98
|
+
magic_pdf/pre_proc/post_layout_split.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
99
|
+
magic_pdf/pre_proc/remove_bbox_overlap.py,sha256=u_ObNLkZ8pPDNBUkSMpA9ffiSpfz42B4807cdBPZmLU,3085
|
100
|
+
magic_pdf/pre_proc/remove_colored_strip_bbox.py,sha256=QbFCsiFFracBNC_kLlZgwvRQ-OQ8saexgbYABlhjDQE,3633
|
101
|
+
magic_pdf/pre_proc/remove_footer_header.py,sha256=nUC28KXkaIPQZL2g94omcnfeyB4s3MBqo_-8KKvcZxQ,5691
|
102
|
+
magic_pdf/pre_proc/remove_rotate_bbox.py,sha256=0FlBXeiEwjZAGAWo-DiMptclFOj04POuH_dovSA4HUI,7772
|
103
|
+
magic_pdf/pre_proc/resolve_bbox_conflict.py,sha256=bJiegofPUeDyi--oZjfipQ5Q5RLm6TOCW0TLXbPii_Q,7307
|
104
|
+
magic_pdf/pre_proc/solve_line_alien.py,sha256=aNoQptPcC38Sm1I2ABhgw8jeH_5kjsRHx3VYlFFtm1g,853
|
105
|
+
magic_pdf/pre_proc/statistics.py,sha256=_9jGlXq0iXd03UMxB92ZqCiu7cjNkG5vHvFlTF_9ytA,220
|
106
|
+
magic_pdf/rw/AbsReaderWriter.py,sha256=1Hd6Xo2g12CaRAo5Sze-R_GSQA6GQ0rQwSmgQvw4V_c,1297
|
107
|
+
magic_pdf/rw/DiskReaderWriter.py,sha256=0tt8lbRyqrOfFgGlhjt24YMdj2xN7QUIVysfhFIxPgo,2113
|
108
|
+
magic_pdf/rw/S3ReaderWriter.py,sha256=O7Quf3CUqXBjMz4sIE7kNVI3TIQROeg5PuXneAacieY,4474
|
109
|
+
magic_pdf/rw/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
110
|
+
magic_pdf/spark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
111
|
+
magic_pdf/spark/spark_api.py,sha256=eSLXTjMYW5Ya41VMIApRVfji1ZxEZXdH9ZdsL6fy5Kw,1131
|
112
|
+
magic_pdf/train_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
113
|
+
magic_pdf/train_utils/convert_to_train_format.py,sha256=ifo2FAoBMa_etCvz0O4v03xO6mHV8IPXYxHH7-OcHfk,2443
|
114
|
+
magic_pdf/train_utils/extract_caption.py,sha256=gommEqIEWLplSDEJWD7_66daqlOBsWhpRBW1DHpkny4,1825
|
115
|
+
magic_pdf/train_utils/remove_footer_header.py,sha256=pyeNNdJ-th3wl5Xwb10ZLYNaFN4-6BmahoMFE8VTNNs,5978
|
116
|
+
magic_pdf/train_utils/vis_utils.py,sha256=MV9N9cT3ifJ35u7LFKGF9I_bOIQrtU1zcsxu2hj3aqM,10111
|
117
|
+
magic_pdf-0.5.4.dist-info/LICENSE.md,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
|
118
|
+
magic_pdf-0.5.4.dist-info/METADATA,sha256=hIojO9tsfaKc8J5UhdZR8dGWsg19vSFTEKj7bM-V80M,669
|
119
|
+
magic_pdf-0.5.4.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
120
|
+
magic_pdf-0.5.4.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
|
121
|
+
magic_pdf-0.5.4.dist-info/RECORD,,
|
@@ -0,0 +1 @@
|
|
1
|
+
magic_pdf
|