magic-pdf 1.3.11__py3-none-any.whl → 1.3.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,17 @@
1
1
  lang:
2
2
  ch_lite:
3
+ det: ch_PP-OCRv3_det_infer.pth
4
+ rec: ch_PP-OCRv5_rec_infer.pth
5
+ dict: ppocrv5_dict.txt
6
+ ch_lite_v4:
3
7
  det: ch_PP-OCRv3_det_infer.pth
4
8
  rec: ch_PP-OCRv4_rec_infer.pth
5
9
  dict: ppocr_keys_v1.txt
6
10
  ch_server:
11
+ det: ch_PP-OCRv3_det_infer.pth
12
+ rec: ch_PP-OCRv5_rec_server_infer.pth
13
+ dict: ppocrv5_dict.txt
14
+ ch_server_v4:
7
15
  det: ch_PP-OCRv3_det_infer.pth
8
16
  rec: ch_PP-OCRv4_rec_server_infer.pth
9
17
  dict: ppocr_keys_v1.txt
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: magic-pdf
3
- Version: 1.3.11
3
+ Version: 1.3.12
4
4
  Summary: A practical tool for converting PDF to Markdown
5
5
  License: AGPL-3.0
6
6
  Project-URL: Home, https://mineru.net/
@@ -107,6 +107,20 @@ Easier to use: Just grab MinerU Desktop. No coding, no login, just a simple inte
107
107
  </div>
108
108
 
109
109
  # Changelog
110
+ - 2025/05/24 1.3.12 Released
111
+ - Added support for ppocrv5 model, updated `ch_server` model to `PP-OCRv5_rec_server` and `ch_lite` model to `PP-OCRv5_rec_mobile` (model update required)
112
+ - In testing, we found that ppocrv5 (server) shows some improvement on handwritten documents but slightly lower accuracy than `PP-OCRv4_rec_server_doc` on other document types. Therefore, the default `ch` model remains unchanged as `PP-OCRv4_rec_server_doc`.
113
+ - Since ppocrv5 enhances recognition of handwritten text and special characters, you can manually select a ppocrv5 model for Japanese and Traditional Chinese mixed scenarios and for handwritten document scenarios.
114
+ - You can select the appropriate model through the lang parameter `lang='ch_server'` (Python API) or `--lang ch_server` (command line):
115
+ - `ch`: `PP-OCRv4_rec_server_doc` (default) (Chinese, English, Japanese, Traditional Chinese mixed/15k dictionary)
116
+ - `ch_server`: `PP-OCRv5_rec_server` (Chinese, English, Japanese, Traditional Chinese mixed + handwriting/18k dictionary)
117
+ - `ch_lite`: `PP-OCRv5_rec_mobile` (Chinese, English, Japanese, Traditional Chinese mixed + handwriting/18k dictionary)
118
+ - `ch_server_v4`: `PP-OCRv4_rec_server` (Chinese, English mixed/6k dictionary)
119
+ - `ch_lite_v4`: `PP-OCRv4_rec_mobile` (Chinese, English mixed/6k dictionary)
120
+ - Added support for handwritten documents by optimizing layout recognition of handwritten text areas
121
+ - This feature is supported by default, no additional configuration needed
122
+ - You can refer to the instructions above to manually select ppocrv5 model for better handwritten document parsing
123
+ - The demos on `huggingface` and `modelscope` have been updated to support handwriting recognition and the ppocrv5 models, so you can try them online
110
124
  - 2025/04/29 1.3.10 Released
111
125
  - Support for custom formula delimiters can be achieved by modifying the `latex-delimiter-config` item in the `magic-pdf.json` file under the user directory.
112
126
  - 2025/04/27 1.3.9 Released
@@ -14,7 +14,7 @@ magic_pdf/data/batch_build_dataset.py,sha256=KQoWFJDqCwRQug8-fTuciSwff58AYRjCNP6
14
14
  magic_pdf/data/dataset.py,sha256=2v-a7kA6dRUDQpjlAVE5We1tMATR-MYKzQCcBhNci5g,12258
15
15
  magic_pdf/data/read_api.py,sha256=qcG0T4c4ME5CkSRDjX2Wa2JQ_HW4GqzXAixI3_DZG_U,5234
16
16
  magic_pdf/data/schemas.py,sha256=oIUTBzK8Wq8Wuy8A_uilWAbVhucRvOs9_f3lSKYgcmQ,664
17
- magic_pdf/data/utils.py,sha256=dNWIJECPXaakKocI4z5Tq6vhDDSnR-bVWQV7DO2w_A8,5335
17
+ magic_pdf/data/utils.py,sha256=SA91UzbB6kS_24YVRtZ6HRpKPeCc6Ea4EaQNUuwQio8,5339
18
18
  magic_pdf/data/data_reader_writer/__init__.py,sha256=QtevUaeSivv9dQKi3Tomfn4Z0E4To0cB8qXTnglxaHc,705
19
19
  magic_pdf/data/data_reader_writer/base.py,sha256=nqmAcdHOXMOJO6RAT3ILligDFaw8Op0STyCw5yOzAbI,1706
20
20
  magic_pdf/data/data_reader_writer/filebase.py,sha256=VbNAxLyo0Io0j7iprJERt_TqxzHAtA7cUyPIaJstToU,2146
@@ -25,7 +25,7 @@ magic_pdf/data/io/base.py,sha256=SqNQqe30ZvoVvg7GVv-hLMCjN6yBgDyQQWeLgGsTfhQ,111
25
25
  magic_pdf/data/io/http.py,sha256=XlKB0DNf4a_uUnfgcclvaaOtmE7lmddx0DnK8A-emAM,958
26
26
  magic_pdf/data/io/s3.py,sha256=hyA7sbNriQy64xd_uyJ7acN_oneQ1Pdmoc7_xcvkue8,3606
27
27
  magic_pdf/dict2md/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
- magic_pdf/dict2md/ocr_mkcontent.py,sha256=jRGoSNeR3XBgzGhKdQ25CmsdW0pi7NA-5NY3TB2pja0,14421
28
+ magic_pdf/dict2md/ocr_mkcontent.py,sha256=cb9yd-PBEnc-rpyhvcNlmFJyTlhUNDnwY4LMMDz8mSM,15369
29
29
  magic_pdf/filter/__init__.py,sha256=_7lSez_myu4b6cdzPpQ-NfREuqeBSq_QdyBPKVLyq2U,1505
30
30
  magic_pdf/filter/pdf_classify_by_type.py,sha256=YNYXamxYgEiSujwilCNHOtrwpgJGDiQ597qJfardDVc,42354
31
31
  magic_pdf/filter/pdf_meta_scan.py,sha256=eOuM0-JgaXvHolSgepGoNDJDmv_uITWLQpH_0MfnVQw,17478
@@ -52,17 +52,17 @@ magic_pdf/libs/pdf_check.py,sha256=7GWWvDR6g_rj_fE6XJlbTq5AFVX11ngRIzT0N18F214,3
52
52
  magic_pdf/libs/pdf_image_tools.py,sha256=_au7plmKKctpPKozBumSKgP8689q4vH1mU8VMLO0IbM,2260
53
53
  magic_pdf/libs/performance_stats.py,sha256=DW-c6nUTUnWKGTONRKfpucsYZm1ake016F9K7jJwbik,2136
54
54
  magic_pdf/libs/safe_filename.py,sha256=ckwcM_eqoysTb5id8czp-tXq2G9da0-l3pshZDCHQtE,236
55
- magic_pdf/libs/version.py,sha256=cYqar-M7VsRkfFRByjCNAq9dYm1GavgcND7ugDzle7Q,23
55
+ magic_pdf/libs/version.py,sha256=2Rgd7p7gALBiXufv4uSeaC9anR_muEfi4m5NMAgfMHI,23
56
56
  magic_pdf/model/__init__.py,sha256=sa-dO2k-TLy25I2gRrzjm_cQeYfzMf-pLwBJHkIxGo0,51
57
- magic_pdf/model/batch_analyze.py,sha256=F0WsjbQ6z9txdiUiVy6n6zhyJWJ-4moljNx8fe8HFws,10977
58
- magic_pdf/model/doc_analyze_by_custom_model.py,sha256=R-d3_ZLZtlAztA23yaDvC0YC2-heXxrBBEyGlX7or7c,10547
57
+ magic_pdf/model/batch_analyze.py,sha256=Lt2wcnxjfwEtdzZ5ALkoHhchxFQXgLykdngBkkgDzoE,11666
58
+ magic_pdf/model/doc_analyze_by_custom_model.py,sha256=Vpa3C78ZKi2o6ViXjNiDfEzaHM6l5dYDDrexJYFmWKM,10547
59
59
  magic_pdf/model/magic_model.py,sha256=yZKWo_wRck_-YLyFGRiUHGar8sV1Y6458BFLbyBAt74,30682
60
60
  magic_pdf/model/model_list.py,sha256=aqfEJlEfbib3D3ISrxc0Coh6SbffYh8Yq2FlQN35_zA,213
61
61
  magic_pdf/model/pdf_extract_kit.py,sha256=C3sKqRkoD20Ldmo-cqGn1zRldEL-l5NYqcFvd05_fGU,10845
62
62
  magic_pdf/model/pp_structure_v2.py,sha256=NcqFWL4nUtjl82MFak8HX_8V3i4Aw_fK4dATrIp5uGs,3840
63
63
  magic_pdf/model/sub_modules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
64
64
  magic_pdf/model/sub_modules/model_init.py,sha256=e2t95kxiuU47luOHByDokbQ2uob6oQhEA4b6UGVfMjY,8303
65
- magic_pdf/model/sub_modules/model_utils.py,sha256=Md5yOki9uqW31sWIi7AKRwAJNKnCJBVSfQx6LXRKngs,12166
65
+ magic_pdf/model/sub_modules/model_utils.py,sha256=uZnI93ZNnTBPb8ajCDGvPlJY3m0O54NKO5e8ySfIbCc,12219
66
66
  magic_pdf/model/sub_modules/language_detection/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
67
67
  magic_pdf/model/sub_modules/language_detection/utils.py,sha256=Q__v6DdNJztt8GhVSuSB0txahVq-aj8RLhWn2VScx4w,3047
68
68
  magic_pdf/model/sub_modules/language_detection/yolov11/YOLOv11.py,sha256=7T8eFl8zlZe6F0j0jSB3jrwOapDft320JQ1fuWxpvAY,5230
@@ -119,12 +119,13 @@ magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/__init__.p
119
119
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/common.py,sha256=3r2jTvPYQS4IgTvIqR4l6bBVwR7jn-87rSmpv3tlqxI,2294
120
120
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/__init__.py,sha256=DCA9FS4mE5oCHDlBhUrkYLdxFeQIbhPj4P8oJ_gRZD8,832
121
121
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/base_model.py,sha256=RhV2Dm-os08kCFylT57zRu72Hq_RJdFy3xQe1MPaCuU,3588
122
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/__init__.py,sha256=TOLvLNeJhnAOus0D5jAq2TM67vLrtbzYlThTOhc-idc,1960
122
+ magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/__init__.py,sha256=Y2U-bQ7xVSh6MMTGzc7rb0kbMq_QniZiP7_CYIvONXk,2035
123
123
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/det_mobilenet_v3.py,sha256=r0gWnA1Xmt0Zw4FQLx7kf-WWwZd_26PfNzhM05drcuE,8334
124
124
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_hgnet.py,sha256=UsIbzqN_koyGoSh1TA9r27SggpHbeKS3HmmS-A2Aw04,8341
125
125
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_lcnetv3.py,sha256=3PNQG9B1cHe1hAg0NhcFR8p87rZnmH0jTBcfid-ZnKQ,15995
126
126
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mobilenet_v3.py,sha256=mJmE6xGpjHZH2Vaw16LlIlqRFFm9R9yRsSJEa3Yn3nw,4822
127
127
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mv1_enhance.py,sha256=K4p9KFYNmltV3y3QsxHIASNxoqlGtxgAoCxeFofyCmw,6726
128
+ magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_pphgnetv2.py,sha256=RIyHwb_jzDmwn6zGTD4LoK4n9HYxBvvb-QlUMnDk3fk,27525
128
129
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_svtrnet.py,sha256=AIaUZ3IWBkRz2pWmanBjS0QdJcYnimMSV4MWofNpQcg,20222
129
130
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/__init__.py,sha256=7vl3hyn6Ug_DNtqdfUL1Hz9scA_ptch2FCDzNddpQgU,1282
130
131
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/cls_head.py,sha256=puIy5GlUtAKer6eS4HWKu07PzRd-HlDAqIz5WqjBHaA,596
@@ -134,14 +135,14 @@ magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_
134
135
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/__init__.py,sha256=634L1y-QWv5P8opNiSmKvQEx3Uskc20RG8DYiCdbl8U,1030
135
136
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/db_fpn.py,sha256=TLF2pSyvRC0oPzL0eVyNlg3W6Zvfr4J8fD1nziVB7uI,14146
136
137
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/intracl.py,sha256=w2QdwdI9BpiW92VS4mqL31sVERIbY53TfbD5Q6okiaY,3410
137
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/rnn.py,sha256=olZwnKQexVlMx0gJi0FVYAm38TxNn5BM6F-OrdHKEgk,7019
138
+ magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/rnn.py,sha256=F0Iw-3IHuh4jcWy2b1Tyb8IRinDkiAvTSdtnWDfWzUc,7488
138
139
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/__init__.py,sha256=iC1Ol6CTxRWZBUyQ_5IVMR6kIurv9WJPOWWo7NAuZBA,1183
139
140
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/cls_postprocess.py,sha256=1VVWXT_b1vhGb7PGvqyfUQ3Ip7LupH62vPva98GtjTA,685
140
141
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/db_postprocess.py,sha256=EAc-cmhXtxLfFA6h5C871hIcDfXsjPDISiVCKwxh-qM,6339
141
142
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/rec_postprocess.py,sha256=HiHNr4bhW5U1j4pYoyi8fPOaFsn8TUc4nSB6q8chfV4,26899
142
143
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
143
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/arch_config.yaml,sha256=UzG7gH1unPJWwICJWdz1VNUXmetSqrSb_iR5o59Lo9A,7508
144
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/models_config.yml,sha256=_AjgxSML_Tj2VkARYFIrhZzrz36PZX9koGRF-xZC2ho,1629
144
+ magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/arch_config.yaml,sha256=-KTBQ8MEI_mCTAHVuMoNmsXV7DmkkYcjriLwDwrPi3Y,8817
145
+ magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/models_config.yml,sha256=RoWRKzU9nvwYnVAHae9HcDR-t0OyrS6NLZ2hgZcolrY,1860
145
146
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/arabic_dict.txt,sha256=xbaXD14RWk0Vpc7fAHpephuszp1j-Qi3IWC4VrFKu70,407
146
147
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/chinese_cht_dict.txt,sha256=gyVR_uHy-8l1CHctgevcjboSwA3pejXHHJ3fQ92sGoM,33443
147
148
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/cyrillic_dict.txt,sha256=NpqCxsjEeXhKXXJkSLg7Hq-1_vCkEppeqjkpYl3c0TI,410
@@ -153,6 +154,7 @@ magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dic
153
154
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/latin_dict.txt,sha256=jm1ONil4jDXDH35TAofWFHtUm7eiZb1nCLsoETRCniw,468
154
155
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocr_keys_v1.txt,sha256=KLI2KtSrLcOHaapy_rU146nds_0qdYWgWSDmOTsdx_c,26249
155
156
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocrv4_doc_dict.txt,sha256=pbw4h8Q8kB5aP5exP_rfHFdU7efMjJ9aviLodafEg3I,62346
157
+ magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocrv5_dict.txt,sha256=0Zeen3lMRkwNLgtwp_4U3ZeOncZEwOcfFBWM34NCrxs,74012
156
158
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ta_dict.txt,sha256=6T5pSBSv2f8ekYtvS7Qmf7TGWpNE7l10ZPkTW5DAonA,352
157
159
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/te_dict.txt,sha256=7plGpg13AZd0dOiYg2lKTKIOqjhoojM0v3lA3NAI8Pk,429
158
160
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/__init__.py,sha256=xEqR65Z8YOzOLorLjK0LCHos2zX-tCuxSrxndjU00hE,49
@@ -196,9 +198,9 @@ magic_pdf/tools/common.py,sha256=-x0RSFr7SNbdYq7DntaLYmQmaxyF-xKSf4xMpSUTzA0,126
196
198
  magic_pdf/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
197
199
  magic_pdf/utils/annotations.py,sha256=82ou3uELNbQWa9hOFFkVt0gsIskAKf5msCv5J2IJ5V0,211
198
200
  magic_pdf/utils/office_to_pdf.py,sha256=bFRYe6v3-pfx5R8-bV8cmf12jPnOrYZsleKoECTXzbM,3958
199
- magic_pdf-1.3.11.dist-info/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
200
- magic_pdf-1.3.11.dist-info/METADATA,sha256=MFffMcv9-ZlVG8jBKDtTPofJC_F-x_OPU4aLwxd-aUs,47765
201
- magic_pdf-1.3.11.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
202
- magic_pdf-1.3.11.dist-info/entry_points.txt,sha256=wXwYke3j8fqDQTocUspL-CqDUEv3Tfcwp09fM8dZAhA,98
203
- magic_pdf-1.3.11.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
204
- magic_pdf-1.3.11.dist-info/RECORD,,
201
+ magic_pdf-1.3.12.dist-info/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
202
+ magic_pdf-1.3.12.dist-info/METADATA,sha256=9UO2LqbOUz-SEObMHTrJaatZElHw4jrD_c4aQFC6q9I,49562
203
+ magic_pdf-1.3.12.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
204
+ magic_pdf-1.3.12.dist-info/entry_points.txt,sha256=wXwYke3j8fqDQTocUspL-CqDUEv3Tfcwp09fM8dZAhA,98
205
+ magic_pdf-1.3.12.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
206
+ magic_pdf-1.3.12.dist-info/RECORD,,