magic-pdf 1.0.1__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- magic_pdf/dict2md/ocr_mkcontent.py +24 -0
- magic_pdf/filter/__init__.py +1 -1
- magic_pdf/filter/pdf_classify_by_type.py +6 -4
- magic_pdf/filter/pdf_meta_scan.py +4 -4
- magic_pdf/libs/boxbase.py +5 -2
- magic_pdf/libs/draw_bbox.py +14 -2
- magic_pdf/libs/language.py +9 -0
- magic_pdf/libs/pdf_check.py +11 -1
- magic_pdf/libs/version.py +1 -1
- magic_pdf/model/batch_analyze.py +103 -99
- magic_pdf/model/doc_analyze_by_custom_model.py +87 -36
- magic_pdf/model/magic_model.py +161 -4
- magic_pdf/model/pdf_extract_kit.py +23 -28
- magic_pdf/model/sub_modules/language_detection/yolov11/YOLOv11.py +4 -3
- magic_pdf/model/sub_modules/layout/doclayout_yolo/DocLayoutYOLO.py +7 -3
- magic_pdf/model/sub_modules/mfr/unimernet/Unimernet.py +1 -1
- magic_pdf/model/sub_modules/model_init.py +34 -19
- magic_pdf/model/sub_modules/ocr/paddleocr/ocr_utils.py +33 -26
- magic_pdf/model/sub_modules/table/rapidtable/rapid_table.py +25 -6
- magic_pdf/pdf_parse_union_core_v2.py +176 -61
- magic_pdf/post_proc/llm_aided.py +55 -24
- magic_pdf/pre_proc/ocr_dict_merge.py +14 -2
- magic_pdf/pre_proc/ocr_span_list_modify.py +1 -1
- magic_pdf/resources/model_config/model_configs.yaml +2 -2
- {magic_pdf-1.0.1.dist-info → magic_pdf-1.2.0.dist-info}/METADATA +36 -19
- {magic_pdf-1.0.1.dist-info → magic_pdf-1.2.0.dist-info}/RECORD +30 -30
- {magic_pdf-1.0.1.dist-info → magic_pdf-1.2.0.dist-info}/LICENSE.md +0 -0
- {magic_pdf-1.0.1.dist-info → magic_pdf-1.2.0.dist-info}/WHEEL +0 -0
- {magic_pdf-1.0.1.dist-info → magic_pdf-1.2.0.dist-info}/entry_points.txt +0 -0
- {magic_pdf-1.0.1.dist-info → magic_pdf-1.2.0.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: magic-pdf
|
3
|
-
Version: 1.0.1
|
3
|
+
Version: 1.2.0
|
4
4
|
Summary: A practical tool for converting PDF to Markdown
|
5
5
|
Home-page: https://github.com/opendatalab/MinerU
|
6
6
|
Requires-Python: >=3.9
|
@@ -13,7 +13,7 @@ Requires-Dist: fast-langdetect>=0.2.3
|
|
13
13
|
Requires-Dist: loguru>=0.6.0
|
14
14
|
Requires-Dist: numpy<2.0.0,>=1.21.6
|
15
15
|
Requires-Dist: pydantic>=2.7.2
|
16
|
-
Requires-Dist: PyMuPDF
|
16
|
+
Requires-Dist: PyMuPDF<=1.24.14,>=1.24.9
|
17
17
|
Requires-Dist: scikit-learn>=1.0.2
|
18
18
|
Requires-Dist: torch>=2.2.2
|
19
19
|
Requires-Dist: transformers
|
@@ -27,17 +27,17 @@ Requires-Dist: paddleocr==2.7.3; extra == "full"
|
|
27
27
|
Requires-Dist: struct-eqtable==0.3.2; extra == "full"
|
28
28
|
Requires-Dist: einops; extra == "full"
|
29
29
|
Requires-Dist: accelerate; extra == "full"
|
30
|
-
Requires-Dist: doclayout-yolo==0.0.
|
31
|
-
Requires-Dist: rapidocr-paddle; extra == "full"
|
32
|
-
Requires-Dist: rapidocr-onnxruntime; extra == "full"
|
33
|
-
Requires-Dist: rapid-table
|
30
|
+
Requires-Dist: doclayout-yolo==0.0.2b1; extra == "full"
|
31
|
+
Requires-Dist: rapidocr-paddle<2.0.0,>=1.4.5; extra == "full"
|
32
|
+
Requires-Dist: rapidocr-onnxruntime<2.0.0,>=1.4.4; extra == "full"
|
33
|
+
Requires-Dist: rapid-table<2.0.0,>=1.0.3; extra == "full"
|
34
34
|
Requires-Dist: PyYAML; extra == "full"
|
35
35
|
Requires-Dist: openai; extra == "full"
|
36
36
|
Requires-Dist: detectron2; extra == "full"
|
37
|
-
Requires-Dist: paddlepaddle==3.0.0b1; platform_system == "Linux" and extra == "full"
|
38
37
|
Requires-Dist: matplotlib; (platform_system == "Linux" or platform_system == "Darwin") and extra == "full"
|
38
|
+
Requires-Dist: paddlepaddle==3.0.0rc1; (platform_system == "Linux" or platform_system == "Darwin") and extra == "full"
|
39
39
|
Requires-Dist: matplotlib<=3.9.0; platform_system == "Windows" and extra == "full"
|
40
|
-
Requires-Dist: paddlepaddle==2.6.1;
|
40
|
+
Requires-Dist: paddlepaddle==2.6.1; platform_system == "Windows" and extra == "full"
|
41
41
|
Provides-Extra: lite
|
42
42
|
Requires-Dist: paddleocr==2.7.3; extra == "lite"
|
43
43
|
Requires-Dist: paddlepaddle==3.0.0b1; platform_system == "Linux" and extra == "lite"
|
@@ -61,7 +61,7 @@ Requires-Dist: albumentations<=1.4.20; extra == "old-linux"
|
|
61
61
|
[](https://pepy.tech/project/magic-pdf)
|
62
62
|
[](https://pepy.tech/project/magic-pdf)
|
63
63
|
|
64
|
-
[](https://mineru.net/OpenSourceTools/Extractor?source=github)
|
65
65
|
[](https://huggingface.co/spaces/opendatalab/MinerU)
|
66
66
|
[](https://www.modelscope.cn/studios/OpenDataLab/MinerU)
|
67
67
|
[](https://colab.research.google.com/gist/myhloli/3b3a00a4a0a61577b6c30f989092d20d/mineru_demo.ipynb)
|
@@ -80,7 +80,7 @@ Requires-Dist: albumentations<=1.4.20; extra == "old-linux"
|
|
80
80
|
<a href="https://github.com/opendatalab/PDF-Extract-Kit">PDF-Extract-Kit: High-Quality PDF Extraction Toolkit</a>🔥🔥🔥
|
81
81
|
<br>
|
82
82
|
<br>
|
83
|
-
<a href="https://mineru.
|
83
|
+
<a href="https://mineru.net/client?source=github">
|
84
84
|
Easier to use: Just grab MinerU Desktop. No coding, no login, just a simple interface and smooth interactions. Enjoy it without any fuss!</a>🚀🚀🚀
|
85
85
|
|
86
86
|
</p>
|
@@ -88,13 +88,30 @@ Easier to use: Just grab MinerU Desktop. No coding, no login, just a simple inte
|
|
88
88
|
<!-- join us -->
|
89
89
|
|
90
90
|
<p align="center">
|
91
|
-
👋 join us on <a href="https://discord.gg/Tdedn9GTXq" target="_blank">Discord</a> and <a href="
|
91
|
+
👋 Join us on <a href="https://discord.gg/Tdedn9GTXq" target="_blank">Discord</a> and <a href="http://mineru.space/s/V85Yl" target="_blank">WeChat</a>
|
92
92
|
</p>
|
93
93
|
|
94
94
|
</div>
|
95
95
|
|
96
96
|
# Changelog
|
97
|
-
- 2025/
|
97
|
+
- 2025/02/24 1.2.0 released. This version includes several fixes and improvements to enhance parsing efficiency and accuracy:
|
98
|
+
- Performance Optimization
|
99
|
+
- Increased classification speed for PDF documents in auto mode.
|
100
|
+
- Parsing Optimization
|
101
|
+
- Improved parsing logic for documents containing watermarks, significantly enhancing the parsing results for such documents.
|
102
|
+
- Enhanced the matching logic for multiple images/tables and captions within a single page, improving the accuracy of image-text matching in complex layouts.
|
103
|
+
- Bug Fixes
|
104
|
+
- Fixed an issue where image/table spans were incorrectly filled into text blocks under certain conditions.
|
105
|
+
- Resolved an issue where title blocks were empty in some cases.
|
106
|
+
- 2025/01/22 1.1.0 released. In this version we have focused on improving parsing accuracy and efficiency:
|
107
|
+
- Model capability upgrade (requires re-executing the [model download process](docs/how_to_download_models_en.md) to obtain incremental updates of model files)
|
108
|
+
- The layout recognition model has been upgraded to the latest `doclayout_yolo(2501)` model, improving layout recognition accuracy.
|
109
|
+
- The formula parsing model has been upgraded to the latest `unimernet(2501)` model, improving formula recognition accuracy.
|
110
|
+
- Performance optimization
|
111
|
+
- On devices that meet certain configuration requirements (16GB+ VRAM), by optimizing resource usage and restructuring the processing pipeline, overall parsing speed has been increased by more than 50%.
|
112
|
+
- Parsing effect optimization
|
113
|
+
- Added a new heading classification feature (testing version, enabled by default) to the online demo ([mineru.net](https://mineru.net/OpenSourceTools/Extractor) / [huggingface](https://huggingface.co/spaces/opendatalab/MinerU) / [modelscope](https://www.modelscope.cn/studios/OpenDataLab/MinerU)), which supports hierarchical classification of headings, thereby enhancing document structuring.
|
114
|
+
- 2025/01/10 1.0.1 released. This is our first official release, where we have introduced a completely new API interface and enhanced compatibility through extensive refactoring, as well as a brand new automatic language identification feature:
|
98
115
|
- New API Interface
|
99
116
|
- For the data-side API, we have introduced the Dataset class, designed to provide a robust and flexible data processing framework. This framework currently supports a variety of document formats, including images (.jpg and .png), PDFs, Word documents (.doc and .docx), and PowerPoint presentations (.ppt and .pptx). It ensures effective support for data processing tasks ranging from simple to complex.
|
100
117
|
- For the user-side API, we have meticulously designed the MinerU processing workflow as a series of composable Stages. Each Stage represents a specific processing step, allowing users to define new Stages according to their needs and creatively combine these stages to customize their data processing workflows.
|
@@ -272,10 +289,9 @@ There are three different ways to experience MinerU:
|
|
272
289
|
|
273
290
|
### Online Demo
|
274
291
|
|
275
|
-
|
276
|
-
[](https://mineru.org.cn/OpenSourceTools/Extractor?source=github)
|
292
|
+
Synced with dev branch updates:
|
277
293
|
|
278
|
-
|
294
|
+
[](https://mineru.net/OpenSourceTools/Extractor?source=github)
|
279
295
|
[](https://huggingface.co/spaces/opendatalab/MinerU)
|
280
296
|
[](https://www.modelscope.cn/studios/OpenDataLab/MinerU)
|
281
297
|
|
@@ -284,8 +300,8 @@ Test Version (Synced with dev branch updates, testing new features):
|
|
284
300
|
#### 1. Install magic-pdf
|
285
301
|
|
286
302
|
```bash
|
287
|
-
conda create -n
|
288
|
-
conda activate
|
303
|
+
conda create -n mineru python=3.10
|
304
|
+
conda activate mineru
|
289
305
|
pip install -U "magic-pdf[full]" --extra-index-url https://wheels.myhloli.com
|
290
306
|
```
|
291
307
|
|
@@ -320,6 +336,7 @@ You can modify certain configurations in this file to enable or disable features
|
|
320
336
|
},
|
321
337
|
"table-config": {
|
322
338
|
"model": "rapid_table", // Default to using "rapid_table", can be switched to "tablemaster" or "struct_eqtable".
|
339
|
+
"sub_model": "slanet_plus", // When the model is "rapid_table", you can choose a sub_model. The options are "slanet_plus" and "unitable"
|
323
340
|
"enable": true, // The table recognition feature is enabled by default. If you need to disable it, please change the value here to "false".
|
324
341
|
"max_time": 400
|
325
342
|
}
|
@@ -344,7 +361,7 @@ If your device supports CUDA and meets the GPU requirements of the mainline envi
|
|
344
361
|
```bash
|
345
362
|
wget https://github.com/opendatalab/MinerU/raw/master/docker/global/Dockerfile -O Dockerfile
|
346
363
|
docker build -t mineru:latest .
|
347
|
-
docker run
|
364
|
+
docker run -it --name mineru --gpus=all mineru:latest /bin/bash -c "echo 'source /opt/mineru_venv/bin/activate' >> ~/.bashrc && exec bash"
|
348
365
|
magic-pdf --help
|
349
366
|
```
|
350
367
|
|
@@ -403,6 +420,7 @@ TODO
|
|
403
420
|
- [x] Reading order based on the model
|
404
421
|
- [x] Recognition of `index` and `list` in the main text
|
405
422
|
- [x] Table recognition
|
423
|
+
- [x] Heading Classification
|
406
424
|
- [ ] Code block recognition in the main text
|
407
425
|
- [ ] [Chemical formula recognition](docs/chemical_knowledge_introduction/introduction.pdf)
|
408
426
|
- [ ] Geometric shape recognition
|
@@ -412,7 +430,6 @@ TODO
|
|
412
430
|
- Reading order is determined by the model based on the spatial distribution of readable content, and may be out of order in some areas under extremely complex layouts.
|
413
431
|
- Vertical text is not supported.
|
414
432
|
- Tables of contents and lists are recognized through rules, and some uncommon list formats may not be recognized.
|
415
|
-
- Only one level of headings is supported; hierarchical headings are not currently supported.
|
416
433
|
- Code blocks are not yet supported in the layout model.
|
417
434
|
- Comic books, art albums, primary school textbooks, and exercises cannot be parsed well.
|
418
435
|
- Table recognition may result in row/column recognition errors in complex tables.
|
@@ -1,5 +1,5 @@
|
|
1
1
|
magic_pdf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
magic_pdf/pdf_parse_union_core_v2.py,sha256=
|
2
|
+
magic_pdf/pdf_parse_union_core_v2.py,sha256=jIrXgU_gKL4toJ6GsCoDxByszaN8mAr5vrEy_c63ewk,38310
|
3
3
|
magic_pdf/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
4
|
magic_pdf/config/constants.py,sha256=fXGzANULnJWLPxwYp3BEFWx-rnorzpySMx63ffyEyq4,1272
|
5
5
|
magic_pdf/config/drop_reason.py,sha256=CqjMzBE96Qo8OeFvhhhItY8WhyqsKhE3DmyJLoQZNCc,2248
|
@@ -24,49 +24,49 @@ magic_pdf/data/io/base.py,sha256=SqNQqe30ZvoVvg7GVv-hLMCjN6yBgDyQQWeLgGsTfhQ,111
|
|
24
24
|
magic_pdf/data/io/http.py,sha256=XlKB0DNf4a_uUnfgcclvaaOtmE7lmddx0DnK8A-emAM,958
|
25
25
|
magic_pdf/data/io/s3.py,sha256=hyA7sbNriQy64xd_uyJ7acN_oneQ1Pdmoc7_xcvkue8,3606
|
26
26
|
magic_pdf/dict2md/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
27
|
-
magic_pdf/dict2md/ocr_mkcontent.py,sha256=
|
28
|
-
magic_pdf/filter/__init__.py,sha256=
|
29
|
-
magic_pdf/filter/pdf_classify_by_type.py,sha256=
|
30
|
-
magic_pdf/filter/pdf_meta_scan.py,sha256=
|
27
|
+
magic_pdf/dict2md/ocr_mkcontent.py,sha256=ZZTaiIn18OWuWKGbDdpoOZ3VMhe_3_JKwrKCfzDiSk0,13715
|
28
|
+
magic_pdf/filter/__init__.py,sha256=_7lSez_myu4b6cdzPpQ-NfREuqeBSq_QdyBPKVLyq2U,1505
|
29
|
+
magic_pdf/filter/pdf_classify_by_type.py,sha256=YNYXamxYgEiSujwilCNHOtrwpgJGDiQ597qJfardDVc,42354
|
30
|
+
magic_pdf/filter/pdf_meta_scan.py,sha256=eOuM0-JgaXvHolSgepGoNDJDmv_uITWLQpH_0MfnVQw,17478
|
31
31
|
magic_pdf/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
32
32
|
magic_pdf/integrations/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
33
33
|
magic_pdf/integrations/rag/api.py,sha256=t38wvIBzLje4_JzTP3dewMLqV-tQJ-A3B92Sj2oyrfs,2507
|
34
34
|
magic_pdf/integrations/rag/type.py,sha256=Z_1g_ZIOCsb7-FmZBudReIXj8nzGrgj_BygCalhJdmk,3193
|
35
35
|
magic_pdf/integrations/rag/utils.py,sha256=DCb-UhC8TElb6Eq7_6NmmETreKEk5DVE18hNL8sTEBk,11762
|
36
36
|
magic_pdf/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
37
|
-
magic_pdf/libs/boxbase.py,sha256=
|
37
|
+
magic_pdf/libs/boxbase.py,sha256=DKZXhwpJd-HE2_Du7NmkeeYW5gG-iwX3GeSWL7rYGv0,16956
|
38
38
|
magic_pdf/libs/clean_memory.py,sha256=OsQexCjmBO2i-Hv-0uYQfn72dbUWR8sTW81nG2zlNQQ,479
|
39
39
|
magic_pdf/libs/commons.py,sha256=xD0fGA16KNB5rhbl4zRrOqdrNHYwaRablT_s9W2ZTbw,1174
|
40
40
|
magic_pdf/libs/config_reader.py,sha256=9GXK7jtDyA_jxXKWZAQ69rB02v5UW4mOmo1IaOYOkW0,4374
|
41
41
|
magic_pdf/libs/convert_utils.py,sha256=Ov-lsfCLBPz_15iSJXIslBNmrSf_E_1g_XDWJy8NgO8,143
|
42
42
|
magic_pdf/libs/coordinate_transform.py,sha256=Bbop2cP2uz2ZG0U0gwd7J6EKkgABq5Rv03qf2LMPw80,429
|
43
|
-
magic_pdf/libs/draw_bbox.py,sha256=
|
43
|
+
magic_pdf/libs/draw_bbox.py,sha256=hpUmpPiQVu7UgWQa3M49dS22G6A9gcG2jpq4dQjTjzA,18331
|
44
44
|
magic_pdf/libs/hash_utils.py,sha256=VEKK9WfFoZgrPfi8kfITjLpr8Ahufs8tXh9R1Y5lAL8,404
|
45
45
|
magic_pdf/libs/json_compressor.py,sha256=6-KCu0lb5ksmyqWtQGb4QqmP-FjRb5dP7P-Hevcn68g,875
|
46
|
-
magic_pdf/libs/language.py,sha256=
|
46
|
+
magic_pdf/libs/language.py,sha256=7RT3mxSa7jdpoC5ySd7ZddHA7TO7UsnmDOWiYZAxuyg,1433
|
47
47
|
magic_pdf/libs/local_math.py,sha256=tqljQOgqh3fZc146HYhO88JXJaiXMVwArBkk_CSGICc,177
|
48
48
|
magic_pdf/libs/markdown_utils.py,sha256=86v2BmsSV4NkoRZrH4uQD1youJhYFF3vIKr_vDeg3z0,270
|
49
49
|
magic_pdf/libs/path_utils.py,sha256=Hykw_l5CU736b2egHV9P7B-qh3QNKO4nZSGCbsi0Z8E,1043
|
50
|
-
magic_pdf/libs/pdf_check.py,sha256=
|
50
|
+
magic_pdf/libs/pdf_check.py,sha256=7GWWvDR6g_rj_fE6XJlbTq5AFVX11ngRIzT0N18F214,3396
|
51
51
|
magic_pdf/libs/pdf_image_tools.py,sha256=kjzSEbm7K0yiHv8kJ4VbZ9HHktM8qvAv3LhxRyDZEQk,1987
|
52
52
|
magic_pdf/libs/safe_filename.py,sha256=ckwcM_eqoysTb5id8czp-tXq2G9da0-l3pshZDCHQtE,236
|
53
|
-
magic_pdf/libs/version.py,sha256=
|
53
|
+
magic_pdf/libs/version.py,sha256=MpAT5hgNoHnTtG1XRD_GV_A7QrHVU6vJjGSw_8qMGA4,22
|
54
54
|
magic_pdf/model/__init__.py,sha256=sa-dO2k-TLy25I2gRrzjm_cQeYfzMf-pLwBJHkIxGo0,51
|
55
|
-
magic_pdf/model/batch_analyze.py,sha256=
|
56
|
-
magic_pdf/model/doc_analyze_by_custom_model.py,sha256=
|
57
|
-
magic_pdf/model/magic_model.py,sha256=
|
55
|
+
magic_pdf/model/batch_analyze.py,sha256=sbrgOJWycb1Ep6e62CPi6jEyG6VSeklIxc4PmrqaLhM,11933
|
56
|
+
magic_pdf/model/doc_analyze_by_custom_model.py,sha256=wma0aq6RyxAepEqnaiTJ9_pWWKLVBj39c6xWA85dxzA,8068
|
57
|
+
magic_pdf/model/magic_model.py,sha256=OcKhSJ_PyAAldgpKPiPxi2uuvnj3Sf4SvXi_5Rv0a6Q,30667
|
58
58
|
magic_pdf/model/model_list.py,sha256=aqfEJlEfbib3D3ISrxc0Coh6SbffYh8Yq2FlQN35_zA,213
|
59
|
-
magic_pdf/model/pdf_extract_kit.py,sha256=
|
59
|
+
magic_pdf/model/pdf_extract_kit.py,sha256=Rd51VNZPKRA_tUbDss-b44d84K6WDG2S87a37Ax7HUA,12224
|
60
60
|
magic_pdf/model/pp_structure_v2.py,sha256=NcqFWL4nUtjl82MFak8HX_8V3i4Aw_fK4dATrIp5uGs,3840
|
61
61
|
magic_pdf/model/sub_modules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
62
|
-
magic_pdf/model/sub_modules/model_init.py,sha256
|
62
|
+
magic_pdf/model/sub_modules/model_init.py,sha256=Ltwi3Nd5PdVVXRF9fto5nImFVg6w-twAMzOLV_F-c3g,7693
|
63
63
|
magic_pdf/model/sub_modules/model_utils.py,sha256=2pI1Xcr2zCF3b64e4WoFtIbjSmTVYBE4zjyHB23gvmE,2488
|
64
64
|
magic_pdf/model/sub_modules/language_detection/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
65
65
|
magic_pdf/model/sub_modules/language_detection/utils.py,sha256=5nec_loLyYCJ5o6n38AYLz2SKmRvHDCBdt6ka84EaGM,3096
|
66
|
-
magic_pdf/model/sub_modules/language_detection/yolov11/YOLOv11.py,sha256=
|
66
|
+
magic_pdf/model/sub_modules/language_detection/yolov11/YOLOv11.py,sha256=bl2i7kweoJNdj47FlE9h0B_-nNQrMcW9mCLQ1puMEH8,4893
|
67
67
|
magic_pdf/model/sub_modules/language_detection/yolov11/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
68
68
|
magic_pdf/model/sub_modules/layout/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
69
|
-
magic_pdf/model/sub_modules/layout/doclayout_yolo/DocLayoutYOLO.py,sha256=
|
69
|
+
magic_pdf/model/sub_modules/layout/doclayout_yolo/DocLayoutYOLO.py,sha256=gy7rc8poO-Zr8511NJjuBV8Uryq5k3JKrstLtCONg0c,2237
|
70
70
|
magic_pdf/model/sub_modules/layout/doclayout_yolo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
71
71
|
magic_pdf/model/sub_modules/layout/layoutlmv3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
72
72
|
magic_pdf/model/sub_modules/layout/layoutlmv3/backbone.py,sha256=1cvSCczgvwOLdvzWyqttoYPMHsXmnzI3w9abJ1bAXoM,7106
|
@@ -92,11 +92,11 @@ magic_pdf/model/sub_modules/mfd/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
|
|
92
92
|
magic_pdf/model/sub_modules/mfd/yolov8/YOLOv8.py,sha256=QfHbMr1br0pOJUu1NJEMgA6yw11G0yFImJv_AfW48_c,1008
|
93
93
|
magic_pdf/model/sub_modules/mfd/yolov8/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
94
94
|
magic_pdf/model/sub_modules/mfr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
95
|
-
magic_pdf/model/sub_modules/mfr/unimernet/Unimernet.py,sha256=
|
95
|
+
magic_pdf/model/sub_modules/mfr/unimernet/Unimernet.py,sha256=R05qw54QuLl2btNWdkxf4yCjDeEj8o0786e-gz_Xv8k,5290
|
96
96
|
magic_pdf/model/sub_modules/mfr/unimernet/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
97
97
|
magic_pdf/model/sub_modules/ocr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
98
98
|
magic_pdf/model/sub_modules/ocr/paddleocr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
99
|
-
magic_pdf/model/sub_modules/ocr/paddleocr/ocr_utils.py,sha256=
|
99
|
+
magic_pdf/model/sub_modules/ocr/paddleocr/ocr_utils.py,sha256=biuLnQWUquZkxmObjpg33iVCPPJKbRA4kx0Uo6OvGyc,12672
|
100
100
|
magic_pdf/model/sub_modules/ocr/paddleocr/ppocr_273_mod.py,sha256=QBBeFN1iF7nj5gqQ5sQXjhpwy8lB4c96gubnRDBuDNU,8424
|
101
101
|
magic_pdf/model/sub_modules/ocr/paddleocr/ppocr_291_mod.py,sha256=VouMTvi6M5TV6pQdlpusgfyZapxiZ_Wi7Ff53eMC3rE,8996
|
102
102
|
magic_pdf/model/sub_modules/reading_oreder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -106,7 +106,7 @@ magic_pdf/model/sub_modules/reading_oreder/layoutreader/xycut.py,sha256=ezNSq_Y4
|
|
106
106
|
magic_pdf/model/sub_modules/table/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
107
107
|
magic_pdf/model/sub_modules/table/table_utils.py,sha256=B9BC4f5EEjlt2ldYxrIC8Wic2Tz3t3gTJeEyK3ggrOU,282
|
108
108
|
magic_pdf/model/sub_modules/table/rapidtable/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
109
|
-
magic_pdf/model/sub_modules/table/rapidtable/rapid_table.py,sha256=
|
109
|
+
magic_pdf/model/sub_modules/table/rapidtable/rapid_table.py,sha256=6TUO6wiA4oZQB2_VP6kngZF6-2cI6mAP57Qf2lv6LVw,2922
|
110
110
|
magic_pdf/model/sub_modules/table/structeqtable/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
111
111
|
magic_pdf/model/sub_modules/table/structeqtable/struct_eqtable.py,sha256=SrNPm-uOFEvN5muFGbXTAuwzXm-rCiaihVdqbydIBIA,1131
|
112
112
|
magic_pdf/model/sub_modules/table/tablemaster/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -115,17 +115,17 @@ magic_pdf/operators/__init__.py,sha256=liU2-WYUvsQ1G4PYBppyvokS9z5IjrnlVMtoBAC1R
|
|
115
115
|
magic_pdf/operators/models.py,sha256=mRqbCVrxxaUVDpEBAsXaK7EL1M-goICkE1W0FYgewio,5305
|
116
116
|
magic_pdf/operators/pipes.py,sha256=XgBgisKQd_ruW-3Tw4v5LhqloZUHgn2aFcpi_q8LbCs,6767
|
117
117
|
magic_pdf/post_proc/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
118
|
-
magic_pdf/post_proc/llm_aided.py,sha256=
|
118
|
+
magic_pdf/post_proc/llm_aided.py,sha256=yzhu2cCpUZjdwf3v0swYDgSs9VWIfMAoXepYIP1EMZs,6367
|
119
119
|
magic_pdf/post_proc/para_split_v3.py,sha256=v4SdQn4OZdHRXpWQMfQ-FGJz_tglQ88uFUqpwY542Fo,16922
|
120
120
|
magic_pdf/pre_proc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
121
121
|
magic_pdf/pre_proc/construct_page_dict.py,sha256=OFmq5XRKi6fYIo-lmGlL-NB16Sf0egzsfEx-fT2uYrc,660
|
122
122
|
magic_pdf/pre_proc/cut_image.py,sha256=NDzbxwD7z7Tb4uAxL4KR6LzURFdN1Tzr4nPvj-VmEqc,1225
|
123
123
|
magic_pdf/pre_proc/ocr_detect_all_bboxes.py,sha256=nt88ttXCEI_1ihAF7HU15SQjwM69V-iJmk-L_nyzA6o,9328
|
124
|
-
magic_pdf/pre_proc/ocr_dict_merge.py,sha256=
|
125
|
-
magic_pdf/pre_proc/ocr_span_list_modify.py,sha256=
|
124
|
+
magic_pdf/pre_proc/ocr_dict_merge.py,sha256=vrbLIzNIjxrm7PonfHaFdY6qaicc0uIly62SJwgZ5UM,5496
|
125
|
+
magic_pdf/pre_proc/ocr_span_list_modify.py,sha256=xrgC9vR0poklZuY4Og41pZVdXzuaGFg3BnQ01X60dpo,3102
|
126
126
|
magic_pdf/pre_proc/remove_bbox_overlap.py,sha256=mcdxAh4P56NZ3Ij8h3vW8qC_SrszfXflVWuWUuUiTNg,3089
|
127
127
|
magic_pdf/resources/fasttext-langdetect/lid.176.ftz,sha256=jzRyz-hzintgmejpmcPL-uDc0VaWqsfXc4qAOdtgPoM,938013
|
128
|
-
magic_pdf/resources/model_config/model_configs.yaml,sha256=
|
128
|
+
magic_pdf/resources/model_config/model_configs.yaml,sha256=v3HwFTmIbXJJEBXUHHHMnZQKRo6ZQtP3cncSebh-5gc,322
|
129
129
|
magic_pdf/resources/model_config/UniMERNet/demo.yaml,sha256=Jdaim2D2lAYrV9rhc1X5Sy2_IacGOrfysJhxEUgSElo,827
|
130
130
|
magic_pdf/resources/model_config/layoutlmv3/layoutlmv3_base_inference.yaml,sha256=9aNAEYgpHTAWpcUrDvuPG2y4V-Qw8QdcJefi96y8yDU,6109
|
131
131
|
magic_pdf/resources/yolov11-langdetect/yolo_v11_ft.pt,sha256=dV4lcudF8wimEbAooYbvISvFhrXjp9i0rMRqv9VW6hY,3204667
|
@@ -138,9 +138,9 @@ magic_pdf/tools/common.py,sha256=1LfMeXBBsb3WlGeNAze_pPOYXQ8Qbfh-JgRXweojHKo,838
|
|
138
138
|
magic_pdf/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
139
139
|
magic_pdf/utils/annotations.py,sha256=82ou3uELNbQWa9hOFFkVt0gsIskAKf5msCv5J2IJ5V0,211
|
140
140
|
magic_pdf/utils/office_to_pdf.py,sha256=7aj-Ls2v8saD-Rgu_t3FIc-J3Ka9wnmiEH5zY-H1Vxs,729
|
141
|
-
magic_pdf-1.0.
|
142
|
-
magic_pdf-1.0.
|
143
|
-
magic_pdf-1.0.
|
144
|
-
magic_pdf-1.0.
|
145
|
-
magic_pdf-1.0.
|
146
|
-
magic_pdf-1.0.
|
141
|
+
magic_pdf-1.2.0.dist-info/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
|
142
|
+
magic_pdf-1.2.0.dist-info/METADATA,sha256=7iel3MItxKhJc1Bbfh_NMbDp8a23k9G1vA8LYEw2k_U,40720
|
143
|
+
magic_pdf-1.2.0.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
144
|
+
magic_pdf-1.2.0.dist-info/entry_points.txt,sha256=wXwYke3j8fqDQTocUspL-CqDUEv3Tfcwp09fM8dZAhA,98
|
145
|
+
magic_pdf-1.2.0.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
|
146
|
+
magic_pdf-1.2.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|