magic-pdf 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- magic_pdf/libs/boxbase.py +5 -2
- magic_pdf/libs/draw_bbox.py +14 -2
- magic_pdf/libs/language.py +9 -0
- magic_pdf/libs/version.py +1 -1
- magic_pdf/model/batch_analyze.py +103 -99
- magic_pdf/model/doc_analyze_by_custom_model.py +77 -18
- magic_pdf/model/pdf_extract_kit.py +23 -21
- magic_pdf/model/sub_modules/layout/doclayout_yolo/DocLayoutYOLO.py +7 -3
- magic_pdf/model/sub_modules/mfr/unimernet/Unimernet.py +1 -1
- magic_pdf/model/sub_modules/model_init.py +4 -3
- magic_pdf/model/sub_modules/ocr/paddleocr/ocr_utils.py +33 -26
- magic_pdf/model/sub_modules/table/rapidtable/rapid_table.py +25 -6
- magic_pdf/pdf_parse_union_core_v2.py +137 -32
- magic_pdf/post_proc/llm_aided.py +59 -26
- magic_pdf/post_proc/llm_aided_ocr.py +689 -0
- magic_pdf/pre_proc/ocr_span_list_modify.py +1 -1
- magic_pdf/resources/model_config/model_configs.yaml +2 -2
- {magic_pdf-1.0.0.dist-info → magic_pdf-1.1.0.dist-info}/METADATA +50 -41
- {magic_pdf-1.0.0.dist-info → magic_pdf-1.1.0.dist-info}/RECORD +23 -22
- {magic_pdf-1.0.0.dist-info → magic_pdf-1.1.0.dist-info}/WHEEL +1 -1
- {magic_pdf-1.0.0.dist-info → magic_pdf-1.1.0.dist-info}/LICENSE.md +0 -0
- {magic_pdf-1.0.0.dist-info → magic_pdf-1.1.0.dist-info}/entry_points.txt +0 -0
- {magic_pdf-1.0.0.dist-info → magic_pdf-1.1.0.dist-info}/top_level.txt +0 -0
@@ -36,7 +36,7 @@ def remove_overlaps_low_confidence_spans(spans):
|
|
36
36
|
def check_chars_is_overlap_in_span(chars):
|
37
37
|
for i in range(len(chars)):
|
38
38
|
for j in range(i + 1, len(chars)):
|
39
|
-
if calculate_iou(chars[i]['bbox'], chars[j]['bbox']) > 0.
|
39
|
+
if calculate_iou(chars[i]['bbox'], chars[j]['bbox']) > 0.35:
|
40
40
|
return True
|
41
41
|
return False
|
42
42
|
|
@@ -1,8 +1,8 @@
|
|
1
1
|
weights:
|
2
2
|
layoutlmv3: Layout/LayoutLMv3/model_final.pth
|
3
|
-
doclayout_yolo: Layout/YOLO/
|
3
|
+
doclayout_yolo: Layout/YOLO/doclayout_yolo_docstructbench_imgsz1280_2501.pt
|
4
4
|
yolo_v8_mfd: MFD/YOLO/yolo_v8_ft.pt
|
5
|
-
unimernet_small: MFR/
|
5
|
+
unimernet_small: MFR/unimernet_small_2501
|
6
6
|
struct_eqtable: TabRec/StructEqTable
|
7
7
|
tablemaster: TabRec/TableMaster
|
8
8
|
rapid_table: TabRec/RapidTable
|
@@ -1,49 +1,49 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: magic-pdf
|
3
|
-
Version: 1.
|
3
|
+
Version: 1.1.0
|
4
4
|
Summary: A practical tool for converting PDF to Markdown
|
5
5
|
Home-page: https://github.com/opendatalab/MinerU
|
6
6
|
Requires-Python: >=3.9
|
7
7
|
Description-Content-Type: text/markdown
|
8
8
|
License-File: LICENSE.md
|
9
|
-
Requires-Dist: boto3>=1.28.43
|
10
|
-
Requires-Dist: Brotli>=1.1.0
|
11
|
-
Requires-Dist: click>=8.1.7
|
12
|
-
Requires-Dist: fast-langdetect>=0.2.3
|
13
|
-
Requires-Dist: loguru>=0.6.0
|
14
|
-
Requires-Dist: numpy<2.0.0,>=1.21.6
|
15
|
-
Requires-Dist: pydantic>=2.7.2
|
16
|
-
Requires-Dist: PyMuPDF
|
17
|
-
Requires-Dist: scikit-learn>=1.0.2
|
18
|
-
Requires-Dist: torch>=2.2.2
|
9
|
+
Requires-Dist: boto3 >=1.28.43
|
10
|
+
Requires-Dist: Brotli >=1.1.0
|
11
|
+
Requires-Dist: click >=8.1.7
|
12
|
+
Requires-Dist: fast-langdetect >=0.2.3
|
13
|
+
Requires-Dist: loguru >=0.6.0
|
14
|
+
Requires-Dist: numpy <2.0.0,>=1.21.6
|
15
|
+
Requires-Dist: pydantic >=2.7.2
|
16
|
+
Requires-Dist: PyMuPDF <=1.24.14,>=1.24.9
|
17
|
+
Requires-Dist: scikit-learn >=1.0.2
|
18
|
+
Requires-Dist: torch >=2.2.2
|
19
19
|
Requires-Dist: transformers
|
20
|
-
Requires-Dist: pdfminer.six==20231228
|
20
|
+
Requires-Dist: pdfminer.six ==20231228
|
21
21
|
Provides-Extra: full
|
22
|
-
Requires-Dist: unimernet==0.2.3; extra ==
|
23
|
-
Requires-Dist: torch<=2.3.1,>=2.2.2; extra ==
|
24
|
-
Requires-Dist: torchvision<=0.18.1,>=0.17.2; extra ==
|
25
|
-
Requires-Dist: ultralytics>=8.3.48; extra ==
|
26
|
-
Requires-Dist: paddleocr==2.7.3; extra ==
|
27
|
-
Requires-Dist: struct-eqtable==0.3.2; extra ==
|
28
|
-
Requires-Dist: einops; extra ==
|
29
|
-
Requires-Dist: accelerate; extra ==
|
30
|
-
Requires-Dist: doclayout-yolo==0.0.
|
31
|
-
Requires-Dist: rapidocr-paddle; extra ==
|
32
|
-
Requires-Dist: rapidocr-onnxruntime; extra ==
|
33
|
-
Requires-Dist: rapid-table
|
34
|
-
Requires-Dist: PyYAML; extra ==
|
35
|
-
Requires-Dist: openai; extra ==
|
36
|
-
Requires-Dist: detectron2; extra ==
|
37
|
-
Requires-Dist: paddlepaddle==3.0.0b1; platform_system == "Linux" and extra ==
|
38
|
-
Requires-Dist: matplotlib; (platform_system == "Linux" or platform_system == "Darwin") and extra ==
|
39
|
-
Requires-Dist: matplotlib<=3.9.0; platform_system == "Windows" and extra ==
|
40
|
-
Requires-Dist: paddlepaddle==2.6.1; (platform_system == "Windows" or platform_system == "Darwin") and extra ==
|
22
|
+
Requires-Dist: unimernet ==0.2.3 ; extra == 'full'
|
23
|
+
Requires-Dist: torch <=2.3.1,>=2.2.2 ; extra == 'full'
|
24
|
+
Requires-Dist: torchvision <=0.18.1,>=0.17.2 ; extra == 'full'
|
25
|
+
Requires-Dist: ultralytics >=8.3.48 ; extra == 'full'
|
26
|
+
Requires-Dist: paddleocr ==2.7.3 ; extra == 'full'
|
27
|
+
Requires-Dist: struct-eqtable ==0.3.2 ; extra == 'full'
|
28
|
+
Requires-Dist: einops ; extra == 'full'
|
29
|
+
Requires-Dist: accelerate ; extra == 'full'
|
30
|
+
Requires-Dist: doclayout-yolo ==0.0.2b1 ; extra == 'full'
|
31
|
+
Requires-Dist: rapidocr-paddle ; extra == 'full'
|
32
|
+
Requires-Dist: rapidocr-onnxruntime ; extra == 'full'
|
33
|
+
Requires-Dist: rapid-table <2.0.0,>=1.0.3 ; extra == 'full'
|
34
|
+
Requires-Dist: PyYAML ; extra == 'full'
|
35
|
+
Requires-Dist: openai ; extra == 'full'
|
36
|
+
Requires-Dist: detectron2 ; extra == 'full'
|
37
|
+
Requires-Dist: paddlepaddle ==3.0.0b1 ; (platform_system == "Linux") and extra == 'full'
|
38
|
+
Requires-Dist: matplotlib ; (platform_system == "Linux" or platform_system == "Darwin") and extra == 'full'
|
39
|
+
Requires-Dist: matplotlib <=3.9.0 ; (platform_system == "Windows") and extra == 'full'
|
40
|
+
Requires-Dist: paddlepaddle ==2.6.1 ; (platform_system == "Windows" or platform_system == "Darwin") and extra == 'full'
|
41
41
|
Provides-Extra: lite
|
42
|
-
Requires-Dist: paddleocr==2.7.3; extra ==
|
43
|
-
Requires-Dist: paddlepaddle==3.0.0b1; platform_system == "Linux" and extra ==
|
44
|
-
Requires-Dist: paddlepaddle==2.6.1; (platform_system == "Windows" or platform_system == "Darwin") and extra ==
|
42
|
+
Requires-Dist: paddleocr ==2.7.3 ; extra == 'lite'
|
43
|
+
Requires-Dist: paddlepaddle ==3.0.0b1 ; (platform_system == "Linux") and extra == 'lite'
|
44
|
+
Requires-Dist: paddlepaddle ==2.6.1 ; (platform_system == "Windows" or platform_system == "Darwin") and extra == 'lite'
|
45
45
|
Provides-Extra: old_linux
|
46
|
-
Requires-Dist: albumentations<=1.4.20; extra ==
|
46
|
+
Requires-Dist: albumentations <=1.4.20 ; extra == 'old_linux'
|
47
47
|
|
48
48
|
<div align="center" xmlns="http://www.w3.org/1999/html">
|
49
49
|
<!-- logo -->
|
@@ -61,7 +61,7 @@ Requires-Dist: albumentations<=1.4.20; extra == "old-linux"
|
|
61
61
|
[](https://pepy.tech/project/magic-pdf)
|
62
62
|
[](https://pepy.tech/project/magic-pdf)
|
63
63
|
|
64
|
-
[](https://mineru.net/OpenSourceTools/Extractor?source=github)
|
65
65
|
[](https://huggingface.co/spaces/opendatalab/MinerU)
|
66
66
|
[](https://www.modelscope.cn/studios/OpenDataLab/MinerU)
|
67
67
|
[](https://colab.research.google.com/gist/myhloli/3b3a00a4a0a61577b6c30f989092d20d/mineru_demo.ipynb)
|
@@ -80,7 +80,7 @@ Requires-Dist: albumentations<=1.4.20; extra == "old-linux"
|
|
80
80
|
<a href="https://github.com/opendatalab/PDF-Extract-Kit">PDF-Extract-Kit: High-Quality PDF Extraction Toolkit</a>🔥🔥🔥
|
81
81
|
<br>
|
82
82
|
<br>
|
83
|
-
<a href="https://mineru.
|
83
|
+
<a href="https://mineru.net/client?source=github">
|
84
84
|
Easier to use: Just grab MinerU Desktop. No coding, no login, just a simple interface and smooth interactions. Enjoy it without any fuss!</a>🚀🚀🚀
|
85
85
|
|
86
86
|
</p>
|
@@ -88,13 +88,21 @@ Easier to use: Just grab MinerU Desktop. No coding, no login, just a simple inte
|
|
88
88
|
<!-- join us -->
|
89
89
|
|
90
90
|
<p align="center">
|
91
|
-
👋 join us on <a href="https://discord.gg/Tdedn9GTXq" target="_blank">Discord</a> and <a href="
|
91
|
+
👋 join us on <a href="https://discord.gg/Tdedn9GTXq" target="_blank">Discord</a> and <a href="http://mineru.space/s/V85Yl" target="_blank">WeChat</a>
|
92
92
|
</p>
|
93
93
|
|
94
94
|
</div>
|
95
95
|
|
96
96
|
# Changelog
|
97
|
-
- 2025/01/
|
97
|
+
- 2025/01/22 1.1.0 released. In this version we have focused on improving parsing accuracy and efficiency:
|
98
|
+
- Model capability upgrade (requires re-executing the [model download process](docs/how_to_download_models_en.md) to obtain incremental updates of model files)
|
99
|
+
- The layout recognition model has been upgraded to the latest `doclayout_yolo(2501)` model, improving layout recognition accuracy.
|
100
|
+
- The formula parsing model has been upgraded to the latest `unimernet(2501)` model, improving formula recognition accuracy.
|
101
|
+
- Performance optimization
|
102
|
+
- On devices that meet certain configuration requirements (16GB+ VRAM), by optimizing resource usage and restructuring the processing pipeline, overall parsing speed has been increased by more than 50%.
|
103
|
+
- Parsing effect optimization
|
104
|
+
- Added a new heading classification feature (testing version, enabled by default) to the online demo([mineru.net](https://mineru.net/OpenSourceTools/Extractor)/[huggingface](https://huggingface.co/spaces/opendatalab/MinerU)/[modelscope](https://www.modelscope.cn/studios/OpenDataLab/MinerU)), which supports hierarchical classification of headings, thereby enhancing document structuring.
|
105
|
+
- 2025/01/10 1.0.1 released. This is our first official release, where we have introduced a completely new API interface and enhanced compatibility through extensive refactoring, as well as a brand new automatic language identification feature:
|
98
106
|
- New API Interface
|
99
107
|
- For the data-side API, we have introduced the Dataset class, designed to provide a robust and flexible data processing framework. This framework currently supports a variety of document formats, including images (.jpg and .png), PDFs, Word documents (.doc and .docx), and PowerPoint presentations (.ppt and .pptx). It ensures effective support for data processing tasks ranging from simple to complex.
|
100
108
|
- For the user-side API, we have meticulously designed the MinerU processing workflow as a series of composable Stages. Each Stage represents a specific processing step, allowing users to define new Stages according to their needs and creatively combine these stages to customize their data processing workflows.
|
@@ -273,7 +281,7 @@ There are three different ways to experience MinerU:
|
|
273
281
|
### Online Demo
|
274
282
|
|
275
283
|
Stable Version (Stable version verified by QA):
|
276
|
-
[](https://mineru.
|
284
|
+
[](https://mineru.net/OpenSourceTools/Extractor?source=github)
|
277
285
|
|
278
286
|
Test Version (Synced with dev branch updates, testing new features):
|
279
287
|
[](https://huggingface.co/spaces/opendatalab/MinerU)
|
@@ -320,6 +328,7 @@ You can modify certain configurations in this file to enable or disable features
|
|
320
328
|
},
|
321
329
|
"table-config": {
|
322
330
|
"model": "rapid_table", // Default to using "rapid_table", can be switched to "tablemaster" or "struct_eqtable".
|
331
|
+
"sub_model": "slanet_plus", // When the model is "rapid_table", you can choose a sub_model. The options are "slanet_plus" and "unitable"
|
323
332
|
"enable": true, // The table recognition feature is enabled by default. If you need to disable it, please change the value here to "false".
|
324
333
|
"max_time": 400
|
325
334
|
}
|
@@ -403,6 +412,7 @@ TODO
|
|
403
412
|
- [x] Reading order based on the model
|
404
413
|
- [x] Recognition of `index` and `list` in the main text
|
405
414
|
- [x] Table recognition
|
415
|
+
- [x] Heading Classification
|
406
416
|
- [ ] Code block recognition in the main text
|
407
417
|
- [ ] [Chemical formula recognition](docs/chemical_knowledge_introduction/introduction.pdf)
|
408
418
|
- [ ] Geometric shape recognition
|
@@ -412,7 +422,6 @@ TODO
|
|
412
422
|
- Reading order is determined by the model based on the spatial distribution of readable content, and may be out of order in some areas under extremely complex layouts.
|
413
423
|
- Vertical text is not supported.
|
414
424
|
- Tables of contents and lists are recognized through rules, and some uncommon list formats may not be recognized.
|
415
|
-
- Only one level of headings is supported; hierarchical headings are not currently supported.
|
416
425
|
- Code blocks are not yet supported in the layout model.
|
417
426
|
- Comic books, art albums, primary school textbooks, and exercises cannot be parsed well.
|
418
427
|
- Table recognition may result in row/column recognition errors in complex tables.
|
@@ -1,5 +1,5 @@
|
|
1
1
|
magic_pdf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
magic_pdf/pdf_parse_union_core_v2.py,sha256=
|
2
|
+
magic_pdf/pdf_parse_union_core_v2.py,sha256=qh-Vj7v8EenC_f_MNMa76i1DVuckulQo1QC1IOw8LRE,37723
|
3
3
|
magic_pdf/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
4
|
magic_pdf/config/constants.py,sha256=fXGzANULnJWLPxwYp3BEFWx-rnorzpySMx63ffyEyq4,1272
|
5
5
|
magic_pdf/config/drop_reason.py,sha256=CqjMzBE96Qo8OeFvhhhItY8WhyqsKhE3DmyJLoQZNCc,2248
|
@@ -34,39 +34,39 @@ magic_pdf/integrations/rag/api.py,sha256=t38wvIBzLje4_JzTP3dewMLqV-tQJ-A3B92Sj2o
|
|
34
34
|
magic_pdf/integrations/rag/type.py,sha256=Z_1g_ZIOCsb7-FmZBudReIXj8nzGrgj_BygCalhJdmk,3193
|
35
35
|
magic_pdf/integrations/rag/utils.py,sha256=DCb-UhC8TElb6Eq7_6NmmETreKEk5DVE18hNL8sTEBk,11762
|
36
36
|
magic_pdf/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
37
|
-
magic_pdf/libs/boxbase.py,sha256=
|
37
|
+
magic_pdf/libs/boxbase.py,sha256=DKZXhwpJd-HE2_Du7NmkeeYW5gG-iwX3GeSWL7rYGv0,16956
|
38
38
|
magic_pdf/libs/clean_memory.py,sha256=OsQexCjmBO2i-Hv-0uYQfn72dbUWR8sTW81nG2zlNQQ,479
|
39
39
|
magic_pdf/libs/commons.py,sha256=xD0fGA16KNB5rhbl4zRrOqdrNHYwaRablT_s9W2ZTbw,1174
|
40
40
|
magic_pdf/libs/config_reader.py,sha256=9GXK7jtDyA_jxXKWZAQ69rB02v5UW4mOmo1IaOYOkW0,4374
|
41
41
|
magic_pdf/libs/convert_utils.py,sha256=Ov-lsfCLBPz_15iSJXIslBNmrSf_E_1g_XDWJy8NgO8,143
|
42
42
|
magic_pdf/libs/coordinate_transform.py,sha256=Bbop2cP2uz2ZG0U0gwd7J6EKkgABq5Rv03qf2LMPw80,429
|
43
|
-
magic_pdf/libs/draw_bbox.py,sha256=
|
43
|
+
magic_pdf/libs/draw_bbox.py,sha256=hpUmpPiQVu7UgWQa3M49dS22G6A9gcG2jpq4dQjTjzA,18331
|
44
44
|
magic_pdf/libs/hash_utils.py,sha256=VEKK9WfFoZgrPfi8kfITjLpr8Ahufs8tXh9R1Y5lAL8,404
|
45
45
|
magic_pdf/libs/json_compressor.py,sha256=6-KCu0lb5ksmyqWtQGb4QqmP-FjRb5dP7P-Hevcn68g,875
|
46
|
-
magic_pdf/libs/language.py,sha256=
|
46
|
+
magic_pdf/libs/language.py,sha256=7RT3mxSa7jdpoC5ySd7ZddHA7TO7UsnmDOWiYZAxuyg,1433
|
47
47
|
magic_pdf/libs/local_math.py,sha256=tqljQOgqh3fZc146HYhO88JXJaiXMVwArBkk_CSGICc,177
|
48
48
|
magic_pdf/libs/markdown_utils.py,sha256=86v2BmsSV4NkoRZrH4uQD1youJhYFF3vIKr_vDeg3z0,270
|
49
49
|
magic_pdf/libs/path_utils.py,sha256=Hykw_l5CU736b2egHV9P7B-qh3QNKO4nZSGCbsi0Z8E,1043
|
50
50
|
magic_pdf/libs/pdf_check.py,sha256=zBwUThKKBtnrNPmgE10lYsTy1Kq7j_6IejO7JR0J4pA,3118
|
51
51
|
magic_pdf/libs/pdf_image_tools.py,sha256=kjzSEbm7K0yiHv8kJ4VbZ9HHktM8qvAv3LhxRyDZEQk,1987
|
52
52
|
magic_pdf/libs/safe_filename.py,sha256=ckwcM_eqoysTb5id8czp-tXq2G9da0-l3pshZDCHQtE,236
|
53
|
-
magic_pdf/libs/version.py,sha256=
|
53
|
+
magic_pdf/libs/version.py,sha256=LGVQyDsWifdACo7qztwb8RWWHds1E7uQ-ZqD8SAjyw4,22
|
54
54
|
magic_pdf/model/__init__.py,sha256=sa-dO2k-TLy25I2gRrzjm_cQeYfzMf-pLwBJHkIxGo0,51
|
55
|
-
magic_pdf/model/batch_analyze.py,sha256=
|
56
|
-
magic_pdf/model/doc_analyze_by_custom_model.py,sha256=
|
55
|
+
magic_pdf/model/batch_analyze.py,sha256=sbrgOJWycb1Ep6e62CPi6jEyG6VSeklIxc4PmrqaLhM,11933
|
56
|
+
magic_pdf/model/doc_analyze_by_custom_model.py,sha256=zryHy3ljcEvDqOWivXZQrpau_jPtt6x1lLOZaOkk_tI,8153
|
57
57
|
magic_pdf/model/magic_model.py,sha256=Nt74oZGYUcbm4qdOQtN-hbKhXxlWO2LVv3K9yXvteWY,25204
|
58
58
|
magic_pdf/model/model_list.py,sha256=aqfEJlEfbib3D3ISrxc0Coh6SbffYh8Yq2FlQN35_zA,213
|
59
|
-
magic_pdf/model/pdf_extract_kit.py,sha256=
|
59
|
+
magic_pdf/model/pdf_extract_kit.py,sha256=S-UVZQroUe-eEAJzuOucdCh9FCAWy2DVAZow3dGUiWI,12520
|
60
60
|
magic_pdf/model/pp_structure_v2.py,sha256=NcqFWL4nUtjl82MFak8HX_8V3i4Aw_fK4dATrIp5uGs,3840
|
61
61
|
magic_pdf/model/sub_modules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
62
|
-
magic_pdf/model/sub_modules/model_init.py,sha256
|
62
|
+
magic_pdf/model/sub_modules/model_init.py,sha256=RCv6BkRLEFBKrfVReRvIvbRQ21BZLz8jj-AKQhwHkhw,6520
|
63
63
|
magic_pdf/model/sub_modules/model_utils.py,sha256=2pI1Xcr2zCF3b64e4WoFtIbjSmTVYBE4zjyHB23gvmE,2488
|
64
64
|
magic_pdf/model/sub_modules/language_detection/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
65
65
|
magic_pdf/model/sub_modules/language_detection/utils.py,sha256=5nec_loLyYCJ5o6n38AYLz2SKmRvHDCBdt6ka84EaGM,3096
|
66
66
|
magic_pdf/model/sub_modules/language_detection/yolov11/YOLOv11.py,sha256=GW_9WkqIzpJm1MFJexZ2ZvA6AjoqM-6yh8p4LupJhas,4762
|
67
67
|
magic_pdf/model/sub_modules/language_detection/yolov11/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
68
68
|
magic_pdf/model/sub_modules/layout/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
69
|
-
magic_pdf/model/sub_modules/layout/doclayout_yolo/DocLayoutYOLO.py,sha256=
|
69
|
+
magic_pdf/model/sub_modules/layout/doclayout_yolo/DocLayoutYOLO.py,sha256=gy7rc8poO-Zr8511NJjuBV8Uryq5k3JKrstLtCONg0c,2237
|
70
70
|
magic_pdf/model/sub_modules/layout/doclayout_yolo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
71
71
|
magic_pdf/model/sub_modules/layout/layoutlmv3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
72
72
|
magic_pdf/model/sub_modules/layout/layoutlmv3/backbone.py,sha256=1cvSCczgvwOLdvzWyqttoYPMHsXmnzI3w9abJ1bAXoM,7106
|
@@ -92,11 +92,11 @@ magic_pdf/model/sub_modules/mfd/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
|
|
92
92
|
magic_pdf/model/sub_modules/mfd/yolov8/YOLOv8.py,sha256=QfHbMr1br0pOJUu1NJEMgA6yw11G0yFImJv_AfW48_c,1008
|
93
93
|
magic_pdf/model/sub_modules/mfd/yolov8/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
94
94
|
magic_pdf/model/sub_modules/mfr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
95
|
-
magic_pdf/model/sub_modules/mfr/unimernet/Unimernet.py,sha256=
|
95
|
+
magic_pdf/model/sub_modules/mfr/unimernet/Unimernet.py,sha256=R05qw54QuLl2btNWdkxf4yCjDeEj8o0786e-gz_Xv8k,5290
|
96
96
|
magic_pdf/model/sub_modules/mfr/unimernet/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
97
97
|
magic_pdf/model/sub_modules/ocr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
98
98
|
magic_pdf/model/sub_modules/ocr/paddleocr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
99
|
-
magic_pdf/model/sub_modules/ocr/paddleocr/ocr_utils.py,sha256=
|
99
|
+
magic_pdf/model/sub_modules/ocr/paddleocr/ocr_utils.py,sha256=biuLnQWUquZkxmObjpg33iVCPPJKbRA4kx0Uo6OvGyc,12672
|
100
100
|
magic_pdf/model/sub_modules/ocr/paddleocr/ppocr_273_mod.py,sha256=QBBeFN1iF7nj5gqQ5sQXjhpwy8lB4c96gubnRDBuDNU,8424
|
101
101
|
magic_pdf/model/sub_modules/ocr/paddleocr/ppocr_291_mod.py,sha256=VouMTvi6M5TV6pQdlpusgfyZapxiZ_Wi7Ff53eMC3rE,8996
|
102
102
|
magic_pdf/model/sub_modules/reading_oreder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -106,7 +106,7 @@ magic_pdf/model/sub_modules/reading_oreder/layoutreader/xycut.py,sha256=ezNSq_Y4
|
|
106
106
|
magic_pdf/model/sub_modules/table/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
107
107
|
magic_pdf/model/sub_modules/table/table_utils.py,sha256=B9BC4f5EEjlt2ldYxrIC8Wic2Tz3t3gTJeEyK3ggrOU,282
|
108
108
|
magic_pdf/model/sub_modules/table/rapidtable/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
109
|
-
magic_pdf/model/sub_modules/table/rapidtable/rapid_table.py,sha256=
|
109
|
+
magic_pdf/model/sub_modules/table/rapidtable/rapid_table.py,sha256=6TUO6wiA4oZQB2_VP6kngZF6-2cI6mAP57Qf2lv6LVw,2922
|
110
110
|
magic_pdf/model/sub_modules/table/structeqtable/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
111
111
|
magic_pdf/model/sub_modules/table/structeqtable/struct_eqtable.py,sha256=SrNPm-uOFEvN5muFGbXTAuwzXm-rCiaihVdqbydIBIA,1131
|
112
112
|
magic_pdf/model/sub_modules/table/tablemaster/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -115,17 +115,18 @@ magic_pdf/operators/__init__.py,sha256=liU2-WYUvsQ1G4PYBppyvokS9z5IjrnlVMtoBAC1R
|
|
115
115
|
magic_pdf/operators/models.py,sha256=mRqbCVrxxaUVDpEBAsXaK7EL1M-goICkE1W0FYgewio,5305
|
116
116
|
magic_pdf/operators/pipes.py,sha256=XgBgisKQd_ruW-3Tw4v5LhqloZUHgn2aFcpi_q8LbCs,6767
|
117
117
|
magic_pdf/post_proc/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
118
|
-
magic_pdf/post_proc/llm_aided.py,sha256=
|
118
|
+
magic_pdf/post_proc/llm_aided.py,sha256=p-XwDObLkDv5rPxsdI7092MP-rHCMr1uAUq3fs7Zc-E,6334
|
119
|
+
magic_pdf/post_proc/llm_aided_ocr.py,sha256=89kxzEQVqNGSUtmvgcg2AVDDmgb43bamdRxXbwS2FxQ,33557
|
119
120
|
magic_pdf/post_proc/para_split_v3.py,sha256=v4SdQn4OZdHRXpWQMfQ-FGJz_tglQ88uFUqpwY542Fo,16922
|
120
121
|
magic_pdf/pre_proc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
121
122
|
magic_pdf/pre_proc/construct_page_dict.py,sha256=OFmq5XRKi6fYIo-lmGlL-NB16Sf0egzsfEx-fT2uYrc,660
|
122
123
|
magic_pdf/pre_proc/cut_image.py,sha256=NDzbxwD7z7Tb4uAxL4KR6LzURFdN1Tzr4nPvj-VmEqc,1225
|
123
124
|
magic_pdf/pre_proc/ocr_detect_all_bboxes.py,sha256=nt88ttXCEI_1ihAF7HU15SQjwM69V-iJmk-L_nyzA6o,9328
|
124
125
|
magic_pdf/pre_proc/ocr_dict_merge.py,sha256=Ycgz2whzotL7kwl0-mHNV48QOQ2j4tRXqLSQrJRojYg,4847
|
125
|
-
magic_pdf/pre_proc/ocr_span_list_modify.py,sha256=
|
126
|
+
magic_pdf/pre_proc/ocr_span_list_modify.py,sha256=xrgC9vR0poklZuY4Og41pZVdXzuaGFg3BnQ01X60dpo,3102
|
126
127
|
magic_pdf/pre_proc/remove_bbox_overlap.py,sha256=mcdxAh4P56NZ3Ij8h3vW8qC_SrszfXflVWuWUuUiTNg,3089
|
127
128
|
magic_pdf/resources/fasttext-langdetect/lid.176.ftz,sha256=jzRyz-hzintgmejpmcPL-uDc0VaWqsfXc4qAOdtgPoM,938013
|
128
|
-
magic_pdf/resources/model_config/model_configs.yaml,sha256=
|
129
|
+
magic_pdf/resources/model_config/model_configs.yaml,sha256=v3HwFTmIbXJJEBXUHHHMnZQKRo6ZQtP3cncSebh-5gc,322
|
129
130
|
magic_pdf/resources/model_config/UniMERNet/demo.yaml,sha256=Jdaim2D2lAYrV9rhc1X5Sy2_IacGOrfysJhxEUgSElo,827
|
130
131
|
magic_pdf/resources/model_config/layoutlmv3/layoutlmv3_base_inference.yaml,sha256=9aNAEYgpHTAWpcUrDvuPG2y4V-Qw8QdcJefi96y8yDU,6109
|
131
132
|
magic_pdf/resources/yolov11-langdetect/yolo_v11_ft.pt,sha256=dV4lcudF8wimEbAooYbvISvFhrXjp9i0rMRqv9VW6hY,3204667
|
@@ -138,9 +139,9 @@ magic_pdf/tools/common.py,sha256=1LfMeXBBsb3WlGeNAze_pPOYXQ8Qbfh-JgRXweojHKo,838
|
|
138
139
|
magic_pdf/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
139
140
|
magic_pdf/utils/annotations.py,sha256=82ou3uELNbQWa9hOFFkVt0gsIskAKf5msCv5J2IJ5V0,211
|
140
141
|
magic_pdf/utils/office_to_pdf.py,sha256=7aj-Ls2v8saD-Rgu_t3FIc-J3Ka9wnmiEH5zY-H1Vxs,729
|
141
|
-
magic_pdf-1.
|
142
|
-
magic_pdf-1.
|
143
|
-
magic_pdf-1.
|
144
|
-
magic_pdf-1.
|
145
|
-
magic_pdf-1.
|
146
|
-
magic_pdf-1.
|
142
|
+
magic_pdf-1.1.0.dist-info/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
|
143
|
+
magic_pdf-1.1.0.dist-info/METADATA,sha256=Ud48caL9BHS-ZuLN-3VpswLJFPqao7KqY0aqfF0ApOo,40958
|
144
|
+
magic_pdf-1.1.0.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
145
|
+
magic_pdf-1.1.0.dist-info/entry_points.txt,sha256=wXwYke3j8fqDQTocUspL-CqDUEv3Tfcwp09fM8dZAhA,98
|
146
|
+
magic_pdf-1.1.0.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
|
147
|
+
magic_pdf-1.1.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|