magic-pdf 0.5.8__py3-none-any.whl → 0.5.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- magic_pdf/libs/version.py +1 -1
- magic_pdf-0.5.9.dist-info/METADATA +96 -0
- {magic_pdf-0.5.8.dist-info → magic_pdf-0.5.9.dist-info}/RECORD +7 -6
- magic_pdf-0.5.9.dist-info/entry_points.txt +2 -0
- magic_pdf-0.5.8.dist-info/METADATA +0 -28
- {magic_pdf-0.5.8.dist-info → magic_pdf-0.5.9.dist-info}/LICENSE.md +0 -0
- {magic_pdf-0.5.8.dist-info → magic_pdf-0.5.9.dist-info}/WHEEL +0 -0
- {magic_pdf-0.5.8.dist-info → magic_pdf-0.5.9.dist-info}/top_level.txt +0 -0
magic_pdf/libs/version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "0.5.8"
|
1
|
+
__version__ = "0.5.9"
|
@@ -0,0 +1,96 @@
|
|
1
|
+
Metadata-Version: 2.1
|
2
|
+
Name: magic-pdf
|
3
|
+
Version: 0.5.9
|
4
|
+
Summary: A practical tool for converting PDF to Markdown
|
5
|
+
Home-page: https://github.com/magicpdf/Magic-PDF
|
6
|
+
Requires-Python: >=3.9
|
7
|
+
Description-Content-Type: text/markdown
|
8
|
+
License-File: LICENSE.md
|
9
|
+
Requires-Dist: boto3 >=1.28.43
|
10
|
+
Requires-Dist: Brotli >=1.1.0
|
11
|
+
Requires-Dist: click >=8.1.7
|
12
|
+
Requires-Dist: Distance >=0.1.3
|
13
|
+
Requires-Dist: PyMuPDF >=1.24.5
|
14
|
+
Requires-Dist: loguru >=0.6.0
|
15
|
+
Requires-Dist: matplotlib >=3.8.3
|
16
|
+
Requires-Dist: numpy >=1.21.6
|
17
|
+
Requires-Dist: pandas >=1.3.5
|
18
|
+
Requires-Dist: fast-langdetect >=0.1.1
|
19
|
+
Requires-Dist: regex >=2023.12.25
|
20
|
+
Requires-Dist: termcolor >=2.4.0
|
21
|
+
Requires-Dist: wordninja >=2.0.0
|
22
|
+
Requires-Dist: scikit-learn >=1.0.2
|
23
|
+
Requires-Dist: nltk ==3.8.1
|
24
|
+
Requires-Dist: s3pathlib >=2.1.1
|
25
|
+
Requires-Dist: paddleocr
|
26
|
+
Requires-Dist: pdfminer.six >=20231228
|
27
|
+
Provides-Extra: cpu
|
28
|
+
Requires-Dist: paddlepaddle ; extra == 'cpu'
|
29
|
+
Provides-Extra: gpu
|
30
|
+
Requires-Dist: paddlepaddle-gpu ; extra == 'gpu'
|
31
|
+
|
32
|
+
<div id="top"></div>
|
33
|
+
<div align="center">
|
34
|
+
|
35
|
+
[](https://github.com/magicpdf/Magic-PDF)
|
36
|
+
[](https://github.com/magicpdf/Magic-PDF)
|
37
|
+
[](https://github.com/magicpdf/Magic-PDF/tree/main/LICENSE)
|
38
|
+
[](https://github.com/magicpdf/Magic-PDF/issues)
|
39
|
+
[](https://github.com/magicpdf/Magic-PDF/issues)
|
40
|
+
|
41
|
+
[English](README.md) | [简体中文](README_zh-CN.md)
|
42
|
+
|
43
|
+
</div>
|
44
|
+
|
45
|
+
<div align="center">
|
46
|
+
|
47
|
+
</div>
|
48
|
+
|
49
|
+
# Magic-PDF
|
50
|
+
|
51
|
+
## Introduction
|
52
|
+
|
53
|
+
Magic-PDF is a tool designed to convert PDF documents into Markdown format, capable of processing files stored locally or on object storage that supports the S3 protocol.
|
54
|
+
|
55
|
+
Key features include:
|
56
|
+
|
57
|
+
- Support for multiple front-end model inputs
|
58
|
+
- Removal of headers, footers, footnotes, and page numbers
|
59
|
+
- Human-readable layout formatting
|
60
|
+
- Retains the original document's structure and formatting, including headings, paragraphs, lists, and more
|
61
|
+
- Extraction and display of images and tables within markdown
|
62
|
+
- Conversion of equations into LaTeX format
|
63
|
+
- Automatic detection and conversion of garbled PDFs
|
64
|
+
- Compatibility with CPU and GPU environments
|
65
|
+
- Available for Windows, Linux, and macOS platforms
|
66
|
+
|
67
|
+
## Getting Started
|
68
|
+
|
69
|
+
### Requirements
|
70
|
+
|
71
|
+
- Python 3.9 or newer
|
72
|
+
|
73
|
+
### Usage Instructions
|
74
|
+
|
75
|
+
1. **Install Magic-PDF**
|
76
|
+
|
77
|
+
```bash
|
78
|
+
pip install magic-pdf[cpu] # Install the CPU version
|
79
|
+
# or
|
80
|
+
pip install magic-pdf[gpu] # Install the GPU version
|
81
|
+
```
|
82
|
+
|
83
|
+
2. **Usage via Command Line**
|
84
|
+
|
85
|
+
```bash
|
86
|
+
magic-pdf --help
|
87
|
+
```
|
88
|
+
|
89
|
+
## License Information
|
90
|
+
|
91
|
+
See [LICENSE.md](https://github.com/magicpdf/Magic-PDF/blob/master/LICENSE.md) for details.
|
92
|
+
|
93
|
+
## Acknowledgments
|
94
|
+
|
95
|
+
- [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR)
|
96
|
+
- [PyMuPDF](https://github.com/pymupdf/PyMuPDF)
|
@@ -46,7 +46,7 @@ magic_pdf/libs/pdf_check.py,sha256=MAe8wzwT0qvPf_I72wEZG7k1g4haNHS7oUtLqkB5rlE,2
|
|
46
46
|
magic_pdf/libs/pdf_image_tools.py,sha256=CAd01giTKr_UJz1_QtDOARG9G9z69GFpzRZwcWSfLtE,1282
|
47
47
|
magic_pdf/libs/safe_filename.py,sha256=ckwcM_eqoysTb5id8czp-tXq2G9da0-l3pshZDCHQtE,236
|
48
48
|
magic_pdf/libs/textbase.py,sha256=SC1Frhz3Fb7V7n2SFRBsl7Bmg0JZdlvZskq0lfW1vIk,732
|
49
|
-
magic_pdf/libs/version.py,sha256=
|
49
|
+
magic_pdf/libs/version.py,sha256=JXLyhF5WmLgRZBfWGz9zWe2g5ISKSLpn2jp8yLaC-s4,22
|
50
50
|
magic_pdf/libs/vis_utils.py,sha256=hTOTEakKV0pGMbk0tbRkVI_tku7A3dGc96ynObZ4kwI,10207
|
51
51
|
magic_pdf/model/360_layout_analysis.py,sha256=GbchKPJRVcrxvwNXMnR4vt8lOLPauTWMl-43ayyhX7U,221
|
52
52
|
magic_pdf/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -117,8 +117,9 @@ magic_pdf/train_utils/convert_to_train_format.py,sha256=ifo2FAoBMa_etCvz0O4v03xO
|
|
117
117
|
magic_pdf/train_utils/extract_caption.py,sha256=gommEqIEWLplSDEJWD7_66daqlOBsWhpRBW1DHpkny4,1825
|
118
118
|
magic_pdf/train_utils/remove_footer_header.py,sha256=pyeNNdJ-th3wl5Xwb10ZLYNaFN4-6BmahoMFE8VTNNs,5978
|
119
119
|
magic_pdf/train_utils/vis_utils.py,sha256=MV9N9cT3ifJ35u7LFKGF9I_bOIQrtU1zcsxu2hj3aqM,10111
|
120
|
-
magic_pdf-0.5.8.dist-info/LICENSE.md,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
|
121
|
-
magic_pdf-0.5.
|
122
|
-
magic_pdf-0.5.8.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
123
|
-
magic_pdf-0.5.8.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
|
124
|
-
magic_pdf-0.5.8.dist-info/RECORD,,
|
120
|
+
magic_pdf-0.5.9.dist-info/LICENSE.md,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
|
121
|
+
magic_pdf-0.5.9.dist-info/METADATA,sha256=6Y0tWpKEWrjYaNVrBWddqU9mn4EKR8cSbka47hUSmog,2971
|
122
|
+
magic_pdf-0.5.9.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
123
|
+
magic_pdf-0.5.9.dist-info/entry_points.txt,sha256=NbSkSmE08UuTwdoJD8Uofq8iyufySA4x7jmIIk4YCzI,57
|
124
|
+
magic_pdf-0.5.9.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
|
125
|
+
magic_pdf-0.5.9.dist-info/RECORD,,
|
@@ -1,28 +0,0 @@
|
|
1
|
-
Metadata-Version: 2.1
|
2
|
-
Name: magic-pdf
|
3
|
-
Version: 0.5.8
|
4
|
-
Requires-Python: >=3.9
|
5
|
-
License-File: LICENSE.md
|
6
|
-
Requires-Dist: boto3 >=1.28.43
|
7
|
-
Requires-Dist: Brotli >=1.1.0
|
8
|
-
Requires-Dist: click >=8.1.7
|
9
|
-
Requires-Dist: Distance >=0.1.3
|
10
|
-
Requires-Dist: PyMuPDF >=1.24.5
|
11
|
-
Requires-Dist: loguru >=0.6.0
|
12
|
-
Requires-Dist: matplotlib >=3.8.3
|
13
|
-
Requires-Dist: numpy >=1.21.6
|
14
|
-
Requires-Dist: pandas >=1.3.5
|
15
|
-
Requires-Dist: fast-langdetect >=0.1.1
|
16
|
-
Requires-Dist: regex >=2023.12.25
|
17
|
-
Requires-Dist: termcolor >=2.4.0
|
18
|
-
Requires-Dist: wordninja >=2.0.0
|
19
|
-
Requires-Dist: scikit-learn >=1.0.2
|
20
|
-
Requires-Dist: nltk ==3.8.1
|
21
|
-
Requires-Dist: s3pathlib >=2.1.1
|
22
|
-
Requires-Dist: paddleocr
|
23
|
-
Requires-Dist: pdfminer.six >=20231228
|
24
|
-
Provides-Extra: cpu
|
25
|
-
Requires-Dist: paddlepaddle ; extra == 'cpu'
|
26
|
-
Provides-Extra: gpu
|
27
|
-
Requires-Dist: paddlepaddle-gpu ; extra == 'gpu'
|
28
|
-
|
File without changes
|
File without changes
|
File without changes
|