magic-pdf 0.5.8__py3-none-any.whl → 0.5.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
magic_pdf/libs/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.5.8"
1
+ __version__ = "0.5.9"
@@ -0,0 +1,96 @@
1
+ Metadata-Version: 2.1
2
+ Name: magic-pdf
3
+ Version: 0.5.9
4
+ Summary: A practical tool for converting PDF to Markdown
5
+ Home-page: https://github.com/magicpdf/Magic-PDF
6
+ Requires-Python: >=3.9
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENSE.md
9
+ Requires-Dist: boto3 >=1.28.43
10
+ Requires-Dist: Brotli >=1.1.0
11
+ Requires-Dist: click >=8.1.7
12
+ Requires-Dist: Distance >=0.1.3
13
+ Requires-Dist: PyMuPDF >=1.24.5
14
+ Requires-Dist: loguru >=0.6.0
15
+ Requires-Dist: matplotlib >=3.8.3
16
+ Requires-Dist: numpy >=1.21.6
17
+ Requires-Dist: pandas >=1.3.5
18
+ Requires-Dist: fast-langdetect >=0.1.1
19
+ Requires-Dist: regex >=2023.12.25
20
+ Requires-Dist: termcolor >=2.4.0
21
+ Requires-Dist: wordninja >=2.0.0
22
+ Requires-Dist: scikit-learn >=1.0.2
23
+ Requires-Dist: nltk ==3.8.1
24
+ Requires-Dist: s3pathlib >=2.1.1
25
+ Requires-Dist: paddleocr
26
+ Requires-Dist: pdfminer.six >=20231228
27
+ Provides-Extra: cpu
28
+ Requires-Dist: paddlepaddle ; extra == 'cpu'
29
+ Provides-Extra: gpu
30
+ Requires-Dist: paddlepaddle-gpu ; extra == 'gpu'
31
+
32
+ <div id="top"></div>
33
+ <div align="center">
34
+
35
+ [![stars](https://img.shields.io/github/stars/magicpdf/Magic-PDF.svg)](https://github.com/magicpdf/Magic-PDF)
36
+ [![forks](https://img.shields.io/github/forks/magicpdf/Magic-PDF.svg)](https://github.com/magicpdf/Magic-PDF)
37
+ [![license](https://img.shields.io/github/license/magicpdf/Magic-PDF.svg)](https://github.com/magicpdf/Magic-PDF/tree/main/LICENSE)
38
+ [![issue resolution](https://img.shields.io/github/issues-closed-raw/magicpdf/Magic-PDF)](https://github.com/magicpdf/Magic-PDF/issues)
39
+ [![open issues](https://img.shields.io/github/issues-raw/magicpdf/Magic-PDF)](https://github.com/magicpdf/Magic-PDF/issues)
40
+
41
+ [English](README.md) | [简体中文](README_zh-CN.md)
42
+
43
+ </div>
44
+
45
+ <div align="center">
46
+
47
+ </div>
48
+
49
+ # Magic-PDF
50
+
51
+ ## Introduction
52
+
53
+ Magic-PDF is a tool that converts PDF documents into Markdown format. It can process files stored locally or on object storage that supports the S3 protocol.
54
+
55
+ Key features include:
56
+
57
+ - Support for multiple front-end model inputs
58
+ - Removal of headers, footers, footnotes, and page numbers
59
+ - Human-readable layout formatting
60
+ - Retention of the original document's structure and formatting, including headings, paragraphs, lists, and more
61
+ - Extraction and display of images and tables within markdown
62
+ - Conversion of equations into LaTeX format
63
+ - Automatic detection and conversion of garbled PDFs
64
+ - Compatibility with CPU and GPU environments
65
+ - Available for Windows, Linux, and macOS platforms
66
+
67
+ ## Getting Started
68
+
69
+ ### Requirements
70
+
71
+ - Python 3.9 or newer
72
+
73
+ ### Usage Instructions
74
+
75
+ 1. **Install Magic-PDF**
76
+
77
+ ```bash
78
+ pip install magic-pdf[cpu] # Install the CPU version
79
+ # or
80
+ pip install magic-pdf[gpu] # Install the GPU version
81
+ ```
82
+
83
+ 2. **Usage via Command Line**
84
+
85
+ ```bash
86
+ magic-pdf --help
87
+ ```
88
+
89
+ ## License Information
90
+
91
+ See [LICENSE.md](https://github.com/magicpdf/Magic-PDF/blob/master/LICENSE.md) for details.
92
+
93
+ ## Acknowledgments
94
+
95
+ - [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR)
96
+ - [PyMuPDF](https://github.com/pymupdf/PyMuPDF)
@@ -46,7 +46,7 @@ magic_pdf/libs/pdf_check.py,sha256=MAe8wzwT0qvPf_I72wEZG7k1g4haNHS7oUtLqkB5rlE,2
46
46
  magic_pdf/libs/pdf_image_tools.py,sha256=CAd01giTKr_UJz1_QtDOARG9G9z69GFpzRZwcWSfLtE,1282
47
47
  magic_pdf/libs/safe_filename.py,sha256=ckwcM_eqoysTb5id8czp-tXq2G9da0-l3pshZDCHQtE,236
48
48
  magic_pdf/libs/textbase.py,sha256=SC1Frhz3Fb7V7n2SFRBsl7Bmg0JZdlvZskq0lfW1vIk,732
49
- magic_pdf/libs/version.py,sha256=bDuZ37zImJZsQ3a4pW87q4kg-zsIBrUFAv1aumIf_7k,22
49
+ magic_pdf/libs/version.py,sha256=JXLyhF5WmLgRZBfWGz9zWe2g5ISKSLpn2jp8yLaC-s4,22
50
50
  magic_pdf/libs/vis_utils.py,sha256=hTOTEakKV0pGMbk0tbRkVI_tku7A3dGc96ynObZ4kwI,10207
51
51
  magic_pdf/model/360_layout_analysis.py,sha256=GbchKPJRVcrxvwNXMnR4vt8lOLPauTWMl-43ayyhX7U,221
52
52
  magic_pdf/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -117,8 +117,9 @@ magic_pdf/train_utils/convert_to_train_format.py,sha256=ifo2FAoBMa_etCvz0O4v03xO
117
117
  magic_pdf/train_utils/extract_caption.py,sha256=gommEqIEWLplSDEJWD7_66daqlOBsWhpRBW1DHpkny4,1825
118
118
  magic_pdf/train_utils/remove_footer_header.py,sha256=pyeNNdJ-th3wl5Xwb10ZLYNaFN4-6BmahoMFE8VTNNs,5978
119
119
  magic_pdf/train_utils/vis_utils.py,sha256=MV9N9cT3ifJ35u7LFKGF9I_bOIQrtU1zcsxu2hj3aqM,10111
120
- magic_pdf-0.5.8.dist-info/LICENSE.md,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
121
- magic_pdf-0.5.8.dist-info/METADATA,sha256=Z7HrhP7T0_dQOjCX-CztMe77Mbt90IoY8JH0IhmRHH0,814
122
- magic_pdf-0.5.8.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
123
- magic_pdf-0.5.8.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
124
- magic_pdf-0.5.8.dist-info/RECORD,,
120
+ magic_pdf-0.5.9.dist-info/LICENSE.md,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
121
+ magic_pdf-0.5.9.dist-info/METADATA,sha256=6Y0tWpKEWrjYaNVrBWddqU9mn4EKR8cSbka47hUSmog,2971
122
+ magic_pdf-0.5.9.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
123
+ magic_pdf-0.5.9.dist-info/entry_points.txt,sha256=NbSkSmE08UuTwdoJD8Uofq8iyufySA4x7jmIIk4YCzI,57
124
+ magic_pdf-0.5.9.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
125
+ magic_pdf-0.5.9.dist-info/RECORD,,
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ magic-pdf = magic_pdf.cli.magicpdf:cli
@@ -1,28 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: magic-pdf
3
- Version: 0.5.8
4
- Requires-Python: >=3.9
5
- License-File: LICENSE.md
6
- Requires-Dist: boto3 >=1.28.43
7
- Requires-Dist: Brotli >=1.1.0
8
- Requires-Dist: click >=8.1.7
9
- Requires-Dist: Distance >=0.1.3
10
- Requires-Dist: PyMuPDF >=1.24.5
11
- Requires-Dist: loguru >=0.6.0
12
- Requires-Dist: matplotlib >=3.8.3
13
- Requires-Dist: numpy >=1.21.6
14
- Requires-Dist: pandas >=1.3.5
15
- Requires-Dist: fast-langdetect >=0.1.1
16
- Requires-Dist: regex >=2023.12.25
17
- Requires-Dist: termcolor >=2.4.0
18
- Requires-Dist: wordninja >=2.0.0
19
- Requires-Dist: scikit-learn >=1.0.2
20
- Requires-Dist: nltk ==3.8.1
21
- Requires-Dist: s3pathlib >=2.1.1
22
- Requires-Dist: paddleocr
23
- Requires-Dist: pdfminer.six >=20231228
24
- Provides-Extra: cpu
25
- Requires-Dist: paddlepaddle ; extra == 'cpu'
26
- Provides-Extra: gpu
27
- Requires-Dist: paddlepaddle-gpu ; extra == 'gpu'
28
-