magic-pdf 0.10.6__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. magic_pdf/config/constants.py +2 -0
  2. magic_pdf/config/exceptions.py +7 -0
  3. magic_pdf/data/data_reader_writer/filebase.py +1 -1
  4. magic_pdf/data/data_reader_writer/multi_bucket_s3.py +8 -6
  5. magic_pdf/data/dataset.py +13 -1
  6. magic_pdf/data/read_api.py +59 -12
  7. magic_pdf/data/utils.py +35 -0
  8. magic_pdf/dict2md/ocr_mkcontent.py +14 -13
  9. magic_pdf/libs/clean_memory.py +11 -4
  10. magic_pdf/libs/config_reader.py +9 -0
  11. magic_pdf/libs/draw_bbox.py +8 -12
  12. magic_pdf/libs/language.py +3 -0
  13. magic_pdf/libs/version.py +1 -1
  14. magic_pdf/model/__init__.py +1 -125
  15. magic_pdf/model/batch_analyze.py +275 -0
  16. magic_pdf/model/doc_analyze_by_custom_model.py +4 -51
  17. magic_pdf/model/magic_model.py +4 -435
  18. magic_pdf/model/model_list.py +1 -0
  19. magic_pdf/model/pdf_extract_kit.py +33 -22
  20. magic_pdf/model/sub_modules/language_detection/__init__.py +1 -0
  21. magic_pdf/model/sub_modules/language_detection/utils.py +82 -0
  22. magic_pdf/model/sub_modules/language_detection/yolov11/YOLOv11.py +139 -0
  23. magic_pdf/model/sub_modules/language_detection/yolov11/__init__.py +1 -0
  24. magic_pdf/model/sub_modules/layout/doclayout_yolo/DocLayoutYOLO.py +44 -7
  25. magic_pdf/model/sub_modules/mfd/yolov8/YOLOv8.py +21 -2
  26. magic_pdf/model/sub_modules/mfr/unimernet/Unimernet.py +70 -27
  27. magic_pdf/model/sub_modules/model_init.py +30 -4
  28. magic_pdf/model/sub_modules/model_utils.py +8 -2
  29. magic_pdf/model/sub_modules/ocr/paddleocr/ocr_utils.py +51 -1
  30. magic_pdf/model/sub_modules/ocr/paddleocr/ppocr_273_mod.py +32 -6
  31. magic_pdf/model/sub_modules/table/rapidtable/rapid_table.py +42 -7
  32. magic_pdf/operators/__init__.py +94 -0
  33. magic_pdf/{model/operators.py → operators/models.py} +2 -38
  34. magic_pdf/{pipe/operators.py → operators/pipes.py} +70 -17
  35. magic_pdf/pdf_parse_union_core_v2.py +71 -17
  36. magic_pdf/post_proc/__init__.py +1 -0
  37. magic_pdf/post_proc/llm_aided.py +133 -0
  38. magic_pdf/pre_proc/ocr_span_list_modify.py +8 -0
  39. magic_pdf/pre_proc/remove_bbox_overlap.py +1 -1
  40. magic_pdf/resources/yolov11-langdetect/yolo_v11_ft.pt +0 -0
  41. magic_pdf/tools/cli.py +36 -11
  42. magic_pdf/tools/common.py +28 -18
  43. magic_pdf/utils/office_to_pdf.py +29 -0
  44. {magic_pdf-0.10.6.dist-info → magic_pdf-1.0.1.dist-info}/METADATA +73 -23
  45. {magic_pdf-0.10.6.dist-info → magic_pdf-1.0.1.dist-info}/RECORD +50 -53
  46. magic_pdf/para/__init__.py +0 -0
  47. magic_pdf/pdf_parse_by_ocr.py +0 -22
  48. magic_pdf/pdf_parse_by_txt.py +0 -23
  49. magic_pdf/pipe/AbsPipe.py +0 -99
  50. magic_pdf/pipe/OCRPipe.py +0 -80
  51. magic_pdf/pipe/TXTPipe.py +0 -42
  52. magic_pdf/pipe/UNIPipe.py +0 -150
  53. magic_pdf/pipe/__init__.py +0 -0
  54. magic_pdf/rw/AbsReaderWriter.py +0 -17
  55. magic_pdf/rw/DiskReaderWriter.py +0 -74
  56. magic_pdf/rw/S3ReaderWriter.py +0 -142
  57. magic_pdf/rw/__init__.py +0 -0
  58. magic_pdf/user_api.py +0 -144
  59. /magic_pdf/{para → post_proc}/para_split_v3.py +0 -0
  60. {magic_pdf-0.10.6.dist-info → magic_pdf-1.0.1.dist-info}/LICENSE.md +0 -0
  61. {magic_pdf-0.10.6.dist-info → magic_pdf-1.0.1.dist-info}/WHEEL +0 -0
  62. {magic_pdf-0.10.6.dist-info → magic_pdf-1.0.1.dist-info}/entry_points.txt +0 -0
  63. {magic_pdf-0.10.6.dist-info → magic_pdf-1.0.1.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,14 @@ def remove_overlaps_low_confidence_spans(spans):
33
33
  return spans, dropped_spans
34
34
 
35
35
 
36
+ def check_chars_is_overlap_in_span(chars):
37
+ for i in range(len(chars)):
38
+ for j in range(i + 1, len(chars)):
39
+ if calculate_iou(chars[i]['bbox'], chars[j]['bbox']) > 0.9:
40
+ return True
41
+ return False
42
+
43
+
36
44
  def remove_overlaps_min_spans(spans):
37
45
  dropped_spans = []
38
46
  # 删除重叠spans中较小的那些
@@ -70,7 +70,7 @@ def _remove_overlap_between_bboxes(arr):
70
70
  res[i] = None
71
71
  else:
72
72
  keeps[idx] = False
73
- drop_reasons.append(drop_reasons)
73
+ drop_reasons.append(drop_reason)
74
74
  if keeps[idx]:
75
75
  res[idx] = v
76
76
  return res, drop_reasons
magic_pdf/tools/cli.py CHANGED
@@ -1,13 +1,20 @@
1
1
  import os
2
- from pathlib import Path
3
-
2
+ import shutil
3
+ import tempfile
4
4
  import click
5
+ import fitz
5
6
  from loguru import logger
7
+ from pathlib import Path
6
8
 
7
9
  import magic_pdf.model as model_config
8
10
  from magic_pdf.data.data_reader_writer import FileBasedDataReader
9
11
  from magic_pdf.libs.version import __version__
10
12
  from magic_pdf.tools.common import do_parse, parse_pdf_methods
13
+ from magic_pdf.utils.office_to_pdf import convert_file_to_pdf
14
+
15
+ pdf_suffixes = ['.pdf']
16
+ ms_office_suffixes = ['.ppt', '.pptx', '.doc', '.docx']
17
+ image_suffixes = ['.png', '.jpeg', '.jpg']
11
18
 
12
19
 
13
20
  @click.command()
@@ -21,7 +28,7 @@ from magic_pdf.tools.common import do_parse, parse_pdf_methods
21
28
  'path',
22
29
  type=click.Path(exists=True),
23
30
  required=True,
24
- help='local pdf filepath or directory',
31
+ help='local filepath or directory. support PDF, PPT, PPTX, DOC, DOCX, PNG, JPG files',
25
32
  )
26
33
  @click.option(
27
34
  '-o',
@@ -83,12 +90,27 @@ def cli(path, output_dir, method, lang, debug_able, start_page_id, end_page_id):
83
90
  model_config.__use_inside_model__ = True
84
91
  model_config.__model_mode__ = 'full'
85
92
  os.makedirs(output_dir, exist_ok=True)
93
+ temp_dir = tempfile.mkdtemp()
94
+ def read_fn(path: Path):
95
+ if path.suffix in ms_office_suffixes:
96
+ convert_file_to_pdf(str(path), temp_dir)
97
+ fn = os.path.join(temp_dir, f"{path.stem}.pdf")
98
+ elif path.suffix in image_suffixes:
99
+ with open(str(path), 'rb') as f:
100
+ bits = f.read()
101
+ pdf_bytes = fitz.open(stream=bits).convert_to_pdf()
102
+ fn = os.path.join(temp_dir, f"{path.stem}.pdf")
103
+ with open(fn, 'wb') as f:
104
+ f.write(pdf_bytes)
105
+ elif path.suffix in pdf_suffixes:
106
+ fn = str(path)
107
+ else:
108
+ raise Exception(f"Unknown file suffix: {path.suffix}")
109
+
110
+ disk_rw = FileBasedDataReader(os.path.dirname(fn))
111
+ return disk_rw.read(os.path.basename(fn))
86
112
 
87
- def read_fn(path):
88
- disk_rw = FileBasedDataReader(os.path.dirname(path))
89
- return disk_rw.read(os.path.basename(path))
90
-
91
- def parse_doc(doc_path: str):
113
+ def parse_doc(doc_path: Path):
92
114
  try:
93
115
  file_name = str(Path(doc_path).stem)
94
116
  pdf_data = read_fn(doc_path)
@@ -108,10 +130,13 @@ def cli(path, output_dir, method, lang, debug_able, start_page_id, end_page_id):
108
130
  logger.exception(e)
109
131
 
110
132
  if os.path.isdir(path):
111
- for doc_path in Path(path).glob('*.pdf'):
112
- parse_doc(doc_path)
133
+ for doc_path in Path(path).glob('*'):
134
+ if doc_path.suffix in pdf_suffixes + image_suffixes + ms_office_suffixes:
135
+ parse_doc(doc_path)
113
136
  else:
114
- parse_doc(path)
137
+ parse_doc(Path(path))
138
+
139
+ shutil.rmtree(temp_dir)
115
140
 
116
141
 
117
142
  if __name__ == '__main__':
magic_pdf/tools/common.py CHANGED
@@ -9,8 +9,9 @@ from magic_pdf.config.enums import SupportedPdfParseMethod
9
9
  from magic_pdf.config.make_content_config import DropMode, MakeMode
10
10
  from magic_pdf.data.data_reader_writer import FileBasedDataWriter
11
11
  from magic_pdf.data.dataset import PymuDocDataset
12
+ from magic_pdf.libs.draw_bbox import draw_char_bbox
12
13
  from magic_pdf.model.doc_analyze_by_custom_model import doc_analyze
13
- from magic_pdf.model.operators import InferenceResult
14
+ from magic_pdf.operators.models import InferenceResult
14
15
 
15
16
  # from io import BytesIO
16
17
  # from pypdf import PdfReader, PdfWriter
@@ -83,6 +84,7 @@ def do_parse(
83
84
  f_make_md_mode=MakeMode.MM_MD,
84
85
  f_draw_model_bbox=False,
85
86
  f_draw_line_sort_bbox=False,
87
+ f_draw_char_bbox=False,
86
88
  start_page_id=0,
87
89
  end_page_id=None,
88
90
  lang=None,
@@ -94,9 +96,7 @@ def do_parse(
94
96
  logger.warning('debug mode is on')
95
97
  f_draw_model_bbox = True
96
98
  f_draw_line_sort_bbox = True
97
-
98
- if lang == '':
99
- lang = None
99
+ # f_draw_char_bbox = True
100
100
 
101
101
  pdf_bytes = convert_pdf_bytes_to_bytes_by_pymupdf(
102
102
  pdf_bytes, start_page_id, end_page_id
@@ -109,7 +109,7 @@ def do_parse(
109
109
  )
110
110
  image_dir = str(os.path.basename(local_image_dir))
111
111
 
112
- ds = PymuDocDataset(pdf_bytes)
112
+ ds = PymuDocDataset(pdf_bytes, lang=lang)
113
113
 
114
114
  if len(model_list) == 0:
115
115
  if model_config.__use_inside_model__:
@@ -118,50 +118,50 @@ def do_parse(
118
118
  infer_result = ds.apply(
119
119
  doc_analyze,
120
120
  ocr=False,
121
- lang=lang,
121
+ lang=ds._lang,
122
122
  layout_model=layout_model,
123
123
  formula_enable=formula_enable,
124
124
  table_enable=table_enable,
125
125
  )
126
126
  pipe_result = infer_result.pipe_txt_mode(
127
- image_writer, debug_mode=True, lang=lang
127
+ image_writer, debug_mode=True, lang=ds._lang
128
128
  )
129
129
  else:
130
130
  infer_result = ds.apply(
131
131
  doc_analyze,
132
132
  ocr=True,
133
- lang=lang,
133
+ lang=ds._lang,
134
134
  layout_model=layout_model,
135
135
  formula_enable=formula_enable,
136
136
  table_enable=table_enable,
137
137
  )
138
138
  pipe_result = infer_result.pipe_ocr_mode(
139
- image_writer, debug_mode=True, lang=lang
139
+ image_writer, debug_mode=True, lang=ds._lang
140
140
  )
141
141
 
142
142
  elif parse_method == 'txt':
143
143
  infer_result = ds.apply(
144
144
  doc_analyze,
145
145
  ocr=False,
146
- lang=lang,
146
+ lang=ds._lang,
147
147
  layout_model=layout_model,
148
148
  formula_enable=formula_enable,
149
149
  table_enable=table_enable,
150
150
  )
151
151
  pipe_result = infer_result.pipe_txt_mode(
152
- image_writer, debug_mode=True, lang=lang
152
+ image_writer, debug_mode=True, lang=ds._lang
153
153
  )
154
154
  elif parse_method == 'ocr':
155
155
  infer_result = ds.apply(
156
156
  doc_analyze,
157
157
  ocr=True,
158
- lang=lang,
158
+ lang=ds._lang,
159
159
  layout_model=layout_model,
160
160
  formula_enable=formula_enable,
161
161
  table_enable=table_enable,
162
162
  )
163
163
  pipe_result = infer_result.pipe_ocr_mode(
164
- image_writer, debug_mode=True, lang=lang
164
+ image_writer, debug_mode=True, lang=ds._lang
165
165
  )
166
166
  else:
167
167
  logger.error('unknown parse method')
@@ -170,19 +170,26 @@ def do_parse(
170
170
  logger.error('need model list input')
171
171
  exit(2)
172
172
  else:
173
+
173
174
  infer_result = InferenceResult(model_list, ds)
174
175
  if parse_method == 'ocr':
175
176
  pipe_result = infer_result.pipe_ocr_mode(
176
- image_writer, debug_mode=True, lang=lang
177
+ image_writer, debug_mode=True, lang=ds._lang
177
178
  )
178
179
  elif parse_method == 'txt':
179
180
  pipe_result = infer_result.pipe_txt_mode(
180
- image_writer, debug_mode=True, lang=lang
181
+ image_writer, debug_mode=True, lang=ds._lang
181
182
  )
182
183
  else:
183
- pipe_result = infer_result.pipe_auto_mode(
184
- image_writer, debug_mode=True, lang=lang
185
- )
184
+ if ds.classify() == SupportedPdfParseMethod.TXT:
185
+ pipe_result = infer_result.pipe_txt_mode(
186
+ image_writer, debug_mode=True, lang=ds._lang
187
+ )
188
+ else:
189
+ pipe_result = infer_result.pipe_ocr_mode(
190
+ image_writer, debug_mode=True, lang=ds._lang
191
+ )
192
+
186
193
 
187
194
  if f_draw_model_bbox:
188
195
  infer_result.draw_model(
@@ -201,6 +208,9 @@ def do_parse(
201
208
  os.path.join(local_md_dir, f'{pdf_file_name}_line_sort.pdf')
202
209
  )
203
210
 
211
+ if f_draw_char_bbox:
212
+ draw_char_bbox(pdf_bytes, local_md_dir, f'{pdf_file_name}_char_bbox.pdf')
213
+
204
214
  if f_dump_md:
205
215
  pipe_result.dump_md(
206
216
  md_writer,
@@ -0,0 +1,29 @@
1
+ import os
2
+ import subprocess
3
+ from pathlib import Path
4
+
5
+
6
+ class ConvertToPdfError(Exception):
7
+ def __init__(self, msg):
8
+ self.msg = msg
9
+ super().__init__(self.msg)
10
+
11
+
12
+ def convert_file_to_pdf(input_path, output_dir):
13
+ if not os.path.isfile(input_path):
14
+ raise FileNotFoundError(f"The input file {input_path} does not exist.")
15
+
16
+ os.makedirs(output_dir, exist_ok=True)
17
+
18
+ cmd = [
19
+ 'soffice',
20
+ '--headless',
21
+ '--convert-to', 'pdf',
22
+ '--outdir', str(output_dir),
23
+ str(input_path)
24
+ ]
25
+
26
+ process = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
27
+
28
+ if process.returncode != 0:
29
+ raise ConvertToPdfError(process.stderr.decode())
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: magic-pdf
3
- Version: 0.10.6
3
+ Version: 1.0.1
4
4
  Summary: A practical tool for converting PDF to Markdown
5
5
  Home-page: https://github.com/opendatalab/MinerU
6
6
  Requires-Python: >=3.9
@@ -9,17 +9,17 @@ License-File: LICENSE.md
9
9
  Requires-Dist: boto3>=1.28.43
10
10
  Requires-Dist: Brotli>=1.1.0
11
11
  Requires-Dist: click>=8.1.7
12
- Requires-Dist: fast-langdetect==0.2.0
12
+ Requires-Dist: fast-langdetect>=0.2.3
13
13
  Requires-Dist: loguru>=0.6.0
14
14
  Requires-Dist: numpy<2.0.0,>=1.21.6
15
- Requires-Dist: pydantic<2.8.0,>=2.7.2
15
+ Requires-Dist: pydantic>=2.7.2
16
16
  Requires-Dist: PyMuPDF>=1.24.9
17
17
  Requires-Dist: scikit-learn>=1.0.2
18
18
  Requires-Dist: torch>=2.2.2
19
19
  Requires-Dist: transformers
20
20
  Requires-Dist: pdfminer.six==20231228
21
21
  Provides-Extra: full
22
- Requires-Dist: unimernet==0.2.2; extra == "full"
22
+ Requires-Dist: unimernet==0.2.3; extra == "full"
23
23
  Requires-Dist: torch<=2.3.1,>=2.2.2; extra == "full"
24
24
  Requires-Dist: torchvision<=0.18.1,>=0.17.2; extra == "full"
25
25
  Requires-Dist: ultralytics>=8.3.48; extra == "full"
@@ -29,8 +29,10 @@ Requires-Dist: einops; extra == "full"
29
29
  Requires-Dist: accelerate; extra == "full"
30
30
  Requires-Dist: doclayout-yolo==0.0.2; extra == "full"
31
31
  Requires-Dist: rapidocr-paddle; extra == "full"
32
- Requires-Dist: rapid-table; extra == "full"
32
+ Requires-Dist: rapidocr-onnxruntime; extra == "full"
33
+ Requires-Dist: rapid-table==0.3.0; extra == "full"
33
34
  Requires-Dist: PyYAML; extra == "full"
35
+ Requires-Dist: openai; extra == "full"
34
36
  Requires-Dist: detectron2; extra == "full"
35
37
  Requires-Dist: paddlepaddle==3.0.0b1; platform_system == "Linux" and extra == "full"
36
38
  Requires-Dist: matplotlib; (platform_system == "Linux" or platform_system == "Darwin") and extra == "full"
@@ -59,7 +61,7 @@ Requires-Dist: albumentations<=1.4.20; extra == "old-linux"
59
61
  [![Downloads](https://static.pepy.tech/badge/magic-pdf)](https://pepy.tech/project/magic-pdf)
60
62
  [![Downloads](https://static.pepy.tech/badge/magic-pdf/month)](https://pepy.tech/project/magic-pdf)
61
63
 
62
- [![OpenDataLab](https://img.shields.io/badge/Demo_on_OpenDataLab-blue?logo=&labelColor=white)](https://opendatalab.com/OpenSourceTools/Extractor/PDF)
64
+ [![OpenDataLab](https://img.shields.io/badge/Demo_on_OpenDataLab-blue?logo=&labelColor=white)](https://mineru.org.cn/OpenSourceTools/Extractor?source=github)
63
65
  [![HuggingFace](https://img.shields.io/badge/Demo_on_HuggingFace-yellow.svg?logo=&labelColor=white)](https://huggingface.co/spaces/opendatalab/MinerU)
64
66
  [![ModelScope](https://img.shields.io/badge/Demo_on_ModelScope-purple?logo=&labelColor=white)](https://www.modelscope.cn/studios/OpenDataLab/MinerU)
65
67
  [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/gist/myhloli/3b3a00a4a0a61577b6c30f989092d20d/mineru_demo.ipynb)
@@ -76,6 +78,11 @@ Requires-Dist: albumentations<=1.4.20; extra == "old-linux"
76
78
 
77
79
  <p align="center">
78
80
  <a href="https://github.com/opendatalab/PDF-Extract-Kit">PDF-Extract-Kit: High-Quality PDF Extraction Toolkit</a>🔥🔥🔥
81
+ <br>
82
+ <br>
83
+ <a href="https://mineru.org.cn/client?source=github">
84
+ Easier to use: Just grab MinerU Desktop. No coding, no login, just a simple interface and smooth interactions. Enjoy it without any fuss!</a>🚀🚀🚀
85
+
79
86
  </p>
80
87
 
81
88
  <!-- join us -->
@@ -87,6 +94,15 @@ Requires-Dist: albumentations<=1.4.20; extra == "old-linux"
87
94
  </div>
88
95
 
89
96
  # Changelog
97
+ - 2025/01/10 1.0.0 released. This is our first official release. It introduces a completely new API, improves compatibility through extensive refactoring, and adds a brand-new automatic language identification feature:
98
+ - New API Interface
99
+ - For the data-side API, we have introduced the Dataset class, designed to provide a robust and flexible data processing framework. This framework currently supports a variety of document formats, including images (.jpg and .png), PDFs, Word documents (.doc and .docx), and PowerPoint presentations (.ppt and .pptx). It ensures effective support for data processing tasks ranging from simple to complex.
100
+ - For the user-side API, we have meticulously designed the MinerU processing workflow as a series of composable Stages. Each Stage represents a specific processing step, allowing users to define new Stages according to their needs and creatively combine these stages to customize their data processing workflows.
101
+ - Enhanced Compatibility
102
+ - By optimizing the dependency environment and configuration items, we ensure stable and efficient operation on ARM architecture Linux systems.
103
+ - We have deeply integrated with Huawei Ascend NPU acceleration, providing autonomous and controllable high-performance computing capabilities. This supports the localization and development of AI application platforms in China. [Ascend NPU Acceleration](docs/README_Ascend_NPU_Acceleration_zh_CN.md)
104
+ - Automatic Language Identification
105
+ - By introducing a new language recognition model, setting the `lang` configuration to `auto` during document parsing will automatically select the appropriate OCR language model, improving the accuracy of scanned document parsing.
90
106
  - 2024/11/22 0.10.0 released. Introducing hybrid OCR text extraction capabilities,
91
107
  - Significantly improved parsing performance in complex text distribution scenarios such as dense formulas, irregular span regions, and text represented by images.
92
108
  - Combines the dual advantages of accurate content extraction and faster speed in text mode, and more precise span/line region recognition in OCR mode.
@@ -126,6 +142,7 @@ Requires-Dist: albumentations<=1.4.20; extra == "old-linux"
126
142
  <li><a href="#online-demo">Online Demo</a></li>
127
143
  <li><a href="#quick-cpu-demo">Quick CPU Demo</a></li>
128
144
  <li><a href="#using-gpu">Using GPU</a></li>
145
+ <li><a href="#using-npu">Using NPU</a></li>
129
146
  </ul>
130
147
  </li>
131
148
  <li><a href="#usage">Usage</a>
@@ -174,7 +191,7 @@ https://github.com/user-attachments/assets/4bea02c9-6d54-4cd6-97ed-dff14340982c
174
191
  - OCR supports detection and recognition of 84 languages.
175
192
  - Supports multiple output formats, such as multimodal and NLP Markdown, JSON sorted by reading order, and rich intermediate formats.
176
193
  - Supports various visualization results, including layout visualization and span visualization, for efficient confirmation of output quality.
177
- - Supports both CPU and GPU environments.
194
+ - Supports running in a pure CPU environment, and also supports GPU (CUDA) / NPU (CANN) / MPS acceleration.
178
195
  - Compatible with Windows, Linux, and Mac platforms.
179
196
 
180
197
  ## Quick Start
@@ -185,7 +202,10 @@ There are three different ways to experience MinerU:
185
202
 
186
203
  - [Online Demo (No Installation Required)](#online-demo)
187
204
  - [Quick CPU Demo (Windows, Linux, Mac)](#quick-cpu-demo)
188
- - [Linux/Windows + CUDA](#Using-GPU)
205
+ - Accelerate inference by using CUDA/CANN/MPS
206
+ - [Linux/Windows + CUDA](#Using-GPU)
207
+ - [Linux + CANN](#using-npu)
208
+ - [MacOS + MPS](#using-mps)
189
209
 
190
210
  > [!WARNING]
191
211
  > **Pre-installation Notice—Hardware and Software Environment Support**
@@ -201,20 +221,24 @@ There are three different ways to experience MinerU:
201
221
  <td colspan="3" rowspan="2">Operating System</td>
202
222
  </tr>
203
223
  <tr>
204
- <td>Ubuntu 22.04 LTS</td>
224
+ <td>Linux distributions released in 2019 or later</td>
205
225
  <td>Windows 10 / 11</td>
206
226
  <td>macOS 11+</td>
207
227
  </tr>
208
228
  <tr>
209
229
  <td colspan="3">CPU</td>
210
- <td>x86_64(unsupported ARM Linux)</td>
230
+ <td>x86_64 / arm64</td>
211
231
  <td>x86_64 (ARM Windows is not supported)</td>
212
232
  <td>x86_64 / arm64</td>
213
233
  </tr>
214
234
  <tr>
215
- <td colspan="3">Memory</td>
235
+ <td colspan="3">Memory Requirements</td>
216
236
  <td colspan="3">16GB or more, recommended 32GB+</td>
217
237
  </tr>
238
+ <tr>
239
+ <td colspan="3">Storage Requirements</td>
240
+ <td colspan="3">20GB or more; SSD recommended</td>
241
+ </tr>
218
242
  <tr>
219
243
  <td colspan="3">Python Version</td>
220
244
  <td colspan="3">3.10 (please make sure to create a Python 3.10 virtual environment using conda)</td>
@@ -231,6 +255,12 @@ There are three different ways to experience MinerU:
231
255
  <td>11.8 (manual installation) + cuDNN v8.7.0 (manual installation)</td>
232
256
  <td>None</td>
233
257
  </tr>
258
+ <tr>
259
+ <td colspan="3">CANN Environment(NPU support)</td>
260
+ <td>8.0+(Ascend 910b)</td>
261
+ <td>None</td>
262
+ <td>None</td>
263
+ </tr>
234
264
  <tr>
235
265
  <td rowspan="2">GPU Hardware Support List</td>
236
266
  <td colspan="2">GPU VRAM 8GB or more</td>
@@ -243,7 +273,7 @@ There are three different ways to experience MinerU:
243
273
  ### Online Demo
244
274
 
245
275
  Stable Version (Stable version verified by QA):
246
- [![OpenDataLab](https://img.shields.io/badge/Demo_on_OpenDataLab-blue?logo=&labelColor=white)](https://opendatalab.com/OpenSourceTools/Extractor/PDF)
276
+ [![OpenDataLab](https://img.shields.io/badge/Demo_on_OpenDataLab-blue?logo=&labelColor=white)](https://mineru.org.cn/OpenSourceTools/Extractor?source=github)
247
277
 
248
278
  Test Version (Synced with dev branch updates, testing new features):
249
279
  [![HuggingFace](https://img.shields.io/badge/Demo_on_HuggingFace-yellow.svg?logo=&labelColor=white)](https://huggingface.co/spaces/opendatalab/MinerU)
@@ -256,7 +286,7 @@ Test Version (Synced with dev branch updates, testing new features):
256
286
  ```bash
257
287
  conda create -n MinerU python=3.10
258
288
  conda activate MinerU
259
- pip install -U magic-pdf[full] --extra-index-url https://wheels.myhloli.com
289
+ pip install -U "magic-pdf[full]" --extra-index-url https://wheels.myhloli.com
260
290
  ```
261
291
 
262
292
  #### 2. Download model weight files
@@ -281,7 +311,7 @@ You can modify certain configurations in this file to enable or disable features
281
311
  {
282
312
  // other config
283
313
  "layout-config": {
284
- "model": "layoutlmv3" // Please change to "doclayout_yolo" when using doclayout_yolo.
314
+ "model": "doclayout_yolo" // Please change to "layoutlmv3" when using layoutlmv3.
285
315
  },
286
316
  "formula-config": {
287
317
  "mfd_model": "yolo_v8_mfd",
@@ -290,7 +320,7 @@ You can modify certain configurations in this file to enable or disable features
290
320
  },
291
321
  "table-config": {
292
322
  "model": "rapid_table", // Default to using "rapid_table", can be switched to "tablemaster" or "struct_eqtable".
293
- "enable": false, // The table recognition feature is disabled by default. If you need to enable it, please change the value here to "true".
323
+ "enable": true, // The table recognition feature is enabled by default. If you need to disable it, please change the value here to "false".
294
324
  "max_time": 400
295
325
  }
296
326
  }
@@ -312,29 +342,49 @@ If your device supports CUDA and meets the GPU requirements of the mainline envi
312
342
  > docker run --rm --gpus=all nvidia/cuda:12.1.0-base-ubuntu22.04 nvidia-smi
313
343
  > ```
314
344
  ```bash
315
- wget https://github.com/opendatalab/MinerU/raw/master/Dockerfile
345
+ wget https://github.com/opendatalab/MinerU/raw/master/docker/global/Dockerfile -O Dockerfile
316
346
  docker build -t mineru:latest .
317
- docker run --rm -it --gpus=all mineru:latest /bin/bash
347
+ docker run --rm -it --gpus=all mineru:latest /bin/bash -c "echo 'source /opt/mineru_venv/bin/activate' >> ~/.bashrc && exec bash"
318
348
  magic-pdf --help
319
349
  ```
320
350
 
351
+ ### Using NPU
352
+
353
+ If your device has NPU acceleration hardware, you can follow the tutorial below to use NPU acceleration:
354
+
355
+ [Ascend NPU Acceleration](docs/README_Ascend_NPU_Acceleration_zh_CN.md)
356
+
357
+ ### Using MPS
358
+
359
+ If your device uses Apple silicon chips, you can enable MPS acceleration for certain supported tasks (such as layout detection and formula detection).
360
+
361
+ You can enable MPS acceleration by setting the `device-mode` parameter to `mps` in the `magic-pdf.json` configuration file.
362
+
363
+ ```json
364
+ {
365
+ // other config
366
+ "device-mode": "mps"
367
+ }
368
+ ```
369
+
370
+ > [!TIP]
371
+ > Since the formula recognition task cannot utilize MPS acceleration, you can disable the formula recognition feature in tasks where it is not needed to achieve optimal performance.
372
+ >
373
+ > You can disable the formula recognition feature by setting the `enable` parameter in the `formula-config` section to `false`.
374
+
321
375
  ## Usage
322
376
 
323
377
  ### Command Line
324
378
 
325
- [Using MinerU via Command Line](https://mineru.readthedocs.io/en/latest/user_guide/quick_start/command_line.html)
379
+ [Using MinerU via Command Line](https://mineru.readthedocs.io/en/latest/user_guide/usage/command_line.html)
326
380
 
327
381
  > [!TIP]
328
382
  > For more information about the output files, please refer to the [Output File Description](docs/output_file_en_us.md).
329
383
 
330
384
  ### API
331
385
 
332
- [Using MinerU via Python API](https://mineru.readthedocs.io/en/latest/user_guide/quick_start/to_markdown.html)
333
-
334
- For detailed implementation, refer to:
386
+ [Using MinerU via Python API](https://mineru.readthedocs.io/en/latest/user_guide/usage/api.html)
335
387
 
336
- - [demo.py Simplest Processing Method](demo/demo.py)
337
- - [magic_pdf_parse_main.py More Detailed Processing Workflow](demo/magic_pdf_parse_main.py)
338
388
 
339
389
  ### Deploy Derived Projects
340
390