mineru 2.6.2__py3-none-any.whl → 2.6.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -281,28 +281,20 @@ class BatchAnalyze:
281
281
 
282
282
  # 按分辨率分组并同时完成padding
283
283
  # RESOLUTION_GROUP_STRIDE = 32
284
- RESOLUTION_GROUP_STRIDE = 64 # 定义分辨率分组的步进值
284
+ RESOLUTION_GROUP_STRIDE = 64
285
285
 
286
286
  resolution_groups = defaultdict(list)
287
287
  for crop_info in lang_crop_list:
288
288
  cropped_img = crop_info[0]
289
289
  h, w = cropped_img.shape[:2]
290
- # 使用更大的分组容差,减少分组数量
291
- # 将尺寸标准化到32的倍数
292
- normalized_h = ((h + RESOLUTION_GROUP_STRIDE) // RESOLUTION_GROUP_STRIDE) * RESOLUTION_GROUP_STRIDE # 向上取整到32的倍数
293
- normalized_w = ((w + RESOLUTION_GROUP_STRIDE) // RESOLUTION_GROUP_STRIDE) * RESOLUTION_GROUP_STRIDE
294
- group_key = (normalized_h, normalized_w)
290
+ # 直接计算目标尺寸并用作分组键
291
+ target_h = ((h + RESOLUTION_GROUP_STRIDE - 1) // RESOLUTION_GROUP_STRIDE) * RESOLUTION_GROUP_STRIDE
292
+ target_w = ((w + RESOLUTION_GROUP_STRIDE - 1) // RESOLUTION_GROUP_STRIDE) * RESOLUTION_GROUP_STRIDE
293
+ group_key = (target_h, target_w)
295
294
  resolution_groups[group_key].append(crop_info)
296
295
 
297
296
  # 对每个分辨率组进行批处理
298
- for group_key, group_crops in tqdm(resolution_groups.items(), desc=f"OCR-det {lang}"):
299
-
300
- # 计算目标尺寸(组内最大尺寸,向上取整到32的倍数)
301
- max_h = max(crop_info[0].shape[0] for crop_info in group_crops)
302
- max_w = max(crop_info[0].shape[1] for crop_info in group_crops)
303
- target_h = ((max_h + RESOLUTION_GROUP_STRIDE - 1) // RESOLUTION_GROUP_STRIDE) * RESOLUTION_GROUP_STRIDE
304
- target_w = ((max_w + RESOLUTION_GROUP_STRIDE - 1) // RESOLUTION_GROUP_STRIDE) * RESOLUTION_GROUP_STRIDE
305
-
297
+ for (target_h, target_w), group_crops in tqdm(resolution_groups.items(), desc=f"OCR-det {lang}"):
306
298
  # 对所有图像进行padding到统一尺寸
307
299
  batch_images = []
308
300
  for crop_info in group_crops:
@@ -310,49 +302,34 @@ class BatchAnalyze:
310
302
  h, w = img.shape[:2]
311
303
  # 创建目标尺寸的白色背景
312
304
  padded_img = np.ones((target_h, target_w, 3), dtype=np.uint8) * 255
313
- # 将原图像粘贴到左上角
314
305
  padded_img[:h, :w] = img
315
306
  batch_images.append(padded_img)
316
307
 
317
308
  # 批处理检测
318
- det_batch_size = min(len(batch_images), self.batch_ratio * OCR_DET_BASE_BATCH_SIZE) # 增加批处理大小
319
- # logger.debug(f"OCR-det batch: {det_batch_size} images, target size: {target_h}x{target_w}")
309
+ det_batch_size = min(len(batch_images), self.batch_ratio * OCR_DET_BASE_BATCH_SIZE)
320
310
  batch_results = ocr_model.text_detector.batch_predict(batch_images, det_batch_size)
321
311
 
322
312
  # 处理批处理结果
323
- for i, (crop_info, (dt_boxes, elapse)) in enumerate(zip(group_crops, batch_results)):
313
+ for crop_info, (dt_boxes, _) in zip(group_crops, batch_results):
324
314
  bgr_image, useful_list, ocr_res_list_dict, res, adjusted_mfdetrec_res, _lang = crop_info
325
315
 
326
316
  if dt_boxes is not None and len(dt_boxes) > 0:
327
- # 直接应用原始OCR流程中的关键处理步骤
328
-
329
- # 1. 排序检测框
330
- if len(dt_boxes) > 0:
331
- dt_boxes_sorted = sorted_boxes(dt_boxes)
332
- else:
333
- dt_boxes_sorted = []
334
-
335
- # 2. 合并相邻检测框
336
- if dt_boxes_sorted:
337
- dt_boxes_merged = merge_det_boxes(dt_boxes_sorted)
338
- else:
339
- dt_boxes_merged = []
340
-
341
- # 3. 根据公式位置更新检测框(关键步骤!)
342
- if dt_boxes_merged and adjusted_mfdetrec_res:
343
- dt_boxes_final = update_det_boxes(dt_boxes_merged, adjusted_mfdetrec_res)
344
- else:
345
- dt_boxes_final = dt_boxes_merged
346
-
347
- # 构造OCR结果格式
348
- ocr_res = [box.tolist() if hasattr(box, 'tolist') else box for box in dt_boxes_final]
349
-
350
- if ocr_res:
317
+ # 处理检测框
318
+ dt_boxes_sorted = sorted_boxes(dt_boxes)
319
+ dt_boxes_merged = merge_det_boxes(dt_boxes_sorted) if dt_boxes_sorted else []
320
+
321
+ # 根据公式位置更新检测框
322
+ dt_boxes_final = (update_det_boxes(dt_boxes_merged, adjusted_mfdetrec_res)
323
+ if dt_boxes_merged and adjusted_mfdetrec_res
324
+ else dt_boxes_merged)
325
+
326
+ if dt_boxes_final:
327
+ ocr_res = [box.tolist() if hasattr(box, 'tolist') else box for box in dt_boxes_final]
351
328
  ocr_result_list = get_ocr_result_list(
352
329
  ocr_res, useful_list, ocr_res_list_dict['ocr_enable'], bgr_image, _lang
353
330
  )
354
-
355
331
  ocr_res_list_dict['layout_res'].extend(ocr_result_list)
332
+
356
333
  else:
357
334
  # 原始单张处理模式
358
335
  for ocr_res_list_dict in tqdm(ocr_res_list_all_page, desc="OCR-det Predict"):
@@ -8,7 +8,7 @@ from ...model.layout.doclayoutyolo import DocLayoutYOLOModel
8
8
  from ...model.mfd.yolo_v8 import YOLOv8MFDModel
9
9
  from ...model.mfr.unimernet.Unimernet import UnimernetModel
10
10
  from ...model.mfr.pp_formulanet_plus_m.predict_formula import FormulaRecognizer
11
- from ...model.ocr.paddleocr2pytorch.pytorch_paddle import PytorchPaddleOCR
11
+ from mineru.model.ocr.pytorch_paddle import PytorchPaddleOCR
12
12
  from ...model.ori_cls.paddle_ori_cls import PaddleOrientationClsModel
13
13
  from ...model.table.cls.paddle_table_cls import PaddleTableClsModel
14
14
  # from ...model.table.rec.RapidTable import RapidTableModel
@@ -148,7 +148,7 @@ def page_model_info_to_page_info(page_model_info, image_dict, page, image_writer
148
148
  fix_discarded_blocks = fix_discarded_block(discarded_block_with_spans)
149
149
 
150
150
  """如果当前页面没有有效的bbox则跳过"""
151
- if len(all_bboxes) == 0:
151
+ if len(all_bboxes) == 0 and len(fix_discarded_blocks) == 0:
152
152
  return None
153
153
 
154
154
  """对image/table/interline_equation截图"""
@@ -191,11 +191,20 @@ def merge_para_with_text(para_block):
191
191
  def make_blocks_to_content_list(para_block, img_buket_path, page_idx, page_size):
192
192
  para_type = para_block['type']
193
193
  para_content = {}
194
- if para_type in [BlockType.TEXT, BlockType.LIST, BlockType.INDEX]:
194
+ if para_type in [
195
+ BlockType.TEXT,
196
+ BlockType.LIST,
197
+ BlockType.INDEX,
198
+ ]:
195
199
  para_content = {
196
200
  'type': ContentType.TEXT,
197
201
  'text': merge_para_with_text(para_block),
198
202
  }
203
+ elif para_type == BlockType.DISCARDED:
204
+ para_content = {
205
+ 'type': para_type,
206
+ 'text': merge_para_with_text(para_block),
207
+ }
199
208
  elif para_type == BlockType.TITLE:
200
209
  para_content = {
201
210
  'type': ContentType.TEXT,
@@ -268,15 +277,19 @@ def union_make(pdf_info_dict: list,
268
277
  output_content = []
269
278
  for page_info in pdf_info_dict:
270
279
  paras_of_layout = page_info.get('para_blocks')
280
+ paras_of_discarded = page_info.get('discarded_blocks')
271
281
  page_idx = page_info.get('page_idx')
272
282
  page_size = page_info.get('page_size')
273
- if not paras_of_layout:
274
- continue
275
283
  if make_mode in [MakeMode.MM_MD, MakeMode.NLP_MD]:
284
+ if not paras_of_layout:
285
+ continue
276
286
  page_markdown = make_blocks_to_markdown(paras_of_layout, make_mode, img_buket_path)
277
287
  output_content.extend(page_markdown)
278
288
  elif make_mode == MakeMode.CONTENT_LIST:
279
- for para_block in paras_of_layout:
289
+ para_blocks = (paras_of_layout or []) + (paras_of_discarded or [])
290
+ if not para_blocks:
291
+ continue
292
+ for para_block in para_blocks:
280
293
  para_content = make_blocks_to_content_list(para_block, img_buket_path, page_idx, page_size)
281
294
  if para_content:
282
295
  output_content.append(para_content)
@@ -8,6 +8,7 @@ from .utils import enable_custom_logits_processors, set_default_gpu_memory_utili
8
8
  from .model_output_to_middle_json import result_to_middle_json
9
9
  from ...data.data_reader_writer import DataWriter
10
10
  from mineru.utils.pdf_image_tools import load_images_from_pdf
11
+ from ...utils.check_mac_env import is_mac_os_version_supported
11
12
  from ...utils.config_reader import get_device
12
13
 
13
14
  from ...utils.enum_class import ImageType
@@ -47,7 +48,7 @@ class ModelSingleton:
47
48
  for param in ["batch_size", "max_concurrency", "http_timeout"]:
48
49
  if param in kwargs:
49
50
  del kwargs[param]
50
- if backend in ['transformers', 'vllm-engine', "vllm-async-engine"] and not model_path:
51
+ if backend in ['transformers', 'vllm-engine', "vllm-async-engine", "mlx-engine"] and not model_path:
51
52
  model_path = auto_download_and_get_model_root_path("/","vlm")
52
53
  if backend == "transformers":
53
54
  try:
@@ -75,6 +76,15 @@ class ModelSingleton:
75
76
  )
76
77
  if batch_size == 0:
77
78
  batch_size = set_default_batch_size()
79
+ elif backend == "mlx-engine":
80
+ mlx_supported = is_mac_os_version_supported()
81
+ if not mlx_supported:
82
+ raise EnvironmentError("mlx-engine backend is only supported on macOS 13.5+ with Apple Silicon.")
83
+ try:
84
+ from mlx_vlm import load as mlx_load
85
+ except ImportError:
86
+ raise ImportError("Please install mlx-vlm to use the mlx-engine backend.")
87
+ model, processor = mlx_load(model_path)
78
88
  else:
79
89
  if os.getenv('OMP_NUM_THREADS') is None:
80
90
  os.environ["OMP_NUM_THREADS"] = "1"
@@ -248,13 +248,16 @@ def union_make(pdf_info_dict: list,
248
248
  paras_of_discarded = page_info.get('discarded_blocks')
249
249
  page_idx = page_info.get('page_idx')
250
250
  page_size = page_info.get('page_size')
251
- if not paras_of_layout:
252
- continue
253
251
  if make_mode in [MakeMode.MM_MD, MakeMode.NLP_MD]:
252
+ if not paras_of_layout:
253
+ continue
254
254
  page_markdown = mk_blocks_to_markdown(paras_of_layout, make_mode, formula_enable, table_enable, img_buket_path)
255
255
  output_content.extend(page_markdown)
256
256
  elif make_mode == MakeMode.CONTENT_LIST:
257
- for para_block in paras_of_layout+paras_of_discarded:
257
+ para_blocks = (paras_of_layout or []) + (paras_of_discarded or [])
258
+ if not para_blocks:
259
+ continue
260
+ for para_block in para_blocks:
258
261
  para_content = make_blocks_to_content_list(para_block, img_buket_path, page_idx, page_size)
259
262
  output_content.append(para_content)
260
263
 
mineru/cli/client.py CHANGED
@@ -4,6 +4,7 @@ import click
4
4
  from pathlib import Path
5
5
  from loguru import logger
6
6
 
7
+ from mineru.utils.check_mac_env import is_mac_os_version_supported
7
8
  from mineru.utils.cli_parser import arg_parse
8
9
  from mineru.utils.config_reader import get_device
9
10
  from mineru.utils.guess_suffix_or_lang import guess_suffix_by_path
@@ -11,6 +12,11 @@ from mineru.utils.model_utils import get_vram
11
12
  from ..version import __version__
12
13
  from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
13
14
 
15
+
16
+ backends = ['pipeline', 'vlm-transformers', 'vlm-vllm-engine', 'vlm-http-client']
17
+ if is_mac_os_version_supported():
18
+ backends.append("vlm-mlx-engine")
19
+
14
20
  @click.command(context_settings=dict(ignore_unknown_options=True, allow_extra_args=True))
15
21
  @click.pass_context
16
22
  @click.version_option(__version__,
@@ -38,25 +44,28 @@ from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
38
44
  '--method',
39
45
  'method',
40
46
  type=click.Choice(['auto', 'txt', 'ocr']),
41
- help="""the method for parsing pdf:
42
- auto: Automatically determine the method based on the file type.
43
- txt: Use text extraction method.
44
- ocr: Use OCR method for image-based PDFs.
47
+ help="""\b
48
+ the method for parsing pdf:
49
+ auto: Automatically determine the method based on the file type.
50
+ txt: Use text extraction method.
51
+ ocr: Use OCR method for image-based PDFs.
45
52
  Without method specified, 'auto' will be used by default.
46
- Adapted only for the case where the backend is set to "pipeline".""",
53
+ Adapted only for the case where the backend is set to 'pipeline'.""",
47
54
  default='auto',
48
55
  )
49
56
  @click.option(
50
57
  '-b',
51
58
  '--backend',
52
59
  'backend',
53
- type=click.Choice(['pipeline', 'vlm-transformers', 'vlm-vllm-engine', 'vlm-http-client']),
54
- help="""the backend for parsing pdf:
55
- pipeline: More general.
56
- vlm-transformers: More general.
57
- vlm-vllm-engine: Faster(engine).
58
- vlm-http-client: Faster(client).
59
- without method specified, pipeline will be used by default.""",
60
+ type=click.Choice(backends),
61
+ help="""\b
62
+ the backend for parsing pdf:
63
+ pipeline: More general.
64
+ vlm-transformers: More general, but slower.
65
+ vlm-mlx-engine: Faster than transformers.
66
+ vlm-vllm-engine: Faster(engine).
67
+ vlm-http-client: Faster(client).
68
+ Without method specified, pipeline will be used by default.""",
60
69
  default='pipeline',
61
70
  )
62
71
  @click.option(
@@ -66,7 +75,7 @@ from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
66
75
  type=click.Choice(['ch', 'ch_server', 'ch_lite', 'en', 'korean', 'japan', 'chinese_cht', 'ta', 'te', 'ka', 'th', 'el',
67
76
  'latin', 'arabic', 'east_slavic', 'cyrillic', 'devanagari']),
68
77
  help="""
69
- Input the languages in the pdf (if known) to improve OCR accuracy. Optional.
78
+ Input the languages in the pdf (if known) to improve OCR accuracy.
70
79
  Without languages specified, 'ch' will be used by default.
71
80
  Adapted only for the case where the backend is set to "pipeline".
72
81
  """,
@@ -119,7 +128,8 @@ from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
119
128
  '--device',
120
129
  'device_mode',
121
130
  type=str,
122
- help='Device mode for model inference, e.g., "cpu", "cuda", "cuda:0", "npu", "npu:0", "mps". Adapted only for the case where the backend is set to "pipeline". ',
131
+ help="""Device mode for model inference, e.g., "cpu", "cuda", "cuda:0", "npu", "npu:0", "mps".
132
+ Adapted only for the case where the backend is set to "pipeline" and "vlm-transformers". """,
123
133
  default=None,
124
134
  )
125
135
  @click.option(
mineru/cli/gradio_app.py CHANGED
@@ -13,6 +13,7 @@ from gradio_pdf import PDF
13
13
  from loguru import logger
14
14
 
15
15
  from mineru.cli.common import prepare_env, read_fn, aio_do_parse, pdf_suffixes, image_suffixes
16
+ from mineru.utils.check_mac_env import is_mac_os_version_supported
16
17
  from mineru.utils.cli_parser import arg_parse
17
18
  from mineru.utils.hash_utils import str_sha256
18
19
 
@@ -273,7 +274,7 @@ def to_pdf(file_path):
273
274
 
274
275
  # 更新界面函数
275
276
  def update_interface(backend_choice):
276
- if backend_choice in ["vlm-transformers", "vlm-vllm-async-engine"]:
277
+ if backend_choice in ["vlm-transformers", "vlm-vllm-async-engine", "vlm-mlx-engine"]:
277
278
  return gr.update(visible=False), gr.update(visible=False)
278
279
  elif backend_choice in ["vlm-http-client"]:
279
280
  return gr.update(visible=True), gr.update(visible=False)
@@ -381,6 +382,8 @@ def main(ctx,
381
382
  preferred_option = "vlm-vllm-async-engine"
382
383
  else:
383
384
  drop_list = ["pipeline", "vlm-transformers", "vlm-http-client"]
385
+ if is_mac_os_version_supported():
386
+ drop_list.append("vlm-mlx-engine")
384
387
  preferred_option = "pipeline"
385
388
  backend = gr.Dropdown(drop_list, label="Backend", value=preferred_option)
386
389
  with gr.Row(visible=False) as client_options:
@@ -21,7 +21,7 @@ def download_and_modify_json(url, local_filename, modifications):
21
21
  if os.path.exists(local_filename):
22
22
  data = json.load(open(local_filename))
23
23
  config_version = data.get('config_version', '0.0.0')
24
- if config_version < '1.3.0':
24
+ if config_version < '1.3.1':
25
25
  data = download_json(url)
26
26
  else:
27
27
  data = download_json(url)
@@ -134,7 +134,7 @@ def get_model_params(lang, config):
134
134
  raise Exception (f'Language {lang} not supported')
135
135
 
136
136
 
137
- root_dir = os.path.join(Path(__file__).resolve().parent.parent.parent, 'utils')
137
+ root_dir = os.path.join(Path(__file__).resolve().parent.parent, 'utils')
138
138
 
139
139
 
140
140
  class PytorchPaddleOCR(TextSystem):
@@ -11,7 +11,7 @@ from rapid_table import ModelType, RapidTable, RapidTableInput
11
11
  from rapid_table.utils import RapidTableOutput
12
12
  from tqdm import tqdm
13
13
 
14
- from mineru.model.ocr.paddleocr2pytorch.pytorch_paddle import PytorchPaddleOCR
14
+ from mineru.model.ocr.pytorch_paddle import PytorchPaddleOCR
15
15
  from mineru.utils.enum_class import ModelPath
16
16
  from mineru.utils.models_download_utils import auto_download_and_get_model_root_path
17
17
 
@@ -179,13 +179,14 @@ def insert_lines_into_block(block_bbox, line_height, page_w, page_h):
179
179
  def model_init(model_name: str):
180
180
  from transformers import LayoutLMv3ForTokenClassification
181
181
  device_name = get_device()
182
+ device = torch.device(device_name)
182
183
  bf_16_support = False
183
184
  if device_name.startswith("cuda"):
184
- bf_16_support = torch.cuda.is_bf16_supported()
185
+ if torch.cuda.get_device_properties(device).major >= 8:
186
+ bf_16_support = True
185
187
  elif device_name.startswith("mps"):
186
188
  bf_16_support = True
187
189
 
188
- device = torch.device(device_name)
189
190
  if model_name == 'layoutreader':
190
191
  # 检测modelscope的缓存目录是否存在
191
192
  layoutreader_model_dir = os.path.join(auto_download_and_get_model_root_path(ModelPath.layout_reader), ModelPath.layout_reader)
@@ -0,0 +1,30 @@
1
+ # Copyright (c) Opendatalab. All rights reserved.
2
+ import platform
3
+
4
+ from packaging import version
5
+
6
+
7
+ # Detect if the current environment is a Mac computer
8
+ def is_mac_environment() -> bool:
9
+ return platform.system() == "Darwin"
10
+
11
+
12
+ # Detect if CPU is Apple Silicon architecture
13
+ def is_apple_silicon_cpu() -> bool:
14
+ return platform.machine() in ["arm64", "aarch64"]
15
+
16
+
17
+ # If Mac computer with Apple Silicon architecture, check if macOS version is 13.5 or above
18
+ def is_mac_os_version_supported(min_version: str = "13.5") -> bool:
19
+ if not is_mac_environment() or not is_apple_silicon_cpu():
20
+ return False
21
+ mac_version = platform.mac_ver()[0]
22
+ if not mac_version:
23
+ return False
24
+ # print("Mac OS Version:", mac_version)
25
+ return version.parse(mac_version) >= version.parse(min_version)
26
+
27
+ if __name__ == "__main__":
28
+ print("Is Mac Environment:", is_mac_environment())
29
+ print("Is Apple Silicon CPU:", is_apple_silicon_cpu())
30
+ print("Is Mac OS Version Supported (>=13.5):", is_mac_os_version_supported())
mineru/utils/llm_aided.py CHANGED
@@ -84,16 +84,21 @@ Corrected title list:
84
84
  max_retries = 3
85
85
  dict_completion = None
86
86
 
87
+ # Build API call parameters
88
+ api_params = {
89
+ "model": title_aided_config["model"],
90
+ "messages": [{'role': 'user', 'content': title_optimize_prompt}],
91
+ "temperature": 0.7,
92
+ "stream": True,
93
+ }
94
+
95
+ # Only add extra_body when explicitly specified in config
96
+ if "enable_thinking" in title_aided_config:
97
+ api_params["extra_body"] = {"enable_thinking": title_aided_config["enable_thinking"]}
98
+
87
99
  while retry_count < max_retries:
88
100
  try:
89
- completion = client.chat.completions.create(
90
- model=title_aided_config["model"],
91
- messages=[
92
- {'role': 'user', 'content': title_optimize_prompt}],
93
- extra_body={"enable_thinking": False},
94
- temperature=0.7,
95
- stream=True,
96
- )
101
+ completion = client.chat.completions.create(**api_params)
97
102
  content_pieces = []
98
103
  for chunk in completion:
99
104
  if chunk.choices and chunk.choices[0].delta.content is not None:
mineru/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "2.6.2"
1
+ __version__ = "2.6.3"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mineru
3
- Version: 2.6.2
3
+ Version: 2.6.3
4
4
  Summary: A practical tool for converting PDF to Markdown
5
5
  License: AGPL-3.0
6
6
  Project-URL: homepage, https://mineru.net/
@@ -37,7 +37,7 @@ Requires-Dist: scikit-image<1.0.0,>=0.25.0
37
37
  Requires-Dist: openai<3,>=1.70.0
38
38
  Requires-Dist: beautifulsoup4<5,>=4.13.5
39
39
  Requires-Dist: magika<0.7.0,>=0.6.2
40
- Requires-Dist: mineru-vl-utils<1,>=0.1.14
40
+ Requires-Dist: mineru-vl-utils<1,>=0.1.15
41
41
  Provides-Extra: test
42
42
  Requires-Dist: mineru[core]; extra == "test"
43
43
  Requires-Dist: pytest; extra == "test"
@@ -50,6 +50,8 @@ Requires-Dist: transformers<5.0.0,>=4.51.1; extra == "vlm"
50
50
  Requires-Dist: accelerate>=1.5.1; extra == "vlm"
51
51
  Provides-Extra: vllm
52
52
  Requires-Dist: vllm<0.12,>=0.10.1.1; extra == "vllm"
53
+ Provides-Extra: mlx
54
+ Requires-Dist: mlx-vlm<0.4,>=0.3.3; extra == "mlx"
53
55
  Provides-Extra: pipeline
54
56
  Requires-Dist: matplotlib<4,>=3.10; extra == "pipeline"
55
57
  Requires-Dist: ultralytics<9,>=8.3.48; extra == "pipeline"
@@ -76,6 +78,7 @@ Requires-Dist: mineru[vlm]; extra == "core"
76
78
  Requires-Dist: mineru[pipeline]; extra == "core"
77
79
  Requires-Dist: mineru[api]; extra == "core"
78
80
  Requires-Dist: mineru[gradio]; extra == "core"
81
+ Requires-Dist: mineru[mlx]; sys_platform == "darwin" and extra == "core"
79
82
  Provides-Extra: all
80
83
  Requires-Dist: mineru[core]; extra == "all"
81
84
  Requires-Dist: mineru[vllm]; extra == "all"
@@ -127,6 +130,10 @@ Dynamic: license-file
127
130
  </div>
128
131
 
129
132
  # Changelog
133
+ - 2025/10/31 2.6.3 Release
134
+ - Added support for a new backend `vlm-mlx-engine`, enabling MLX-accelerated inference for the MinerU2.5 model on Apple Silicon devices. Compared to the `vlm-transformers` backend, `vlm-mlx-engine` delivers a 100%–200% speed improvement.
135
+ - Bug fixes: #3849, #3859
136
+
130
137
  - 2025/10/24 2.6.2 Release
131
138
  - `pipeline` backend optimizations
132
139
  - Added experimental support for Chinese formulas, which can be enabled by setting the environment variable `export MINERU_FORMULA_CH_SUPPORT=1`. This feature may cause a slight decrease in MFR speed and failures in recognizing some long formulas. It is recommended to enable it only when parsing Chinese formulas is needed. To disable this feature, set the environment variable to `0`.
@@ -666,7 +673,7 @@ https://github.com/user-attachments/assets/4bea02c9-6d54-4cd6-97ed-dff14340982c
666
673
  - Automatically recognize and convert formulas in the document to LaTeX format.
667
674
  - Automatically recognize and convert tables in the document to HTML format.
668
675
  - Automatically detect scanned PDFs and garbled PDFs and enable OCR functionality.
669
- - OCR supports detection and recognition of 84 languages.
676
+ - OCR supports detection and recognition of 109 languages.
670
677
  - Supports multiple output formats, such as multimodal and NLP Markdown, JSON sorted by reading order, and rich intermediate formats.
671
678
  - Supports various visualization results, including layout visualization and span visualization, for efficient confirmation of output quality.
672
679
  - Supports running in a pure CPU environment, and also supports GPU(CUDA)/NPU(CANN)/MPS acceleration
@@ -703,41 +710,70 @@ A WebUI developed based on Gradio, with a simple interface and only core parsing
703
710
  > In non-mainline environments, due to the diversity of hardware and software configurations, as well as third-party dependency compatibility issues, we cannot guarantee 100% project availability. Therefore, for users who wish to use this project in non-recommended environments, we suggest carefully reading the documentation and FAQ first. Most issues already have corresponding solutions in the FAQ. We also encourage community feedback to help us gradually expand support.
704
711
 
705
712
  <table>
706
- <tr>
707
- <td>Parsing Backend</td>
708
- <td>pipeline</td>
709
- <td>vlm-transformers</td>
710
- <td>vlm-vllm</td>
711
- </tr>
712
- <tr>
713
- <td>Operating System</td>
714
- <td>Linux / Windows / macOS</td>
715
- <td>Linux / Windows</td>
716
- <td>Linux / Windows (via WSL2)</td>
717
- </tr>
718
- <tr>
719
- <td>CPU Inference Support</td>
720
- <td>✅</td>
721
- <td colspan="2">❌</td>
722
- </tr>
723
- <tr>
724
- <td>GPU Requirements</td>
725
- <td>Turing architecture and later, 6GB+ VRAM or Apple Silicon</td>
726
- <td colspan="2">Turing architecture and later, 8GB+ VRAM</td>
727
- </tr>
728
- <tr>
729
- <td>Memory Requirements</td>
730
- <td colspan="3">Minimum 16GB+, recommended 32GB+</td>
731
- </tr>
732
- <tr>
733
- <td>Disk Space Requirements</td>
734
- <td colspan="3">20GB+, SSD recommended</td>
735
- </tr>
736
- <tr>
737
- <td>Python Version</td>
738
- <td colspan="3">3.10-3.13</td>
739
- </tr>
713
+ <thead>
714
+ <tr>
715
+ <th rowspan="2">Parsing Backend</th>
716
+ <th rowspan="2">pipeline <br> (Accuracy<sup>1</sup> 82+)</th>
717
+ <th colspan="4">vlm (Accuracy<sup>1</sup> 90+)</th>
718
+ </tr>
719
+ <tr>
720
+ <th>transformers</th>
721
+ <th>mlx-engine</th>
722
+ <th>vllm-engine / <br>vllm-async-engine</th>
723
+ <th>http-client</th>
724
+ </tr>
725
+ </thead>
726
+ <tbody>
727
+ <tr>
728
+ <th>Backend Features</th>
729
+ <td>Fast, no hallucinations</td>
730
+ <td>Good compatibility, <br>but slower</td>
731
+ <td>Faster than transformers</td>
732
+ <td>Fast, compatible with the vLLM ecosystem</td>
733
+ <td>Suitable for OpenAI-compatible servers<sup>5</sup></td>
734
+ </tr>
735
+ <tr>
736
+ <th>Operating System</th>
737
+ <td colspan="2" style="text-align:center;">Linux<sup>2</sup> / Windows / macOS</td>
738
+ <td style="text-align:center;">macOS<sup>3</sup></td>
739
+ <td style="text-align:center;">Linux<sup>2</sup> / Windows<sup>4</sup> </td>
740
+ <td>Any</td>
741
+ </tr>
742
+ <tr>
743
+ <th>CPU inference support</th>
744
+ <td colspan="2" style="text-align:center;">✅</td>
745
+ <td colspan="2" style="text-align:center;">❌</td>
746
+ <td>Not required</td>
747
+ </tr>
748
+ <tr>
749
+ <th>GPU Requirements</th><td colspan="2" style="text-align:center;">Volta or later architectures, 6 GB VRAM or more, or Apple Silicon</td>
750
+ <td>Apple Silicon</td>
751
+ <td>Volta or later architectures, 8 GB VRAM or more</td>
752
+ <td>Not required</td>
753
+ </tr>
754
+ <tr>
755
+ <th>Memory Requirements</th>
756
+ <td colspan="4" style="text-align:center;">Minimum 16 GB, 32 GB recommended</td>
757
+ <td>8 GB</td>
758
+ </tr>
759
+ <tr>
760
+ <th>Disk Space Requirements</th>
761
+ <td colspan="4" style="text-align:center;">20 GB or more, SSD recommended</td>
762
+ <td>2 GB</td>
763
+ </tr>
764
+ <tr>
765
+ <th>Python Version</th>
766
+ <td colspan="5" style="text-align:center;">3.10-3.13</td>
767
+ </tr>
768
+ </tbody>
740
769
  </table>
770
+
771
+ <sup>1</sup> The accuracy metric is the End-to-End Evaluation Overall score of OmniDocBench (v1.5), tested on the latest `MinerU` version.
772
+ <sup>2</sup> On Linux, only distributions released in 2019 or later are supported.
773
+ <sup>3</sup> MLX requires macOS 13.5 or later; macOS 14.0 or higher is recommended.
774
+ <sup>4</sup> On Windows, vLLM is supported via WSL2 (Windows Subsystem for Linux).
775
+ <sup>5</sup> Servers compatible with the OpenAI API, such as local or remote model services deployed via inference frameworks like `vLLM`, `SGLang`, or `LMDeploy`.
776
+
741
777
 
742
778
  ### Install MinerU
743
779
 
@@ -1,28 +1,28 @@
1
1
  mineru/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
2
- mineru/version.py,sha256=53Sii4w6BIWn-1RhaTyqUO46gDe4nDCRQDAcpsWFH24,22
2
+ mineru/version.py,sha256=uJ6TLK18jhCrL0aclBja7NzlAGLAyZjVpX-gq3d461k,22
3
3
  mineru/backend/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
4
4
  mineru/backend/utils.py,sha256=GLJU3IznDmhE1_qNmkU1UOtsuskIHBezgsEVO6Uar-Y,698
5
5
  mineru/backend/pipeline/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
6
- mineru/backend/pipeline/batch_analyze.py,sha256=dOnktvOMjfg84w1H34YlJg6N9_x6Yfvf14NIpOQcZqQ,22221
7
- mineru/backend/pipeline/model_init.py,sha256=OfB2MMjNmZcHl4fkqS1fT5R8I3LVoSKAHGtl8PcBfBs,9372
8
- mineru/backend/pipeline/model_json_to_middle_json.py,sha256=DtB7kE_7CtxwOMcb6QYeKzY6vMwUJNpavc5fn9z9oiI,10916
6
+ mineru/backend/pipeline/batch_analyze.py,sha256=gnilKhFlMe8-55X2PJnb-ZSVeZIS-5DxIbMpHnwLne8,20889
7
+ mineru/backend/pipeline/model_init.py,sha256=OAylOcQD9gu5TBcX7nMt7X5NpJMtQICI5IvEQ648lpI,9358
8
+ mineru/backend/pipeline/model_json_to_middle_json.py,sha256=reXkUR_wKmJD64d7vRNXMxFviwkzDlGjRshpdwsVquI,10951
9
9
  mineru/backend/pipeline/model_list.py,sha256=7cXMBfZrP0K6qWueg1D_-WoUANeSINzkn_ic9E7YQLs,222
10
10
  mineru/backend/pipeline/para_split.py,sha256=Kq95MmvkPm7rKxlCSGiTvVKyF7CErHI2eGGAs5sLl0Q,17119
11
11
  mineru/backend/pipeline/pipeline_analyze.py,sha256=rbO5AetOdnxR5ctkoDzFCFoElkz7Jgb7gi2Ct596NK8,6655
12
12
  mineru/backend/pipeline/pipeline_magic_model.py,sha256=w8jGx8f6yZN0Wf2yPP3L9rYKc9rogxreZCrUJzJvPO8,14974
13
- mineru/backend/pipeline/pipeline_middle_json_mkcontent.py,sha256=xWWOFmYL6hB8PLrxQFyRJ72dAmTIDHtqiWV-WFUfR44,14081
13
+ mineru/backend/pipeline/pipeline_middle_json_mkcontent.py,sha256=YlnEbbUnkniZXS13aLo5mjfFQvQM5SrIVvTAGBZsLmw,14478
14
14
  mineru/backend/vlm/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
15
15
  mineru/backend/vlm/model_output_to_middle_json.py,sha256=AqYX44gS9crUO_t7SuUatD71EVjow6pI6yA2Ik3gQ0s,5139
16
16
  mineru/backend/vlm/utils.py,sha256=woGqyRI4S7p69daLCU07XNXWTV27aLf7YBjjVH1x-5o,2794
17
- mineru/backend/vlm/vlm_analyze.py,sha256=nzwTGndwZFfTEvHppakyDKZxph7SYOuUZW3johY5F8c,8154
17
+ mineru/backend/vlm/vlm_analyze.py,sha256=7c5_JN1F9YTDNNgA_Rmw6xX1PI7gcIT4A4ujtGQHH9Q,8792
18
18
  mineru/backend/vlm/vlm_magic_model.py,sha256=Pd0sOr7G1crAJIVeq6h_03gNSuxmV5U8dvGTGT_rrjs,23452
19
- mineru/backend/vlm/vlm_middle_json_mkcontent.py,sha256=Ie95XpwTgi7EmidcwE_scvXMRQjE2xASU_Rm_F8EP-I,13377
19
+ mineru/backend/vlm/vlm_middle_json_mkcontent.py,sha256=5V-AU9KkxxMn0DDSQBrb15I4GVpEyiQy8uNI_tQhS6M,13498
20
20
  mineru/cli/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
21
- mineru/cli/client.py,sha256=uo7db9Wqj1Mc11MYuaM-bi54BfKKU3SFB9Urc8md5X4,6641
21
+ mineru/cli/client.py,sha256=ArnoT2psOQRnTqLpsFwPaoi-l444iIVkbBn90Pm16n8,6915
22
22
  mineru/cli/common.py,sha256=jxFJMdc-02UMO3SXAtcZ6aIdPrakAE6DCccZ9kDlPKc,14276
23
23
  mineru/cli/fast_api.py,sha256=t5bda769VbM5iokAboiJfPIOnm-r5GTFReE-KQy8L3g,10941
24
- mineru/cli/gradio_app.py,sha256=8rMdW7grwBUn0MdXyG4eOTQUzKWq6nErtMWl-vGdWbU,14525
25
- mineru/cli/models_download.py,sha256=7KA-Boe-eIt3WW6eyaxM1HfubTXLsQ8sMmT1H1X7vAc,4815
24
+ mineru/cli/gradio_app.py,sha256=6dA0ARpdOoewFeXmHrleF1amCgBV9ilY_nkWAmAmN8A,14731
25
+ mineru/cli/models_download.py,sha256=LNfoIpUlJM7m7qb2SiCxtjMDw4jILBQtZwNP2JoY81U,4815
26
26
  mineru/cli/vlm_vllm_server.py,sha256=fQJyD-gIPQ41hR_6aIaDJczl66N310t0CiZEBAfX5mc,90
27
27
  mineru/data/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
28
28
  mineru/data/data_reader_writer/__init__.py,sha256=9qnGNrsuGBMwwfsQy6oChdkz--a_LPdYWE0VZZr0yr4,490
@@ -62,8 +62,7 @@ mineru/model/mfr/unimernet/unimernet_hf/unimer_swin/configuration_unimer_swin.py
62
62
  mineru/model/mfr/unimernet/unimernet_hf/unimer_swin/image_processing_unimer_swin.py,sha256=a9kCvwzJJSRrKQNtW2oOpTwrapzep8BjGFWLhLF1T0k,6036
63
63
  mineru/model/mfr/unimernet/unimernet_hf/unimer_swin/modeling_unimer_swin.py,sha256=Q_fdmFHUBtEoAfWp9aowdwTCE2MIFMOPbYjoSyXK2iU,48929
64
64
  mineru/model/ocr/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
65
- mineru/model/ocr/paddleocr2pytorch/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
66
- mineru/model/ocr/paddleocr2pytorch/pytorch_paddle.py,sha256=wZOw82q1NARNHBW2Lk5zumjdAqzPZqnhV6rvMULvLs8,9207
65
+ mineru/model/ocr/pytorch_paddle.py,sha256=cHMTl5sKyn4BY2207-7GQ4eZl9BQUcs5ucxw_NFezII,9200
67
66
  mineru/model/ori_cls/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
68
67
  mineru/model/ori_cls/paddle_ori_cls.py,sha256=VIS22IerHST7g60AC9r2PEQIG6NQWeQaH1OrXIxNTsg,11943
69
68
  mineru/model/reading_order/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
@@ -72,7 +71,7 @@ mineru/model/reading_order/xycut.py,sha256=ezNSq_Y4UXiztB58hbXJsjTJlOBqWIjuW5A2u
72
71
  mineru/model/table/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
73
72
  mineru/model/table/cls/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
74
73
  mineru/model/table/cls/paddle_table_cls.py,sha256=5PtieKQnAzgMNRTZFgnqQsGWKTEQ3yyFWQnBRIjfQ4A,5781
75
- mineru/model/table/rec/RapidTable.py,sha256=FxO3dLNKfQrgcQU7gRI0kLAxllnoHWZptCtyyHNuMpM,5973
74
+ mineru/model/table/rec/RapidTable.py,sha256=2dNdGJsVdsGfRm6r3deERUMst5RIxH0YuiGALkQbNTw,5955
76
75
  mineru/model/table/rec/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
77
76
  mineru/model/table/rec/slanet_plus/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
78
77
  mineru/model/table/rec/slanet_plus/main.py,sha256=vfrcvQ9JBf32YZU9eNoetoqdpcrFNsA1WNqQBsG8i2o,7646
@@ -152,8 +151,9 @@ mineru/resources/header.html,sha256=PUselBXLBn8gfeP3zwEtj6zIxfhcCN4vN_B796nQFNQ,
152
151
  mineru/resources/fasttext-langdetect/lid.176.ftz,sha256=jzRyz-hzintgmejpmcPL-uDc0VaWqsfXc4qAOdtgPoM,938013
153
152
  mineru/utils/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
154
153
  mineru/utils/block_pre_proc.py,sha256=uGBmxf2MR9bplTnQI8xHjCI-kj3plRhJr0hcWKidbOQ,9632
155
- mineru/utils/block_sort.py,sha256=mViceDw3O2ksBDFxt-wmX67bCZOwKyp68yZnEjS3Ijc,12934
154
+ mineru/utils/block_sort.py,sha256=5e1mOLB3W7xu5Y1hmhvGSHPL_aQ41R_4VXcP4vjYAOU,12976
156
155
  mineru/utils/boxbase.py,sha256=moP660AmZq_udHEsfvFkTQdJ4gjrrBwN7t0Enx7CIL8,6903
156
+ mineru/utils/check_mac_env.py,sha256=pbmbcnS60zZRqNRBxFJbbPrCosU1lC464b0v6JUlgaE,1031
157
157
  mineru/utils/cli_parser.py,sha256=4seFAu1kulsYnw6WM2q_cxgEOt2tErZVkI-LNEF_kGw,1445
158
158
  mineru/utils/config_reader.py,sha256=IRVWTpBnbnRpck6eXZUKw-fcLt7hon5S4uqWW-RBb1w,4075
159
159
  mineru/utils/cut_image.py,sha256=g3m4nfcJNWlxi-P0kpXTtlmspXkMcLCfGwmYuQ-Z2hE,751
@@ -163,7 +163,7 @@ mineru/utils/format_utils.py,sha256=2s89vHcSISjuolk8Hvg3K-5-rRbiT3Us7eFLzUKrNKs,
163
163
  mineru/utils/guess_suffix_or_lang.py,sha256=nznyQpUn1BSA8JNw9HuG3pVV-xtVAtrtcGuHZ-VXt9M,856
164
164
  mineru/utils/hash_utils.py,sha256=UPS_8NRBmVumdyOv16Lmv6Ly2xK8OVDJEe5gG6gKIFk,857
165
165
  mineru/utils/language.py,sha256=7RT3mxSa7jdpoC5ySd7ZddHA7TO7UsnmDOWiYZAxuyg,1433
166
- mineru/utils/llm_aided.py,sha256=eBGKCD7cJBjkyn38yqCdh0S-fgRG9fLuQCByLDQuyWs,4983
166
+ mineru/utils/llm_aided.py,sha256=9WUytvxenSAuaWR4sTQhVPQ5h8pY0wVOH1O2sj_6dLs,5149
167
167
  mineru/utils/magic_model_utils.py,sha256=2xOvi4oqg3MSw1FUrJTnYDtWeFrrm6qbmlEorLZSaYs,5650
168
168
  mineru/utils/model_utils.py,sha256=6OsgFLsABX5JuShSzCMSNHWV-yi-1cjwHweafyxIgRo,18448
169
169
  mineru/utils/models_download_utils.py,sha256=UfjvwhxO6BkJHa5JSpEVNZ71GoLMPMmJpym3THET2T4,2957
@@ -176,9 +176,9 @@ mineru/utils/run_async.py,sha256=rPeP4BCZerR8VByRDhiYzfZiahLVqoZEBVAS54dAjNg,128
176
176
  mineru/utils/span_block_fix.py,sha256=0eVQjJCrT03woRt9hoh6Uu42Tp1dacfGTv2x3B9qq94,8797
177
177
  mineru/utils/span_pre_proc.py,sha256=h41q2uQajI0xQbc_30hqaju1dv3oVYxBAlKgURl8HIc,13692
178
178
  mineru/utils/table_merge.py,sha256=d98zNbM1ZQ8V1kUt6RugParNUNPv7DGL-XKIzR3iJVQ,15360
179
- mineru-2.6.2.dist-info/licenses/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
180
- mineru-2.6.2.dist-info/METADATA,sha256=QGCp0YLuKymDMYmMZuOn8IYM-kpbKas5nKF7yl3la_0,68440
181
- mineru-2.6.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
182
- mineru-2.6.2.dist-info/entry_points.txt,sha256=luXmbhPiZK_tKlRgWuYOaW_V6EFpG-yJcAevVv9MEqE,252
183
- mineru-2.6.2.dist-info/top_level.txt,sha256=zuGQfZcbsHv4I4oKI9gaKPqEWBFm6xJroKuug2LnKP8,7
184
- mineru-2.6.2.dist-info/RECORD,,
179
+ mineru-2.6.3.dist-info/licenses/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
180
+ mineru-2.6.3.dist-info/METADATA,sha256=mUBTxFG5tgdyQ1caZVRNrk4MhIX36PECY09dwCNYXZ4,70689
181
+ mineru-2.6.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
182
+ mineru-2.6.3.dist-info/entry_points.txt,sha256=luXmbhPiZK_tKlRgWuYOaW_V6EFpG-yJcAevVv9MEqE,252
183
+ mineru-2.6.3.dist-info/top_level.txt,sha256=zuGQfZcbsHv4I4oKI9gaKPqEWBFm6xJroKuug2LnKP8,7
184
+ mineru-2.6.3.dist-info/RECORD,,
@@ -1 +0,0 @@
1
- # Copyright (c) Opendatalab. All rights reserved.
File without changes