mineru 2.6.3__py3-none-any.whl → 2.6.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -99,7 +99,10 @@ def doc_analyze(
99
99
  _lang = lang_list[pdf_idx]
100
100
 
101
101
  # 收集每个数据集中的页面
102
+ # load_images_start = time.time()
102
103
  images_list, pdf_doc = load_images_from_pdf(pdf_bytes, image_type=ImageType.PIL)
104
+ # load_images_time = round(time.time() - load_images_start, 2)
105
+ # logger.debug(f"load images cost: {load_images_time}, speed: {round(len(images_list) / load_images_time, 3)} images/s")
103
106
  all_image_lists.append(images_list)
104
107
  all_pdf_docs.append(pdf_doc)
105
108
  for page_idx in range(len(images_list)):
@@ -3,6 +3,7 @@ import os
3
3
  from loguru import logger
4
4
  from packaging import version
5
5
 
6
+ from mineru.utils.check_sys_env import is_windows_environment, is_linux_environment
6
7
  from mineru.utils.config_reader import get_device
7
8
  from mineru.utils.model_utils import get_vram
8
9
 
@@ -11,14 +12,16 @@ def enable_custom_logits_processors() -> bool:
11
12
  import torch
12
13
  from vllm import __version__ as vllm_version
13
14
 
14
- if not torch.cuda.is_available():
15
+ if torch.cuda.is_available():
16
+ major, minor = torch.cuda.get_device_capability()
17
+ # 正确计算Compute Capability
18
+ compute_capability = f"{major}.{minor}"
19
+ elif hasattr(torch, 'npu') and torch.npu.is_available():
20
+ compute_capability = "8.0"
21
+ else:
15
22
  logger.info("CUDA not available, disabling custom_logits_processors")
16
23
  return False
17
24
 
18
- major, minor = torch.cuda.get_device_capability()
19
- # 正确计算Compute Capability
20
- compute_capability = f"{major}.{minor}"
21
-
22
25
  # 安全地处理环境变量
23
26
  vllm_use_v1_str = os.getenv('VLLM_USE_V1', "1")
24
27
  if vllm_use_v1_str.isdigit():
@@ -44,6 +47,29 @@ def enable_custom_logits_processors() -> bool:
44
47
  return True
45
48
 
46
49
 
50
+ def set_lmdeploy_backend(device_type: str) -> str:
51
+ if device_type.lower() in ["ascend", "maca", "camb"]:
52
+ lmdeploy_backend = "pytorch"
53
+ elif device_type.lower() in ["cuda"]:
54
+ import torch
55
+ if not torch.cuda.is_available():
56
+ raise ValueError("CUDA is not available.")
57
+ if is_windows_environment():
58
+ lmdeploy_backend = "turbomind"
59
+ elif is_linux_environment():
60
+ major, minor = torch.cuda.get_device_capability()
61
+ compute_capability = f"{major}.{minor}"
62
+ if version.parse(compute_capability) >= version.parse("8.0"):
63
+ lmdeploy_backend = "pytorch"
64
+ else:
65
+ lmdeploy_backend = "turbomind"
66
+ else:
67
+ raise ValueError("Unsupported operating system.")
68
+ else:
69
+ raise ValueError(f"Unsupported lmdeploy device type: {device_type}")
70
+ return lmdeploy_backend
71
+
72
+
47
73
  def set_default_gpu_memory_utilization() -> float:
48
74
  from vllm import __version__ as vllm_version
49
75
  if version.parse(vllm_version) >= version.parse("0.11.0"):
@@ -4,11 +4,12 @@ import time
4
4
 
5
5
  from loguru import logger
6
6
 
7
- from .utils import enable_custom_logits_processors, set_default_gpu_memory_utilization, set_default_batch_size
7
+ from .utils import enable_custom_logits_processors, set_default_gpu_memory_utilization, set_default_batch_size, \
8
+ set_lmdeploy_backend
8
9
  from .model_output_to_middle_json import result_to_middle_json
9
10
  from ...data.data_reader_writer import DataWriter
10
11
  from mineru.utils.pdf_image_tools import load_images_from_pdf
11
- from ...utils.check_mac_env import is_mac_os_version_supported
12
+ from ...utils.check_sys_env import is_mac_os_version_supported
12
13
  from ...utils.config_reader import get_device
13
14
 
14
15
  from ...utils.enum_class import ImageType
@@ -40,94 +41,149 @@ class ModelSingleton:
40
41
  model = None
41
42
  processor = None
42
43
  vllm_llm = None
44
+ lmdeploy_engine = None
43
45
  vllm_async_llm = None
44
46
  batch_size = kwargs.get("batch_size", 0) # for transformers backend only
45
47
  max_concurrency = kwargs.get("max_concurrency", 100) # for http-client backend only
46
48
  http_timeout = kwargs.get("http_timeout", 600) # for http-client backend only
49
+ server_headers = kwargs.get("server_headers", None) # for http-client backend only
50
+ max_retries = kwargs.get("max_retries", 3) # for http-client backend only
51
+ retry_backoff_factor = kwargs.get("retry_backoff_factor", 0.5) # for http-client backend only
47
52
  # 从kwargs中移除这些参数,避免传递给不相关的初始化函数
48
- for param in ["batch_size", "max_concurrency", "http_timeout"]:
53
+ for param in ["batch_size", "max_concurrency", "http_timeout", "server_headers", "max_retries", "retry_backoff_factor"]:
49
54
  if param in kwargs:
50
55
  del kwargs[param]
51
- if backend in ['transformers', 'vllm-engine', "vllm-async-engine", "mlx-engine"] and not model_path:
56
+ if backend not in ["http-client"] and not model_path:
52
57
  model_path = auto_download_and_get_model_root_path("/","vlm")
53
- if backend == "transformers":
58
+ if backend == "transformers":
59
+ try:
60
+ from transformers import (
61
+ AutoProcessor,
62
+ Qwen2VLForConditionalGeneration,
63
+ )
64
+ from transformers import __version__ as transformers_version
65
+ except ImportError:
66
+ raise ImportError("Please install transformers to use the transformers backend.")
67
+
68
+ if version.parse(transformers_version) >= version.parse("4.56.0"):
69
+ dtype_key = "dtype"
70
+ else:
71
+ dtype_key = "torch_dtype"
72
+ device = get_device()
73
+ model = Qwen2VLForConditionalGeneration.from_pretrained(
74
+ model_path,
75
+ device_map={"": device},
76
+ **{dtype_key: "auto"}, # type: ignore
77
+ )
78
+ processor = AutoProcessor.from_pretrained(
79
+ model_path,
80
+ use_fast=True,
81
+ )
82
+ if batch_size == 0:
83
+ batch_size = set_default_batch_size()
84
+ elif backend == "mlx-engine":
85
+ mlx_supported = is_mac_os_version_supported()
86
+ if not mlx_supported:
87
+ raise EnvironmentError("mlx-engine backend is only supported on macOS 13.5+ with Apple Silicon.")
88
+ try:
89
+ from mlx_vlm import load as mlx_load
90
+ except ImportError:
91
+ raise ImportError("Please install mlx-vlm to use the mlx-engine backend.")
92
+ model, processor = mlx_load(model_path)
93
+ else:
94
+ if os.getenv('OMP_NUM_THREADS') is None:
95
+ os.environ["OMP_NUM_THREADS"] = "1"
96
+
97
+ if backend == "vllm-engine":
54
98
  try:
55
- from transformers import (
56
- AutoProcessor,
57
- Qwen2VLForConditionalGeneration,
58
- )
59
- from transformers import __version__ as transformers_version
99
+ import vllm
60
100
  except ImportError:
61
- raise ImportError("Please install transformers to use the transformers backend.")
62
-
63
- if version.parse(transformers_version) >= version.parse("4.56.0"):
64
- dtype_key = "dtype"
101
+ raise ImportError("Please install vllm to use the vllm-engine backend.")
102
+ if "gpu_memory_utilization" not in kwargs:
103
+ kwargs["gpu_memory_utilization"] = set_default_gpu_memory_utilization()
104
+ if "model" not in kwargs:
105
+ kwargs["model"] = model_path
106
+ if enable_custom_logits_processors() and ("logits_processors" not in kwargs):
107
+ from mineru_vl_utils import MinerULogitsProcessor
108
+ kwargs["logits_processors"] = [MinerULogitsProcessor]
109
+ # 使用kwargs为 vllm初始化参数
110
+ vllm_llm = vllm.LLM(**kwargs)
111
+ elif backend == "vllm-async-engine":
112
+ try:
113
+ from vllm.engine.arg_utils import AsyncEngineArgs
114
+ from vllm.v1.engine.async_llm import AsyncLLM
115
+ except ImportError:
116
+ raise ImportError("Please install vllm to use the vllm-async-engine backend.")
117
+ if "gpu_memory_utilization" not in kwargs:
118
+ kwargs["gpu_memory_utilization"] = set_default_gpu_memory_utilization()
119
+ if "model" not in kwargs:
120
+ kwargs["model"] = model_path
121
+ if enable_custom_logits_processors() and ("logits_processors" not in kwargs):
122
+ from mineru_vl_utils import MinerULogitsProcessor
123
+ kwargs["logits_processors"] = [MinerULogitsProcessor]
124
+ # 使用kwargs为 vllm初始化参数
125
+ vllm_async_llm = AsyncLLM.from_engine_args(AsyncEngineArgs(**kwargs))
126
+ elif backend == "lmdeploy-engine":
127
+ try:
128
+ from lmdeploy import PytorchEngineConfig, TurbomindEngineConfig
129
+ from lmdeploy.serve.vl_async_engine import VLAsyncEngine
130
+ except ImportError:
131
+ raise ImportError("Please install lmdeploy to use the lmdeploy-engine backend.")
132
+ if "cache_max_entry_count" not in kwargs:
133
+ kwargs["cache_max_entry_count"] = 0.5
134
+
135
+ device_type = os.getenv("MINERU_LMDEPLOY_DEVICE", "")
136
+ if device_type == "":
137
+ if "lmdeploy_device" in kwargs:
138
+ device_type = kwargs.pop("lmdeploy_device")
139
+ if device_type not in ["cuda", "ascend", "maca", "camb"]:
140
+ raise ValueError(f"Unsupported lmdeploy device type: {device_type}")
141
+ else:
142
+ device_type = "cuda"
143
+ lm_backend = os.getenv("MINERU_LMDEPLOY_BACKEND", "")
144
+ if lm_backend == "":
145
+ if "lmdeploy_backend" in kwargs:
146
+ lm_backend = kwargs.pop("lmdeploy_backend")
147
+ if lm_backend not in ["pytorch", "turbomind"]:
148
+ raise ValueError(f"Unsupported lmdeploy backend: {lm_backend}")
149
+ else:
150
+ lm_backend = set_lmdeploy_backend(device_type)
151
+ logger.info(f"lmdeploy device is: {device_type}, lmdeploy backend is: {lm_backend}")
152
+
153
+ if lm_backend == "pytorch":
154
+ kwargs["device_type"] = device_type
155
+ backend_config = PytorchEngineConfig(**kwargs)
156
+ elif lm_backend == "turbomind":
157
+ backend_config = TurbomindEngineConfig(**kwargs)
65
158
  else:
66
- dtype_key = "torch_dtype"
67
- device = get_device()
68
- model = Qwen2VLForConditionalGeneration.from_pretrained(
69
- model_path,
70
- device_map={"": device},
71
- **{dtype_key: "auto"}, # type: ignore
72
- )
73
- processor = AutoProcessor.from_pretrained(
159
+ raise ValueError(f"Unsupported lmdeploy backend: {lm_backend}")
160
+
161
+ log_level = 'ERROR'
162
+ from lmdeploy.utils import get_logger
163
+ lm_logger = get_logger('lmdeploy')
164
+ lm_logger.setLevel(log_level)
165
+ if os.getenv('TM_LOG_LEVEL') is None:
166
+ os.environ['TM_LOG_LEVEL'] = log_level
167
+
168
+ lmdeploy_engine = VLAsyncEngine(
74
169
  model_path,
75
- use_fast=True,
170
+ backend=lm_backend,
171
+ backend_config=backend_config,
76
172
  )
77
- if batch_size == 0:
78
- batch_size = set_default_batch_size()
79
- elif backend == "mlx-engine":
80
- mlx_supported = is_mac_os_version_supported()
81
- if not mlx_supported:
82
- raise EnvironmentError("mlx-engine backend is only supported on macOS 13.5+ with Apple Silicon.")
83
- try:
84
- from mlx_vlm import load as mlx_load
85
- except ImportError:
86
- raise ImportError("Please install mlx-vlm to use the mlx-engine backend.")
87
- model, processor = mlx_load(model_path)
88
- else:
89
- if os.getenv('OMP_NUM_THREADS') is None:
90
- os.environ["OMP_NUM_THREADS"] = "1"
91
-
92
- if backend == "vllm-engine":
93
- try:
94
- import vllm
95
- from mineru_vl_utils import MinerULogitsProcessor
96
- except ImportError:
97
- raise ImportError("Please install vllm to use the vllm-engine backend.")
98
- if "gpu_memory_utilization" not in kwargs:
99
- kwargs["gpu_memory_utilization"] = set_default_gpu_memory_utilization()
100
- if "model" not in kwargs:
101
- kwargs["model"] = model_path
102
- if enable_custom_logits_processors() and ("logits_processors" not in kwargs):
103
- kwargs["logits_processors"] = [MinerULogitsProcessor]
104
- # 使用kwargs为 vllm初始化参数
105
- vllm_llm = vllm.LLM(**kwargs)
106
- elif backend == "vllm-async-engine":
107
- try:
108
- from vllm.engine.arg_utils import AsyncEngineArgs
109
- from vllm.v1.engine.async_llm import AsyncLLM
110
- from mineru_vl_utils import MinerULogitsProcessor
111
- except ImportError:
112
- raise ImportError("Please install vllm to use the vllm-async-engine backend.")
113
- if "gpu_memory_utilization" not in kwargs:
114
- kwargs["gpu_memory_utilization"] = set_default_gpu_memory_utilization()
115
- if "model" not in kwargs:
116
- kwargs["model"] = model_path
117
- if enable_custom_logits_processors() and ("logits_processors" not in kwargs):
118
- kwargs["logits_processors"] = [MinerULogitsProcessor]
119
- # 使用kwargs为 vllm初始化参数
120
- vllm_async_llm = AsyncLLM.from_engine_args(AsyncEngineArgs(**kwargs))
121
173
  self._models[key] = MinerUClient(
122
174
  backend=backend,
123
175
  model=model,
124
176
  processor=processor,
177
+ lmdeploy_engine=lmdeploy_engine,
125
178
  vllm_llm=vllm_llm,
126
179
  vllm_async_llm=vllm_async_llm,
127
180
  server_url=server_url,
128
181
  batch_size=batch_size,
129
182
  max_concurrency=max_concurrency,
130
183
  http_timeout=http_timeout,
184
+ server_headers=server_headers,
185
+ max_retries=max_retries,
186
+ retry_backoff_factor=retry_backoff_factor,
131
187
  )
132
188
  elapsed = round(time.time() - start_time, 2)
133
189
  logger.info(f"get {backend} predictor cost: {elapsed}s")
@@ -177,7 +233,7 @@ async def aio_doc_analyze(
177
233
  images_list, pdf_doc = load_images_from_pdf(pdf_bytes, image_type=ImageType.PIL)
178
234
  images_pil_list = [image_dict["img_pil"] for image_dict in images_list]
179
235
  # load_images_time = round(time.time() - load_images_start, 2)
180
- # logger.info(f"load images cost: {load_images_time}, speed: {round(len(images_base64_list)/load_images_time, 3)} images/s")
236
+ # logger.debug(f"load images cost: {load_images_time}, speed: {round(len(images_pil_list)/load_images_time, 3)} images/s")
181
237
 
182
238
  # infer_start = time.time()
183
239
  results = await predictor.aio_batch_two_step_extract(images=images_pil_list)
mineru/cli/client.py CHANGED
@@ -4,7 +4,7 @@ import click
4
4
  from pathlib import Path
5
5
  from loguru import logger
6
6
 
7
- from mineru.utils.check_mac_env import is_mac_os_version_supported
7
+ from mineru.utils.check_sys_env import is_mac_os_version_supported
8
8
  from mineru.utils.cli_parser import arg_parse
9
9
  from mineru.utils.config_reader import get_device
10
10
  from mineru.utils.guess_suffix_or_lang import guess_suffix_by_path
@@ -13,7 +13,7 @@ from ..version import __version__
13
13
  from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
14
14
 
15
15
 
16
- backends = ['pipeline', 'vlm-transformers', 'vlm-vllm-engine', 'vlm-http-client']
16
+ backends = ['pipeline', 'vlm-transformers', 'vlm-vllm-engine', 'vlm-lmdeploy-engine', 'vlm-http-client']
17
17
  if is_mac_os_version_supported():
18
18
  backends.append("vlm-mlx-engine")
19
19
 
@@ -62,9 +62,10 @@ if is_mac_os_version_supported():
62
62
  the backend for parsing pdf:
63
63
  pipeline: More general.
64
64
  vlm-transformers: More general, but slower.
65
- vlm-mlx-engine: Faster than transformers.
66
- vlm-vllm-engine: Faster(engine).
67
- vlm-http-client: Faster(client).
65
+ vlm-mlx-engine: Faster than transformers(macOS 13.5+).
66
+ vlm-vllm-engine: Faster(vllm-engine).
67
+ vlm-lmdeploy-engine: Faster(lmdeploy-engine).
68
+ vlm-http-client: Faster(client suitable for openai-compatible servers).
68
69
  Without method specified, pipeline will be used by default.""",
69
70
  default='pipeline',
70
71
  )
mineru/cli/common.py CHANGED
@@ -5,8 +5,8 @@ import os
5
5
  import copy
6
6
  from pathlib import Path
7
7
 
8
- import pypdfium2 as pdfium
9
8
  from loguru import logger
9
+ import pypdfium2 as pdfium
10
10
 
11
11
  from mineru.data.data_reader_writer import FileBasedDataWriter
12
12
  from mineru.utils.draw_bbox import draw_layout_bbox, draw_span_bbox, draw_line_sort_bbox
@@ -16,10 +16,17 @@ from mineru.utils.pdf_image_tools import images_bytes_to_pdf_bytes
16
16
  from mineru.backend.vlm.vlm_middle_json_mkcontent import union_make as vlm_union_make
17
17
  from mineru.backend.vlm.vlm_analyze import doc_analyze as vlm_doc_analyze
18
18
  from mineru.backend.vlm.vlm_analyze import aio_doc_analyze as aio_vlm_doc_analyze
19
+ from mineru.utils.pdf_page_id import get_end_page_id
20
+
21
+ if os.getenv("MINERU_LMDEPLOY_DEVICE", "") == "maca":
22
+ import torch
23
+ torch.backends.cudnn.enabled = False
24
+
19
25
 
20
26
  pdf_suffixes = ["pdf"]
21
27
  image_suffixes = ["png", "jpeg", "jp2", "webp", "gif", "bmp", "jpg", "tiff"]
22
28
 
29
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
23
30
 
24
31
  def read_fn(path):
25
32
  if not isinstance(path, Path):
@@ -44,24 +51,21 @@ def prepare_env(output_dir, pdf_file_name, parse_method):
44
51
 
45
52
 
46
53
  def convert_pdf_bytes_to_bytes_by_pypdfium2(pdf_bytes, start_page_id=0, end_page_id=None):
54
+ pdf = pdfium.PdfDocument(pdf_bytes)
55
+ output_pdf = pdfium.PdfDocument.new()
47
56
  try:
48
- # 从字节数据加载PDF
49
- pdf = pdfium.PdfDocument(pdf_bytes)
50
-
51
- # 确定结束页
52
- end_page_id = end_page_id if end_page_id is not None and end_page_id >= 0 else len(pdf) - 1
53
- if end_page_id > len(pdf) - 1:
54
- logger.warning("end_page_id is out of range, use pdf_docs length")
55
- end_page_id = len(pdf) - 1
56
-
57
- # 创建一个新的PDF文档
58
- output_pdf = pdfium.PdfDocument.new()
59
-
60
- # 选择要导入的页面索引
61
- page_indices = list(range(start_page_id, end_page_id + 1))
62
-
63
- # 从原PDF导入页面到新PDF
64
- output_pdf.import_pages(pdf, page_indices)
57
+ end_page_id = get_end_page_id(end_page_id, len(pdf))
58
+
59
+ # 逐页导入,失败则跳过
60
+ output_index = 0
61
+ for page_index in range(start_page_id, end_page_id + 1):
62
+ try:
63
+ output_pdf.import_pages(pdf, pages=[page_index])
64
+ output_index += 1
65
+ except Exception as page_error:
66
+ output_pdf.del_page(output_index)
67
+ logger.warning(f"Failed to import page {page_index}: {page_error}, skipping this page.")
68
+ continue
65
69
 
66
70
  # 将新PDF保存到内存缓冲区
67
71
  output_buffer = io.BytesIO()
@@ -69,13 +73,11 @@ def convert_pdf_bytes_to_bytes_by_pypdfium2(pdf_bytes, start_page_id=0, end_page
69
73
 
70
74
  # 获取字节数据
71
75
  output_bytes = output_buffer.getvalue()
72
-
73
- pdf.close() # 关闭原PDF文档以释放资源
74
- output_pdf.close() # 关闭新PDF文档以释放资源
75
76
  except Exception as e:
76
77
  logger.warning(f"Error in converting PDF bytes: {e}, Using original PDF bytes.")
77
78
  output_bytes = pdf_bytes
78
-
79
+ pdf.close()
80
+ output_pdf.close()
79
81
  return output_bytes
80
82
 
81
83
 
mineru/cli/gradio_app.py CHANGED
@@ -13,7 +13,7 @@ from gradio_pdf import PDF
13
13
  from loguru import logger
14
14
 
15
15
  from mineru.cli.common import prepare_env, read_fn, aio_do_parse, pdf_suffixes, image_suffixes
16
- from mineru.utils.check_mac_env import is_mac_os_version_supported
16
+ from mineru.utils.check_sys_env import is_mac_os_version_supported
17
17
  from mineru.utils.cli_parser import arg_parse
18
18
  from mineru.utils.hash_utils import str_sha256
19
19
 
@@ -274,7 +274,7 @@ def to_pdf(file_path):
274
274
 
275
275
  # 更新界面函数
276
276
  def update_interface(backend_choice):
277
- if backend_choice in ["vlm-transformers", "vlm-vllm-async-engine", "vlm-mlx-engine"]:
277
+ if backend_choice in ["vlm-transformers", "vlm-vllm-async-engine", "vlm-lmdeploy-engine", "vlm-mlx-engine"]:
278
278
  return gr.update(visible=False), gr.update(visible=False)
279
279
  elif backend_choice in ["vlm-http-client"]:
280
280
  return gr.update(visible=True), gr.update(visible=False)
@@ -301,6 +301,13 @@ def update_interface(backend_choice):
301
301
  help="Enable vLLM engine backend for faster processing.",
302
302
  default=False,
303
303
  )
304
+ @click.option(
305
+ '--enable-lmdeploy-engine',
306
+ 'lmdeploy_engine_enable',
307
+ type=bool,
308
+ help="Enable LMDeploy engine backend for faster processing.",
309
+ default=False,
310
+ )
304
311
  @click.option(
305
312
  '--enable-api',
306
313
  'api_enable',
@@ -338,7 +345,7 @@ def update_interface(backend_choice):
338
345
  default='all',
339
346
  )
340
347
  def main(ctx,
341
- example_enable, vllm_engine_enable, api_enable, max_convert_pages,
348
+ example_enable, vllm_engine_enable, lmdeploy_engine_enable, api_enable, max_convert_pages,
342
349
  server_name, server_port, latex_delimiters_type, **kwargs
343
350
  ):
344
351
 
@@ -367,6 +374,20 @@ def main(ctx,
367
374
  print("vLLM engine init successfully.")
368
375
  except Exception as e:
369
376
  logger.exception(e)
377
+ elif lmdeploy_engine_enable:
378
+ try:
379
+ print("Start init LMDeploy engine...")
380
+ from mineru.backend.vlm.vlm_analyze import ModelSingleton
381
+ model_singleton = ModelSingleton()
382
+ predictor = model_singleton.get_model(
383
+ "lmdeploy-engine",
384
+ None,
385
+ None,
386
+ **kwargs
387
+ )
388
+ print("LMDeploy engine init successfully.")
389
+ except Exception as e:
390
+ logger.exception(e)
370
391
  suffixes = [f".{suffix}" for suffix in pdf_suffixes + image_suffixes]
371
392
  with gr.Blocks() as demo:
372
393
  gr.HTML(header)
@@ -380,6 +401,9 @@ def main(ctx,
380
401
  if vllm_engine_enable:
381
402
  drop_list = ["pipeline", "vlm-vllm-async-engine"]
382
403
  preferred_option = "vlm-vllm-async-engine"
404
+ elif lmdeploy_engine_enable:
405
+ drop_list = ["pipeline", "vlm-lmdeploy-engine"]
406
+ preferred_option = "vlm-lmdeploy-engine"
383
407
  else:
384
408
  drop_list = ["pipeline", "vlm-transformers", "vlm-http-client"]
385
409
  if is_mac_os_version_supported():
@@ -453,4 +477,4 @@ def main(ctx,
453
477
 
454
478
 
455
479
  if __name__ == '__main__':
456
- main()
480
+ main()
@@ -0,0 +1,61 @@
1
+ import click
2
+ import sys
3
+
4
+ from loguru import logger
5
+
6
+
7
+ def vllm_server():
8
+ from mineru.model.vlm.vllm_server import main
9
+ main()
10
+
11
+
12
+ def lmdeploy_server():
13
+ from mineru.model.vlm.lmdeploy_server import main
14
+ main()
15
+
16
+
17
+ @click.command(context_settings=dict(ignore_unknown_options=True, allow_extra_args=True))
18
+ @click.option(
19
+ '-e',
20
+ '--engine',
21
+ 'inference_engine',
22
+ type=click.Choice(['auto', 'vllm', 'lmdeploy']),
23
+ default='auto',
24
+ help='Select the inference engine used to accelerate VLM inference, default is "auto".',
25
+ )
26
+ @click.pass_context
27
+ def openai_server(ctx, inference_engine):
28
+ sys.argv = [sys.argv[0]] + ctx.args
29
+ if inference_engine == 'auto':
30
+ try:
31
+ import vllm
32
+ inference_engine = 'vllm'
33
+ logger.info("Using vLLM as the inference engine for VLM server.")
34
+ except ImportError:
35
+ logger.info("vLLM not found, attempting to use LMDeploy as the inference engine for VLM server.")
36
+ try:
37
+ import lmdeploy
38
+ inference_engine = 'lmdeploy'
39
# Log success only after the lmdeploy import above has succeeded
40
+ logger.info("Using LMDeploy as the inference engine for VLM server.")
41
+ except ImportError:
42
+ logger.error("Neither vLLM nor LMDeploy is installed. Please install at least one of them.")
43
+ sys.exit(1)
44
+
45
+ if inference_engine == 'vllm':
46
+ try:
47
+ import vllm
48
+ except ImportError:
49
+ logger.error("vLLM is not installed. Please install vLLM or choose LMDeploy as the inference engine.")
50
+ sys.exit(1)
51
+ vllm_server()
52
+ elif inference_engine == 'lmdeploy':
53
+ try:
54
+ import lmdeploy
55
+ except ImportError:
56
+ logger.error("LMDeploy is not installed. Please install LMDeploy or choose vLLM as the inference engine.")
57
+ sys.exit(1)
58
+ lmdeploy_server()
59
+
60
+ if __name__ == "__main__":
61
+ openai_server()
@@ -16,6 +16,7 @@ from typing import Any, Dict, List, Tuple
16
16
 
17
17
  import numpy as np
18
18
 
19
+ from mineru.utils.os_env_config import get_op_num_threads
19
20
  from .table_structure_utils import (
20
21
  OrtInferSession,
21
22
  TableLabelDecode,
@@ -29,6 +30,9 @@ class TableStructurer:
29
30
  self.preprocess_op = TablePreprocess()
30
31
  self.batch_preprocess_op = BatchTablePreprocess()
31
32
 
33
+ config["intra_op_num_threads"] = get_op_num_threads("MINERU_INTRA_OP_NUM_THREADS")
34
+ config["inter_op_num_threads"] = get_op_num_threads("MINERU_INTER_OP_NUM_THREADS")
35
+
32
36
  self.session = OrtInferSession(config)
33
37
 
34
38
  self.character = self.session.get_metadata()
@@ -5,6 +5,8 @@ from typing import Optional, Dict, Any, Tuple
5
5
  import cv2
6
6
  import numpy as np
7
7
  from skimage import measure
8
+
9
+ from mineru.utils.os_env_config import get_op_num_threads
8
10
  from .utils import OrtInferSession, resize_img
9
11
  from .utils_table_line_rec import (
10
12
  get_table_line,
@@ -28,6 +30,9 @@ class TSRUnet:
28
30
  self.inp_height = 1024
29
31
  self.inp_width = 1024
30
32
 
33
+ config["intra_op_num_threads"] = get_op_num_threads("MINERU_INTRA_OP_NUM_THREADS")
34
+ config["inter_op_num_threads"] = get_op_num_threads("MINERU_INTER_OP_NUM_THREADS")
35
+
31
36
  self.session = OrtInferSession(config)
32
37
 
33
38
  def __call__(
@@ -0,0 +1,93 @@
1
+ import os
2
+ import sys
3
+
4
+ from loguru import logger
5
+
6
+ from mineru.backend.vlm.utils import set_lmdeploy_backend
7
+ from mineru.utils.models_download_utils import auto_download_and_get_model_root_path
8
+
9
+
10
+ def main():
11
+ args = sys.argv[1:]
12
+
13
+ has_port_arg = False
14
+ has_gpu_memory_utilization_arg = False
15
+ has_log_level_arg = False
16
+ device_type = ""
17
+ lm_backend = ""
18
+
19
+ # 检查现有参数
20
+ indices_to_remove = []
21
+
22
+ for i, arg in enumerate(args):
23
+ if arg == "--server-port" or arg.startswith("--server-port="):
24
+ has_port_arg = True
25
+ if arg == "--cache-max-entry-count" or arg.startswith("--cache-max-entry-count="):
26
+ has_gpu_memory_utilization_arg = True
27
+ if arg == "--log-level" or arg.startswith("--log-level="):
28
+ has_log_level_arg = True
29
+ if arg == "--backend" or arg == "--lmdeploy-backend":
30
+ if i + 1 < len(args):
31
+ lm_backend = args[i + 1]
32
+ indices_to_remove.extend([i, i + 1])
33
+ elif arg.startswith("--backend=") or arg.startswith("--lmdeploy-backend="):
34
+ lm_backend = arg.split("=", 1)[1]
35
+ indices_to_remove.append(i)
36
+ if arg == "--device" or arg == "--lmdeploy-device":
37
+ if i + 1 < len(args):
38
+ device_type = args[i + 1]
39
+ indices_to_remove.extend([i, i + 1])
40
+ elif arg.startswith("--device=") or arg.startswith("--lmdeploy-device="):
41
+ device_type = arg.split("=", 1)[1]
42
+ indices_to_remove.append(i)
43
+
44
+ # 从后往前删除,避免索引错位
45
+ for i in sorted(set(indices_to_remove), reverse=True):
46
+ args.pop(i)
47
+
48
+ # 添加默认参数
49
+ if not has_port_arg:
50
+ args.extend(["--server-port", "30000"])
51
+ if not has_gpu_memory_utilization_arg:
52
+ args.extend(["--cache-max-entry-count", "0.5"])
53
+ if not has_log_level_arg:
54
+ args.extend(["--log-level", "ERROR"])
55
+
56
+ device_type = os.getenv("MINERU_LMDEPLOY_DEVICE", device_type)
57
+ if device_type == "":
58
+ device_type = "cuda"
59
+ elif device_type not in ["cuda", "ascend", "maca", "camb"]:
60
+ raise ValueError(f"Unsupported lmdeploy device type: {device_type}")
61
+ lm_backend = os.getenv("MINERU_LMDEPLOY_BACKEND", lm_backend)
62
+ if lm_backend == "":
63
+ lm_backend = set_lmdeploy_backend(device_type)
64
+ elif lm_backend not in ["pytorch", "turbomind"]:
65
+ raise ValueError(f"Unsupported lmdeploy backend: {lm_backend}")
66
+ logger.info(f"lmdeploy device is: {device_type}, lmdeploy backend is: {lm_backend}")
67
+
68
+ if lm_backend == "pytorch":
69
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
70
+
71
+
72
+ args.extend(["--device", device_type])
73
+ args.extend(["--backend", lm_backend])
74
+
75
+ model_path = auto_download_and_get_model_root_path("/", "vlm")
76
+
77
+ # logger.debug(args)
78
+
79
+ # 重构参数,将模型路径作为位置参数
80
+ sys.argv = [sys.argv[0]] + ["serve", "api_server", model_path] + args
81
+
82
+ if os.getenv('OMP_NUM_THREADS') is None:
83
+ os.environ["OMP_NUM_THREADS"] = "1"
84
+
85
+ # 启动 lmdeploy 服务器
86
+ print(f"start lmdeploy server: {sys.argv}")
87
+
88
+ # 使用os.system调用启动lmdeploy服务器
89
+ os.system("lmdeploy " + " ".join(sys.argv[1:]))
90
+
91
+
92
+ if __name__ == "__main__":
93
+ main()
@@ -4,11 +4,19 @@ import platform
4
4
  from packaging import version
5
5
 
6
6
 
7
+ def is_windows_environment() -> bool:
8
+ return platform.system() == "Windows"
9
+
10
+
7
11
  # Detect if the current environment is a Mac computer
8
12
  def is_mac_environment() -> bool:
9
13
  return platform.system() == "Darwin"
10
14
 
11
15
 
16
+ def is_linux_environment() -> bool:
17
+ return platform.system() == "Linux"
18
+
19
+
12
20
  # Detect if CPU is Apple Silicon architecture
13
21
  def is_apple_silicon_cpu() -> bool:
14
22
  return platform.machine() in ["arm64", "aarch64"]
@@ -0,0 +1,30 @@
1
+ import os
2
+
3
+
4
+ def get_op_num_threads(env_name: str) -> int:
5
+ env_value = os.getenv(env_name, None)
6
+ return get_value_from_string(env_value, -1)
7
+
8
+
9
+ def get_load_images_timeout() -> int:
10
+ env_value = os.getenv('MINERU_PDF_RENDER_TIMEOUT', None)
11
+ return get_value_from_string(env_value, 300)
12
+
13
+
14
+ def get_value_from_string(env_value: str, default_value: int) -> int:
15
+ if env_value is not None:
16
+ try:
17
+ num_threads = int(env_value)
18
+ if num_threads > 0:
19
+ return num_threads
20
+ except ValueError:
21
+ return default_value
22
+ return default_value
23
+
24
+
25
+ if __name__ == '__main__':
26
+ print(get_value_from_string('1', -1))
27
+ print(get_value_from_string('0', -1))
28
+ print(get_value_from_string('-1', -1))
29
+ print(get_value_from_string('abc', -1))
30
+ print(get_load_images_timeout())
@@ -1,4 +1,5 @@
1
1
  # Copyright (c) Opendatalab. All rights reserved.
2
+ import os
2
3
  from io import BytesIO
3
4
 
4
5
  import numpy as np
@@ -7,9 +8,14 @@ from loguru import logger
7
8
  from PIL import Image
8
9
 
9
10
  from mineru.data.data_reader_writer import FileBasedDataWriter
11
+ from mineru.utils.check_sys_env import is_windows_environment
12
+ from mineru.utils.os_env_config import get_load_images_timeout
10
13
  from mineru.utils.pdf_reader import image_to_b64str, image_to_bytes, page_to_image
11
- from .enum_class import ImageType
12
- from .hash_utils import str_sha256
14
+ from mineru.utils.enum_class import ImageType
15
+ from mineru.utils.hash_utils import str_sha256
16
+ from mineru.utils.pdf_page_id import get_end_page_id
17
+
18
+ from concurrent.futures import ProcessPoolExecutor, TimeoutError as FuturesTimeoutError
13
19
 
14
20
 
15
21
  def pdf_page_to_image(page: pdfium.PdfPage, dpi=200, image_type=ImageType.PIL) -> dict:
@@ -35,7 +41,106 @@ def pdf_page_to_image(page: pdfium.PdfPage, dpi=200, image_type=ImageType.PIL) -
35
41
  return image_dict
36
42
 
37
43
 
44
+ def _load_images_from_pdf_worker(pdf_bytes, dpi, start_page_id, end_page_id, image_type):
45
+ """用于进程池的包装函数"""
46
+ return load_images_from_pdf_core(pdf_bytes, dpi, start_page_id, end_page_id, image_type)
47
+
48
+
38
49
  def load_images_from_pdf(
50
+ pdf_bytes: bytes,
51
+ dpi=200,
52
+ start_page_id=0,
53
+ end_page_id=None,
54
+ image_type=ImageType.PIL,
55
+ timeout=None,
56
+ threads=4,
57
+ ):
58
+ """带超时控制的 PDF 转图片函数,支持多进程加速
59
+
60
+ Args:
61
+ pdf_bytes (bytes): PDF 文件的 bytes
62
+ dpi (int, optional): DPI used when rendering each page to an image. Defaults to 200.
63
+ start_page_id (int, optional): 起始页码. Defaults to 0.
64
+ end_page_id (int | None, optional): 结束页码. Defaults to None.
65
+ image_type (ImageType, optional): 图片类型. Defaults to ImageType.PIL.
66
+ timeout (int | None, optional): Timeout in seconds. If None, it is read from the MINERU_PDF_RENDER_TIMEOUT environment variable; when that is unset or not a positive integer, it defaults to 300 seconds.
67
+ threads (int): 进程数,默认 4
68
+
69
+ Raises:
70
+ TimeoutError: 当转换超时时抛出
71
+ """
72
+ pdf_doc = pdfium.PdfDocument(pdf_bytes)
73
+ if is_windows_environment():
74
+ # Windows 环境下不使用多进程
75
+ return load_images_from_pdf_core(
76
+ pdf_bytes,
77
+ dpi,
78
+ start_page_id,
79
+ get_end_page_id(end_page_id, len(pdf_doc)),
80
+ image_type
81
+ ), pdf_doc
82
+ else:
83
+ if timeout is None:
84
+ timeout = get_load_images_timeout()
85
+ end_page_id = get_end_page_id(end_page_id, len(pdf_doc))
86
+
87
+ # 计算总页数
88
+ total_pages = end_page_id - start_page_id + 1
89
+
90
+ # 实际使用的进程数不超过总页数
91
+ actual_threads = min(os.cpu_count() or 1, threads, total_pages)
92
+
93
+ # 根据实际进程数分组页面范围
94
+ pages_per_thread = max(1, total_pages // actual_threads)
95
+ page_ranges = []
96
+
97
+ for i in range(actual_threads):
98
+ range_start = start_page_id + i * pages_per_thread
99
+ if i == actual_threads - 1:
100
+ # 最后一个进程处理剩余所有页面
101
+ range_end = end_page_id
102
+ else:
103
+ range_end = start_page_id + (i + 1) * pages_per_thread - 1
104
+
105
+ page_ranges.append((range_start, range_end))
106
+
107
+ # logger.debug(f"PDF to images using {actual_threads} processes, page ranges: {page_ranges}")
108
+
109
+ with ProcessPoolExecutor(max_workers=actual_threads) as executor:
110
+ # 提交所有任务
111
+ futures = []
112
+ for range_start, range_end in page_ranges:
113
+ future = executor.submit(
114
+ _load_images_from_pdf_worker,
115
+ pdf_bytes,
116
+ dpi,
117
+ range_start,
118
+ range_end,
119
+ image_type
120
+ )
121
+ futures.append((range_start, future))
122
+
123
+ try:
124
+ # 收集结果并按页码排序
125
+ all_results = []
126
+ for range_start, future in futures:
127
+ images_list = future.result(timeout=timeout)
128
+ all_results.append((range_start, images_list))
129
+
130
+ # 按起始页码排序并合并结果
131
+ all_results.sort(key=lambda x: x[0])
132
+ images_list = []
133
+ for _, imgs in all_results:
134
+ images_list.extend(imgs)
135
+
136
+ return images_list, pdf_doc
137
+ except FuturesTimeoutError:
138
+ pdf_doc.close()
139
+ executor.shutdown(wait=False, cancel_futures=True)
140
+ raise TimeoutError(f"PDF to images conversion timeout after {timeout}s")
141
+
142
+
143
+ def load_images_from_pdf_core(
39
144
  pdf_bytes: bytes,
40
145
  dpi=200,
41
146
  start_page_id=0,
@@ -45,18 +150,17 @@ def load_images_from_pdf(
45
150
  images_list = []
46
151
  pdf_doc = pdfium.PdfDocument(pdf_bytes)
47
152
  pdf_page_num = len(pdf_doc)
48
- end_page_id = end_page_id if end_page_id is not None and end_page_id >= 0 else pdf_page_num - 1
49
- if end_page_id > pdf_page_num - 1:
50
- logger.warning("end_page_id is out of range, use images length")
51
- end_page_id = pdf_page_num - 1
52
-
53
- for index in range(0, pdf_page_num):
54
- if start_page_id <= index <= end_page_id:
55
- page = pdf_doc[index]
56
- image_dict = pdf_page_to_image(page, dpi=dpi, image_type=image_type)
57
- images_list.append(image_dict)
58
-
59
- return images_list, pdf_doc
153
+ end_page_id = get_end_page_id(end_page_id, pdf_page_num)
154
+
155
+ for index in range(start_page_id, end_page_id + 1):
156
+ # logger.debug(f"Converting page {index}/{pdf_page_num} to image")
157
+ page = pdf_doc[index]
158
+ image_dict = pdf_page_to_image(page, dpi=dpi, image_type=image_type)
159
+ images_list.append(image_dict)
160
+
161
+ pdf_doc.close()
162
+
163
+ return images_list
60
164
 
61
165
 
62
166
  def cut_image(bbox: tuple, page_num: int, page_pil_img, return_path, image_writer: FileBasedDataWriter, scale=2):
@@ -0,0 +1,10 @@
1
+ # Copyright (c) Opendatalab. All rights reserved.
2
+ from loguru import logger
3
+
4
+
5
+ def get_end_page_id(end_page_id, pdf_page_num):
6
+ end_page_id = end_page_id if end_page_id is not None and end_page_id >= 0 else pdf_page_num - 1
7
+ if end_page_id > pdf_page_num - 1:
8
+ logger.warning("end_page_id is out of range, use images length")
9
+ end_page_id = pdf_page_num - 1
10
+ return end_page_id
mineru/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "2.6.3"
1
+ __version__ = "2.6.5"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mineru
3
- Version: 2.6.3
3
+ Version: 2.6.5
4
4
  Summary: A practical tool for converting PDF to Markdown
5
5
  License: AGPL-3.0
6
6
  Project-URL: homepage, https://mineru.net/
@@ -36,8 +36,9 @@ Requires-Dist: fast-langdetect<0.3.0,>=0.2.3
36
36
  Requires-Dist: scikit-image<1.0.0,>=0.25.0
37
37
  Requires-Dist: openai<3,>=1.70.0
38
38
  Requires-Dist: beautifulsoup4<5,>=4.13.5
39
- Requires-Dist: magika<0.7.0,>=0.6.2
40
- Requires-Dist: mineru-vl-utils<1,>=0.1.15
39
+ Requires-Dist: magika<1.1.0,>=0.6.2
40
+ Requires-Dist: mineru-vl-utils<1,>=0.1.17
41
+ Requires-Dist: qwen-vl-utils<1,>=0.0.14
41
42
  Provides-Extra: test
42
43
  Requires-Dist: mineru[core]; extra == "test"
43
44
  Requires-Dist: pytest; extra == "test"
@@ -46,10 +47,12 @@ Requires-Dist: coverage; extra == "test"
46
47
  Requires-Dist: fuzzywuzzy; extra == "test"
47
48
  Provides-Extra: vlm
48
49
  Requires-Dist: torch<3,>=2.6.0; extra == "vlm"
49
- Requires-Dist: transformers<5.0.0,>=4.51.1; extra == "vlm"
50
+ Requires-Dist: transformers!=4.57.2,<5.0.0,>=4.51.1; extra == "vlm"
50
51
  Requires-Dist: accelerate>=1.5.1; extra == "vlm"
51
52
  Provides-Extra: vllm
52
53
  Requires-Dist: vllm<0.12,>=0.10.1.1; extra == "vllm"
54
+ Provides-Extra: lmdeploy
55
+ Requires-Dist: lmdeploy<0.12,>=0.10.2; extra == "lmdeploy"
53
56
  Provides-Extra: mlx
54
57
  Requires-Dist: mlx-vlm<0.4,>=0.3.3; extra == "mlx"
55
58
  Provides-Extra: pipeline
@@ -81,7 +84,8 @@ Requires-Dist: mineru[gradio]; extra == "core"
81
84
  Requires-Dist: mineru[mlx]; sys_platform == "darwin" and extra == "core"
82
85
  Provides-Extra: all
83
86
  Requires-Dist: mineru[core]; extra == "all"
84
- Requires-Dist: mineru[vllm]; extra == "all"
87
+ Requires-Dist: mineru[vllm]; sys_platform == "linux" and extra == "all"
88
+ Requires-Dist: mineru[lmdeploy]; sys_platform == "win32" and extra == "all"
85
89
  Dynamic: license-file
86
90
 
87
91
  <div align="center" xmlns="http://www.w3.org/1999/html">
@@ -130,6 +134,13 @@ Dynamic: license-file
130
134
  </div>
131
135
 
132
136
  # Changelog
137
+ - 2025/11/26 2.6.5 Release
138
+ - Added support for a new backend, `vlm-lmdeploy-engine`. Its usage is similar to `vlm-vllm-(async)engine`, but it uses `lmdeploy` as the inference engine and, unlike `vllm`, also supports native inference acceleration on Windows.
139
+
140
+ - 2025/11/04 2.6.4 Release
141
+ - Added a timeout for PDF image rendering. The default is 300 seconds and can be changed via the environment variable `MINERU_PDF_RENDER_TIMEOUT`; this prevents abnormal PDF files from blocking the rendering process for a long time.
142
+ - Added CPU thread-count configuration options for ONNX models. The default is the number of system CPU cores; it can be changed via the environment variables `MINERU_INTRA_OP_NUM_THREADS` and `MINERU_INTER_OP_NUM_THREADS` to reduce CPU resource contention in high-concurrency scenarios.
143
+
133
144
  - 2025/10/31 2.6.3 Release
134
145
  - Added support for a new backend `vlm-mlx-engine`, enabling MLX-accelerated inference for the MinerU2.5 model on Apple Silicon devices. Compared to the `vlm-transformers` backend, `vlm-mlx-engine` delivers a 100%–200% speed improvement.
135
146
  - Bug fixes: #3849, #3859
@@ -714,12 +725,13 @@ A WebUI developed based on Gradio, with a simple interface and only core parsing
714
725
  <tr>
715
726
  <th rowspan="2">Parsing Backend</th>
716
727
  <th rowspan="2">pipeline <br> (Accuracy<sup>1</sup> 82+)</th>
717
- <th colspan="4">vlm (Accuracy<sup>1</sup> 90+)</th>
728
+ <th colspan="5">vlm (Accuracy<sup>1</sup> 90+)</th>
718
729
  </tr>
719
730
  <tr>
720
731
  <th>transformers</th>
721
732
  <th>mlx-engine</th>
722
733
  <th>vllm-engine / <br>vllm-async-engine</th>
734
+ <th>lmdeploy-engine</th>
723
735
  <th>http-client</th>
724
736
  </tr>
725
737
  </thead>
@@ -730,40 +742,42 @@ A WebUI developed based on Gradio, with a simple interface and only core parsing
730
742
  <td>Good compatibility, <br>but slower</td>
731
743
  <td>Faster than transformers</td>
732
744
  <td>Fast, compatible with the vLLM ecosystem</td>
733
- <td>Suitable for OpenAI-compatible servers<sup>5</sup></td>
745
+ <td>Fast, compatible with the LMDeploy ecosystem</td>
746
+ <td>Suitable for OpenAI-compatible servers<sup>6</sup></td>
734
747
  </tr>
735
748
  <tr>
736
749
  <th>Operating System</th>
737
750
  <td colspan="2" style="text-align:center;">Linux<sup>2</sup> / Windows / macOS</td>
738
751
  <td style="text-align:center;">macOS<sup>3</sup></td>
739
752
  <td style="text-align:center;">Linux<sup>2</sup> / Windows<sup>4</sup> </td>
753
+ <td style="text-align:center;">Linux<sup>2</sup> / Windows<sup>5</sup> </td>
740
754
  <td>Any</td>
741
755
  </tr>
742
756
  <tr>
743
757
  <th>CPU inference support</th>
744
758
  <td colspan="2" style="text-align:center;">✅</td>
745
- <td colspan="2" style="text-align:center;">❌</td>
759
+ <td colspan="3" style="text-align:center;">❌</td>
746
760
  <td>Not required</td>
747
761
  </tr>
748
762
  <tr>
749
763
  <th>GPU Requirements</th><td colspan="2" style="text-align:center;">Volta or later architectures, 6 GB VRAM or more, or Apple Silicon</td>
750
764
  <td>Apple Silicon</td>
751
- <td>Volta or later architectures, 8 GB VRAM or more</td>
765
+ <td colspan="2" style="text-align:center;">Volta or later architectures, 8 GB VRAM or more</td>
752
766
  <td>Not required</td>
753
767
  </tr>
754
768
  <tr>
755
769
  <th>Memory Requirements</th>
756
- <td colspan="4" style="text-align:center;">Minimum 16 GB, 32 GB recommended</td>
770
+ <td colspan="5" style="text-align:center;">Minimum 16 GB, 32 GB recommended</td>
757
771
  <td>8 GB</td>
758
772
  </tr>
759
773
  <tr>
760
774
  <th>Disk Space Requirements</th>
761
- <td colspan="4" style="text-align:center;">20 GB or more, SSD recommended</td>
775
+ <td colspan="5" style="text-align:center;">20 GB or more, SSD recommended</td>
762
776
  <td>2 GB</td>
763
777
  </tr>
764
778
  <tr>
765
779
  <th>Python Version</th>
766
- <td colspan="5" style="text-align:center;">3.10-3.13</td>
780
+ <td colspan="6" style="text-align:center;">3.10-3.13<sup>7</sup></td>
767
781
  </tr>
768
782
  </tbody>
769
783
  </table>
@@ -772,7 +786,9 @@ A WebUI developed based on Gradio, with a simple interface and only core parsing
772
786
  <sup>2</sup> Linux supports only distributions released in 2019 or later.
773
787
  <sup>3</sup> MLX requires macOS 13.5 or later, recommended for use with version 14.0 or higher.
774
788
  <sup>4</sup> Windows vLLM support via WSL2(Windows Subsystem for Linux).
775
- <sup>5</sup> Servers compatible with the OpenAI API, such as local or remote model services deployed via inference frameworks like `vLLM`, `SGLang`, or `LMDeploy`.
789
+ <sup>5</sup> On Windows, LMDeploy can only use the `turbomind` backend, which is slightly slower than the `pytorch` backend. If performance is critical, it is recommended to run LMDeploy via WSL2.
790
+ <sup>6</sup> Servers compatible with the OpenAI API, such as local or remote model services deployed via inference frameworks like `vLLM`, `SGLang`, or `LMDeploy`.
791
+ <sup>7</sup> Windows + LMDeploy only supports Python versions 3.10–3.12, as the critical dependency `ray` does not yet support Python 3.13 on Windows.
776
792
 
777
793
 
778
794
  ### Install MinerU
@@ -792,8 +808,8 @@ uv pip install -e .[core]
792
808
  ```
793
809
 
794
810
  > [!TIP]
795
- > `mineru[core]` includes all core features except `vLLM` acceleration, compatible with Windows / Linux / macOS systems, suitable for most users.
796
- > If you need to use `vLLM` acceleration for VLM model inference or install a lightweight client on edge devices, please refer to the documentation [Extension Modules Installation Guide](https://opendatalab.github.io/MinerU/quick_start/extension_modules/).
811
+ > `mineru[core]` includes all core features except `vLLM`/`LMDeploy` acceleration, compatible with Windows / Linux / macOS systems, suitable for most users.
812
+ > If you need to use `vLLM`/`LMDeploy` acceleration for VLM model inference or install a lightweight client on edge devices, please refer to the documentation [Extension Modules Installation Guide](https://opendatalab.github.io/MinerU/quick_start/extension_modules/).
797
813
 
798
814
  ---
799
815
 
@@ -1,5 +1,5 @@
1
1
  mineru/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
2
- mineru/version.py,sha256=uJ6TLK18jhCrL0aclBja7NzlAGLAyZjVpX-gq3d461k,22
2
+ mineru/version.py,sha256=b8L3dijps7oaMPmOpJzOuXwvOcbIuro9wWmuPwiL87o,22
3
3
  mineru/backend/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
4
4
  mineru/backend/utils.py,sha256=GLJU3IznDmhE1_qNmkU1UOtsuskIHBezgsEVO6Uar-Y,698
5
5
  mineru/backend/pipeline/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
@@ -8,22 +8,22 @@ mineru/backend/pipeline/model_init.py,sha256=OAylOcQD9gu5TBcX7nMt7X5NpJMtQICI5Iv
8
8
  mineru/backend/pipeline/model_json_to_middle_json.py,sha256=reXkUR_wKmJD64d7vRNXMxFviwkzDlGjRshpdwsVquI,10951
9
9
  mineru/backend/pipeline/model_list.py,sha256=7cXMBfZrP0K6qWueg1D_-WoUANeSINzkn_ic9E7YQLs,222
10
10
  mineru/backend/pipeline/para_split.py,sha256=Kq95MmvkPm7rKxlCSGiTvVKyF7CErHI2eGGAs5sLl0Q,17119
11
- mineru/backend/pipeline/pipeline_analyze.py,sha256=rbO5AetOdnxR5ctkoDzFCFoElkz7Jgb7gi2Ct596NK8,6655
11
+ mineru/backend/pipeline/pipeline_analyze.py,sha256=O_HGifodg03VZbmTve-U6Cmo0T03AmuK86t1v1J9X-Q,6897
12
12
  mineru/backend/pipeline/pipeline_magic_model.py,sha256=w8jGx8f6yZN0Wf2yPP3L9rYKc9rogxreZCrUJzJvPO8,14974
13
13
  mineru/backend/pipeline/pipeline_middle_json_mkcontent.py,sha256=YlnEbbUnkniZXS13aLo5mjfFQvQM5SrIVvTAGBZsLmw,14478
14
14
  mineru/backend/vlm/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
15
15
  mineru/backend/vlm/model_output_to_middle_json.py,sha256=AqYX44gS9crUO_t7SuUatD71EVjow6pI6yA2Ik3gQ0s,5139
16
- mineru/backend/vlm/utils.py,sha256=woGqyRI4S7p69daLCU07XNXWTV27aLf7YBjjVH1x-5o,2794
17
- mineru/backend/vlm/vlm_analyze.py,sha256=7c5_JN1F9YTDNNgA_Rmw6xX1PI7gcIT4A4ujtGQHH9Q,8792
16
+ mineru/backend/vlm/utils.py,sha256=taiPNKtsykImUYkkosk1CjxFIJEutygK8iZTLly-ZqU,3905
17
+ mineru/backend/vlm/vlm_analyze.py,sha256=wP3vuYGVec0hRsDAuzfSm2HD4Muu7wSWL767qxd_yqw,11690
18
18
  mineru/backend/vlm/vlm_magic_model.py,sha256=Pd0sOr7G1crAJIVeq6h_03gNSuxmV5U8dvGTGT_rrjs,23452
19
19
  mineru/backend/vlm/vlm_middle_json_mkcontent.py,sha256=5V-AU9KkxxMn0DDSQBrb15I4GVpEyiQy8uNI_tQhS6M,13498
20
20
  mineru/cli/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
21
- mineru/cli/client.py,sha256=ArnoT2psOQRnTqLpsFwPaoi-l444iIVkbBn90Pm16n8,6915
22
- mineru/cli/common.py,sha256=jxFJMdc-02UMO3SXAtcZ6aIdPrakAE6DCccZ9kDlPKc,14276
21
+ mineru/cli/client.py,sha256=__CQknekVeq6s72JzHQRPSpR7mfNaO-ob9wq6oiEj6s,7047
22
+ mineru/cli/common.py,sha256=zhNOJCOnTSMbWdUWSZG-nf0odv5vBRtdZYZ1UbUPH3g,14369
23
23
  mineru/cli/fast_api.py,sha256=t5bda769VbM5iokAboiJfPIOnm-r5GTFReE-KQy8L3g,10941
24
- mineru/cli/gradio_app.py,sha256=6dA0ARpdOoewFeXmHrleF1amCgBV9ilY_nkWAmAmN8A,14731
24
+ mineru/cli/gradio_app.py,sha256=EUPuRHHCOECrE3E3VNEeuMDYeC3nicurOYfk8YJSOMw,15646
25
25
  mineru/cli/models_download.py,sha256=LNfoIpUlJM7m7qb2SiCxtjMDw4jILBQtZwNP2JoY81U,4815
26
- mineru/cli/vlm_vllm_server.py,sha256=fQJyD-gIPQ41hR_6aIaDJczl66N310t0CiZEBAfX5mc,90
26
+ mineru/cli/vlm_server.py,sha256=27HaqO3wpMXSA_nA3CC6JOBTHK3q66SP00cD6m9HuQE,1974
27
27
  mineru/data/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
28
28
  mineru/data/data_reader_writer/__init__.py,sha256=9qnGNrsuGBMwwfsQy6oChdkz--a_LPdYWE0VZZr0yr4,490
29
29
  mineru/data/data_reader_writer/base.py,sha256=nqmAcdHOXMOJO6RAT3ILligDFaw8Op0STyCw5yOzAbI,1706
@@ -77,12 +77,12 @@ mineru/model/table/rec/slanet_plus/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQe
77
77
  mineru/model/table/rec/slanet_plus/main.py,sha256=vfrcvQ9JBf32YZU9eNoetoqdpcrFNsA1WNqQBsG8i2o,7646
78
78
  mineru/model/table/rec/slanet_plus/matcher.py,sha256=uwF-wCLaYlaQ3JQ_-YywGVl1XQYnx7G_RTuWLW8JlBk,7321
79
79
  mineru/model/table/rec/slanet_plus/matcher_utils.py,sha256=9wt_ydeeViLd57bU6g3lnXXni49qLSra2C6wSFQZkiw,9597
80
- mineru/model/table/rec/slanet_plus/table_structure.py,sha256=Ve9eUdA0ivHf5bf9gwvHHfb7-E7drJLP3S3MPlh3uZ0,3844
80
+ mineru/model/table/rec/slanet_plus/table_structure.py,sha256=qt-HPYIQyp0aWG_MmnM_sMQCV8ZLb4rALSueyCohPgM,4085
81
81
  mineru/model/table/rec/slanet_plus/table_structure_utils.py,sha256=YYSkwN2WdLx7qkWMSGkPY7yXOH5ENVhg5CsRGhtZ5Wk,19281
82
82
  mineru/model/table/rec/unet_table/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
83
83
  mineru/model/table/rec/unet_table/main.py,sha256=J13Q7_6stYyedmVedf9CZD7R0tuguGfTg3Z3ob4GDuM,15565
84
84
  mineru/model/table/rec/unet_table/table_recover.py,sha256=rSyeWyuP10M8dLKA5e0n4P2DXMYbVbmgLxEcdZA8_0E,9059
85
- mineru/model/table/rec/unet_table/table_structure_unet.py,sha256=beBMmBHAOR2lAuf2rcOKRSbFaJqwuIgMJWxWQsFmIRI,7908
85
+ mineru/model/table/rec/unet_table/table_structure_unet.py,sha256=hnmYLzZFRlK0Y4gr874G9GaLahcKnNZYNun869FdmH8,8150
86
86
  mineru/model/table/rec/unet_table/utils.py,sha256=CYAqJW0wePJk4NAemb8W203N7E32v0ujiWbxanDhd8I,16083
87
87
  mineru/model/table/rec/unet_table/utils_table_line_rec.py,sha256=zrCdPwI4M8nu0FEfd7lRJAe0z8kYq3KFbzwElM82USE,11174
88
88
  mineru/model/table/rec/unet_table/utils_table_recover.py,sha256=XksJsY82ZS0kqUnNT-jvaYzxJ3V3svMSzj0puwIau1k,10651
@@ -145,15 +145,16 @@ mineru/model/utils/tools/infer/predict_det.py,sha256=vYQREn7vELXxBsr72CCCVvm1gwV
145
145
  mineru/model/utils/tools/infer/predict_rec.py,sha256=-BH93JDisu0kT6CyHA4plUOKcb2L-UvDk7Tein5uwt4,19209
146
146
  mineru/model/utils/tools/infer/predict_system.py,sha256=hkegkn6hq2v2zqHVAP615-k-fkTS8swRYSbZeoqmSI8,3822
147
147
  mineru/model/utils/tools/infer/pytorchocr_utility.py,sha256=i1PFN-_kefJUUZ4Vk7igs1TU8gfErTDlDXY6-8Uaurw,9323
148
- mineru/model/vlm_vllm_model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
149
- mineru/model/vlm_vllm_model/server.py,sha256=w5ddusPbcVaEoWAo_BRjmwv_Ywxrc_bCMRhxihoyykY,2263
148
+ mineru/model/vlm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
149
+ mineru/model/vlm/lmdeploy_server.py,sha256=PvxJNcUIKB8VzWMDXeV1t0SHSgz_ULO36ZAzJbppz90,3262
150
+ mineru/model/vlm/vllm_server.py,sha256=w5ddusPbcVaEoWAo_BRjmwv_Ywxrc_bCMRhxihoyykY,2263
150
151
  mineru/resources/header.html,sha256=PUselBXLBn8gfeP3zwEtj6zIxfhcCN4vN_B796nQFNQ,4410
151
152
  mineru/resources/fasttext-langdetect/lid.176.ftz,sha256=jzRyz-hzintgmejpmcPL-uDc0VaWqsfXc4qAOdtgPoM,938013
152
153
  mineru/utils/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
153
154
  mineru/utils/block_pre_proc.py,sha256=uGBmxf2MR9bplTnQI8xHjCI-kj3plRhJr0hcWKidbOQ,9632
154
155
  mineru/utils/block_sort.py,sha256=5e1mOLB3W7xu5Y1hmhvGSHPL_aQ41R_4VXcP4vjYAOU,12976
155
156
  mineru/utils/boxbase.py,sha256=moP660AmZq_udHEsfvFkTQdJ4gjrrBwN7t0Enx7CIL8,6903
156
- mineru/utils/check_mac_env.py,sha256=pbmbcnS60zZRqNRBxFJbbPrCosU1lC464b0v6JUlgaE,1031
157
+ mineru/utils/check_sys_env.py,sha256=TRjzg4xWyoSGrgv4KaP225A-99xBgLAfZ1cPcGqrBAA,1191
157
158
  mineru/utils/cli_parser.py,sha256=4seFAu1kulsYnw6WM2q_cxgEOt2tErZVkI-LNEF_kGw,1445
158
159
  mineru/utils/config_reader.py,sha256=IRVWTpBnbnRpck6eXZUKw-fcLt7hon5S4uqWW-RBb1w,4075
159
160
  mineru/utils/cut_image.py,sha256=g3m4nfcJNWlxi-P0kpXTtlmspXkMcLCfGwmYuQ-Z2hE,751
@@ -168,17 +169,19 @@ mineru/utils/magic_model_utils.py,sha256=2xOvi4oqg3MSw1FUrJTnYDtWeFrrm6qbmlEorLZ
168
169
  mineru/utils/model_utils.py,sha256=6OsgFLsABX5JuShSzCMSNHWV-yi-1cjwHweafyxIgRo,18448
169
170
  mineru/utils/models_download_utils.py,sha256=UfjvwhxO6BkJHa5JSpEVNZ71GoLMPMmJpym3THET2T4,2957
170
171
  mineru/utils/ocr_utils.py,sha256=lPIrwNUib5mrzUkponRYHuUCdjV2qvETNLSzOLyflrU,15990
172
+ mineru/utils/os_env_config.py,sha256=ZNtkR4KrJW72CeIoTNzGDL6tMKv_hL8nzvWIssGWbqY,842
171
173
  mineru/utils/pdf_classify.py,sha256=6DF5pH_9Uq83fsFtp7n4i-OdYQGzoNOV9L0VBUhgBMQ,8078
172
- mineru/utils/pdf_image_tools.py,sha256=mioLEHOdDtM1YbspNaa0wWhnLw_4-H7rdHlIM40vrT4,4077
174
+ mineru/utils/pdf_image_tools.py,sha256=86_xvsGOEde5QGlKz5uJemjoO1upr6n_K7o3lCdyIjQ,7981
175
+ mineru/utils/pdf_page_id.py,sha256=em966k12CRW4Rj49RGiLB_8ILwkXPBnWRetApax3eTs,400
173
176
  mineru/utils/pdf_reader.py,sha256=WeINm5SyWBUXT0wP9lzIbeHs8P6WUIkN6nVL5X4LzG4,3267
174
177
  mineru/utils/pdf_text_tool.py,sha256=KEztjfdqsIHHuiTEAMAL7Lr1OS3R7Ur-uTqGiCRjReQ,1364
175
178
  mineru/utils/run_async.py,sha256=rPeP4BCZerR8VByRDhiYzfZiahLVqoZEBVAS54dAjNg,1286
176
179
  mineru/utils/span_block_fix.py,sha256=0eVQjJCrT03woRt9hoh6Uu42Tp1dacfGTv2x3B9qq94,8797
177
180
  mineru/utils/span_pre_proc.py,sha256=h41q2uQajI0xQbc_30hqaju1dv3oVYxBAlKgURl8HIc,13692
178
181
  mineru/utils/table_merge.py,sha256=d98zNbM1ZQ8V1kUt6RugParNUNPv7DGL-XKIzR3iJVQ,15360
179
- mineru-2.6.3.dist-info/licenses/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
180
- mineru-2.6.3.dist-info/METADATA,sha256=mUBTxFG5tgdyQ1caZVRNrk4MhIX36PECY09dwCNYXZ4,70689
181
- mineru-2.6.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
182
- mineru-2.6.3.dist-info/entry_points.txt,sha256=luXmbhPiZK_tKlRgWuYOaW_V6EFpG-yJcAevVv9MEqE,252
183
- mineru-2.6.3.dist-info/top_level.txt,sha256=zuGQfZcbsHv4I4oKI9gaKPqEWBFm6xJroKuug2LnKP8,7
184
- mineru-2.6.3.dist-info/RECORD,,
182
+ mineru-2.6.5.dist-info/licenses/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
183
+ mineru-2.6.5.dist-info/METADATA,sha256=BUj9fYR_NiRpYGqXWd3J_fOTE8IN0bdl0PgY6FUGVcg,72362
184
+ mineru-2.6.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
185
+ mineru-2.6.5.dist-info/entry_points.txt,sha256=JbtrCPhx1T32s7TONUsteKg-24ZwRT1HSiFtW5jypVw,376
186
+ mineru-2.6.5.dist-info/top_level.txt,sha256=zuGQfZcbsHv4I4oKI9gaKPqEWBFm6xJroKuug2LnKP8,7
187
+ mineru-2.6.5.dist-info/RECORD,,
@@ -2,5 +2,7 @@
2
2
  mineru = mineru.cli:client.main
3
3
  mineru-api = mineru.cli.fast_api:main
4
4
  mineru-gradio = mineru.cli.gradio_app:main
5
+ mineru-lmdeploy-server = mineru.cli.vlm_server:lmdeploy_server
5
6
  mineru-models-download = mineru.cli.models_download:download_models
6
- mineru-vllm-server = mineru.cli.vlm_vllm_server:main
7
+ mineru-openai-server = mineru.cli.vlm_server:openai_server
8
+ mineru-vllm-server = mineru.cli.vlm_server:vllm_server
@@ -1,4 +0,0 @@
1
- from mineru.model.vlm_vllm_model.server import main
2
-
3
- if __name__ == "__main__":
4
- main()
File without changes
File without changes