mineru 2.6.4__py3-none-any.whl → 2.6.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -159,7 +159,6 @@ def batch_image_analyze(
159
159
 
160
160
  model_manager = ModelSingleton()
161
161
 
162
- batch_ratio = 1
163
162
  device = get_device()
164
163
 
165
164
  if str(device).startswith('npu'):
@@ -173,25 +172,21 @@ def batch_image_analyze(
173
172
  "Please ensure that the torch_npu package is installed correctly."
174
173
  ) from e
175
174
 
176
- if str(device).startswith('npu') or str(device).startswith('cuda'):
177
- vram = get_vram(device)
178
- if vram is not None:
179
- gpu_memory = int(os.getenv('MINERU_VIRTUAL_VRAM_SIZE', round(vram)))
180
- if gpu_memory >= 16:
181
- batch_ratio = 16
182
- elif gpu_memory >= 12:
183
- batch_ratio = 8
184
- elif gpu_memory >= 8:
185
- batch_ratio = 4
186
- elif gpu_memory >= 6:
187
- batch_ratio = 2
188
- else:
189
- batch_ratio = 1
190
- logger.info(f'gpu_memory: {gpu_memory} GB, batch_ratio: {batch_ratio}')
191
- else:
192
- # Default batch_ratio when VRAM can't be determined
193
- batch_ratio = 1
194
- logger.info(f'Could not determine GPU memory, using default batch_ratio: {batch_ratio}')
175
+ gpu_memory = get_vram(device)
176
+ if gpu_memory >= 16:
177
+ batch_ratio = 16
178
+ elif gpu_memory >= 12:
179
+ batch_ratio = 8
180
+ elif gpu_memory >= 8:
181
+ batch_ratio = 4
182
+ elif gpu_memory >= 6:
183
+ batch_ratio = 2
184
+ else:
185
+ batch_ratio = 1
186
+ logger.info(
187
+ f'GPU Memory: {gpu_memory} GB, Batch Ratio: {batch_ratio}. '
188
+ f'You can set MINERU_VIRTUAL_VRAM_SIZE environment variable to adjust GPU memory allocation.'
189
+ )
195
190
 
196
191
  # 检测torch的版本号
197
192
  import torch
@@ -3,6 +3,7 @@ import os
3
3
  from loguru import logger
4
4
  from packaging import version
5
5
 
6
+ from mineru.utils.check_sys_env import is_windows_environment, is_linux_environment
6
7
  from mineru.utils.config_reader import get_device
7
8
  from mineru.utils.model_utils import get_vram
8
9
 
@@ -11,14 +12,16 @@ def enable_custom_logits_processors() -> bool:
11
12
  import torch
12
13
  from vllm import __version__ as vllm_version
13
14
 
14
- if not torch.cuda.is_available():
15
+ if torch.cuda.is_available():
16
+ major, minor = torch.cuda.get_device_capability()
17
+ # 正确计算Compute Capability
18
+ compute_capability = f"{major}.{minor}"
19
+ elif hasattr(torch, 'npu') and torch.npu.is_available():
20
+ compute_capability = "8.0"
21
+ else:
15
22
  logger.info("CUDA not available, disabling custom_logits_processors")
16
23
  return False
17
24
 
18
- major, minor = torch.cuda.get_device_capability()
19
- # 正确计算Compute Capability
20
- compute_capability = f"{major}.{minor}"
21
-
22
25
  # 安全地处理环境变量
23
26
  vllm_use_v1_str = os.getenv('VLLM_USE_V1', "1")
24
27
  if vllm_use_v1_str.isdigit():
@@ -44,6 +47,29 @@ def enable_custom_logits_processors() -> bool:
44
47
  return True
45
48
 
46
49
 
50
+ def set_lmdeploy_backend(device_type: str) -> str:
51
+ if device_type.lower() in ["ascend", "maca", "camb"]:
52
+ lmdeploy_backend = "pytorch"
53
+ elif device_type.lower() in ["cuda"]:
54
+ import torch
55
+ if not torch.cuda.is_available():
56
+ raise ValueError("CUDA is not available.")
57
+ if is_windows_environment():
58
+ lmdeploy_backend = "turbomind"
59
+ elif is_linux_environment():
60
+ major, minor = torch.cuda.get_device_capability()
61
+ compute_capability = f"{major}.{minor}"
62
+ if version.parse(compute_capability) >= version.parse("8.0"):
63
+ lmdeploy_backend = "pytorch"
64
+ else:
65
+ lmdeploy_backend = "turbomind"
66
+ else:
67
+ raise ValueError("Unsupported operating system.")
68
+ else:
69
+ raise ValueError(f"Unsupported lmdeploy device type: {device_type}")
70
+ return lmdeploy_backend
71
+
72
+
47
73
  def set_default_gpu_memory_utilization() -> float:
48
74
  from vllm import __version__ as vllm_version
49
75
  if version.parse(vllm_version) >= version.parse("0.11.0"):
@@ -55,20 +81,16 @@ def set_default_gpu_memory_utilization() -> float:
55
81
  def set_default_batch_size() -> int:
56
82
  try:
57
83
  device = get_device()
58
- vram = get_vram(device)
59
- if vram is not None:
60
- gpu_memory = int(os.getenv('MINERU_VIRTUAL_VRAM_SIZE', round(vram)))
61
- if gpu_memory >= 16:
62
- batch_size = 8
63
- elif gpu_memory >= 8:
64
- batch_size = 4
65
- else:
66
- batch_size = 1
67
- logger.info(f'gpu_memory: {gpu_memory} GB, batch_size: {batch_size}')
84
+ gpu_memory = get_vram(device)
85
+
86
+ if gpu_memory >= 16:
87
+ batch_size = 8
88
+ elif gpu_memory >= 8:
89
+ batch_size = 4
68
90
  else:
69
- # Default batch_ratio when VRAM can't be determined
70
91
  batch_size = 1
71
- logger.info(f'Could not determine GPU memory, using default batch_ratio: {batch_size}')
92
+ logger.info(f'gpu_memory: {gpu_memory} GB, batch_size: {batch_size}')
93
+
72
94
  except Exception as e:
73
95
  logger.warning(f'Error determining VRAM: {e}, using default batch_ratio: 1')
74
96
  batch_size = 1
@@ -4,7 +4,8 @@ import time
4
4
 
5
5
  from loguru import logger
6
6
 
7
- from .utils import enable_custom_logits_processors, set_default_gpu_memory_utilization, set_default_batch_size
7
+ from .utils import enable_custom_logits_processors, set_default_gpu_memory_utilization, set_default_batch_size, \
8
+ set_lmdeploy_backend
8
9
  from .model_output_to_middle_json import result_to_middle_json
9
10
  from ...data.data_reader_writer import DataWriter
10
11
  from mineru.utils.pdf_image_tools import load_images_from_pdf
@@ -40,94 +41,149 @@ class ModelSingleton:
40
41
  model = None
41
42
  processor = None
42
43
  vllm_llm = None
44
+ lmdeploy_engine = None
43
45
  vllm_async_llm = None
44
46
  batch_size = kwargs.get("batch_size", 0) # for transformers backend only
45
47
  max_concurrency = kwargs.get("max_concurrency", 100) # for http-client backend only
46
48
  http_timeout = kwargs.get("http_timeout", 600) # for http-client backend only
49
+ server_headers = kwargs.get("server_headers", None) # for http-client backend only
50
+ max_retries = kwargs.get("max_retries", 3) # for http-client backend only
51
+ retry_backoff_factor = kwargs.get("retry_backoff_factor", 0.5) # for http-client backend only
47
52
  # 从kwargs中移除这些参数,避免传递给不相关的初始化函数
48
- for param in ["batch_size", "max_concurrency", "http_timeout"]:
53
+ for param in ["batch_size", "max_concurrency", "http_timeout", "server_headers", "max_retries", "retry_backoff_factor"]:
49
54
  if param in kwargs:
50
55
  del kwargs[param]
51
- if backend in ['transformers', 'vllm-engine', "vllm-async-engine", "mlx-engine"] and not model_path:
56
+ if backend not in ["http-client"] and not model_path:
52
57
  model_path = auto_download_and_get_model_root_path("/","vlm")
53
- if backend == "transformers":
58
+ if backend == "transformers":
59
+ try:
60
+ from transformers import (
61
+ AutoProcessor,
62
+ Qwen2VLForConditionalGeneration,
63
+ )
64
+ from transformers import __version__ as transformers_version
65
+ except ImportError:
66
+ raise ImportError("Please install transformers to use the transformers backend.")
67
+
68
+ if version.parse(transformers_version) >= version.parse("4.56.0"):
69
+ dtype_key = "dtype"
70
+ else:
71
+ dtype_key = "torch_dtype"
72
+ device = get_device()
73
+ model = Qwen2VLForConditionalGeneration.from_pretrained(
74
+ model_path,
75
+ device_map={"": device},
76
+ **{dtype_key: "auto"}, # type: ignore
77
+ )
78
+ processor = AutoProcessor.from_pretrained(
79
+ model_path,
80
+ use_fast=True,
81
+ )
82
+ if batch_size == 0:
83
+ batch_size = set_default_batch_size()
84
+ elif backend == "mlx-engine":
85
+ mlx_supported = is_mac_os_version_supported()
86
+ if not mlx_supported:
87
+ raise EnvironmentError("mlx-engine backend is only supported on macOS 13.5+ with Apple Silicon.")
88
+ try:
89
+ from mlx_vlm import load as mlx_load
90
+ except ImportError:
91
+ raise ImportError("Please install mlx-vlm to use the mlx-engine backend.")
92
+ model, processor = mlx_load(model_path)
93
+ else:
94
+ if os.getenv('OMP_NUM_THREADS') is None:
95
+ os.environ["OMP_NUM_THREADS"] = "1"
96
+
97
+ if backend == "vllm-engine":
54
98
  try:
55
- from transformers import (
56
- AutoProcessor,
57
- Qwen2VLForConditionalGeneration,
58
- )
59
- from transformers import __version__ as transformers_version
99
+ import vllm
60
100
  except ImportError:
61
- raise ImportError("Please install transformers to use the transformers backend.")
62
-
63
- if version.parse(transformers_version) >= version.parse("4.56.0"):
64
- dtype_key = "dtype"
101
+ raise ImportError("Please install vllm to use the vllm-engine backend.")
102
+ if "gpu_memory_utilization" not in kwargs:
103
+ kwargs["gpu_memory_utilization"] = set_default_gpu_memory_utilization()
104
+ if "model" not in kwargs:
105
+ kwargs["model"] = model_path
106
+ if enable_custom_logits_processors() and ("logits_processors" not in kwargs):
107
+ from mineru_vl_utils import MinerULogitsProcessor
108
+ kwargs["logits_processors"] = [MinerULogitsProcessor]
109
+ # 使用kwargs为 vllm初始化参数
110
+ vllm_llm = vllm.LLM(**kwargs)
111
+ elif backend == "vllm-async-engine":
112
+ try:
113
+ from vllm.engine.arg_utils import AsyncEngineArgs
114
+ from vllm.v1.engine.async_llm import AsyncLLM
115
+ except ImportError:
116
+ raise ImportError("Please install vllm to use the vllm-async-engine backend.")
117
+ if "gpu_memory_utilization" not in kwargs:
118
+ kwargs["gpu_memory_utilization"] = set_default_gpu_memory_utilization()
119
+ if "model" not in kwargs:
120
+ kwargs["model"] = model_path
121
+ if enable_custom_logits_processors() and ("logits_processors" not in kwargs):
122
+ from mineru_vl_utils import MinerULogitsProcessor
123
+ kwargs["logits_processors"] = [MinerULogitsProcessor]
124
+ # 使用kwargs为 vllm初始化参数
125
+ vllm_async_llm = AsyncLLM.from_engine_args(AsyncEngineArgs(**kwargs))
126
+ elif backend == "lmdeploy-engine":
127
+ try:
128
+ from lmdeploy import PytorchEngineConfig, TurbomindEngineConfig
129
+ from lmdeploy.serve.vl_async_engine import VLAsyncEngine
130
+ except ImportError:
131
+ raise ImportError("Please install lmdeploy to use the lmdeploy-engine backend.")
132
+ if "cache_max_entry_count" not in kwargs:
133
+ kwargs["cache_max_entry_count"] = 0.5
134
+
135
+ device_type = os.getenv("MINERU_LMDEPLOY_DEVICE", "")
136
+ if device_type == "":
137
+ if "lmdeploy_device" in kwargs:
138
+ device_type = kwargs.pop("lmdeploy_device")
139
+ if device_type not in ["cuda", "ascend", "maca", "camb"]:
140
+ raise ValueError(f"Unsupported lmdeploy device type: {device_type}")
141
+ else:
142
+ device_type = "cuda"
143
+ lm_backend = os.getenv("MINERU_LMDEPLOY_BACKEND", "")
144
+ if lm_backend == "":
145
+ if "lmdeploy_backend" in kwargs:
146
+ lm_backend = kwargs.pop("lmdeploy_backend")
147
+ if lm_backend not in ["pytorch", "turbomind"]:
148
+ raise ValueError(f"Unsupported lmdeploy backend: {lm_backend}")
149
+ else:
150
+ lm_backend = set_lmdeploy_backend(device_type)
151
+ logger.info(f"lmdeploy device is: {device_type}, lmdeploy backend is: {lm_backend}")
152
+
153
+ if lm_backend == "pytorch":
154
+ kwargs["device_type"] = device_type
155
+ backend_config = PytorchEngineConfig(**kwargs)
156
+ elif lm_backend == "turbomind":
157
+ backend_config = TurbomindEngineConfig(**kwargs)
65
158
  else:
66
- dtype_key = "torch_dtype"
67
- device = get_device()
68
- model = Qwen2VLForConditionalGeneration.from_pretrained(
69
- model_path,
70
- device_map={"": device},
71
- **{dtype_key: "auto"}, # type: ignore
72
- )
73
- processor = AutoProcessor.from_pretrained(
159
+ raise ValueError(f"Unsupported lmdeploy backend: {lm_backend}")
160
+
161
+ log_level = 'ERROR'
162
+ from lmdeploy.utils import get_logger
163
+ lm_logger = get_logger('lmdeploy')
164
+ lm_logger.setLevel(log_level)
165
+ if os.getenv('TM_LOG_LEVEL') is None:
166
+ os.environ['TM_LOG_LEVEL'] = log_level
167
+
168
+ lmdeploy_engine = VLAsyncEngine(
74
169
  model_path,
75
- use_fast=True,
170
+ backend=lm_backend,
171
+ backend_config=backend_config,
76
172
  )
77
- if batch_size == 0:
78
- batch_size = set_default_batch_size()
79
- elif backend == "mlx-engine":
80
- mlx_supported = is_mac_os_version_supported()
81
- if not mlx_supported:
82
- raise EnvironmentError("mlx-engine backend is only supported on macOS 13.5+ with Apple Silicon.")
83
- try:
84
- from mlx_vlm import load as mlx_load
85
- except ImportError:
86
- raise ImportError("Please install mlx-vlm to use the mlx-engine backend.")
87
- model, processor = mlx_load(model_path)
88
- else:
89
- if os.getenv('OMP_NUM_THREADS') is None:
90
- os.environ["OMP_NUM_THREADS"] = "1"
91
-
92
- if backend == "vllm-engine":
93
- try:
94
- import vllm
95
- from mineru_vl_utils import MinerULogitsProcessor
96
- except ImportError:
97
- raise ImportError("Please install vllm to use the vllm-engine backend.")
98
- if "gpu_memory_utilization" not in kwargs:
99
- kwargs["gpu_memory_utilization"] = set_default_gpu_memory_utilization()
100
- if "model" not in kwargs:
101
- kwargs["model"] = model_path
102
- if enable_custom_logits_processors() and ("logits_processors" not in kwargs):
103
- kwargs["logits_processors"] = [MinerULogitsProcessor]
104
- # 使用kwargs为 vllm初始化参数
105
- vllm_llm = vllm.LLM(**kwargs)
106
- elif backend == "vllm-async-engine":
107
- try:
108
- from vllm.engine.arg_utils import AsyncEngineArgs
109
- from vllm.v1.engine.async_llm import AsyncLLM
110
- from mineru_vl_utils import MinerULogitsProcessor
111
- except ImportError:
112
- raise ImportError("Please install vllm to use the vllm-async-engine backend.")
113
- if "gpu_memory_utilization" not in kwargs:
114
- kwargs["gpu_memory_utilization"] = set_default_gpu_memory_utilization()
115
- if "model" not in kwargs:
116
- kwargs["model"] = model_path
117
- if enable_custom_logits_processors() and ("logits_processors" not in kwargs):
118
- kwargs["logits_processors"] = [MinerULogitsProcessor]
119
- # 使用kwargs为 vllm初始化参数
120
- vllm_async_llm = AsyncLLM.from_engine_args(AsyncEngineArgs(**kwargs))
121
173
  self._models[key] = MinerUClient(
122
174
  backend=backend,
123
175
  model=model,
124
176
  processor=processor,
177
+ lmdeploy_engine=lmdeploy_engine,
125
178
  vllm_llm=vllm_llm,
126
179
  vllm_async_llm=vllm_async_llm,
127
180
  server_url=server_url,
128
181
  batch_size=batch_size,
129
182
  max_concurrency=max_concurrency,
130
183
  http_timeout=http_timeout,
184
+ server_headers=server_headers,
185
+ max_retries=max_retries,
186
+ retry_backoff_factor=retry_backoff_factor,
131
187
  )
132
188
  elapsed = round(time.time() - start_time, 2)
133
189
  logger.info(f"get {backend} predictor cost: {elapsed}s")
mineru/cli/client.py CHANGED
@@ -13,7 +13,7 @@ from ..version import __version__
13
13
  from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
14
14
 
15
15
 
16
- backends = ['pipeline', 'vlm-transformers', 'vlm-vllm-engine', 'vlm-http-client']
16
+ backends = ['pipeline', 'vlm-transformers', 'vlm-vllm-engine', 'vlm-lmdeploy-engine', 'vlm-http-client']
17
17
  if is_mac_os_version_supported():
18
18
  backends.append("vlm-mlx-engine")
19
19
 
@@ -62,9 +62,10 @@ if is_mac_os_version_supported():
62
62
  the backend for parsing pdf:
63
63
  pipeline: More general.
64
64
  vlm-transformers: More general, but slower.
65
- vlm-mlx-engine: Faster than transformers.
66
- vlm-vllm-engine: Faster(engine).
67
- vlm-http-client: Faster(client).
65
+ vlm-mlx-engine: Faster than transformers(macOS 13.5+).
66
+ vlm-vllm-engine: Faster(vllm-engine).
67
+ vlm-lmdeploy-engine: Faster(lmdeploy-engine).
68
+ vlm-http-client: Faster(client suitable for openai-compatible servers).
68
69
  Without method specified, pipeline will be used by default.""",
69
70
  default='pipeline',
70
71
  )
@@ -112,7 +113,7 @@ if is_mac_os_version_supported():
112
113
  '--formula',
113
114
  'formula_enable',
114
115
  type=bool,
115
- help='Enable formula parsing. Default is True. Adapted only for the case where the backend is set to "pipeline".',
116
+ help='Enable formula parsing. Default is True. ',
116
117
  default=True,
117
118
  )
118
119
  @click.option(
@@ -120,7 +121,7 @@ if is_mac_os_version_supported():
120
121
  '--table',
121
122
  'table_enable',
122
123
  type=bool,
123
- help='Enable table parsing. Default is True. Adapted only for the case where the backend is set to "pipeline".',
124
+ help='Enable table parsing. Default is True. ',
124
125
  default=True,
125
126
  )
126
127
  @click.option(
@@ -171,9 +172,8 @@ def main(
171
172
  def get_virtual_vram_size() -> int:
172
173
  if virtual_vram is not None:
173
174
  return virtual_vram
174
- if get_device_mode().startswith("cuda") or get_device_mode().startswith("npu"):
175
- return round(get_vram(get_device_mode()))
176
- return 1
175
+ else:
176
+ return get_vram(get_device_mode())
177
177
  if os.getenv('MINERU_VIRTUAL_VRAM_SIZE', None) is None:
178
178
  os.environ['MINERU_VIRTUAL_VRAM_SIZE']= str(get_virtual_vram_size())
179
179
 
mineru/cli/common.py CHANGED
@@ -18,6 +18,11 @@ from mineru.backend.vlm.vlm_analyze import doc_analyze as vlm_doc_analyze
18
18
  from mineru.backend.vlm.vlm_analyze import aio_doc_analyze as aio_vlm_doc_analyze
19
19
  from mineru.utils.pdf_page_id import get_end_page_id
20
20
 
21
+ if os.getenv("MINERU_LMDEPLOY_DEVICE", "") == "maca":
22
+ import torch
23
+ torch.backends.cudnn.enabled = False
24
+
25
+
21
26
  pdf_suffixes = ["pdf"]
22
27
  image_suffixes = ["png", "jpeg", "jp2", "webp", "gif", "bmp", "jpg", "tiff"]
23
28
 
@@ -51,11 +56,16 @@ def convert_pdf_bytes_to_bytes_by_pypdfium2(pdf_bytes, start_page_id=0, end_page
51
56
  try:
52
57
  end_page_id = get_end_page_id(end_page_id, len(pdf))
53
58
 
54
- # 选择要导入的页面索引
55
- page_indices = list(range(start_page_id, end_page_id + 1))
56
-
57
- # 从原PDF导入页面到新PDF
58
- output_pdf.import_pages(pdf, page_indices)
59
+ # 逐页导入,失败则跳过
60
+ output_index = 0
61
+ for page_index in range(start_page_id, end_page_id + 1):
62
+ try:
63
+ output_pdf.import_pages(pdf, pages=[page_index])
64
+ output_index += 1
65
+ except Exception as page_error:
66
+ output_pdf.del_page(output_index)
67
+ logger.warning(f"Failed to import page {page_index}: {page_error}, skipping this page.")
68
+ continue
59
69
 
60
70
  # 将新PDF保存到内存缓冲区
61
71
  output_buffer = io.BytesIO()
@@ -66,7 +76,6 @@ def convert_pdf_bytes_to_bytes_by_pypdfium2(pdf_bytes, start_page_id=0, end_page
66
76
  except Exception as e:
67
77
  logger.warning(f"Error in converting PDF bytes: {e}, Using original PDF bytes.")
68
78
  output_bytes = pdf_bytes
69
-
70
79
  pdf.close()
71
80
  output_pdf.close()
72
81
  return output_bytes
mineru/cli/fast_api.py CHANGED
@@ -8,7 +8,7 @@ import click
8
8
  import zipfile
9
9
  from pathlib import Path
10
10
  import glob
11
- from fastapi import FastAPI, UploadFile, File, Form
11
+ from fastapi import Depends, FastAPI, HTTPException, UploadFile, File, Form
12
12
  from fastapi.middleware.gzip import GZipMiddleware
13
13
  from fastapi.responses import JSONResponse, FileResponse
14
14
  from starlette.background import BackgroundTask
@@ -21,14 +21,53 @@ from mineru.utils.cli_parser import arg_parse
21
21
  from mineru.utils.guess_suffix_or_lang import guess_suffix_by_path
22
22
  from mineru.version import __version__
23
23
 
24
- app = FastAPI()
25
- app.add_middleware(GZipMiddleware, minimum_size=1000)
24
+ # 并发控制器
25
+ _request_semaphore: Optional[asyncio.Semaphore] = None
26
+
27
+ # 并发控制依赖函数
28
+ async def limit_concurrency():
29
+ if _request_semaphore is not None:
30
+ if _request_semaphore.locked():
31
+ raise HTTPException(
32
+ status_code=503,
33
+ detail=f"Server is at maximum capacity: {os.getenv('MINERU_API_MAX_CONCURRENT_REQUESTS', 'unset')}. Please try again later."
34
+ )
35
+ async with _request_semaphore:
36
+ yield
37
+ else:
38
+ yield
39
+
40
+ def create_app():
41
+ # By default, the OpenAPI documentation endpoints (openapi_url, docs_url, redoc_url) are enabled.
42
+ # To disable the FastAPI docs and schema endpoints, set the environment variable MINERU_API_ENABLE_FASTAPI_DOCS=0.
43
+ enable_docs = str(os.getenv("MINERU_API_ENABLE_FASTAPI_DOCS", "1")).lower() in ("1", "true", "yes")
44
+ app = FastAPI(
45
+ openapi_url="/openapi.json" if enable_docs else None,
46
+ docs_url="/docs" if enable_docs else None,
47
+ redoc_url="/redoc" if enable_docs else None,
48
+ )
49
+
50
+ # 初始化并发控制器:从环境变量MINERU_API_MAX_CONCURRENT_REQUESTS读取
51
+ global _request_semaphore
52
+ try:
53
+ max_concurrent_requests = int(os.getenv("MINERU_API_MAX_CONCURRENT_REQUESTS", "0"))
54
+ except ValueError:
55
+ max_concurrent_requests = 0
56
+
57
+ if max_concurrent_requests > 0:
58
+ _request_semaphore = asyncio.Semaphore(max_concurrent_requests)
59
+ logger.info(f"Request concurrency limited to {max_concurrent_requests}")
60
+
61
+ app.add_middleware(GZipMiddleware, minimum_size=1000)
62
+ return app
63
+
64
+ app = create_app()
26
65
 
27
66
 
28
67
  def sanitize_filename(filename: str) -> str:
29
68
  """
30
69
  格式化压缩文件的文件名
31
- 移除路径遍历字符, 保留 Unicode 字母、数字、._-
70
+ 移除路径遍历字符, 保留 Unicode 字母、数字、._-
32
71
  禁止隐藏文件
33
72
  """
34
73
  sanitized = re.sub(r'[/\\\.]{2,}|[/\\]', '', filename)
@@ -60,24 +99,48 @@ def get_infer_result(file_suffix_identifier: str, pdf_name: str, parse_dir: str)
60
99
  return None
61
100
 
62
101
 
63
- @app.post(path="/file_parse",)
102
+ @app.post(path="/file_parse", dependencies=[Depends(limit_concurrency)])
64
103
  async def parse_pdf(
65
- files: List[UploadFile] = File(...),
66
- output_dir: str = Form("./output"),
67
- lang_list: List[str] = Form(["ch"]),
68
- backend: str = Form("pipeline"),
69
- parse_method: str = Form("auto"),
70
- formula_enable: bool = Form(True),
71
- table_enable: bool = Form(True),
72
- server_url: Optional[str] = Form(None),
73
- return_md: bool = Form(True),
74
- return_middle_json: bool = Form(False),
75
- return_model_output: bool = Form(False),
76
- return_content_list: bool = Form(False),
77
- return_images: bool = Form(False),
78
- response_format_zip: bool = Form(False),
79
- start_page_id: int = Form(0),
80
- end_page_id: int = Form(99999),
104
+ files: List[UploadFile] = File(..., description="Upload pdf or image files for parsing"),
105
+ output_dir: str = Form("./output", description="Output local directory"),
106
+ lang_list: List[str] = Form(
107
+ ["ch"],
108
+ description="""(Adapted only for pipeline backend)Input the languages in the pdf to improve OCR accuracy.
109
+ Options: ch, ch_server, ch_lite, en, korean, japan, chinese_cht, ta, te, ka, th, el, latin, arabic, east_slavic, cyrillic, devanagari.
110
+ """
111
+ ),
112
+ backend: str = Form(
113
+ "pipeline",
114
+ description="""The backend for parsing:
115
+ - pipeline: More general
116
+ - vlm-transformers: More general, but slower
117
+ - vlm-mlx-engine: Faster than transformers (need apple silicon and macOS 13.5+)
118
+ - vlm-vllm-async-engine: Faster (vllm-engine, need vllm installed)
119
+ - vlm-lmdeploy-engine: Faster (lmdeploy-engine, need lmdeploy installed)
120
+ - vlm-http-client: Faster (client suitable for openai-compatible servers)"""
121
+ ),
122
+ parse_method: str = Form(
123
+ "auto",
124
+ description="""(Adapted only for pipeline backend)The method for parsing PDF:
125
+ - auto: Automatically determine the method based on the file type
126
+ - txt: Use text extraction method
127
+ - ocr: Use OCR method for image-based PDFs
128
+ """
129
+ ),
130
+ formula_enable: bool = Form(True, description="Enable formula parsing."),
131
+ table_enable: bool = Form(True, description="Enable table parsing."),
132
+ server_url: Optional[str] = Form(
133
+ None,
134
+ description="(Adapted only for vlm-http-client backend)openai compatible server url, e.g., http://127.0.0.1:30000"
135
+ ),
136
+ return_md: bool = Form(True, description="Return markdown content in response"),
137
+ return_middle_json: bool = Form(False, description="Return middle JSON in response"),
138
+ return_model_output: bool = Form(False, description="Return model output JSON in response"),
139
+ return_content_list: bool = Form(False, description="Return content list JSON in response"),
140
+ return_images: bool = Form(False, description="Return extracted images in response"),
141
+ response_format_zip: bool = Form(False, description="Return results as a ZIP file instead of JSON"),
142
+ start_page_id: int = Form(0, description="The starting page for PDF parsing, beginning from 0"),
143
+ end_page_id: int = Form(99999, description="The ending page for PDF parsing, beginning from 0"),
81
144
  ):
82
145
 
83
146
  # 获取命令行配置参数
@@ -153,7 +216,7 @@ async def parse_pdf(
153
216
  # 根据 response_format_zip 决定返回类型
154
217
  if response_format_zip:
155
218
  zip_fd, zip_path = tempfile.mkstemp(suffix=".zip", prefix="mineru_results_")
156
- os.close(zip_fd)
219
+ os.close(zip_fd)
157
220
  with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
158
221
  for pdf_name in pdf_file_names:
159
222
  safe_pdf_name = sanitize_filename(pdf_name)
@@ -178,7 +241,7 @@ async def parse_pdf(
178
241
 
179
242
  if return_model_output:
180
243
  path = os.path.join(parse_dir, f"{pdf_name}_model.json")
181
- if os.path.exists(path):
244
+ if os.path.exists(path):
182
245
  zf.write(path, arcname=os.path.join(safe_pdf_name, os.path.basename(path)))
183
246
 
184
247
  if return_content_list:
@@ -259,11 +322,16 @@ def main(ctx, host, port, reload, **kwargs):
259
322
  # 将配置参数存储到应用状态中
260
323
  app.state.config = kwargs
261
324
 
325
+ # 将 CLI 的并发参数同步到环境变量,确保 uvicorn 重载子进程可见
326
+ try:
327
+ mcr = int(kwargs.get("mineru_api_max_concurrent_requests", 0) or 0)
328
+ except ValueError:
329
+ mcr = 0
330
+ os.environ["MINERU_API_MAX_CONCURRENT_REQUESTS"] = str(mcr)
331
+
262
332
  """启动MinerU FastAPI服务器的命令行入口"""
263
333
  print(f"Start MinerU FastAPI Service: http://{host}:{port}")
264
- print("The API documentation can be accessed at the following address:")
265
- print(f"- Swagger UI: http://{host}:{port}/docs")
266
- print(f"- ReDoc: http://{host}:{port}/redoc")
334
+ print(f"API documentation: http://{host}:{port}/docs")
267
335
 
268
336
  uvicorn.run(
269
337
  "mineru.cli.fast_api:app",
@@ -274,4 +342,4 @@ def main(ctx, host, port, reload, **kwargs):
274
342
 
275
343
 
276
344
  if __name__ == "__main__":
277
- main()
345
+ main()
mineru/cli/gradio_app.py CHANGED
@@ -274,7 +274,7 @@ def to_pdf(file_path):
274
274
 
275
275
  # 更新界面函数
276
276
  def update_interface(backend_choice):
277
- if backend_choice in ["vlm-transformers", "vlm-vllm-async-engine", "vlm-mlx-engine"]:
277
+ if backend_choice in ["vlm-transformers", "vlm-vllm-async-engine", "vlm-lmdeploy-engine", "vlm-mlx-engine"]:
278
278
  return gr.update(visible=False), gr.update(visible=False)
279
279
  elif backend_choice in ["vlm-http-client"]:
280
280
  return gr.update(visible=True), gr.update(visible=False)
@@ -301,6 +301,13 @@ def update_interface(backend_choice):
301
301
  help="Enable vLLM engine backend for faster processing.",
302
302
  default=False,
303
303
  )
304
+ @click.option(
305
+ '--enable-lmdeploy-engine',
306
+ 'lmdeploy_engine_enable',
307
+ type=bool,
308
+ help="Enable LMDeploy engine backend for faster processing.",
309
+ default=False,
310
+ )
304
311
  @click.option(
305
312
  '--enable-api',
306
313
  'api_enable',
@@ -338,7 +345,7 @@ def update_interface(backend_choice):
338
345
  default='all',
339
346
  )
340
347
  def main(ctx,
341
- example_enable, vllm_engine_enable, api_enable, max_convert_pages,
348
+ example_enable, vllm_engine_enable, lmdeploy_engine_enable, api_enable, max_convert_pages,
342
349
  server_name, server_port, latex_delimiters_type, **kwargs
343
350
  ):
344
351
 
@@ -367,6 +374,20 @@ def main(ctx,
367
374
  print("vLLM engine init successfully.")
368
375
  except Exception as e:
369
376
  logger.exception(e)
377
+ elif lmdeploy_engine_enable:
378
+ try:
379
+ print("Start init LMDeploy engine...")
380
+ from mineru.backend.vlm.vlm_analyze import ModelSingleton
381
+ model_singleton = ModelSingleton()
382
+ predictor = model_singleton.get_model(
383
+ "lmdeploy-engine",
384
+ None,
385
+ None,
386
+ **kwargs
387
+ )
388
+ print("LMDeploy engine init successfully.")
389
+ except Exception as e:
390
+ logger.exception(e)
370
391
  suffixes = [f".{suffix}" for suffix in pdf_suffixes + image_suffixes]
371
392
  with gr.Blocks() as demo:
372
393
  gr.HTML(header)
@@ -380,6 +401,9 @@ def main(ctx,
380
401
  if vllm_engine_enable:
381
402
  drop_list = ["pipeline", "vlm-vllm-async-engine"]
382
403
  preferred_option = "vlm-vllm-async-engine"
404
+ elif lmdeploy_engine_enable:
405
+ drop_list = ["pipeline", "vlm-lmdeploy-engine"]
406
+ preferred_option = "vlm-lmdeploy-engine"
383
407
  else:
384
408
  drop_list = ["pipeline", "vlm-transformers", "vlm-http-client"]
385
409
  if is_mac_os_version_supported():
@@ -453,4 +477,4 @@ def main(ctx,
453
477
 
454
478
 
455
479
  if __name__ == '__main__':
456
- main()
480
+ main()
@@ -0,0 +1,61 @@
1
+ import click
2
+ import sys
3
+
4
+ from loguru import logger
5
+
6
+
7
+ def vllm_server():
8
+ from mineru.model.vlm.vllm_server import main
9
+ main()
10
+
11
+
12
+ def lmdeploy_server():
13
+ from mineru.model.vlm.lmdeploy_server import main
14
+ main()
15
+
16
+
17
+ @click.command(context_settings=dict(ignore_unknown_options=True, allow_extra_args=True))
18
+ @click.option(
19
+ '-e',
20
+ '--engine',
21
+ 'inference_engine',
22
+ type=click.Choice(['auto', 'vllm', 'lmdeploy']),
23
+ default='auto',
24
+ help='Select the inference engine used to accelerate VLM inference, default is "auto".',
25
+ )
26
+ @click.pass_context
27
+ def openai_server(ctx, inference_engine):
28
+ sys.argv = [sys.argv[0]] + ctx.args
29
+ if inference_engine == 'auto':
30
+ try:
31
+ import vllm
32
+ inference_engine = 'vllm'
33
+ logger.info("Using vLLM as the inference engine for VLM server.")
34
+ except ImportError:
35
+ logger.info("vLLM not found, attempting to use LMDeploy as the inference engine for VLM server.")
36
+ try:
37
+ import lmdeploy
38
+ inference_engine = 'lmdeploy'
39
+ # Success message moved after successful import
40
+ logger.info("Using LMDeploy as the inference engine for VLM server.")
41
+ except ImportError:
42
+ logger.error("Neither vLLM nor LMDeploy is installed. Please install at least one of them.")
43
+ sys.exit(1)
44
+
45
+ if inference_engine == 'vllm':
46
+ try:
47
+ import vllm
48
+ except ImportError:
49
+ logger.error("vLLM is not installed. Please install vLLM or choose LMDeploy as the inference engine.")
50
+ sys.exit(1)
51
+ vllm_server()
52
+ elif inference_engine == 'lmdeploy':
53
+ try:
54
+ import lmdeploy
55
+ except ImportError:
56
+ logger.error("LMDeploy is not installed. Please install LMDeploy or choose vLLM as the inference engine.")
57
+ sys.exit(1)
58
+ lmdeploy_server()
59
+
60
+ if __name__ == "__main__":
61
+ openai_server()
@@ -0,0 +1,93 @@
1
+ import os
2
+ import sys
3
+
4
+ from loguru import logger
5
+
6
+ from mineru.backend.vlm.utils import set_lmdeploy_backend
7
+ from mineru.utils.models_download_utils import auto_download_and_get_model_root_path
8
+
9
+
10
def main():
    """Launch ``lmdeploy serve api_server`` for the MinerU VLM model.

    Extra options are taken from ``sys.argv[1:]`` and forwarded to LMDeploy.
    Defaults are appended when the caller did not supply them:
    ``--server-port 30000``, ``--cache-max-entry-count 0.5`` and
    ``--log-level ERROR``.

    ``--device``/``--lmdeploy-device`` and ``--backend``/``--lmdeploy-backend``
    are stripped from the forwarded options and re-added after validation.
    The environment variables ``MINERU_LMDEPLOY_DEVICE`` and
    ``MINERU_LMDEPLOY_BACKEND`` take precedence over the command-line values.
    When no backend is given, it is chosen by ``set_lmdeploy_backend``.

    Side effects: rewrites ``sys.argv``; sets ``TOKENIZERS_PARALLELISM=false``
    for the ``pytorch`` backend; sets ``OMP_NUM_THREADS=1`` when it is unset.

    Raises:
        ValueError: if the resolved device type or backend is not supported.
    """
    # Local imports keep this block self-contained without touching the
    # module's import section.
    import shutil
    import subprocess

    args = sys.argv[1:]

    has_port_arg = False
    has_gpu_memory_utilization_arg = False
    has_log_level_arg = False
    device_type = ""
    lm_backend = ""

    # Inspect the options supplied by the caller.
    indices_to_remove = []

    for i, arg in enumerate(args):
        if arg == "--server-port" or arg.startswith("--server-port="):
            has_port_arg = True
        if arg == "--cache-max-entry-count" or arg.startswith("--cache-max-entry-count="):
            has_gpu_memory_utilization_arg = True
        if arg == "--log-level" or arg.startswith("--log-level="):
            has_log_level_arg = True
        if arg == "--backend" or arg == "--lmdeploy-backend":
            if i + 1 < len(args):
                lm_backend = args[i + 1]
                indices_to_remove.extend([i, i + 1])
        elif arg.startswith("--backend=") or arg.startswith("--lmdeploy-backend="):
            lm_backend = arg.split("=", 1)[1]
            indices_to_remove.append(i)
        if arg == "--device" or arg == "--lmdeploy-device":
            if i + 1 < len(args):
                device_type = args[i + 1]
                indices_to_remove.extend([i, i + 1])
        elif arg.startswith("--device=") or arg.startswith("--lmdeploy-device="):
            device_type = arg.split("=", 1)[1]
            indices_to_remove.append(i)

    # Delete from the back so earlier indices stay valid.
    for i in sorted(set(indices_to_remove), reverse=True):
        args.pop(i)

    # Append defaults for anything the caller left out.
    if not has_port_arg:
        args.extend(["--server-port", "30000"])
    if not has_gpu_memory_utilization_arg:
        args.extend(["--cache-max-entry-count", "0.5"])
    if not has_log_level_arg:
        args.extend(["--log-level", "ERROR"])

    device_type = os.getenv("MINERU_LMDEPLOY_DEVICE", device_type)
    if device_type == "":
        device_type = "cuda"
    elif device_type not in ["cuda", "ascend", "maca", "camb"]:
        raise ValueError(f"Unsupported lmdeploy device type: {device_type}")
    lm_backend = os.getenv("MINERU_LMDEPLOY_BACKEND", lm_backend)
    if lm_backend == "":
        lm_backend = set_lmdeploy_backend(device_type)
    elif lm_backend not in ["pytorch", "turbomind"]:
        raise ValueError(f"Unsupported lmdeploy backend: {lm_backend}")
    logger.info(f"lmdeploy device is: {device_type}, lmdeploy backend is: {lm_backend}")

    if lm_backend == "pytorch":
        os.environ["TOKENIZERS_PARALLELISM"] = "false"

    args.extend(["--device", device_type])
    args.extend(["--backend", lm_backend])

    model_path = auto_download_and_get_model_root_path("/", "vlm")

    # Rebuild argv with the model path as the positional argument.
    sys.argv = [sys.argv[0]] + ["serve", "api_server", model_path] + args

    if os.getenv('OMP_NUM_THREADS') is None:
        os.environ["OMP_NUM_THREADS"] = "1"

    # Start the lmdeploy server.
    print(f"start lmdeploy server: {sys.argv}")

    # Pass the arguments as a list instead of a joined shell string: a model
    # path or option value containing spaces or shell metacharacters must
    # reach lmdeploy as a single, uninterpreted argument.
    executable = shutil.which("lmdeploy") or "lmdeploy"
    try:
        server = subprocess.Popen([executable, *sys.argv[1:]])
    except OSError as exc:
        logger.error(f"Failed to start lmdeploy: {exc}")
        return

    try:
        server.wait()
    except KeyboardInterrupt:
        # Ctrl+C reaches the child too; keep waiting so it can shut down
        # cleanly, as it could under the previous os.system call.
        server.wait()
90
+
91
+
92
+ if __name__ == "__main__":
93
+ main()
@@ -13,6 +13,10 @@ def is_mac_environment() -> bool:
13
13
  return platform.system() == "Darwin"
14
14
 
15
15
 
16
def is_linux_environment() -> bool:
    """Return True when ``platform.system()`` reports ``"Linux"``."""
    system_name = platform.system()
    return system_name == "Linux"
18
+
19
+
16
20
  # Detect if CPU is Apple Silicon architecture
17
21
  def is_apple_silicon_cpu() -> bool:
18
22
  return platform.machine() in ["arm64", "aarch64"]
@@ -428,8 +428,6 @@ def clean_memory(device='cuda'):
428
428
 
429
429
  def clean_vram(device, vram_threshold=8):
430
430
  total_memory = get_vram(device)
431
- if total_memory is not None:
432
- total_memory = int(os.getenv('MINERU_VIRTUAL_VRAM_SIZE', round(total_memory)))
433
431
  if total_memory and total_memory <= vram_threshold:
434
432
  gc_start = time.time()
435
433
  clean_memory(device)
@@ -437,13 +435,28 @@ def clean_vram(device, vram_threshold=8):
437
435
  # logger.info(f"gc time: {gc_time}")
438
436
 
439
437
 
440
- def get_vram(device):
438
def get_vram(device) -> int:
    """Return the device memory size in whole gigabytes.

    A positive integer in the ``MINERU_VIRTUAL_VRAM_SIZE`` environment
    variable overrides detection. A non-integer or non-positive value is
    logged as a warning and ignored.

    Otherwise the total memory of ``device`` is read from torch (CUDA) or
    torch_npu (NPU) and rounded to the nearest GB. When neither applies,
    1 is returned.
    """
    env_vram = os.getenv("MINERU_VIRTUAL_VRAM_SIZE")

    # An explicitly configured value wins when it parses to a positive int.
    if env_vram is not None:
        try:
            configured_gb = int(env_vram)
        except ValueError:
            logger.warning(
                f"MINERU_VIRTUAL_VRAM_SIZE value '{env_vram}' is not a valid integer, falling back to auto-detection")
        else:
            if configured_gb > 0:
                return configured_gb
            logger.warning(
                f"MINERU_VIRTUAL_VRAM_SIZE value '{env_vram}' is not positive, falling back to auto-detection")

    # No usable override: query the accelerator that matches the device name.
    bytes_per_gb = 1024 ** 3
    if torch.cuda.is_available() and str(device).startswith("cuda"):
        return round(torch.cuda.get_device_properties(device).total_memory / bytes_per_gb)
    if str(device).startswith("npu") and torch_npu.npu.is_available():
        return round(torch_npu.npu.get_device_properties(device).total_memory / bytes_per_gb)

    # Fallback when the device is neither an available CUDA nor NPU device.
    return 1
mineru/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "2.6.4"
1
+ __version__ = "2.6.6"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mineru
3
- Version: 2.6.4
3
+ Version: 2.6.6
4
4
  Summary: A practical tool for converting PDF to Markdown
5
5
  License: AGPL-3.0
6
6
  Project-URL: homepage, https://mineru.net/
@@ -36,8 +36,9 @@ Requires-Dist: fast-langdetect<0.3.0,>=0.2.3
36
36
  Requires-Dist: scikit-image<1.0.0,>=0.25.0
37
37
  Requires-Dist: openai<3,>=1.70.0
38
38
  Requires-Dist: beautifulsoup4<5,>=4.13.5
39
- Requires-Dist: magika<0.7.0,>=0.6.2
40
- Requires-Dist: mineru-vl-utils<1,>=0.1.15
39
+ Requires-Dist: magika<1.1.0,>=0.6.2
40
+ Requires-Dist: mineru-vl-utils<1,>=0.1.17
41
+ Requires-Dist: qwen-vl-utils<1,>=0.0.14
41
42
  Provides-Extra: test
42
43
  Requires-Dist: mineru[core]; extra == "test"
43
44
  Requires-Dist: pytest; extra == "test"
@@ -46,10 +47,12 @@ Requires-Dist: coverage; extra == "test"
46
47
  Requires-Dist: fuzzywuzzy; extra == "test"
47
48
  Provides-Extra: vlm
48
49
  Requires-Dist: torch<3,>=2.6.0; extra == "vlm"
49
- Requires-Dist: transformers<5.0.0,>=4.51.1; extra == "vlm"
50
+ Requires-Dist: transformers!=4.57.2,<5.0.0,>=4.51.1; extra == "vlm"
50
51
  Requires-Dist: accelerate>=1.5.1; extra == "vlm"
51
52
  Provides-Extra: vllm
52
53
  Requires-Dist: vllm<0.12,>=0.10.1.1; extra == "vllm"
54
+ Provides-Extra: lmdeploy
55
+ Requires-Dist: lmdeploy<0.12,>=0.10.2; extra == "lmdeploy"
53
56
  Provides-Extra: mlx
54
57
  Requires-Dist: mlx-vlm<0.4,>=0.3.3; extra == "mlx"
55
58
  Provides-Extra: pipeline
@@ -71,8 +74,8 @@ Requires-Dist: fastapi; extra == "api"
71
74
  Requires-Dist: python-multipart; extra == "api"
72
75
  Requires-Dist: uvicorn; extra == "api"
73
76
  Provides-Extra: gradio
74
- Requires-Dist: gradio<6,>=5.34; extra == "gradio"
75
- Requires-Dist: gradio-pdf>=0.0.22; extra == "gradio"
77
+ Requires-Dist: gradio==5.49.1; extra == "gradio"
78
+ Requires-Dist: gradio-pdf==0.0.22; extra == "gradio"
76
79
  Provides-Extra: core
77
80
  Requires-Dist: mineru[vlm]; extra == "core"
78
81
  Requires-Dist: mineru[pipeline]; extra == "core"
@@ -81,7 +84,8 @@ Requires-Dist: mineru[gradio]; extra == "core"
81
84
  Requires-Dist: mineru[mlx]; sys_platform == "darwin" and extra == "core"
82
85
  Provides-Extra: all
83
86
  Requires-Dist: mineru[core]; extra == "all"
84
- Requires-Dist: mineru[vllm]; extra == "all"
87
+ Requires-Dist: mineru[vllm]; sys_platform == "linux" and extra == "all"
88
+ Requires-Dist: mineru[lmdeploy]; sys_platform == "win32" and extra == "all"
85
89
  Dynamic: license-file
86
90
 
87
91
  <div align="center" xmlns="http://www.w3.org/1999/html">
@@ -130,6 +134,16 @@ Dynamic: license-file
130
134
  </div>
131
135
 
132
136
  # Changelog
137
+
138
+ - 2025/12/02 2.6.6 Release
139
+ - `mineru-api` tool optimizations
140
+ - Added descriptive text to `mineru-api` interface parameters to improve API documentation readability.
141
+ - You can use the environment variable `MINERU_API_ENABLE_FASTAPI_DOCS` to control whether the auto-generated interface documentation page is enabled (enabled by default).
142
+ - Added concurrency configuration options for the `vlm-vllm-async-engine`, `vlm-lmdeploy-engine`, and `vlm-http-client` backends. Users can use the environment variable `MINERU_API_MAX_CONCURRENT_REQUESTS` to set the maximum number of concurrent API requests (unlimited by default).
143
+
144
+ - 2025/11/26 2.6.5 Release
145
+ - Added support for a new backend, `vlm-lmdeploy-engine`. Its usage is similar to `vlm-vllm-(async)engine`, but it uses LMDeploy as the inference engine and, unlike vLLM, also supports native inference acceleration on Windows.
146
+
133
147
  - 2025/11/04 2.6.4 Release
134
148
  - Added timeout configuration for PDF image rendering, default is 300 seconds, can be configured via environment variable `MINERU_PDF_RENDER_TIMEOUT` to prevent long blocking of the rendering process caused by some abnormal PDF files.
135
149
  - Added CPU thread count configuration options for ONNX models, default is the system CPU core count, can be configured via environment variables `MINERU_INTRA_OP_NUM_THREADS` and `MINERU_INTER_OP_NUM_THREADS` to reduce CPU resource contention conflicts in high concurrency scenarios.
@@ -718,12 +732,13 @@ A WebUI developed based on Gradio, with a simple interface and only core parsing
718
732
  <tr>
719
733
  <th rowspan="2">Parsing Backend</th>
720
734
  <th rowspan="2">pipeline <br> (Accuracy<sup>1</sup> 82+)</th>
721
- <th colspan="4">vlm (Accuracy<sup>1</sup> 90+)</th>
735
+ <th colspan="5">vlm (Accuracy<sup>1</sup> 90+)</th>
722
736
  </tr>
723
737
  <tr>
724
738
  <th>transformers</th>
725
739
  <th>mlx-engine</th>
726
740
  <th>vllm-engine / <br>vllm-async-engine</th>
741
+ <th>lmdeploy-engine</th>
727
742
  <th>http-client</th>
728
743
  </tr>
729
744
  </thead>
@@ -734,40 +749,42 @@ A WebUI developed based on Gradio, with a simple interface and only core parsing
734
749
  <td>Good compatibility, <br>but slower</td>
735
750
  <td>Faster than transformers</td>
736
751
  <td>Fast, compatible with the vLLM ecosystem</td>
737
- <td>Suitable for OpenAI-compatible servers<sup>5</sup></td>
752
+ <td>Fast, compatible with the LMDeploy ecosystem</td>
753
+ <td>Suitable for OpenAI-compatible servers<sup>6</sup></td>
738
754
  </tr>
739
755
  <tr>
740
756
  <th>Operating System</th>
741
757
  <td colspan="2" style="text-align:center;">Linux<sup>2</sup> / Windows / macOS</td>
742
758
  <td style="text-align:center;">macOS<sup>3</sup></td>
743
759
  <td style="text-align:center;">Linux<sup>2</sup> / Windows<sup>4</sup> </td>
760
+ <td style="text-align:center;">Linux<sup>2</sup> / Windows<sup>5</sup> </td>
744
761
  <td>Any</td>
745
762
  </tr>
746
763
  <tr>
747
764
  <th>CPU inference support</th>
748
765
  <td colspan="2" style="text-align:center;">✅</td>
749
- <td colspan="2" style="text-align:center;">❌</td>
766
+ <td colspan="3" style="text-align:center;">❌</td>
750
767
  <td>Not required</td>
751
768
  </tr>
752
769
  <tr>
753
770
  <th>GPU Requirements</th><td colspan="2" style="text-align:center;">Volta or later architectures, 6 GB VRAM or more, or Apple Silicon</td>
754
771
  <td>Apple Silicon</td>
755
- <td>Volta or later architectures, 8 GB VRAM or more</td>
772
+ <td colspan="2" style="text-align:center;">Volta or later architectures, 8 GB VRAM or more</td>
756
773
  <td>Not required</td>
757
774
  </tr>
758
775
  <tr>
759
776
  <th>Memory Requirements</th>
760
- <td colspan="4" style="text-align:center;">Minimum 16 GB, 32 GB recommended</td>
777
+ <td colspan="5" style="text-align:center;">Minimum 16 GB, 32 GB recommended</td>
761
778
  <td>8 GB</td>
762
779
  </tr>
763
780
  <tr>
764
781
  <th>Disk Space Requirements</th>
765
- <td colspan="4" style="text-align:center;">20 GB or more, SSD recommended</td>
782
+ <td colspan="5" style="text-align:center;">20 GB or more, SSD recommended</td>
766
783
  <td>2 GB</td>
767
784
  </tr>
768
785
  <tr>
769
786
  <th>Python Version</th>
770
- <td colspan="5" style="text-align:center;">3.10-3.13</td>
787
+ <td colspan="6" style="text-align:center;">3.10-3.13<sup>7</sup></td>
771
788
  </tr>
772
789
  </tbody>
773
790
  </table>
@@ -776,7 +793,9 @@ A WebUI developed based on Gradio, with a simple interface and only core parsing
776
793
  <sup>2</sup> Linux supports only distributions released in 2019 or later.
777
794
  <sup>3</sup> MLX requires macOS 13.5 or later, recommended for use with version 14.0 or higher.
778
795
  <sup>4</sup> vLLM is supported on Windows via WSL2 (Windows Subsystem for Linux).
779
- <sup>5</sup> Servers compatible with the OpenAI API, such as local or remote model services deployed via inference frameworks like `vLLM`, `SGLang`, or `LMDeploy`.
796
+ <sup>5</sup> Windows LMDeploy can only use the `turbomind` backend, which is slightly slower than the `pytorch` backend. If performance is critical, it is recommended to run it via WSL2.
797
+ <sup>6</sup> Servers compatible with the OpenAI API, such as local or remote model services deployed via inference frameworks like `vLLM`, `SGLang`, or `LMDeploy`.
798
+ <sup>7</sup> Windows + LMDeploy only supports Python versions 3.10–3.12, as the critical dependency `ray` does not yet support Python 3.13 on Windows.
780
799
 
781
800
 
782
801
  ### Install MinerU
@@ -796,8 +815,8 @@ uv pip install -e .[core]
796
815
  ```
797
816
 
798
817
  > [!TIP]
799
- > `mineru[core]` includes all core features except `vLLM` acceleration, compatible with Windows / Linux / macOS systems, suitable for most users.
800
- > If you need to use `vLLM` acceleration for VLM model inference or install a lightweight client on edge devices, please refer to the documentation [Extension Modules Installation Guide](https://opendatalab.github.io/MinerU/quick_start/extension_modules/).
818
+ > `mineru[core]` includes all core features except `vLLM`/`LMDeploy` acceleration, compatible with Windows / Linux / macOS systems, suitable for most users.
819
+ > If you need to use `vLLM`/`LMDeploy` acceleration for VLM model inference or install a lightweight client on edge devices, please refer to the documentation [Extension Modules Installation Guide](https://opendatalab.github.io/MinerU/quick_start/extension_modules/).
801
820
 
802
821
  ---
803
822
 
@@ -875,6 +894,8 @@ Currently, some models in this project are trained based on YOLO. However, since
875
894
  - [pdfminer.six](https://github.com/pdfminer/pdfminer.six)
876
895
  - [pypdf](https://github.com/py-pdf/pypdf)
877
896
  - [magika](https://github.com/google/magika)
897
+ - [vLLM](https://github.com/vllm-project/vllm)
898
+ - [LMDeploy](https://github.com/InternLM/lmdeploy)
878
899
 
879
900
  # Citation
880
901
 
@@ -1,5 +1,5 @@
1
1
  mineru/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
2
- mineru/version.py,sha256=ODIwI6SfzWmx_FdtwCfr6k5TmpNuA5JdvGyV-9G9YrM,22
2
+ mineru/version.py,sha256=MJHGx-Qo0nycI7WHSavnK8Mok6HS_De_qLfGWXih6Og,22
3
3
  mineru/backend/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
4
4
  mineru/backend/utils.py,sha256=GLJU3IznDmhE1_qNmkU1UOtsuskIHBezgsEVO6Uar-Y,698
5
5
  mineru/backend/pipeline/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
@@ -8,22 +8,22 @@ mineru/backend/pipeline/model_init.py,sha256=OAylOcQD9gu5TBcX7nMt7X5NpJMtQICI5Iv
8
8
  mineru/backend/pipeline/model_json_to_middle_json.py,sha256=reXkUR_wKmJD64d7vRNXMxFviwkzDlGjRshpdwsVquI,10951
9
9
  mineru/backend/pipeline/model_list.py,sha256=7cXMBfZrP0K6qWueg1D_-WoUANeSINzkn_ic9E7YQLs,222
10
10
  mineru/backend/pipeline/para_split.py,sha256=Kq95MmvkPm7rKxlCSGiTvVKyF7CErHI2eGGAs5sLl0Q,17119
11
- mineru/backend/pipeline/pipeline_analyze.py,sha256=O_HGifodg03VZbmTve-U6Cmo0T03AmuK86t1v1J9X-Q,6897
11
+ mineru/backend/pipeline/pipeline_analyze.py,sha256=GkGOrWGnBSswUik3nt_m76bCwDISC9sxXZ6xRX3L154,6528
12
12
  mineru/backend/pipeline/pipeline_magic_model.py,sha256=w8jGx8f6yZN0Wf2yPP3L9rYKc9rogxreZCrUJzJvPO8,14974
13
13
  mineru/backend/pipeline/pipeline_middle_json_mkcontent.py,sha256=YlnEbbUnkniZXS13aLo5mjfFQvQM5SrIVvTAGBZsLmw,14478
14
14
  mineru/backend/vlm/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
15
15
  mineru/backend/vlm/model_output_to_middle_json.py,sha256=AqYX44gS9crUO_t7SuUatD71EVjow6pI6yA2Ik3gQ0s,5139
16
- mineru/backend/vlm/utils.py,sha256=woGqyRI4S7p69daLCU07XNXWTV27aLf7YBjjVH1x-5o,2794
17
- mineru/backend/vlm/vlm_analyze.py,sha256=EQKNtc12pQ6so5NuUE-ppUtWI1QH_CQnsx1QfHdzAwA,8790
16
+ mineru/backend/vlm/utils.py,sha256=JMgS3SMFcHJYH2jIx-Xhs-P2a1bmT8U6Kn60IL0OmQA,3570
17
+ mineru/backend/vlm/vlm_analyze.py,sha256=wP3vuYGVec0hRsDAuzfSm2HD4Muu7wSWL767qxd_yqw,11690
18
18
  mineru/backend/vlm/vlm_magic_model.py,sha256=Pd0sOr7G1crAJIVeq6h_03gNSuxmV5U8dvGTGT_rrjs,23452
19
19
  mineru/backend/vlm/vlm_middle_json_mkcontent.py,sha256=5V-AU9KkxxMn0DDSQBrb15I4GVpEyiQy8uNI_tQhS6M,13498
20
20
  mineru/cli/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
21
- mineru/cli/client.py,sha256=ul2Twu-MWT2pCPrtvWbhIwWnoR6aurHJ3KhFOmElP90,6915
22
- mineru/cli/common.py,sha256=3kd6sF6BlnBNL_UeMjXKJ11fGQA4Y9lOckznWNiIWY8,13988
23
- mineru/cli/fast_api.py,sha256=t5bda769VbM5iokAboiJfPIOnm-r5GTFReE-KQy8L3g,10941
24
- mineru/cli/gradio_app.py,sha256=hyhI38y-JahMJgYZiikC3CYUVrtYVjbZb67Q4RUKbw4,14731
21
+ mineru/cli/client.py,sha256=XSEIr4klUuufMAWn5IioZdXpg1xAxqRZF0HkaVIhxh0,6815
22
+ mineru/cli/common.py,sha256=zhNOJCOnTSMbWdUWSZG-nf0odv5vBRtdZYZ1UbUPH3g,14369
23
+ mineru/cli/fast_api.py,sha256=lLxQKKHmD8ruoZGcE6LrXzr3pQIxvw8OdJrQq_FNLSM,14447
24
+ mineru/cli/gradio_app.py,sha256=EUPuRHHCOECrE3E3VNEeuMDYeC3nicurOYfk8YJSOMw,15646
25
25
  mineru/cli/models_download.py,sha256=LNfoIpUlJM7m7qb2SiCxtjMDw4jILBQtZwNP2JoY81U,4815
26
- mineru/cli/vlm_vllm_server.py,sha256=fQJyD-gIPQ41hR_6aIaDJczl66N310t0CiZEBAfX5mc,90
26
+ mineru/cli/vlm_server.py,sha256=27HaqO3wpMXSA_nA3CC6JOBTHK3q66SP00cD6m9HuQE,1974
27
27
  mineru/data/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
28
28
  mineru/data/data_reader_writer/__init__.py,sha256=9qnGNrsuGBMwwfsQy6oChdkz--a_LPdYWE0VZZr0yr4,490
29
29
  mineru/data/data_reader_writer/base.py,sha256=nqmAcdHOXMOJO6RAT3ILligDFaw8Op0STyCw5yOzAbI,1706
@@ -145,15 +145,16 @@ mineru/model/utils/tools/infer/predict_det.py,sha256=vYQREn7vELXxBsr72CCCVvm1gwV
145
145
  mineru/model/utils/tools/infer/predict_rec.py,sha256=-BH93JDisu0kT6CyHA4plUOKcb2L-UvDk7Tein5uwt4,19209
146
146
  mineru/model/utils/tools/infer/predict_system.py,sha256=hkegkn6hq2v2zqHVAP615-k-fkTS8swRYSbZeoqmSI8,3822
147
147
  mineru/model/utils/tools/infer/pytorchocr_utility.py,sha256=i1PFN-_kefJUUZ4Vk7igs1TU8gfErTDlDXY6-8Uaurw,9323
148
- mineru/model/vlm_vllm_model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
149
- mineru/model/vlm_vllm_model/server.py,sha256=w5ddusPbcVaEoWAo_BRjmwv_Ywxrc_bCMRhxihoyykY,2263
148
+ mineru/model/vlm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
149
+ mineru/model/vlm/lmdeploy_server.py,sha256=PvxJNcUIKB8VzWMDXeV1t0SHSgz_ULO36ZAzJbppz90,3262
150
+ mineru/model/vlm/vllm_server.py,sha256=w5ddusPbcVaEoWAo_BRjmwv_Ywxrc_bCMRhxihoyykY,2263
150
151
  mineru/resources/header.html,sha256=PUselBXLBn8gfeP3zwEtj6zIxfhcCN4vN_B796nQFNQ,4410
151
152
  mineru/resources/fasttext-langdetect/lid.176.ftz,sha256=jzRyz-hzintgmejpmcPL-uDc0VaWqsfXc4qAOdtgPoM,938013
152
153
  mineru/utils/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
153
154
  mineru/utils/block_pre_proc.py,sha256=uGBmxf2MR9bplTnQI8xHjCI-kj3plRhJr0hcWKidbOQ,9632
154
155
  mineru/utils/block_sort.py,sha256=5e1mOLB3W7xu5Y1hmhvGSHPL_aQ41R_4VXcP4vjYAOU,12976
155
156
  mineru/utils/boxbase.py,sha256=moP660AmZq_udHEsfvFkTQdJ4gjrrBwN7t0Enx7CIL8,6903
156
- mineru/utils/check_sys_env.py,sha256=1o7Do3k84Hnwvlnmzx8JqkcGJA3UqiGfucMv9sPgPyI,1113
157
+ mineru/utils/check_sys_env.py,sha256=TRjzg4xWyoSGrgv4KaP225A-99xBgLAfZ1cPcGqrBAA,1191
157
158
  mineru/utils/cli_parser.py,sha256=4seFAu1kulsYnw6WM2q_cxgEOt2tErZVkI-LNEF_kGw,1445
158
159
  mineru/utils/config_reader.py,sha256=IRVWTpBnbnRpck6eXZUKw-fcLt7hon5S4uqWW-RBb1w,4075
159
160
  mineru/utils/cut_image.py,sha256=g3m4nfcJNWlxi-P0kpXTtlmspXkMcLCfGwmYuQ-Z2hE,751
@@ -165,7 +166,7 @@ mineru/utils/hash_utils.py,sha256=UPS_8NRBmVumdyOv16Lmv6Ly2xK8OVDJEe5gG6gKIFk,85
165
166
  mineru/utils/language.py,sha256=7RT3mxSa7jdpoC5ySd7ZddHA7TO7UsnmDOWiYZAxuyg,1433
166
167
  mineru/utils/llm_aided.py,sha256=9WUytvxenSAuaWR4sTQhVPQ5h8pY0wVOH1O2sj_6dLs,5149
167
168
  mineru/utils/magic_model_utils.py,sha256=2xOvi4oqg3MSw1FUrJTnYDtWeFrrm6qbmlEorLZSaYs,5650
168
- mineru/utils/model_utils.py,sha256=6OsgFLsABX5JuShSzCMSNHWV-yi-1cjwHweafyxIgRo,18448
169
+ mineru/utils/model_utils.py,sha256=6moOQqE5ShHaJKkENXP8BXJA7RCWtOGlYHZ3nidwmZs,18977
169
170
  mineru/utils/models_download_utils.py,sha256=UfjvwhxO6BkJHa5JSpEVNZ71GoLMPMmJpym3THET2T4,2957
170
171
  mineru/utils/ocr_utils.py,sha256=lPIrwNUib5mrzUkponRYHuUCdjV2qvETNLSzOLyflrU,15990
171
172
  mineru/utils/os_env_config.py,sha256=ZNtkR4KrJW72CeIoTNzGDL6tMKv_hL8nzvWIssGWbqY,842
@@ -178,9 +179,9 @@ mineru/utils/run_async.py,sha256=rPeP4BCZerR8VByRDhiYzfZiahLVqoZEBVAS54dAjNg,128
178
179
  mineru/utils/span_block_fix.py,sha256=0eVQjJCrT03woRt9hoh6Uu42Tp1dacfGTv2x3B9qq94,8797
179
180
  mineru/utils/span_pre_proc.py,sha256=h41q2uQajI0xQbc_30hqaju1dv3oVYxBAlKgURl8HIc,13692
180
181
  mineru/utils/table_merge.py,sha256=d98zNbM1ZQ8V1kUt6RugParNUNPv7DGL-XKIzR3iJVQ,15360
181
- mineru-2.6.4.dist-info/licenses/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
182
- mineru-2.6.4.dist-info/METADATA,sha256=igOwr_rwmoJGD4KXKyEBgpESlUr6CZHThNXXE2PQ59U,71241
183
- mineru-2.6.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
184
- mineru-2.6.4.dist-info/entry_points.txt,sha256=luXmbhPiZK_tKlRgWuYOaW_V6EFpG-yJcAevVv9MEqE,252
185
- mineru-2.6.4.dist-info/top_level.txt,sha256=zuGQfZcbsHv4I4oKI9gaKPqEWBFm6xJroKuug2LnKP8,7
186
- mineru-2.6.4.dist-info/RECORD,,
182
+ mineru-2.6.6.dist-info/licenses/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
183
+ mineru-2.6.6.dist-info/METADATA,sha256=9f-9lcSQXdLCxbYmHItJbLgDc-TZG7u7dVUWMS0SzXA,73095
184
+ mineru-2.6.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
185
+ mineru-2.6.6.dist-info/entry_points.txt,sha256=JbtrCPhx1T32s7TONUsteKg-24ZwRT1HSiFtW5jypVw,376
186
+ mineru-2.6.6.dist-info/top_level.txt,sha256=zuGQfZcbsHv4I4oKI9gaKPqEWBFm6xJroKuug2LnKP8,7
187
+ mineru-2.6.6.dist-info/RECORD,,
@@ -2,5 +2,7 @@
2
2
  mineru = mineru.cli:client.main
3
3
  mineru-api = mineru.cli.fast_api:main
4
4
  mineru-gradio = mineru.cli.gradio_app:main
5
+ mineru-lmdeploy-server = mineru.cli.vlm_server:lmdeploy_server
5
6
  mineru-models-download = mineru.cli.models_download:download_models
6
- mineru-vllm-server = mineru.cli.vlm_vllm_server:main
7
+ mineru-openai-server = mineru.cli.vlm_server:openai_server
8
+ mineru-vllm-server = mineru.cli.vlm_server:vllm_server
@@ -1,4 +0,0 @@
1
- from mineru.model.vlm_vllm_model.server import main
2
-
3
- if __name__ == "__main__":
4
- main()
File without changes
File without changes