mineru 2.6.4__py3-none-any.whl → 2.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mineru/backend/vlm/utils.py +31 -5
- mineru/backend/vlm/vlm_analyze.py +122 -66
- mineru/cli/client.py +5 -4
- mineru/cli/common.py +15 -6
- mineru/cli/gradio_app.py +27 -3
- mineru/cli/vlm_server.py +61 -0
- mineru/model/vlm/lmdeploy_server.py +93 -0
- mineru/utils/check_sys_env.py +4 -0
- mineru/version.py +1 -1
- {mineru-2.6.4.dist-info → mineru-2.6.5.dist-info}/METADATA +27 -15
- {mineru-2.6.4.dist-info → mineru-2.6.5.dist-info}/RECORD +17 -16
- {mineru-2.6.4.dist-info → mineru-2.6.5.dist-info}/entry_points.txt +3 -1
- mineru/cli/vlm_vllm_server.py +0 -4
- /mineru/model/{vlm_vllm_model → vlm}/__init__.py +0 -0
- /mineru/model/{vlm_vllm_model/server.py → vlm/vllm_server.py} +0 -0
- {mineru-2.6.4.dist-info → mineru-2.6.5.dist-info}/WHEEL +0 -0
- {mineru-2.6.4.dist-info → mineru-2.6.5.dist-info}/licenses/LICENSE.md +0 -0
- {mineru-2.6.4.dist-info → mineru-2.6.5.dist-info}/top_level.txt +0 -0
mineru/backend/vlm/utils.py
CHANGED
|
@@ -3,6 +3,7 @@ import os
|
|
|
3
3
|
from loguru import logger
|
|
4
4
|
from packaging import version
|
|
5
5
|
|
|
6
|
+
from mineru.utils.check_sys_env import is_windows_environment, is_linux_environment
|
|
6
7
|
from mineru.utils.config_reader import get_device
|
|
7
8
|
from mineru.utils.model_utils import get_vram
|
|
8
9
|
|
|
@@ -11,14 +12,16 @@ def enable_custom_logits_processors() -> bool:
|
|
|
11
12
|
import torch
|
|
12
13
|
from vllm import __version__ as vllm_version
|
|
13
14
|
|
|
14
|
-
if
|
|
15
|
+
if torch.cuda.is_available():
|
|
16
|
+
major, minor = torch.cuda.get_device_capability()
|
|
17
|
+
# 正确计算Compute Capability
|
|
18
|
+
compute_capability = f"{major}.{minor}"
|
|
19
|
+
elif hasattr(torch, 'npu') and torch.npu.is_available():
|
|
20
|
+
compute_capability = "8.0"
|
|
21
|
+
else:
|
|
15
22
|
logger.info("CUDA not available, disabling custom_logits_processors")
|
|
16
23
|
return False
|
|
17
24
|
|
|
18
|
-
major, minor = torch.cuda.get_device_capability()
|
|
19
|
-
# 正确计算Compute Capability
|
|
20
|
-
compute_capability = f"{major}.{minor}"
|
|
21
|
-
|
|
22
25
|
# 安全地处理环境变量
|
|
23
26
|
vllm_use_v1_str = os.getenv('VLLM_USE_V1', "1")
|
|
24
27
|
if vllm_use_v1_str.isdigit():
|
|
@@ -44,6 +47,29 @@ def enable_custom_logits_processors() -> bool:
|
|
|
44
47
|
return True
|
|
45
48
|
|
|
46
49
|
|
|
50
|
+
def set_lmdeploy_backend(device_type: str) -> str:
    """Choose the lmdeploy inference backend for a device type.

    Args:
        device_type: Device name, compared case-insensitively. Supported
            values are "ascend", "maca", "camb" and "cuda".

    Returns:
        "pytorch" or "turbomind".

    Raises:
        ValueError: If the device type is not supported, if "cuda" is
            requested but CUDA is unavailable, or if the operating system
            is neither Windows nor Linux.
    """
    normalized = device_type.lower()

    # These accelerators always run on the pytorch backend.
    if normalized in ("ascend", "maca", "camb"):
        return "pytorch"

    if normalized != "cuda":
        raise ValueError(f"Unsupported lmdeploy device type: {device_type}")

    # torch is imported lazily so non-CUDA paths do not require it here.
    import torch

    if not torch.cuda.is_available():
        raise ValueError("CUDA is not available.")

    if is_windows_environment():
        return "turbomind"

    if not is_linux_environment():
        raise ValueError("Unsupported operating system.")

    # On Linux, compute capability 8.0 and above selects pytorch;
    # anything lower selects turbomind.
    cc_major, cc_minor = torch.cuda.get_device_capability()
    capability = version.parse(f"{cc_major}.{cc_minor}")
    return "pytorch" if capability >= version.parse("8.0") else "turbomind"
|
|
71
|
+
|
|
72
|
+
|
|
47
73
|
def set_default_gpu_memory_utilization() -> float:
|
|
48
74
|
from vllm import __version__ as vllm_version
|
|
49
75
|
if version.parse(vllm_version) >= version.parse("0.11.0"):
|
|
@@ -4,7 +4,8 @@ import time
|
|
|
4
4
|
|
|
5
5
|
from loguru import logger
|
|
6
6
|
|
|
7
|
-
from .utils import enable_custom_logits_processors, set_default_gpu_memory_utilization, set_default_batch_size
|
|
7
|
+
from .utils import enable_custom_logits_processors, set_default_gpu_memory_utilization, set_default_batch_size, \
|
|
8
|
+
set_lmdeploy_backend
|
|
8
9
|
from .model_output_to_middle_json import result_to_middle_json
|
|
9
10
|
from ...data.data_reader_writer import DataWriter
|
|
10
11
|
from mineru.utils.pdf_image_tools import load_images_from_pdf
|
|
@@ -40,94 +41,149 @@ class ModelSingleton:
|
|
|
40
41
|
model = None
|
|
41
42
|
processor = None
|
|
42
43
|
vllm_llm = None
|
|
44
|
+
lmdeploy_engine = None
|
|
43
45
|
vllm_async_llm = None
|
|
44
46
|
batch_size = kwargs.get("batch_size", 0) # for transformers backend only
|
|
45
47
|
max_concurrency = kwargs.get("max_concurrency", 100) # for http-client backend only
|
|
46
48
|
http_timeout = kwargs.get("http_timeout", 600) # for http-client backend only
|
|
49
|
+
server_headers = kwargs.get("server_headers", None) # for http-client backend only
|
|
50
|
+
max_retries = kwargs.get("max_retries", 3) # for http-client backend only
|
|
51
|
+
retry_backoff_factor = kwargs.get("retry_backoff_factor", 0.5) # for http-client backend only
|
|
47
52
|
# 从kwargs中移除这些参数,避免传递给不相关的初始化函数
|
|
48
|
-
for param in ["batch_size", "max_concurrency", "http_timeout"]:
|
|
53
|
+
for param in ["batch_size", "max_concurrency", "http_timeout", "server_headers", "max_retries", "retry_backoff_factor"]:
|
|
49
54
|
if param in kwargs:
|
|
50
55
|
del kwargs[param]
|
|
51
|
-
if backend in [
|
|
56
|
+
if backend not in ["http-client"] and not model_path:
|
|
52
57
|
model_path = auto_download_and_get_model_root_path("/","vlm")
|
|
53
|
-
|
|
58
|
+
if backend == "transformers":
|
|
59
|
+
try:
|
|
60
|
+
from transformers import (
|
|
61
|
+
AutoProcessor,
|
|
62
|
+
Qwen2VLForConditionalGeneration,
|
|
63
|
+
)
|
|
64
|
+
from transformers import __version__ as transformers_version
|
|
65
|
+
except ImportError:
|
|
66
|
+
raise ImportError("Please install transformers to use the transformers backend.")
|
|
67
|
+
|
|
68
|
+
if version.parse(transformers_version) >= version.parse("4.56.0"):
|
|
69
|
+
dtype_key = "dtype"
|
|
70
|
+
else:
|
|
71
|
+
dtype_key = "torch_dtype"
|
|
72
|
+
device = get_device()
|
|
73
|
+
model = Qwen2VLForConditionalGeneration.from_pretrained(
|
|
74
|
+
model_path,
|
|
75
|
+
device_map={"": device},
|
|
76
|
+
**{dtype_key: "auto"}, # type: ignore
|
|
77
|
+
)
|
|
78
|
+
processor = AutoProcessor.from_pretrained(
|
|
79
|
+
model_path,
|
|
80
|
+
use_fast=True,
|
|
81
|
+
)
|
|
82
|
+
if batch_size == 0:
|
|
83
|
+
batch_size = set_default_batch_size()
|
|
84
|
+
elif backend == "mlx-engine":
|
|
85
|
+
mlx_supported = is_mac_os_version_supported()
|
|
86
|
+
if not mlx_supported:
|
|
87
|
+
raise EnvironmentError("mlx-engine backend is only supported on macOS 13.5+ with Apple Silicon.")
|
|
88
|
+
try:
|
|
89
|
+
from mlx_vlm import load as mlx_load
|
|
90
|
+
except ImportError:
|
|
91
|
+
raise ImportError("Please install mlx-vlm to use the mlx-engine backend.")
|
|
92
|
+
model, processor = mlx_load(model_path)
|
|
93
|
+
else:
|
|
94
|
+
if os.getenv('OMP_NUM_THREADS') is None:
|
|
95
|
+
os.environ["OMP_NUM_THREADS"] = "1"
|
|
96
|
+
|
|
97
|
+
if backend == "vllm-engine":
|
|
54
98
|
try:
|
|
55
|
-
|
|
56
|
-
AutoProcessor,
|
|
57
|
-
Qwen2VLForConditionalGeneration,
|
|
58
|
-
)
|
|
59
|
-
from transformers import __version__ as transformers_version
|
|
99
|
+
import vllm
|
|
60
100
|
except ImportError:
|
|
61
|
-
raise ImportError("Please install
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
101
|
+
raise ImportError("Please install vllm to use the vllm-engine backend.")
|
|
102
|
+
if "gpu_memory_utilization" not in kwargs:
|
|
103
|
+
kwargs["gpu_memory_utilization"] = set_default_gpu_memory_utilization()
|
|
104
|
+
if "model" not in kwargs:
|
|
105
|
+
kwargs["model"] = model_path
|
|
106
|
+
if enable_custom_logits_processors() and ("logits_processors" not in kwargs):
|
|
107
|
+
from mineru_vl_utils import MinerULogitsProcessor
|
|
108
|
+
kwargs["logits_processors"] = [MinerULogitsProcessor]
|
|
109
|
+
# 使用kwargs为 vllm初始化参数
|
|
110
|
+
vllm_llm = vllm.LLM(**kwargs)
|
|
111
|
+
elif backend == "vllm-async-engine":
|
|
112
|
+
try:
|
|
113
|
+
from vllm.engine.arg_utils import AsyncEngineArgs
|
|
114
|
+
from vllm.v1.engine.async_llm import AsyncLLM
|
|
115
|
+
except ImportError:
|
|
116
|
+
raise ImportError("Please install vllm to use the vllm-async-engine backend.")
|
|
117
|
+
if "gpu_memory_utilization" not in kwargs:
|
|
118
|
+
kwargs["gpu_memory_utilization"] = set_default_gpu_memory_utilization()
|
|
119
|
+
if "model" not in kwargs:
|
|
120
|
+
kwargs["model"] = model_path
|
|
121
|
+
if enable_custom_logits_processors() and ("logits_processors" not in kwargs):
|
|
122
|
+
from mineru_vl_utils import MinerULogitsProcessor
|
|
123
|
+
kwargs["logits_processors"] = [MinerULogitsProcessor]
|
|
124
|
+
# 使用kwargs为 vllm初始化参数
|
|
125
|
+
vllm_async_llm = AsyncLLM.from_engine_args(AsyncEngineArgs(**kwargs))
|
|
126
|
+
elif backend == "lmdeploy-engine":
|
|
127
|
+
try:
|
|
128
|
+
from lmdeploy import PytorchEngineConfig, TurbomindEngineConfig
|
|
129
|
+
from lmdeploy.serve.vl_async_engine import VLAsyncEngine
|
|
130
|
+
except ImportError:
|
|
131
|
+
raise ImportError("Please install lmdeploy to use the lmdeploy-engine backend.")
|
|
132
|
+
if "cache_max_entry_count" not in kwargs:
|
|
133
|
+
kwargs["cache_max_entry_count"] = 0.5
|
|
134
|
+
|
|
135
|
+
device_type = os.getenv("MINERU_LMDEPLOY_DEVICE", "")
|
|
136
|
+
if device_type == "":
|
|
137
|
+
if "lmdeploy_device" in kwargs:
|
|
138
|
+
device_type = kwargs.pop("lmdeploy_device")
|
|
139
|
+
if device_type not in ["cuda", "ascend", "maca", "camb"]:
|
|
140
|
+
raise ValueError(f"Unsupported lmdeploy device type: {device_type}")
|
|
141
|
+
else:
|
|
142
|
+
device_type = "cuda"
|
|
143
|
+
lm_backend = os.getenv("MINERU_LMDEPLOY_BACKEND", "")
|
|
144
|
+
if lm_backend == "":
|
|
145
|
+
if "lmdeploy_backend" in kwargs:
|
|
146
|
+
lm_backend = kwargs.pop("lmdeploy_backend")
|
|
147
|
+
if lm_backend not in ["pytorch", "turbomind"]:
|
|
148
|
+
raise ValueError(f"Unsupported lmdeploy backend: {lm_backend}")
|
|
149
|
+
else:
|
|
150
|
+
lm_backend = set_lmdeploy_backend(device_type)
|
|
151
|
+
logger.info(f"lmdeploy device is: {device_type}, lmdeploy backend is: {lm_backend}")
|
|
152
|
+
|
|
153
|
+
if lm_backend == "pytorch":
|
|
154
|
+
kwargs["device_type"] = device_type
|
|
155
|
+
backend_config = PytorchEngineConfig(**kwargs)
|
|
156
|
+
elif lm_backend == "turbomind":
|
|
157
|
+
backend_config = TurbomindEngineConfig(**kwargs)
|
|
65
158
|
else:
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
)
|
|
73
|
-
|
|
159
|
+
raise ValueError(f"Unsupported lmdeploy backend: {lm_backend}")
|
|
160
|
+
|
|
161
|
+
log_level = 'ERROR'
|
|
162
|
+
from lmdeploy.utils import get_logger
|
|
163
|
+
lm_logger = get_logger('lmdeploy')
|
|
164
|
+
lm_logger.setLevel(log_level)
|
|
165
|
+
if os.getenv('TM_LOG_LEVEL') is None:
|
|
166
|
+
os.environ['TM_LOG_LEVEL'] = log_level
|
|
167
|
+
|
|
168
|
+
lmdeploy_engine = VLAsyncEngine(
|
|
74
169
|
model_path,
|
|
75
|
-
|
|
170
|
+
backend=lm_backend,
|
|
171
|
+
backend_config=backend_config,
|
|
76
172
|
)
|
|
77
|
-
if batch_size == 0:
|
|
78
|
-
batch_size = set_default_batch_size()
|
|
79
|
-
elif backend == "mlx-engine":
|
|
80
|
-
mlx_supported = is_mac_os_version_supported()
|
|
81
|
-
if not mlx_supported:
|
|
82
|
-
raise EnvironmentError("mlx-engine backend is only supported on macOS 13.5+ with Apple Silicon.")
|
|
83
|
-
try:
|
|
84
|
-
from mlx_vlm import load as mlx_load
|
|
85
|
-
except ImportError:
|
|
86
|
-
raise ImportError("Please install mlx-vlm to use the mlx-engine backend.")
|
|
87
|
-
model, processor = mlx_load(model_path)
|
|
88
|
-
else:
|
|
89
|
-
if os.getenv('OMP_NUM_THREADS') is None:
|
|
90
|
-
os.environ["OMP_NUM_THREADS"] = "1"
|
|
91
|
-
|
|
92
|
-
if backend == "vllm-engine":
|
|
93
|
-
try:
|
|
94
|
-
import vllm
|
|
95
|
-
from mineru_vl_utils import MinerULogitsProcessor
|
|
96
|
-
except ImportError:
|
|
97
|
-
raise ImportError("Please install vllm to use the vllm-engine backend.")
|
|
98
|
-
if "gpu_memory_utilization" not in kwargs:
|
|
99
|
-
kwargs["gpu_memory_utilization"] = set_default_gpu_memory_utilization()
|
|
100
|
-
if "model" not in kwargs:
|
|
101
|
-
kwargs["model"] = model_path
|
|
102
|
-
if enable_custom_logits_processors() and ("logits_processors" not in kwargs):
|
|
103
|
-
kwargs["logits_processors"] = [MinerULogitsProcessor]
|
|
104
|
-
# 使用kwargs为 vllm初始化参数
|
|
105
|
-
vllm_llm = vllm.LLM(**kwargs)
|
|
106
|
-
elif backend == "vllm-async-engine":
|
|
107
|
-
try:
|
|
108
|
-
from vllm.engine.arg_utils import AsyncEngineArgs
|
|
109
|
-
from vllm.v1.engine.async_llm import AsyncLLM
|
|
110
|
-
from mineru_vl_utils import MinerULogitsProcessor
|
|
111
|
-
except ImportError:
|
|
112
|
-
raise ImportError("Please install vllm to use the vllm-async-engine backend.")
|
|
113
|
-
if "gpu_memory_utilization" not in kwargs:
|
|
114
|
-
kwargs["gpu_memory_utilization"] = set_default_gpu_memory_utilization()
|
|
115
|
-
if "model" not in kwargs:
|
|
116
|
-
kwargs["model"] = model_path
|
|
117
|
-
if enable_custom_logits_processors() and ("logits_processors" not in kwargs):
|
|
118
|
-
kwargs["logits_processors"] = [MinerULogitsProcessor]
|
|
119
|
-
# 使用kwargs为 vllm初始化参数
|
|
120
|
-
vllm_async_llm = AsyncLLM.from_engine_args(AsyncEngineArgs(**kwargs))
|
|
121
173
|
self._models[key] = MinerUClient(
|
|
122
174
|
backend=backend,
|
|
123
175
|
model=model,
|
|
124
176
|
processor=processor,
|
|
177
|
+
lmdeploy_engine=lmdeploy_engine,
|
|
125
178
|
vllm_llm=vllm_llm,
|
|
126
179
|
vllm_async_llm=vllm_async_llm,
|
|
127
180
|
server_url=server_url,
|
|
128
181
|
batch_size=batch_size,
|
|
129
182
|
max_concurrency=max_concurrency,
|
|
130
183
|
http_timeout=http_timeout,
|
|
184
|
+
server_headers=server_headers,
|
|
185
|
+
max_retries=max_retries,
|
|
186
|
+
retry_backoff_factor=retry_backoff_factor,
|
|
131
187
|
)
|
|
132
188
|
elapsed = round(time.time() - start_time, 2)
|
|
133
189
|
logger.info(f"get {backend} predictor cost: {elapsed}s")
|
mineru/cli/client.py
CHANGED
|
@@ -13,7 +13,7 @@ from ..version import __version__
|
|
|
13
13
|
from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
|
|
14
14
|
|
|
15
15
|
|
|
16
|
-
backends = ['pipeline', 'vlm-transformers', 'vlm-vllm-engine', 'vlm-http-client']
|
|
16
|
+
backends = ['pipeline', 'vlm-transformers', 'vlm-vllm-engine', 'vlm-lmdeploy-engine', 'vlm-http-client']
|
|
17
17
|
if is_mac_os_version_supported():
|
|
18
18
|
backends.append("vlm-mlx-engine")
|
|
19
19
|
|
|
@@ -62,9 +62,10 @@ if is_mac_os_version_supported():
|
|
|
62
62
|
the backend for parsing pdf:
|
|
63
63
|
pipeline: More general.
|
|
64
64
|
vlm-transformers: More general, but slower.
|
|
65
|
-
vlm-mlx-engine: Faster than transformers.
|
|
66
|
-
vlm-vllm-engine: Faster(engine).
|
|
67
|
-
vlm-
|
|
65
|
+
vlm-mlx-engine: Faster than transformers(macOS 13.5+).
|
|
66
|
+
vlm-vllm-engine: Faster(vllm-engine).
|
|
67
|
+
vlm-lmdeploy-engine: Faster(lmdeploy-engine).
|
|
68
|
+
vlm-http-client: Faster(client suitable for openai-compatible servers).
|
|
68
69
|
Without method specified, pipeline will be used by default.""",
|
|
69
70
|
default='pipeline',
|
|
70
71
|
)
|
mineru/cli/common.py
CHANGED
|
@@ -18,6 +18,11 @@ from mineru.backend.vlm.vlm_analyze import doc_analyze as vlm_doc_analyze
|
|
|
18
18
|
from mineru.backend.vlm.vlm_analyze import aio_doc_analyze as aio_vlm_doc_analyze
|
|
19
19
|
from mineru.utils.pdf_page_id import get_end_page_id
|
|
20
20
|
|
|
21
|
+
if os.getenv("MINERU_LMDEPLOY_DEVICE", "") == "maca":
|
|
22
|
+
import torch
|
|
23
|
+
torch.backends.cudnn.enabled = False
|
|
24
|
+
|
|
25
|
+
|
|
21
26
|
pdf_suffixes = ["pdf"]
|
|
22
27
|
image_suffixes = ["png", "jpeg", "jp2", "webp", "gif", "bmp", "jpg", "tiff"]
|
|
23
28
|
|
|
@@ -51,11 +56,16 @@ def convert_pdf_bytes_to_bytes_by_pypdfium2(pdf_bytes, start_page_id=0, end_page
|
|
|
51
56
|
try:
|
|
52
57
|
end_page_id = get_end_page_id(end_page_id, len(pdf))
|
|
53
58
|
|
|
54
|
-
#
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
+
# 逐页导入,失败则跳过
|
|
60
|
+
output_index = 0
|
|
61
|
+
for page_index in range(start_page_id, end_page_id + 1):
|
|
62
|
+
try:
|
|
63
|
+
output_pdf.import_pages(pdf, pages=[page_index])
|
|
64
|
+
output_index += 1
|
|
65
|
+
except Exception as page_error:
|
|
66
|
+
output_pdf.del_page(output_index)
|
|
67
|
+
logger.warning(f"Failed to import page {page_index}: {page_error}, skipping this page.")
|
|
68
|
+
continue
|
|
59
69
|
|
|
60
70
|
# 将新PDF保存到内存缓冲区
|
|
61
71
|
output_buffer = io.BytesIO()
|
|
@@ -66,7 +76,6 @@ def convert_pdf_bytes_to_bytes_by_pypdfium2(pdf_bytes, start_page_id=0, end_page
|
|
|
66
76
|
except Exception as e:
|
|
67
77
|
logger.warning(f"Error in converting PDF bytes: {e}, Using original PDF bytes.")
|
|
68
78
|
output_bytes = pdf_bytes
|
|
69
|
-
|
|
70
79
|
pdf.close()
|
|
71
80
|
output_pdf.close()
|
|
72
81
|
return output_bytes
|
mineru/cli/gradio_app.py
CHANGED
|
@@ -274,7 +274,7 @@ def to_pdf(file_path):
|
|
|
274
274
|
|
|
275
275
|
# 更新界面函数
|
|
276
276
|
def update_interface(backend_choice):
|
|
277
|
-
if backend_choice in ["vlm-transformers", "vlm-vllm-async-engine", "vlm-mlx-engine"]:
|
|
277
|
+
if backend_choice in ["vlm-transformers", "vlm-vllm-async-engine", "vlm-lmdeploy-engine", "vlm-mlx-engine"]:
|
|
278
278
|
return gr.update(visible=False), gr.update(visible=False)
|
|
279
279
|
elif backend_choice in ["vlm-http-client"]:
|
|
280
280
|
return gr.update(visible=True), gr.update(visible=False)
|
|
@@ -301,6 +301,13 @@ def update_interface(backend_choice):
|
|
|
301
301
|
help="Enable vLLM engine backend for faster processing.",
|
|
302
302
|
default=False,
|
|
303
303
|
)
|
|
304
|
+
@click.option(
|
|
305
|
+
'--enable-lmdeploy-engine',
|
|
306
|
+
'lmdeploy_engine_enable',
|
|
307
|
+
type=bool,
|
|
308
|
+
help="Enable LMDeploy engine backend for faster processing.",
|
|
309
|
+
default=False,
|
|
310
|
+
)
|
|
304
311
|
@click.option(
|
|
305
312
|
'--enable-api',
|
|
306
313
|
'api_enable',
|
|
@@ -338,7 +345,7 @@ def update_interface(backend_choice):
|
|
|
338
345
|
default='all',
|
|
339
346
|
)
|
|
340
347
|
def main(ctx,
|
|
341
|
-
example_enable, vllm_engine_enable, api_enable, max_convert_pages,
|
|
348
|
+
example_enable, vllm_engine_enable, lmdeploy_engine_enable, api_enable, max_convert_pages,
|
|
342
349
|
server_name, server_port, latex_delimiters_type, **kwargs
|
|
343
350
|
):
|
|
344
351
|
|
|
@@ -367,6 +374,20 @@ def main(ctx,
|
|
|
367
374
|
print("vLLM engine init successfully.")
|
|
368
375
|
except Exception as e:
|
|
369
376
|
logger.exception(e)
|
|
377
|
+
elif lmdeploy_engine_enable:
|
|
378
|
+
try:
|
|
379
|
+
print("Start init LMDeploy engine...")
|
|
380
|
+
from mineru.backend.vlm.vlm_analyze import ModelSingleton
|
|
381
|
+
model_singleton = ModelSingleton()
|
|
382
|
+
predictor = model_singleton.get_model(
|
|
383
|
+
"lmdeploy-engine",
|
|
384
|
+
None,
|
|
385
|
+
None,
|
|
386
|
+
**kwargs
|
|
387
|
+
)
|
|
388
|
+
print("LMDeploy engine init successfully.")
|
|
389
|
+
except Exception as e:
|
|
390
|
+
logger.exception(e)
|
|
370
391
|
suffixes = [f".{suffix}" for suffix in pdf_suffixes + image_suffixes]
|
|
371
392
|
with gr.Blocks() as demo:
|
|
372
393
|
gr.HTML(header)
|
|
@@ -380,6 +401,9 @@ def main(ctx,
|
|
|
380
401
|
if vllm_engine_enable:
|
|
381
402
|
drop_list = ["pipeline", "vlm-vllm-async-engine"]
|
|
382
403
|
preferred_option = "vlm-vllm-async-engine"
|
|
404
|
+
elif lmdeploy_engine_enable:
|
|
405
|
+
drop_list = ["pipeline", "vlm-lmdeploy-engine"]
|
|
406
|
+
preferred_option = "vlm-lmdeploy-engine"
|
|
383
407
|
else:
|
|
384
408
|
drop_list = ["pipeline", "vlm-transformers", "vlm-http-client"]
|
|
385
409
|
if is_mac_os_version_supported():
|
|
@@ -453,4 +477,4 @@ def main(ctx,
|
|
|
453
477
|
|
|
454
478
|
|
|
455
479
|
if __name__ == '__main__':
|
|
456
|
-
main()
|
|
480
|
+
main()
|
mineru/cli/vlm_server.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import click
|
|
2
|
+
import sys
|
|
3
|
+
|
|
4
|
+
from loguru import logger
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def vllm_server():
    """Run the vLLM-based server entry point.

    The server module is imported inside the function, so it is only
    loaded when this entry point is actually invoked.
    """
    from mineru.model.vlm.vllm_server import main as run_vllm_server

    run_vllm_server()
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def lmdeploy_server():
    """Run the LMDeploy-based server entry point.

    The server module is imported inside the function, so it is only
    loaded when this entry point is actually invoked.
    """
    from mineru.model.vlm.lmdeploy_server import main as run_lmdeploy_server

    run_lmdeploy_server()
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@click.command(context_settings=dict(ignore_unknown_options=True, allow_extra_args=True))
@click.option(
    '-e',
    '--engine',
    'inference_engine',
    type=click.Choice(['auto', 'vllm', 'lmdeploy']),
    default='auto',
    help='Select the inference engine used to accelerate VLM inference, default is "auto".',
)
@click.pass_context
def openai_server(ctx, inference_engine):
    """Start the VLM server with the selected inference engine.

    Unknown options and extra arguments are left in ``ctx.args`` and written
    back to ``sys.argv`` so the engine-specific server can parse them.

    With ``auto``, vLLM is tried first and LMDeploy second; the process exits
    with status 1 if neither can be imported. An explicitly requested engine
    that cannot be imported also exits with status 1.
    """
    # Hand the remaining arguments over to the engine-specific launcher.
    sys.argv = [sys.argv[0]] + ctx.args

    if inference_engine == 'auto':
        try:
            import vllm
        except ImportError:
            logger.info("vLLM not found, attempting to use LMDeploy as the inference engine for VLM server.")
            try:
                import lmdeploy
            except ImportError:
                logger.error("Neither vLLM nor LMDeploy is installed. Please install at least one of them.")
                sys.exit(1)
            else:
                inference_engine = 'lmdeploy'
                logger.info("Using LMDeploy as the inference engine for VLM server.")
        else:
            inference_engine = 'vllm'
            logger.info("Using vLLM as the inference engine for VLM server.")

    if inference_engine == 'vllm':
        try:
            import vllm
        except ImportError:
            logger.error("vLLM is not installed. Please install vLLM or choose LMDeploy as the inference engine.")
            sys.exit(1)
        vllm_server()
    elif inference_engine == 'lmdeploy':
        try:
            import lmdeploy
        except ImportError:
            logger.error("LMDeploy is not installed. Please install LMDeploy or choose vLLM as the inference engine.")
            sys.exit(1)
        lmdeploy_server()


if __name__ == "__main__":
    openai_server()
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
|
|
4
|
+
from loguru import logger
|
|
5
|
+
|
|
6
|
+
from mineru.backend.vlm.utils import set_lmdeploy_backend
|
|
7
|
+
from mineru.utils.models_download_utils import auto_download_and_get_model_root_path
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def main():
    """Launch ``lmdeploy serve api_server`` for the MinerU VLM model.

    Extra command-line arguments are read from ``sys.argv[1:]`` and forwarded
    to the lmdeploy CLI after the following adjustments:

    * ``--backend`` / ``--lmdeploy-backend`` and ``--device`` /
      ``--lmdeploy-device`` (both ``--flag value`` and ``--flag=value`` forms)
      are extracted and removed, then re-added in normalized form.
    * ``--server-port 30000``, ``--cache-max-entry-count 0.5`` and
      ``--log-level ERROR`` are appended when the caller did not supply them.
    * The environment variables ``MINERU_LMDEPLOY_DEVICE`` and
      ``MINERU_LMDEPLOY_BACKEND``, when set, take precedence over the
      command-line values.

    The child process is started with an argument list instead of a shell
    string, so arguments containing spaces or shell metacharacters (for
    example a model path with spaces) reach lmdeploy unchanged.

    Raises:
        ValueError: If the device type or backend is not supported.
        SystemExit: With the child's exit code when lmdeploy exits non-zero.
    """
    import shutil
    import subprocess

    args = sys.argv[1:]

    has_port_arg = False
    has_cache_max_entry_count_arg = False
    has_log_level_arg = False
    device_type = ""
    lm_backend = ""

    # Inspect the existing arguments.
    indices_to_remove = []

    for i, arg in enumerate(args):
        if arg == "--server-port" or arg.startswith("--server-port="):
            has_port_arg = True
        if arg == "--cache-max-entry-count" or arg.startswith("--cache-max-entry-count="):
            has_cache_max_entry_count_arg = True
        if arg == "--log-level" or arg.startswith("--log-level="):
            has_log_level_arg = True
        if arg == "--backend" or arg == "--lmdeploy-backend":
            if i + 1 < len(args):
                lm_backend = args[i + 1]
                indices_to_remove.extend([i, i + 1])
        elif arg.startswith("--backend=") or arg.startswith("--lmdeploy-backend="):
            lm_backend = arg.split("=", 1)[1]
            indices_to_remove.append(i)
        if arg == "--device" or arg == "--lmdeploy-device":
            if i + 1 < len(args):
                device_type = args[i + 1]
                indices_to_remove.extend([i, i + 1])
        elif arg.startswith("--device=") or arg.startswith("--lmdeploy-device="):
            device_type = arg.split("=", 1)[1]
            indices_to_remove.append(i)

    # Delete from the back so earlier indices stay valid.
    for i in sorted(set(indices_to_remove), reverse=True):
        args.pop(i)

    # Append defaults for anything the caller did not specify.
    if not has_port_arg:
        args.extend(["--server-port", "30000"])
    if not has_cache_max_entry_count_arg:
        args.extend(["--cache-max-entry-count", "0.5"])
    if not has_log_level_arg:
        args.extend(["--log-level", "ERROR"])

    # The environment variable, when set, overrides the command-line value.
    device_type = os.getenv("MINERU_LMDEPLOY_DEVICE", device_type)
    if device_type == "":
        device_type = "cuda"
    elif device_type not in ["cuda", "ascend", "maca", "camb"]:
        raise ValueError(f"Unsupported lmdeploy device type: {device_type}")
    lm_backend = os.getenv("MINERU_LMDEPLOY_BACKEND", lm_backend)
    if lm_backend == "":
        lm_backend = set_lmdeploy_backend(device_type)
    elif lm_backend not in ["pytorch", "turbomind"]:
        raise ValueError(f"Unsupported lmdeploy backend: {lm_backend}")
    logger.info(f"lmdeploy device is: {device_type}, lmdeploy backend is: {lm_backend}")

    if lm_backend == "pytorch":
        os.environ["TOKENIZERS_PARALLELISM"] = "false"

    args.extend(["--device", device_type])
    args.extend(["--backend", lm_backend])

    model_path = auto_download_and_get_model_root_path("/", "vlm")

    # Rebuild argv with the model path as the positional argument.
    sys.argv = [sys.argv[0]] + ["serve", "api_server", model_path] + args

    if os.getenv('OMP_NUM_THREADS') is None:
        os.environ["OMP_NUM_THREADS"] = "1"

    # Start the lmdeploy server.
    print(f"start lmdeploy server: {sys.argv}")

    # Resolve the executable explicitly (honours PATHEXT on Windows) and pass
    # the arguments as a list so no shell re-parses or re-splits them.
    executable = shutil.which("lmdeploy") or "lmdeploy"
    completed = subprocess.run([executable, *sys.argv[1:]])
    if completed.returncode != 0:
        # Surface a failed server start instead of silently returning.
        sys.exit(completed.returncode)


if __name__ == "__main__":
    main()
|
mineru/utils/check_sys_env.py
CHANGED
|
@@ -13,6 +13,10 @@ def is_mac_environment() -> bool:
|
|
|
13
13
|
return platform.system() == "Darwin"
|
|
14
14
|
|
|
15
15
|
|
|
16
|
+
def is_linux_environment() -> bool:
    """Return True when ``platform.system()`` reports "Linux"."""
    system_name = platform.system()
    return system_name == "Linux"
|
|
18
|
+
|
|
19
|
+
|
|
16
20
|
# Detect if CPU is Apple Silicon architecture
|
|
17
21
|
def is_apple_silicon_cpu() -> bool:
|
|
18
22
|
return platform.machine() in ["arm64", "aarch64"]
|
mineru/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "2.6.
|
|
1
|
+
__version__ = "2.6.5"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mineru
|
|
3
|
-
Version: 2.6.
|
|
3
|
+
Version: 2.6.5
|
|
4
4
|
Summary: A practical tool for converting PDF to Markdown
|
|
5
5
|
License: AGPL-3.0
|
|
6
6
|
Project-URL: homepage, https://mineru.net/
|
|
@@ -36,8 +36,9 @@ Requires-Dist: fast-langdetect<0.3.0,>=0.2.3
|
|
|
36
36
|
Requires-Dist: scikit-image<1.0.0,>=0.25.0
|
|
37
37
|
Requires-Dist: openai<3,>=1.70.0
|
|
38
38
|
Requires-Dist: beautifulsoup4<5,>=4.13.5
|
|
39
|
-
Requires-Dist: magika<
|
|
40
|
-
Requires-Dist: mineru-vl-utils<1,>=0.1.
|
|
39
|
+
Requires-Dist: magika<1.1.0,>=0.6.2
|
|
40
|
+
Requires-Dist: mineru-vl-utils<1,>=0.1.17
|
|
41
|
+
Requires-Dist: qwen-vl-utils<1,>=0.0.14
|
|
41
42
|
Provides-Extra: test
|
|
42
43
|
Requires-Dist: mineru[core]; extra == "test"
|
|
43
44
|
Requires-Dist: pytest; extra == "test"
|
|
@@ -46,10 +47,12 @@ Requires-Dist: coverage; extra == "test"
|
|
|
46
47
|
Requires-Dist: fuzzywuzzy; extra == "test"
|
|
47
48
|
Provides-Extra: vlm
|
|
48
49
|
Requires-Dist: torch<3,>=2.6.0; extra == "vlm"
|
|
49
|
-
Requires-Dist: transformers
|
|
50
|
+
Requires-Dist: transformers!=4.57.2,<5.0.0,>=4.51.1; extra == "vlm"
|
|
50
51
|
Requires-Dist: accelerate>=1.5.1; extra == "vlm"
|
|
51
52
|
Provides-Extra: vllm
|
|
52
53
|
Requires-Dist: vllm<0.12,>=0.10.1.1; extra == "vllm"
|
|
54
|
+
Provides-Extra: lmdeploy
|
|
55
|
+
Requires-Dist: lmdeploy<0.12,>=0.10.2; extra == "lmdeploy"
|
|
53
56
|
Provides-Extra: mlx
|
|
54
57
|
Requires-Dist: mlx-vlm<0.4,>=0.3.3; extra == "mlx"
|
|
55
58
|
Provides-Extra: pipeline
|
|
@@ -81,7 +84,8 @@ Requires-Dist: mineru[gradio]; extra == "core"
|
|
|
81
84
|
Requires-Dist: mineru[mlx]; sys_platform == "darwin" and extra == "core"
|
|
82
85
|
Provides-Extra: all
|
|
83
86
|
Requires-Dist: mineru[core]; extra == "all"
|
|
84
|
-
Requires-Dist: mineru[vllm]; extra == "all"
|
|
87
|
+
Requires-Dist: mineru[vllm]; sys_platform == "linux" and extra == "all"
|
|
88
|
+
Requires-Dist: mineru[lmdeploy]; sys_platform == "win32" and extra == "all"
|
|
85
89
|
Dynamic: license-file
|
|
86
90
|
|
|
87
91
|
<div align="center" xmlns="http://www.w3.org/1999/html">
|
|
@@ -130,6 +134,9 @@ Dynamic: license-file
|
|
|
130
134
|
</div>
|
|
131
135
|
|
|
132
136
|
# Changelog
|
|
137
|
+
- 2025/11/26 2.6.5 Release
|
|
138
|
+
- Added support for a new backend, `vlm-lmdeploy-engine`. Its usage is similar to `vlm-vllm-(async)engine`, but it uses LMDeploy as the inference engine and, unlike vLLM, also supports native inference acceleration on Windows.
|
|
139
|
+
|
|
133
140
|
- 2025/11/04 2.6.4 Release
|
|
134
141
|
- Added timeout configuration for PDF image rendering, default is 300 seconds, can be configured via environment variable `MINERU_PDF_RENDER_TIMEOUT` to prevent long blocking of the rendering process caused by some abnormal PDF files.
|
|
135
142
|
- Added CPU thread count configuration options for ONNX models, default is the system CPU core count, can be configured via environment variables `MINERU_INTRA_OP_NUM_THREADS` and `MINERU_INTER_OP_NUM_THREADS` to reduce CPU resource contention conflicts in high concurrency scenarios.
|
|
@@ -718,12 +725,13 @@ A WebUI developed based on Gradio, with a simple interface and only core parsing
|
|
|
718
725
|
<tr>
|
|
719
726
|
<th rowspan="2">Parsing Backend</th>
|
|
720
727
|
<th rowspan="2">pipeline <br> (Accuracy<sup>1</sup> 82+)</th>
|
|
721
|
-
<th colspan="
|
|
728
|
+
<th colspan="5">vlm (Accuracy<sup>1</sup> 90+)</th>
|
|
722
729
|
</tr>
|
|
723
730
|
<tr>
|
|
724
731
|
<th>transformers</th>
|
|
725
732
|
<th>mlx-engine</th>
|
|
726
733
|
<th>vllm-engine / <br>vllm-async-engine</th>
|
|
734
|
+
<th>lmdeploy-engine</th>
|
|
727
735
|
<th>http-client</th>
|
|
728
736
|
</tr>
|
|
729
737
|
</thead>
|
|
@@ -734,40 +742,42 @@ A WebUI developed based on Gradio, with a simple interface and only core parsing
|
|
|
734
742
|
<td>Good compatibility, <br>but slower</td>
|
|
735
743
|
<td>Faster than transformers</td>
|
|
736
744
|
<td>Fast, compatible with the vLLM ecosystem</td>
|
|
737
|
-
<td>
|
|
745
|
+
<td>Fast, compatible with the LMDeploy ecosystem</td>
|
|
746
|
+
<td>Suitable for OpenAI-compatible servers<sup>6</sup></td>
|
|
738
747
|
</tr>
|
|
739
748
|
<tr>
|
|
740
749
|
<th>Operating System</th>
|
|
741
750
|
<td colspan="2" style="text-align:center;">Linux<sup>2</sup> / Windows / macOS</td>
|
|
742
751
|
<td style="text-align:center;">macOS<sup>3</sup></td>
|
|
743
752
|
<td style="text-align:center;">Linux<sup>2</sup> / Windows<sup>4</sup> </td>
|
|
753
|
+
<td style="text-align:center;">Linux<sup>2</sup> / Windows<sup>5</sup> </td>
|
|
744
754
|
<td>Any</td>
|
|
745
755
|
</tr>
|
|
746
756
|
<tr>
|
|
747
757
|
<th>CPU inference support</th>
|
|
748
758
|
<td colspan="2" style="text-align:center;">✅</td>
|
|
749
|
-
<td colspan="
|
|
759
|
+
<td colspan="3" style="text-align:center;">❌</td>
|
|
750
760
|
<td>Not required</td>
|
|
751
761
|
</tr>
|
|
752
762
|
<tr>
|
|
753
763
|
<th>GPU Requirements</th><td colspan="2" style="text-align:center;">Volta or later architectures, 6 GB VRAM or more, or Apple Silicon</td>
|
|
754
764
|
<td>Apple Silicon</td>
|
|
755
|
-
<td>Volta or later architectures, 8 GB VRAM or more</td>
|
|
765
|
+
<td colspan="2" style="text-align:center;">Volta or later architectures, 8 GB VRAM or more</td>
|
|
756
766
|
<td>Not required</td>
|
|
757
767
|
</tr>
|
|
758
768
|
<tr>
|
|
759
769
|
<th>Memory Requirements</th>
|
|
760
|
-
<td colspan="
|
|
770
|
+
<td colspan="5" style="text-align:center;">Minimum 16 GB, 32 GB recommended</td>
|
|
761
771
|
<td>8 GB</td>
|
|
762
772
|
</tr>
|
|
763
773
|
<tr>
|
|
764
774
|
<th>Disk Space Requirements</th>
|
|
765
|
-
<td colspan="
|
|
775
|
+
<td colspan="5" style="text-align:center;">20 GB or more, SSD recommended</td>
|
|
766
776
|
<td>2 GB</td>
|
|
767
777
|
</tr>
|
|
768
778
|
<tr>
|
|
769
779
|
<th>Python Version</th>
|
|
770
|
-
<td colspan="
|
|
780
|
+
<td colspan="6" style="text-align:center;">3.10-3.13<sup>7</sup></td>
|
|
771
781
|
</tr>
|
|
772
782
|
</tbody>
|
|
773
783
|
</table>
|
|
@@ -776,7 +786,9 @@ A WebUI developed based on Gradio, with a simple interface and only core parsing
|
|
|
776
786
|
<sup>2</sup> On Linux, only distributions released in 2019 or later are supported.
|
|
777
787
|
<sup>3</sup> MLX requires macOS 13.5 or later; version 14.0 or higher is recommended.
|
|
778
788
|
<sup>4</sup> On Windows, vLLM is supported via WSL2 (Windows Subsystem for Linux).
|
|
779
|
-
<sup>5</sup>
|
|
789
|
+
<sup>5</sup> On Windows, LMDeploy can only use the `turbomind` backend, which is slightly slower than the `pytorch` backend. If performance is critical, running LMDeploy via WSL2 is recommended.
|
|
790
|
+
<sup>6</sup> Servers compatible with the OpenAI API, such as local or remote model services deployed via inference frameworks like `vLLM`, `SGLang`, or `LMDeploy`.
|
|
791
|
+
<sup>7</sup> On Windows, LMDeploy supports only Python 3.10–3.12, because its critical dependency `ray` does not yet support Python 3.13 on Windows.
|
|
780
792
|
|
|
781
793
|
|
|
782
794
|
### Install MinerU
|
|
@@ -796,8 +808,8 @@ uv pip install -e .[core]
|
|
|
796
808
|
```
|
|
797
809
|
|
|
798
810
|
> [!TIP]
|
|
799
|
-
> `mineru[core]` includes all core features except `vLLM` acceleration, compatible with Windows / Linux / macOS systems, suitable for most users.
|
|
800
|
-
> If you need to use `vLLM` acceleration for VLM model inference or install a lightweight client on edge devices, please refer to the documentation [Extension Modules Installation Guide](https://opendatalab.github.io/MinerU/quick_start/extension_modules/).
|
|
811
|
+
> `mineru[core]` includes all core features except `vLLM`/`LMDeploy` acceleration. It is compatible with Windows, Linux, and macOS, and is suitable for most users.
|
|
812
|
+
> If you need to use `vLLM`/`LMDeploy` acceleration for VLM model inference or install a lightweight client on edge devices, please refer to the documentation [Extension Modules Installation Guide](https://opendatalab.github.io/MinerU/quick_start/extension_modules/).
|
|
801
813
|
|
|
802
814
|
---
|
|
803
815
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
mineru/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
2
|
-
mineru/version.py,sha256=
|
|
2
|
+
mineru/version.py,sha256=b8L3dijps7oaMPmOpJzOuXwvOcbIuro9wWmuPwiL87o,22
|
|
3
3
|
mineru/backend/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
4
4
|
mineru/backend/utils.py,sha256=GLJU3IznDmhE1_qNmkU1UOtsuskIHBezgsEVO6Uar-Y,698
|
|
5
5
|
mineru/backend/pipeline/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
@@ -13,17 +13,17 @@ mineru/backend/pipeline/pipeline_magic_model.py,sha256=w8jGx8f6yZN0Wf2yPP3L9rYKc
|
|
|
13
13
|
mineru/backend/pipeline/pipeline_middle_json_mkcontent.py,sha256=YlnEbbUnkniZXS13aLo5mjfFQvQM5SrIVvTAGBZsLmw,14478
|
|
14
14
|
mineru/backend/vlm/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
15
15
|
mineru/backend/vlm/model_output_to_middle_json.py,sha256=AqYX44gS9crUO_t7SuUatD71EVjow6pI6yA2Ik3gQ0s,5139
|
|
16
|
-
mineru/backend/vlm/utils.py,sha256=
|
|
17
|
-
mineru/backend/vlm/vlm_analyze.py,sha256=
|
|
16
|
+
mineru/backend/vlm/utils.py,sha256=taiPNKtsykImUYkkosk1CjxFIJEutygK8iZTLly-ZqU,3905
|
|
17
|
+
mineru/backend/vlm/vlm_analyze.py,sha256=wP3vuYGVec0hRsDAuzfSm2HD4Muu7wSWL767qxd_yqw,11690
|
|
18
18
|
mineru/backend/vlm/vlm_magic_model.py,sha256=Pd0sOr7G1crAJIVeq6h_03gNSuxmV5U8dvGTGT_rrjs,23452
|
|
19
19
|
mineru/backend/vlm/vlm_middle_json_mkcontent.py,sha256=5V-AU9KkxxMn0DDSQBrb15I4GVpEyiQy8uNI_tQhS6M,13498
|
|
20
20
|
mineru/cli/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
21
|
-
mineru/cli/client.py,sha256=
|
|
22
|
-
mineru/cli/common.py,sha256=
|
|
21
|
+
mineru/cli/client.py,sha256=__CQknekVeq6s72JzHQRPSpR7mfNaO-ob9wq6oiEj6s,7047
|
|
22
|
+
mineru/cli/common.py,sha256=zhNOJCOnTSMbWdUWSZG-nf0odv5vBRtdZYZ1UbUPH3g,14369
|
|
23
23
|
mineru/cli/fast_api.py,sha256=t5bda769VbM5iokAboiJfPIOnm-r5GTFReE-KQy8L3g,10941
|
|
24
|
-
mineru/cli/gradio_app.py,sha256=
|
|
24
|
+
mineru/cli/gradio_app.py,sha256=EUPuRHHCOECrE3E3VNEeuMDYeC3nicurOYfk8YJSOMw,15646
|
|
25
25
|
mineru/cli/models_download.py,sha256=LNfoIpUlJM7m7qb2SiCxtjMDw4jILBQtZwNP2JoY81U,4815
|
|
26
|
-
mineru/cli/
|
|
26
|
+
mineru/cli/vlm_server.py,sha256=27HaqO3wpMXSA_nA3CC6JOBTHK3q66SP00cD6m9HuQE,1974
|
|
27
27
|
mineru/data/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
28
28
|
mineru/data/data_reader_writer/__init__.py,sha256=9qnGNrsuGBMwwfsQy6oChdkz--a_LPdYWE0VZZr0yr4,490
|
|
29
29
|
mineru/data/data_reader_writer/base.py,sha256=nqmAcdHOXMOJO6RAT3ILligDFaw8Op0STyCw5yOzAbI,1706
|
|
@@ -145,15 +145,16 @@ mineru/model/utils/tools/infer/predict_det.py,sha256=vYQREn7vELXxBsr72CCCVvm1gwV
|
|
|
145
145
|
mineru/model/utils/tools/infer/predict_rec.py,sha256=-BH93JDisu0kT6CyHA4plUOKcb2L-UvDk7Tein5uwt4,19209
|
|
146
146
|
mineru/model/utils/tools/infer/predict_system.py,sha256=hkegkn6hq2v2zqHVAP615-k-fkTS8swRYSbZeoqmSI8,3822
|
|
147
147
|
mineru/model/utils/tools/infer/pytorchocr_utility.py,sha256=i1PFN-_kefJUUZ4Vk7igs1TU8gfErTDlDXY6-8Uaurw,9323
|
|
148
|
-
mineru/model/
|
|
149
|
-
mineru/model/
|
|
148
|
+
mineru/model/vlm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
149
|
+
mineru/model/vlm/lmdeploy_server.py,sha256=PvxJNcUIKB8VzWMDXeV1t0SHSgz_ULO36ZAzJbppz90,3262
|
|
150
|
+
mineru/model/vlm/vllm_server.py,sha256=w5ddusPbcVaEoWAo_BRjmwv_Ywxrc_bCMRhxihoyykY,2263
|
|
150
151
|
mineru/resources/header.html,sha256=PUselBXLBn8gfeP3zwEtj6zIxfhcCN4vN_B796nQFNQ,4410
|
|
151
152
|
mineru/resources/fasttext-langdetect/lid.176.ftz,sha256=jzRyz-hzintgmejpmcPL-uDc0VaWqsfXc4qAOdtgPoM,938013
|
|
152
153
|
mineru/utils/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
153
154
|
mineru/utils/block_pre_proc.py,sha256=uGBmxf2MR9bplTnQI8xHjCI-kj3plRhJr0hcWKidbOQ,9632
|
|
154
155
|
mineru/utils/block_sort.py,sha256=5e1mOLB3W7xu5Y1hmhvGSHPL_aQ41R_4VXcP4vjYAOU,12976
|
|
155
156
|
mineru/utils/boxbase.py,sha256=moP660AmZq_udHEsfvFkTQdJ4gjrrBwN7t0Enx7CIL8,6903
|
|
156
|
-
mineru/utils/check_sys_env.py,sha256=
|
|
157
|
+
mineru/utils/check_sys_env.py,sha256=TRjzg4xWyoSGrgv4KaP225A-99xBgLAfZ1cPcGqrBAA,1191
|
|
157
158
|
mineru/utils/cli_parser.py,sha256=4seFAu1kulsYnw6WM2q_cxgEOt2tErZVkI-LNEF_kGw,1445
|
|
158
159
|
mineru/utils/config_reader.py,sha256=IRVWTpBnbnRpck6eXZUKw-fcLt7hon5S4uqWW-RBb1w,4075
|
|
159
160
|
mineru/utils/cut_image.py,sha256=g3m4nfcJNWlxi-P0kpXTtlmspXkMcLCfGwmYuQ-Z2hE,751
|
|
@@ -178,9 +179,9 @@ mineru/utils/run_async.py,sha256=rPeP4BCZerR8VByRDhiYzfZiahLVqoZEBVAS54dAjNg,128
|
|
|
178
179
|
mineru/utils/span_block_fix.py,sha256=0eVQjJCrT03woRt9hoh6Uu42Tp1dacfGTv2x3B9qq94,8797
|
|
179
180
|
mineru/utils/span_pre_proc.py,sha256=h41q2uQajI0xQbc_30hqaju1dv3oVYxBAlKgURl8HIc,13692
|
|
180
181
|
mineru/utils/table_merge.py,sha256=d98zNbM1ZQ8V1kUt6RugParNUNPv7DGL-XKIzR3iJVQ,15360
|
|
181
|
-
mineru-2.6.
|
|
182
|
-
mineru-2.6.
|
|
183
|
-
mineru-2.6.
|
|
184
|
-
mineru-2.6.
|
|
185
|
-
mineru-2.6.
|
|
186
|
-
mineru-2.6.
|
|
182
|
+
mineru-2.6.5.dist-info/licenses/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
|
|
183
|
+
mineru-2.6.5.dist-info/METADATA,sha256=BUj9fYR_NiRpYGqXWd3J_fOTE8IN0bdl0PgY6FUGVcg,72362
|
|
184
|
+
mineru-2.6.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
185
|
+
mineru-2.6.5.dist-info/entry_points.txt,sha256=JbtrCPhx1T32s7TONUsteKg-24ZwRT1HSiFtW5jypVw,376
|
|
186
|
+
mineru-2.6.5.dist-info/top_level.txt,sha256=zuGQfZcbsHv4I4oKI9gaKPqEWBFm6xJroKuug2LnKP8,7
|
|
187
|
+
mineru-2.6.5.dist-info/RECORD,,
|
|
@@ -2,5 +2,7 @@
|
|
|
2
2
|
mineru = mineru.cli:client.main
|
|
3
3
|
mineru-api = mineru.cli.fast_api:main
|
|
4
4
|
mineru-gradio = mineru.cli.gradio_app:main
|
|
5
|
+
mineru-lmdeploy-server = mineru.cli.vlm_server:lmdeploy_server
|
|
5
6
|
mineru-models-download = mineru.cli.models_download:download_models
|
|
6
|
-
mineru-
|
|
7
|
+
mineru-openai-server = mineru.cli.vlm_server:openai_server
|
|
8
|
+
mineru-vllm-server = mineru.cli.vlm_server:vllm_server
|
mineru/cli/vlm_vllm_server.py
DELETED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|