mineru 2.5.0__py3-none-any.whl → 2.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mineru/backend/vlm/vlm_analyze.py +9 -1
- mineru/model/vlm_vllm_model/server.py +8 -0
- mineru/version.py +1 -1
- {mineru-2.5.0.dist-info → mineru-2.5.1.dist-info}/METADATA +3 -3
- {mineru-2.5.0.dist-info → mineru-2.5.1.dist-info}/RECORD +9 -9
- {mineru-2.5.0.dist-info → mineru-2.5.1.dist-info}/WHEEL +0 -0
- {mineru-2.5.0.dist-info → mineru-2.5.1.dist-info}/entry_points.txt +0 -0
- {mineru-2.5.0.dist-info → mineru-2.5.1.dist-info}/licenses/LICENSE.md +0 -0
- {mineru-2.5.0.dist-info → mineru-2.5.1.dist-info}/top_level.txt +0 -0
|
@@ -14,6 +14,7 @@ from ...utils.model_utils import get_vram
|
|
|
14
14
|
from ...utils.models_download_utils import auto_download_and_get_model_root_path
|
|
15
15
|
|
|
16
16
|
from mineru_vl_utils import MinerUClient
|
|
17
|
+
from packaging import version
|
|
17
18
|
|
|
18
19
|
|
|
19
20
|
class ModelSingleton:
|
|
@@ -52,7 +53,6 @@ class ModelSingleton:
|
|
|
52
53
|
except ImportError:
|
|
53
54
|
raise ImportError("Please install transformers to use the transformers backend.")
|
|
54
55
|
|
|
55
|
-
from packaging import version
|
|
56
56
|
if version.parse(transformers_version) >= version.parse("4.56.0"):
|
|
57
57
|
dtype_key = "dtype"
|
|
58
58
|
else:
|
|
@@ -88,24 +88,32 @@ class ModelSingleton:
|
|
|
88
88
|
elif backend == "vllm-engine":
|
|
89
89
|
try:
|
|
90
90
|
import vllm
|
|
91
|
+
vllm_version = vllm.__version__
|
|
92
|
+
from mineru_vl_utils import MinerULogitsProcessor
|
|
91
93
|
except ImportError:
|
|
92
94
|
raise ImportError("Please install vllm to use the vllm-engine backend.")
|
|
93
95
|
if "gpu_memory_utilization" not in kwargs:
|
|
94
96
|
kwargs["gpu_memory_utilization"] = 0.5
|
|
95
97
|
if "model" not in kwargs:
|
|
96
98
|
kwargs["model"] = model_path
|
|
99
|
+
if version.parse(vllm_version) >= version.parse("0.10.1") and "logits_processors" not in kwargs:
|
|
100
|
+
kwargs["logits_processors"] = [MinerULogitsProcessor]
|
|
97
101
|
# 使用kwargs为 vllm初始化参数
|
|
98
102
|
vllm_llm = vllm.LLM(**kwargs)
|
|
99
103
|
elif backend == "vllm-async-engine":
|
|
100
104
|
try:
|
|
101
105
|
from vllm.engine.arg_utils import AsyncEngineArgs
|
|
102
106
|
from vllm.v1.engine.async_llm import AsyncLLM
|
|
107
|
+
from vllm import __version__ as vllm_version
|
|
108
|
+
from mineru_vl_utils import MinerULogitsProcessor
|
|
103
109
|
except ImportError:
|
|
104
110
|
raise ImportError("Please install vllm to use the vllm-async-engine backend.")
|
|
105
111
|
if "gpu_memory_utilization" not in kwargs:
|
|
106
112
|
kwargs["gpu_memory_utilization"] = 0.5
|
|
107
113
|
if "model" not in kwargs:
|
|
108
114
|
kwargs["model"] = model_path
|
|
115
|
+
if version.parse(vllm_version) >= version.parse("0.10.1") and "logits_processors" not in kwargs:
|
|
116
|
+
kwargs["logits_processors"] = [MinerULogitsProcessor]
|
|
109
117
|
# 使用kwargs为 vllm初始化参数
|
|
110
118
|
vllm_async_llm = AsyncLLM.from_engine_args(AsyncEngineArgs(**kwargs))
|
|
111
119
|
self._models[key] = MinerUClient(
|
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
import sys
|
|
2
2
|
|
|
3
3
|
from mineru.utils.models_download_utils import auto_download_and_get_model_root_path
|
|
4
|
+
|
|
4
5
|
from vllm.entrypoints.cli.main import main as vllm_main
|
|
6
|
+
from vllm import __version__ as vllm_version
|
|
7
|
+
from packaging import version
|
|
5
8
|
|
|
6
9
|
|
|
7
10
|
def main():
|
|
@@ -9,6 +12,7 @@ def main():
|
|
|
9
12
|
|
|
10
13
|
has_port_arg = False
|
|
11
14
|
has_gpu_memory_utilization_arg = False
|
|
15
|
+
has_logits_processors_arg = False
|
|
12
16
|
model_path = None
|
|
13
17
|
model_arg_indices = []
|
|
14
18
|
|
|
@@ -18,6 +22,8 @@ def main():
|
|
|
18
22
|
has_port_arg = True
|
|
19
23
|
if arg == "--gpu-memory-utilization" or arg.startswith("--gpu-memory-utilization="):
|
|
20
24
|
has_gpu_memory_utilization_arg = True
|
|
25
|
+
if arg == "--logits-processors" or arg.startswith("--logits-processors="):
|
|
26
|
+
has_logits_processors_arg = True
|
|
21
27
|
if arg == "--model":
|
|
22
28
|
if i + 1 < len(args):
|
|
23
29
|
model_path = args[i + 1]
|
|
@@ -38,6 +44,8 @@ def main():
|
|
|
38
44
|
args.extend(["--gpu-memory-utilization", "0.5"])
|
|
39
45
|
if not model_path:
|
|
40
46
|
model_path = auto_download_and_get_model_root_path("/", "vlm")
|
|
47
|
+
if not has_logits_processors_arg and version.parse(vllm_version) >= version.parse("0.10.1"):
|
|
48
|
+
args.extend(["--logits-processors", "mineru_vl_utils:MinerULogitsProcessor"])
|
|
41
49
|
|
|
42
50
|
# 重构参数,将模型路径作为位置参数
|
|
43
51
|
sys.argv = [sys.argv[0]] + ["serve", model_path] + args
|
mineru/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "2.5.0"
|
|
1
|
+
__version__ = "2.5.1"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mineru
|
|
3
|
-
Version: 2.5.0
|
|
3
|
+
Version: 2.5.1
|
|
4
4
|
Summary: A practical tool for converting PDF to Markdown
|
|
5
5
|
License: AGPL-3.0
|
|
6
6
|
Project-URL: homepage, https://mineru.net/
|
|
@@ -37,7 +37,7 @@ Requires-Dist: scikit-image<1.0.0,>=0.25.0
|
|
|
37
37
|
Requires-Dist: openai<2,>=1.70.0
|
|
38
38
|
Requires-Dist: beautifulsoup4<5,>=4.13.5
|
|
39
39
|
Requires-Dist: magika<0.7.0,>=0.6.2
|
|
40
|
-
Requires-Dist: mineru-vl-utils<1,>=0.1.
|
|
40
|
+
Requires-Dist: mineru-vl-utils<1,>=0.1.7
|
|
41
41
|
Provides-Extra: test
|
|
42
42
|
Requires-Dist: mineru[core]; extra == "test"
|
|
43
43
|
Requires-Dist: pytest; extra == "test"
|
|
@@ -127,7 +127,7 @@ Dynamic: license-file
|
|
|
127
127
|
|
|
128
128
|
# Changelog
|
|
129
129
|
|
|
130
|
-
- 2025/09/19 2.5.0 Released
|
|
130
|
+
- 2025/09/19 2.5.1 Released
|
|
131
131
|
|
|
132
132
|
We are officially releasing MinerU2.5, currently the most powerful multimodal large model for document parsing.
|
|
133
133
|
With only 1.2B parameters, MinerU2.5's accuracy on the OmniDocBench benchmark comprehensively surpasses top-tier multimodal models like Gemini 2.5 Pro, GPT-4o, and Qwen2.5-VL-72B. It also significantly outperforms leading specialized models such as dots.ocr, MonkeyOCR, and PP-StructureV3.
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
mineru/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
2
|
-
mineru/version.py,sha256=
|
|
2
|
+
mineru/version.py,sha256=PfQ9ThOuZlUZhThya-_PpR02LjazRR6LNSivpta03mM,22
|
|
3
3
|
mineru/backend/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
4
4
|
mineru/backend/pipeline/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
5
5
|
mineru/backend/pipeline/batch_analyze.py,sha256=rp9nHYmuBBytlJIc3oRwqTtgFd5mhRak5UMhQ4mu02Y,21896
|
|
@@ -12,7 +12,7 @@ mineru/backend/pipeline/pipeline_magic_model.py,sha256=w8jGx8f6yZN0Wf2yPP3L9rYKc
|
|
|
12
12
|
mineru/backend/pipeline/pipeline_middle_json_mkcontent.py,sha256=xWWOFmYL6hB8PLrxQFyRJ72dAmTIDHtqiWV-WFUfR44,14081
|
|
13
13
|
mineru/backend/vlm/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
14
14
|
mineru/backend/vlm/model_output_to_middle_json.py,sha256=e4Yc98_Cth2cjVPybPGehD5cpjGcTka4D2qKKrP_qqo,5121
|
|
15
|
-
mineru/backend/vlm/vlm_analyze.py,sha256=
|
|
15
|
+
mineru/backend/vlm/vlm_analyze.py,sha256=jR5DTHT0qtOLVos5-qSGPvlRCdYREoAshFej98FY5ao,8282
|
|
16
16
|
mineru/backend/vlm/vlm_magic_model.py,sha256=o1WKwgArV0f4lp7ufmSq9zRZaM5bnfOdx1AQtKnosro,16379
|
|
17
17
|
mineru/backend/vlm/vlm_middle_json_mkcontent.py,sha256=DX2TRpfSIqpuhWqou5QXNtCW40ddQi0kdQxXi4QgzKs,13375
|
|
18
18
|
mineru/cli/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
@@ -143,7 +143,7 @@ mineru/model/table/rec/unet_table/utils.py,sha256=CYAqJW0wePJk4NAemb8W203N7E32v0
|
|
|
143
143
|
mineru/model/table/rec/unet_table/utils_table_line_rec.py,sha256=zrCdPwI4M8nu0FEfd7lRJAe0z8kYq3KFbzwElM82USE,11174
|
|
144
144
|
mineru/model/table/rec/unet_table/utils_table_recover.py,sha256=XksJsY82ZS0kqUnNT-jvaYzxJ3V3svMSzj0puwIau1k,10651
|
|
145
145
|
mineru/model/vlm_vllm_model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
146
|
-
mineru/model/vlm_vllm_model/server.py,sha256=
|
|
146
|
+
mineru/model/vlm_vllm_model/server.py,sha256=v07x1esggP7Wbw0r8NeAbqG2kuJN9x5Xl2CmE2x0qzk,2003
|
|
147
147
|
mineru/resources/header.html,sha256=NO8ZZdCYLqu_E72AtNcuRnA2NbFBamScjjGhtg9PKiM,4409
|
|
148
148
|
mineru/resources/fasttext-langdetect/lid.176.ftz,sha256=jzRyz-hzintgmejpmcPL-uDc0VaWqsfXc4qAOdtgPoM,938013
|
|
149
149
|
mineru/utils/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
@@ -172,9 +172,9 @@ mineru/utils/run_async.py,sha256=rPeP4BCZerR8VByRDhiYzfZiahLVqoZEBVAS54dAjNg,128
|
|
|
172
172
|
mineru/utils/span_block_fix.py,sha256=0eVQjJCrT03woRt9hoh6Uu42Tp1dacfGTv2x3B9qq94,8797
|
|
173
173
|
mineru/utils/span_pre_proc.py,sha256=h41q2uQajI0xQbc_30hqaju1dv3oVYxBAlKgURl8HIc,13692
|
|
174
174
|
mineru/utils/table_merge.py,sha256=zYUpYLrfhBCnbHCYZi6rG8-s38NDnTbiNTObvLdYwJk,11494
|
|
175
|
-
mineru-2.5.
|
|
176
|
-
mineru-2.5.
|
|
177
|
-
mineru-2.5.
|
|
178
|
-
mineru-2.5.
|
|
179
|
-
mineru-2.5.
|
|
180
|
-
mineru-2.5.
|
|
175
|
+
mineru-2.5.1.dist-info/licenses/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
|
|
176
|
+
mineru-2.5.1.dist-info/METADATA,sha256=BnhjCe7hE1EMEZdmz-YQgX0Mq5LANyNWHayKFaJztw0,64460
|
|
177
|
+
mineru-2.5.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
178
|
+
mineru-2.5.1.dist-info/entry_points.txt,sha256=luXmbhPiZK_tKlRgWuYOaW_V6EFpG-yJcAevVv9MEqE,252
|
|
179
|
+
mineru-2.5.1.dist-info/top_level.txt,sha256=zuGQfZcbsHv4I4oKI9gaKPqEWBFm6xJroKuug2LnKP8,7
|
|
180
|
+
mineru-2.5.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|