mineru 2.6.0__py3-none-any.whl → 2.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mineru/backend/vlm/utils.py +2 -2
- mineru/backend/vlm/vlm_analyze.py +7 -5
- mineru/model/vlm_vllm_model/server.py +5 -3
- mineru/version.py +1 -1
- {mineru-2.6.0.dist-info → mineru-2.6.1.dist-info}/METADATA +2 -2
- {mineru-2.6.0.dist-info → mineru-2.6.1.dist-info}/RECORD +10 -10
- {mineru-2.6.0.dist-info → mineru-2.6.1.dist-info}/WHEEL +0 -0
- {mineru-2.6.0.dist-info → mineru-2.6.1.dist-info}/entry_points.txt +0 -0
- {mineru-2.6.0.dist-info → mineru-2.6.1.dist-info}/licenses/LICENSE.md +0 -0
- {mineru-2.6.0.dist-info → mineru-2.6.1.dist-info}/top_level.txt +0 -0
mineru/backend/vlm/utils.py
CHANGED
|
@@ -44,7 +44,7 @@ def enable_custom_logits_processors() -> bool:
|
|
|
44
44
|
return True
|
|
45
45
|
|
|
46
46
|
|
|
47
|
-
def set_defult_gpu_memory_utilization() -> float:
|
|
47
|
+
def set_default_gpu_memory_utilization() -> float:
|
|
48
48
|
from vllm import __version__ as vllm_version
|
|
49
49
|
if version.parse(vllm_version) >= version.parse("0.11.0"):
|
|
50
50
|
return 0.7
|
|
@@ -52,7 +52,7 @@ def set_defult_gpu_memory_utilization() -> float:
|
|
|
52
52
|
return 0.5
|
|
53
53
|
|
|
54
54
|
|
|
55
|
-
def
|
|
55
|
+
def set_default_batch_size() -> int:
|
|
56
56
|
try:
|
|
57
57
|
device = get_device()
|
|
58
58
|
vram = get_vram(device)
|
|
@@ -4,7 +4,7 @@ import time
|
|
|
4
4
|
|
|
5
5
|
from loguru import logger
|
|
6
6
|
|
|
7
|
-
from .utils import enable_custom_logits_processors,
|
|
7
|
+
from .utils import enable_custom_logits_processors, set_default_gpu_memory_utilization, set_default_batch_size
|
|
8
8
|
from .model_output_to_middle_json import result_to_middle_json
|
|
9
9
|
from ...data.data_reader_writer import DataWriter
|
|
10
10
|
from mineru.utils.pdf_image_tools import load_images_from_pdf
|
|
@@ -74,9 +74,11 @@ class ModelSingleton:
|
|
|
74
74
|
use_fast=True,
|
|
75
75
|
)
|
|
76
76
|
if batch_size == 0:
|
|
77
|
-
batch_size =
|
|
77
|
+
batch_size = set_default_batch_size()
|
|
78
78
|
else:
|
|
79
|
-
os.
|
|
79
|
+
if os.getenv('OMP_NUM_THREADS') is None:
|
|
80
|
+
os.environ["OMP_NUM_THREADS"] = "1"
|
|
81
|
+
|
|
80
82
|
if backend == "vllm-engine":
|
|
81
83
|
try:
|
|
82
84
|
import vllm
|
|
@@ -84,7 +86,7 @@ class ModelSingleton:
|
|
|
84
86
|
except ImportError:
|
|
85
87
|
raise ImportError("Please install vllm to use the vllm-engine backend.")
|
|
86
88
|
if "gpu_memory_utilization" not in kwargs:
|
|
87
|
-
kwargs["gpu_memory_utilization"] =
|
|
89
|
+
kwargs["gpu_memory_utilization"] = set_default_gpu_memory_utilization()
|
|
88
90
|
if "model" not in kwargs:
|
|
89
91
|
kwargs["model"] = model_path
|
|
90
92
|
if enable_custom_logits_processors() and ("logits_processors" not in kwargs):
|
|
@@ -99,7 +101,7 @@ class ModelSingleton:
|
|
|
99
101
|
except ImportError:
|
|
100
102
|
raise ImportError("Please install vllm to use the vllm-async-engine backend.")
|
|
101
103
|
if "gpu_memory_utilization" not in kwargs:
|
|
102
|
-
kwargs["gpu_memory_utilization"] =
|
|
104
|
+
kwargs["gpu_memory_utilization"] = set_default_gpu_memory_utilization()
|
|
103
105
|
if "model" not in kwargs:
|
|
104
106
|
kwargs["model"] = model_path
|
|
105
107
|
if enable_custom_logits_processors() and ("logits_processors" not in kwargs):
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import sys
|
|
3
3
|
|
|
4
|
-
from mineru.backend.vlm.
|
|
4
|
+
from mineru.backend.vlm.utils import set_default_gpu_memory_utilization, enable_custom_logits_processors
|
|
5
5
|
from mineru.utils.models_download_utils import auto_download_and_get_model_root_path
|
|
6
6
|
|
|
7
7
|
from vllm.entrypoints.cli.main import main as vllm_main
|
|
@@ -43,7 +43,8 @@ def main():
|
|
|
43
43
|
if not has_port_arg:
|
|
44
44
|
args.extend(["--port", "30000"])
|
|
45
45
|
if not has_gpu_memory_utilization_arg:
|
|
46
|
-
|
|
46
|
+
gpu_memory_utilization = str(set_default_gpu_memory_utilization())
|
|
47
|
+
args.extend(["--gpu-memory-utilization", gpu_memory_utilization])
|
|
47
48
|
if not model_path:
|
|
48
49
|
model_path = auto_download_and_get_model_root_path("/", "vlm")
|
|
49
50
|
if (not has_logits_processors_arg) and custom_logits_processors:
|
|
@@ -52,7 +53,8 @@ def main():
|
|
|
52
53
|
# 重构参数,将模型路径作为位置参数
|
|
53
54
|
sys.argv = [sys.argv[0]] + ["serve", model_path] + args
|
|
54
55
|
|
|
55
|
-
os.
|
|
56
|
+
if os.getenv('OMP_NUM_THREADS') is None:
|
|
57
|
+
os.environ["OMP_NUM_THREADS"] = "1"
|
|
56
58
|
|
|
57
59
|
# 启动vllm服务器
|
|
58
60
|
print(f"start vllm server: {sys.argv}")
|
mineru/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "2.6.0"
|
|
1
|
+
__version__ = "2.6.1"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mineru
|
|
3
|
-
Version: 2.6.0
|
|
3
|
+
Version: 2.6.1
|
|
4
4
|
Summary: A practical tool for converting PDF to Markdown
|
|
5
5
|
License: AGPL-3.0
|
|
6
6
|
Project-URL: homepage, https://mineru.net/
|
|
@@ -127,7 +127,7 @@ Dynamic: license-file
|
|
|
127
127
|
</div>
|
|
128
128
|
|
|
129
129
|
# Changelog
|
|
130
|
-
- 2025/10/24 2.6.0 Release
|
|
130
|
+
- 2025/10/24 2.6.1 Release
|
|
131
131
|
- `pipeline` backend optimizations
|
|
132
132
|
- Added experimental support for Chinese formulas, which can be enabled by setting the environment variable `export MINERU_FORMULA_CH_SUPPORT=1`. This feature may cause a slight decrease in MFR speed and failures in recognizing some long formulas. It is recommended to enable it only when parsing Chinese formulas is needed. To disable this feature, set the environment variable to `0`.
|
|
133
133
|
- `OCR` speed significantly improved by 200%~300%, thanks to the optimization solution provided by [@cjsdurj](https://github.com/cjsdurj)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
mineru/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
2
|
-
mineru/version.py,sha256=
|
|
2
|
+
mineru/version.py,sha256=yv0wJuq7dd_PlBhLN8iuPUYVsoACKuk2R3Gg5WU-tHk,22
|
|
3
3
|
mineru/backend/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
4
4
|
mineru/backend/utils.py,sha256=GLJU3IznDmhE1_qNmkU1UOtsuskIHBezgsEVO6Uar-Y,698
|
|
5
5
|
mineru/backend/pipeline/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
@@ -13,8 +13,8 @@ mineru/backend/pipeline/pipeline_magic_model.py,sha256=w8jGx8f6yZN0Wf2yPP3L9rYKc
|
|
|
13
13
|
mineru/backend/pipeline/pipeline_middle_json_mkcontent.py,sha256=xWWOFmYL6hB8PLrxQFyRJ72dAmTIDHtqiWV-WFUfR44,14081
|
|
14
14
|
mineru/backend/vlm/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
15
15
|
mineru/backend/vlm/model_output_to_middle_json.py,sha256=AqYX44gS9crUO_t7SuUatD71EVjow6pI6yA2Ik3gQ0s,5139
|
|
16
|
-
mineru/backend/vlm/utils.py,sha256=
|
|
17
|
-
mineru/backend/vlm/vlm_analyze.py,sha256=
|
|
16
|
+
mineru/backend/vlm/utils.py,sha256=woGqyRI4S7p69daLCU07XNXWTV27aLf7YBjjVH1x-5o,2794
|
|
17
|
+
mineru/backend/vlm/vlm_analyze.py,sha256=nzwTGndwZFfTEvHppakyDKZxph7SYOuUZW3johY5F8c,8154
|
|
18
18
|
mineru/backend/vlm/vlm_magic_model.py,sha256=Pd0sOr7G1crAJIVeq6h_03gNSuxmV5U8dvGTGT_rrjs,23452
|
|
19
19
|
mineru/backend/vlm/vlm_middle_json_mkcontent.py,sha256=Ie95XpwTgi7EmidcwE_scvXMRQjE2xASU_Rm_F8EP-I,13377
|
|
20
20
|
mineru/cli/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
@@ -158,7 +158,7 @@ mineru/model/utils/tools/infer/predict_rec.py,sha256=-BH93JDisu0kT6CyHA4plUOKcb2
|
|
|
158
158
|
mineru/model/utils/tools/infer/predict_system.py,sha256=hkegkn6hq2v2zqHVAP615-k-fkTS8swRYSbZeoqmSI8,3822
|
|
159
159
|
mineru/model/utils/tools/infer/pytorchocr_utility.py,sha256=i1PFN-_kefJUUZ4Vk7igs1TU8gfErTDlDXY6-8Uaurw,9323
|
|
160
160
|
mineru/model/vlm_vllm_model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
161
|
-
mineru/model/vlm_vllm_model/server.py,sha256=
|
|
161
|
+
mineru/model/vlm_vllm_model/server.py,sha256=w5ddusPbcVaEoWAo_BRjmwv_Ywxrc_bCMRhxihoyykY,2263
|
|
162
162
|
mineru/resources/header.html,sha256=PUselBXLBn8gfeP3zwEtj6zIxfhcCN4vN_B796nQFNQ,4410
|
|
163
163
|
mineru/resources/fasttext-langdetect/lid.176.ftz,sha256=jzRyz-hzintgmejpmcPL-uDc0VaWqsfXc4qAOdtgPoM,938013
|
|
164
164
|
mineru/utils/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
@@ -187,9 +187,9 @@ mineru/utils/run_async.py,sha256=rPeP4BCZerR8VByRDhiYzfZiahLVqoZEBVAS54dAjNg,128
|
|
|
187
187
|
mineru/utils/span_block_fix.py,sha256=0eVQjJCrT03woRt9hoh6Uu42Tp1dacfGTv2x3B9qq94,8797
|
|
188
188
|
mineru/utils/span_pre_proc.py,sha256=h41q2uQajI0xQbc_30hqaju1dv3oVYxBAlKgURl8HIc,13692
|
|
189
189
|
mineru/utils/table_merge.py,sha256=d98zNbM1ZQ8V1kUt6RugParNUNPv7DGL-XKIzR3iJVQ,15360
|
|
190
|
-
mineru-2.6.
|
|
191
|
-
mineru-2.6.
|
|
192
|
-
mineru-2.6.
|
|
193
|
-
mineru-2.6.
|
|
194
|
-
mineru-2.6.
|
|
195
|
-
mineru-2.6.
|
|
190
|
+
mineru-2.6.1.dist-info/licenses/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
|
|
191
|
+
mineru-2.6.1.dist-info/METADATA,sha256=bY_TtFykxzJJsqbtGMC2C7Tl2wYx4EOtT9w6Z3DKMuA,68358
|
|
192
|
+
mineru-2.6.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
193
|
+
mineru-2.6.1.dist-info/entry_points.txt,sha256=luXmbhPiZK_tKlRgWuYOaW_V6EFpG-yJcAevVv9MEqE,252
|
|
194
|
+
mineru-2.6.1.dist-info/top_level.txt,sha256=zuGQfZcbsHv4I4oKI9gaKPqEWBFm6xJroKuug2LnKP8,7
|
|
195
|
+
mineru-2.6.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|