mineru 2.5.0__py3-none-any.whl → 2.5.2__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
@@ -14,6 +14,7 @@ from ...utils.model_utils import get_vram
14
14
  from ...utils.models_download_utils import auto_download_and_get_model_root_path
15
15
 
16
16
  from mineru_vl_utils import MinerUClient
17
+ from packaging import version
17
18
 
18
19
 
19
20
  class ModelSingleton:
@@ -52,7 +53,6 @@ class ModelSingleton:
52
53
  except ImportError:
53
54
  raise ImportError("Please install transformers to use the transformers backend.")
54
55
 
55
- from packaging import version
56
56
  if version.parse(transformers_version) >= version.parse("4.56.0"):
57
57
  dtype_key = "dtype"
58
58
  else:
@@ -88,24 +88,32 @@ class ModelSingleton:
88
88
  elif backend == "vllm-engine":
89
89
  try:
90
90
  import vllm
91
+ vllm_version = vllm.__version__
92
+ from mineru_vl_utils import MinerULogitsProcessor
91
93
  except ImportError:
92
94
  raise ImportError("Please install vllm to use the vllm-engine backend.")
93
95
  if "gpu_memory_utilization" not in kwargs:
94
96
  kwargs["gpu_memory_utilization"] = 0.5
95
97
  if "model" not in kwargs:
96
98
  kwargs["model"] = model_path
99
+ if version.parse(vllm_version) >= version.parse("0.10.1") and "logits_processors" not in kwargs:
100
+ kwargs["logits_processors"] = [MinerULogitsProcessor]
97
101
  # 使用kwargs为 vllm初始化参数
98
102
  vllm_llm = vllm.LLM(**kwargs)
99
103
  elif backend == "vllm-async-engine":
100
104
  try:
101
105
  from vllm.engine.arg_utils import AsyncEngineArgs
102
106
  from vllm.v1.engine.async_llm import AsyncLLM
107
+ from vllm import __version__ as vllm_version
108
+ from mineru_vl_utils import MinerULogitsProcessor
103
109
  except ImportError:
104
110
  raise ImportError("Please install vllm to use the vllm-async-engine backend.")
105
111
  if "gpu_memory_utilization" not in kwargs:
106
112
  kwargs["gpu_memory_utilization"] = 0.5
107
113
  if "model" not in kwargs:
108
114
  kwargs["model"] = model_path
115
+ if version.parse(vllm_version) >= version.parse("0.10.1") and "logits_processors" not in kwargs:
116
+ kwargs["logits_processors"] = [MinerULogitsProcessor]
109
117
  # 使用kwargs为 vllm初始化参数
110
118
  vllm_async_llm = AsyncLLM.from_engine_args(AsyncEngineArgs(**kwargs))
111
119
  self._models[key] = MinerUClient(
@@ -54,7 +54,7 @@ def mk_blocks_to_markdown(para_blocks, make_mode, formula_enable, table_enable,
54
54
  elif para_type == BlockType.LIST:
55
55
  for block in para_block['blocks']:
56
56
  item_text = merge_para_with_text(block, formula_enable=formula_enable, img_buket_path=img_buket_path)
57
- para_text += f"{item_text}\n"
57
+ para_text += f"{item_text} \n"
58
58
  elif para_type == BlockType.TITLE:
59
59
  title_level = get_title_level(para_block)
60
60
  para_text = f'{"#" * title_level} {merge_para_with_text(para_block)}'
@@ -1,7 +1,10 @@
1
1
  import sys
2
2
 
3
3
  from mineru.utils.models_download_utils import auto_download_and_get_model_root_path
4
+
4
5
  from vllm.entrypoints.cli.main import main as vllm_main
6
+ from vllm import __version__ as vllm_version
7
+ from packaging import version
5
8
 
6
9
 
7
10
  def main():
@@ -9,6 +12,7 @@ def main():
9
12
 
10
13
  has_port_arg = False
11
14
  has_gpu_memory_utilization_arg = False
15
+ has_logits_processors_arg = False
12
16
  model_path = None
13
17
  model_arg_indices = []
14
18
 
@@ -18,6 +22,8 @@ def main():
18
22
  has_port_arg = True
19
23
  if arg == "--gpu-memory-utilization" or arg.startswith("--gpu-memory-utilization="):
20
24
  has_gpu_memory_utilization_arg = True
25
+ if arg == "--logits-processors" or arg.startswith("--logits-processors="):
26
+ has_logits_processors_arg = True
21
27
  if arg == "--model":
22
28
  if i + 1 < len(args):
23
29
  model_path = args[i + 1]
@@ -38,6 +44,8 @@ def main():
38
44
  args.extend(["--gpu-memory-utilization", "0.5"])
39
45
  if not model_path:
40
46
  model_path = auto_download_and_get_model_root_path("/", "vlm")
47
+ if not has_logits_processors_arg and version.parse(vllm_version) >= version.parse("0.10.1"):
48
+ args.extend(["--logits-processors", "mineru_vl_utils:MinerULogitsProcessor"])
41
49
 
42
50
  # 重构参数,将模型路径作为位置参数
43
51
  sys.argv = [sys.argv[0]] + ["serve", model_path] + args
mineru/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "2.5.0"
1
+ __version__ = "2.5.2"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mineru
3
- Version: 2.5.0
3
+ Version: 2.5.2
4
4
  Summary: A practical tool for converting PDF to Markdown
5
5
  License: AGPL-3.0
6
6
  Project-URL: homepage, https://mineru.net/
@@ -37,7 +37,7 @@ Requires-Dist: scikit-image<1.0.0,>=0.25.0
37
37
  Requires-Dist: openai<2,>=1.70.0
38
38
  Requires-Dist: beautifulsoup4<5,>=4.13.5
39
39
  Requires-Dist: magika<0.7.0,>=0.6.2
40
- Requires-Dist: mineru-vl-utils<1,>=0.1.6
40
+ Requires-Dist: mineru-vl-utils<1,>=0.1.7
41
41
  Provides-Extra: test
42
42
  Requires-Dist: mineru[core]; extra == "test"
43
43
  Requires-Dist: pytest; extra == "test"
@@ -127,7 +127,7 @@ Dynamic: license-file
127
127
 
128
128
  # Changelog
129
129
 
130
- - 2025/09/19 2.5.0 Released
130
+ - 2025/09/19 2.5.2 Released
131
131
 
132
132
  We are officially releasing MinerU2.5, currently the most powerful multimodal large model for document parsing.
133
133
  With only 1.2B parameters, MinerU2.5's accuracy on the OmniDocBench benchmark comprehensively surpasses top-tier multimodal models like Gemini 2.5 Pro, GPT-4o, and Qwen2.5-VL-72B. It also significantly outperforms leading specialized models such as dots.ocr, MonkeyOCR, and PP-StructureV3.
@@ -1,5 +1,5 @@
1
1
  mineru/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
2
- mineru/version.py,sha256=fMbNgIJqxiZEaSBLadLBt4rZpCHqarzb4Okt-aWsp2E,22
2
+ mineru/version.py,sha256=V-NiKyTdzd5WY2b4iSwaM1JcbOEyZ0IH2WQKBnjN2DI,22
3
3
  mineru/backend/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
4
4
  mineru/backend/pipeline/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
5
5
  mineru/backend/pipeline/batch_analyze.py,sha256=rp9nHYmuBBytlJIc3oRwqTtgFd5mhRak5UMhQ4mu02Y,21896
@@ -12,9 +12,9 @@ mineru/backend/pipeline/pipeline_magic_model.py,sha256=w8jGx8f6yZN0Wf2yPP3L9rYKc
12
12
  mineru/backend/pipeline/pipeline_middle_json_mkcontent.py,sha256=xWWOFmYL6hB8PLrxQFyRJ72dAmTIDHtqiWV-WFUfR44,14081
13
13
  mineru/backend/vlm/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
14
14
  mineru/backend/vlm/model_output_to_middle_json.py,sha256=e4Yc98_Cth2cjVPybPGehD5cpjGcTka4D2qKKrP_qqo,5121
15
- mineru/backend/vlm/vlm_analyze.py,sha256=Li5peZS4YTYl7WI0Zcz2v_wPcHSCsZnYSI-_lZZOsOo,7639
15
+ mineru/backend/vlm/vlm_analyze.py,sha256=jR5DTHT0qtOLVos5-qSGPvlRCdYREoAshFej98FY5ao,8282
16
16
  mineru/backend/vlm/vlm_magic_model.py,sha256=o1WKwgArV0f4lp7ufmSq9zRZaM5bnfOdx1AQtKnosro,16379
17
- mineru/backend/vlm/vlm_middle_json_mkcontent.py,sha256=DX2TRpfSIqpuhWqou5QXNtCW40ddQi0kdQxXi4QgzKs,13375
17
+ mineru/backend/vlm/vlm_middle_json_mkcontent.py,sha256=Ie95XpwTgi7EmidcwE_scvXMRQjE2xASU_Rm_F8EP-I,13377
18
18
  mineru/cli/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
19
19
  mineru/cli/client.py,sha256=uo7db9Wqj1Mc11MYuaM-bi54BfKKU3SFB9Urc8md5X4,6641
20
20
  mineru/cli/common.py,sha256=yJPdrwSYVidl2hTJ2Hn2YhnfH97GJ-QZi20dGFz7h5c,14025
@@ -143,7 +143,7 @@ mineru/model/table/rec/unet_table/utils.py,sha256=CYAqJW0wePJk4NAemb8W203N7E32v0
143
143
  mineru/model/table/rec/unet_table/utils_table_line_rec.py,sha256=zrCdPwI4M8nu0FEfd7lRJAe0z8kYq3KFbzwElM82USE,11174
144
144
  mineru/model/table/rec/unet_table/utils_table_recover.py,sha256=XksJsY82ZS0kqUnNT-jvaYzxJ3V3svMSzj0puwIau1k,10651
145
145
  mineru/model/vlm_vllm_model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
146
- mineru/model/vlm_vllm_model/server.py,sha256=TLgiScQCTfuY3g5mkI43I_uxuSJJ2ItC19HDeypB7jI,1578
146
+ mineru/model/vlm_vllm_model/server.py,sha256=v07x1esggP7Wbw0r8NeAbqG2kuJN9x5Xl2CmE2x0qzk,2003
147
147
  mineru/resources/header.html,sha256=NO8ZZdCYLqu_E72AtNcuRnA2NbFBamScjjGhtg9PKiM,4409
148
148
  mineru/resources/fasttext-langdetect/lid.176.ftz,sha256=jzRyz-hzintgmejpmcPL-uDc0VaWqsfXc4qAOdtgPoM,938013
149
149
  mineru/utils/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
@@ -172,9 +172,9 @@ mineru/utils/run_async.py,sha256=rPeP4BCZerR8VByRDhiYzfZiahLVqoZEBVAS54dAjNg,128
172
172
  mineru/utils/span_block_fix.py,sha256=0eVQjJCrT03woRt9hoh6Uu42Tp1dacfGTv2x3B9qq94,8797
173
173
  mineru/utils/span_pre_proc.py,sha256=h41q2uQajI0xQbc_30hqaju1dv3oVYxBAlKgURl8HIc,13692
174
174
  mineru/utils/table_merge.py,sha256=zYUpYLrfhBCnbHCYZi6rG8-s38NDnTbiNTObvLdYwJk,11494
175
- mineru-2.5.0.dist-info/licenses/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
176
- mineru-2.5.0.dist-info/METADATA,sha256=m404n7vO2GrWJbpksXY_tMCKxbjAux71JRg2tby2Gw0,64460
177
- mineru-2.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
178
- mineru-2.5.0.dist-info/entry_points.txt,sha256=luXmbhPiZK_tKlRgWuYOaW_V6EFpG-yJcAevVv9MEqE,252
179
- mineru-2.5.0.dist-info/top_level.txt,sha256=zuGQfZcbsHv4I4oKI9gaKPqEWBFm6xJroKuug2LnKP8,7
180
- mineru-2.5.0.dist-info/RECORD,,
175
+ mineru-2.5.2.dist-info/licenses/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
176
+ mineru-2.5.2.dist-info/METADATA,sha256=TlgabU5BQLlL7bZ9LqeMhW2fWvA14NmubtgTVrul94k,64460
177
+ mineru-2.5.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
178
+ mineru-2.5.2.dist-info/entry_points.txt,sha256=luXmbhPiZK_tKlRgWuYOaW_V6EFpG-yJcAevVv9MEqE,252
179
+ mineru-2.5.2.dist-info/top_level.txt,sha256=zuGQfZcbsHv4I4oKI9gaKPqEWBFm6xJroKuug2LnKP8,7
180
+ mineru-2.5.2.dist-info/RECORD,,
File without changes