PyPI - mineru - Versions diffs - 2.6.3__py3-none-any.whl → 2.6.5__py3-none-any.whl - Mend

mineru-2.6.3-py3-none-any.whl → mineru-2.6.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

mineru/backend/pipeline/pipeline_analyze.py +3 -0
mineru/backend/vlm/utils.py +31 -5
mineru/backend/vlm/vlm_analyze.py +124 -68
mineru/cli/client.py +6 -5
mineru/cli/common.py +24 -22
mineru/cli/gradio_app.py +28 -4
mineru/cli/vlm_server.py +61 -0
mineru/model/table/rec/slanet_plus/table_structure.py +4 -0
mineru/model/table/rec/unet_table/table_structure_unet.py +5 -0
mineru/model/vlm/lmdeploy_server.py +93 -0
mineru/utils/{check_mac_env.py → check_sys_env.py} +8 -0
mineru/utils/os_env_config.py +30 -0
mineru/utils/pdf_image_tools.py +118 -14
mineru/utils/pdf_page_id.py +10 -0
mineru/version.py +1 -1
{mineru-2.6.3.dist-info → mineru-2.6.5.dist-info}/METADATA +31 -15
{mineru-2.6.3.dist-info → mineru-2.6.5.dist-info}/RECORD +23 -20
{mineru-2.6.3.dist-info → mineru-2.6.5.dist-info}/entry_points.txt +3 -1
mineru/cli/vlm_vllm_server.py +0 -4
mineru/model/{vlm_vllm_model → vlm}/__init__.py +0 -0
mineru/model/{vlm_vllm_model/server.py → vlm/vllm_server.py} +0 -0
{mineru-2.6.3.dist-info → mineru-2.6.5.dist-info}/WHEEL +0 -0
{mineru-2.6.3.dist-info → mineru-2.6.5.dist-info}/licenses/LICENSE.md +0 -0
{mineru-2.6.3.dist-info → mineru-2.6.5.dist-info}/top_level.txt +0 -0

mineru/backend/pipeline/pipeline_analyze.py CHANGED Viewed

@@ -99,7 +99,10 @@ def doc_analyze(
         _lang = lang_list[pdf_idx]
         # 收集每个数据集中的页面
+        # load_images_start = time.time()
         images_list, pdf_doc = load_images_from_pdf(pdf_bytes, image_type=ImageType.PIL)
+        # load_images_time = round(time.time() - load_images_start, 2)
+        # logger.debug(f"load images cost: {load_images_time}, speed: {round(len(images_list) / load_images_time, 3)} images/s")
         all_image_lists.append(images_list)
         all_pdf_docs.append(pdf_doc)
         for page_idx in range(len(images_list)):

mineru/backend/vlm/utils.py CHANGED Viewed

@@ -3,6 +3,7 @@ import os
 from loguru import logger
 from packaging import version
+from mineru.utils.check_sys_env import is_windows_environment, is_linux_environment
 from mineru.utils.config_reader import get_device
 from mineru.utils.model_utils import get_vram
@@ -11,14 +12,16 @@ def enable_custom_logits_processors() -> bool:
     import torch
     from vllm import __version__ as vllm_version
-    if not torch.cuda.is_available():
+    if torch.cuda.is_available():
+        major, minor = torch.cuda.get_device_capability()
+        # 正确计算Compute Capability
+        compute_capability = f"{major}.{minor}"
+    elif hasattr(torch, 'npu') and torch.npu.is_available():
+        compute_capability = "8.0"
+    else:
         logger.info("CUDA not available, disabling custom_logits_processors")
         return False
-    major, minor = torch.cuda.get_device_capability()
-    # 正确计算Compute Capability
-    compute_capability = f"{major}.{minor}"
     # 安全地处理环境变量
     vllm_use_v1_str = os.getenv('VLLM_USE_V1', "1")
     if vllm_use_v1_str.isdigit():
@@ -44,6 +47,29 @@ def enable_custom_logits_processors() -> bool:
         return True
+def set_lmdeploy_backend(device_type: str) -> str:
+    if device_type.lower() in ["ascend", "maca", "camb"]:
+        lmdeploy_backend = "pytorch"
+    elif device_type.lower() in ["cuda"]:
+        import torch
+        if not torch.cuda.is_available():
+            raise ValueError("CUDA is not available.")
+        if is_windows_environment():
+            lmdeploy_backend = "turbomind"
+        elif is_linux_environment():
+            major, minor = torch.cuda.get_device_capability()
+            compute_capability = f"{major}.{minor}"
+            if version.parse(compute_capability) >= version.parse("8.0"):
+                lmdeploy_backend = "pytorch"
+            else:
+                lmdeploy_backend = "turbomind"
+        else:
+            raise ValueError("Unsupported operating system.")
+    else:
+        raise ValueError(f"Unsupported lmdeploy device type: {device_type}")
+    return lmdeploy_backend
 def set_default_gpu_memory_utilization() -> float:
     from vllm import __version__ as vllm_version
     if version.parse(vllm_version) >= version.parse("0.11.0"):

mineru/backend/vlm/vlm_analyze.py CHANGED Viewed

@@ -4,11 +4,12 @@ import time
 from loguru import logger
-from .utils import enable_custom_logits_processors, set_default_gpu_memory_utilization, set_default_batch_size
+from .utils import enable_custom_logits_processors, set_default_gpu_memory_utilization, set_default_batch_size, \
+    set_lmdeploy_backend
 from .model_output_to_middle_json import result_to_middle_json
 from ...data.data_reader_writer import DataWriter
 from mineru.utils.pdf_image_tools import load_images_from_pdf
-from ...utils.check_mac_env import is_mac_os_version_supported
+from ...utils.check_sys_env import is_mac_os_version_supported
 from ...utils.config_reader import get_device
 from ...utils.enum_class import ImageType
@@ -40,94 +41,149 @@ class ModelSingleton:
             model = None
             processor = None
             vllm_llm = None
+            lmdeploy_engine = None
             vllm_async_llm = None
             batch_size = kwargs.get("batch_size", 0)  # for transformers backend only
             max_concurrency = kwargs.get("max_concurrency", 100)  # for http-client backend only
             http_timeout = kwargs.get("http_timeout", 600)  # for http-client backend only
+            server_headers = kwargs.get("server_headers", None)  # for http-client backend only
+            max_retries = kwargs.get("max_retries", 3)  # for http-client backend only
+            retry_backoff_factor = kwargs.get("retry_backoff_factor", 0.5)  # for http-client backend only
             # 从kwargs中移除这些参数，避免传递给不相关的初始化函数
-            for param in ["batch_size", "max_concurrency", "http_timeout"]:
+            for param in ["batch_size", "max_concurrency", "http_timeout", "server_headers", "max_retries", "retry_backoff_factor"]:
                 if param in kwargs:
                     del kwargs[param]
-            if backend in ['transformers', 'vllm-engine', "vllm-async-engine", "mlx-engine"] and not model_path:
+            if backend not in ["http-client"] and not model_path:
                 model_path = auto_download_and_get_model_root_path("/","vlm")
-                if backend == "transformers":
+            if backend == "transformers":
+                try:
+                    from transformers import (
+                        AutoProcessor,
+                        Qwen2VLForConditionalGeneration,
+                    )
+                    from transformers import __version__ as transformers_version
+                except ImportError:
+                    raise ImportError("Please install transformers to use the transformers backend.")
+                if version.parse(transformers_version) >= version.parse("4.56.0"):
+                    dtype_key = "dtype"
+                else:
+                    dtype_key = "torch_dtype"
+                device = get_device()
+                model = Qwen2VLForConditionalGeneration.from_pretrained(
+                    model_path,
+                    device_map={"": device},
+                    **{dtype_key: "auto"},  # type: ignore
+                )
+                processor = AutoProcessor.from_pretrained(
+                    model_path,
+                    use_fast=True,
+                )
+                if batch_size == 0:
+                    batch_size = set_default_batch_size()
+            elif backend == "mlx-engine":
+                mlx_supported = is_mac_os_version_supported()
+                if not mlx_supported:
+                    raise EnvironmentError("mlx-engine backend is only supported on macOS 13.5+ with Apple Silicon.")
+                try:
+                    from mlx_vlm import load as mlx_load
+                except ImportError:
+                    raise ImportError("Please install mlx-vlm to use the mlx-engine backend.")
+                model, processor = mlx_load(model_path)
+            else:
+                if os.getenv('OMP_NUM_THREADS') is None:
+                    os.environ["OMP_NUM_THREADS"] = "1"
+                if backend == "vllm-engine":
                     try:
-                        from transformers import (
-                            AutoProcessor,
-                            Qwen2VLForConditionalGeneration,
-                        )
-                        from transformers import __version__ as transformers_version
+                        import vllm
                     except ImportError:
-                        raise ImportError("Please install transformers to use the transformers backend.")
-                    if version.parse(transformers_version) >= version.parse("4.56.0"):
-                        dtype_key = "dtype"
+                        raise ImportError("Please install vllm to use the vllm-engine backend.")
+                    if "gpu_memory_utilization" not in kwargs:
+                        kwargs["gpu_memory_utilization"] = set_default_gpu_memory_utilization()
+                    if "model" not in kwargs:
+                        kwargs["model"] = model_path
+                    if enable_custom_logits_processors() and ("logits_processors" not in kwargs):
+                        from mineru_vl_utils import MinerULogitsProcessor
+                        kwargs["logits_processors"] = [MinerULogitsProcessor]
+                    # 使用kwargs为 vllm初始化参数
+                    vllm_llm = vllm.LLM(**kwargs)
+                elif backend == "vllm-async-engine":
+                    try:
+                        from vllm.engine.arg_utils import AsyncEngineArgs
+                        from vllm.v1.engine.async_llm import AsyncLLM
+                    except ImportError:
+                        raise ImportError("Please install vllm to use the vllm-async-engine backend.")
+                    if "gpu_memory_utilization" not in kwargs:
+                        kwargs["gpu_memory_utilization"] = set_default_gpu_memory_utilization()
+                    if "model" not in kwargs:
+                        kwargs["model"] = model_path
+                    if enable_custom_logits_processors() and ("logits_processors" not in kwargs):
+                        from mineru_vl_utils import MinerULogitsProcessor
+                        kwargs["logits_processors"] = [MinerULogitsProcessor]
+                    # 使用kwargs为 vllm初始化参数
+                    vllm_async_llm = AsyncLLM.from_engine_args(AsyncEngineArgs(**kwargs))
+                elif backend == "lmdeploy-engine":
+                    try:
+                        from lmdeploy import PytorchEngineConfig, TurbomindEngineConfig
+                        from lmdeploy.serve.vl_async_engine import VLAsyncEngine
+                    except ImportError:
+                        raise ImportError("Please install lmdeploy to use the lmdeploy-engine backend.")
+                    if "cache_max_entry_count" not in kwargs:
+                        kwargs["cache_max_entry_count"] = 0.5
+                    device_type = os.getenv("MINERU_LMDEPLOY_DEVICE", "")
+                    if device_type == "":
+                        if "lmdeploy_device" in kwargs:
+                            device_type = kwargs.pop("lmdeploy_device")
+                            if device_type not in ["cuda", "ascend", "maca", "camb"]:
+                                raise ValueError(f"Unsupported lmdeploy device type: {device_type}")
+                        else:
+                            device_type = "cuda"
+                    lm_backend = os.getenv("MINERU_LMDEPLOY_BACKEND", "")
+                    if lm_backend == "":
+                        if "lmdeploy_backend" in kwargs:
+                            lm_backend = kwargs.pop("lmdeploy_backend")
+                            if lm_backend not in ["pytorch", "turbomind"]:
+                                raise ValueError(f"Unsupported lmdeploy backend: {lm_backend}")
+                        else:
+                            lm_backend = set_lmdeploy_backend(device_type)
+                    logger.info(f"lmdeploy device is: {device_type}, lmdeploy backend is: {lm_backend}")
+                    if lm_backend == "pytorch":
+                        kwargs["device_type"] = device_type
+                        backend_config = PytorchEngineConfig(**kwargs)
+                    elif lm_backend == "turbomind":
+                        backend_config = TurbomindEngineConfig(**kwargs)
                     else:
-                        dtype_key = "torch_dtype"
-                    device = get_device()
-                    model = Qwen2VLForConditionalGeneration.from_pretrained(
-                        model_path,
-                        device_map={"": device},
-                        **{dtype_key: "auto"},  # type: ignore
-                    )
-                    processor = AutoProcessor.from_pretrained(
+                        raise ValueError(f"Unsupported lmdeploy backend: {lm_backend}")
+                    log_level = 'ERROR'
+                    from lmdeploy.utils import get_logger
+                    lm_logger = get_logger('lmdeploy')
+                    lm_logger.setLevel(log_level)
+                    if os.getenv('TM_LOG_LEVEL') is None:
+                        os.environ['TM_LOG_LEVEL'] = log_level
+                    lmdeploy_engine = VLAsyncEngine(
                         model_path,
-                        use_fast=True,
+                        backend=lm_backend,
+                        backend_config=backend_config,
                     )
-                    if batch_size == 0:
-                        batch_size = set_default_batch_size()
-                elif backend == "mlx-engine":
-                    mlx_supported = is_mac_os_version_supported()
-                    if not mlx_supported:
-                        raise EnvironmentError("mlx-engine backend is only supported on macOS 13.5+ with Apple Silicon.")
-                    try:
-                        from mlx_vlm import load as mlx_load
-                    except ImportError:
-                        raise ImportError("Please install mlx-vlm to use the mlx-engine backend.")
-                    model, processor = mlx_load(model_path)
-                else:
-                    if os.getenv('OMP_NUM_THREADS') is None:
-                        os.environ["OMP_NUM_THREADS"] = "1"
-                    if backend == "vllm-engine":
-                        try:
-                            import vllm
-                            from mineru_vl_utils import MinerULogitsProcessor
-                        except ImportError:
-                            raise ImportError("Please install vllm to use the vllm-engine backend.")
-                        if "gpu_memory_utilization" not in kwargs:
-                            kwargs["gpu_memory_utilization"] = set_default_gpu_memory_utilization()
-                        if "model" not in kwargs:
-                            kwargs["model"] = model_path
-                        if enable_custom_logits_processors() and ("logits_processors" not in kwargs):
-                            kwargs["logits_processors"] = [MinerULogitsProcessor]
-                        # 使用kwargs为 vllm初始化参数
-                        vllm_llm = vllm.LLM(**kwargs)
-                    elif backend == "vllm-async-engine":
-                        try:
-                            from vllm.engine.arg_utils import AsyncEngineArgs
-                            from vllm.v1.engine.async_llm import AsyncLLM
-                            from mineru_vl_utils import MinerULogitsProcessor
-                        except ImportError:
-                            raise ImportError("Please install vllm to use the vllm-async-engine backend.")
-                        if "gpu_memory_utilization" not in kwargs:
-                            kwargs["gpu_memory_utilization"] = set_default_gpu_memory_utilization()
-                        if "model" not in kwargs:
-                            kwargs["model"] = model_path
-                        if enable_custom_logits_processors() and ("logits_processors" not in kwargs):
-                            kwargs["logits_processors"] = [MinerULogitsProcessor]
-                        # 使用kwargs为 vllm初始化参数
-                        vllm_async_llm = AsyncLLM.from_engine_args(AsyncEngineArgs(**kwargs))
             self._models[key] = MinerUClient(
                 backend=backend,
                 model=model,
                 processor=processor,
+                lmdeploy_engine=lmdeploy_engine,
                 vllm_llm=vllm_llm,
                 vllm_async_llm=vllm_async_llm,
                 server_url=server_url,
                 batch_size=batch_size,
                 max_concurrency=max_concurrency,
                 http_timeout=http_timeout,
+                server_headers=server_headers,
+                max_retries=max_retries,
+                retry_backoff_factor=retry_backoff_factor,
             )
             elapsed = round(time.time() - start_time, 2)
             logger.info(f"get {backend} predictor cost: {elapsed}s")
@@ -177,7 +233,7 @@ async def aio_doc_analyze(
     images_list, pdf_doc = load_images_from_pdf(pdf_bytes, image_type=ImageType.PIL)
     images_pil_list = [image_dict["img_pil"] for image_dict in images_list]
     # load_images_time = round(time.time() - load_images_start, 2)
-    # logger.info(f"load images cost: {load_images_time}, speed: {round(len(images_base64_list)/load_images_time, 3)} images/s")
+    # logger.debug(f"load images cost: {load_images_time}, speed: {round(len(images_pil_list)/load_images_time, 3)} images/s")
     # infer_start = time.time()
     results = await predictor.aio_batch_two_step_extract(images=images_pil_list)

mineru/cli/client.py CHANGED Viewed

@@ -4,7 +4,7 @@ import click
 from pathlib import Path
 from loguru import logger
-from mineru.utils.check_mac_env import is_mac_os_version_supported
+from mineru.utils.check_sys_env import is_mac_os_version_supported
 from mineru.utils.cli_parser import arg_parse
 from mineru.utils.config_reader import get_device
 from mineru.utils.guess_suffix_or_lang import guess_suffix_by_path
@@ -13,7 +13,7 @@ from ..version import __version__
 from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
-backends = ['pipeline', 'vlm-transformers', 'vlm-vllm-engine', 'vlm-http-client']
+backends = ['pipeline', 'vlm-transformers', 'vlm-vllm-engine', 'vlm-lmdeploy-engine', 'vlm-http-client']
 if is_mac_os_version_supported():
     backends.append("vlm-mlx-engine")
@@ -62,9 +62,10 @@ if is_mac_os_version_supported():
     the backend for parsing pdf:
       pipeline: More general.
       vlm-transformers: More general, but slower.
-      vlm-mlx-engine: Faster than transformers.
-      vlm-vllm-engine: Faster(engine).
-      vlm-http-client: Faster(client).
+      vlm-mlx-engine: Faster than transformers(macOS 13.5+).
+      vlm-vllm-engine: Faster(vllm-engine).
+      vlm-lmdeploy-engine: Faster(lmdeploy-engine).
+      vlm-http-client: Faster(client suitable for openai-compatible servers).
     Without method specified, pipeline will be used by default.""",
     default='pipeline',
 )

mineru/cli/common.py CHANGED Viewed

@@ -5,8 +5,8 @@ import os
 import copy
 from pathlib import Path
-import pypdfium2 as pdfium
 from loguru import logger
+import pypdfium2 as pdfium
 from mineru.data.data_reader_writer import FileBasedDataWriter
 from mineru.utils.draw_bbox import draw_layout_bbox, draw_span_bbox, draw_line_sort_bbox
@@ -16,10 +16,17 @@ from mineru.utils.pdf_image_tools import images_bytes_to_pdf_bytes
 from mineru.backend.vlm.vlm_middle_json_mkcontent import union_make as vlm_union_make
 from mineru.backend.vlm.vlm_analyze import doc_analyze as vlm_doc_analyze
 from mineru.backend.vlm.vlm_analyze import aio_doc_analyze as aio_vlm_doc_analyze
+from mineru.utils.pdf_page_id import get_end_page_id
+if os.getenv("MINERU_LMDEPLOY_DEVICE", "") == "maca":
+    import torch
+    torch.backends.cudnn.enabled = False
 pdf_suffixes = ["pdf"]
 image_suffixes = ["png", "jpeg", "jp2", "webp", "gif", "bmp", "jpg", "tiff"]
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
 def read_fn(path):
     if not isinstance(path, Path):
@@ -44,24 +51,21 @@ def prepare_env(output_dir, pdf_file_name, parse_method):
 def convert_pdf_bytes_to_bytes_by_pypdfium2(pdf_bytes, start_page_id=0, end_page_id=None):
+    pdf = pdfium.PdfDocument(pdf_bytes)
+    output_pdf = pdfium.PdfDocument.new()
     try:
-        # 从字节数据加载PDF
-        pdf = pdfium.PdfDocument(pdf_bytes)
-        # 确定结束页
-        end_page_id = end_page_id if end_page_id is not None and end_page_id >= 0 else len(pdf) - 1
-        if end_page_id > len(pdf) - 1:
-            logger.warning("end_page_id is out of range, use pdf_docs length")
-            end_page_id = len(pdf) - 1
-        # 创建一个新的PDF文档
-        output_pdf = pdfium.PdfDocument.new()
-        # 选择要导入的页面索引
-        page_indices = list(range(start_page_id, end_page_id + 1))
-        # 从原PDF导入页面到新PDF
-        output_pdf.import_pages(pdf, page_indices)
+        end_page_id = get_end_page_id(end_page_id, len(pdf))
+        # 逐页导入,失败则跳过
+        output_index = 0
+        for page_index in range(start_page_id, end_page_id + 1):
+            try:
+                output_pdf.import_pages(pdf, pages=[page_index])
+                output_index += 1
+            except Exception as page_error:
+                output_pdf.del_page(output_index)
+                logger.warning(f"Failed to import page {page_index}: {page_error}, skipping this page.")
+                continue
         # 将新PDF保存到内存缓冲区
         output_buffer = io.BytesIO()
@@ -69,13 +73,11 @@ def convert_pdf_bytes_to_bytes_by_pypdfium2(pdf_bytes, start_page_id=0, end_page
         # 获取字节数据
         output_bytes = output_buffer.getvalue()
-        pdf.close()  # 关闭原PDF文档以释放资源
-        output_pdf.close()  # 关闭新PDF文档以释放资源
     except Exception as e:
         logger.warning(f"Error in converting PDF bytes: {e}, Using original PDF bytes.")
         output_bytes = pdf_bytes
+    pdf.close()
+    output_pdf.close()
     return output_bytes

mineru/cli/gradio_app.py CHANGED Viewed

@@ -13,7 +13,7 @@ from gradio_pdf import PDF
 from loguru import logger
 from mineru.cli.common import prepare_env, read_fn, aio_do_parse, pdf_suffixes, image_suffixes
-from mineru.utils.check_mac_env import is_mac_os_version_supported
+from mineru.utils.check_sys_env import is_mac_os_version_supported
 from mineru.utils.cli_parser import arg_parse
 from mineru.utils.hash_utils import str_sha256
@@ -274,7 +274,7 @@ def to_pdf(file_path):
 # 更新界面函数
 def update_interface(backend_choice):
-    if backend_choice in ["vlm-transformers", "vlm-vllm-async-engine", "vlm-mlx-engine"]:
+    if backend_choice in ["vlm-transformers", "vlm-vllm-async-engine", "vlm-lmdeploy-engine", "vlm-mlx-engine"]:
         return gr.update(visible=False), gr.update(visible=False)
     elif backend_choice in ["vlm-http-client"]:
         return gr.update(visible=True), gr.update(visible=False)
@@ -301,6 +301,13 @@ def update_interface(backend_choice):
     help="Enable vLLM engine backend for faster processing.",
     default=False,
 )
+@click.option(
+    '--enable-lmdeploy-engine',
+    'lmdeploy_engine_enable',
+    type=bool,
+    help="Enable LMDeploy engine backend for faster processing.",
+    default=False,
+)
 @click.option(
     '--enable-api',
     'api_enable',
@@ -338,7 +345,7 @@ def update_interface(backend_choice):
     default='all',
 )
 def main(ctx,
-        example_enable, vllm_engine_enable, api_enable, max_convert_pages,
+        example_enable, vllm_engine_enable, lmdeploy_engine_enable, api_enable, max_convert_pages,
         server_name, server_port, latex_delimiters_type, **kwargs
 ):
@@ -367,6 +374,20 @@ def main(ctx,
             print("vLLM engine init successfully.")
         except Exception as e:
             logger.exception(e)
+    elif lmdeploy_engine_enable:
+        try:
+            print("Start init LMDeploy engine...")
+            from mineru.backend.vlm.vlm_analyze import ModelSingleton
+            model_singleton = ModelSingleton()
+            predictor = model_singleton.get_model(
+                "lmdeploy-engine",
+                None,
+                None,
+                **kwargs
+            )
+            print("LMDeploy engine init successfully.")
+        except Exception as e:
+            logger.exception(e)
     suffixes = [f".{suffix}" for suffix in pdf_suffixes + image_suffixes]
     with gr.Blocks() as demo:
         gr.HTML(header)
@@ -380,6 +401,9 @@ def main(ctx,
                     if vllm_engine_enable:
                         drop_list = ["pipeline", "vlm-vllm-async-engine"]
                         preferred_option = "vlm-vllm-async-engine"
+                    elif lmdeploy_engine_enable:
+                        drop_list = ["pipeline", "vlm-lmdeploy-engine"]
+                        preferred_option = "vlm-lmdeploy-engine"
                     else:
                         drop_list = ["pipeline", "vlm-transformers", "vlm-http-client"]
                         if is_mac_os_version_supported():
@@ -453,4 +477,4 @@ def main(ctx,
 if __name__ == '__main__':
-    main()
+    main()

mineru/cli/vlm_server.py ADDED Viewed

@@ -0,0 +1,61 @@
+import click
+import sys
+from loguru import logger
+def vllm_server():
+    from mineru.model.vlm.vllm_server import main
+    main()
+def lmdeploy_server():
+    from mineru.model.vlm.lmdeploy_server import main
+    main()
+@click.command(context_settings=dict(ignore_unknown_options=True, allow_extra_args=True))
+@click.option(
+    '-e',
+    '--engine',
+    'inference_engine',
+    type=click.Choice(['auto', 'vllm', 'lmdeploy']),
+    default='auto',
+    help='Select the inference engine used to accelerate VLM inference, default is "auto".',
+)
+@click.pass_context
+def openai_server(ctx, inference_engine):
+    sys.argv = [sys.argv[0]] + ctx.args
+    if inference_engine == 'auto':
+        try:
+            import vllm
+            inference_engine = 'vllm'
+            logger.info("Using vLLM as the inference engine for VLM server.")
+        except ImportError:
+            logger.info("vLLM not found, attempting to use LMDeploy as the inference engine for VLM server.")
+            try:
+                import lmdeploy
+                inference_engine = 'lmdeploy'
+            # Success message moved after successful import
+                logger.info("Using LMDeploy as the inference engine for VLM server.")
+            except ImportError:
+                logger.error("Neither vLLM nor LMDeploy is installed. Please install at least one of them.")
+                sys.exit(1)
+    if inference_engine == 'vllm':
+        try:
+            import vllm
+        except ImportError:
+            logger.error("vLLM is not installed. Please install vLLM or choose LMDeploy as the inference engine.")
+            sys.exit(1)
+        vllm_server()
+    elif inference_engine == 'lmdeploy':
+        try:
+            import lmdeploy
+        except ImportError:
+            logger.error("LMDeploy is not installed. Please install LMDeploy or choose vLLM as the inference engine.")
+            sys.exit(1)
+        lmdeploy_server()
+if __name__ == "__main__":
+    openai_server()

mineru/model/table/rec/slanet_plus/table_structure.py CHANGED Viewed

@@ -16,6 +16,7 @@ from typing import Any, Dict, List, Tuple
 import numpy as np
+from mineru.utils.os_env_config import get_op_num_threads
 from .table_structure_utils import (
     OrtInferSession,
     TableLabelDecode,
@@ -29,6 +30,9 @@ class TableStructurer:
         self.preprocess_op = TablePreprocess()
         self.batch_preprocess_op = BatchTablePreprocess()
+        config["intra_op_num_threads"] = get_op_num_threads("MINERU_INTRA_OP_NUM_THREADS")
+        config["inter_op_num_threads"] = get_op_num_threads("MINERU_INTER_OP_NUM_THREADS")
         self.session = OrtInferSession(config)
         self.character = self.session.get_metadata()

mineru/model/table/rec/unet_table/table_structure_unet.py CHANGED Viewed

@@ -5,6 +5,8 @@ from typing import Optional, Dict, Any, Tuple
 import cv2
 import numpy as np
 from skimage import measure
+from mineru.utils.os_env_config import get_op_num_threads
 from .utils import OrtInferSession, resize_img
 from .utils_table_line_rec import (
     get_table_line,
@@ -28,6 +30,9 @@ class TSRUnet:
         self.inp_height = 1024
         self.inp_width = 1024
+        config["intra_op_num_threads"] = get_op_num_threads("MINERU_INTRA_OP_NUM_THREADS")
+        config["inter_op_num_threads"] = get_op_num_threads("MINERU_INTER_OP_NUM_THREADS")
         self.session = OrtInferSession(config)
     def __call__(

mineru/model/vlm/lmdeploy_server.py ADDED Viewed

@@ -0,0 +1,93 @@
+import os
+import sys
+from loguru import logger
+from mineru.backend.vlm.utils import set_lmdeploy_backend
+from mineru.utils.models_download_utils import auto_download_and_get_model_root_path
+def main():
+    args = sys.argv[1:]
+    has_port_arg = False
+    has_gpu_memory_utilization_arg = False
+    has_log_level_arg = False
+    device_type = ""
+    lm_backend = ""
+    # 检查现有参数
+    indices_to_remove = []
+    for i, arg in enumerate(args):
+        if arg == "--server-port" or arg.startswith("--server-port="):
+            has_port_arg = True
+        if arg == "--cache-max-entry-count" or arg.startswith("--cache-max-entry-count="):
+            has_gpu_memory_utilization_arg = True
+        if arg == "--log-level" or arg.startswith("--log-level="):
+            has_log_level_arg = True
+        if arg == "--backend" or arg == "--lmdeploy-backend":
+            if i + 1 < len(args):
+                lm_backend = args[i + 1]
+                indices_to_remove.extend([i, i + 1])
+        elif arg.startswith("--backend=") or arg.startswith("--lmdeploy-backend="):
+            lm_backend = arg.split("=", 1)[1]
+            indices_to_remove.append(i)
+        if arg == "--device" or arg == "--lmdeploy-device":
+            if i + 1 < len(args):
+                device_type = args[i + 1]
+                indices_to_remove.extend([i, i + 1])
+        elif arg.startswith("--device=") or arg.startswith("--lmdeploy-device="):
+            device_type = arg.split("=", 1)[1]
+            indices_to_remove.append(i)
+    # 从后往前删除,避免索引错位
+    for i in sorted(set(indices_to_remove), reverse=True):
+        args.pop(i)
+    # 添加默认参数
+    if not has_port_arg:
+        args.extend(["--server-port", "30000"])
+    if not has_gpu_memory_utilization_arg:
+        args.extend(["--cache-max-entry-count", "0.5"])
+    if not has_log_level_arg:
+        args.extend(["--log-level", "ERROR"])
+    device_type = os.getenv("MINERU_LMDEPLOY_DEVICE", device_type)
+    if device_type == "":
+        device_type = "cuda"
+    elif device_type not in ["cuda", "ascend", "maca", "camb"]:
+        raise ValueError(f"Unsupported lmdeploy device type: {device_type}")
+    lm_backend = os.getenv("MINERU_LMDEPLOY_BACKEND", lm_backend)
+    if lm_backend == "":
+        lm_backend = set_lmdeploy_backend(device_type)
+    elif lm_backend not in ["pytorch", "turbomind"]:
+        raise ValueError(f"Unsupported lmdeploy backend: {lm_backend}")
+    logger.info(f"lmdeploy device is: {device_type}, lmdeploy backend is: {lm_backend}")
+    if lm_backend == "pytorch":
+        os.environ["TOKENIZERS_PARALLELISM"] = "false"
+    args.extend(["--device", device_type])
+    args.extend(["--backend", lm_backend])
+    model_path = auto_download_and_get_model_root_path("/", "vlm")
+    # logger.debug(args)
+    # 重构参数，将模型路径作为位置参数
+    sys.argv = [sys.argv[0]] + ["serve", "api_server", model_path] + args
+    if os.getenv('OMP_NUM_THREADS') is None:
+        os.environ["OMP_NUM_THREADS"] = "1"
+    # 启动 lmdeploy 服务器
+    print(f"start lmdeploy server: {sys.argv}")
+    # 使用os.system调用启动lmdeploy服务器
+    os.system("lmdeploy " + " ".join(sys.argv[1:]))
+if __name__ == "__main__":
+    main()

mineru/utils/{check_mac_env.py → check_sys_env.py} RENAMED Viewed

@@ -4,11 +4,19 @@ import platform
 from packaging import version
+def is_windows_environment() -> bool:
+    return platform.system() == "Windows"
 # Detect if the current environment is a Mac computer
 def is_mac_environment() -> bool:
     return platform.system() == "Darwin"
+def is_linux_environment() -> bool:
+    return platform.system() == "Linux"
 # Detect if CPU is Apple Silicon architecture
 def is_apple_silicon_cpu() -> bool:
     return platform.machine() in ["arm64", "aarch64"]

mineru/utils/os_env_config.py ADDED Viewed

@@ -0,0 +1,30 @@
+import os
+def get_op_num_threads(env_name: str) -> int:
+    env_value = os.getenv(env_name, None)
+    return get_value_from_string(env_value, -1)
+def get_load_images_timeout() -> int:
+    env_value = os.getenv('MINERU_PDF_RENDER_TIMEOUT', None)
+    return get_value_from_string(env_value, 300)
+def get_value_from_string(env_value: str, default_value: int) -> int:
+    if env_value is not None:
+        try:
+            num_threads = int(env_value)
+            if num_threads > 0:
+                return num_threads
+        except ValueError:
+            return default_value
+    return default_value
+if __name__ == '__main__':
+    print(get_value_from_string('1', -1))
+    print(get_value_from_string('0', -1))
+    print(get_value_from_string('-1', -1))
+    print(get_value_from_string('abc', -1))
+    print(get_load_images_timeout())

mineru/utils/pdf_image_tools.py CHANGED Viewed

@@ -1,4 +1,5 @@
 # Copyright (c) Opendatalab. All rights reserved.
+import os
 from io import BytesIO
 import numpy as np
@@ -7,9 +8,14 @@ from loguru import logger
 from PIL import Image
 from mineru.data.data_reader_writer import FileBasedDataWriter
+from mineru.utils.check_sys_env import is_windows_environment
+from mineru.utils.os_env_config import get_load_images_timeout
 from mineru.utils.pdf_reader import image_to_b64str, image_to_bytes, page_to_image
-from .enum_class import ImageType
-from .hash_utils import str_sha256
+from mineru.utils.enum_class import ImageType
+from mineru.utils.hash_utils import str_sha256
+from mineru.utils.pdf_page_id import get_end_page_id
+from concurrent.futures import ProcessPoolExecutor, TimeoutError as FuturesTimeoutError
 def pdf_page_to_image(page: pdfium.PdfPage, dpi=200, image_type=ImageType.PIL) -> dict:
@@ -35,7 +41,106 @@ def pdf_page_to_image(page: pdfium.PdfPage, dpi=200, image_type=ImageType.PIL) -
     return image_dict
+def _load_images_from_pdf_worker(pdf_bytes, dpi, start_page_id, end_page_id, image_type):
+    """用于进程池的包装函数"""
+    return load_images_from_pdf_core(pdf_bytes, dpi, start_page_id, end_page_id, image_type)
 def load_images_from_pdf(
+        pdf_bytes: bytes,
+        dpi=200,
+        start_page_id=0,
+        end_page_id=None,
+        image_type=ImageType.PIL,
+        timeout=None,
+        threads=4,
+):
+    """带超时控制的 PDF 转图片函数,支持多进程加速
+    Args:
+        pdf_bytes (bytes): PDF 文件的 bytes
+        dpi (int, optional): rendering resolution (DPI) used when converting pages to images. Defaults to 200.
+        start_page_id (int, optional): 起始页码. Defaults to 0.
+        end_page_id (int | None, optional): 结束页码. Defaults to None.
+        image_type (ImageType, optional): 图片类型. Defaults to ImageType.PIL.
+        timeout (int | None, optional): 超时时间(秒)。如果为 None，则从环境变量 MINERU_PDF_RENDER_TIMEOUT 读取，若未设置则默认为 300 秒。
+        threads (int): 进程数,默认 4
+    Raises:
+        TimeoutError: 当转换超时时抛出
+    """
+    pdf_doc = pdfium.PdfDocument(pdf_bytes)
+    if is_windows_environment():
+        # Windows 环境下不使用多进程
+        return load_images_from_pdf_core(
+            pdf_bytes,
+            dpi,
+            start_page_id,
+            get_end_page_id(end_page_id, len(pdf_doc)),
+            image_type
+        ), pdf_doc
+    else:
+        if timeout is None:
+            timeout = get_load_images_timeout()
+        end_page_id = get_end_page_id(end_page_id, len(pdf_doc))
+        # 计算总页数
+        total_pages = end_page_id - start_page_id + 1
+        # 实际使用的进程数不超过总页数
+        actual_threads = min(os.cpu_count() or 1, threads, total_pages)
+        # 根据实际进程数分组页面范围
+        pages_per_thread = max(1, total_pages // actual_threads)
+        page_ranges = []
+        for i in range(actual_threads):
+            range_start = start_page_id + i * pages_per_thread
+            if i == actual_threads - 1:
+                # 最后一个进程处理剩余所有页面
+                range_end = end_page_id
+            else:
+                range_end = start_page_id + (i + 1) * pages_per_thread - 1
+            page_ranges.append((range_start, range_end))
+        # logger.debug(f"PDF to images using {actual_threads} processes, page ranges: {page_ranges}")
+        with ProcessPoolExecutor(max_workers=actual_threads) as executor:
+            # 提交所有任务
+            futures = []
+            for range_start, range_end in page_ranges:
+                future = executor.submit(
+                    _load_images_from_pdf_worker,
+                    pdf_bytes,
+                    dpi,
+                    range_start,
+                    range_end,
+                    image_type
+                )
+                futures.append((range_start, future))
+            try:
+                # 收集结果并按页码排序
+                all_results = []
+                for range_start, future in futures:
+                    images_list = future.result(timeout=timeout)
+                    all_results.append((range_start, images_list))
+                # 按起始页码排序并合并结果
+                all_results.sort(key=lambda x: x[0])
+                images_list = []
+                for _, imgs in all_results:
+                    images_list.extend(imgs)
+                return images_list, pdf_doc
+            except FuturesTimeoutError:
+                pdf_doc.close()
+                executor.shutdown(wait=False, cancel_futures=True)
+                raise TimeoutError(f"PDF to images conversion timeout after {timeout}s")
+def load_images_from_pdf_core(
     pdf_bytes: bytes,
     dpi=200,
     start_page_id=0,
@@ -45,18 +150,17 @@ def load_images_from_pdf(
     images_list = []
     pdf_doc = pdfium.PdfDocument(pdf_bytes)
     pdf_page_num = len(pdf_doc)
-    end_page_id = end_page_id if end_page_id is not None and end_page_id >= 0 else pdf_page_num - 1
-    if end_page_id > pdf_page_num - 1:
-        logger.warning("end_page_id is out of range, use images length")
-        end_page_id = pdf_page_num - 1
-    for index in range(0, pdf_page_num):
-        if start_page_id <= index <= end_page_id:
-            page = pdf_doc[index]
-            image_dict = pdf_page_to_image(page, dpi=dpi, image_type=image_type)
-            images_list.append(image_dict)
-    return images_list, pdf_doc
+    end_page_id = get_end_page_id(end_page_id, pdf_page_num)
+    for index in range(start_page_id, end_page_id + 1):
+        # logger.debug(f"Converting page {index}/{pdf_page_num} to image")
+        page = pdf_doc[index]
+        image_dict = pdf_page_to_image(page, dpi=dpi, image_type=image_type)
+        images_list.append(image_dict)
+    pdf_doc.close()
+    return images_list
 def cut_image(bbox: tuple, page_num: int, page_pil_img, return_path, image_writer: FileBasedDataWriter, scale=2):

mineru/utils/pdf_page_id.py ADDED Viewed

@@ -0,0 +1,10 @@
+# Copyright (c) Opendatalab. All rights reserved.
+from loguru import logger
+def get_end_page_id(end_page_id, pdf_page_num):
+    end_page_id = end_page_id if end_page_id is not None and end_page_id >= 0 else pdf_page_num - 1
+    if end_page_id > pdf_page_num - 1:
+        logger.warning("end_page_id is out of range, use images length")
+        end_page_id = pdf_page_num - 1
+    return end_page_id

mineru/version.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "2.6.3"
1	+ __version__ = "2.6.5"

{mineru-2.6.3.dist-info → mineru-2.6.5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mineru
-Version: 2.6.3
+Version: 2.6.5
 Summary: A practical tool for converting PDF to Markdown
 License: AGPL-3.0
 Project-URL: homepage, https://mineru.net/
@@ -36,8 +36,9 @@ Requires-Dist: fast-langdetect<0.3.0,>=0.2.3
 Requires-Dist: scikit-image<1.0.0,>=0.25.0
 Requires-Dist: openai<3,>=1.70.0
 Requires-Dist: beautifulsoup4<5,>=4.13.5
-Requires-Dist: magika<0.7.0,>=0.6.2
-Requires-Dist: mineru-vl-utils<1,>=0.1.15
+Requires-Dist: magika<1.1.0,>=0.6.2
+Requires-Dist: mineru-vl-utils<1,>=0.1.17
+Requires-Dist: qwen-vl-utils<1,>=0.0.14
 Provides-Extra: test
 Requires-Dist: mineru[core]; extra == "test"
 Requires-Dist: pytest; extra == "test"
@@ -46,10 +47,12 @@ Requires-Dist: coverage; extra == "test"
 Requires-Dist: fuzzywuzzy; extra == "test"
 Provides-Extra: vlm
 Requires-Dist: torch<3,>=2.6.0; extra == "vlm"
-Requires-Dist: transformers<5.0.0,>=4.51.1; extra == "vlm"
+Requires-Dist: transformers!=4.57.2,<5.0.0,>=4.51.1; extra == "vlm"
 Requires-Dist: accelerate>=1.5.1; extra == "vlm"
 Provides-Extra: vllm
 Requires-Dist: vllm<0.12,>=0.10.1.1; extra == "vllm"
+Provides-Extra: lmdeploy
+Requires-Dist: lmdeploy<0.12,>=0.10.2; extra == "lmdeploy"
 Provides-Extra: mlx
 Requires-Dist: mlx-vlm<0.4,>=0.3.3; extra == "mlx"
 Provides-Extra: pipeline
@@ -81,7 +84,8 @@ Requires-Dist: mineru[gradio]; extra == "core"
 Requires-Dist: mineru[mlx]; sys_platform == "darwin" and extra == "core"
 Provides-Extra: all
 Requires-Dist: mineru[core]; extra == "all"
-Requires-Dist: mineru[vllm]; extra == "all"
+Requires-Dist: mineru[vllm]; sys_platform == "linux" and extra == "all"
+Requires-Dist: mineru[lmdeploy]; sys_platform == "windows" and extra == "all"
 Dynamic: license-file
 <div align="center" xmlns="http://www.w3.org/1999/html">
@@ -130,6 +134,13 @@ Dynamic: license-file
 </div>
 # Changelog
+- 2025/11/26 2.6.5 Release
+  - Added support for a new backend, `vlm-lmdeploy-engine`. Its usage is similar to `vlm-vllm-(async)engine`, but it uses LMDeploy as the inference engine and, unlike vLLM, also supports native inference acceleration on Windows.
+- 2025/11/04 2.6.4 Release
+  - Added a timeout for PDF image rendering to prevent abnormal PDF files from blocking the rendering process for a long time. The default is 300 seconds and can be changed via the environment variable `MINERU_PDF_RENDER_TIMEOUT`.
+  - Added CPU thread-count options for ONNX models to reduce CPU resource contention in high-concurrency scenarios. The default is the system CPU core count and can be changed via the environment variables `MINERU_INTRA_OP_NUM_THREADS` and `MINERU_INTER_OP_NUM_THREADS`.
 - 2025/10/31 2.6.3 Release
   - Added support for a new backend `vlm-mlx-engine`, enabling MLX-accelerated inference for the MinerU2.5 model on Apple Silicon devices. Compared to the `vlm-transformers` backend, `vlm-mlx-engine` delivers a 100%–200% speed improvement.
   - Bug fixes: #3849, #3859
@@ -714,12 +725,13 @@ A WebUI developed based on Gradio, with a simple interface and only core parsing
         <tr>
             <th rowspan="2">Parsing Backend</th>
             <th rowspan="2">pipeline <br> (Accuracy<sup>1</sup> 82+)</th>
-            <th colspan="4">vlm (Accuracy<sup>1</sup> 90+)</th>
+            <th colspan="5">vlm (Accuracy<sup>1</sup> 90+)</th>
         </tr>
         <tr>
             <th>transformers</th>
             <th>mlx-engine</th>
             <th>vllm-engine / <br>vllm-async-engine</th>
+            <th>lmdeploy-engine</th>
             <th>http-client</th>
         </tr>
     </thead>
@@ -730,40 +742,42 @@ A WebUI developed based on Gradio, with a simple interface and only core parsing
             <td>Good compatibility, <br>but slower</td>
             <td>Faster than transformers</td>
             <td>Fast, compatible with the vLLM ecosystem</td>
-            <td>Suitable for OpenAI-compatible servers<sup>5</sup></td>
+            <td>Fast, compatible with the LMDeploy ecosystem</td>
+            <td>Suitable for OpenAI-compatible servers<sup>6</sup></td>
         </tr>
         <tr>
             <th>Operating System</th>
             <td colspan="2" style="text-align:center;">Linux<sup>2</sup> / Windows / macOS</td>
             <td style="text-align:center;">macOS<sup>3</sup></td>
             <td style="text-align:center;">Linux<sup>2</sup> / Windows<sup>4</sup> </td>
+            <td style="text-align:center;">Linux<sup>2</sup> / Windows<sup>5</sup> </td>
             <td>Any</td>
         </tr>
         <tr>
             <th>CPU inference support</th>
             <td colspan="2" style="text-align:center;">✅</td>
-            <td colspan="2" style="text-align:center;">❌</td>
+            <td colspan="3" style="text-align:center;">❌</td>
             <td>Not required</td>
         </tr>
         <tr>
             <th>GPU Requirements</th><td colspan="2" style="text-align:center;">Volta or later architectures, 6 GB VRAM or more, or Apple Silicon</td>
             <td>Apple Silicon</td>
-            <td>Volta or later architectures, 8 GB VRAM or more</td>
+            <td colspan="2" style="text-align:center;">Volta or later architectures, 8 GB VRAM or more</td>
             <td>Not required</td>
         </tr>
         <tr>
             <th>Memory Requirements</th>
-            <td colspan="4" style="text-align:center;">Minimum 16 GB, 32 GB recommended</td>
+            <td colspan="5" style="text-align:center;">Minimum 16 GB, 32 GB recommended</td>
             <td>8 GB</td>
         </tr>
         <tr>
             <th>Disk Space Requirements</th>
-            <td colspan="4" style="text-align:center;">20 GB or more, SSD recommended</td>
+            <td colspan="5" style="text-align:center;">20 GB or more, SSD recommended</td>
             <td>2 GB</td>
         </tr>
         <tr>
             <th>Python Version</th>
-            <td colspan="5" style="text-align:center;">3.10-3.13</td>
+            <td colspan="6" style="text-align:center;">3.10-3.13<sup>7</sup></td>
         </tr>
     </tbody>
 </table>
@@ -772,7 +786,9 @@ A WebUI developed based on Gradio, with a simple interface and only core parsing
 <sup>2</sup> Linux supports only distributions released in 2019 or later.
 <sup>3</sup> MLX requires macOS 13.5 or later, recommended for use with version 14.0 or higher.
 <sup>4</sup> On Windows, vLLM is supported via WSL2 (Windows Subsystem for Linux).
-<sup>5</sup> Servers compatible with the OpenAI API, such as local or remote model services deployed via inference frameworks like `vLLM`, `SGLang`, or `LMDeploy`.
+<sup>5</sup> Windows LMDeploy can only use the `turbomind` backend, which is slightly slower than the `pytorch` backend. If performance is critical, it is recommended to run it via WSL2.
+<sup>6</sup> Servers compatible with the OpenAI API, such as local or remote model services deployed via inference frameworks like `vLLM`, `SGLang`, or `LMDeploy`.
+<sup>7</sup> Windows + LMDeploy only supports Python versions 3.10–3.12, as the critical dependency `ray` does not yet support Python 3.13 on Windows.
 ### Install MinerU
@@ -792,8 +808,8 @@ uv pip install -e .[core]
 ```
 > [!TIP]
-> `mineru[core]` includes all core features except `vLLM` acceleration, compatible with Windows / Linux / macOS systems, suitable for most users.
-> If you need to use `vLLM` acceleration for VLM model inference or install a lightweight client on edge devices, please refer to the documentation [Extension Modules Installation Guide](https://opendatalab.github.io/MinerU/quick_start/extension_modules/).
+> `mineru[core]` includes all core features except `vLLM`/`LMDeploy` acceleration, compatible with Windows / Linux / macOS systems, suitable for most users.
+> If you need to use `vLLM`/`LMDeploy` acceleration for VLM model inference or install a lightweight client on edge devices, please refer to the documentation [Extension Modules Installation Guide](https://opendatalab.github.io/MinerU/quick_start/extension_modules/).
 ---

{mineru-2.6.3.dist-info → mineru-2.6.5.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
 mineru/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
-mineru/version.py,sha256=uJ6TLK18jhCrL0aclBja7NzlAGLAyZjVpX-gq3d461k,22
+mineru/version.py,sha256=b8L3dijps7oaMPmOpJzOuXwvOcbIuro9wWmuPwiL87o,22
 mineru/backend/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
 mineru/backend/utils.py,sha256=GLJU3IznDmhE1_qNmkU1UOtsuskIHBezgsEVO6Uar-Y,698
 mineru/backend/pipeline/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
@@ -8,22 +8,22 @@ mineru/backend/pipeline/model_init.py,sha256=OAylOcQD9gu5TBcX7nMt7X5NpJMtQICI5Iv
 mineru/backend/pipeline/model_json_to_middle_json.py,sha256=reXkUR_wKmJD64d7vRNXMxFviwkzDlGjRshpdwsVquI,10951
 mineru/backend/pipeline/model_list.py,sha256=7cXMBfZrP0K6qWueg1D_-WoUANeSINzkn_ic9E7YQLs,222
 mineru/backend/pipeline/para_split.py,sha256=Kq95MmvkPm7rKxlCSGiTvVKyF7CErHI2eGGAs5sLl0Q,17119
-mineru/backend/pipeline/pipeline_analyze.py,sha256=rbO5AetOdnxR5ctkoDzFCFoElkz7Jgb7gi2Ct596NK8,6655
+mineru/backend/pipeline/pipeline_analyze.py,sha256=O_HGifodg03VZbmTve-U6Cmo0T03AmuK86t1v1J9X-Q,6897
 mineru/backend/pipeline/pipeline_magic_model.py,sha256=w8jGx8f6yZN0Wf2yPP3L9rYKc9rogxreZCrUJzJvPO8,14974
 mineru/backend/pipeline/pipeline_middle_json_mkcontent.py,sha256=YlnEbbUnkniZXS13aLo5mjfFQvQM5SrIVvTAGBZsLmw,14478
 mineru/backend/vlm/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
 mineru/backend/vlm/model_output_to_middle_json.py,sha256=AqYX44gS9crUO_t7SuUatD71EVjow6pI6yA2Ik3gQ0s,5139
-mineru/backend/vlm/utils.py,sha256=woGqyRI4S7p69daLCU07XNXWTV27aLf7YBjjVH1x-5o,2794
-mineru/backend/vlm/vlm_analyze.py,sha256=7c5_JN1F9YTDNNgA_Rmw6xX1PI7gcIT4A4ujtGQHH9Q,8792
+mineru/backend/vlm/utils.py,sha256=taiPNKtsykImUYkkosk1CjxFIJEutygK8iZTLly-ZqU,3905
+mineru/backend/vlm/vlm_analyze.py,sha256=wP3vuYGVec0hRsDAuzfSm2HD4Muu7wSWL767qxd_yqw,11690
 mineru/backend/vlm/vlm_magic_model.py,sha256=Pd0sOr7G1crAJIVeq6h_03gNSuxmV5U8dvGTGT_rrjs,23452
 mineru/backend/vlm/vlm_middle_json_mkcontent.py,sha256=5V-AU9KkxxMn0DDSQBrb15I4GVpEyiQy8uNI_tQhS6M,13498
 mineru/cli/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
-mineru/cli/client.py,sha256=ArnoT2psOQRnTqLpsFwPaoi-l444iIVkbBn90Pm16n8,6915
-mineru/cli/common.py,sha256=jxFJMdc-02UMO3SXAtcZ6aIdPrakAE6DCccZ9kDlPKc,14276
+mineru/cli/client.py,sha256=__CQknekVeq6s72JzHQRPSpR7mfNaO-ob9wq6oiEj6s,7047
+mineru/cli/common.py,sha256=zhNOJCOnTSMbWdUWSZG-nf0odv5vBRtdZYZ1UbUPH3g,14369
 mineru/cli/fast_api.py,sha256=t5bda769VbM5iokAboiJfPIOnm-r5GTFReE-KQy8L3g,10941
-mineru/cli/gradio_app.py,sha256=6dA0ARpdOoewFeXmHrleF1amCgBV9ilY_nkWAmAmN8A,14731
+mineru/cli/gradio_app.py,sha256=EUPuRHHCOECrE3E3VNEeuMDYeC3nicurOYfk8YJSOMw,15646
 mineru/cli/models_download.py,sha256=LNfoIpUlJM7m7qb2SiCxtjMDw4jILBQtZwNP2JoY81U,4815
-mineru/cli/vlm_vllm_server.py,sha256=fQJyD-gIPQ41hR_6aIaDJczl66N310t0CiZEBAfX5mc,90
+mineru/cli/vlm_server.py,sha256=27HaqO3wpMXSA_nA3CC6JOBTHK3q66SP00cD6m9HuQE,1974
 mineru/data/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
 mineru/data/data_reader_writer/__init__.py,sha256=9qnGNrsuGBMwwfsQy6oChdkz--a_LPdYWE0VZZr0yr4,490
 mineru/data/data_reader_writer/base.py,sha256=nqmAcdHOXMOJO6RAT3ILligDFaw8Op0STyCw5yOzAbI,1706
@@ -77,12 +77,12 @@ mineru/model/table/rec/slanet_plus/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQe
 mineru/model/table/rec/slanet_plus/main.py,sha256=vfrcvQ9JBf32YZU9eNoetoqdpcrFNsA1WNqQBsG8i2o,7646
 mineru/model/table/rec/slanet_plus/matcher.py,sha256=uwF-wCLaYlaQ3JQ_-YywGVl1XQYnx7G_RTuWLW8JlBk,7321
 mineru/model/table/rec/slanet_plus/matcher_utils.py,sha256=9wt_ydeeViLd57bU6g3lnXXni49qLSra2C6wSFQZkiw,9597
-mineru/model/table/rec/slanet_plus/table_structure.py,sha256=Ve9eUdA0ivHf5bf9gwvHHfb7-E7drJLP3S3MPlh3uZ0,3844
+mineru/model/table/rec/slanet_plus/table_structure.py,sha256=qt-HPYIQyp0aWG_MmnM_sMQCV8ZLb4rALSueyCohPgM,4085
 mineru/model/table/rec/slanet_plus/table_structure_utils.py,sha256=YYSkwN2WdLx7qkWMSGkPY7yXOH5ENVhg5CsRGhtZ5Wk,19281
 mineru/model/table/rec/unet_table/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 mineru/model/table/rec/unet_table/main.py,sha256=J13Q7_6stYyedmVedf9CZD7R0tuguGfTg3Z3ob4GDuM,15565
 mineru/model/table/rec/unet_table/table_recover.py,sha256=rSyeWyuP10M8dLKA5e0n4P2DXMYbVbmgLxEcdZA8_0E,9059
-mineru/model/table/rec/unet_table/table_structure_unet.py,sha256=beBMmBHAOR2lAuf2rcOKRSbFaJqwuIgMJWxWQsFmIRI,7908
+mineru/model/table/rec/unet_table/table_structure_unet.py,sha256=hnmYLzZFRlK0Y4gr874G9GaLahcKnNZYNun869FdmH8,8150
 mineru/model/table/rec/unet_table/utils.py,sha256=CYAqJW0wePJk4NAemb8W203N7E32v0ujiWbxanDhd8I,16083
 mineru/model/table/rec/unet_table/utils_table_line_rec.py,sha256=zrCdPwI4M8nu0FEfd7lRJAe0z8kYq3KFbzwElM82USE,11174
 mineru/model/table/rec/unet_table/utils_table_recover.py,sha256=XksJsY82ZS0kqUnNT-jvaYzxJ3V3svMSzj0puwIau1k,10651
@@ -145,15 +145,16 @@ mineru/model/utils/tools/infer/predict_det.py,sha256=vYQREn7vELXxBsr72CCCVvm1gwV
 mineru/model/utils/tools/infer/predict_rec.py,sha256=-BH93JDisu0kT6CyHA4plUOKcb2L-UvDk7Tein5uwt4,19209
 mineru/model/utils/tools/infer/predict_system.py,sha256=hkegkn6hq2v2zqHVAP615-k-fkTS8swRYSbZeoqmSI8,3822
 mineru/model/utils/tools/infer/pytorchocr_utility.py,sha256=i1PFN-_kefJUUZ4Vk7igs1TU8gfErTDlDXY6-8Uaurw,9323
-mineru/model/vlm_vllm_model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-mineru/model/vlm_vllm_model/server.py,sha256=w5ddusPbcVaEoWAo_BRjmwv_Ywxrc_bCMRhxihoyykY,2263
+mineru/model/vlm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mineru/model/vlm/lmdeploy_server.py,sha256=PvxJNcUIKB8VzWMDXeV1t0SHSgz_ULO36ZAzJbppz90,3262
+mineru/model/vlm/vllm_server.py,sha256=w5ddusPbcVaEoWAo_BRjmwv_Ywxrc_bCMRhxihoyykY,2263
 mineru/resources/header.html,sha256=PUselBXLBn8gfeP3zwEtj6zIxfhcCN4vN_B796nQFNQ,4410
 mineru/resources/fasttext-langdetect/lid.176.ftz,sha256=jzRyz-hzintgmejpmcPL-uDc0VaWqsfXc4qAOdtgPoM,938013
 mineru/utils/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
 mineru/utils/block_pre_proc.py,sha256=uGBmxf2MR9bplTnQI8xHjCI-kj3plRhJr0hcWKidbOQ,9632
 mineru/utils/block_sort.py,sha256=5e1mOLB3W7xu5Y1hmhvGSHPL_aQ41R_4VXcP4vjYAOU,12976
 mineru/utils/boxbase.py,sha256=moP660AmZq_udHEsfvFkTQdJ4gjrrBwN7t0Enx7CIL8,6903
-mineru/utils/check_mac_env.py,sha256=pbmbcnS60zZRqNRBxFJbbPrCosU1lC464b0v6JUlgaE,1031
+mineru/utils/check_sys_env.py,sha256=TRjzg4xWyoSGrgv4KaP225A-99xBgLAfZ1cPcGqrBAA,1191
 mineru/utils/cli_parser.py,sha256=4seFAu1kulsYnw6WM2q_cxgEOt2tErZVkI-LNEF_kGw,1445
 mineru/utils/config_reader.py,sha256=IRVWTpBnbnRpck6eXZUKw-fcLt7hon5S4uqWW-RBb1w,4075
 mineru/utils/cut_image.py,sha256=g3m4nfcJNWlxi-P0kpXTtlmspXkMcLCfGwmYuQ-Z2hE,751
@@ -168,17 +169,19 @@ mineru/utils/magic_model_utils.py,sha256=2xOvi4oqg3MSw1FUrJTnYDtWeFrrm6qbmlEorLZ
 mineru/utils/model_utils.py,sha256=6OsgFLsABX5JuShSzCMSNHWV-yi-1cjwHweafyxIgRo,18448
 mineru/utils/models_download_utils.py,sha256=UfjvwhxO6BkJHa5JSpEVNZ71GoLMPMmJpym3THET2T4,2957
 mineru/utils/ocr_utils.py,sha256=lPIrwNUib5mrzUkponRYHuUCdjV2qvETNLSzOLyflrU,15990
+mineru/utils/os_env_config.py,sha256=ZNtkR4KrJW72CeIoTNzGDL6tMKv_hL8nzvWIssGWbqY,842
 mineru/utils/pdf_classify.py,sha256=6DF5pH_9Uq83fsFtp7n4i-OdYQGzoNOV9L0VBUhgBMQ,8078
-mineru/utils/pdf_image_tools.py,sha256=mioLEHOdDtM1YbspNaa0wWhnLw_4-H7rdHlIM40vrT4,4077
+mineru/utils/pdf_image_tools.py,sha256=86_xvsGOEde5QGlKz5uJemjoO1upr6n_K7o3lCdyIjQ,7981
+mineru/utils/pdf_page_id.py,sha256=em966k12CRW4Rj49RGiLB_8ILwkXPBnWRetApax3eTs,400
 mineru/utils/pdf_reader.py,sha256=WeINm5SyWBUXT0wP9lzIbeHs8P6WUIkN6nVL5X4LzG4,3267
 mineru/utils/pdf_text_tool.py,sha256=KEztjfdqsIHHuiTEAMAL7Lr1OS3R7Ur-uTqGiCRjReQ,1364
 mineru/utils/run_async.py,sha256=rPeP4BCZerR8VByRDhiYzfZiahLVqoZEBVAS54dAjNg,1286
 mineru/utils/span_block_fix.py,sha256=0eVQjJCrT03woRt9hoh6Uu42Tp1dacfGTv2x3B9qq94,8797
 mineru/utils/span_pre_proc.py,sha256=h41q2uQajI0xQbc_30hqaju1dv3oVYxBAlKgURl8HIc,13692
 mineru/utils/table_merge.py,sha256=d98zNbM1ZQ8V1kUt6RugParNUNPv7DGL-XKIzR3iJVQ,15360
-mineru-2.6.3.dist-info/licenses/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
-mineru-2.6.3.dist-info/METADATA,sha256=mUBTxFG5tgdyQ1caZVRNrk4MhIX36PECY09dwCNYXZ4,70689
-mineru-2.6.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-mineru-2.6.3.dist-info/entry_points.txt,sha256=luXmbhPiZK_tKlRgWuYOaW_V6EFpG-yJcAevVv9MEqE,252
-mineru-2.6.3.dist-info/top_level.txt,sha256=zuGQfZcbsHv4I4oKI9gaKPqEWBFm6xJroKuug2LnKP8,7
-mineru-2.6.3.dist-info/RECORD,,
+mineru-2.6.5.dist-info/licenses/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
+mineru-2.6.5.dist-info/METADATA,sha256=BUj9fYR_NiRpYGqXWd3J_fOTE8IN0bdl0PgY6FUGVcg,72362
+mineru-2.6.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+mineru-2.6.5.dist-info/entry_points.txt,sha256=JbtrCPhx1T32s7TONUsteKg-24ZwRT1HSiFtW5jypVw,376
+mineru-2.6.5.dist-info/top_level.txt,sha256=zuGQfZcbsHv4I4oKI9gaKPqEWBFm6xJroKuug2LnKP8,7
+mineru-2.6.5.dist-info/RECORD,,

{mineru-2.6.3.dist-info → mineru-2.6.5.dist-info}/entry_points.txt RENAMED Viewed

@@ -2,5 +2,7 @@
 mineru = mineru.cli:client.main
 mineru-api = mineru.cli.fast_api:main
 mineru-gradio = mineru.cli.gradio_app:main
+mineru-lmdeploy-server = mineru.cli.vlm_server:lmdeploy_server
 mineru-models-download = mineru.cli.models_download:download_models
-mineru-vllm-server = mineru.cli.vlm_vllm_server:main
+mineru-openai-server = mineru.cli.vlm_server:openai_server
+mineru-vllm-server = mineru.cli.vlm_server:vllm_server

mineru/cli/vlm_vllm_server.py DELETED Viewed

@@ -1,4 +0,0 @@
-from mineru.model.vlm_vllm_model.server import main
-if __name__ == "__main__":
-    main()

/mineru/model/{vlm_vllm_model → vlm}/__init__.py RENAMED Viewed

File without changes

/mineru/model/{vlm_vllm_model/server.py → vlm/vllm_server.py} RENAMED Viewed

File without changes

{mineru-2.6.3.dist-info → mineru-2.6.5.dist-info}/WHEEL RENAMED Viewed

File without changes

{mineru-2.6.3.dist-info → mineru-2.6.5.dist-info}/licenses/LICENSE.md RENAMED Viewed

File without changes

{mineru-2.6.3.dist-info → mineru-2.6.5.dist-info}/top_level.txt RENAMED Viewed

File without changes

mineru 2.6.3__py3-none-any.whl → 2.6.5__py3-none-any.whl

mineru 2.6.3py3-none-any.whl → 2.6.5py3-none-any.whl