mineru 2.6.5__py3-none-any.whl → 2.6.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -159,7 +159,6 @@ def batch_image_analyze(
 
     model_manager = ModelSingleton()
 
-    batch_ratio = 1
     device = get_device()
 
     if str(device).startswith('npu'):
@@ -173,25 +172,21 @@ def batch_image_analyze(
                 "Please ensure that the torch_npu package is installed correctly."
             ) from e
 
-    if str(device).startswith('npu') or str(device).startswith('cuda'):
-        vram = get_vram(device)
-        if vram is not None:
-            gpu_memory = int(os.getenv('MINERU_VIRTUAL_VRAM_SIZE', round(vram)))
-            if gpu_memory >= 16:
-                batch_ratio = 16
-            elif gpu_memory >= 12:
-                batch_ratio = 8
-            elif gpu_memory >= 8:
-                batch_ratio = 4
-            elif gpu_memory >= 6:
-                batch_ratio = 2
-            else:
-                batch_ratio = 1
-            logger.info(f'gpu_memory: {gpu_memory} GB, batch_ratio: {batch_ratio}')
-        else:
-            # Default batch_ratio when VRAM can't be determined
-            batch_ratio = 1
-            logger.info(f'Could not determine GPU memory, using default batch_ratio: {batch_ratio}')
+    gpu_memory = get_vram(device)
+    if gpu_memory >= 16:
+        batch_ratio = 16
+    elif gpu_memory >= 12:
+        batch_ratio = 8
+    elif gpu_memory >= 8:
+        batch_ratio = 4
+    elif gpu_memory >= 6:
+        batch_ratio = 2
+    else:
+        batch_ratio = 1
+    logger.info(
+        f'GPU Memory: {gpu_memory} GB, Batch Ratio: {batch_ratio}. '
+        f'You can set MINERU_VIRTUAL_VRAM_SIZE environment variable to adjust GPU memory allocation.'
+    )
 
     # 检测torch的版本号
     import torch
@@ -81,20 +81,16 @@ def set_default_gpu_memory_utilization() -> float:
 def set_default_batch_size() -> int:
     try:
         device = get_device()
-        vram = get_vram(device)
-        if vram is not None:
-            gpu_memory = int(os.getenv('MINERU_VIRTUAL_VRAM_SIZE', round(vram)))
-            if gpu_memory >= 16:
-                batch_size = 8
-            elif gpu_memory >= 8:
-                batch_size = 4
-            else:
-                batch_size = 1
-            logger.info(f'gpu_memory: {gpu_memory} GB, batch_size: {batch_size}')
+        gpu_memory = get_vram(device)
+
+        if gpu_memory >= 16:
+            batch_size = 8
+        elif gpu_memory >= 8:
+            batch_size = 4
         else:
-            # Default batch_ratio when VRAM can't be determined
             batch_size = 1
-            logger.info(f'Could not determine GPU memory, using default batch_ratio: {batch_size}')
+        logger.info(f'gpu_memory: {gpu_memory} GB, batch_size: {batch_size}')
+
     except Exception as e:
         logger.warning(f'Error determining VRAM: {e}, using default batch_ratio: 1')
         batch_size = 1
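
Both hunks above drop the caller-side MINERU_VIRTUAL_VRAM_SIZE handling and the `vram is not None` branches, because `get_vram()` (reworked in the model_utils hunk further below) now always returns an integer GB figure; only the threshold tables remain in the callers. A minimal standalone sketch of those tables for reference — the two helper names are illustrative, not functions in the package:

```python
def select_batch_ratio(gpu_memory_gb: int) -> int:
    """Mirror of the thresholds used in batch_image_analyze after this change."""
    if gpu_memory_gb >= 16:
        return 16
    elif gpu_memory_gb >= 12:
        return 8
    elif gpu_memory_gb >= 8:
        return 4
    elif gpu_memory_gb >= 6:
        return 2
    return 1


def select_batch_size(gpu_memory_gb: int) -> int:
    """Mirror of the thresholds used in set_default_batch_size after this change."""
    if gpu_memory_gb >= 16:
        return 8
    elif gpu_memory_gb >= 8:
        return 4
    return 1


# quick sanity checks of the mapping shown in the diff
assert select_batch_ratio(24) == 16 and select_batch_ratio(4) == 1
assert select_batch_size(12) == 4
```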
mineru/cli/client.py CHANGED
@@ -113,7 +113,7 @@ if is_mac_os_version_supported():
     '--formula',
     'formula_enable',
     type=bool,
-    help='Enable formula parsing. Default is True. Adapted only for the case where the backend is set to "pipeline".',
+    help='Enable formula parsing. Default is True. ',
     default=True,
 )
 @click.option(
@@ -121,7 +121,7 @@ if is_mac_os_version_supported():
     '--table',
     'table_enable',
     type=bool,
-    help='Enable table parsing. Default is True. Adapted only for the case where the backend is set to "pipeline".',
+    help='Enable table parsing. Default is True. ',
     default=True,
 )
 @click.option(
@@ -172,9 +172,8 @@ def main(
    def get_virtual_vram_size() -> int:
        if virtual_vram is not None:
            return virtual_vram
-        if get_device_mode().startswith("cuda") or get_device_mode().startswith("npu"):
-            return round(get_vram(get_device_mode()))
-        return 1
+        else:
+            return get_vram(get_device_mode())
    if os.getenv('MINERU_VIRTUAL_VRAM_SIZE', None) is None:
        os.environ['MINERU_VIRTUAL_VRAM_SIZE']= str(get_virtual_vram_size())
 
mineru/cli/fast_api.py CHANGED
@@ -8,7 +8,7 @@ import click
 import zipfile
 from pathlib import Path
 import glob
-from fastapi import FastAPI, UploadFile, File, Form
+from fastapi import Depends, FastAPI, HTTPException, UploadFile, File, Form
 from fastapi.middleware.gzip import GZipMiddleware
 from fastapi.responses import JSONResponse, FileResponse
 from starlette.background import BackgroundTask
@@ -21,14 +21,53 @@ from mineru.utils.cli_parser import arg_parse
 from mineru.utils.guess_suffix_or_lang import guess_suffix_by_path
 from mineru.version import __version__
 
-app = FastAPI()
-app.add_middleware(GZipMiddleware, minimum_size=1000)
+# 并发控制器
+_request_semaphore: Optional[asyncio.Semaphore] = None
+
+# 并发控制依赖函数
+async def limit_concurrency():
+    if _request_semaphore is not None:
+        if _request_semaphore.locked():
+            raise HTTPException(
+                status_code=503,
+                detail=f"Server is at maximum capacity: {os.getenv('MINERU_API_MAX_CONCURRENT_REQUESTS', 'unset')}. Please try again later."
+            )
+        async with _request_semaphore:
+            yield
+    else:
+        yield
+
+def create_app():
+    # By default, the OpenAPI documentation endpoints (openapi_url, docs_url, redoc_url) are enabled.
+    # To disable the FastAPI docs and schema endpoints, set the environment variable MINERU_API_ENABLE_FASTAPI_DOCS=0.
+    enable_docs = str(os.getenv("MINERU_API_ENABLE_FASTAPI_DOCS", "1")).lower() in ("1", "true", "yes")
+    app = FastAPI(
+        openapi_url="/openapi.json" if enable_docs else None,
+        docs_url="/docs" if enable_docs else None,
+        redoc_url="/redoc" if enable_docs else None,
+    )
+
+    # 初始化并发控制器:从环境变量MINERU_API_MAX_CONCURRENT_REQUESTS读取
+    global _request_semaphore
+    try:
+        max_concurrent_requests = int(os.getenv("MINERU_API_MAX_CONCURRENT_REQUESTS", "0"))
+    except ValueError:
+        max_concurrent_requests = 0
+
+    if max_concurrent_requests > 0:
+        _request_semaphore = asyncio.Semaphore(max_concurrent_requests)
+        logger.info(f"Request concurrency limited to {max_concurrent_requests}")
+
+    app.add_middleware(GZipMiddleware, minimum_size=1000)
+    return app
+
+app = create_app()
 
 
 def sanitize_filename(filename: str) -> str:
     """
     格式化压缩文件的文件名
-    移除路径遍历字符, 保留 Unicode 字母、数字、._-
+    移除路径遍历字符, 保留 Unicode 字母、数字、._-
     禁止隐藏文件
     """
     sanitized = re.sub(r'[/\\\.]{2,}|[/\\]', '', filename)
@@ -60,24 +99,48 @@ def get_infer_result(file_suffix_identifier: str, pdf_name: str, parse_dir: str)
         return None
 
 
-@app.post(path="/file_parse",)
+@app.post(path="/file_parse", dependencies=[Depends(limit_concurrency)])
 async def parse_pdf(
-        files: List[UploadFile] = File(...),
-        output_dir: str = Form("./output"),
-        lang_list: List[str] = Form(["ch"]),
-        backend: str = Form("pipeline"),
-        parse_method: str = Form("auto"),
-        formula_enable: bool = Form(True),
-        table_enable: bool = Form(True),
-        server_url: Optional[str] = Form(None),
-        return_md: bool = Form(True),
-        return_middle_json: bool = Form(False),
-        return_model_output: bool = Form(False),
-        return_content_list: bool = Form(False),
-        return_images: bool = Form(False),
-        response_format_zip: bool = Form(False),
-        start_page_id: int = Form(0),
-        end_page_id: int = Form(99999),
+        files: List[UploadFile] = File(..., description="Upload pdf or image files for parsing"),
+        output_dir: str = Form("./output", description="Output local directory"),
+        lang_list: List[str] = Form(
+            ["ch"],
+            description="""(Adapted only for pipeline backend)Input the languages in the pdf to improve OCR accuracy.
+            Options: ch, ch_server, ch_lite, en, korean, japan, chinese_cht, ta, te, ka, th, el, latin, arabic, east_slavic, cyrillic, devanagari.
+            """
+        ),
+        backend: str = Form(
+            "pipeline",
+            description="""The backend for parsing:
+            - pipeline: More general
+            - vlm-transformers: More general, but slower
+            - vlm-mlx-engine: Faster than transformers (need apple silicon and macOS 13.5+)
+            - vlm-vllm-async-engine: Faster (vllm-engine, need vllm installed)
+            - vlm-lmdeploy-engine: Faster (lmdeploy-engine, need lmdeploy installed)
+            - vlm-http-client: Faster (client suitable for openai-compatible servers)"""
+        ),
+        parse_method: str = Form(
+            "auto",
+            description="""(Adapted only for pipeline backend)The method for parsing PDF:
+            - auto: Automatically determine the method based on the file type
+            - txt: Use text extraction method
+            - ocr: Use OCR method for image-based PDFs
+            """
+        ),
+        formula_enable: bool = Form(True, description="Enable formula parsing."),
+        table_enable: bool = Form(True, description="Enable table parsing."),
+        server_url: Optional[str] = Form(
+            None,
+            description="(Adapted only for vlm-http-client backend)openai compatible server url, e.g., http://127.0.0.1:30000"
+        ),
+        return_md: bool = Form(True, description="Return markdown content in response"),
+        return_middle_json: bool = Form(False, description="Return middle JSON in response"),
+        return_model_output: bool = Form(False, description="Return model output JSON in response"),
+        return_content_list: bool = Form(False, description="Return content list JSON in response"),
+        return_images: bool = Form(False, description="Return extracted images in response"),
+        response_format_zip: bool = Form(False, description="Return results as a ZIP file instead of JSON"),
+        start_page_id: int = Form(0, description="The starting page for PDF parsing, beginning from 0"),
+        end_page_id: int = Form(99999, description="The ending page for PDF parsing, beginning from 0"),
 ):
 
     # 获取命令行配置参数
@@ -153,7 +216,7 @@ async def parse_pdf(
     # 根据 response_format_zip 决定返回类型
     if response_format_zip:
         zip_fd, zip_path = tempfile.mkstemp(suffix=".zip", prefix="mineru_results_")
-        os.close(zip_fd)
+        os.close(zip_fd)
         with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
             for pdf_name in pdf_file_names:
                 safe_pdf_name = sanitize_filename(pdf_name)
@@ -178,7 +241,7 @@ async def parse_pdf(
 
                if return_model_output:
                    path = os.path.join(parse_dir, f"{pdf_name}_model.json")
-                    if os.path.exists(path):
+                    if os.path.exists(path):
                        zf.write(path, arcname=os.path.join(safe_pdf_name, os.path.basename(path)))
 
                if return_content_list:
@@ -259,11 +322,16 @@ def main(ctx, host, port, reload, **kwargs):
    # 将配置参数存储到应用状态中
    app.state.config = kwargs
 
+    # 将 CLI 的并发参数同步到环境变量,确保 uvicorn 重载子进程可见
+    try:
+        mcr = int(kwargs.get("mineru_api_max_concurrent_requests", 0) or 0)
+    except ValueError:
+        mcr = 0
+    os.environ["MINERU_API_MAX_CONCURRENT_REQUESTS"] = str(mcr)
+
    """启动MinerU FastAPI服务器的命令行入口"""
    print(f"Start MinerU FastAPI Service: http://{host}:{port}")
-    print("The API documentation can be accessed at the following address:")
-    print(f"- Swagger UI: http://{host}:{port}/docs")
-    print(f"- ReDoc: http://{host}:{port}/redoc")
+    print(f"API documentation: http://{host}:{port}/docs")
 
    uvicorn.run(
        "mineru.cli.fast_api:app",
@@ -274,4 +342,4 @@ def main(ctx, host, port, reload, **kwargs):
 
 
 if __name__ == "__main__":
-    main()
+    main()
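
Taken together, the fast_api.py changes are visible to API clients in three ways: the form parameters now carry descriptions in the generated OpenAPI schema, the /docs, /redoc and /openapi.json endpoints can be switched off with MINERU_API_ENABLE_FASTAPI_DOCS=0, and when MINERU_API_MAX_CONCURRENT_REQUESTS is set to a positive integer, /file_parse answers with HTTP 503 once all slots are taken. A hedged client-side sketch; the host, port, and input file are assumptions, while the endpoint path and form fields follow the signature shown above:

```python
import requests  # third-party HTTP client, assumed installed

# Assumed local address; mineru-api prints the actual one at startup.
url = "http://127.0.0.1:8000/file_parse"

with open("sample.pdf", "rb") as f:  # hypothetical input file
    resp = requests.post(
        url,
        files=[("files", ("sample.pdf", f, "application/pdf"))],
        data={
            "backend": "pipeline",        # form fields as declared in parse_pdf
            "return_md": "true",
            "return_content_list": "false",
        },
        timeout=600,
    )

if resp.status_code == 503:
    # Returned by the limit_concurrency dependency when the semaphore is saturated.
    print("Server at maximum capacity, retry later:", resp.json().get("detail"))
else:
    resp.raise_for_status()
    print(list(resp.json().keys()))
```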
@@ -428,8 +428,6 @@ def clean_memory(device='cuda'):
 
 def clean_vram(device, vram_threshold=8):
     total_memory = get_vram(device)
-    if total_memory is not None:
-        total_memory = int(os.getenv('MINERU_VIRTUAL_VRAM_SIZE', round(total_memory)))
     if total_memory and total_memory <= vram_threshold:
        gc_start = time.time()
        clean_memory(device)
@@ -437,13 +435,28 @@ def clean_vram(device, vram_threshold=8):
        # logger.info(f"gc time: {gc_time}")
 
 
-def get_vram(device):
+def get_vram(device) -> int:
+    env_vram = os.getenv("MINERU_VIRTUAL_VRAM_SIZE")
+
+    # 如果环境变量已配置,尝试解析并返回
+    if env_vram is not None:
+        try:
+            total_memory = int(env_vram)
+            if total_memory > 0:
+                return total_memory
+            else:
+                logger.warning(
+                    f"MINERU_VIRTUAL_VRAM_SIZE value '{env_vram}' is not positive, falling back to auto-detection")
+        except ValueError:
+            logger.warning(
+                f"MINERU_VIRTUAL_VRAM_SIZE value '{env_vram}' is not a valid integer, falling back to auto-detection")
+
+    # 环境变量未配置或配置错误,根据device自动获取
+    total_memory = 1
     if torch.cuda.is_available() and str(device).startswith("cuda"):
-        total_memory = torch.cuda.get_device_properties(device).total_memory / (1024 ** 3)  # 将字节转换为 GB
-        return total_memory
+        total_memory = round(torch.cuda.get_device_properties(device).total_memory / (1024 ** 3))  # 将字节转换为 GB
    elif str(device).startswith("npu"):
        if torch_npu.npu.is_available():
-            total_memory = torch_npu.npu.get_device_properties(device).total_memory / (1024 ** 3)  # 转为 GB
-            return total_memory
-        else:
-            return None
+            total_memory = round(torch_npu.npu.get_device_properties(device).total_memory / (1024 ** 3))  # 转为 GB
+
+    return total_memory
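
With this rework, `get_vram()` becomes the single place where MINERU_VIRTUAL_VRAM_SIZE is interpreted: a positive integer value of the variable is returned directly, non-positive or unparsable values are logged and ignored, and otherwise the detected device memory is rounded to whole GB, with 1 as the fallback when neither CUDA nor NPU is available. A standalone restatement of that precedence, with the device query stubbed out as an assumption (`effective_vram_gb` is illustrative, not part of the package):

```python
from typing import Optional

def effective_vram_gb(env_value: Optional[str], detected_gb: Optional[int]) -> int:
    """Illustrative restatement of the get_vram() precedence above;
    detected_gb stands in for the torch.cuda / torch_npu query."""
    if env_value is not None:
        try:
            override = int(env_value)
            if override > 0:
                return override
            # non-positive: the real code logs a warning, here we just fall through
        except ValueError:
            pass  # unparsable: likewise falls through to auto-detection
    return detected_gb if detected_gb else 1

assert effective_vram_gb("12", 24) == 12   # env override wins
assert effective_vram_gb("abc", 24) == 24  # bad override ignored
assert effective_vram_gb(None, None) == 1  # CPU / unknown device fallback
```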
mineru/version.py CHANGED
@@ -1 +1 @@
-__version__ = "2.6.5"
+__version__ = "2.6.6"
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mineru
-Version: 2.6.5
+Version: 2.6.6
 Summary: A practical tool for converting PDF to Markdown
 License: AGPL-3.0
 Project-URL: homepage, https://mineru.net/
@@ -74,8 +74,8 @@ Requires-Dist: fastapi; extra == "api"
 Requires-Dist: python-multipart; extra == "api"
 Requires-Dist: uvicorn; extra == "api"
 Provides-Extra: gradio
-Requires-Dist: gradio<6,>=5.34; extra == "gradio"
-Requires-Dist: gradio-pdf>=0.0.22; extra == "gradio"
+Requires-Dist: gradio==5.49.1; extra == "gradio"
+Requires-Dist: gradio-pdf==0.0.22; extra == "gradio"
 Provides-Extra: core
 Requires-Dist: mineru[vlm]; extra == "core"
 Requires-Dist: mineru[pipeline]; extra == "core"
@@ -134,6 +134,13 @@ Dynamic: license-file
 </div>
 
 # Changelog
+
+- 2025/12/02 2.6.6 Release
+  - `mineru-api` tool optimizations
+    - Added descriptive text to `mineru-api` interface parameters to improve API documentation readability.
+    - You can use the environment variable `MINERU_API_ENABLE_FASTAPI_DOCS` to control whether the auto-generated interface documentation page is enabled (enabled by default).
+    - Added concurrency configuration options for the `vlm-vllm-async-engine`, `vlm-lmdeploy-engine`, and `vlm-http-client` backends. Users can use the environment variable `MINERU_API_MAX_CONCURRENT_REQUESTS` to set the maximum number of concurrent API requests (unlimited by default).
+
 - 2025/11/26 2.6.5 Release
   - Added support for a new backend vlm-lmdeploy-engine. Its usage is similar to vlm-vllm-(async)engine, but it uses lmdeploy as the inference engine and additionally supports native inference acceleration on Windows platforms compared to vllm.
 
@@ -887,6 +894,8 @@ Currently, some models in this project are trained based on YOLO. However, since
 - [pdfminer.six](https://github.com/pdfminer/pdfminer.six)
 - [pypdf](https://github.com/py-pdf/pypdf)
 - [magika](https://github.com/google/magika)
+- [vLLM](https://github.com/vllm-project/vllm)
+- [LMDeploy](https://github.com/InternLM/lmdeploy)
 
 # Citation
 
@@ -1,5 +1,5 @@
 mineru/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
-mineru/version.py,sha256=b8L3dijps7oaMPmOpJzOuXwvOcbIuro9wWmuPwiL87o,22
+mineru/version.py,sha256=MJHGx-Qo0nycI7WHSavnK8Mok6HS_De_qLfGWXih6Og,22
 mineru/backend/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
 mineru/backend/utils.py,sha256=GLJU3IznDmhE1_qNmkU1UOtsuskIHBezgsEVO6Uar-Y,698
 mineru/backend/pipeline/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
@@ -8,19 +8,19 @@ mineru/backend/pipeline/model_init.py,sha256=OAylOcQD9gu5TBcX7nMt7X5NpJMtQICI5Iv
 mineru/backend/pipeline/model_json_to_middle_json.py,sha256=reXkUR_wKmJD64d7vRNXMxFviwkzDlGjRshpdwsVquI,10951
 mineru/backend/pipeline/model_list.py,sha256=7cXMBfZrP0K6qWueg1D_-WoUANeSINzkn_ic9E7YQLs,222
 mineru/backend/pipeline/para_split.py,sha256=Kq95MmvkPm7rKxlCSGiTvVKyF7CErHI2eGGAs5sLl0Q,17119
-mineru/backend/pipeline/pipeline_analyze.py,sha256=O_HGifodg03VZbmTve-U6Cmo0T03AmuK86t1v1J9X-Q,6897
+mineru/backend/pipeline/pipeline_analyze.py,sha256=GkGOrWGnBSswUik3nt_m76bCwDISC9sxXZ6xRX3L154,6528
 mineru/backend/pipeline/pipeline_magic_model.py,sha256=w8jGx8f6yZN0Wf2yPP3L9rYKc9rogxreZCrUJzJvPO8,14974
 mineru/backend/pipeline/pipeline_middle_json_mkcontent.py,sha256=YlnEbbUnkniZXS13aLo5mjfFQvQM5SrIVvTAGBZsLmw,14478
 mineru/backend/vlm/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
 mineru/backend/vlm/model_output_to_middle_json.py,sha256=AqYX44gS9crUO_t7SuUatD71EVjow6pI6yA2Ik3gQ0s,5139
-mineru/backend/vlm/utils.py,sha256=taiPNKtsykImUYkkosk1CjxFIJEutygK8iZTLly-ZqU,3905
+mineru/backend/vlm/utils.py,sha256=JMgS3SMFcHJYH2jIx-Xhs-P2a1bmT8U6Kn60IL0OmQA,3570
 mineru/backend/vlm/vlm_analyze.py,sha256=wP3vuYGVec0hRsDAuzfSm2HD4Muu7wSWL767qxd_yqw,11690
 mineru/backend/vlm/vlm_magic_model.py,sha256=Pd0sOr7G1crAJIVeq6h_03gNSuxmV5U8dvGTGT_rrjs,23452
 mineru/backend/vlm/vlm_middle_json_mkcontent.py,sha256=5V-AU9KkxxMn0DDSQBrb15I4GVpEyiQy8uNI_tQhS6M,13498
 mineru/cli/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
-mineru/cli/client.py,sha256=__CQknekVeq6s72JzHQRPSpR7mfNaO-ob9wq6oiEj6s,7047
+mineru/cli/client.py,sha256=XSEIr4klUuufMAWn5IioZdXpg1xAxqRZF0HkaVIhxh0,6815
 mineru/cli/common.py,sha256=zhNOJCOnTSMbWdUWSZG-nf0odv5vBRtdZYZ1UbUPH3g,14369
-mineru/cli/fast_api.py,sha256=t5bda769VbM5iokAboiJfPIOnm-r5GTFReE-KQy8L3g,10941
+mineru/cli/fast_api.py,sha256=lLxQKKHmD8ruoZGcE6LrXzr3pQIxvw8OdJrQq_FNLSM,14447
 mineru/cli/gradio_app.py,sha256=EUPuRHHCOECrE3E3VNEeuMDYeC3nicurOYfk8YJSOMw,15646
 mineru/cli/models_download.py,sha256=LNfoIpUlJM7m7qb2SiCxtjMDw4jILBQtZwNP2JoY81U,4815
 mineru/cli/vlm_server.py,sha256=27HaqO3wpMXSA_nA3CC6JOBTHK3q66SP00cD6m9HuQE,1974
@@ -166,7 +166,7 @@ mineru/utils/hash_utils.py,sha256=UPS_8NRBmVumdyOv16Lmv6Ly2xK8OVDJEe5gG6gKIFk,85
 mineru/utils/language.py,sha256=7RT3mxSa7jdpoC5ySd7ZddHA7TO7UsnmDOWiYZAxuyg,1433
 mineru/utils/llm_aided.py,sha256=9WUytvxenSAuaWR4sTQhVPQ5h8pY0wVOH1O2sj_6dLs,5149
 mineru/utils/magic_model_utils.py,sha256=2xOvi4oqg3MSw1FUrJTnYDtWeFrrm6qbmlEorLZSaYs,5650
-mineru/utils/model_utils.py,sha256=6OsgFLsABX5JuShSzCMSNHWV-yi-1cjwHweafyxIgRo,18448
+mineru/utils/model_utils.py,sha256=6moOQqE5ShHaJKkENXP8BXJA7RCWtOGlYHZ3nidwmZs,18977
 mineru/utils/models_download_utils.py,sha256=UfjvwhxO6BkJHa5JSpEVNZ71GoLMPMmJpym3THET2T4,2957
 mineru/utils/ocr_utils.py,sha256=lPIrwNUib5mrzUkponRYHuUCdjV2qvETNLSzOLyflrU,15990
 mineru/utils/os_env_config.py,sha256=ZNtkR4KrJW72CeIoTNzGDL6tMKv_hL8nzvWIssGWbqY,842
@@ -179,9 +179,9 @@ mineru/utils/run_async.py,sha256=rPeP4BCZerR8VByRDhiYzfZiahLVqoZEBVAS54dAjNg,128
 mineru/utils/span_block_fix.py,sha256=0eVQjJCrT03woRt9hoh6Uu42Tp1dacfGTv2x3B9qq94,8797
 mineru/utils/span_pre_proc.py,sha256=h41q2uQajI0xQbc_30hqaju1dv3oVYxBAlKgURl8HIc,13692
 mineru/utils/table_merge.py,sha256=d98zNbM1ZQ8V1kUt6RugParNUNPv7DGL-XKIzR3iJVQ,15360
-mineru-2.6.5.dist-info/licenses/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
-mineru-2.6.5.dist-info/METADATA,sha256=BUj9fYR_NiRpYGqXWd3J_fOTE8IN0bdl0PgY6FUGVcg,72362
-mineru-2.6.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-mineru-2.6.5.dist-info/entry_points.txt,sha256=JbtrCPhx1T32s7TONUsteKg-24ZwRT1HSiFtW5jypVw,376
-mineru-2.6.5.dist-info/top_level.txt,sha256=zuGQfZcbsHv4I4oKI9gaKPqEWBFm6xJroKuug2LnKP8,7
-mineru-2.6.5.dist-info/RECORD,,
+mineru-2.6.6.dist-info/licenses/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
+mineru-2.6.6.dist-info/METADATA,sha256=9f-9lcSQXdLCxbYmHItJbLgDc-TZG7u7dVUWMS0SzXA,73095
+mineru-2.6.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+mineru-2.6.6.dist-info/entry_points.txt,sha256=JbtrCPhx1T32s7TONUsteKg-24ZwRT1HSiFtW5jypVw,376
+mineru-2.6.6.dist-info/top_level.txt,sha256=zuGQfZcbsHv4I4oKI9gaKPqEWBFm6xJroKuug2LnKP8,7
+mineru-2.6.6.dist-info/RECORD,,