mineru 2.6.1__py3-none-any.whl → 2.6.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mineru/backend/pipeline/batch_analyze.py +20 -43
- mineru/backend/pipeline/model_init.py +1 -1
- mineru/backend/pipeline/model_json_to_middle_json.py +1 -1
- mineru/backend/pipeline/pipeline_middle_json_mkcontent.py +17 -4
- mineru/backend/vlm/vlm_analyze.py +11 -1
- mineru/backend/vlm/vlm_middle_json_mkcontent.py +6 -3
- mineru/cli/client.py +24 -14
- mineru/cli/gradio_app.py +4 -1
- mineru/cli/models_download.py +1 -1
- mineru/model/ocr/{paddleocr2pytorch/pytorch_paddle.py → pytorch_paddle.py} +1 -1
- mineru/model/table/rec/RapidTable.py +1 -1
- mineru/model/utils/pytorchocr/utils/resources/arch_config.yaml +1 -317
- mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_latin_dict.txt +545 -211
- mineru/model/utils/pytorchocr/utils/resources/models_config.yml +0 -12
- mineru/utils/block_sort.py +3 -2
- mineru/utils/check_mac_env.py +30 -0
- mineru/utils/llm_aided.py +13 -8
- mineru/version.py +1 -1
- {mineru-2.6.1.dist-info → mineru-2.6.3.dist-info}/METADATA +75 -39
- {mineru-2.6.1.dist-info → mineru-2.6.3.dist-info}/RECORD +24 -35
- mineru/model/ocr/paddleocr2pytorch/__init__.py +0 -1
- mineru/model/utils/pytorchocr/utils/resources/dict/arabic_dict.txt +0 -162
- mineru/model/utils/pytorchocr/utils/resources/dict/chinese_cht_dict.txt +0 -8421
- mineru/model/utils/pytorchocr/utils/resources/dict/cyrillic_dict.txt +0 -163
- mineru/model/utils/pytorchocr/utils/resources/dict/devanagari_dict.txt +0 -167
- mineru/model/utils/pytorchocr/utils/resources/dict/en_dict.txt +0 -95
- mineru/model/utils/pytorchocr/utils/resources/dict/japan_dict.txt +0 -4399
- mineru/model/utils/pytorchocr/utils/resources/dict/korean_dict.txt +0 -3688
- mineru/model/utils/pytorchocr/utils/resources/dict/latin_dict.txt +0 -185
- mineru/model/utils/pytorchocr/utils/resources/dict/ppocr_keys_v1.txt +0 -6623
- mineru/model/utils/pytorchocr/utils/resources/dict/ta_dict.txt +0 -128
- mineru/model/utils/pytorchocr/utils/resources/dict/te_dict.txt +0 -151
- {mineru-2.6.1.dist-info → mineru-2.6.3.dist-info}/WHEEL +0 -0
- {mineru-2.6.1.dist-info → mineru-2.6.3.dist-info}/entry_points.txt +0 -0
- {mineru-2.6.1.dist-info → mineru-2.6.3.dist-info}/licenses/LICENSE.md +0 -0
- {mineru-2.6.1.dist-info → mineru-2.6.3.dist-info}/top_level.txt +0 -0
|
@@ -3,26 +3,14 @@ lang:
|
|
|
3
3
|
det: ch_PP-OCRv5_det_infer.pth
|
|
4
4
|
rec: ch_PP-OCRv5_rec_infer.pth
|
|
5
5
|
dict: ppocrv5_dict.txt
|
|
6
|
-
ch_lite_v4:
|
|
7
|
-
det: ch_PP-OCRv5_det_infer.pth
|
|
8
|
-
rec: ch_PP-OCRv4_rec_infer.pth
|
|
9
|
-
dict: ppocr_keys_v1.txt
|
|
10
6
|
ch_server:
|
|
11
7
|
det: ch_PP-OCRv5_det_infer.pth
|
|
12
8
|
rec: ch_PP-OCRv5_rec_server_infer.pth
|
|
13
9
|
dict: ppocrv5_dict.txt
|
|
14
|
-
ch_server_v4:
|
|
15
|
-
det: ch_PP-OCRv5_det_infer.pth
|
|
16
|
-
rec: ch_PP-OCRv4_rec_server_infer.pth
|
|
17
|
-
dict: ppocr_keys_v1.txt
|
|
18
10
|
ch:
|
|
19
11
|
det: ch_PP-OCRv5_det_infer.pth
|
|
20
12
|
rec: ch_PP-OCRv4_rec_server_doc_infer.pth
|
|
21
13
|
dict: ppocrv4_doc_dict.txt
|
|
22
|
-
en_v4:
|
|
23
|
-
det: en_PP-OCRv3_det_infer.pth
|
|
24
|
-
rec: en_PP-OCRv4_rec_infer.pth
|
|
25
|
-
dict: en_dict.txt
|
|
26
14
|
korean:
|
|
27
15
|
det: ch_PP-OCRv5_det_infer.pth
|
|
28
16
|
rec: korean_PP-OCRv5_rec_infer.pth
|
mineru/utils/block_sort.py
CHANGED
|
@@ -179,13 +179,14 @@ def insert_lines_into_block(block_bbox, line_height, page_w, page_h):
|
|
|
179
179
|
def model_init(model_name: str):
|
|
180
180
|
from transformers import LayoutLMv3ForTokenClassification
|
|
181
181
|
device_name = get_device()
|
|
182
|
+
device = torch.device(device_name)
|
|
182
183
|
bf_16_support = False
|
|
183
184
|
if device_name.startswith("cuda"):
|
|
184
|
-
|
|
185
|
+
if torch.cuda.get_device_properties(device).major >= 8:
|
|
186
|
+
bf_16_support = True
|
|
185
187
|
elif device_name.startswith("mps"):
|
|
186
188
|
bf_16_support = True
|
|
187
189
|
|
|
188
|
-
device = torch.device(device_name)
|
|
189
190
|
if model_name == 'layoutreader':
|
|
190
191
|
# Check whether the modelscope cache directory exists
|
|
191
192
|
layoutreader_model_dir = os.path.join(auto_download_and_get_model_root_path(ModelPath.layout_reader), ModelPath.layout_reader)
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Copyright (c) Opendatalab. All rights reserved.
|
|
2
|
+
import platform
|
|
3
|
+
|
|
4
|
+
from packaging import version
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# Detect if the current environment is a Mac computer
|
|
8
|
+
def is_mac_environment() -> bool:
|
|
9
|
+
return platform.system() == "Darwin"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
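# Detect if the CPU reports an ARM64 architecture (arm64/aarch64); on its own this does not confirm Apple Silicon, so combine it with is_mac_environment()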
|
|
13
|
+
def is_apple_silicon_cpu() -> bool:
|
|
14
|
+
return platform.machine() in ["arm64", "aarch64"]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# On a Mac with an Apple Silicon (ARM64) CPU, check whether the macOS version is at least min_version (default 13.5); return False on any other platform
|
|
18
|
+
def is_mac_os_version_supported(min_version: str = "13.5") -> bool:
|
|
19
|
+
if not is_mac_environment() or not is_apple_silicon_cpu():
|
|
20
|
+
return False
|
|
21
|
+
mac_version = platform.mac_ver()[0]
|
|
22
|
+
if not mac_version:
|
|
23
|
+
return False
|
|
24
|
+
# print("Mac OS Version:", mac_version)
|
|
25
|
+
return version.parse(mac_version) >= version.parse(min_version)
|
|
26
|
+
|
|
27
|
+
if __name__ == "__main__":
|
|
28
|
+
print("Is Mac Environment:", is_mac_environment())
|
|
29
|
+
print("Is Apple Silicon CPU:", is_apple_silicon_cpu())
|
|
30
|
+
print("Is Mac OS Version Supported (>=13.5):", is_mac_os_version_supported())
|
mineru/utils/llm_aided.py
CHANGED
|
@@ -84,16 +84,21 @@ Corrected title list:
|
|
|
84
84
|
max_retries = 3
|
|
85
85
|
dict_completion = None
|
|
86
86
|
|
|
87
|
+
# Build API call parameters
|
|
88
|
+
api_params = {
|
|
89
|
+
"model": title_aided_config["model"],
|
|
90
|
+
"messages": [{'role': 'user', 'content': title_optimize_prompt}],
|
|
91
|
+
"temperature": 0.7,
|
|
92
|
+
"stream": True,
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
# Only add extra_body when explicitly specified in config
|
|
96
|
+
if "enable_thinking" in title_aided_config:
|
|
97
|
+
api_params["extra_body"] = {"enable_thinking": title_aided_config["enable_thinking"]}
|
|
98
|
+
|
|
87
99
|
while retry_count < max_retries:
|
|
88
100
|
try:
|
|
89
|
-
completion = client.chat.completions.create(
|
|
90
|
-
model=title_aided_config["model"],
|
|
91
|
-
messages=[
|
|
92
|
-
{'role': 'user', 'content': title_optimize_prompt}],
|
|
93
|
-
extra_body={"enable_thinking": False},
|
|
94
|
-
temperature=0.7,
|
|
95
|
-
stream=True,
|
|
96
|
-
)
|
|
101
|
+
completion = client.chat.completions.create(**api_params)
|
|
97
102
|
content_pieces = []
|
|
98
103
|
for chunk in completion:
|
|
99
104
|
if chunk.choices and chunk.choices[0].delta.content is not None:
|
mineru/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "2.6.1"
|
|
1
|
+
__version__ = "2.6.3"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mineru
|
|
3
|
-
Version: 2.6.1
|
|
3
|
+
Version: 2.6.3
|
|
4
4
|
Summary: A practical tool for converting PDF to Markdown
|
|
5
5
|
License: AGPL-3.0
|
|
6
6
|
Project-URL: homepage, https://mineru.net/
|
|
@@ -37,7 +37,7 @@ Requires-Dist: scikit-image<1.0.0,>=0.25.0
|
|
|
37
37
|
Requires-Dist: openai<3,>=1.70.0
|
|
38
38
|
Requires-Dist: beautifulsoup4<5,>=4.13.5
|
|
39
39
|
Requires-Dist: magika<0.7.0,>=0.6.2
|
|
40
|
-
Requires-Dist: mineru-vl-utils<1,>=0.1.
|
|
40
|
+
Requires-Dist: mineru-vl-utils<1,>=0.1.15
|
|
41
41
|
Provides-Extra: test
|
|
42
42
|
Requires-Dist: mineru[core]; extra == "test"
|
|
43
43
|
Requires-Dist: pytest; extra == "test"
|
|
@@ -50,6 +50,8 @@ Requires-Dist: transformers<5.0.0,>=4.51.1; extra == "vlm"
|
|
|
50
50
|
Requires-Dist: accelerate>=1.5.1; extra == "vlm"
|
|
51
51
|
Provides-Extra: vllm
|
|
52
52
|
Requires-Dist: vllm<0.12,>=0.10.1.1; extra == "vllm"
|
|
53
|
+
Provides-Extra: mlx
|
|
54
|
+
Requires-Dist: mlx-vlm<0.4,>=0.3.3; extra == "mlx"
|
|
53
55
|
Provides-Extra: pipeline
|
|
54
56
|
Requires-Dist: matplotlib<4,>=3.10; extra == "pipeline"
|
|
55
57
|
Requires-Dist: ultralytics<9,>=8.3.48; extra == "pipeline"
|
|
@@ -76,6 +78,7 @@ Requires-Dist: mineru[vlm]; extra == "core"
|
|
|
76
78
|
Requires-Dist: mineru[pipeline]; extra == "core"
|
|
77
79
|
Requires-Dist: mineru[api]; extra == "core"
|
|
78
80
|
Requires-Dist: mineru[gradio]; extra == "core"
|
|
81
|
+
Requires-Dist: mineru[mlx]; sys_platform == "darwin" and extra == "core"
|
|
79
82
|
Provides-Extra: all
|
|
80
83
|
Requires-Dist: mineru[core]; extra == "all"
|
|
81
84
|
Requires-Dist: mineru[vllm]; extra == "all"
|
|
@@ -127,11 +130,15 @@ Dynamic: license-file
|
|
|
127
130
|
</div>
|
|
128
131
|
|
|
129
132
|
# Changelog
|
|
130
|
-
- 2025/10/
|
|
133
|
+
- 2025/10/31 2.6.3 Release
|
|
134
|
+
- Added support for a new backend `vlm-mlx-engine`, enabling MLX-accelerated inference for the MinerU2.5 model on Apple Silicon devices. Compared to the `vlm-transformers` backend, `vlm-mlx-engine` delivers a 100%–200% speed improvement.
|
|
135
|
+
- Bug fixes: #3849, #3859
|
|
136
|
+
|
|
137
|
+
- 2025/10/24 2.6.2 Release
|
|
131
138
|
- `pipeline` backend optimizations
|
|
132
139
|
- Added experimental support for Chinese formulas, which can be enabled by setting the environment variable `export MINERU_FORMULA_CH_SUPPORT=1`. This feature may cause a slight decrease in MFR speed and failures in recognizing some long formulas. It is recommended to enable it only when parsing Chinese formulas is needed. To disable this feature, set the environment variable to `0`.
|
|
133
140
|
- `OCR` speed significantly improved by 200%~300%, thanks to the optimization solution provided by [@cjsdurj](https://github.com/cjsdurj)
|
|
134
|
-
- `OCR` models
|
|
141
|
+
- `OCR` models optimized for improved accuracy and coverage of Latin script recognition, and updated Cyrillic, Arabic, Devanagari, Telugu (te), and Tamil (ta) language systems to `ppocr-v5` version, with accuracy improved by over 40% compared to previous models
|
|
135
142
|
- `vlm` backend optimizations
|
|
136
143
|
- `table_caption` and `table_footnote` matching logic optimized to improve the accuracy of table caption and footnote matching and reading order rationality in scenarios with multiple consecutive tables on a page
|
|
137
144
|
- Optimized CPU resource usage during high concurrency when using `vllm` backend, reducing server pressure
|
|
@@ -666,7 +673,7 @@ https://github.com/user-attachments/assets/4bea02c9-6d54-4cd6-97ed-dff14340982c
|
|
|
666
673
|
- Automatically recognize and convert formulas in the document to LaTeX format.
|
|
667
674
|
- Automatically recognize and convert tables in the document to HTML format.
|
|
668
675
|
- Automatically detect scanned PDFs and garbled PDFs and enable OCR functionality.
|
|
669
|
-
- OCR supports detection and recognition of
|
|
676
|
+
- OCR supports detection and recognition of 109 languages.
|
|
670
677
|
- Supports multiple output formats, such as multimodal and NLP Markdown, JSON sorted by reading order, and rich intermediate formats.
|
|
671
678
|
- Supports various visualization results, including layout visualization and span visualization, for efficient confirmation of output quality.
|
|
672
679
|
- Supports running in a pure CPU environment, and also supports GPU(CUDA)/NPU(CANN)/MPS acceleration
|
|
@@ -703,41 +710,70 @@ A WebUI developed based on Gradio, with a simple interface and only core parsing
|
|
|
703
710
|
> In non-mainline environments, due to the diversity of hardware and software configurations, as well as third-party dependency compatibility issues, we cannot guarantee 100% project availability. Therefore, for users who wish to use this project in non-recommended environments, we suggest carefully reading the documentation and FAQ first. Most issues already have corresponding solutions in the FAQ. We also encourage community feedback to help us gradually expand support.
|
|
704
711
|
|
|
705
712
|
<table>
|
|
706
|
-
<
|
|
707
|
-
<
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
<
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
713
|
+
<thead>
|
|
714
|
+
<tr>
|
|
715
|
+
<th rowspan="2">Parsing Backend</th>
|
|
716
|
+
<th rowspan="2">pipeline <br> (Accuracy<sup>1</sup> 82+)</th>
|
|
717
|
+
<th colspan="4">vlm (Accuracy<sup>1</sup> 90+)</th>
|
|
718
|
+
</tr>
|
|
719
|
+
<tr>
|
|
720
|
+
<th>transformers</th>
|
|
721
|
+
<th>mlx-engine</th>
|
|
722
|
+
<th>vllm-engine / <br>vllm-async-engine</th>
|
|
723
|
+
<th>http-client</th>
|
|
724
|
+
</tr>
|
|
725
|
+
</thead>
|
|
726
|
+
<tbody>
|
|
727
|
+
<tr>
|
|
728
|
+
<th>Backend Features</th>
|
|
729
|
+
<td>Fast, no hallucinations</td>
|
|
730
|
+
<td>Good compatibility, <br>but slower</td>
|
|
731
|
+
<td>Faster than transformers</td>
|
|
732
|
+
<td>Fast, compatible with the vLLM ecosystem</td>
|
|
733
|
+
<td>Suitable for OpenAI-compatible servers<sup>5</sup></td>
|
|
734
|
+
</tr>
|
|
735
|
+
<tr>
|
|
736
|
+
<th>Operating System</th>
|
|
737
|
+
<td colspan="2" style="text-align:center;">Linux<sup>2</sup> / Windows / macOS</td>
|
|
738
|
+
<td style="text-align:center;">macOS<sup>3</sup></td>
|
|
739
|
+
<td style="text-align:center;">Linux<sup>2</sup> / Windows<sup>4</sup> </td>
|
|
740
|
+
<td>Any</td>
|
|
741
|
+
</tr>
|
|
742
|
+
<tr>
|
|
743
|
+
<th>CPU inference support</th>
|
|
744
|
+
<td colspan="2" style="text-align:center;">✅</td>
|
|
745
|
+
<td colspan="2" style="text-align:center;">❌</td>
|
|
746
|
+
<td>Not required</td>
|
|
747
|
+
</tr>
|
|
748
|
+
<tr>
|
|
749
|
+
<th>GPU Requirements</th><td colspan="2" style="text-align:center;">Volta or later architectures, 6 GB VRAM or more, or Apple Silicon</td>
|
|
750
|
+
<td>Apple Silicon</td>
|
|
751
|
+
<td>Volta or later architectures, 8 GB VRAM or more</td>
|
|
752
|
+
<td>Not required</td>
|
|
753
|
+
</tr>
|
|
754
|
+
<tr>
|
|
755
|
+
<th>Memory Requirements</th>
|
|
756
|
+
<td colspan="4" style="text-align:center;">Minimum 16 GB, 32 GB recommended</td>
|
|
757
|
+
<td>8 GB</td>
|
|
758
|
+
</tr>
|
|
759
|
+
<tr>
|
|
760
|
+
<th>Disk Space Requirements</th>
|
|
761
|
+
<td colspan="4" style="text-align:center;">20 GB or more, SSD recommended</td>
|
|
762
|
+
<td>2 GB</td>
|
|
763
|
+
</tr>
|
|
764
|
+
<tr>
|
|
765
|
+
<th>Python Version</th>
|
|
766
|
+
<td colspan="5" style="text-align:center;">3.10-3.13</td>
|
|
767
|
+
</tr>
|
|
768
|
+
</tbody>
|
|
740
769
|
</table>
|
|
770
|
+
|
|
771
|
+
<sup>1</sup> Accuracy metric is the End-to-End Evaluation Overall score of OmniDocBench (v1.5), tested on the latest `MinerU` version.
|
|
772
|
+
<sup>2</sup> Linux supports only distributions released in 2019 or later.
|
|
773
|
+
<sup>3</sup> MLX requires macOS 13.5 or later, recommended for use with version 14.0 or higher.
|
|
774
|
+
<sup>4</sup> Windows vLLM support via WSL2(Windows Subsystem for Linux).
|
|
775
|
+
<sup>5</sup> Servers compatible with the OpenAI API, such as local or remote model services deployed via inference frameworks like `vLLM`, `SGLang`, or `LMDeploy`.
|
|
776
|
+
|
|
741
777
|
|
|
742
778
|
### Install MinerU
|
|
743
779
|
|
|
@@ -1,28 +1,28 @@
|
|
|
1
1
|
mineru/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
2
|
-
mineru/version.py,sha256=
|
|
2
|
+
mineru/version.py,sha256=uJ6TLK18jhCrL0aclBja7NzlAGLAyZjVpX-gq3d461k,22
|
|
3
3
|
mineru/backend/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
4
4
|
mineru/backend/utils.py,sha256=GLJU3IznDmhE1_qNmkU1UOtsuskIHBezgsEVO6Uar-Y,698
|
|
5
5
|
mineru/backend/pipeline/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
6
|
-
mineru/backend/pipeline/batch_analyze.py,sha256=
|
|
7
|
-
mineru/backend/pipeline/model_init.py,sha256=
|
|
8
|
-
mineru/backend/pipeline/model_json_to_middle_json.py,sha256=
|
|
6
|
+
mineru/backend/pipeline/batch_analyze.py,sha256=gnilKhFlMe8-55X2PJnb-ZSVeZIS-5DxIbMpHnwLne8,20889
|
|
7
|
+
mineru/backend/pipeline/model_init.py,sha256=OAylOcQD9gu5TBcX7nMt7X5NpJMtQICI5IvEQ648lpI,9358
|
|
8
|
+
mineru/backend/pipeline/model_json_to_middle_json.py,sha256=reXkUR_wKmJD64d7vRNXMxFviwkzDlGjRshpdwsVquI,10951
|
|
9
9
|
mineru/backend/pipeline/model_list.py,sha256=7cXMBfZrP0K6qWueg1D_-WoUANeSINzkn_ic9E7YQLs,222
|
|
10
10
|
mineru/backend/pipeline/para_split.py,sha256=Kq95MmvkPm7rKxlCSGiTvVKyF7CErHI2eGGAs5sLl0Q,17119
|
|
11
11
|
mineru/backend/pipeline/pipeline_analyze.py,sha256=rbO5AetOdnxR5ctkoDzFCFoElkz7Jgb7gi2Ct596NK8,6655
|
|
12
12
|
mineru/backend/pipeline/pipeline_magic_model.py,sha256=w8jGx8f6yZN0Wf2yPP3L9rYKc9rogxreZCrUJzJvPO8,14974
|
|
13
|
-
mineru/backend/pipeline/pipeline_middle_json_mkcontent.py,sha256=
|
|
13
|
+
mineru/backend/pipeline/pipeline_middle_json_mkcontent.py,sha256=YlnEbbUnkniZXS13aLo5mjfFQvQM5SrIVvTAGBZsLmw,14478
|
|
14
14
|
mineru/backend/vlm/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
15
15
|
mineru/backend/vlm/model_output_to_middle_json.py,sha256=AqYX44gS9crUO_t7SuUatD71EVjow6pI6yA2Ik3gQ0s,5139
|
|
16
16
|
mineru/backend/vlm/utils.py,sha256=woGqyRI4S7p69daLCU07XNXWTV27aLf7YBjjVH1x-5o,2794
|
|
17
|
-
mineru/backend/vlm/vlm_analyze.py,sha256=
|
|
17
|
+
mineru/backend/vlm/vlm_analyze.py,sha256=7c5_JN1F9YTDNNgA_Rmw6xX1PI7gcIT4A4ujtGQHH9Q,8792
|
|
18
18
|
mineru/backend/vlm/vlm_magic_model.py,sha256=Pd0sOr7G1crAJIVeq6h_03gNSuxmV5U8dvGTGT_rrjs,23452
|
|
19
|
-
mineru/backend/vlm/vlm_middle_json_mkcontent.py,sha256=
|
|
19
|
+
mineru/backend/vlm/vlm_middle_json_mkcontent.py,sha256=5V-AU9KkxxMn0DDSQBrb15I4GVpEyiQy8uNI_tQhS6M,13498
|
|
20
20
|
mineru/cli/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
21
|
-
mineru/cli/client.py,sha256=
|
|
21
|
+
mineru/cli/client.py,sha256=ArnoT2psOQRnTqLpsFwPaoi-l444iIVkbBn90Pm16n8,6915
|
|
22
22
|
mineru/cli/common.py,sha256=jxFJMdc-02UMO3SXAtcZ6aIdPrakAE6DCccZ9kDlPKc,14276
|
|
23
23
|
mineru/cli/fast_api.py,sha256=t5bda769VbM5iokAboiJfPIOnm-r5GTFReE-KQy8L3g,10941
|
|
24
|
-
mineru/cli/gradio_app.py,sha256=
|
|
25
|
-
mineru/cli/models_download.py,sha256=
|
|
24
|
+
mineru/cli/gradio_app.py,sha256=6dA0ARpdOoewFeXmHrleF1amCgBV9ilY_nkWAmAmN8A,14731
|
|
25
|
+
mineru/cli/models_download.py,sha256=LNfoIpUlJM7m7qb2SiCxtjMDw4jILBQtZwNP2JoY81U,4815
|
|
26
26
|
mineru/cli/vlm_vllm_server.py,sha256=fQJyD-gIPQ41hR_6aIaDJczl66N310t0CiZEBAfX5mc,90
|
|
27
27
|
mineru/data/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
28
28
|
mineru/data/data_reader_writer/__init__.py,sha256=9qnGNrsuGBMwwfsQy6oChdkz--a_LPdYWE0VZZr0yr4,490
|
|
@@ -62,8 +62,7 @@ mineru/model/mfr/unimernet/unimernet_hf/unimer_swin/configuration_unimer_swin.py
|
|
|
62
62
|
mineru/model/mfr/unimernet/unimernet_hf/unimer_swin/image_processing_unimer_swin.py,sha256=a9kCvwzJJSRrKQNtW2oOpTwrapzep8BjGFWLhLF1T0k,6036
|
|
63
63
|
mineru/model/mfr/unimernet/unimernet_hf/unimer_swin/modeling_unimer_swin.py,sha256=Q_fdmFHUBtEoAfWp9aowdwTCE2MIFMOPbYjoSyXK2iU,48929
|
|
64
64
|
mineru/model/ocr/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
65
|
-
mineru/model/ocr/
|
|
66
|
-
mineru/model/ocr/paddleocr2pytorch/pytorch_paddle.py,sha256=wZOw82q1NARNHBW2Lk5zumjdAqzPZqnhV6rvMULvLs8,9207
|
|
65
|
+
mineru/model/ocr/pytorch_paddle.py,sha256=cHMTl5sKyn4BY2207-7GQ4eZl9BQUcs5ucxw_NFezII,9200
|
|
67
66
|
mineru/model/ori_cls/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
68
67
|
mineru/model/ori_cls/paddle_ori_cls.py,sha256=VIS22IerHST7g60AC9r2PEQIG6NQWeQaH1OrXIxNTsg,11943
|
|
69
68
|
mineru/model/reading_order/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
@@ -72,7 +71,7 @@ mineru/model/reading_order/xycut.py,sha256=ezNSq_Y4UXiztB58hbXJsjTJlOBqWIjuW5A2u
|
|
|
72
71
|
mineru/model/table/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
73
72
|
mineru/model/table/cls/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
74
73
|
mineru/model/table/cls/paddle_table_cls.py,sha256=5PtieKQnAzgMNRTZFgnqQsGWKTEQ3yyFWQnBRIjfQ4A,5781
|
|
75
|
-
mineru/model/table/rec/RapidTable.py,sha256=
|
|
74
|
+
mineru/model/table/rec/RapidTable.py,sha256=2dNdGJsVdsGfRm6r3deERUMst5RIxH0YuiGALkQbNTw,5955
|
|
76
75
|
mineru/model/table/rec/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
77
76
|
mineru/model/table/rec/slanet_plus/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
78
77
|
mineru/model/table/rec/slanet_plus/main.py,sha256=vfrcvQ9JBf32YZU9eNoetoqdpcrFNsA1WNqQBsG8i2o,7646
|
|
@@ -122,19 +121,10 @@ mineru/model/utils/pytorchocr/postprocess/cls_postprocess.py,sha256=1VVWXT_b1vhG
|
|
|
122
121
|
mineru/model/utils/pytorchocr/postprocess/db_postprocess.py,sha256=AdZPF7frhQ27VVdp0GFmMcXtivwDZZfXYhzJOlP4zUs,6483
|
|
123
122
|
mineru/model/utils/pytorchocr/postprocess/rec_postprocess.py,sha256=qGB3onFEFhHjqksIR1IKOx2EY98ewfsmjADjrRXg30Y,30552
|
|
124
123
|
mineru/model/utils/pytorchocr/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
125
|
-
mineru/model/utils/pytorchocr/utils/resources/arch_config.yaml,sha256=
|
|
126
|
-
mineru/model/utils/pytorchocr/utils/resources/models_config.yml,sha256=
|
|
124
|
+
mineru/model/utils/pytorchocr/utils/resources/arch_config.yaml,sha256=BvaXsHXXVyxYOFYjkt7HtyZWiEyLrvHsHsJYfQZShDY,9095
|
|
125
|
+
mineru/model/utils/pytorchocr/utils/resources/models_config.yml,sha256=QIuqeu91pfuqya4JbxmZQSkOYke-h2Bz21O2pOLQPYc,1944
|
|
127
126
|
mineru/model/utils/pytorchocr/utils/resources/pp_formulanet_arch_config.yaml,sha256=a7yueOTUrfpZo8CsK6vQokbLNB2J-P77ihaCh_LozvQ,507
|
|
128
|
-
mineru/model/utils/pytorchocr/utils/resources/dict/arabic_dict.txt,sha256=xbaXD14RWk0Vpc7fAHpephuszp1j-Qi3IWC4VrFKu70,407
|
|
129
|
-
mineru/model/utils/pytorchocr/utils/resources/dict/chinese_cht_dict.txt,sha256=gyVR_uHy-8l1CHctgevcjboSwA3pejXHHJ3fQ92sGoM,33443
|
|
130
|
-
mineru/model/utils/pytorchocr/utils/resources/dict/cyrillic_dict.txt,sha256=NpqCxsjEeXhKXXJkSLg7Hq-1_vCkEppeqjkpYl3c0TI,410
|
|
131
|
-
mineru/model/utils/pytorchocr/utils/resources/dict/devanagari_dict.txt,sha256=tfG-bYu_8aGfuWxdTKlqQjOAI0u30s4OB7WDittNGOo,508
|
|
132
|
-
mineru/model/utils/pytorchocr/utils/resources/dict/en_dict.txt,sha256=VmLfnS0D8OjKDTsGSdasurkEtqFLPTUhRjxxw3xmjOM,190
|
|
133
|
-
mineru/model/utils/pytorchocr/utils/resources/dict/japan_dict.txt,sha256=Hc_LQe7JBXapRbMITyKt4RztUG4k8Uh5JFsHFpjzCOg,17332
|
|
134
127
|
mineru/model/utils/pytorchocr/utils/resources/dict/ka_dict.txt,sha256=-tP3ZZQyde7CE0pvvJtSeFQmZBEE1OfbOhWdxz80Hd4,452
|
|
135
|
-
mineru/model/utils/pytorchocr/utils/resources/dict/korean_dict.txt,sha256=qh_ciuj3zUCg7E7bRy6wQh4RQn5sz-6ZFUQHQsGLCiA,14480
|
|
136
|
-
mineru/model/utils/pytorchocr/utils/resources/dict/latin_dict.txt,sha256=jm1ONil4jDXDH35TAofWFHtUm7eiZb1nCLsoETRCniw,468
|
|
137
|
-
mineru/model/utils/pytorchocr/utils/resources/dict/ppocr_keys_v1.txt,sha256=KLI2KtSrLcOHaapy_rU146nds_0qdYWgWSDmOTsdx_c,26249
|
|
138
128
|
mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv4_doc_dict.txt,sha256=pbw4h8Q8kB5aP5exP_rfHFdU7efMjJ9aviLodafEg3I,62346
|
|
139
129
|
mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_arabic_dict.txt,sha256=f5L327m3WkeHqDv7T20UqKtRVSUTDJ1AqQNvYc9pmek,2369
|
|
140
130
|
mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_cyrillic_dict.txt,sha256=20CqUs6xEgVb6AxpSv32VdXSxPeHNwRSTMFqRHypE7o,2781
|
|
@@ -144,12 +134,10 @@ mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_el_dict.txt,sha256=Md
|
|
|
144
134
|
mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_en_dict.txt,sha256=4CWmbTHzJ7oMIy4D9AeujRBeHnCefMs_QIqneMJOcNY,1416
|
|
145
135
|
mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_eslav_dict.txt,sha256=PpXxWBVXFihwys26WvkaTGviiQcQ05Www8dXjn7l5us,1663
|
|
146
136
|
mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_korean_dict.txt,sha256=qIBxxowBcHSJuqeevgQFt761zKIp9PyUzD75kjKIAtc,47451
|
|
147
|
-
mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_latin_dict.txt,sha256=
|
|
137
|
+
mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_latin_dict.txt,sha256=zLzEVzCz-72QUMW8dNtqmQZxQe8QNePRSImoSmubGv8,2616
|
|
148
138
|
mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_ta_dict.txt,sha256=hbVBNSrhjca6bUcVLYv4rf9rAmbmBdLu8pkMG_RmEXs,1723
|
|
149
139
|
mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_te_dict.txt,sha256=Qvg_XT_bUHeOT6W2bFjZmlmrd5IVHF5080uP_XthydY,1831
|
|
150
140
|
mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_th_dict.txt,sha256=V_VAb5S7Zoj7cHf3vmXwi71xzs9IwB6ibFIstcSDa3o,1767
|
|
151
|
-
mineru/model/utils/pytorchocr/utils/resources/dict/ta_dict.txt,sha256=6T5pSBSv2f8ekYtvS7Qmf7TGWpNE7l10ZPkTW5DAonA,352
|
|
152
|
-
mineru/model/utils/pytorchocr/utils/resources/dict/te_dict.txt,sha256=7plGpg13AZd0dOiYg2lKTKIOqjhoojM0v3lA3NAI8Pk,429
|
|
153
141
|
mineru/model/utils/tools/__init__.py,sha256=xEqR65Z8YOzOLorLjK0LCHos2zX-tCuxSrxndjU00hE,49
|
|
154
142
|
mineru/model/utils/tools/infer/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
155
143
|
mineru/model/utils/tools/infer/predict_cls.py,sha256=8RmKl1vejnZl65caHZNV2ta6hMsg5B_LE-FuqCO8T8A,4225
|
|
@@ -163,8 +151,9 @@ mineru/resources/header.html,sha256=PUselBXLBn8gfeP3zwEtj6zIxfhcCN4vN_B796nQFNQ,
|
|
|
163
151
|
mineru/resources/fasttext-langdetect/lid.176.ftz,sha256=jzRyz-hzintgmejpmcPL-uDc0VaWqsfXc4qAOdtgPoM,938013
|
|
164
152
|
mineru/utils/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
165
153
|
mineru/utils/block_pre_proc.py,sha256=uGBmxf2MR9bplTnQI8xHjCI-kj3plRhJr0hcWKidbOQ,9632
|
|
166
|
-
mineru/utils/block_sort.py,sha256=
|
|
154
|
+
mineru/utils/block_sort.py,sha256=5e1mOLB3W7xu5Y1hmhvGSHPL_aQ41R_4VXcP4vjYAOU,12976
|
|
167
155
|
mineru/utils/boxbase.py,sha256=moP660AmZq_udHEsfvFkTQdJ4gjrrBwN7t0Enx7CIL8,6903
|
|
156
|
+
mineru/utils/check_mac_env.py,sha256=pbmbcnS60zZRqNRBxFJbbPrCosU1lC464b0v6JUlgaE,1031
|
|
168
157
|
mineru/utils/cli_parser.py,sha256=4seFAu1kulsYnw6WM2q_cxgEOt2tErZVkI-LNEF_kGw,1445
|
|
169
158
|
mineru/utils/config_reader.py,sha256=IRVWTpBnbnRpck6eXZUKw-fcLt7hon5S4uqWW-RBb1w,4075
|
|
170
159
|
mineru/utils/cut_image.py,sha256=g3m4nfcJNWlxi-P0kpXTtlmspXkMcLCfGwmYuQ-Z2hE,751
|
|
@@ -174,7 +163,7 @@ mineru/utils/format_utils.py,sha256=2s89vHcSISjuolk8Hvg3K-5-rRbiT3Us7eFLzUKrNKs,
|
|
|
174
163
|
mineru/utils/guess_suffix_or_lang.py,sha256=nznyQpUn1BSA8JNw9HuG3pVV-xtVAtrtcGuHZ-VXt9M,856
|
|
175
164
|
mineru/utils/hash_utils.py,sha256=UPS_8NRBmVumdyOv16Lmv6Ly2xK8OVDJEe5gG6gKIFk,857
|
|
176
165
|
mineru/utils/language.py,sha256=7RT3mxSa7jdpoC5ySd7ZddHA7TO7UsnmDOWiYZAxuyg,1433
|
|
177
|
-
mineru/utils/llm_aided.py,sha256=
|
|
166
|
+
mineru/utils/llm_aided.py,sha256=9WUytvxenSAuaWR4sTQhVPQ5h8pY0wVOH1O2sj_6dLs,5149
|
|
178
167
|
mineru/utils/magic_model_utils.py,sha256=2xOvi4oqg3MSw1FUrJTnYDtWeFrrm6qbmlEorLZSaYs,5650
|
|
179
168
|
mineru/utils/model_utils.py,sha256=6OsgFLsABX5JuShSzCMSNHWV-yi-1cjwHweafyxIgRo,18448
|
|
180
169
|
mineru/utils/models_download_utils.py,sha256=UfjvwhxO6BkJHa5JSpEVNZ71GoLMPMmJpym3THET2T4,2957
|
|
@@ -187,9 +176,9 @@ mineru/utils/run_async.py,sha256=rPeP4BCZerR8VByRDhiYzfZiahLVqoZEBVAS54dAjNg,128
|
|
|
187
176
|
mineru/utils/span_block_fix.py,sha256=0eVQjJCrT03woRt9hoh6Uu42Tp1dacfGTv2x3B9qq94,8797
|
|
188
177
|
mineru/utils/span_pre_proc.py,sha256=h41q2uQajI0xQbc_30hqaju1dv3oVYxBAlKgURl8HIc,13692
|
|
189
178
|
mineru/utils/table_merge.py,sha256=d98zNbM1ZQ8V1kUt6RugParNUNPv7DGL-XKIzR3iJVQ,15360
|
|
190
|
-
mineru-2.6.
|
|
191
|
-
mineru-2.6.
|
|
192
|
-
mineru-2.6.
|
|
193
|
-
mineru-2.6.
|
|
194
|
-
mineru-2.6.
|
|
195
|
-
mineru-2.6.
|
|
179
|
+
mineru-2.6.3.dist-info/licenses/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
|
|
180
|
+
mineru-2.6.3.dist-info/METADATA,sha256=mUBTxFG5tgdyQ1caZVRNrk4MhIX36PECY09dwCNYXZ4,70689
|
|
181
|
+
mineru-2.6.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
182
|
+
mineru-2.6.3.dist-info/entry_points.txt,sha256=luXmbhPiZK_tKlRgWuYOaW_V6EFpG-yJcAevVv9MEqE,252
|
|
183
|
+
mineru-2.6.3.dist-info/top_level.txt,sha256=zuGQfZcbsHv4I4oKI9gaKPqEWBFm6xJroKuug2LnKP8,7
|
|
184
|
+
mineru-2.6.3.dist-info/RECORD,,
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
# Copyright (c) Opendatalab. All rights reserved.
|
|
@@ -1,162 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
!
|
|
3
|
-
#
|
|
4
|
-
$
|
|
5
|
-
%
|
|
6
|
-
&
|
|
7
|
-
'
|
|
8
|
-
(
|
|
9
|
-
+
|
|
10
|
-
,
|
|
11
|
-
-
|
|
12
|
-
.
|
|
13
|
-
/
|
|
14
|
-
0
|
|
15
|
-
1
|
|
16
|
-
2
|
|
17
|
-
3
|
|
18
|
-
4
|
|
19
|
-
5
|
|
20
|
-
6
|
|
21
|
-
7
|
|
22
|
-
8
|
|
23
|
-
9
|
|
24
|
-
:
|
|
25
|
-
?
|
|
26
|
-
@
|
|
27
|
-
A
|
|
28
|
-
B
|
|
29
|
-
C
|
|
30
|
-
D
|
|
31
|
-
E
|
|
32
|
-
F
|
|
33
|
-
G
|
|
34
|
-
H
|
|
35
|
-
I
|
|
36
|
-
J
|
|
37
|
-
K
|
|
38
|
-
L
|
|
39
|
-
M
|
|
40
|
-
N
|
|
41
|
-
O
|
|
42
|
-
P
|
|
43
|
-
Q
|
|
44
|
-
R
|
|
45
|
-
S
|
|
46
|
-
T
|
|
47
|
-
U
|
|
48
|
-
V
|
|
49
|
-
W
|
|
50
|
-
X
|
|
51
|
-
Y
|
|
52
|
-
Z
|
|
53
|
-
_
|
|
54
|
-
a
|
|
55
|
-
b
|
|
56
|
-
c
|
|
57
|
-
d
|
|
58
|
-
e
|
|
59
|
-
f
|
|
60
|
-
g
|
|
61
|
-
h
|
|
62
|
-
i
|
|
63
|
-
j
|
|
64
|
-
k
|
|
65
|
-
l
|
|
66
|
-
m
|
|
67
|
-
n
|
|
68
|
-
o
|
|
69
|
-
p
|
|
70
|
-
q
|
|
71
|
-
r
|
|
72
|
-
s
|
|
73
|
-
t
|
|
74
|
-
u
|
|
75
|
-
v
|
|
76
|
-
w
|
|
77
|
-
x
|
|
78
|
-
y
|
|
79
|
-
z
|
|
80
|
-
É
|
|
81
|
-
é
|
|
82
|
-
ء
|
|
83
|
-
آ
|
|
84
|
-
أ
|
|
85
|
-
ؤ
|
|
86
|
-
إ
|
|
87
|
-
ئ
|
|
88
|
-
ا
|
|
89
|
-
ب
|
|
90
|
-
ة
|
|
91
|
-
ت
|
|
92
|
-
ث
|
|
93
|
-
ج
|
|
94
|
-
ح
|
|
95
|
-
خ
|
|
96
|
-
د
|
|
97
|
-
ذ
|
|
98
|
-
ر
|
|
99
|
-
ز
|
|
100
|
-
س
|
|
101
|
-
ش
|
|
102
|
-
ص
|
|
103
|
-
ض
|
|
104
|
-
ط
|
|
105
|
-
ظ
|
|
106
|
-
ع
|
|
107
|
-
غ
|
|
108
|
-
ف
|
|
109
|
-
ق
|
|
110
|
-
ك
|
|
111
|
-
ل
|
|
112
|
-
م
|
|
113
|
-
ن
|
|
114
|
-
ه
|
|
115
|
-
و
|
|
116
|
-
ى
|
|
117
|
-
ي
|
|
118
|
-
ً
|
|
119
|
-
ٌ
|
|
120
|
-
ٍ
|
|
121
|
-
َ
|
|
122
|
-
ُ
|
|
123
|
-
ِ
|
|
124
|
-
ّ
|
|
125
|
-
ْ
|
|
126
|
-
ٓ
|
|
127
|
-
ٔ
|
|
128
|
-
ٰ
|
|
129
|
-
ٱ
|
|
130
|
-
ٹ
|
|
131
|
-
پ
|
|
132
|
-
چ
|
|
133
|
-
ڈ
|
|
134
|
-
ڑ
|
|
135
|
-
ژ
|
|
136
|
-
ک
|
|
137
|
-
ڭ
|
|
138
|
-
گ
|
|
139
|
-
ں
|
|
140
|
-
ھ
|
|
141
|
-
ۀ
|
|
142
|
-
ہ
|
|
143
|
-
ۂ
|
|
144
|
-
ۃ
|
|
145
|
-
ۆ
|
|
146
|
-
ۇ
|
|
147
|
-
ۈ
|
|
148
|
-
ۋ
|
|
149
|
-
ی
|
|
150
|
-
ې
|
|
151
|
-
ے
|
|
152
|
-
ۓ
|
|
153
|
-
ە
|
|
154
|
-
١
|
|
155
|
-
٢
|
|
156
|
-
٣
|
|
157
|
-
٤
|
|
158
|
-
٥
|
|
159
|
-
٦
|
|
160
|
-
٧
|
|
161
|
-
٨
|
|
162
|
-
٩
|