mineru 2.6.1__py3-none-any.whl → 2.6.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. mineru/backend/pipeline/batch_analyze.py +20 -43
  2. mineru/backend/pipeline/model_init.py +1 -1
  3. mineru/backend/pipeline/model_json_to_middle_json.py +1 -1
  4. mineru/backend/pipeline/pipeline_middle_json_mkcontent.py +17 -4
  5. mineru/backend/vlm/vlm_analyze.py +11 -1
  6. mineru/backend/vlm/vlm_middle_json_mkcontent.py +6 -3
  7. mineru/cli/client.py +24 -14
  8. mineru/cli/gradio_app.py +4 -1
  9. mineru/cli/models_download.py +1 -1
  10. mineru/model/ocr/{paddleocr2pytorch/pytorch_paddle.py → pytorch_paddle.py} +1 -1
  11. mineru/model/table/rec/RapidTable.py +1 -1
  12. mineru/model/utils/pytorchocr/utils/resources/arch_config.yaml +1 -317
  13. mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_latin_dict.txt +545 -211
  14. mineru/model/utils/pytorchocr/utils/resources/models_config.yml +0 -12
  15. mineru/utils/block_sort.py +3 -2
  16. mineru/utils/check_mac_env.py +30 -0
  17. mineru/utils/llm_aided.py +13 -8
  18. mineru/version.py +1 -1
  19. {mineru-2.6.1.dist-info → mineru-2.6.3.dist-info}/METADATA +75 -39
  20. {mineru-2.6.1.dist-info → mineru-2.6.3.dist-info}/RECORD +24 -35
  21. mineru/model/ocr/paddleocr2pytorch/__init__.py +0 -1
  22. mineru/model/utils/pytorchocr/utils/resources/dict/arabic_dict.txt +0 -162
  23. mineru/model/utils/pytorchocr/utils/resources/dict/chinese_cht_dict.txt +0 -8421
  24. mineru/model/utils/pytorchocr/utils/resources/dict/cyrillic_dict.txt +0 -163
  25. mineru/model/utils/pytorchocr/utils/resources/dict/devanagari_dict.txt +0 -167
  26. mineru/model/utils/pytorchocr/utils/resources/dict/en_dict.txt +0 -95
  27. mineru/model/utils/pytorchocr/utils/resources/dict/japan_dict.txt +0 -4399
  28. mineru/model/utils/pytorchocr/utils/resources/dict/korean_dict.txt +0 -3688
  29. mineru/model/utils/pytorchocr/utils/resources/dict/latin_dict.txt +0 -185
  30. mineru/model/utils/pytorchocr/utils/resources/dict/ppocr_keys_v1.txt +0 -6623
  31. mineru/model/utils/pytorchocr/utils/resources/dict/ta_dict.txt +0 -128
  32. mineru/model/utils/pytorchocr/utils/resources/dict/te_dict.txt +0 -151
  33. {mineru-2.6.1.dist-info → mineru-2.6.3.dist-info}/WHEEL +0 -0
  34. {mineru-2.6.1.dist-info → mineru-2.6.3.dist-info}/entry_points.txt +0 -0
  35. {mineru-2.6.1.dist-info → mineru-2.6.3.dist-info}/licenses/LICENSE.md +0 -0
  36. {mineru-2.6.1.dist-info → mineru-2.6.3.dist-info}/top_level.txt +0 -0
@@ -3,26 +3,14 @@ lang:
3
3
  det: ch_PP-OCRv5_det_infer.pth
4
4
  rec: ch_PP-OCRv5_rec_infer.pth
5
5
  dict: ppocrv5_dict.txt
6
- ch_lite_v4:
7
- det: ch_PP-OCRv5_det_infer.pth
8
- rec: ch_PP-OCRv4_rec_infer.pth
9
- dict: ppocr_keys_v1.txt
10
6
  ch_server:
11
7
  det: ch_PP-OCRv5_det_infer.pth
12
8
  rec: ch_PP-OCRv5_rec_server_infer.pth
13
9
  dict: ppocrv5_dict.txt
14
- ch_server_v4:
15
- det: ch_PP-OCRv5_det_infer.pth
16
- rec: ch_PP-OCRv4_rec_server_infer.pth
17
- dict: ppocr_keys_v1.txt
18
10
  ch:
19
11
  det: ch_PP-OCRv5_det_infer.pth
20
12
  rec: ch_PP-OCRv4_rec_server_doc_infer.pth
21
13
  dict: ppocrv4_doc_dict.txt
22
- en_v4:
23
- det: en_PP-OCRv3_det_infer.pth
24
- rec: en_PP-OCRv4_rec_infer.pth
25
- dict: en_dict.txt
26
14
  korean:
27
15
  det: ch_PP-OCRv5_det_infer.pth
28
16
  rec: korean_PP-OCRv5_rec_infer.pth
@@ -179,13 +179,14 @@ def insert_lines_into_block(block_bbox, line_height, page_w, page_h):
179
179
  def model_init(model_name: str):
180
180
  from transformers import LayoutLMv3ForTokenClassification
181
181
  device_name = get_device()
182
+ device = torch.device(device_name)
182
183
  bf_16_support = False
183
184
  if device_name.startswith("cuda"):
184
- bf_16_support = torch.cuda.is_bf16_supported()
185
+ if torch.cuda.get_device_properties(device).major >= 8:
186
+ bf_16_support = True
185
187
  elif device_name.startswith("mps"):
186
188
  bf_16_support = True
187
189
 
188
- device = torch.device(device_name)
189
190
  if model_name == 'layoutreader':
190
191
  # 检测modelscope的缓存目录是否存在
191
192
  layoutreader_model_dir = os.path.join(auto_download_and_get_model_root_path(ModelPath.layout_reader), ModelPath.layout_reader)
@@ -0,0 +1,30 @@
1
+ # Copyright (c) Opendatalab. All rights reserved.
2
+ import platform
3
+
4
+ from packaging import version
5
+
6
+
7
def is_mac_environment() -> bool:
    """Return True when ``platform.system()`` reports ``"Darwin"`` (macOS)."""
    return platform.system() == "Darwin"
10
+
11
+
12
def is_apple_silicon_cpu() -> bool:
    """Return True when the machine architecture is ``arm64`` or ``aarch64``.

    Only ``platform.machine()`` is inspected; the operating system is not
    checked here, so combine this with ``is_mac_environment()`` when the
    result must imply an Apple Silicon Mac.
    """
    return platform.machine() in ("arm64", "aarch64")
15
+
16
+
17
def is_mac_os_version_supported(min_version: str = "13.5") -> bool:
    """Return True on an arm64 Mac whose macOS version is at least *min_version*.

    Args:
        min_version: Lowest acceptable macOS version, parsed with
            ``packaging.version.parse``.

    Returns:
        False when the platform is not macOS, when the CPU architecture is
        not arm64/aarch64, or when the macOS version string is empty or
        cannot be parsed; otherwise the result of comparing the reported
        macOS version against *min_version*.
    """
    if not (is_mac_environment() and is_apple_silicon_cpu()):
        return False

    mac_version = platform.mac_ver()[0]
    if not mac_version:
        # platform.mac_ver() yields an empty release string when it has no data.
        return False

    try:
        current_version = version.parse(mac_version)
    except version.InvalidVersion:
        # Treat an unparsable version the same as a missing one.
        return False
    return current_version >= version.parse(min_version)
26
+
27
if __name__ == "__main__":
    # Manual smoke check: print each detection result for the current machine.
    print("Is Mac Environment:", is_mac_environment())
    print("Is Apple Silicon CPU:", is_apple_silicon_cpu())
    print("Is Mac OS Version Supported (>=13.5):", is_mac_os_version_supported())
mineru/utils/llm_aided.py CHANGED
@@ -84,16 +84,21 @@ Corrected title list:
84
84
  max_retries = 3
85
85
  dict_completion = None
86
86
 
87
+ # Build API call parameters
88
+ api_params = {
89
+ "model": title_aided_config["model"],
90
+ "messages": [{'role': 'user', 'content': title_optimize_prompt}],
91
+ "temperature": 0.7,
92
+ "stream": True,
93
+ }
94
+
95
+ # Only add extra_body when explicitly specified in config
96
+ if "enable_thinking" in title_aided_config:
97
+ api_params["extra_body"] = {"enable_thinking": title_aided_config["enable_thinking"]}
98
+
87
99
  while retry_count < max_retries:
88
100
  try:
89
- completion = client.chat.completions.create(
90
- model=title_aided_config["model"],
91
- messages=[
92
- {'role': 'user', 'content': title_optimize_prompt}],
93
- extra_body={"enable_thinking": False},
94
- temperature=0.7,
95
- stream=True,
96
- )
101
+ completion = client.chat.completions.create(**api_params)
97
102
  content_pieces = []
98
103
  for chunk in completion:
99
104
  if chunk.choices and chunk.choices[0].delta.content is not None:
mineru/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "2.6.1"
1
+ __version__ = "2.6.3"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mineru
3
- Version: 2.6.1
3
+ Version: 2.6.3
4
4
  Summary: A practical tool for converting PDF to Markdown
5
5
  License: AGPL-3.0
6
6
  Project-URL: homepage, https://mineru.net/
@@ -37,7 +37,7 @@ Requires-Dist: scikit-image<1.0.0,>=0.25.0
37
37
  Requires-Dist: openai<3,>=1.70.0
38
38
  Requires-Dist: beautifulsoup4<5,>=4.13.5
39
39
  Requires-Dist: magika<0.7.0,>=0.6.2
40
- Requires-Dist: mineru-vl-utils<1,>=0.1.14
40
+ Requires-Dist: mineru-vl-utils<1,>=0.1.15
41
41
  Provides-Extra: test
42
42
  Requires-Dist: mineru[core]; extra == "test"
43
43
  Requires-Dist: pytest; extra == "test"
@@ -50,6 +50,8 @@ Requires-Dist: transformers<5.0.0,>=4.51.1; extra == "vlm"
50
50
  Requires-Dist: accelerate>=1.5.1; extra == "vlm"
51
51
  Provides-Extra: vllm
52
52
  Requires-Dist: vllm<0.12,>=0.10.1.1; extra == "vllm"
53
+ Provides-Extra: mlx
54
+ Requires-Dist: mlx-vlm<0.4,>=0.3.3; extra == "mlx"
53
55
  Provides-Extra: pipeline
54
56
  Requires-Dist: matplotlib<4,>=3.10; extra == "pipeline"
55
57
  Requires-Dist: ultralytics<9,>=8.3.48; extra == "pipeline"
@@ -76,6 +78,7 @@ Requires-Dist: mineru[vlm]; extra == "core"
76
78
  Requires-Dist: mineru[pipeline]; extra == "core"
77
79
  Requires-Dist: mineru[api]; extra == "core"
78
80
  Requires-Dist: mineru[gradio]; extra == "core"
81
+ Requires-Dist: mineru[mlx]; sys_platform == "darwin" and extra == "core"
79
82
  Provides-Extra: all
80
83
  Requires-Dist: mineru[core]; extra == "all"
81
84
  Requires-Dist: mineru[vllm]; extra == "all"
@@ -127,11 +130,15 @@ Dynamic: license-file
127
130
  </div>
128
131
 
129
132
  # Changelog
130
- - 2025/10/24 2.6.1 Release
133
+ - 2025/10/31 2.6.3 Release
134
+ - Added support for a new backend `vlm-mlx-engine`, enabling MLX-accelerated inference for the MinerU2.5 model on Apple Silicon devices. Compared to the `vlm-transformers` backend, `vlm-mlx-engine` delivers a 100%–200% speed improvement.
135
+ - Bug fixes: #3849, #3859
136
+
137
+ - 2025/10/24 2.6.2 Release
131
138
  - `pipeline` backend optimizations
132
139
  - Added experimental support for Chinese formulas, which can be enabled by setting the environment variable `export MINERU_FORMULA_CH_SUPPORT=1`. This feature may cause a slight decrease in MFR speed and failures in recognizing some long formulas. It is recommended to enable it only when parsing Chinese formulas is needed. To disable this feature, set the environment variable to `0`.
133
140
  - `OCR` speed significantly improved by 200%~300%, thanks to the optimization solution provided by [@cjsdurj](https://github.com/cjsdurj)
134
- - `OCR` models updated to `ppocr-v5` version for Cyrillic, Arabic, Devanagari, Telugu (te), and Tamil (ta) languages, with accuracy improved by over 40% compared to previous models
141
+ - `OCR` models optimized for improved accuracy and coverage of Latin script recognition, and updated Cyrillic, Arabic, Devanagari, Telugu (te), and Tamil (ta) language systems to `ppocr-v5` version, with accuracy improved by over 40% compared to previous models
135
142
  - `vlm` backend optimizations
136
143
  - `table_caption` and `table_footnote` matching logic optimized to improve the accuracy of table caption and footnote matching and reading order rationality in scenarios with multiple consecutive tables on a page
137
144
  - Optimized CPU resource usage during high concurrency when using `vllm` backend, reducing server pressure
@@ -666,7 +673,7 @@ https://github.com/user-attachments/assets/4bea02c9-6d54-4cd6-97ed-dff14340982c
666
673
  - Automatically recognize and convert formulas in the document to LaTeX format.
667
674
  - Automatically recognize and convert tables in the document to HTML format.
668
675
  - Automatically detect scanned PDFs and garbled PDFs and enable OCR functionality.
669
- - OCR supports detection and recognition of 84 languages.
676
+ - OCR supports detection and recognition of 109 languages.
670
677
  - Supports multiple output formats, such as multimodal and NLP Markdown, JSON sorted by reading order, and rich intermediate formats.
671
678
  - Supports various visualization results, including layout visualization and span visualization, for efficient confirmation of output quality.
672
679
  - Supports running in a pure CPU environment, and also supports GPU(CUDA)/NPU(CANN)/MPS acceleration
@@ -703,41 +710,70 @@ A WebUI developed based on Gradio, with a simple interface and only core parsing
703
710
  > In non-mainline environments, due to the diversity of hardware and software configurations, as well as third-party dependency compatibility issues, we cannot guarantee 100% project availability. Therefore, for users who wish to use this project in non-recommended environments, we suggest carefully reading the documentation and FAQ first. Most issues already have corresponding solutions in the FAQ. We also encourage community feedback to help us gradually expand support.
704
711
 
705
712
  <table>
706
- <tr>
707
- <td>Parsing Backend</td>
708
- <td>pipeline</td>
709
- <td>vlm-transformers</td>
710
- <td>vlm-vllm</td>
711
- </tr>
712
- <tr>
713
- <td>Operating System</td>
714
- <td>Linux / Windows / macOS</td>
715
- <td>Linux / Windows</td>
716
- <td>Linux / Windows (via WSL2)</td>
717
- </tr>
718
- <tr>
719
- <td>CPU Inference Support</td>
720
- <td>✅</td>
721
- <td colspan="2">❌</td>
722
- </tr>
723
- <tr>
724
- <td>GPU Requirements</td>
725
- <td>Turing architecture and later, 6GB+ VRAM or Apple Silicon</td>
726
- <td colspan="2">Turing architecture and later, 8GB+ VRAM</td>
727
- </tr>
728
- <tr>
729
- <td>Memory Requirements</td>
730
- <td colspan="3">Minimum 16GB+, recommended 32GB+</td>
731
- </tr>
732
- <tr>
733
- <td>Disk Space Requirements</td>
734
- <td colspan="3">20GB+, SSD recommended</td>
735
- </tr>
736
- <tr>
737
- <td>Python Version</td>
738
- <td colspan="3">3.10-3.13</td>
739
- </tr>
713
+ <thead>
714
+ <tr>
715
+ <th rowspan="2">Parsing Backend</th>
716
+ <th rowspan="2">pipeline <br> (Accuracy<sup>1</sup> 82+)</th>
717
+ <th colspan="4">vlm (Accuracy<sup>1</sup> 90+)</th>
718
+ </tr>
719
+ <tr>
720
+ <th>transformers</th>
721
+ <th>mlx-engine</th>
722
+ <th>vllm-engine / <br>vllm-async-engine</th>
723
+ <th>http-client</th>
724
+ </tr>
725
+ </thead>
726
+ <tbody>
727
+ <tr>
728
+ <th>Backend Features</th>
729
+ <td>Fast, no hallucinations</td>
730
+ <td>Good compatibility, <br>but slower</td>
731
+ <td>Faster than transformers</td>
732
+ <td>Fast, compatible with the vLLM ecosystem</td>
733
+ <td>Suitable for OpenAI-compatible servers<sup>5</sup></td>
734
+ </tr>
735
+ <tr>
736
+ <th>Operating System</th>
737
+ <td colspan="2" style="text-align:center;">Linux<sup>2</sup> / Windows / macOS</td>
738
+ <td style="text-align:center;">macOS<sup>3</sup></td>
739
+ <td style="text-align:center;">Linux<sup>2</sup> / Windows<sup>4</sup> </td>
740
+ <td>Any</td>
741
+ </tr>
742
+ <tr>
743
+ <th>CPU inference support</th>
744
+ <td colspan="2" style="text-align:center;">✅</td>
745
+ <td colspan="2" style="text-align:center;">❌</td>
746
+ <td>Not required</td>
747
+ </tr>
748
+ <tr>
749
+ <th>GPU Requirements</th><td colspan="2" style="text-align:center;">Volta or later architectures, 6 GB VRAM or more, or Apple Silicon</td>
750
+ <td>Apple Silicon</td>
751
+ <td>Volta or later architectures, 8 GB VRAM or more</td>
752
+ <td>Not required</td>
753
+ </tr>
754
+ <tr>
755
+ <th>Memory Requirements</th>
756
+ <td colspan="4" style="text-align:center;">Minimum 16 GB, 32 GB recommended</td>
757
+ <td>8 GB</td>
758
+ </tr>
759
+ <tr>
760
+ <th>Disk Space Requirements</th>
761
+ <td colspan="4" style="text-align:center;">20 GB or more, SSD recommended</td>
762
+ <td>2 GB</td>
763
+ </tr>
764
+ <tr>
765
+ <th>Python Version</th>
766
+ <td colspan="5" style="text-align:center;">3.10-3.13</td>
767
+ </tr>
768
+ </tbody>
740
769
  </table>
770
+
771
+ <sup>1</sup> Accuracy metric is the End-to-End Evaluation Overall score of OmniDocBench (v1.5), tested on the latest `MinerU` version.
772
+ <sup>2</sup> Linux supports only distributions released in 2019 or later.
773
+ <sup>3</sup> MLX requires macOS 13.5 or later; version 14.0 or higher is recommended.
774
+ <sup>4</sup> On Windows, vLLM is supported via WSL2 (Windows Subsystem for Linux).
775
+ <sup>5</sup> Servers compatible with the OpenAI API, such as local or remote model services deployed via inference frameworks like `vLLM`, `SGLang`, or `LMDeploy`.
776
+
741
777
 
742
778
  ### Install MinerU
743
779
 
@@ -1,28 +1,28 @@
1
1
  mineru/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
2
- mineru/version.py,sha256=yv0wJuq7dd_PlBhLN8iuPUYVsoACKuk2R3Gg5WU-tHk,22
2
+ mineru/version.py,sha256=uJ6TLK18jhCrL0aclBja7NzlAGLAyZjVpX-gq3d461k,22
3
3
  mineru/backend/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
4
4
  mineru/backend/utils.py,sha256=GLJU3IznDmhE1_qNmkU1UOtsuskIHBezgsEVO6Uar-Y,698
5
5
  mineru/backend/pipeline/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
6
- mineru/backend/pipeline/batch_analyze.py,sha256=dOnktvOMjfg84w1H34YlJg6N9_x6Yfvf14NIpOQcZqQ,22221
7
- mineru/backend/pipeline/model_init.py,sha256=OfB2MMjNmZcHl4fkqS1fT5R8I3LVoSKAHGtl8PcBfBs,9372
8
- mineru/backend/pipeline/model_json_to_middle_json.py,sha256=DtB7kE_7CtxwOMcb6QYeKzY6vMwUJNpavc5fn9z9oiI,10916
6
+ mineru/backend/pipeline/batch_analyze.py,sha256=gnilKhFlMe8-55X2PJnb-ZSVeZIS-5DxIbMpHnwLne8,20889
7
+ mineru/backend/pipeline/model_init.py,sha256=OAylOcQD9gu5TBcX7nMt7X5NpJMtQICI5IvEQ648lpI,9358
8
+ mineru/backend/pipeline/model_json_to_middle_json.py,sha256=reXkUR_wKmJD64d7vRNXMxFviwkzDlGjRshpdwsVquI,10951
9
9
  mineru/backend/pipeline/model_list.py,sha256=7cXMBfZrP0K6qWueg1D_-WoUANeSINzkn_ic9E7YQLs,222
10
10
  mineru/backend/pipeline/para_split.py,sha256=Kq95MmvkPm7rKxlCSGiTvVKyF7CErHI2eGGAs5sLl0Q,17119
11
11
  mineru/backend/pipeline/pipeline_analyze.py,sha256=rbO5AetOdnxR5ctkoDzFCFoElkz7Jgb7gi2Ct596NK8,6655
12
12
  mineru/backend/pipeline/pipeline_magic_model.py,sha256=w8jGx8f6yZN0Wf2yPP3L9rYKc9rogxreZCrUJzJvPO8,14974
13
- mineru/backend/pipeline/pipeline_middle_json_mkcontent.py,sha256=xWWOFmYL6hB8PLrxQFyRJ72dAmTIDHtqiWV-WFUfR44,14081
13
+ mineru/backend/pipeline/pipeline_middle_json_mkcontent.py,sha256=YlnEbbUnkniZXS13aLo5mjfFQvQM5SrIVvTAGBZsLmw,14478
14
14
  mineru/backend/vlm/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
15
15
  mineru/backend/vlm/model_output_to_middle_json.py,sha256=AqYX44gS9crUO_t7SuUatD71EVjow6pI6yA2Ik3gQ0s,5139
16
16
  mineru/backend/vlm/utils.py,sha256=woGqyRI4S7p69daLCU07XNXWTV27aLf7YBjjVH1x-5o,2794
17
- mineru/backend/vlm/vlm_analyze.py,sha256=nzwTGndwZFfTEvHppakyDKZxph7SYOuUZW3johY5F8c,8154
17
+ mineru/backend/vlm/vlm_analyze.py,sha256=7c5_JN1F9YTDNNgA_Rmw6xX1PI7gcIT4A4ujtGQHH9Q,8792
18
18
  mineru/backend/vlm/vlm_magic_model.py,sha256=Pd0sOr7G1crAJIVeq6h_03gNSuxmV5U8dvGTGT_rrjs,23452
19
- mineru/backend/vlm/vlm_middle_json_mkcontent.py,sha256=Ie95XpwTgi7EmidcwE_scvXMRQjE2xASU_Rm_F8EP-I,13377
19
+ mineru/backend/vlm/vlm_middle_json_mkcontent.py,sha256=5V-AU9KkxxMn0DDSQBrb15I4GVpEyiQy8uNI_tQhS6M,13498
20
20
  mineru/cli/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
21
- mineru/cli/client.py,sha256=uo7db9Wqj1Mc11MYuaM-bi54BfKKU3SFB9Urc8md5X4,6641
21
+ mineru/cli/client.py,sha256=ArnoT2psOQRnTqLpsFwPaoi-l444iIVkbBn90Pm16n8,6915
22
22
  mineru/cli/common.py,sha256=jxFJMdc-02UMO3SXAtcZ6aIdPrakAE6DCccZ9kDlPKc,14276
23
23
  mineru/cli/fast_api.py,sha256=t5bda769VbM5iokAboiJfPIOnm-r5GTFReE-KQy8L3g,10941
24
- mineru/cli/gradio_app.py,sha256=8rMdW7grwBUn0MdXyG4eOTQUzKWq6nErtMWl-vGdWbU,14525
25
- mineru/cli/models_download.py,sha256=7KA-Boe-eIt3WW6eyaxM1HfubTXLsQ8sMmT1H1X7vAc,4815
24
+ mineru/cli/gradio_app.py,sha256=6dA0ARpdOoewFeXmHrleF1amCgBV9ilY_nkWAmAmN8A,14731
25
+ mineru/cli/models_download.py,sha256=LNfoIpUlJM7m7qb2SiCxtjMDw4jILBQtZwNP2JoY81U,4815
26
26
  mineru/cli/vlm_vllm_server.py,sha256=fQJyD-gIPQ41hR_6aIaDJczl66N310t0CiZEBAfX5mc,90
27
27
  mineru/data/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
28
28
  mineru/data/data_reader_writer/__init__.py,sha256=9qnGNrsuGBMwwfsQy6oChdkz--a_LPdYWE0VZZr0yr4,490
@@ -62,8 +62,7 @@ mineru/model/mfr/unimernet/unimernet_hf/unimer_swin/configuration_unimer_swin.py
62
62
  mineru/model/mfr/unimernet/unimernet_hf/unimer_swin/image_processing_unimer_swin.py,sha256=a9kCvwzJJSRrKQNtW2oOpTwrapzep8BjGFWLhLF1T0k,6036
63
63
  mineru/model/mfr/unimernet/unimernet_hf/unimer_swin/modeling_unimer_swin.py,sha256=Q_fdmFHUBtEoAfWp9aowdwTCE2MIFMOPbYjoSyXK2iU,48929
64
64
  mineru/model/ocr/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
65
- mineru/model/ocr/paddleocr2pytorch/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
66
- mineru/model/ocr/paddleocr2pytorch/pytorch_paddle.py,sha256=wZOw82q1NARNHBW2Lk5zumjdAqzPZqnhV6rvMULvLs8,9207
65
+ mineru/model/ocr/pytorch_paddle.py,sha256=cHMTl5sKyn4BY2207-7GQ4eZl9BQUcs5ucxw_NFezII,9200
67
66
  mineru/model/ori_cls/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
68
67
  mineru/model/ori_cls/paddle_ori_cls.py,sha256=VIS22IerHST7g60AC9r2PEQIG6NQWeQaH1OrXIxNTsg,11943
69
68
  mineru/model/reading_order/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
@@ -72,7 +71,7 @@ mineru/model/reading_order/xycut.py,sha256=ezNSq_Y4UXiztB58hbXJsjTJlOBqWIjuW5A2u
72
71
  mineru/model/table/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
73
72
  mineru/model/table/cls/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
74
73
  mineru/model/table/cls/paddle_table_cls.py,sha256=5PtieKQnAzgMNRTZFgnqQsGWKTEQ3yyFWQnBRIjfQ4A,5781
75
- mineru/model/table/rec/RapidTable.py,sha256=FxO3dLNKfQrgcQU7gRI0kLAxllnoHWZptCtyyHNuMpM,5973
74
+ mineru/model/table/rec/RapidTable.py,sha256=2dNdGJsVdsGfRm6r3deERUMst5RIxH0YuiGALkQbNTw,5955
76
75
  mineru/model/table/rec/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
77
76
  mineru/model/table/rec/slanet_plus/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
78
77
  mineru/model/table/rec/slanet_plus/main.py,sha256=vfrcvQ9JBf32YZU9eNoetoqdpcrFNsA1WNqQBsG8i2o,7646
@@ -122,19 +121,10 @@ mineru/model/utils/pytorchocr/postprocess/cls_postprocess.py,sha256=1VVWXT_b1vhG
122
121
  mineru/model/utils/pytorchocr/postprocess/db_postprocess.py,sha256=AdZPF7frhQ27VVdp0GFmMcXtivwDZZfXYhzJOlP4zUs,6483
123
122
  mineru/model/utils/pytorchocr/postprocess/rec_postprocess.py,sha256=qGB3onFEFhHjqksIR1IKOx2EY98ewfsmjADjrRXg30Y,30552
124
123
  mineru/model/utils/pytorchocr/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
125
- mineru/model/utils/pytorchocr/utils/resources/arch_config.yaml,sha256=yl4qTf-q0Du0MEOuYDffOt776_6qXBU5b2K3N-IOjd8,14964
126
- mineru/model/utils/pytorchocr/utils/resources/models_config.yml,sha256=70B392J5XloC7mnK1eVi8GsWKSu7UE7qGffkEmBI9Us,2278
124
+ mineru/model/utils/pytorchocr/utils/resources/arch_config.yaml,sha256=BvaXsHXXVyxYOFYjkt7HtyZWiEyLrvHsHsJYfQZShDY,9095
125
+ mineru/model/utils/pytorchocr/utils/resources/models_config.yml,sha256=QIuqeu91pfuqya4JbxmZQSkOYke-h2Bz21O2pOLQPYc,1944
127
126
  mineru/model/utils/pytorchocr/utils/resources/pp_formulanet_arch_config.yaml,sha256=a7yueOTUrfpZo8CsK6vQokbLNB2J-P77ihaCh_LozvQ,507
128
- mineru/model/utils/pytorchocr/utils/resources/dict/arabic_dict.txt,sha256=xbaXD14RWk0Vpc7fAHpephuszp1j-Qi3IWC4VrFKu70,407
129
- mineru/model/utils/pytorchocr/utils/resources/dict/chinese_cht_dict.txt,sha256=gyVR_uHy-8l1CHctgevcjboSwA3pejXHHJ3fQ92sGoM,33443
130
- mineru/model/utils/pytorchocr/utils/resources/dict/cyrillic_dict.txt,sha256=NpqCxsjEeXhKXXJkSLg7Hq-1_vCkEppeqjkpYl3c0TI,410
131
- mineru/model/utils/pytorchocr/utils/resources/dict/devanagari_dict.txt,sha256=tfG-bYu_8aGfuWxdTKlqQjOAI0u30s4OB7WDittNGOo,508
132
- mineru/model/utils/pytorchocr/utils/resources/dict/en_dict.txt,sha256=VmLfnS0D8OjKDTsGSdasurkEtqFLPTUhRjxxw3xmjOM,190
133
- mineru/model/utils/pytorchocr/utils/resources/dict/japan_dict.txt,sha256=Hc_LQe7JBXapRbMITyKt4RztUG4k8Uh5JFsHFpjzCOg,17332
134
127
  mineru/model/utils/pytorchocr/utils/resources/dict/ka_dict.txt,sha256=-tP3ZZQyde7CE0pvvJtSeFQmZBEE1OfbOhWdxz80Hd4,452
135
- mineru/model/utils/pytorchocr/utils/resources/dict/korean_dict.txt,sha256=qh_ciuj3zUCg7E7bRy6wQh4RQn5sz-6ZFUQHQsGLCiA,14480
136
- mineru/model/utils/pytorchocr/utils/resources/dict/latin_dict.txt,sha256=jm1ONil4jDXDH35TAofWFHtUm7eiZb1nCLsoETRCniw,468
137
- mineru/model/utils/pytorchocr/utils/resources/dict/ppocr_keys_v1.txt,sha256=KLI2KtSrLcOHaapy_rU146nds_0qdYWgWSDmOTsdx_c,26249
138
128
  mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv4_doc_dict.txt,sha256=pbw4h8Q8kB5aP5exP_rfHFdU7efMjJ9aviLodafEg3I,62346
139
129
  mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_arabic_dict.txt,sha256=f5L327m3WkeHqDv7T20UqKtRVSUTDJ1AqQNvYc9pmek,2369
140
130
  mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_cyrillic_dict.txt,sha256=20CqUs6xEgVb6AxpSv32VdXSxPeHNwRSTMFqRHypE7o,2781
@@ -144,12 +134,10 @@ mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_el_dict.txt,sha256=Md
144
134
  mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_en_dict.txt,sha256=4CWmbTHzJ7oMIy4D9AeujRBeHnCefMs_QIqneMJOcNY,1416
145
135
  mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_eslav_dict.txt,sha256=PpXxWBVXFihwys26WvkaTGviiQcQ05Www8dXjn7l5us,1663
146
136
  mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_korean_dict.txt,sha256=qIBxxowBcHSJuqeevgQFt761zKIp9PyUzD75kjKIAtc,47451
147
- mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_latin_dict.txt,sha256=PAqKebYSZTwl92UnFxT3EoHk6VWWLBU-Jyt7jB0rE_8,1634
137
+ mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_latin_dict.txt,sha256=zLzEVzCz-72QUMW8dNtqmQZxQe8QNePRSImoSmubGv8,2616
148
138
  mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_ta_dict.txt,sha256=hbVBNSrhjca6bUcVLYv4rf9rAmbmBdLu8pkMG_RmEXs,1723
149
139
  mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_te_dict.txt,sha256=Qvg_XT_bUHeOT6W2bFjZmlmrd5IVHF5080uP_XthydY,1831
150
140
  mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_th_dict.txt,sha256=V_VAb5S7Zoj7cHf3vmXwi71xzs9IwB6ibFIstcSDa3o,1767
151
- mineru/model/utils/pytorchocr/utils/resources/dict/ta_dict.txt,sha256=6T5pSBSv2f8ekYtvS7Qmf7TGWpNE7l10ZPkTW5DAonA,352
152
- mineru/model/utils/pytorchocr/utils/resources/dict/te_dict.txt,sha256=7plGpg13AZd0dOiYg2lKTKIOqjhoojM0v3lA3NAI8Pk,429
153
141
  mineru/model/utils/tools/__init__.py,sha256=xEqR65Z8YOzOLorLjK0LCHos2zX-tCuxSrxndjU00hE,49
154
142
  mineru/model/utils/tools/infer/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
155
143
  mineru/model/utils/tools/infer/predict_cls.py,sha256=8RmKl1vejnZl65caHZNV2ta6hMsg5B_LE-FuqCO8T8A,4225
@@ -163,8 +151,9 @@ mineru/resources/header.html,sha256=PUselBXLBn8gfeP3zwEtj6zIxfhcCN4vN_B796nQFNQ,
163
151
  mineru/resources/fasttext-langdetect/lid.176.ftz,sha256=jzRyz-hzintgmejpmcPL-uDc0VaWqsfXc4qAOdtgPoM,938013
164
152
  mineru/utils/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
165
153
  mineru/utils/block_pre_proc.py,sha256=uGBmxf2MR9bplTnQI8xHjCI-kj3plRhJr0hcWKidbOQ,9632
166
- mineru/utils/block_sort.py,sha256=mViceDw3O2ksBDFxt-wmX67bCZOwKyp68yZnEjS3Ijc,12934
154
+ mineru/utils/block_sort.py,sha256=5e1mOLB3W7xu5Y1hmhvGSHPL_aQ41R_4VXcP4vjYAOU,12976
167
155
  mineru/utils/boxbase.py,sha256=moP660AmZq_udHEsfvFkTQdJ4gjrrBwN7t0Enx7CIL8,6903
156
+ mineru/utils/check_mac_env.py,sha256=pbmbcnS60zZRqNRBxFJbbPrCosU1lC464b0v6JUlgaE,1031
168
157
  mineru/utils/cli_parser.py,sha256=4seFAu1kulsYnw6WM2q_cxgEOt2tErZVkI-LNEF_kGw,1445
169
158
  mineru/utils/config_reader.py,sha256=IRVWTpBnbnRpck6eXZUKw-fcLt7hon5S4uqWW-RBb1w,4075
170
159
  mineru/utils/cut_image.py,sha256=g3m4nfcJNWlxi-P0kpXTtlmspXkMcLCfGwmYuQ-Z2hE,751
@@ -174,7 +163,7 @@ mineru/utils/format_utils.py,sha256=2s89vHcSISjuolk8Hvg3K-5-rRbiT3Us7eFLzUKrNKs,
174
163
  mineru/utils/guess_suffix_or_lang.py,sha256=nznyQpUn1BSA8JNw9HuG3pVV-xtVAtrtcGuHZ-VXt9M,856
175
164
  mineru/utils/hash_utils.py,sha256=UPS_8NRBmVumdyOv16Lmv6Ly2xK8OVDJEe5gG6gKIFk,857
176
165
  mineru/utils/language.py,sha256=7RT3mxSa7jdpoC5ySd7ZddHA7TO7UsnmDOWiYZAxuyg,1433
177
- mineru/utils/llm_aided.py,sha256=eBGKCD7cJBjkyn38yqCdh0S-fgRG9fLuQCByLDQuyWs,4983
166
+ mineru/utils/llm_aided.py,sha256=9WUytvxenSAuaWR4sTQhVPQ5h8pY0wVOH1O2sj_6dLs,5149
178
167
  mineru/utils/magic_model_utils.py,sha256=2xOvi4oqg3MSw1FUrJTnYDtWeFrrm6qbmlEorLZSaYs,5650
179
168
  mineru/utils/model_utils.py,sha256=6OsgFLsABX5JuShSzCMSNHWV-yi-1cjwHweafyxIgRo,18448
180
169
  mineru/utils/models_download_utils.py,sha256=UfjvwhxO6BkJHa5JSpEVNZ71GoLMPMmJpym3THET2T4,2957
@@ -187,9 +176,9 @@ mineru/utils/run_async.py,sha256=rPeP4BCZerR8VByRDhiYzfZiahLVqoZEBVAS54dAjNg,128
187
176
  mineru/utils/span_block_fix.py,sha256=0eVQjJCrT03woRt9hoh6Uu42Tp1dacfGTv2x3B9qq94,8797
188
177
  mineru/utils/span_pre_proc.py,sha256=h41q2uQajI0xQbc_30hqaju1dv3oVYxBAlKgURl8HIc,13692
189
178
  mineru/utils/table_merge.py,sha256=d98zNbM1ZQ8V1kUt6RugParNUNPv7DGL-XKIzR3iJVQ,15360
190
- mineru-2.6.1.dist-info/licenses/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
191
- mineru-2.6.1.dist-info/METADATA,sha256=bY_TtFykxzJJsqbtGMC2C7Tl2wYx4EOtT9w6Z3DKMuA,68358
192
- mineru-2.6.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
193
- mineru-2.6.1.dist-info/entry_points.txt,sha256=luXmbhPiZK_tKlRgWuYOaW_V6EFpG-yJcAevVv9MEqE,252
194
- mineru-2.6.1.dist-info/top_level.txt,sha256=zuGQfZcbsHv4I4oKI9gaKPqEWBFm6xJroKuug2LnKP8,7
195
- mineru-2.6.1.dist-info/RECORD,,
179
+ mineru-2.6.3.dist-info/licenses/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
180
+ mineru-2.6.3.dist-info/METADATA,sha256=mUBTxFG5tgdyQ1caZVRNrk4MhIX36PECY09dwCNYXZ4,70689
181
+ mineru-2.6.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
182
+ mineru-2.6.3.dist-info/entry_points.txt,sha256=luXmbhPiZK_tKlRgWuYOaW_V6EFpG-yJcAevVv9MEqE,252
183
+ mineru-2.6.3.dist-info/top_level.txt,sha256=zuGQfZcbsHv4I4oKI9gaKPqEWBFm6xJroKuug2LnKP8,7
184
+ mineru-2.6.3.dist-info/RECORD,,
@@ -1 +0,0 @@
1
- # Copyright (c) Opendatalab. All rights reserved.
@@ -1,162 +0,0 @@
1
-
2
- !
3
- #
4
- $
5
- %
6
- &
7
- '
8
- (
9
- +
10
- ,
11
- -
12
- .
13
- /
14
- 0
15
- 1
16
- 2
17
- 3
18
- 4
19
- 5
20
- 6
21
- 7
22
- 8
23
- 9
24
- :
25
- ?
26
- @
27
- A
28
- B
29
- C
30
- D
31
- E
32
- F
33
- G
34
- H
35
- I
36
- J
37
- K
38
- L
39
- M
40
- N
41
- O
42
- P
43
- Q
44
- R
45
- S
46
- T
47
- U
48
- V
49
- W
50
- X
51
- Y
52
- Z
53
- _
54
- a
55
- b
56
- c
57
- d
58
- e
59
- f
60
- g
61
- h
62
- i
63
- j
64
- k
65
- l
66
- m
67
- n
68
- o
69
- p
70
- q
71
- r
72
- s
73
- t
74
- u
75
- v
76
- w
77
- x
78
- y
79
- z
80
- É
81
- é
82
- ء
83
- آ
84
- أ
85
- ؤ
86
- إ
87
- ئ
88
- ا
89
- ب
90
- ة
91
- ت
92
- ث
93
- ج
94
- ح
95
- خ
96
- د
97
- ذ
98
- ر
99
- ز
100
- س
101
- ش
102
- ص
103
- ض
104
- ط
105
- ظ
106
- ع
107
- غ
108
- ف
109
- ق
110
- ك
111
- ل
112
- م
113
- ن
114
- ه
115
- و
116
- ى
117
- ي
118
- ً
119
- ٌ
120
- ٍ
121
- َ
122
- ُ
123
- ِ
124
- ّ
125
- ْ
126
- ٓ
127
- ٔ
128
- ٰ
129
- ٱ
130
- ٹ
131
- پ
132
- چ
133
- ڈ
134
- ڑ
135
- ژ
136
- ک
137
- ڭ
138
- گ
139
- ں
140
- ھ
141
- ۀ
142
- ہ
143
- ۂ
144
- ۃ
145
- ۆ
146
- ۇ
147
- ۈ
148
- ۋ
149
- ی
150
- ې
151
- ے
152
- ۓ
153
- ە
154
- ١
155
- ٢
156
- ٣
157
- ٤
158
- ٥
159
- ٦
160
- ٧
161
- ٨
162
- ٩