mineru 2.2.2__py3-none-any.whl → 2.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mineru/backend/pipeline/pipeline_middle_json_mkcontent.py +3 -3
- mineru/backend/vlm/model_output_to_middle_json.py +123 -0
- mineru/backend/vlm/vlm_analyze.py +97 -16
- mineru/backend/vlm/vlm_magic_model.py +201 -135
- mineru/backend/vlm/vlm_middle_json_mkcontent.py +52 -11
- mineru/cli/client.py +6 -5
- mineru/cli/common.py +17 -16
- mineru/cli/fast_api.py +9 -7
- mineru/cli/gradio_app.py +15 -16
- mineru/cli/vlm_vllm_server.py +4 -0
- mineru/model/table/rec/unet_table/main.py +8 -0
- mineru/model/vlm_vllm_model/__init__.py +0 -0
- mineru/model/vlm_vllm_model/server.py +51 -0
- mineru/resources/header.html +10 -2
- mineru/utils/draw_bbox.py +32 -10
- mineru/utils/enum_class.py +16 -2
- mineru/utils/guess_suffix_or_lang.py +20 -0
- mineru/utils/span_block_fix.py +4 -2
- mineru/version.py +1 -1
- {mineru-2.2.2.dist-info → mineru-2.5.0.dist-info}/METADATA +70 -25
- {mineru-2.2.2.dist-info → mineru-2.5.0.dist-info}/RECORD +25 -38
- {mineru-2.2.2.dist-info → mineru-2.5.0.dist-info}/entry_points.txt +1 -1
- mineru/backend/vlm/base_predictor.py +0 -186
- mineru/backend/vlm/hf_predictor.py +0 -217
- mineru/backend/vlm/predictor.py +0 -111
- mineru/backend/vlm/sglang_client_predictor.py +0 -443
- mineru/backend/vlm/sglang_engine_predictor.py +0 -246
- mineru/backend/vlm/token_to_middle_json.py +0 -122
- mineru/backend/vlm/utils.py +0 -40
- mineru/cli/vlm_sglang_server.py +0 -4
- mineru/model/vlm_hf_model/__init__.py +0 -9
- mineru/model/vlm_hf_model/configuration_mineru2.py +0 -38
- mineru/model/vlm_hf_model/image_processing_mineru2.py +0 -269
- mineru/model/vlm_hf_model/modeling_mineru2.py +0 -449
- mineru/model/vlm_sglang_model/__init__.py +0 -14
- mineru/model/vlm_sglang_model/engine.py +0 -264
- mineru/model/vlm_sglang_model/image_processor.py +0 -213
- mineru/model/vlm_sglang_model/logit_processor.py +0 -90
- mineru/model/vlm_sglang_model/model.py +0 -453
- mineru/model/vlm_sglang_model/server.py +0 -75
- {mineru-2.2.2.dist-info → mineru-2.5.0.dist-info}/WHEEL +0 -0
- {mineru-2.2.2.dist-info → mineru-2.5.0.dist-info}/licenses/LICENSE.md +0 -0
- {mineru-2.2.2.dist-info → mineru-2.5.0.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mineru
|
|
3
|
-
Version: 2.2.2
|
|
3
|
+
Version: 2.5.0
|
|
4
4
|
Summary: A practical tool for converting PDF to Markdown
|
|
5
5
|
License: AGPL-3.0
|
|
6
6
|
Project-URL: homepage, https://mineru.net/
|
|
@@ -36,6 +36,8 @@ Requires-Dist: fast-langdetect<0.3.0,>=0.2.3
|
|
|
36
36
|
Requires-Dist: scikit-image<1.0.0,>=0.25.0
|
|
37
37
|
Requires-Dist: openai<2,>=1.70.0
|
|
38
38
|
Requires-Dist: beautifulsoup4<5,>=4.13.5
|
|
39
|
+
Requires-Dist: magika<0.7.0,>=0.6.2
|
|
40
|
+
Requires-Dist: mineru-vl-utils<1,>=0.1.6
|
|
39
41
|
Provides-Extra: test
|
|
40
42
|
Requires-Dist: mineru[core]; extra == "test"
|
|
41
43
|
Requires-Dist: pytest; extra == "test"
|
|
@@ -43,12 +45,11 @@ Requires-Dist: pytest-cov; extra == "test"
|
|
|
43
45
|
Requires-Dist: coverage; extra == "test"
|
|
44
46
|
Requires-Dist: fuzzywuzzy; extra == "test"
|
|
45
47
|
Provides-Extra: vlm
|
|
46
|
-
Requires-Dist:
|
|
47
|
-
Requires-Dist:
|
|
48
|
+
Requires-Dist: torch<2.8.0,>=2.6.0; extra == "vlm"
|
|
49
|
+
Requires-Dist: transformers<5.0.0,>=4.51.1; extra == "vlm"
|
|
48
50
|
Requires-Dist: accelerate>=1.5.1; extra == "vlm"
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
Requires-Dist: sglang[all]<0.4.11,>=0.4.7; extra == "sglang"
|
|
51
|
+
Provides-Extra: vllm
|
|
52
|
+
Requires-Dist: vllm==0.10.1.1; extra == "vllm"
|
|
52
53
|
Provides-Extra: pipeline
|
|
53
54
|
Requires-Dist: matplotlib<4,>=3.10; extra == "pipeline"
|
|
54
55
|
Requires-Dist: ultralytics<9,>=8.3.48; extra == "pipeline"
|
|
@@ -77,7 +78,7 @@ Requires-Dist: mineru[api]; extra == "core"
|
|
|
77
78
|
Requires-Dist: mineru[gradio]; extra == "core"
|
|
78
79
|
Provides-Extra: all
|
|
79
80
|
Requires-Dist: mineru[core]; extra == "all"
|
|
80
|
-
Requires-Dist: mineru[sglang]; extra == "all"
|
|
81
|
+
Requires-Dist: mineru[vllm]; extra == "all"
|
|
81
82
|
Dynamic: license-file
|
|
82
83
|
|
|
83
84
|
<div align="center" xmlns="http://www.w3.org/1999/html">
|
|
@@ -126,25 +127,67 @@ Dynamic: license-file
|
|
|
126
127
|
|
|
127
128
|
# Changelog
|
|
128
129
|
|
|
129
|
-
- 2025/09/10 2.2.2 Released
|
|
130
|
-
- Fixed the issue where the new table recognition model would affect the overall parsing task when some table parsing failed
|
|
130
|
+
- 2025/09/19 2.5.0 Released
|
|
131
131
|
|
|
132
|
-
|
|
133
|
-
|
|
132
|
+
We are officially releasing MinerU2.5, currently the most powerful multimodal large model for document parsing.
|
|
133
|
+
With only 1.2B parameters, MinerU2.5's accuracy on the OmniDocBench benchmark comprehensively surpasses top-tier multimodal models like Gemini 2.5 Pro, GPT-4o, and Qwen2.5-VL-72B. It also significantly outperforms leading specialized models such as dots.ocr, MonkeyOCR, and PP-StructureV3.
|
|
134
|
+
The model has been released on [HuggingFace](https://huggingface.co/opendatalab/MinerU2.5-2509-1.2B) and [ModelScope](https://modelscope.cn/models/opendatalab/MinerU2.5-2509-1.2B) platforms. Welcome to download and use!
|
|
135
|
+
- Core Highlights:
|
|
136
|
+
- SOTA Performance with Extreme Efficiency: As a 1.2B model, it achieves State-of-the-Art (SOTA) results that exceed models in the 10B and 100B+ classes, redefining the performance-per-parameter standard in document AI.
|
|
137
|
+
- Advanced Architecture for Across-the-Board Leadership: By combining a two-stage inference pipeline (decoupling layout analysis from content recognition) with a native high-resolution architecture, it achieves SOTA performance across five key areas: layout analysis, text recognition, formula recognition, table recognition, and reading order.
|
|
138
|
+
- Key Capability Enhancements:
|
|
139
|
+
- Layout Detection: Delivers more complete results by accurately covering non-body content like headers, footers, and page numbers. It also provides more precise element localization and natural format reconstruction for lists and references.
|
|
140
|
+
- Table Parsing: Drastically improves parsing for challenging cases, including rotated tables, borderless/semi-structured tables, and long/complex tables.
|
|
141
|
+
- Formula Recognition: Significantly boosts accuracy for complex, long-form, and hybrid Chinese-English formulas, greatly enhancing the parsing capability for mathematical documents.
|
|
134
142
|
|
|
135
|
-
|
|
136
|
-
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
- Removed the `pipeline_old_linux` installation option, no longer supporting legacy Linux systems such as `CentOS 7`, to provide better support for `uv`'s `sync`/`run` commands
|
|
143
|
+
Additionally, with the release of vlm 2.5, we have made some adjustments to the repository:
|
|
144
|
+
- The vlm backend has been upgraded to version 2.5, supporting the MinerU2.5 model and no longer compatible with the MinerU2.0-2505-0.9B model. The last version supporting the 2.0 model is mineru-2.2.2.
|
|
145
|
+
- VLM inference-related code has been moved to [mineru_vl_utils](https://github.com/opendatalab/mineru-vl-utils), reducing coupling with the main mineru repository and facilitating independent iteration in the future.
|
|
146
|
+
- The vlm accelerated inference framework has been switched from `sglang` to `vllm`, achieving full compatibility with the vllm ecosystem, allowing users to use the MinerU2.5 model and accelerated inference on any platform that supports the vllm framework.
|
|
147
|
+
- Due to major upgrades in the vlm model supporting more layout types, we have made some adjustments to the structure of the parsing intermediate file `middle.json` and result file `content_list.json`. Please refer to the [documentation](https://opendatalab.github.io/MinerU/reference/output_files/) for details.
|
|
148
|
+
|
|
149
|
+
Other repository optimizations:
|
|
150
|
+
- Removed file extension whitelist validation for input files. When input files are PDF documents or images, there are no longer requirements for file extensions, improving usability.
|
|
144
151
|
|
|
145
152
|
<details>
|
|
146
153
|
<summary>History Log</summary>
|
|
147
154
|
|
|
155
|
+
<details>
|
|
156
|
+
<summary>2025/09/10 2.2.2 Released</summary>
|
|
157
|
+
<ul>
|
|
158
|
+
<li>Fixed the issue where the new table recognition model would affect the overall parsing task when some table parsing failed</li>
|
|
159
|
+
</ul>
|
|
160
|
+
</details>
|
|
161
|
+
|
|
162
|
+
<details>
|
|
163
|
+
<summary>2025/09/08 2.2.1 Released</summary>
|
|
164
|
+
<ul>
|
|
165
|
+
<li>Fixed the issue where some newly added models were not downloaded when using the model download command.</li>
|
|
166
|
+
</ul>
|
|
167
|
+
</details>
|
|
168
|
+
|
|
169
|
+
<details>
|
|
170
|
+
<summary>2025/09/05 2.2.0 Released</summary>
|
|
171
|
+
<ul>
|
|
172
|
+
<li>
|
|
173
|
+
Major Updates
|
|
174
|
+
<ul>
|
|
175
|
+
<li>In this version, we focused on improving table parsing accuracy by introducing a new <a href="https://github.com/RapidAI/TableStructureRec">wired table recognition model</a> and a brand-new hybrid table structure parsing algorithm, significantly enhancing the table recognition capabilities of the <code>pipeline</code> backend.</li>
|
|
176
|
+
<li>We also added support for cross-page table merging, which is supported by both <code>pipeline</code> and <code>vlm</code> backends, further improving the completeness and accuracy of table parsing.</li>
|
|
177
|
+
</ul>
|
|
178
|
+
</li>
|
|
179
|
+
<li>
|
|
180
|
+
Other Updates
|
|
181
|
+
<ul>
|
|
182
|
+
<li>The <code>pipeline</code> backend now supports 270-degree rotated table parsing, bringing support for table parsing in 0/90/270-degree orientations</li>
|
|
183
|
+
<li><code>pipeline</code> added OCR capability support for Thai and Greek, and updated the English OCR model to the latest version. English recognition accuracy improved by 11%, Thai recognition model accuracy is 82.68%, and Greek recognition model accuracy is 89.28% (by PPOCRv5)</li>
|
|
184
|
+
<li>Added <code>bbox</code> field (mapped to 0-1000 range) in the output <code>content_list.json</code>, making it convenient for users to directly obtain position information for each content block</li>
|
|
185
|
+
<li>Removed the <code>pipeline_old_linux</code> installation option, no longer supporting legacy Linux systems such as <code>CentOS 7</code>, to provide better support for <code>uv</code>'s <code>sync</code>/<code>run</code> commands</li>
|
|
186
|
+
</ul>
|
|
187
|
+
</li>
|
|
188
|
+
</ul>
|
|
189
|
+
</details>
|
|
190
|
+
|
|
148
191
|
<details>
|
|
149
192
|
<summary>2025/08/01 2.1.10 Released</summary>
|
|
150
193
|
<ul>
|
|
@@ -641,7 +684,7 @@ A WebUI developed based on Gradio, with a simple interface and only core parsing
|
|
|
641
684
|
<td>Parsing Backend</td>
|
|
642
685
|
<td>pipeline</td>
|
|
643
686
|
<td>vlm-transformers</td>
|
|
644
|
-
<td>vlm-
|
|
687
|
+
<td>vlm-vllm</td>
|
|
645
688
|
</tr>
|
|
646
689
|
<tr>
|
|
647
690
|
<td>Operating System</td>
|
|
@@ -690,8 +733,8 @@ uv pip install -e .[core]
|
|
|
690
733
|
```
|
|
691
734
|
|
|
692
735
|
> [!TIP]
|
|
693
|
-
> `mineru[core]` includes all core features except `
|
|
694
|
-
> If you need to use `
|
|
736
|
+
> `mineru[core]` includes all core features except `vLLM` acceleration, compatible with Windows / Linux / macOS systems, suitable for most users.
|
|
737
|
+
> If you need to use `vLLM` acceleration for VLM model inference or install a lightweight client on edge devices, please refer to the documentation [Extension Modules Installation Guide](https://opendatalab.github.io/MinerU/quick_start/extension_modules/).
|
|
695
738
|
|
|
696
739
|
---
|
|
697
740
|
|
|
@@ -719,8 +762,8 @@ You can use MinerU for PDF parsing through various methods such as command line,
|
|
|
719
762
|
- [x] Handwritten Text Recognition
|
|
720
763
|
- [x] Vertical Text Recognition
|
|
721
764
|
- [x] Latin Accent Mark Recognition
|
|
722
|
-
- [
|
|
723
|
-
- [
|
|
765
|
+
- [x] Code block recognition in the main text
|
|
766
|
+
- [x] [Chemical formula recognition](docs/chemical_knowledge_introduction/introduction.pdf)(mineru.net)
|
|
724
767
|
- [ ] Geometric shape recognition
|
|
725
768
|
|
|
726
769
|
# Known Issues
|
|
@@ -768,6 +811,7 @@ Currently, some models in this project are trained based on YOLO. However, since
|
|
|
768
811
|
- [pdftext](https://github.com/datalab-to/pdftext)
|
|
769
812
|
- [pdfminer.six](https://github.com/pdfminer/pdfminer.six)
|
|
770
813
|
- [pypdf](https://github.com/py-pdf/pypdf)
|
|
814
|
+
- [magika](https://github.com/google/magika)
|
|
771
815
|
|
|
772
816
|
# Citation
|
|
773
817
|
|
|
@@ -810,3 +854,4 @@ Currently, some models in this project are trained based on YOLO. However, since
|
|
|
810
854
|
- [OmniDocBench (A Comprehensive Benchmark for Document Parsing and Evaluation)](https://github.com/opendatalab/OmniDocBench)
|
|
811
855
|
- [Magic-HTML (Mixed web page extraction tool)](https://github.com/opendatalab/magic-html)
|
|
812
856
|
- [Magic-Doc (Fast speed ppt/pptx/doc/docx/pdf extraction tool)](https://github.com/InternLM/magic-doc)
|
|
857
|
+
- [Dingo: A Comprehensive AI Data Quality Evaluation Tool](https://github.com/MigoXLab/dingo)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
mineru/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
2
|
-
mineru/version.py,sha256=
|
|
2
|
+
mineru/version.py,sha256=fMbNgIJqxiZEaSBLadLBt4rZpCHqarzb4Okt-aWsp2E,22
|
|
3
3
|
mineru/backend/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
4
4
|
mineru/backend/pipeline/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
5
5
|
mineru/backend/pipeline/batch_analyze.py,sha256=rp9nHYmuBBytlJIc3oRwqTtgFd5mhRak5UMhQ4mu02Y,21896
|
|
@@ -9,25 +9,19 @@ mineru/backend/pipeline/model_list.py,sha256=62_61GQ9ZgS5YmNSpzRkts9kLtbSu8dYrfn
|
|
|
9
9
|
mineru/backend/pipeline/para_split.py,sha256=Kq95MmvkPm7rKxlCSGiTvVKyF7CErHI2eGGAs5sLl0Q,17119
|
|
10
10
|
mineru/backend/pipeline/pipeline_analyze.py,sha256=rbO5AetOdnxR5ctkoDzFCFoElkz7Jgb7gi2Ct596NK8,6655
|
|
11
11
|
mineru/backend/pipeline/pipeline_magic_model.py,sha256=w8jGx8f6yZN0Wf2yPP3L9rYKc9rogxreZCrUJzJvPO8,14974
|
|
12
|
-
mineru/backend/pipeline/pipeline_middle_json_mkcontent.py,sha256=
|
|
12
|
+
mineru/backend/pipeline/pipeline_middle_json_mkcontent.py,sha256=xWWOFmYL6hB8PLrxQFyRJ72dAmTIDHtqiWV-WFUfR44,14081
|
|
13
13
|
mineru/backend/vlm/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
14
|
-
mineru/backend/vlm/
|
|
15
|
-
mineru/backend/vlm/
|
|
16
|
-
mineru/backend/vlm/
|
|
17
|
-
mineru/backend/vlm/
|
|
18
|
-
mineru/backend/vlm/sglang_engine_predictor.py,sha256=gnAEaYKCx1_0PGuo7JRcOisMZmgruUucBvAhyrhHui8,8587
|
|
19
|
-
mineru/backend/vlm/token_to_middle_json.py,sha256=JDnXI2f7YgcGRJ5ZrZtZevlX6gdiUVAsfT70vByWIoo,9622
|
|
20
|
-
mineru/backend/vlm/utils.py,sha256=QwTEUpnMVVF30CiNAHJRN7n8ullqGYEH8TQyXy7my08,1361
|
|
21
|
-
mineru/backend/vlm/vlm_analyze.py,sha256=zc6BpduUmIp0ysHKLAlAUVnKA-xRYo_8Pw6QXlVQrko,3588
|
|
22
|
-
mineru/backend/vlm/vlm_magic_model.py,sha256=8e8EnpNZBKyPBLP1UPlRRVnbV9Sxd5u55OgaSdfpECQ,14256
|
|
23
|
-
mineru/backend/vlm/vlm_middle_json_mkcontent.py,sha256=AU0udnwLnxSYxFOhzSn3zG5j3-ic-zuAJAnR_RBrNck,11138
|
|
14
|
+
mineru/backend/vlm/model_output_to_middle_json.py,sha256=e4Yc98_Cth2cjVPybPGehD5cpjGcTka4D2qKKrP_qqo,5121
|
|
15
|
+
mineru/backend/vlm/vlm_analyze.py,sha256=Li5peZS4YTYl7WI0Zcz2v_wPcHSCsZnYSI-_lZZOsOo,7639
|
|
16
|
+
mineru/backend/vlm/vlm_magic_model.py,sha256=o1WKwgArV0f4lp7ufmSq9zRZaM5bnfOdx1AQtKnosro,16379
|
|
17
|
+
mineru/backend/vlm/vlm_middle_json_mkcontent.py,sha256=DX2TRpfSIqpuhWqou5QXNtCW40ddQi0kdQxXi4QgzKs,13375
|
|
24
18
|
mineru/cli/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
25
|
-
mineru/cli/client.py,sha256=
|
|
26
|
-
mineru/cli/common.py,sha256=
|
|
27
|
-
mineru/cli/fast_api.py,sha256
|
|
28
|
-
mineru/cli/gradio_app.py,sha256=
|
|
19
|
+
mineru/cli/client.py,sha256=uo7db9Wqj1Mc11MYuaM-bi54BfKKU3SFB9Urc8md5X4,6641
|
|
20
|
+
mineru/cli/common.py,sha256=yJPdrwSYVidl2hTJ2Hn2YhnfH97GJ-QZi20dGFz7h5c,14025
|
|
21
|
+
mineru/cli/fast_api.py,sha256=-GDT4gOCjKQrRjrx9WVJw-D-EC7Adv-F2rAiSWdl2CA,11328
|
|
22
|
+
mineru/cli/gradio_app.py,sha256=m1ppNVVOS-gdMpQPWOgHQCBQkLZ4B6gKCZuUyhfsR1g,13482
|
|
29
23
|
mineru/cli/models_download.py,sha256=TCKtzTRJ-ShaqZnRQID40QsILqp2b3basU142FMTmns,4775
|
|
30
|
-
mineru/cli/
|
|
24
|
+
mineru/cli/vlm_vllm_server.py,sha256=fQJyD-gIPQ41hR_6aIaDJczl66N310t0CiZEBAfX5mc,90
|
|
31
25
|
mineru/data/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
32
26
|
mineru/data/data_reader_writer/__init__.py,sha256=9qnGNrsuGBMwwfsQy6oChdkz--a_LPdYWE0VZZr0yr4,490
|
|
33
27
|
mineru/data/data_reader_writer/base.py,sha256=nqmAcdHOXMOJO6RAT3ILligDFaw8Op0STyCw5yOzAbI,1706
|
|
@@ -142,23 +136,15 @@ mineru/model/table/rec/slanet_plus/matcher_utils.py,sha256=9wt_ydeeViLd57bU6g3ln
|
|
|
142
136
|
mineru/model/table/rec/slanet_plus/table_structure.py,sha256=Ve9eUdA0ivHf5bf9gwvHHfb7-E7drJLP3S3MPlh3uZ0,3844
|
|
143
137
|
mineru/model/table/rec/slanet_plus/table_structure_utils.py,sha256=YYSkwN2WdLx7qkWMSGkPY7yXOH5ENVhg5CsRGhtZ5Wk,19281
|
|
144
138
|
mineru/model/table/rec/unet_table/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
145
|
-
mineru/model/table/rec/unet_table/main.py,sha256=
|
|
139
|
+
mineru/model/table/rec/unet_table/main.py,sha256=6O5g2Gjml_NBMWKBK1rGdwpcjGIi6JHtjIhxjsGhXIY,15545
|
|
146
140
|
mineru/model/table/rec/unet_table/table_recover.py,sha256=rSyeWyuP10M8dLKA5e0n4P2DXMYbVbmgLxEcdZA8_0E,9059
|
|
147
141
|
mineru/model/table/rec/unet_table/table_structure_unet.py,sha256=beBMmBHAOR2lAuf2rcOKRSbFaJqwuIgMJWxWQsFmIRI,7908
|
|
148
142
|
mineru/model/table/rec/unet_table/utils.py,sha256=CYAqJW0wePJk4NAemb8W203N7E32v0ujiWbxanDhd8I,16083
|
|
149
143
|
mineru/model/table/rec/unet_table/utils_table_line_rec.py,sha256=zrCdPwI4M8nu0FEfd7lRJAe0z8kYq3KFbzwElM82USE,11174
|
|
150
144
|
mineru/model/table/rec/unet_table/utils_table_recover.py,sha256=XksJsY82ZS0kqUnNT-jvaYzxJ3V3svMSzj0puwIau1k,10651
|
|
151
|
-
mineru/model/
|
|
152
|
-
mineru/model/
|
|
153
|
-
mineru/
|
|
154
|
-
mineru/model/vlm_hf_model/modeling_mineru2.py,sha256=hBqvzmVR3w1Bm6OhSRc0IvuC7AG5d0j73EXYsmw_2xA,20365
|
|
155
|
-
mineru/model/vlm_sglang_model/__init__.py,sha256=ykNvPdCw-vmSy2UySBGhRkpDfhqEQ4hZ3FGoMU-nhQ8,558
|
|
156
|
-
mineru/model/vlm_sglang_model/engine.py,sha256=E8FJBTclxQNDjnEluiz_QlambyPfo60nKm7ogUZFArg,10128
|
|
157
|
-
mineru/model/vlm_sglang_model/image_processor.py,sha256=lUmHWSNSPED0RhYWwl6-z4R3KePPz12oBmkXBj-MRO4,8561
|
|
158
|
-
mineru/model/vlm_sglang_model/logit_processor.py,sha256=TbNi-QEFXm9lTB94640_dNYZOthpnBcKJnPgwX2eSDs,3679
|
|
159
|
-
mineru/model/vlm_sglang_model/model.py,sha256=faA8UkUNbITvKL8afpMRe4ZtukgtUgaM_uljZVk-2M0,22813
|
|
160
|
-
mineru/model/vlm_sglang_model/server.py,sha256=UZIy61GvsqCtDfyQpMGTeiuJA2XiifmfbEfLIoWkqfg,2403
|
|
161
|
-
mineru/resources/header.html,sha256=Ebc1VPxwLdbcmNHhHFlcb1G_WCwB60i7rX1pec2FsJM,3919
|
|
145
|
+
mineru/model/vlm_vllm_model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
146
|
+
mineru/model/vlm_vllm_model/server.py,sha256=TLgiScQCTfuY3g5mkI43I_uxuSJJ2ItC19HDeypB7jI,1578
|
|
147
|
+
mineru/resources/header.html,sha256=NO8ZZdCYLqu_E72AtNcuRnA2NbFBamScjjGhtg9PKiM,4409
|
|
162
148
|
mineru/resources/fasttext-langdetect/lid.176.ftz,sha256=jzRyz-hzintgmejpmcPL-uDc0VaWqsfXc4qAOdtgPoM,938013
|
|
163
149
|
mineru/utils/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
164
150
|
mineru/utils/block_pre_proc.py,sha256=uGBmxf2MR9bplTnQI8xHjCI-kj3plRhJr0hcWKidbOQ,9632
|
|
@@ -167,9 +153,10 @@ mineru/utils/boxbase.py,sha256=moP660AmZq_udHEsfvFkTQdJ4gjrrBwN7t0Enx7CIL8,6903
|
|
|
167
153
|
mineru/utils/cli_parser.py,sha256=4seFAu1kulsYnw6WM2q_cxgEOt2tErZVkI-LNEF_kGw,1445
|
|
168
154
|
mineru/utils/config_reader.py,sha256=IRVWTpBnbnRpck6eXZUKw-fcLt7hon5S4uqWW-RBb1w,4075
|
|
169
155
|
mineru/utils/cut_image.py,sha256=g3m4nfcJNWlxi-P0kpXTtlmspXkMcLCfGwmYuQ-Z2hE,751
|
|
170
|
-
mineru/utils/draw_bbox.py,sha256=
|
|
171
|
-
mineru/utils/enum_class.py,sha256=
|
|
156
|
+
mineru/utils/draw_bbox.py,sha256=FkgppjUzRhN-uxvChdkhHXcDavJEaApMD6qC6qoRwfQ,20292
|
|
157
|
+
mineru/utils/enum_class.py,sha256=34lVsjeAYLha7Q-1qxY9seJFdK6fjuEphXfYFibghEY,2442
|
|
172
158
|
mineru/utils/format_utils.py,sha256=2s89vHcSISjuolk8Hvg3K-5-rRbiT3Us7eFLzUKrNKs,10233
|
|
159
|
+
mineru/utils/guess_suffix_or_lang.py,sha256=q7CbPpiaDFA2AmOS66oJ_HwzamEftNSsmOn5mbrtO9I,540
|
|
173
160
|
mineru/utils/hash_utils.py,sha256=UPS_8NRBmVumdyOv16Lmv6Ly2xK8OVDJEe5gG6gKIFk,857
|
|
174
161
|
mineru/utils/language.py,sha256=7RT3mxSa7jdpoC5ySd7ZddHA7TO7UsnmDOWiYZAxuyg,1433
|
|
175
162
|
mineru/utils/llm_aided.py,sha256=0W6AlBpLfflON1ob6p72IgwdCJKFXhYpDWlrhrToR5s,4892
|
|
@@ -182,12 +169,12 @@ mineru/utils/pdf_image_tools.py,sha256=mioLEHOdDtM1YbspNaa0wWhnLw_4-H7rdHlIM40vr
|
|
|
182
169
|
mineru/utils/pdf_reader.py,sha256=WeINm5SyWBUXT0wP9lzIbeHs8P6WUIkN6nVL5X4LzG4,3267
|
|
183
170
|
mineru/utils/pdf_text_tool.py,sha256=KEztjfdqsIHHuiTEAMAL7Lr1OS3R7Ur-uTqGiCRjReQ,1364
|
|
184
171
|
mineru/utils/run_async.py,sha256=rPeP4BCZerR8VByRDhiYzfZiahLVqoZEBVAS54dAjNg,1286
|
|
185
|
-
mineru/utils/span_block_fix.py,sha256=
|
|
172
|
+
mineru/utils/span_block_fix.py,sha256=0eVQjJCrT03woRt9hoh6Uu42Tp1dacfGTv2x3B9qq94,8797
|
|
186
173
|
mineru/utils/span_pre_proc.py,sha256=h41q2uQajI0xQbc_30hqaju1dv3oVYxBAlKgURl8HIc,13692
|
|
187
174
|
mineru/utils/table_merge.py,sha256=zYUpYLrfhBCnbHCYZi6rG8-s38NDnTbiNTObvLdYwJk,11494
|
|
188
|
-
mineru-2.
|
|
189
|
-
mineru-2.
|
|
190
|
-
mineru-2.
|
|
191
|
-
mineru-2.
|
|
192
|
-
mineru-2.
|
|
193
|
-
mineru-2.
|
|
175
|
+
mineru-2.5.0.dist-info/licenses/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
|
|
176
|
+
mineru-2.5.0.dist-info/METADATA,sha256=m404n7vO2GrWJbpksXY_tMCKxbjAux71JRg2tby2Gw0,64460
|
|
177
|
+
mineru-2.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
178
|
+
mineru-2.5.0.dist-info/entry_points.txt,sha256=luXmbhPiZK_tKlRgWuYOaW_V6EFpG-yJcAevVv9MEqE,252
|
|
179
|
+
mineru-2.5.0.dist-info/top_level.txt,sha256=zuGQfZcbsHv4I4oKI9gaKPqEWBFm6xJroKuug2LnKP8,7
|
|
180
|
+
mineru-2.5.0.dist-info/RECORD,,
|
|
@@ -3,4 +3,4 @@ mineru = mineru.cli:client.main
|
|
|
3
3
|
mineru-api = mineru.cli.fast_api:main
|
|
4
4
|
mineru-gradio = mineru.cli.gradio_app:main
|
|
5
5
|
mineru-models-download = mineru.cli.models_download:download_models
|
|
6
|
-
mineru-sglang-server = mineru.cli.vlm_sglang_server:main
|
|
6
|
+
mineru-vllm-server = mineru.cli.vlm_vllm_server:main
|
|
@@ -1,186 +0,0 @@
|
|
|
1
|
-
import asyncio
|
|
2
|
-
from abc import ABC, abstractmethod
|
|
3
|
-
from typing import AsyncIterable, Iterable, List, Optional, Union
|
|
4
|
-
|
|
5
|
-
DEFAULT_SYSTEM_PROMPT = (
|
|
6
|
-
"A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers."
|
|
7
|
-
)
|
|
8
|
-
DEFAULT_USER_PROMPT = "Document Parsing:"
|
|
9
|
-
DEFAULT_TEMPERATURE = 0.0
|
|
10
|
-
DEFAULT_TOP_P = 0.8
|
|
11
|
-
DEFAULT_TOP_K = 20
|
|
12
|
-
DEFAULT_REPETITION_PENALTY = 1.0
|
|
13
|
-
DEFAULT_PRESENCE_PENALTY = 0.0
|
|
14
|
-
DEFAULT_NO_REPEAT_NGRAM_SIZE = 100
|
|
15
|
-
DEFAULT_MAX_NEW_TOKENS = 16384
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
class BasePredictor(ABC):
|
|
19
|
-
system_prompt = DEFAULT_SYSTEM_PROMPT
|
|
20
|
-
|
|
21
|
-
def __init__(
|
|
22
|
-
self,
|
|
23
|
-
temperature: float = DEFAULT_TEMPERATURE,
|
|
24
|
-
top_p: float = DEFAULT_TOP_P,
|
|
25
|
-
top_k: int = DEFAULT_TOP_K,
|
|
26
|
-
repetition_penalty: float = DEFAULT_REPETITION_PENALTY,
|
|
27
|
-
presence_penalty: float = DEFAULT_PRESENCE_PENALTY,
|
|
28
|
-
no_repeat_ngram_size: int = DEFAULT_NO_REPEAT_NGRAM_SIZE,
|
|
29
|
-
max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS,
|
|
30
|
-
) -> None:
|
|
31
|
-
self.temperature = temperature
|
|
32
|
-
self.top_p = top_p
|
|
33
|
-
self.top_k = top_k
|
|
34
|
-
self.repetition_penalty = repetition_penalty
|
|
35
|
-
self.presence_penalty = presence_penalty
|
|
36
|
-
self.no_repeat_ngram_size = no_repeat_ngram_size
|
|
37
|
-
self.max_new_tokens = max_new_tokens
|
|
38
|
-
|
|
39
|
-
@abstractmethod
|
|
40
|
-
def predict(
|
|
41
|
-
self,
|
|
42
|
-
image: str | bytes,
|
|
43
|
-
prompt: str = "",
|
|
44
|
-
temperature: Optional[float] = None,
|
|
45
|
-
top_p: Optional[float] = None,
|
|
46
|
-
top_k: Optional[int] = None,
|
|
47
|
-
repetition_penalty: Optional[float] = None,
|
|
48
|
-
presence_penalty: Optional[float] = None,
|
|
49
|
-
no_repeat_ngram_size: Optional[int] = None,
|
|
50
|
-
max_new_tokens: Optional[int] = None,
|
|
51
|
-
) -> str: ...
|
|
52
|
-
|
|
53
|
-
@abstractmethod
|
|
54
|
-
def batch_predict(
|
|
55
|
-
self,
|
|
56
|
-
images: List[str] | List[bytes],
|
|
57
|
-
prompts: Union[List[str], str] = "",
|
|
58
|
-
temperature: Optional[float] = None,
|
|
59
|
-
top_p: Optional[float] = None,
|
|
60
|
-
top_k: Optional[int] = None,
|
|
61
|
-
repetition_penalty: Optional[float] = None,
|
|
62
|
-
presence_penalty: Optional[float] = None,
|
|
63
|
-
no_repeat_ngram_size: Optional[int] = None,
|
|
64
|
-
max_new_tokens: Optional[int] = None,
|
|
65
|
-
) -> List[str]: ...
|
|
66
|
-
|
|
67
|
-
@abstractmethod
|
|
68
|
-
def stream_predict(
|
|
69
|
-
self,
|
|
70
|
-
image: str | bytes,
|
|
71
|
-
prompt: str = "",
|
|
72
|
-
temperature: Optional[float] = None,
|
|
73
|
-
top_p: Optional[float] = None,
|
|
74
|
-
top_k: Optional[int] = None,
|
|
75
|
-
repetition_penalty: Optional[float] = None,
|
|
76
|
-
presence_penalty: Optional[float] = None,
|
|
77
|
-
no_repeat_ngram_size: Optional[int] = None,
|
|
78
|
-
max_new_tokens: Optional[int] = None,
|
|
79
|
-
) -> Iterable[str]: ...
|
|
80
|
-
|
|
81
|
-
async def aio_predict(
|
|
82
|
-
self,
|
|
83
|
-
image: str | bytes,
|
|
84
|
-
prompt: str = "",
|
|
85
|
-
temperature: Optional[float] = None,
|
|
86
|
-
top_p: Optional[float] = None,
|
|
87
|
-
top_k: Optional[int] = None,
|
|
88
|
-
repetition_penalty: Optional[float] = None,
|
|
89
|
-
presence_penalty: Optional[float] = None,
|
|
90
|
-
no_repeat_ngram_size: Optional[int] = None,
|
|
91
|
-
max_new_tokens: Optional[int] = None,
|
|
92
|
-
) -> str:
|
|
93
|
-
return await asyncio.to_thread(
|
|
94
|
-
self.predict,
|
|
95
|
-
image,
|
|
96
|
-
prompt,
|
|
97
|
-
temperature,
|
|
98
|
-
top_p,
|
|
99
|
-
top_k,
|
|
100
|
-
repetition_penalty,
|
|
101
|
-
presence_penalty,
|
|
102
|
-
no_repeat_ngram_size,
|
|
103
|
-
max_new_tokens,
|
|
104
|
-
)
|
|
105
|
-
|
|
106
|
-
async def aio_batch_predict(
|
|
107
|
-
self,
|
|
108
|
-
images: List[str] | List[bytes],
|
|
109
|
-
prompts: Union[List[str], str] = "",
|
|
110
|
-
temperature: Optional[float] = None,
|
|
111
|
-
top_p: Optional[float] = None,
|
|
112
|
-
top_k: Optional[int] = None,
|
|
113
|
-
repetition_penalty: Optional[float] = None,
|
|
114
|
-
presence_penalty: Optional[float] = None,
|
|
115
|
-
no_repeat_ngram_size: Optional[int] = None,
|
|
116
|
-
max_new_tokens: Optional[int] = None,
|
|
117
|
-
) -> List[str]:
|
|
118
|
-
return await asyncio.to_thread(
|
|
119
|
-
self.batch_predict,
|
|
120
|
-
images,
|
|
121
|
-
prompts,
|
|
122
|
-
temperature,
|
|
123
|
-
top_p,
|
|
124
|
-
top_k,
|
|
125
|
-
repetition_penalty,
|
|
126
|
-
presence_penalty,
|
|
127
|
-
no_repeat_ngram_size,
|
|
128
|
-
max_new_tokens,
|
|
129
|
-
)
|
|
130
|
-
|
|
131
|
-
async def aio_stream_predict(
|
|
132
|
-
self,
|
|
133
|
-
image: str | bytes,
|
|
134
|
-
prompt: str = "",
|
|
135
|
-
temperature: Optional[float] = None,
|
|
136
|
-
top_p: Optional[float] = None,
|
|
137
|
-
top_k: Optional[int] = None,
|
|
138
|
-
repetition_penalty: Optional[float] = None,
|
|
139
|
-
presence_penalty: Optional[float] = None,
|
|
140
|
-
no_repeat_ngram_size: Optional[int] = None,
|
|
141
|
-
max_new_tokens: Optional[int] = None,
|
|
142
|
-
) -> AsyncIterable[str]:
|
|
143
|
-
queue = asyncio.Queue()
|
|
144
|
-
loop = asyncio.get_running_loop()
|
|
145
|
-
|
|
146
|
-
def synced_predict():
|
|
147
|
-
for chunk in self.stream_predict(
|
|
148
|
-
image=image,
|
|
149
|
-
prompt=prompt,
|
|
150
|
-
temperature=temperature,
|
|
151
|
-
top_p=top_p,
|
|
152
|
-
top_k=top_k,
|
|
153
|
-
repetition_penalty=repetition_penalty,
|
|
154
|
-
presence_penalty=presence_penalty,
|
|
155
|
-
no_repeat_ngram_size=no_repeat_ngram_size,
|
|
156
|
-
max_new_tokens=max_new_tokens,
|
|
157
|
-
):
|
|
158
|
-
asyncio.run_coroutine_threadsafe(queue.put(chunk), loop)
|
|
159
|
-
asyncio.run_coroutine_threadsafe(queue.put(None), loop)
|
|
160
|
-
|
|
161
|
-
asyncio.create_task(
|
|
162
|
-
asyncio.to_thread(synced_predict),
|
|
163
|
-
)
|
|
164
|
-
|
|
165
|
-
while True:
|
|
166
|
-
chunk = await queue.get()
|
|
167
|
-
if chunk is None:
|
|
168
|
-
return
|
|
169
|
-
assert isinstance(chunk, str)
|
|
170
|
-
yield chunk
|
|
171
|
-
|
|
172
|
-
def build_prompt(self, prompt: str) -> str:
|
|
173
|
-
if prompt.startswith("<|im_start|>"):
|
|
174
|
-
return prompt
|
|
175
|
-
if not prompt:
|
|
176
|
-
prompt = DEFAULT_USER_PROMPT
|
|
177
|
-
|
|
178
|
-
return f"<|im_start|>system\n{self.system_prompt}<|im_end|><|im_start|>user\n<image>\n{prompt}<|im_end|><|im_start|>assistant\n"
|
|
179
|
-
# Modify here. We add <|box_start|> at the end of the prompt to force the model to generate bounding box.
|
|
180
|
-
# if "Document OCR" in prompt:
|
|
181
|
-
# return f"<|im_start|>system\n{self.system_prompt}<|im_end|><|im_start|>user\n<image>\n{prompt}<|im_end|><|im_start|>assistant\n<|box_start|>"
|
|
182
|
-
# else:
|
|
183
|
-
# return f"<|im_start|>system\n{self.system_prompt}<|im_end|><|im_start|>user\n<image>\n{prompt}<|im_end|><|im_start|>assistant\n"
|
|
184
|
-
|
|
185
|
-
def close(self):
|
|
186
|
-
pass
|