mineru 2.2.2__py3-none-any.whl → 2.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. mineru/backend/pipeline/pipeline_middle_json_mkcontent.py +3 -3
  2. mineru/backend/vlm/model_output_to_middle_json.py +123 -0
  3. mineru/backend/vlm/vlm_analyze.py +105 -16
  4. mineru/backend/vlm/vlm_magic_model.py +201 -135
  5. mineru/backend/vlm/vlm_middle_json_mkcontent.py +52 -11
  6. mineru/cli/client.py +6 -5
  7. mineru/cli/common.py +17 -16
  8. mineru/cli/fast_api.py +9 -7
  9. mineru/cli/gradio_app.py +15 -16
  10. mineru/cli/vlm_vllm_server.py +4 -0
  11. mineru/model/table/rec/unet_table/main.py +8 -0
  12. mineru/model/vlm_vllm_model/__init__.py +0 -0
  13. mineru/model/vlm_vllm_model/server.py +59 -0
  14. mineru/resources/header.html +10 -2
  15. mineru/utils/draw_bbox.py +32 -10
  16. mineru/utils/enum_class.py +16 -2
  17. mineru/utils/guess_suffix_or_lang.py +20 -0
  18. mineru/utils/span_block_fix.py +4 -2
  19. mineru/version.py +1 -1
  20. {mineru-2.2.2.dist-info → mineru-2.5.1.dist-info}/METADATA +70 -25
  21. {mineru-2.2.2.dist-info → mineru-2.5.1.dist-info}/RECORD +25 -38
  22. {mineru-2.2.2.dist-info → mineru-2.5.1.dist-info}/entry_points.txt +1 -1
  23. mineru/backend/vlm/base_predictor.py +0 -186
  24. mineru/backend/vlm/hf_predictor.py +0 -217
  25. mineru/backend/vlm/predictor.py +0 -111
  26. mineru/backend/vlm/sglang_client_predictor.py +0 -443
  27. mineru/backend/vlm/sglang_engine_predictor.py +0 -246
  28. mineru/backend/vlm/token_to_middle_json.py +0 -122
  29. mineru/backend/vlm/utils.py +0 -40
  30. mineru/cli/vlm_sglang_server.py +0 -4
  31. mineru/model/vlm_hf_model/__init__.py +0 -9
  32. mineru/model/vlm_hf_model/configuration_mineru2.py +0 -38
  33. mineru/model/vlm_hf_model/image_processing_mineru2.py +0 -269
  34. mineru/model/vlm_hf_model/modeling_mineru2.py +0 -449
  35. mineru/model/vlm_sglang_model/__init__.py +0 -14
  36. mineru/model/vlm_sglang_model/engine.py +0 -264
  37. mineru/model/vlm_sglang_model/image_processor.py +0 -213
  38. mineru/model/vlm_sglang_model/logit_processor.py +0 -90
  39. mineru/model/vlm_sglang_model/model.py +0 -453
  40. mineru/model/vlm_sglang_model/server.py +0 -75
  41. {mineru-2.2.2.dist-info → mineru-2.5.1.dist-info}/WHEEL +0 -0
  42. {mineru-2.2.2.dist-info → mineru-2.5.1.dist-info}/licenses/LICENSE.md +0 -0
  43. {mineru-2.2.2.dist-info → mineru-2.5.1.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mineru
3
- Version: 2.2.2
3
+ Version: 2.5.1
4
4
  Summary: A practical tool for converting PDF to Markdown
5
5
  License: AGPL-3.0
6
6
  Project-URL: homepage, https://mineru.net/
@@ -36,6 +36,8 @@ Requires-Dist: fast-langdetect<0.3.0,>=0.2.3
36
36
  Requires-Dist: scikit-image<1.0.0,>=0.25.0
37
37
  Requires-Dist: openai<2,>=1.70.0
38
38
  Requires-Dist: beautifulsoup4<5,>=4.13.5
39
+ Requires-Dist: magika<0.7.0,>=0.6.2
40
+ Requires-Dist: mineru-vl-utils<1,>=0.1.7
39
41
  Provides-Extra: test
40
42
  Requires-Dist: mineru[core]; extra == "test"
41
43
  Requires-Dist: pytest; extra == "test"
@@ -43,12 +45,11 @@ Requires-Dist: pytest-cov; extra == "test"
43
45
  Requires-Dist: coverage; extra == "test"
44
46
  Requires-Dist: fuzzywuzzy; extra == "test"
45
47
  Provides-Extra: vlm
46
- Requires-Dist: transformers>=4.51.1; extra == "vlm"
47
- Requires-Dist: torch>=2.6.0; extra == "vlm"
48
+ Requires-Dist: torch<2.8.0,>=2.6.0; extra == "vlm"
49
+ Requires-Dist: transformers<5.0.0,>=4.51.1; extra == "vlm"
48
50
  Requires-Dist: accelerate>=1.5.1; extra == "vlm"
49
- Requires-Dist: pydantic; extra == "vlm"
50
- Provides-Extra: sglang
51
- Requires-Dist: sglang[all]<0.4.11,>=0.4.7; extra == "sglang"
51
+ Provides-Extra: vllm
52
+ Requires-Dist: vllm==0.10.1.1; extra == "vllm"
52
53
  Provides-Extra: pipeline
53
54
  Requires-Dist: matplotlib<4,>=3.10; extra == "pipeline"
54
55
  Requires-Dist: ultralytics<9,>=8.3.48; extra == "pipeline"
@@ -77,7 +78,7 @@ Requires-Dist: mineru[api]; extra == "core"
77
78
  Requires-Dist: mineru[gradio]; extra == "core"
78
79
  Provides-Extra: all
79
80
  Requires-Dist: mineru[core]; extra == "all"
80
- Requires-Dist: mineru[sglang]; extra == "all"
81
+ Requires-Dist: mineru[vllm]; extra == "all"
81
82
  Dynamic: license-file
82
83
 
83
84
  <div align="center" xmlns="http://www.w3.org/1999/html">
@@ -126,25 +127,67 @@ Dynamic: license-file
126
127
 
127
128
  # Changelog
128
129
 
129
- - 2025/09/10 2.2.2 Released
130
- - Fixed the issue where the new table recognition model would affect the overall parsing task when some table parsing failed
130
+ - 2025/09/19 2.5.1 Released
131
131
 
132
- - 2025/09/08 2.2.1 Released
133
- - Fixed the issue where some newly added models were not downloaded when using the model download command.
132
+ We are officially releasing MinerU2.5, currently the most powerful multimodal large model for document parsing.
133
+ With only 1.2B parameters, MinerU2.5's accuracy on the OmniDocBench benchmark comprehensively surpasses top-tier multimodal models like Gemini 2.5 Pro, GPT-4o, and Qwen2.5-VL-72B. It also significantly outperforms leading specialized models such as dots.ocr, MonkeyOCR, and PP-StructureV3.
134
+ The model has been released on [HuggingFace](https://huggingface.co/opendatalab/MinerU2.5-2509-1.2B) and [ModelScope](https://modelscope.cn/models/opendatalab/MinerU2.5-2509-1.2B) platforms. Welcome to download and use!
135
+ - Core Highlights:
136
+ - SOTA Performance with Extreme Efficiency: As a 1.2B model, it achieves State-of-the-Art (SOTA) results that exceed models in the 10B and 100B+ classes, redefining the performance-per-parameter standard in document AI.
137
+ - Advanced Architecture for Across-the-Board Leadership: By combining a two-stage inference pipeline (decoupling layout analysis from content recognition) with a native high-resolution architecture, it achieves SOTA performance across five key areas: layout analysis, text recognition, formula recognition, table recognition, and reading order.
138
+ - Key Capability Enhancements:
139
+ - Layout Detection: Delivers more complete results by accurately covering non-body content like headers, footers, and page numbers. It also provides more precise element localization and natural format reconstruction for lists and references.
140
+ - Table Parsing: Drastically improves parsing for challenging cases, including rotated tables, borderless/semi-structured tables, and long/complex tables.
141
+ - Formula Recognition: Significantly boosts accuracy for complex, long-form, and hybrid Chinese-English formulas, greatly enhancing the parsing capability for mathematical documents.
134
142
 
135
- - 2025/09/05 2.2.0 Released
136
- - Major Updates
137
- - In this version, we focused on improving table parsing accuracy by introducing a new [wired table recognition model](https://github.com/RapidAI/TableStructureRec) and a brand-new hybrid table structure parsing algorithm, significantly enhancing the table recognition capabilities of the `pipeline` backend.
138
- - We also added support for cross-page table merging, which is supported by both `pipeline` and `vlm` backends, further improving the completeness and accuracy of table parsing.
139
- - Other Updates
140
- - The `pipeline` backend now supports 270-degree rotated table parsing, bringing support for table parsing in 0/90/270-degree orientations
141
- - `pipeline` added OCR capability support for Thai and Greek, and updated the English OCR model to the latest version. English recognition accuracy improved by 11%, Thai recognition model accuracy is 82.68%, and Greek recognition model accuracy is 89.28% (by PPOCRv5)
142
- - Added `bbox` field (mapped to 0-1000 range) in the output `content_list.json`, making it convenient for users to directly obtain position information for each content block
143
- - Removed the `pipeline_old_linux` installation option, no longer supporting legacy Linux systems such as `CentOS 7`, to provide better support for `uv`'s `sync`/`run` commands
143
+ Additionally, with the release of vlm 2.5, we have made some adjustments to the repository:
144
+ - The vlm backend has been upgraded to version 2.5; it supports the MinerU2.5 model and is no longer compatible with the MinerU2.0-2505-0.9B model. The last version supporting the 2.0 model is mineru-2.2.2.
145
+ - VLM inference-related code has been moved to [mineru_vl_utils](https://github.com/opendatalab/mineru-vl-utils), reducing coupling with the main mineru repository and facilitating independent iteration in the future.
146
+ - The vlm accelerated inference framework has been switched from `sglang` to `vllm`, achieving full compatibility with the vllm ecosystem, allowing users to use the MinerU2.5 model and accelerated inference on any platform that supports the vllm framework.
147
+ - Due to major upgrades in the vlm model supporting more layout types, we have made some adjustments to the structure of the parsing intermediate file `middle.json` and result file `content_list.json`. Please refer to the [documentation](https://opendatalab.github.io/MinerU/reference/output_files/) for details.
148
+
149
+ Other repository optimizations:
150
+ - Removed file extension whitelist validation for input files. Input files that are PDF documents or images no longer need a specific file extension, improving usability.
144
151
 
145
152
  <details>
146
153
  <summary>History Log</summary>
147
154
 
155
+ <details>
156
+ <summary>2025/09/10 2.2.2 Released</summary>
157
+ <ul>
158
+ <li>Fixed the issue where the new table recognition model would affect the overall parsing task when some table parsing failed</li>
159
+ </ul>
160
+ </details>
161
+
162
+ <details>
163
+ <summary>2025/09/08 2.2.1 Released</summary>
164
+ <ul>
165
+ <li>Fixed the issue where some newly added models were not downloaded when using the model download command.</li>
166
+ </ul>
167
+ </details>
168
+
169
+ <details>
170
+ <summary>2025/09/05 2.2.0 Released</summary>
171
+ <ul>
172
+ <li>
173
+ Major Updates
174
+ <ul>
175
+ <li>In this version, we focused on improving table parsing accuracy by introducing a new <a href="https://github.com/RapidAI/TableStructureRec">wired table recognition model</a> and a brand-new hybrid table structure parsing algorithm, significantly enhancing the table recognition capabilities of the <code>pipeline</code> backend.</li>
176
+ <li>We also added support for cross-page table merging, which is supported by both <code>pipeline</code> and <code>vlm</code> backends, further improving the completeness and accuracy of table parsing.</li>
177
+ </ul>
178
+ </li>
179
+ <li>
180
+ Other Updates
181
+ <ul>
182
+ <li>The <code>pipeline</code> backend now supports 270-degree rotated table parsing, bringing support for table parsing in 0/90/270-degree orientations</li>
183
+ <li><code>pipeline</code> added OCR capability support for Thai and Greek, and updated the English OCR model to the latest version. English recognition accuracy improved by 11%, Thai recognition model accuracy is 82.68%, and Greek recognition model accuracy is 89.28% (by PPOCRv5)</li>
184
+ <li>Added <code>bbox</code> field (mapped to 0-1000 range) in the output <code>content_list.json</code>, making it convenient for users to directly obtain position information for each content block</li>
185
+ <li>Removed the <code>pipeline_old_linux</code> installation option, no longer supporting legacy Linux systems such as <code>CentOS 7</code>, to provide better support for <code>uv</code>'s <code>sync</code>/<code>run</code> commands</li>
186
+ </ul>
187
+ </li>
188
+ </ul>
189
+ </details>
190
+
148
191
  <details>
149
192
  <summary>2025/08/01 2.1.10 Released</summary>
150
193
  <ul>
@@ -641,7 +684,7 @@ A WebUI developed based on Gradio, with a simple interface and only core parsing
641
684
  <td>Parsing Backend</td>
642
685
  <td>pipeline</td>
643
686
  <td>vlm-transformers</td>
644
- <td>vlm-sglang</td>
687
+ <td>vlm-vllm</td>
645
688
  </tr>
646
689
  <tr>
647
690
  <td>Operating System</td>
@@ -690,8 +733,8 @@ uv pip install -e .[core]
690
733
  ```
691
734
 
692
735
  > [!TIP]
693
- > `mineru[core]` includes all core features except `sglang` acceleration, compatible with Windows / Linux / macOS systems, suitable for most users.
694
- > If you need to use `sglang` acceleration for VLM model inference or install a lightweight client on edge devices, please refer to the documentation [Extension Modules Installation Guide](https://opendatalab.github.io/MinerU/quick_start/extension_modules/).
736
+ > `mineru[core]` includes all core features except `vLLM` acceleration, compatible with Windows / Linux / macOS systems, suitable for most users.
737
+ > If you need to use `vLLM` acceleration for VLM model inference or install a lightweight client on edge devices, please refer to the documentation [Extension Modules Installation Guide](https://opendatalab.github.io/MinerU/quick_start/extension_modules/).
695
738
 
696
739
  ---
697
740
 
@@ -719,8 +762,8 @@ You can use MinerU for PDF parsing through various methods such as command line,
719
762
  - [x] Handwritten Text Recognition
720
763
  - [x] Vertical Text Recognition
721
764
  - [x] Latin Accent Mark Recognition
722
- - [ ] Code block recognition in the main text
723
- - [ ] [Chemical formula recognition](docs/chemical_knowledge_introduction/introduction.pdf)
765
+ - [x] Code block recognition in the main text
766
+ - [x] [Chemical formula recognition](docs/chemical_knowledge_introduction/introduction.pdf) (mineru.net)
724
767
  - [ ] Geometric shape recognition
725
768
 
726
769
  # Known Issues
@@ -768,6 +811,7 @@ Currently, some models in this project are trained based on YOLO. However, since
768
811
  - [pdftext](https://github.com/datalab-to/pdftext)
769
812
  - [pdfminer.six](https://github.com/pdfminer/pdfminer.six)
770
813
  - [pypdf](https://github.com/py-pdf/pypdf)
814
+ - [magika](https://github.com/google/magika)
771
815
 
772
816
  # Citation
773
817
 
@@ -810,3 +854,4 @@ Currently, some models in this project are trained based on YOLO. However, since
810
854
  - [OmniDocBench (A Comprehensive Benchmark for Document Parsing and Evaluation)](https://github.com/opendatalab/OmniDocBench)
811
855
  - [Magic-HTML (Mixed web page extraction tool)](https://github.com/opendatalab/magic-html)
812
856
  - [Magic-Doc (Fast speed ppt/pptx/doc/docx/pdf extraction tool)](https://github.com/InternLM/magic-doc)
857
+ - [Dingo: A Comprehensive AI Data Quality Evaluation Tool](https://github.com/MigoXLab/dingo)
@@ -1,5 +1,5 @@
1
1
  mineru/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
2
- mineru/version.py,sha256=toAYzE_ok1SiBE0AqAVdW0O8YCXCwcx0w4JATYQuJOg,22
2
+ mineru/version.py,sha256=PfQ9ThOuZlUZhThya-_PpR02LjazRR6LNSivpta03mM,22
3
3
  mineru/backend/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
4
4
  mineru/backend/pipeline/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
5
5
  mineru/backend/pipeline/batch_analyze.py,sha256=rp9nHYmuBBytlJIc3oRwqTtgFd5mhRak5UMhQ4mu02Y,21896
@@ -9,25 +9,19 @@ mineru/backend/pipeline/model_list.py,sha256=62_61GQ9ZgS5YmNSpzRkts9kLtbSu8dYrfn
9
9
  mineru/backend/pipeline/para_split.py,sha256=Kq95MmvkPm7rKxlCSGiTvVKyF7CErHI2eGGAs5sLl0Q,17119
10
10
  mineru/backend/pipeline/pipeline_analyze.py,sha256=rbO5AetOdnxR5ctkoDzFCFoElkz7Jgb7gi2Ct596NK8,6655
11
11
  mineru/backend/pipeline/pipeline_magic_model.py,sha256=w8jGx8f6yZN0Wf2yPP3L9rYKc9rogxreZCrUJzJvPO8,14974
12
- mineru/backend/pipeline/pipeline_middle_json_mkcontent.py,sha256=KlNdxI6-zij_NRzdD11UaAdQNtIo82FCYfeDRzgBeIk,14084
12
+ mineru/backend/pipeline/pipeline_middle_json_mkcontent.py,sha256=xWWOFmYL6hB8PLrxQFyRJ72dAmTIDHtqiWV-WFUfR44,14081
13
13
  mineru/backend/vlm/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
14
- mineru/backend/vlm/base_predictor.py,sha256=1QhREwTLNrQ2zQRRfJuyyb0PPoJRAi9JDkgBEBZpZ6U,6417
15
- mineru/backend/vlm/hf_predictor.py,sha256=oQE-vkTq1Yu7uFPBAwb-I8FWkyoxfYakOjRjAnnbc_M,7555
16
- mineru/backend/vlm/predictor.py,sha256=KWvyGB26IVwunf5d_HQao12LUAOLtMr9c48WfOoXbzU,3925
17
- mineru/backend/vlm/sglang_client_predictor.py,sha256=pXY1tUoV88dhtxKDkGUKJbEsaLPC_TmSwT3fcAD6GyU,15871
18
- mineru/backend/vlm/sglang_engine_predictor.py,sha256=gnAEaYKCx1_0PGuo7JRcOisMZmgruUucBvAhyrhHui8,8587
19
- mineru/backend/vlm/token_to_middle_json.py,sha256=JDnXI2f7YgcGRJ5ZrZtZevlX6gdiUVAsfT70vByWIoo,9622
20
- mineru/backend/vlm/utils.py,sha256=QwTEUpnMVVF30CiNAHJRN7n8ullqGYEH8TQyXy7my08,1361
21
- mineru/backend/vlm/vlm_analyze.py,sha256=zc6BpduUmIp0ysHKLAlAUVnKA-xRYo_8Pw6QXlVQrko,3588
22
- mineru/backend/vlm/vlm_magic_model.py,sha256=8e8EnpNZBKyPBLP1UPlRRVnbV9Sxd5u55OgaSdfpECQ,14256
23
- mineru/backend/vlm/vlm_middle_json_mkcontent.py,sha256=AU0udnwLnxSYxFOhzSn3zG5j3-ic-zuAJAnR_RBrNck,11138
14
+ mineru/backend/vlm/model_output_to_middle_json.py,sha256=e4Yc98_Cth2cjVPybPGehD5cpjGcTka4D2qKKrP_qqo,5121
15
+ mineru/backend/vlm/vlm_analyze.py,sha256=jR5DTHT0qtOLVos5-qSGPvlRCdYREoAshFej98FY5ao,8282
16
+ mineru/backend/vlm/vlm_magic_model.py,sha256=o1WKwgArV0f4lp7ufmSq9zRZaM5bnfOdx1AQtKnosro,16379
17
+ mineru/backend/vlm/vlm_middle_json_mkcontent.py,sha256=DX2TRpfSIqpuhWqou5QXNtCW40ddQi0kdQxXi4QgzKs,13375
24
18
  mineru/cli/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
25
- mineru/cli/client.py,sha256=3F3d59YJZiVgTmI1B8vrZZ1icftsC5Tk8So7hkAkSqs,6565
26
- mineru/cli/common.py,sha256=63qK2NyAhzT9b0_YN3jbEKeAH7UjOlQnuagZqpinLuU,13812
27
- mineru/cli/fast_api.py,sha256=yGqzML1KZJc-YQnbjZVxVgPswe2VD7EoGfdHVkr0bbA,11256
28
- mineru/cli/gradio_app.py,sha256=ckwyzAAZVLRsyMEEFcVnmvoCKf0O3Y1RYi5NQI0JSSU,13458
19
+ mineru/cli/client.py,sha256=uo7db9Wqj1Mc11MYuaM-bi54BfKKU3SFB9Urc8md5X4,6641
20
+ mineru/cli/common.py,sha256=yJPdrwSYVidl2hTJ2Hn2YhnfH97GJ-QZi20dGFz7h5c,14025
21
+ mineru/cli/fast_api.py,sha256=-GDT4gOCjKQrRjrx9WVJw-D-EC7Adv-F2rAiSWdl2CA,11328
22
+ mineru/cli/gradio_app.py,sha256=m1ppNVVOS-gdMpQPWOgHQCBQkLZ4B6gKCZuUyhfsR1g,13482
29
23
  mineru/cli/models_download.py,sha256=TCKtzTRJ-ShaqZnRQID40QsILqp2b3basU142FMTmns,4775
30
- mineru/cli/vlm_sglang_server.py,sha256=EyRL0dt0-SvnG5lzyWB8OuVHHbc1MdeqSszNUk03Y6U,88
24
+ mineru/cli/vlm_vllm_server.py,sha256=fQJyD-gIPQ41hR_6aIaDJczl66N310t0CiZEBAfX5mc,90
31
25
  mineru/data/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
32
26
  mineru/data/data_reader_writer/__init__.py,sha256=9qnGNrsuGBMwwfsQy6oChdkz--a_LPdYWE0VZZr0yr4,490
33
27
  mineru/data/data_reader_writer/base.py,sha256=nqmAcdHOXMOJO6RAT3ILligDFaw8Op0STyCw5yOzAbI,1706
@@ -142,23 +136,15 @@ mineru/model/table/rec/slanet_plus/matcher_utils.py,sha256=9wt_ydeeViLd57bU6g3ln
142
136
  mineru/model/table/rec/slanet_plus/table_structure.py,sha256=Ve9eUdA0ivHf5bf9gwvHHfb7-E7drJLP3S3MPlh3uZ0,3844
143
137
  mineru/model/table/rec/slanet_plus/table_structure_utils.py,sha256=YYSkwN2WdLx7qkWMSGkPY7yXOH5ENVhg5CsRGhtZ5Wk,19281
144
138
  mineru/model/table/rec/unet_table/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
145
- mineru/model/table/rec/unet_table/main.py,sha256=dHJodOnvl2dk7he3QhZpJy19rhxtBxIAEiOlAKx3dXY,15199
139
+ mineru/model/table/rec/unet_table/main.py,sha256=6O5g2Gjml_NBMWKBK1rGdwpcjGIi6JHtjIhxjsGhXIY,15545
146
140
  mineru/model/table/rec/unet_table/table_recover.py,sha256=rSyeWyuP10M8dLKA5e0n4P2DXMYbVbmgLxEcdZA8_0E,9059
147
141
  mineru/model/table/rec/unet_table/table_structure_unet.py,sha256=beBMmBHAOR2lAuf2rcOKRSbFaJqwuIgMJWxWQsFmIRI,7908
148
142
  mineru/model/table/rec/unet_table/utils.py,sha256=CYAqJW0wePJk4NAemb8W203N7E32v0ujiWbxanDhd8I,16083
149
143
  mineru/model/table/rec/unet_table/utils_table_line_rec.py,sha256=zrCdPwI4M8nu0FEfd7lRJAe0z8kYq3KFbzwElM82USE,11174
150
144
  mineru/model/table/rec/unet_table/utils_table_recover.py,sha256=XksJsY82ZS0kqUnNT-jvaYzxJ3V3svMSzj0puwIau1k,10651
151
- mineru/model/vlm_hf_model/__init__.py,sha256=a24kdhTzsD__uL0h65MYehkAVgBZvk-BFZn9Cuv95c8,485
152
- mineru/model/vlm_hf_model/configuration_mineru2.py,sha256=DUr_xaG7jNHTgxMmnQ2aoYag-GgH9-Es5fL3XO8wAlQ,1529
153
- mineru/model/vlm_hf_model/image_processing_mineru2.py,sha256=MQg7723JeA1v1HlnzdkbMDfmwqbXZpCCPz71DFLfocA,10594
154
- mineru/model/vlm_hf_model/modeling_mineru2.py,sha256=hBqvzmVR3w1Bm6OhSRc0IvuC7AG5d0j73EXYsmw_2xA,20365
155
- mineru/model/vlm_sglang_model/__init__.py,sha256=ykNvPdCw-vmSy2UySBGhRkpDfhqEQ4hZ3FGoMU-nhQ8,558
156
- mineru/model/vlm_sglang_model/engine.py,sha256=E8FJBTclxQNDjnEluiz_QlambyPfo60nKm7ogUZFArg,10128
157
- mineru/model/vlm_sglang_model/image_processor.py,sha256=lUmHWSNSPED0RhYWwl6-z4R3KePPz12oBmkXBj-MRO4,8561
158
- mineru/model/vlm_sglang_model/logit_processor.py,sha256=TbNi-QEFXm9lTB94640_dNYZOthpnBcKJnPgwX2eSDs,3679
159
- mineru/model/vlm_sglang_model/model.py,sha256=faA8UkUNbITvKL8afpMRe4ZtukgtUgaM_uljZVk-2M0,22813
160
- mineru/model/vlm_sglang_model/server.py,sha256=UZIy61GvsqCtDfyQpMGTeiuJA2XiifmfbEfLIoWkqfg,2403
161
- mineru/resources/header.html,sha256=Ebc1VPxwLdbcmNHhHFlcb1G_WCwB60i7rX1pec2FsJM,3919
145
+ mineru/model/vlm_vllm_model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
146
+ mineru/model/vlm_vllm_model/server.py,sha256=v07x1esggP7Wbw0r8NeAbqG2kuJN9x5Xl2CmE2x0qzk,2003
147
+ mineru/resources/header.html,sha256=NO8ZZdCYLqu_E72AtNcuRnA2NbFBamScjjGhtg9PKiM,4409
162
148
  mineru/resources/fasttext-langdetect/lid.176.ftz,sha256=jzRyz-hzintgmejpmcPL-uDc0VaWqsfXc4qAOdtgPoM,938013
163
149
  mineru/utils/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
164
150
  mineru/utils/block_pre_proc.py,sha256=uGBmxf2MR9bplTnQI8xHjCI-kj3plRhJr0hcWKidbOQ,9632
@@ -167,9 +153,10 @@ mineru/utils/boxbase.py,sha256=moP660AmZq_udHEsfvFkTQdJ4gjrrBwN7t0Enx7CIL8,6903
167
153
  mineru/utils/cli_parser.py,sha256=4seFAu1kulsYnw6WM2q_cxgEOt2tErZVkI-LNEF_kGw,1445
168
154
  mineru/utils/config_reader.py,sha256=IRVWTpBnbnRpck6eXZUKw-fcLt7hon5S4uqWW-RBb1w,4075
169
155
  mineru/utils/cut_image.py,sha256=g3m4nfcJNWlxi-P0kpXTtlmspXkMcLCfGwmYuQ-Z2hE,751
170
- mineru/utils/draw_bbox.py,sha256=xTNlION8xY-6a6lgC85ePK1io2jddqFzfkIjAZrpnGc,19088
171
- mineru/utils/enum_class.py,sha256=3_OMpUKOXTh0djgx4cjbAQZJYuEHMaMPrB-K0egUXGk,2098
156
+ mineru/utils/draw_bbox.py,sha256=FkgppjUzRhN-uxvChdkhHXcDavJEaApMD6qC6qoRwfQ,20292
157
+ mineru/utils/enum_class.py,sha256=34lVsjeAYLha7Q-1qxY9seJFdK6fjuEphXfYFibghEY,2442
172
158
  mineru/utils/format_utils.py,sha256=2s89vHcSISjuolk8Hvg3K-5-rRbiT3Us7eFLzUKrNKs,10233
159
+ mineru/utils/guess_suffix_or_lang.py,sha256=q7CbPpiaDFA2AmOS66oJ_HwzamEftNSsmOn5mbrtO9I,540
173
160
  mineru/utils/hash_utils.py,sha256=UPS_8NRBmVumdyOv16Lmv6Ly2xK8OVDJEe5gG6gKIFk,857
174
161
  mineru/utils/language.py,sha256=7RT3mxSa7jdpoC5ySd7ZddHA7TO7UsnmDOWiYZAxuyg,1433
175
162
  mineru/utils/llm_aided.py,sha256=0W6AlBpLfflON1ob6p72IgwdCJKFXhYpDWlrhrToR5s,4892
@@ -182,12 +169,12 @@ mineru/utils/pdf_image_tools.py,sha256=mioLEHOdDtM1YbspNaa0wWhnLw_4-H7rdHlIM40vr
182
169
  mineru/utils/pdf_reader.py,sha256=WeINm5SyWBUXT0wP9lzIbeHs8P6WUIkN6nVL5X4LzG4,3267
183
170
  mineru/utils/pdf_text_tool.py,sha256=KEztjfdqsIHHuiTEAMAL7Lr1OS3R7Ur-uTqGiCRjReQ,1364
184
171
  mineru/utils/run_async.py,sha256=rPeP4BCZerR8VByRDhiYzfZiahLVqoZEBVAS54dAjNg,1286
185
- mineru/utils/span_block_fix.py,sha256=2i1PLEAAo5O6dwPgDDTzPI_uVBAF2MgQV7zKCj7q0-0,8678
172
+ mineru/utils/span_block_fix.py,sha256=0eVQjJCrT03woRt9hoh6Uu42Tp1dacfGTv2x3B9qq94,8797
186
173
  mineru/utils/span_pre_proc.py,sha256=h41q2uQajI0xQbc_30hqaju1dv3oVYxBAlKgURl8HIc,13692
187
174
  mineru/utils/table_merge.py,sha256=zYUpYLrfhBCnbHCYZi6rG8-s38NDnTbiNTObvLdYwJk,11494
188
- mineru-2.2.2.dist-info/licenses/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
189
- mineru-2.2.2.dist-info/METADATA,sha256=tknI_mkDKcjDtiTNEWzI6YMxTKe4EjhqzocGcBjoyTc,60541
190
- mineru-2.2.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
191
- mineru-2.2.2.dist-info/entry_points.txt,sha256=1bcWxo6oE06gh07gIX6c-71Kb9CSLJ_i6FAmianbXd8,256
192
- mineru-2.2.2.dist-info/top_level.txt,sha256=zuGQfZcbsHv4I4oKI9gaKPqEWBFm6xJroKuug2LnKP8,7
193
- mineru-2.2.2.dist-info/RECORD,,
175
+ mineru-2.5.1.dist-info/licenses/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
176
+ mineru-2.5.1.dist-info/METADATA,sha256=BnhjCe7hE1EMEZdmz-YQgX0Mq5LANyNWHayKFaJztw0,64460
177
+ mineru-2.5.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
178
+ mineru-2.5.1.dist-info/entry_points.txt,sha256=luXmbhPiZK_tKlRgWuYOaW_V6EFpG-yJcAevVv9MEqE,252
179
+ mineru-2.5.1.dist-info/top_level.txt,sha256=zuGQfZcbsHv4I4oKI9gaKPqEWBFm6xJroKuug2LnKP8,7
180
+ mineru-2.5.1.dist-info/RECORD,,
@@ -3,4 +3,4 @@ mineru = mineru.cli:client.main
3
3
  mineru-api = mineru.cli.fast_api:main
4
4
  mineru-gradio = mineru.cli.gradio_app:main
5
5
  mineru-models-download = mineru.cli.models_download:download_models
6
- mineru-sglang-server = mineru.cli.vlm_sglang_server:main
6
+ mineru-vllm-server = mineru.cli.vlm_vllm_server:main
@@ -1,186 +0,0 @@
1
- import asyncio
2
- from abc import ABC, abstractmethod
3
- from typing import AsyncIterable, Iterable, List, Optional, Union
4
-
5
- DEFAULT_SYSTEM_PROMPT = (
6
- "A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers."
7
- )
8
- DEFAULT_USER_PROMPT = "Document Parsing:"
9
- DEFAULT_TEMPERATURE = 0.0
10
- DEFAULT_TOP_P = 0.8
11
- DEFAULT_TOP_K = 20
12
- DEFAULT_REPETITION_PENALTY = 1.0
13
- DEFAULT_PRESENCE_PENALTY = 0.0
14
- DEFAULT_NO_REPEAT_NGRAM_SIZE = 100
15
- DEFAULT_MAX_NEW_TOKENS = 16384
16
-
17
-
18
- class BasePredictor(ABC):
19
- system_prompt = DEFAULT_SYSTEM_PROMPT
20
-
21
- def __init__(
22
- self,
23
- temperature: float = DEFAULT_TEMPERATURE,
24
- top_p: float = DEFAULT_TOP_P,
25
- top_k: int = DEFAULT_TOP_K,
26
- repetition_penalty: float = DEFAULT_REPETITION_PENALTY,
27
- presence_penalty: float = DEFAULT_PRESENCE_PENALTY,
28
- no_repeat_ngram_size: int = DEFAULT_NO_REPEAT_NGRAM_SIZE,
29
- max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS,
30
- ) -> None:
31
- self.temperature = temperature
32
- self.top_p = top_p
33
- self.top_k = top_k
34
- self.repetition_penalty = repetition_penalty
35
- self.presence_penalty = presence_penalty
36
- self.no_repeat_ngram_size = no_repeat_ngram_size
37
- self.max_new_tokens = max_new_tokens
38
-
39
- @abstractmethod
40
- def predict(
41
- self,
42
- image: str | bytes,
43
- prompt: str = "",
44
- temperature: Optional[float] = None,
45
- top_p: Optional[float] = None,
46
- top_k: Optional[int] = None,
47
- repetition_penalty: Optional[float] = None,
48
- presence_penalty: Optional[float] = None,
49
- no_repeat_ngram_size: Optional[int] = None,
50
- max_new_tokens: Optional[int] = None,
51
- ) -> str: ...
52
-
53
- @abstractmethod
54
- def batch_predict(
55
- self,
56
- images: List[str] | List[bytes],
57
- prompts: Union[List[str], str] = "",
58
- temperature: Optional[float] = None,
59
- top_p: Optional[float] = None,
60
- top_k: Optional[int] = None,
61
- repetition_penalty: Optional[float] = None,
62
- presence_penalty: Optional[float] = None,
63
- no_repeat_ngram_size: Optional[int] = None,
64
- max_new_tokens: Optional[int] = None,
65
- ) -> List[str]: ...
66
-
67
- @abstractmethod
68
- def stream_predict(
69
- self,
70
- image: str | bytes,
71
- prompt: str = "",
72
- temperature: Optional[float] = None,
73
- top_p: Optional[float] = None,
74
- top_k: Optional[int] = None,
75
- repetition_penalty: Optional[float] = None,
76
- presence_penalty: Optional[float] = None,
77
- no_repeat_ngram_size: Optional[int] = None,
78
- max_new_tokens: Optional[int] = None,
79
- ) -> Iterable[str]: ...
80
-
81
- async def aio_predict(
82
- self,
83
- image: str | bytes,
84
- prompt: str = "",
85
- temperature: Optional[float] = None,
86
- top_p: Optional[float] = None,
87
- top_k: Optional[int] = None,
88
- repetition_penalty: Optional[float] = None,
89
- presence_penalty: Optional[float] = None,
90
- no_repeat_ngram_size: Optional[int] = None,
91
- max_new_tokens: Optional[int] = None,
92
- ) -> str:
93
- return await asyncio.to_thread(
94
- self.predict,
95
- image,
96
- prompt,
97
- temperature,
98
- top_p,
99
- top_k,
100
- repetition_penalty,
101
- presence_penalty,
102
- no_repeat_ngram_size,
103
- max_new_tokens,
104
- )
105
-
106
- async def aio_batch_predict(
107
- self,
108
- images: List[str] | List[bytes],
109
- prompts: Union[List[str], str] = "",
110
- temperature: Optional[float] = None,
111
- top_p: Optional[float] = None,
112
- top_k: Optional[int] = None,
113
- repetition_penalty: Optional[float] = None,
114
- presence_penalty: Optional[float] = None,
115
- no_repeat_ngram_size: Optional[int] = None,
116
- max_new_tokens: Optional[int] = None,
117
- ) -> List[str]:
118
- return await asyncio.to_thread(
119
- self.batch_predict,
120
- images,
121
- prompts,
122
- temperature,
123
- top_p,
124
- top_k,
125
- repetition_penalty,
126
- presence_penalty,
127
- no_repeat_ngram_size,
128
- max_new_tokens,
129
- )
130
-
131
- async def aio_stream_predict(
132
- self,
133
- image: str | bytes,
134
- prompt: str = "",
135
- temperature: Optional[float] = None,
136
- top_p: Optional[float] = None,
137
- top_k: Optional[int] = None,
138
- repetition_penalty: Optional[float] = None,
139
- presence_penalty: Optional[float] = None,
140
- no_repeat_ngram_size: Optional[int] = None,
141
- max_new_tokens: Optional[int] = None,
142
- ) -> AsyncIterable[str]:
143
- queue = asyncio.Queue()
144
- loop = asyncio.get_running_loop()
145
-
146
- def synced_predict():
147
- for chunk in self.stream_predict(
148
- image=image,
149
- prompt=prompt,
150
- temperature=temperature,
151
- top_p=top_p,
152
- top_k=top_k,
153
- repetition_penalty=repetition_penalty,
154
- presence_penalty=presence_penalty,
155
- no_repeat_ngram_size=no_repeat_ngram_size,
156
- max_new_tokens=max_new_tokens,
157
- ):
158
- asyncio.run_coroutine_threadsafe(queue.put(chunk), loop)
159
- asyncio.run_coroutine_threadsafe(queue.put(None), loop)
160
-
161
- asyncio.create_task(
162
- asyncio.to_thread(synced_predict),
163
- )
164
-
165
- while True:
166
- chunk = await queue.get()
167
- if chunk is None:
168
- return
169
- assert isinstance(chunk, str)
170
- yield chunk
171
-
172
- def build_prompt(self, prompt: str) -> str:
173
- if prompt.startswith("<|im_start|>"):
174
- return prompt
175
- if not prompt:
176
- prompt = DEFAULT_USER_PROMPT
177
-
178
- return f"<|im_start|>system\n{self.system_prompt}<|im_end|><|im_start|>user\n<image>\n{prompt}<|im_end|><|im_start|>assistant\n"
179
- # Modify here. We add <|box_start|> at the end of the prompt to force the model to generate bounding box.
180
- # if "Document OCR" in prompt:
181
- # return f"<|im_start|>system\n{self.system_prompt}<|im_end|><|im_start|>user\n<image>\n{prompt}<|im_end|><|im_start|>assistant\n<|box_start|>"
182
- # else:
183
- # return f"<|im_start|>system\n{self.system_prompt}<|im_end|><|im_start|>user\n<image>\n{prompt}<|im_end|><|im_start|>assistant\n"
184
-
185
- def close(self):
186
- pass