mineru-2.6.0-py3-none-any.whl → mineru-2.6.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. mineru/backend/vlm/utils.py +2 -2
  2. mineru/backend/vlm/vlm_analyze.py +7 -5
  3. mineru/model/utils/pytorchocr/utils/resources/arch_config.yaml +1 -317
  4. mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_latin_dict.txt +545 -211
  5. mineru/model/utils/pytorchocr/utils/resources/models_config.yml +0 -12
  6. mineru/model/vlm_vllm_model/server.py +5 -3
  7. mineru/version.py +1 -1
  8. {mineru-2.6.0.dist-info → mineru-2.6.2.dist-info}/METADATA +3 -3
  9. {mineru-2.6.0.dist-info → mineru-2.6.2.dist-info}/RECORD +13 -24
  10. mineru/model/utils/pytorchocr/utils/resources/dict/arabic_dict.txt +0 -162
  11. mineru/model/utils/pytorchocr/utils/resources/dict/chinese_cht_dict.txt +0 -8421
  12. mineru/model/utils/pytorchocr/utils/resources/dict/cyrillic_dict.txt +0 -163
  13. mineru/model/utils/pytorchocr/utils/resources/dict/devanagari_dict.txt +0 -167
  14. mineru/model/utils/pytorchocr/utils/resources/dict/en_dict.txt +0 -95
  15. mineru/model/utils/pytorchocr/utils/resources/dict/japan_dict.txt +0 -4399
  16. mineru/model/utils/pytorchocr/utils/resources/dict/korean_dict.txt +0 -3688
  17. mineru/model/utils/pytorchocr/utils/resources/dict/latin_dict.txt +0 -185
  18. mineru/model/utils/pytorchocr/utils/resources/dict/ppocr_keys_v1.txt +0 -6623
  19. mineru/model/utils/pytorchocr/utils/resources/dict/ta_dict.txt +0 -128
  20. mineru/model/utils/pytorchocr/utils/resources/dict/te_dict.txt +0 -151
  21. {mineru-2.6.0.dist-info → mineru-2.6.2.dist-info}/WHEEL +0 -0
  22. {mineru-2.6.0.dist-info → mineru-2.6.2.dist-info}/entry_points.txt +0 -0
  23. {mineru-2.6.0.dist-info → mineru-2.6.2.dist-info}/licenses/LICENSE.md +0 -0
  24. {mineru-2.6.0.dist-info → mineru-2.6.2.dist-info}/top_level.txt +0 -0
@@ -3,26 +3,14 @@ lang:
3
3
  det: ch_PP-OCRv5_det_infer.pth
4
4
  rec: ch_PP-OCRv5_rec_infer.pth
5
5
  dict: ppocrv5_dict.txt
6
- ch_lite_v4:
7
- det: ch_PP-OCRv5_det_infer.pth
8
- rec: ch_PP-OCRv4_rec_infer.pth
9
- dict: ppocr_keys_v1.txt
10
6
  ch_server:
11
7
  det: ch_PP-OCRv5_det_infer.pth
12
8
  rec: ch_PP-OCRv5_rec_server_infer.pth
13
9
  dict: ppocrv5_dict.txt
14
- ch_server_v4:
15
- det: ch_PP-OCRv5_det_infer.pth
16
- rec: ch_PP-OCRv4_rec_server_infer.pth
17
- dict: ppocr_keys_v1.txt
18
10
  ch:
19
11
  det: ch_PP-OCRv5_det_infer.pth
20
12
  rec: ch_PP-OCRv4_rec_server_doc_infer.pth
21
13
  dict: ppocrv4_doc_dict.txt
22
- en_v4:
23
- det: en_PP-OCRv3_det_infer.pth
24
- rec: en_PP-OCRv4_rec_infer.pth
25
- dict: en_dict.txt
26
14
  korean:
27
15
  det: ch_PP-OCRv5_det_infer.pth
28
16
  rec: korean_PP-OCRv5_rec_infer.pth
@@ -1,7 +1,7 @@
1
1
  import os
2
2
  import sys
3
3
 
4
- from mineru.backend.vlm.custom_logits_processors import enable_custom_logits_processors
4
+ from mineru.backend.vlm.utils import set_default_gpu_memory_utilization, enable_custom_logits_processors
5
5
  from mineru.utils.models_download_utils import auto_download_and_get_model_root_path
6
6
 
7
7
  from vllm.entrypoints.cli.main import main as vllm_main
@@ -43,7 +43,8 @@ def main():
43
43
  if not has_port_arg:
44
44
  args.extend(["--port", "30000"])
45
45
  if not has_gpu_memory_utilization_arg:
46
- args.extend(["--gpu-memory-utilization", "0.7"])
46
+ gpu_memory_utilization = str(set_default_gpu_memory_utilization())
47
+ args.extend(["--gpu-memory-utilization", gpu_memory_utilization])
47
48
  if not model_path:
48
49
  model_path = auto_download_and_get_model_root_path("/", "vlm")
49
50
  if (not has_logits_processors_arg) and custom_logits_processors:
@@ -52,7 +53,8 @@ def main():
52
53
  # 重构参数,将模型路径作为位置参数
53
54
  sys.argv = [sys.argv[0]] + ["serve", model_path] + args
54
55
 
55
- os.environ["OMP_NUM_THREADS"] = "1"
56
+ if os.getenv('OMP_NUM_THREADS') is None:
57
+ os.environ["OMP_NUM_THREADS"] = "1"
56
58
 
57
59
  # 启动vllm服务器
58
60
  print(f"start vllm server: {sys.argv}")
mineru/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "2.6.0"
1
+ __version__ = "2.6.2"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mineru
3
- Version: 2.6.0
3
+ Version: 2.6.2
4
4
  Summary: A practical tool for converting PDF to Markdown
5
5
  License: AGPL-3.0
6
6
  Project-URL: homepage, https://mineru.net/
@@ -127,11 +127,11 @@ Dynamic: license-file
127
127
  </div>
128
128
 
129
129
  # Changelog
130
- - 2025/10/24 2.6.0 Release
130
+ - 2025/10/24 2.6.2 Release
131
131
  - `pipeline` backend optimizations
132
132
  - Added experimental support for Chinese formulas, which can be enabled by setting the environment variable `export MINERU_FORMULA_CH_SUPPORT=1`. This feature may cause a slight decrease in MFR speed and failures in recognizing some long formulas. It is recommended to enable it only when parsing Chinese formulas is needed. To disable this feature, set the environment variable to `0`.
133
133
  - `OCR` speed significantly improved by 200%~300%, thanks to the optimization solution provided by [@cjsdurj](https://github.com/cjsdurj)
134
- - `OCR` models updated to `ppocr-v5` version for Cyrillic, Arabic, Devanagari, Telugu (te), and Tamil (ta) languages, with accuracy improved by over 40% compared to previous models
134
+ - `OCR` models optimized for improved accuracy and coverage of Latin script recognition, and updated Cyrillic, Arabic, Devanagari, Telugu (te), and Tamil (ta) language systems to `ppocr-v5` version, with accuracy improved by over 40% compared to previous models
135
135
  - `vlm` backend optimizations
136
136
  - `table_caption` and `table_footnote` matching logic optimized to improve the accuracy of table caption and footnote matching and reading order rationality in scenarios with multiple consecutive tables on a page
137
137
  - Optimized CPU resource usage during high concurrency when using `vllm` backend, reducing server pressure
@@ -1,5 +1,5 @@
1
1
  mineru/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
2
- mineru/version.py,sha256=OEib63e0yPEGlhEXyrWE1OwRnleR0cHI7KSX7oZEQLs,22
2
+ mineru/version.py,sha256=53Sii4w6BIWn-1RhaTyqUO46gDe4nDCRQDAcpsWFH24,22
3
3
  mineru/backend/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
4
4
  mineru/backend/utils.py,sha256=GLJU3IznDmhE1_qNmkU1UOtsuskIHBezgsEVO6Uar-Y,698
5
5
  mineru/backend/pipeline/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
@@ -13,8 +13,8 @@ mineru/backend/pipeline/pipeline_magic_model.py,sha256=w8jGx8f6yZN0Wf2yPP3L9rYKc
13
13
  mineru/backend/pipeline/pipeline_middle_json_mkcontent.py,sha256=xWWOFmYL6hB8PLrxQFyRJ72dAmTIDHtqiWV-WFUfR44,14081
14
14
  mineru/backend/vlm/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
15
15
  mineru/backend/vlm/model_output_to_middle_json.py,sha256=AqYX44gS9crUO_t7SuUatD71EVjow6pI6yA2Ik3gQ0s,5139
16
- mineru/backend/vlm/utils.py,sha256=6NmVmr6-7idurCmT-1gE2SdmGaorSGgIaHmAg0fMABI,2792
17
- mineru/backend/vlm/vlm_analyze.py,sha256=aepYsICM2LXhm4pkAa0Abyki1d8M-OdbgeL4KWt91BQ,8083
16
+ mineru/backend/vlm/utils.py,sha256=woGqyRI4S7p69daLCU07XNXWTV27aLf7YBjjVH1x-5o,2794
17
+ mineru/backend/vlm/vlm_analyze.py,sha256=nzwTGndwZFfTEvHppakyDKZxph7SYOuUZW3johY5F8c,8154
18
18
  mineru/backend/vlm/vlm_magic_model.py,sha256=Pd0sOr7G1crAJIVeq6h_03gNSuxmV5U8dvGTGT_rrjs,23452
19
19
  mineru/backend/vlm/vlm_middle_json_mkcontent.py,sha256=Ie95XpwTgi7EmidcwE_scvXMRQjE2xASU_Rm_F8EP-I,13377
20
20
  mineru/cli/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
@@ -122,19 +122,10 @@ mineru/model/utils/pytorchocr/postprocess/cls_postprocess.py,sha256=1VVWXT_b1vhG
122
122
  mineru/model/utils/pytorchocr/postprocess/db_postprocess.py,sha256=AdZPF7frhQ27VVdp0GFmMcXtivwDZZfXYhzJOlP4zUs,6483
123
123
  mineru/model/utils/pytorchocr/postprocess/rec_postprocess.py,sha256=qGB3onFEFhHjqksIR1IKOx2EY98ewfsmjADjrRXg30Y,30552
124
124
  mineru/model/utils/pytorchocr/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
125
- mineru/model/utils/pytorchocr/utils/resources/arch_config.yaml,sha256=yl4qTf-q0Du0MEOuYDffOt776_6qXBU5b2K3N-IOjd8,14964
126
- mineru/model/utils/pytorchocr/utils/resources/models_config.yml,sha256=70B392J5XloC7mnK1eVi8GsWKSu7UE7qGffkEmBI9Us,2278
125
+ mineru/model/utils/pytorchocr/utils/resources/arch_config.yaml,sha256=BvaXsHXXVyxYOFYjkt7HtyZWiEyLrvHsHsJYfQZShDY,9095
126
+ mineru/model/utils/pytorchocr/utils/resources/models_config.yml,sha256=QIuqeu91pfuqya4JbxmZQSkOYke-h2Bz21O2pOLQPYc,1944
127
127
  mineru/model/utils/pytorchocr/utils/resources/pp_formulanet_arch_config.yaml,sha256=a7yueOTUrfpZo8CsK6vQokbLNB2J-P77ihaCh_LozvQ,507
128
- mineru/model/utils/pytorchocr/utils/resources/dict/arabic_dict.txt,sha256=xbaXD14RWk0Vpc7fAHpephuszp1j-Qi3IWC4VrFKu70,407
129
- mineru/model/utils/pytorchocr/utils/resources/dict/chinese_cht_dict.txt,sha256=gyVR_uHy-8l1CHctgevcjboSwA3pejXHHJ3fQ92sGoM,33443
130
- mineru/model/utils/pytorchocr/utils/resources/dict/cyrillic_dict.txt,sha256=NpqCxsjEeXhKXXJkSLg7Hq-1_vCkEppeqjkpYl3c0TI,410
131
- mineru/model/utils/pytorchocr/utils/resources/dict/devanagari_dict.txt,sha256=tfG-bYu_8aGfuWxdTKlqQjOAI0u30s4OB7WDittNGOo,508
132
- mineru/model/utils/pytorchocr/utils/resources/dict/en_dict.txt,sha256=VmLfnS0D8OjKDTsGSdasurkEtqFLPTUhRjxxw3xmjOM,190
133
- mineru/model/utils/pytorchocr/utils/resources/dict/japan_dict.txt,sha256=Hc_LQe7JBXapRbMITyKt4RztUG4k8Uh5JFsHFpjzCOg,17332
134
128
  mineru/model/utils/pytorchocr/utils/resources/dict/ka_dict.txt,sha256=-tP3ZZQyde7CE0pvvJtSeFQmZBEE1OfbOhWdxz80Hd4,452
135
- mineru/model/utils/pytorchocr/utils/resources/dict/korean_dict.txt,sha256=qh_ciuj3zUCg7E7bRy6wQh4RQn5sz-6ZFUQHQsGLCiA,14480
136
- mineru/model/utils/pytorchocr/utils/resources/dict/latin_dict.txt,sha256=jm1ONil4jDXDH35TAofWFHtUm7eiZb1nCLsoETRCniw,468
137
- mineru/model/utils/pytorchocr/utils/resources/dict/ppocr_keys_v1.txt,sha256=KLI2KtSrLcOHaapy_rU146nds_0qdYWgWSDmOTsdx_c,26249
138
129
  mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv4_doc_dict.txt,sha256=pbw4h8Q8kB5aP5exP_rfHFdU7efMjJ9aviLodafEg3I,62346
139
130
  mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_arabic_dict.txt,sha256=f5L327m3WkeHqDv7T20UqKtRVSUTDJ1AqQNvYc9pmek,2369
140
131
  mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_cyrillic_dict.txt,sha256=20CqUs6xEgVb6AxpSv32VdXSxPeHNwRSTMFqRHypE7o,2781
@@ -144,12 +135,10 @@ mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_el_dict.txt,sha256=Md
144
135
  mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_en_dict.txt,sha256=4CWmbTHzJ7oMIy4D9AeujRBeHnCefMs_QIqneMJOcNY,1416
145
136
  mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_eslav_dict.txt,sha256=PpXxWBVXFihwys26WvkaTGviiQcQ05Www8dXjn7l5us,1663
146
137
  mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_korean_dict.txt,sha256=qIBxxowBcHSJuqeevgQFt761zKIp9PyUzD75kjKIAtc,47451
147
- mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_latin_dict.txt,sha256=PAqKebYSZTwl92UnFxT3EoHk6VWWLBU-Jyt7jB0rE_8,1634
138
+ mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_latin_dict.txt,sha256=zLzEVzCz-72QUMW8dNtqmQZxQe8QNePRSImoSmubGv8,2616
148
139
  mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_ta_dict.txt,sha256=hbVBNSrhjca6bUcVLYv4rf9rAmbmBdLu8pkMG_RmEXs,1723
149
140
  mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_te_dict.txt,sha256=Qvg_XT_bUHeOT6W2bFjZmlmrd5IVHF5080uP_XthydY,1831
150
141
  mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_th_dict.txt,sha256=V_VAb5S7Zoj7cHf3vmXwi71xzs9IwB6ibFIstcSDa3o,1767
151
- mineru/model/utils/pytorchocr/utils/resources/dict/ta_dict.txt,sha256=6T5pSBSv2f8ekYtvS7Qmf7TGWpNE7l10ZPkTW5DAonA,352
152
- mineru/model/utils/pytorchocr/utils/resources/dict/te_dict.txt,sha256=7plGpg13AZd0dOiYg2lKTKIOqjhoojM0v3lA3NAI8Pk,429
153
142
  mineru/model/utils/tools/__init__.py,sha256=xEqR65Z8YOzOLorLjK0LCHos2zX-tCuxSrxndjU00hE,49
154
143
  mineru/model/utils/tools/infer/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
155
144
  mineru/model/utils/tools/infer/predict_cls.py,sha256=8RmKl1vejnZl65caHZNV2ta6hMsg5B_LE-FuqCO8T8A,4225
@@ -158,7 +147,7 @@ mineru/model/utils/tools/infer/predict_rec.py,sha256=-BH93JDisu0kT6CyHA4plUOKcb2
158
147
  mineru/model/utils/tools/infer/predict_system.py,sha256=hkegkn6hq2v2zqHVAP615-k-fkTS8swRYSbZeoqmSI8,3822
159
148
  mineru/model/utils/tools/infer/pytorchocr_utility.py,sha256=i1PFN-_kefJUUZ4Vk7igs1TU8gfErTDlDXY6-8Uaurw,9323
160
149
  mineru/model/vlm_vllm_model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
161
- mineru/model/vlm_vllm_model/server.py,sha256=nv51j9yAa-u4iFGy4Idh4-viM4sqLHvzs3Lk5w-Cfxg,2105
150
+ mineru/model/vlm_vllm_model/server.py,sha256=w5ddusPbcVaEoWAo_BRjmwv_Ywxrc_bCMRhxihoyykY,2263
162
151
  mineru/resources/header.html,sha256=PUselBXLBn8gfeP3zwEtj6zIxfhcCN4vN_B796nQFNQ,4410
163
152
  mineru/resources/fasttext-langdetect/lid.176.ftz,sha256=jzRyz-hzintgmejpmcPL-uDc0VaWqsfXc4qAOdtgPoM,938013
164
153
  mineru/utils/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
@@ -187,9 +176,9 @@ mineru/utils/run_async.py,sha256=rPeP4BCZerR8VByRDhiYzfZiahLVqoZEBVAS54dAjNg,128
187
176
  mineru/utils/span_block_fix.py,sha256=0eVQjJCrT03woRt9hoh6Uu42Tp1dacfGTv2x3B9qq94,8797
188
177
  mineru/utils/span_pre_proc.py,sha256=h41q2uQajI0xQbc_30hqaju1dv3oVYxBAlKgURl8HIc,13692
189
178
  mineru/utils/table_merge.py,sha256=d98zNbM1ZQ8V1kUt6RugParNUNPv7DGL-XKIzR3iJVQ,15360
190
- mineru-2.6.0.dist-info/licenses/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
191
- mineru-2.6.0.dist-info/METADATA,sha256=dbt-b5mAS6fgkv06-dMemfgqARV02Ji_eCDqZ6SlRD4,68358
192
- mineru-2.6.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
193
- mineru-2.6.0.dist-info/entry_points.txt,sha256=luXmbhPiZK_tKlRgWuYOaW_V6EFpG-yJcAevVv9MEqE,252
194
- mineru-2.6.0.dist-info/top_level.txt,sha256=zuGQfZcbsHv4I4oKI9gaKPqEWBFm6xJroKuug2LnKP8,7
195
- mineru-2.6.0.dist-info/RECORD,,
179
+ mineru-2.6.2.dist-info/licenses/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
180
+ mineru-2.6.2.dist-info/METADATA,sha256=QGCp0YLuKymDMYmMZuOn8IYM-kpbKas5nKF7yl3la_0,68440
181
+ mineru-2.6.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
182
+ mineru-2.6.2.dist-info/entry_points.txt,sha256=luXmbhPiZK_tKlRgWuYOaW_V6EFpG-yJcAevVv9MEqE,252
183
+ mineru-2.6.2.dist-info/top_level.txt,sha256=zuGQfZcbsHv4I4oKI9gaKPqEWBFm6xJroKuug2LnKP8,7
184
+ mineru-2.6.2.dist-info/RECORD,,
@@ -1,162 +0,0 @@
1
-
2
- !
3
- #
4
- $
5
- %
6
- &
7
- '
8
- (
9
- +
10
- ,
11
- -
12
- .
13
- /
14
- 0
15
- 1
16
- 2
17
- 3
18
- 4
19
- 5
20
- 6
21
- 7
22
- 8
23
- 9
24
- :
25
- ?
26
- @
27
- A
28
- B
29
- C
30
- D
31
- E
32
- F
33
- G
34
- H
35
- I
36
- J
37
- K
38
- L
39
- M
40
- N
41
- O
42
- P
43
- Q
44
- R
45
- S
46
- T
47
- U
48
- V
49
- W
50
- X
51
- Y
52
- Z
53
- _
54
- a
55
- b
56
- c
57
- d
58
- e
59
- f
60
- g
61
- h
62
- i
63
- j
64
- k
65
- l
66
- m
67
- n
68
- o
69
- p
70
- q
71
- r
72
- s
73
- t
74
- u
75
- v
76
- w
77
- x
78
- y
79
- z
80
- É
81
- é
82
- ء
83
- آ
84
- أ
85
- ؤ
86
- إ
87
- ئ
88
- ا
89
- ب
90
- ة
91
- ت
92
- ث
93
- ج
94
- ح
95
- خ
96
- د
97
- ذ
98
- ر
99
- ز
100
- س
101
- ش
102
- ص
103
- ض
104
- ط
105
- ظ
106
- ع
107
- غ
108
- ف
109
- ق
110
- ك
111
- ل
112
- م
113
- ن
114
- ه
115
- و
116
- ى
117
- ي
118
- ً
119
- ٌ
120
- ٍ
121
- َ
122
- ُ
123
- ِ
124
- ّ
125
- ْ
126
- ٓ
127
- ٔ
128
- ٰ
129
- ٱ
130
- ٹ
131
- پ
132
- چ
133
- ڈ
134
- ڑ
135
- ژ
136
- ک
137
- ڭ
138
- گ
139
- ں
140
- ھ
141
- ۀ
142
- ہ
143
- ۂ
144
- ۃ
145
- ۆ
146
- ۇ
147
- ۈ
148
- ۋ
149
- ی
150
- ې
151
- ے
152
- ۓ
153
- ە
154
- ١
155
- ٢
156
- ٣
157
- ٤
158
- ٥
159
- ٦
160
- ٧
161
- ٨
162
- ٩