mineru 2.6.7__py3-none-any.whl → 2.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. mineru/backend/hybrid/__init__.py +1 -0
  2. mineru/backend/hybrid/hybrid_analyze.py +526 -0
  3. mineru/backend/hybrid/hybrid_magic_model.py +617 -0
  4. mineru/backend/hybrid/hybrid_model_output_to_middle_json.py +212 -0
  5. mineru/backend/pipeline/batch_analyze.py +9 -1
  6. mineru/backend/pipeline/model_init.py +96 -1
  7. mineru/backend/pipeline/pipeline_analyze.py +6 -4
  8. mineru/backend/pipeline/pipeline_middle_json_mkcontent.py +32 -41
  9. mineru/backend/vlm/utils.py +3 -1
  10. mineru/backend/vlm/vlm_analyze.py +12 -12
  11. mineru/backend/vlm/vlm_magic_model.py +24 -89
  12. mineru/backend/vlm/vlm_middle_json_mkcontent.py +118 -19
  13. mineru/cli/client.py +17 -17
  14. mineru/cli/common.py +170 -20
  15. mineru/cli/fast_api.py +39 -13
  16. mineru/cli/gradio_app.py +232 -206
  17. mineru/model/mfd/yolo_v8.py +12 -6
  18. mineru/model/mfr/unimernet/Unimernet.py +71 -3
  19. mineru/resources/header.html +5 -1
  20. mineru/utils/boxbase.py +23 -0
  21. mineru/utils/char_utils.py +55 -0
  22. mineru/utils/engine_utils.py +74 -0
  23. mineru/utils/enum_class.py +18 -1
  24. mineru/utils/magic_model_utils.py +85 -2
  25. mineru/utils/span_pre_proc.py +5 -3
  26. mineru/utils/table_merge.py +5 -21
  27. mineru/version.py +1 -1
  28. mineru-2.7.0.dist-info/METADATA +433 -0
  29. {mineru-2.6.7.dist-info → mineru-2.7.0.dist-info}/RECORD +33 -27
  30. mineru-2.6.7.dist-info/METADATA +0 -954
  31. {mineru-2.6.7.dist-info → mineru-2.7.0.dist-info}/WHEEL +0 -0
  32. {mineru-2.6.7.dist-info → mineru-2.7.0.dist-info}/entry_points.txt +0 -0
  33. {mineru-2.6.7.dist-info → mineru-2.7.0.dist-info}/licenses/LICENSE.md +0 -0
  34. {mineru-2.6.7.dist-info → mineru-2.7.0.dist-info}/top_level.txt +0 -0
mineru/cli/gradio_app.py CHANGED
@@ -3,6 +3,7 @@
  import base64
  import os
  import re
+ import sys
  import time
  import zipfile
  from pathlib import Path
@@ -12,9 +13,13 @@ import gradio as gr
  from gradio_pdf import PDF
  from loguru import logger

+ log_level = os.getenv("MINERU_LOG_LEVEL", "INFO").upper()
+ logger.remove()  # remove the default handler
+ logger.add(sys.stderr, level=log_level)  # add a new handler at the configured level
+
  from mineru.cli.common import prepare_env, read_fn, aio_do_parse, pdf_suffixes, image_suffixes
- from mineru.utils.check_sys_env import is_mac_os_version_supported
  from mineru.utils.cli_parser import arg_parse
+ from mineru.utils.engine_utils import get_vlm_engine
  from mineru.utils.hash_utils import str_sha256


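The Gradio app now honors a `MINERU_LOG_LEVEL` environment variable. A minimal standalone sketch of the same loguru pattern (variable name and `INFO` default taken from the hunk above):

```python
import os
import sys

from loguru import logger

# e.g. `export MINERU_LOG_LEVEL=DEBUG` before launching the app
log_level = os.getenv("MINERU_LOG_LEVEL", "INFO").upper()
logger.remove()                          # drop loguru's default stderr handler
logger.add(sys.stderr, level=log_level)  # re-add one at the configured level

logger.debug("visible only when MINERU_LOG_LEVEL is DEBUG or TRACE")
logger.info("visible at the default INFO level")
```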
@@ -24,15 +29,20 @@ async def parse_pdf(doc_path, output_dir, end_page_id, is_ocr, formula_enable, t
      try:
          file_name = f'{safe_stem(Path(doc_path).stem)}_{time.strftime("%y%m%d_%H%M%S")}'
          pdf_data = read_fn(doc_path)
-         if is_ocr:
-             parse_method = 'ocr'
-         else:
-             parse_method = 'auto'
-
+         # determine parse_method from the backend
          if backend.startswith("vlm"):
              parse_method = "vlm"
+         else:
+             parse_method = 'ocr' if is_ocr else 'auto'
+
+         # pick the env directory name according to the backend type
+         if backend.startswith("hybrid"):
+             env_name = f"hybrid_{parse_method}"
+         else:
+             env_name = parse_method
+
+         local_image_dir, local_md_dir = prepare_env(output_dir, file_name, env_name)

-         local_image_dir, local_md_dir = prepare_env(output_dir, file_name, parse_method)
          await aio_do_parse(
              output_dir=output_dir,
              pdf_file_names=[file_name],
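The backend string now drives both the parse method and the output directory name. A small illustrative re-implementation of just that mapping (`env_name_for` is a made-up helper name; in the hunk above the logic is inline):

```python
def env_name_for(backend: str, is_ocr: bool) -> str:
    # vlm-* backends always use the "vlm" parse method.
    if backend.startswith("vlm"):
        parse_method = "vlm"
    else:
        parse_method = "ocr" if is_ocr else "auto"
    # hybrid-* backends write into a "hybrid_" prefixed directory.
    return f"hybrid_{parse_method}" if backend.startswith("hybrid") else parse_method

assert env_name_for("vlm-auto-engine", False) == "vlm"
assert env_name_for("pipeline", True) == "ocr"
assert env_name_for("hybrid-auto-engine", False) == "hybrid_auto"
```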
@@ -100,6 +110,9 @@ def replace_image_with_base64(markdown_text, image_dir_path):


  async def to_markdown(file_path, end_pages=10, is_ocr=False, formula_enable=True, table_enable=True, language="ch", backend="pipeline", url=None):
+     # if language contains "()", take the part before the parentheses as the actual language code
+     if '(' in language and ')' in language:
+         language = language.split('(')[0].strip()
      file_path = to_pdf(file_path)
      # get the paths of the recognized md file and the zip archive
      local_md_dir, file_name = await parse_pdf(file_path, './output', end_pages - 1, is_ocr, formula_enable, table_enable, language, backend, url)
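The stripping exists because the language dropdown values (rebuilt in the next hunk) now embed a human-readable description after the language code. For example:

```python
# Dropdown values carry a description; only the code before "(" is the real language id.
language = 'ch (Chinese, English, Chinese Traditional)'
if '(' in language and ')' in language:
    language = language.split('(')[0].strip()
assert language == 'ch'
```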
@@ -130,120 +143,31 @@ latex_delimiters_type_b = [
  latex_delimiters_type_all = latex_delimiters_type_a + latex_delimiters_type_b

  header_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'resources', 'header.html')
- with open(header_path, 'r') as header_file:
+ with open(header_path, mode='r', encoding='utf-8') as header_file:
      header = header_file.read()

-
- latin_lang = [
-     "af",
-     "az",
-     "bs",
-     "cs",
-     "cy",
-     "da",
-     "de",
-     "es",
-     "et",
-     "fr",
-     "ga",
-     "hr",
-     "hu",
-     "id",
-     "is",
-     "it",
-     "ku",
-     "la",
-     "lt",
-     "lv",
-     "mi",
-     "ms",
-     "mt",
-     "nl",
-     "no",
-     "oc",
-     "pi",
-     "pl",
-     "pt",
-     "ro",
-     "rs_latin",
-     "sk",
-     "sl",
-     "sq",
-     "sv",
-     "sw",
-     "tl",
-     "tr",
-     "uz",
-     "vi",
-     "french",
-     "german",
-     "fi",
-     "eu",
-     "gl",
-     "lb",
-     "rm",
-     "ca",
-     "qu",
+ other_lang = [
+     'ch (Chinese, English, Chinese Traditional)',
+     'ch_lite (Chinese, English, Chinese Traditional, Japanese)',
+     'ch_server (Chinese, English, Chinese Traditional, Japanese)',
+     'en (English)',
+     'korean (Korean, English)',
+     'japan (Chinese, English, Chinese Traditional, Japanese)',
+     'chinese_cht (Chinese, English, Chinese Traditional, Japanese)',
+     'ta (Tamil, English)',
+     'te (Telugu, English)',
+     'ka (Kannada)',
+     'el (Greek, English)',
+     'th (Thai, English)'
  ]
- arabic_lang = ["ar", "fa", "ug", "ur", "ps", "ku", "sd", "bal"]
- cyrillic_lang = [
-     "ru",
-     "rs_cyrillic",
-     "be",
-     "bg",
-     "uk",
-     "mn",
-     "abq",
-     "ady",
-     "kbd",
-     "ava",
-     "dar",
-     "inh",
-     "che",
-     "lbe",
-     "lez",
-     "tab",
-     "kk",
-     "ky",
-     "tg",
-     "mk",
-     "tt",
-     "cv",
-     "ba",
-     "mhr",
-     "mo",
-     "udm",
-     "kv",
-     "os",
-     "bua",
-     "xal",
-     "tyv",
-     "sah",
-     "kaa",
+ add_lang = [
+     'latin (French, German, Afrikaans, Italian, Spanish, Bosnian, Portuguese, Czech, Welsh, Danish, Estonian, Irish, Croatian, Uzbek, Hungarian, Serbian (Latin), Indonesian, Occitan, Icelandic, Lithuanian, Maori, Malay, Dutch, Norwegian, Polish, Slovak, Slovenian, Albanian, Swedish, Swahili, Tagalog, Turkish, Latin, Azerbaijani, Kurdish, Latvian, Maltese, Pali, Romanian, Vietnamese, Finnish, Basque, Galician, Luxembourgish, Romansh, Catalan, Quechua)',
+     'arabic (Arabic, Persian, Uyghur, Urdu, Pashto, Kurdish, Sindhi, Balochi, English)',
+     'east_slavic (Russian, Belarusian, Ukrainian, English)',
+     'cyrillic (Russian, Belarusian, Ukrainian, Serbian (Cyrillic), Bulgarian, Mongolian, Abkhazian, Adyghe, Kabardian, Avar, Dargin, Ingush, Chechen, Lak, Lezgin, Tabasaran, Kazakh, Kyrgyz, Tajik, Macedonian, Tatar, Chuvash, Bashkir, Malian, Moldovan, Udmurt, Komi, Ossetian, Buryat, Kalmyk, Tuvan, Sakha, Karakalpak, English)',
+     'devanagari (Hindi, Marathi, Nepali, Bihari, Maithili, Angika, Bhojpuri, Magahi, Santali, Newari, Konkani, Sanskrit, Haryanvi, English)'
  ]
- east_slavic_lang = ["ru", "be", "uk"]
- devanagari_lang = [
-     "hi",
-     "mr",
-     "ne",
-     "bh",
-     "mai",
-     "ang",
-     "bho",
-     "mah",
-     "sck",
-     "new",
-     "gom",
-     "sa",
-     "bgc",
- ]
- other_lang = ['ch', 'ch_lite', 'ch_server', 'en', 'korean', 'japan', 'chinese_cht', 'ta', 'te', 'ka', "el", "th"]
- add_lang = ['latin', 'arabic', 'east_slavic', 'cyrillic', 'devanagari']
-
- # all_lang = ['', 'auto']
- all_lang = []
- # all_lang.extend([*other_lang, *latin_lang, *arabic_lang, *cyrillic_lang, *devanagari_lang])
- all_lang.extend([*other_lang, *add_lang])
+ all_lang = [*other_lang, *add_lang]


  def safe_stem(file_path):
@@ -272,18 +196,6 @@ def to_pdf(file_path):
      return tmp_file_path


- # interface-update function
- def update_interface(backend_choice):
-     if backend_choice in ["vlm-transformers", "vlm-vllm-async-engine", "vlm-lmdeploy-engine", "vlm-mlx-engine"]:
-         return gr.update(visible=False), gr.update(visible=False)
-     elif backend_choice in ["vlm-http-client"]:
-         return gr.update(visible=True), gr.update(visible=False)
-     elif backend_choice in ["pipeline"]:
-         return gr.update(visible=False), gr.update(visible=True)
-     else:
-         pass
-
-
  @click.command(context_settings=dict(ignore_unknown_options=True, allow_extra_args=True))
  @click.pass_context
  @click.option(
@@ -295,17 +207,10 @@ def update_interface(backend_choice):
      default=True,
  )
  @click.option(
-     '--enable-vllm-engine',
-     'vllm_engine_enable',
-     type=bool,
-     help="Enable vLLM engine backend for faster processing.",
-     default=False,
- )
- @click.option(
-     '--enable-lmdeploy-engine',
-     'lmdeploy_engine_enable',
+     '--enable-http-client',
+     'http_client_enable',
      type=bool,
-     help="Enable LMDeploy engine backend for faster processing.",
+     help="Enable http-client backend to link openai-compatible servers.",
      default=False,
  )
  @click.option(
345
250
  default='all',
346
251
  )
347
252
  def main(ctx,
348
- example_enable, vllm_engine_enable, lmdeploy_engine_enable, api_enable, max_convert_pages,
253
+ example_enable,
254
+ http_client_enable,
255
+ api_enable, max_convert_pages,
349
256
  server_name, server_port, latex_delimiters_type, **kwargs
350
257
  ):
351
258
 
259
+ # 创建 i18n 实例,支持中英文
260
+ i18n = gr.I18n(
261
+ en={
262
+ "upload_file": "Please upload a PDF or image",
263
+ "max_pages": "Max convert pages",
264
+ "backend": "Backend",
265
+ "server_url": "Server URL",
266
+ "server_url_info": "OpenAI-compatible server URL for http-client backend.",
267
+ "recognition_options": "**Recognition Options:**",
268
+ "table_enable": "Enable table recognition",
269
+ "table_info": "If disabled, tables will be shown as images.",
270
+ "formula_label_vlm": "Enable display formula recognition",
271
+ "formula_label_pipeline": "Enable formula recognition",
272
+ "formula_label_hybrid": "Enable inline formula recognition",
273
+ "formula_info_vlm": "If disabled, display formulas will be shown as images.",
274
+ "formula_info_pipeline": "If disabled, display formulas will be shown as images, and inline formulas will not be detected or parsed.",
275
+ "formula_info_hybrid": "If disabled, inline formulas will not be detected or parsed.",
276
+ "ocr_language": "OCR Language",
277
+ "ocr_language_info": "Select the OCR language for image-based PDFs and images.",
278
+ "force_ocr": "Force enable OCR",
279
+ "force_ocr_info": "Enable only if the result is extremely poor. Requires correct OCR language.",
280
+ "convert": "Convert",
281
+ "clear": "Clear",
282
+ "pdf_preview": "PDF preview",
283
+ "examples": "Examples:",
284
+ "convert_result": "Convert result",
285
+ "md_rendering": "Markdown rendering",
286
+ "md_text": "Markdown text",
287
+ "backend_info_vlm": "High-precision parsing via VLM, supports Chinese and English documents only.",
288
+ "backend_info_pipeline": "Traditional Multi-model pipeline parsing, supports multiple languages, hallucination-free.",
289
+ "backend_info_hybrid": "High-precision hybrid parsing, supports multiple languages.",
290
+ "backend_info_default": "Select the backend engine for document parsing.",
291
+ },
292
+ zh={
293
+ "upload_file": "请上传 PDF 或图片",
294
+ "max_pages": "最大转换页数",
295
+ "backend": "解析后端",
296
+ "server_url": "服务器地址",
297
+ "server_url_info": "http-client 后端的 OpenAI 兼容服务器地址。",
298
+ "recognition_options": "**识别选项:**",
299
+ "table_enable": "启用表格识别",
300
+ "table_info": "禁用后,表格将显示为图片。",
301
+ "formula_label_vlm": "启用行间公式识别",
302
+ "formula_label_pipeline": "启用公式识别",
303
+ "formula_label_hybrid": "启用行内公式识别",
304
+ "formula_info_vlm": "禁用后,行间公式将显示为图片。",
305
+ "formula_info_pipeline": "禁用后,行间公式将显示为图片,行内公式将不会被检测或解析。",
306
+ "formula_info_hybrid": "禁用后,行内公式将不会被检测或解析。",
307
+ "ocr_language": "OCR 语言",
308
+ "ocr_language_info": "为扫描版 PDF 和图片选择 OCR 语言。",
309
+ "force_ocr": "强制启用 OCR",
310
+ "force_ocr_info": "仅在识别效果极差时启用,需选择正确的 OCR 语言。",
311
+ "convert": "转换",
312
+ "clear": "清除",
313
+ "pdf_preview": "PDF 预览",
314
+ "examples": "示例:",
315
+ "convert_result": "转换结果",
316
+ "md_rendering": "Markdown 渲染",
317
+ "md_text": "Markdown 文本",
318
+ "backend_info_vlm": "多模态大模型高精度解析,仅支持中英文文档。",
319
+ "backend_info_pipeline": "传统多模型管道解析,支持多语言,无幻觉。",
320
+ "backend_info_hybrid": "高精度混合解析,支持多语言。",
321
+ "backend_info_default": "选择文档解析的后端引擎。",
322
+ },
323
+ )
324
+
325
+ # 根据后端类型获取公式识别标签(闭包函数以支持 i18n)
326
+ def get_formula_label(backend_choice):
327
+ if backend_choice.startswith("vlm"):
328
+ return i18n("formula_label_vlm")
329
+ elif backend_choice == "pipeline":
330
+ return i18n("formula_label_pipeline")
331
+ elif backend_choice.startswith("hybrid"):
332
+ return i18n("formula_label_hybrid")
333
+ else:
334
+ return i18n("formula_label_pipeline")
335
+
336
+ def get_formula_info(backend_choice):
337
+ if backend_choice.startswith("vlm"):
338
+ return i18n("formula_info_vlm")
339
+ elif backend_choice == "pipeline":
340
+ return i18n("formula_info_pipeline")
341
+ elif backend_choice.startswith("hybrid"):
342
+ return i18n("formula_info_hybrid")
343
+ else:
344
+ return ""
345
+
346
+ def get_backend_info(backend_choice):
347
+ if backend_choice.startswith("vlm"):
348
+ return i18n("backend_info_vlm")
349
+ elif backend_choice == "pipeline":
350
+ return i18n("backend_info_pipeline")
351
+ elif backend_choice.startswith("hybrid"):
352
+ return i18n("backend_info_hybrid")
353
+ else:
354
+ return i18n("backend_info_default")
355
+
356
+ # 更新界面函数
357
+ def update_interface(backend_choice):
358
+ formula_label_update = gr.update(label=get_formula_label(backend_choice), info=get_formula_info(backend_choice))
359
+ backend_info_update = gr.update(info=get_backend_info(backend_choice))
360
+ if "http-client" in backend_choice:
361
+ client_options_update = gr.update(visible=True)
362
+ else:
363
+ client_options_update = gr.update(visible=False)
364
+ if "vlm" in backend_choice:
365
+ ocr_options_update = gr.update(visible=False)
366
+ else:
367
+ ocr_options_update = gr.update(visible=True)
368
+
369
+ return client_options_update, ocr_options_update, formula_label_update, backend_info_update
370
+
371
+
352
372
  kwargs.update(arg_parse(ctx))
353
373
 
354
374
  if latex_delimiters_type == 'a':
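All user-facing labels above are routed through Gradio's `gr.I18n`: components receive `i18n("key")` placeholders, and the locale dictionaries are attached at `demo.launch(i18n=i18n)` (see the launch hunk below), so each visitor's browser language selects the variant. A minimal sketch of the pattern, assuming a Gradio version that ships `gr.I18n`; the key is illustrative:

```python
import gradio as gr

# Two locales sharing one key; the browser's language picks the variant.
i18n = gr.I18n(
    en={"convert": "Convert"},
    zh={"convert": "转换"},
)

with gr.Blocks() as demo:
    gr.Button(i18n("convert"))  # placeholder resolved at render time

demo.launch(i18n=i18n)
```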
@@ -360,120 +380,126 @@ def main(ctx,
      else:
          raise ValueError(f"Invalid latex delimiters type: {latex_delimiters_type}.")

-     if vllm_engine_enable:
-         try:
-             print("Start init vLLM engine...")
-             from mineru.backend.vlm.vlm_analyze import ModelSingleton
-             model_singleton = ModelSingleton()
-             predictor = model_singleton.get_model(
-                 "vllm-async-engine",
-                 None,
-                 None,
-                 **kwargs
-             )
-             print("vLLM engine init successfully.")
-         except Exception as e:
-             logger.exception(e)
-     elif lmdeploy_engine_enable:
+     vlm_engine = get_vlm_engine("auto", is_async=True)
+     if vlm_engine in ["transformers", "mlx-engine"]:
+         http_client_enable = True
+     else:
          try:
-             print("Start init LMDeploy engine...")
+             logger.info(f"Start init {vlm_engine}...")
              from mineru.backend.vlm.vlm_analyze import ModelSingleton
              model_singleton = ModelSingleton()
              predictor = model_singleton.get_model(
-                 "lmdeploy-engine",
+                 vlm_engine,
                  None,
                  None,
                  **kwargs
              )
-             print("LMDeploy engine init successfully.")
+             logger.info(f"{vlm_engine} init successfully.")
          except Exception as e:
              logger.exception(e)
+
      suffixes = [f".{suffix}" for suffix in pdf_suffixes + image_suffixes]
      with gr.Blocks() as demo:
          gr.HTML(header)
          with gr.Row():
              with gr.Column(variant='panel', scale=5):
                  with gr.Row():
-                     input_file = gr.File(label='Please upload a PDF or image', file_types=suffixes)
+                     input_file = gr.File(label=i18n("upload_file"), file_types=suffixes)
                  with gr.Row():
-                     max_pages = gr.Slider(1, max_convert_pages, int(max_convert_pages/2), step=1, label='Max convert pages')
+                     max_pages = gr.Slider(1, max_convert_pages, max_convert_pages, step=1, label=i18n("max_pages"))
                  with gr.Row():
-                     if vllm_engine_enable:
-                         drop_list = ["pipeline", "vlm-vllm-async-engine"]
-                         preferred_option = "vlm-vllm-async-engine"
-                     elif lmdeploy_engine_enable:
-                         drop_list = ["pipeline", "vlm-lmdeploy-engine"]
-                         preferred_option = "vlm-lmdeploy-engine"
-                     else:
-                         drop_list = ["pipeline", "vlm-transformers", "vlm-http-client"]
-                         if is_mac_os_version_supported():
-                             drop_list.append("vlm-mlx-engine")
-                         preferred_option = "pipeline"
-                     backend = gr.Dropdown(drop_list, label="Backend", value=preferred_option)
+                     drop_list = ["pipeline", "vlm-auto-engine", "hybrid-auto-engine"]
+                     preferred_option = "hybrid-auto-engine"
+                     if http_client_enable:
+                         drop_list.extend(["vlm-http-client", "hybrid-http-client"])
+                     backend = gr.Dropdown(drop_list, label=i18n("backend"), value=preferred_option, info=get_backend_info(preferred_option))
                  with gr.Row(visible=False) as client_options:
-                     url = gr.Textbox(label='Server URL', value='http://localhost:30000', placeholder='http://localhost:30000')
+                     url = gr.Textbox(label=i18n("server_url"), value='http://localhost:30000', placeholder='http://localhost:30000', info=i18n("server_url_info"))
                  with gr.Row(equal_height=True):
                      with gr.Column():
-                         gr.Markdown("**Recognition Options:**")
-                         formula_enable = gr.Checkbox(label='Enable formula recognition', value=True)
-                         table_enable = gr.Checkbox(label='Enable table recognition', value=True)
+                         gr.Markdown(i18n("recognition_options"))
+                         table_enable = gr.Checkbox(label=i18n("table_enable"), value=True, info=i18n("table_info"))
+                         formula_enable = gr.Checkbox(label=get_formula_label(preferred_option), value=True, info=get_formula_info(preferred_option))
                      with gr.Column(visible=False) as ocr_options:
-                         language = gr.Dropdown(all_lang, label='Language', value='ch')
-                         is_ocr = gr.Checkbox(label='Force enable OCR', value=False)
+                         language = gr.Dropdown(all_lang, label=i18n("ocr_language"), value='ch (Chinese, English, Chinese Traditional)', info=i18n("ocr_language_info"))
+                         is_ocr = gr.Checkbox(label=i18n("force_ocr"), value=False, info=i18n("force_ocr_info"))
                  with gr.Row():
-                     change_bu = gr.Button('Convert')
-                     clear_bu = gr.ClearButton(value='Clear')
-                 pdf_show = PDF(label='PDF preview', interactive=False, visible=True, height=800)
+                     change_bu = gr.Button(i18n("convert"))
+                     clear_bu = gr.ClearButton(value=i18n("clear"))
+                 pdf_show = PDF(label=i18n("pdf_preview"), interactive=False, visible=True, height=800)
                  if example_enable:
                      example_root = os.path.join(os.getcwd(), 'examples')
                      if os.path.exists(example_root):
-                         with gr.Accordion('Examples:'):
-                             gr.Examples(
-                                 examples=[os.path.join(example_root, _) for _ in os.listdir(example_root) if
-                                           _.endswith(tuple(suffixes))],
-                                 inputs=input_file
-                             )
+                         gr.Examples(
+                             label=i18n("examples"),
+                             examples=[os.path.join(example_root, _) for _ in os.listdir(example_root) if
+                                       _.endswith(tuple(suffixes))],
+                             inputs=input_file
+                         )

              with gr.Column(variant='panel', scale=5):
-                 output_file = gr.File(label='convert result', interactive=False)
+                 output_file = gr.File(label=i18n("convert_result"), interactive=False)
                  with gr.Tabs():
-                     with gr.Tab('Markdown rendering'):
-                         md = gr.Markdown(label='Markdown rendering', height=1100, show_copy_button=True,
-                                          latex_delimiters=latex_delimiters,
-                                          line_breaks=True)
-                     with gr.Tab('Markdown text'):
-                         md_text = gr.TextArea(lines=45, show_copy_button=True)
+                     with gr.Tab(i18n("md_rendering")):
+                         md = gr.Markdown(
+                             label=i18n("md_rendering"),
+                             height=1200,
+                             # buttons=["copy"],  # used on gradio >= 6
+                             show_copy_button=True,  # used on gradio < 6
+                             latex_delimiters=latex_delimiters,
+                             line_breaks=True
+                         )
+                     with gr.Tab(i18n("md_text")):
+                         md_text = gr.TextArea(
+                             lines=45,
+                             # buttons=["copy"],  # used on gradio >= 6
+                             show_copy_button=True,  # used on gradio < 6
+                             label=i18n("md_text")
+                         )

          # wire up event handlers
          backend.change(
              fn=update_interface,
              inputs=[backend],
-             outputs=[client_options, ocr_options],
-             api_name=False
+             outputs=[client_options, ocr_options, formula_enable, backend],
+             # api_visibility="private"  # used on gradio >= 6
+             api_name=False  # used on gradio < 6
          )
          # add a demo.load event to trigger one interface update when the page loads
          demo.load(
              fn=update_interface,
              inputs=[backend],
-             outputs=[client_options, ocr_options],
-             api_name=False
+             outputs=[client_options, ocr_options, formula_enable, backend],
+             # api_visibility="private"  # used on gradio >= 6
+             api_name=False  # used on gradio < 6
          )
          clear_bu.add([input_file, md, pdf_show, md_text, output_file, is_ocr])

-         if api_enable:
-             api_name = None
-         else:
-             api_name = False
-
-         input_file.change(fn=to_pdf, inputs=input_file, outputs=pdf_show, api_name=api_name)
+         input_file.change(
+             fn=to_pdf,
+             inputs=input_file,
+             outputs=pdf_show,
+             api_name="to_pdf" if api_enable else False,  # used on gradio < 6
+             # api_visibility="public" if api_enable else "private"  # used on gradio >= 6
+         )
          change_bu.click(
              fn=to_markdown,
              inputs=[input_file, max_pages, is_ocr, formula_enable, table_enable, language, backend, url],
              outputs=[md, md_text, output_file, pdf_show],
-             api_name=api_name
+             api_name="to_markdown" if api_enable else False,  # used on gradio < 6
+             # api_visibility="public" if api_enable else "private"  # used on gradio >= 6
          )

-     demo.launch(server_name=server_name, server_port=server_port, show_api=api_enable)
+     footer_links = ["gradio", "settings"]
+     if api_enable:
+         footer_links.append("api")
+     demo.launch(
+         server_name=server_name,
+         server_port=server_port,
+         # footer_links=footer_links,  # used on gradio >= 6
+         show_api=api_enable,  # used on gradio < 6
+         i18n=i18n
+     )


  if __name__ == '__main__':
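With the API enabled, the two endpoints now get stable names via `api_name` ("to_pdf" and "to_markdown") instead of a shared `api_name` variable. A hedged sketch of calling the named endpoint from `gradio_client` (server address, file name, and argument values are illustrative; the positional arguments mirror the `inputs` list of `change_bu.click` above):

```python
from gradio_client import Client, handle_file

client = Client("http://127.0.0.1:7860")  # wherever the Gradio app is serving

# Positional args mirror [input_file, max_pages, is_ocr, formula_enable,
# table_enable, language, backend, url] from change_bu.click above.
md, md_text, output_file, pdf_show = client.predict(
    handle_file("demo.pdf"),
    10,           # max_pages
    False,        # is_ocr
    True,         # formula_enable
    True,         # table_enable
    "ch (Chinese, English, Chinese Traditional)",  # language
    "pipeline",   # backend
    "http://localhost:30000",  # url, only used by *-http-client backends
    api_name="/to_markdown",
)
```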
mineru/model/mfd/yolo_v8.py CHANGED
@@ -27,31 +27,37 @@ class YOLOv8MFDModel:
      def _run_predict(
          self,
          inputs: Union[np.ndarray, Image.Image, List],
-         is_batch: bool = False
+         is_batch: bool = False,
+         conf: float = None,
      ) -> List:
          preds = self.model.predict(
              inputs,
              imgsz=self.imgsz,
-             conf=self.conf,
+             conf=conf if conf is not None else self.conf,
              iou=self.iou,
              verbose=False,
              device=self.device
          )
          return [pred.cpu() for pred in preds] if is_batch else preds[0].cpu()

-     def predict(self, image: Union[np.ndarray, Image.Image]):
-         return self._run_predict(image)
+     def predict(
+         self,
+         image: Union[np.ndarray, Image.Image],
+         conf: float = None,
+     ):
+         return self._run_predict(image, is_batch=False, conf=conf)

      def batch_predict(
          self,
          images: List[Union[np.ndarray, Image.Image]],
-         batch_size: int = 4
+         batch_size: int = 4,
+         conf: float = None,
      ) -> List:
          results = []
          with tqdm(total=len(images), desc="MFD Predict") as pbar:
              for idx in range(0, len(images), batch_size):
                  batch = images[idx: idx + batch_size]
-                 batch_preds = self._run_predict(batch, is_batch=True)
+                 batch_preds = self._run_predict(batch, is_batch=True, conf=conf)
                  results.extend(batch_preds)
                  pbar.update(len(batch))
          return results
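Net effect of this file's changes: the MFD confidence threshold can now be overridden per call instead of being fixed at construction time, presumably so the new hybrid backend can run formula detection at its own threshold. A usage sketch under assumed setup:

```python
# Assumes `mfd` is an already-initialized YOLOv8MFDModel and `image`/`images`
# are np.ndarray or PIL.Image values, as the type hints require.
res_default = mfd.predict(image)             # falls back to self.conf
res_strict = mfd.predict(image, conf=0.5)    # one-off stricter threshold
res_batch = mfd.batch_predict(images, batch_size=4, conf=0.25)  # override applied to every mini-batch
```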