openocr-python 0.0.9__py3-none-any.whl → 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. openocr/__init__.py +35 -1
  2. openocr/configs/dataset/rec/evaluation.yaml +41 -0
  3. openocr/configs/dataset/rec/ltb.yaml +9 -0
  4. openocr/configs/dataset/rec/mjsynth.yaml +11 -0
  5. openocr/configs/dataset/rec/openvino.yaml +25 -0
  6. openocr/configs/dataset/rec/ost.yaml +17 -0
  7. openocr/configs/dataset/rec/synthtext.yaml +7 -0
  8. openocr/configs/dataset/rec/test.yaml +77 -0
  9. openocr/configs/dataset/rec/textocr.yaml +13 -0
  10. openocr/configs/dataset/rec/textocr_horizontal.yaml +13 -0
  11. openocr/configs/dataset/rec/union14m_b.yaml +47 -0
  12. openocr/configs/dataset/rec/union14m_l_filtered.yaml +35 -0
  13. openocr/configs/rec/cmer/cmer.yml +127 -0
  14. openocr/configs/rec/mdiff4str/svtrv2_mdiffdecoder_base.yml +152 -0
  15. openocr/configs/rec/mdiff4str/svtrv2_mdiffdecoder_small.yml +152 -0
  16. openocr/configs/rec/unirec/focalsvtr_ardecoder_unirec.yml +114 -0
  17. openocr/configs/rec/unirec/opendoc_pipeline.yml +105 -0
  18. openocr/demo_gradio.py +28 -8
  19. openocr/demo_opendoc.py +572 -0
  20. openocr/demo_unirec.py +392 -0
  21. openocr/opendet/losses/__init__.py +5 -7
  22. openocr/opendet/preprocess/crop_resize.py +2 -1
  23. openocr/openocr.py +685 -0
  24. openocr/openrec/losses/__init__.py +8 -3
  25. openocr/openrec/losses/cmer_loss.py +12 -0
  26. openocr/openrec/losses/mdiff_loss.py +11 -0
  27. openocr/openrec/losses/unirec_loss.py +12 -0
  28. openocr/openrec/metrics/__init__.py +4 -1
  29. openocr/openrec/metrics/rec_metric_cmer.py +328 -0
  30. openocr/openrec/modeling/cmer_modeling/modeling_cmer.py +643 -0
  31. openocr/openrec/modeling/decoders/__init__.py +1 -0
  32. openocr/openrec/modeling/decoders/ctc_decoder.py +1 -1
  33. openocr/openrec/modeling/decoders/dan_decoder.py +4 -4
  34. openocr/openrec/modeling/decoders/dptr_parseq_clip_b_decoder.py +1563 -1398
  35. openocr/openrec/modeling/decoders/mdiff_decoder.py +587 -0
  36. openocr/openrec/modeling/decoders/smtr_decoder.py +99 -48
  37. openocr/openrec/modeling/unirec_modeling/configuration_unirec.py +166 -0
  38. openocr/openrec/modeling/unirec_modeling/modeling_unirec.py +433 -0
  39. openocr/openrec/optimizer/__init__.py +4 -3
  40. openocr/openrec/optimizer/lr.py +49 -0
  41. openocr/openrec/postprocess/__init__.py +2 -0
  42. openocr/openrec/postprocess/abinet_postprocess.py +1 -1
  43. openocr/openrec/postprocess/ar_postprocess.py +1 -1
  44. openocr/openrec/postprocess/cmer_postprocess.py +86 -0
  45. openocr/openrec/postprocess/cppd_postprocess.py +1 -1
  46. openocr/openrec/postprocess/igtr_postprocess.py +1 -1
  47. openocr/openrec/postprocess/lister_postprocess.py +1 -1
  48. openocr/openrec/postprocess/mgp_postprocess.py +1 -1
  49. openocr/openrec/postprocess/nrtr_postprocess.py +2 -2
  50. openocr/openrec/postprocess/smtr_postprocess.py +1 -1
  51. openocr/openrec/postprocess/srn_postprocess.py +1 -1
  52. openocr/openrec/postprocess/unirec_postprocess.py +58 -0
  53. openocr/openrec/postprocess/visionlan_postprocess.py +1 -1
  54. openocr/openrec/preprocess/__init__.py +5 -0
  55. openocr/openrec/preprocess/ce_label_encode.py +1 -1
  56. openocr/openrec/preprocess/cmer_label_encode.py +1025 -0
  57. openocr/openrec/preprocess/ctc_label_encode.py +1 -1
  58. openocr/openrec/preprocess/dptr_label_encode.py +177 -157
  59. openocr/openrec/preprocess/igtr_label_encode.py +4 -2
  60. openocr/openrec/preprocess/mdiff_label_encode.py +312 -0
  61. openocr/openrec/preprocess/rec_aug.py +128 -2
  62. openocr/openrec/preprocess/resize.py +57 -0
  63. openocr/openrec/preprocess/unirec_label_encode.py +62 -0
  64. openocr/tools/data/__init__.py +78 -55
  65. openocr/tools/data/cmer_web_dataset.py +310 -0
  66. openocr/tools/data/native_size_dataset.py +753 -0
  67. openocr/tools/data/native_size_sampler.py +158 -0
  68. openocr/tools/data/ratio_dataset_tvresize.py +2 -0
  69. openocr/tools/data/ratio_sampler.py +2 -1
  70. openocr/tools/download/download_dataset.py +38 -0
  71. openocr/tools/download/utils.py +28 -0
  72. openocr/tools/download_example_images.py +236 -0
  73. openocr/tools/engine/trainer.py +155 -39
  74. openocr/tools/eval_rec_all_ch.py +2 -2
  75. openocr/tools/infer_det.py +20 -2
  76. openocr/tools/infer_doc.py +898 -0
  77. openocr/tools/infer_doc_onnx.py +1172 -0
  78. openocr/tools/infer_e2e.py +27 -10
  79. openocr/tools/infer_rec.py +64 -15
  80. openocr/tools/infer_unirec_onnx.py +730 -0
  81. openocr/tools/to_markdown.py +468 -0
  82. openocr/tools/utils/ckpt.py +17 -5
  83. openocr/tools/utils/opendoc_onnx_utils/utils.py +1052 -0
  84. openocr_python-0.1.0.dev0.dist-info/METADATA +324 -0
  85. {openocr_python-0.0.9.dist-info → openocr_python-0.1.0.dev0.dist-info}/RECORD +89 -45
  86. {openocr_python-0.0.9.dist-info → openocr_python-0.1.0.dev0.dist-info}/WHEEL +1 -1
  87. openocr_python-0.1.0.dev0.dist-info/entry_points.txt +2 -0
  88. openocr_python-0.0.9.dist-info/METADATA +0 -149
  89. /openocr_python-0.0.9.dist-info/LICENCE → /openocr_python-0.1.0.dev0.dist-info/licenses/LICENSE +0 -0
  90. {openocr_python-0.0.9.dist-info → openocr_python-0.1.0.dev0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,572 @@
1
+ import os
2
+ import uuid
3
+ import shutil
4
+ import re
5
+ import base64
6
+ import argparse
7
+ import gradio as gr
8
+ from PIL import Image
9
+
10
+ from tools.infer_doc_onnx import OpenDocONNX
11
+ from tools.utils.logging import get_logger
12
+ from tools.download_example_images import get_example_images_path
13
+
14
# Module-wide logger for the OpenDoc Gradio demo.
logger = get_logger(name='opendoc_gradio')

# Shared pipeline instance. It starts as None and is created by
# get_pipeline() the first time that function is called.
pipeline: OpenDocONNX | None = None
18
+
19
+
20
def get_pipeline(
    layout_model_path=None,
    unirec_encoder_path=None,
    unirec_decoder_path=None,
    tokenizer_mapping_path=None,
    use_gpu=None,
    auto_download=True
) -> OpenDocONNX:
    """Return the shared OpenDocONNX pipeline, creating it on first use.

    The arguments are only consulted when no pipeline exists yet; once the
    module-level instance has been created, later calls return it unchanged
    regardless of the arguments passed.

    Args:
        layout_model_path: Path to the layout detection ONNX model.
        unirec_encoder_path: Path to the UniRec encoder ONNX model.
        unirec_decoder_path: Path to the UniRec decoder ONNX model.
        tokenizer_mapping_path: Path to the tokenizer mapping file.
        use_gpu: Whether to use GPU. If None, auto-detect.
        auto_download: If True, automatically download missing models.

    Returns:
        OpenDocONNX: The initialized pipeline instance.
    """
    global pipeline

    # Reuse the existing instance when there is one.
    if pipeline is not None:
        return pipeline

    if use_gpu is None:
        device_label = 'GPU (auto-detect)'
    elif use_gpu:
        device_label = 'GPU'
    else:
        device_label = 'CPU'
    logger.info(f'Initializing OpenDoc ONNX pipeline on {device_label}...')

    model_options = {
        'layout_model_path': layout_model_path,
        'unirec_encoder_path': unirec_encoder_path,
        'unirec_decoder_path': unirec_decoder_path,
        'tokenizer_mapping_path': tokenizer_mapping_path,
        'use_gpu': use_gpu,
        'auto_download': auto_download,
    }
    pipeline = OpenDocONNX(**model_options)
    return pipeline
54
+
55
+
56
# Pipeline handle used by the request handler. It is left as None here and
# filled in either by launch_demo() or on the first call to process_image().
current_pipeline = None
58
+
59
+
60
+ def process_image(
61
+ image_path: str | None
62
+ ) -> tuple[Image.Image | None, str, str, str | None, str, str]:
63
+ """处理图片并进行OCR识别
64
+
65
+ Args:
66
+ image_path: 图片文件路径,None表示无图片
67
+
68
+ Returns:
69
+ tuple: (可视化图片, Markdown内容(base64图片), JSON内容, ZIP文件路径, 原始Markdown, Markdown内容(base64图片))
70
+ """
71
+ global current_pipeline
72
+
73
+ if image_path is None:
74
+ return None, '', '', None, '', ''
75
+
76
+ # Initialize pipeline on first use
77
+ if current_pipeline is None:
78
+ current_pipeline = get_pipeline()
79
+
80
+ # Get original image name
81
+ base_name = os.path.splitext(os.path.basename(image_path))[0]
82
+ file_ext = os.path.splitext(image_path)[1] or '.jpg'
83
+
84
+ # Create a directory with image name for this request
85
+ output_base_dir = 'gradio_outputs'
86
+ os.makedirs(output_base_dir, exist_ok=True)
87
+
88
+ # Add timestamp to avoid conflicts if same filename is uploaded multiple times
89
+ timestamp = str(uuid.uuid4())[:8]
90
+ folder_name = f'{base_name}_{timestamp}'
91
+ tmp_dir = os.path.join(output_base_dir, folder_name)
92
+ os.makedirs(tmp_dir, exist_ok=True)
93
+
94
+ try:
95
+ # Copy and rename the input image
96
+ tmp_img_path = os.path.join(tmp_dir, f'{base_name}{file_ext}')
97
+ image = Image.open(image_path)
98
+ image.save(tmp_img_path)
99
+
100
+ # Predict
101
+ result = current_pipeline(
102
+ img_path=tmp_img_path,
103
+ merge_layout_blocks=True
104
+ )
105
+ logger.info(f'Pipeline result type: {type(result)}, has content: {bool(result)}')
106
+ if result:
107
+ logger.info(f'Result keys: {result.keys()}')
108
+ if 'recognition_results' in result:
109
+ logger.info(f'Recognition results count: {len(result["recognition_results"])}')
110
+
111
+ if not result:
112
+ logger.warning('Pipeline returned empty result')
113
+ return None, 'No results found.', '', None, '', ''
114
+
115
+ # Save results
116
+ logger.info(f'Saving results to: {tmp_dir}')
117
+ current_pipeline.save_visualization(result, tmp_dir)
118
+ logger.info('Visualization saved')
119
+ current_pipeline.save_to_markdown(result, tmp_dir)
120
+ logger.info('Markdown saved')
121
+ current_pipeline.save_to_json(result, tmp_dir)
122
+ logger.info('JSON saved')
123
+
124
+ # The save methods create a subdirectory with the image name
125
+ # Find the actual output directory
126
+ actual_output_dir = None
127
+ for item in os.listdir(tmp_dir):
128
+ item_path = os.path.join(tmp_dir, item)
129
+ if os.path.isdir(item_path):
130
+ actual_output_dir = item_path
131
+ break
132
+
133
+ if actual_output_dir is None:
134
+ # Fallback to tmp_dir if no subdirectory found
135
+ actual_output_dir = tmp_dir
136
+
137
+ logger.info(f'Actual output directory: {actual_output_dir}')
138
+ logger.info(f'Files in output dir: {os.listdir(actual_output_dir)}')
139
+
140
+ # Find the saved files
141
+ vis_img = None
142
+ for f in os.listdir(actual_output_dir):
143
+ if f.endswith('_vis.jpg'):
144
+ vis_img_path = os.path.join(actual_output_dir, f)
145
+ vis_img = Image.open(vis_img_path)
146
+ logger.info(f'Found visualization image: {vis_img_path}')
147
+ break
148
+
149
+ if vis_img is None:
150
+ logger.warning('No visualization image found')
151
+
152
+ markdown_content = ''
153
+ md_file_path = None
154
+ for f in os.listdir(actual_output_dir):
155
+ if f.endswith('.md'):
156
+ md_file_path = os.path.join(actual_output_dir, f)
157
+ with open(md_file_path, 'r', encoding='utf-8') as file:
158
+ markdown_content = file.read()
159
+ logger.info(f'Found markdown file: {md_file_path}, length: {len(markdown_content)}')
160
+ break
161
+
162
+ if not markdown_content:
163
+ logger.warning('No markdown content found')
164
+
165
+ # Convert relative image paths to base64 for proper display in Gradio
166
+ if markdown_content:
167
+
168
+ def replace_img_with_base64(match):
169
+ img_path = match.group(1)
170
+ full_img_path = os.path.join(actual_output_dir, img_path)
171
+
172
+ if os.path.exists(full_img_path):
173
+ try:
174
+ with open(full_img_path, 'rb') as img_file:
175
+ img_data = base64.b64encode(
176
+ img_file.read()).decode('utf-8')
177
+ # Determine image format
178
+ ext = os.path.splitext(full_img_path)[1].lower()
179
+ mime_type = 'image/jpeg' if ext in [
180
+ '.jpg', '.jpeg'
181
+ ] else 'image/png'
182
+ # Replace src with base64 data URL
183
+ return match.group(0).replace(
184
+ f'src=\"{img_path}\"',
185
+ f'src=\"data:{mime_type};base64,{img_data}\"')
186
+ except Exception as e:
187
+ logger.warning(
188
+ f'Failed to convert image {img_path} to base64: {e}')
189
+ return match.group(0)
190
+
191
+ # Find all img tags and replace their src
192
+ markdown_content_show = re.sub(r'<img[^>]*src=\"([^\"]+)\"[^>]*>',
193
+ replace_img_with_base64,
194
+ markdown_content)
195
+ else:
196
+ markdown_content_show = markdown_content
197
+
198
+ json_content = ''
199
+ json_file_path = None
200
+ for f in os.listdir(actual_output_dir):
201
+ if f.endswith('.json'):
202
+ json_file_path = os.path.join(actual_output_dir, f)
203
+ with open(json_file_path, 'r', encoding='utf-8') as file:
204
+ json_content = file.read()
205
+ break
206
+ # Prepare all files in tmp_dir for download by creating a zip archive
207
+ zip_path = os.path.join(output_base_dir, f'{folder_name}.zip')
208
+ _ = shutil.make_archive(zip_path.replace('.zip', ''), 'zip', tmp_dir)
209
+
210
+ return vis_img, markdown_content_show, json_content, zip_path, markdown_content
211
+
212
+ except Exception as e:
213
+ logger.error(f'Prediction error: {str(e)}')
214
+ return None, f'Error during prediction: {str(e)}', '', None, '', ''
215
+
216
+
217
# Stylesheet passed to gr.Blocks(css=...) in create_demo(). Colors,
# borders and shadows are expressed through CSS custom properties
# (var(--...)) instead of fixed values -- presumably the ones supplied by
# the active Gradio theme, so the page follows the theme's palette.
custom_css = """
body, .gradio-container {
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", "Noto Sans", Helvetica, Arial, sans-serif;
}
.app-header {
text-align: center;
max-width: 1200px;
margin: 20px auto !important;
padding: 20px;
}
.app-header h1 {
font-size: 2.5em;
font-weight: 700;
margin-bottom: 10px;
}
.app-header p {
font-size: 1.1em;
opacity: 0.7;
line-height: 1.6;
}
.quick-links {
text-align: center;
padding: 12px 0;
border: 1px solid var(--border-color-primary);
border-radius: 12px;
margin: 16px auto;
max-width: 1200px;
background: var(--background-fill-secondary);
}
.quick-links a {
margin: 0 16px;
font-size: 15px;
font-weight: 600;
color: var(--link-text-color);
text-decoration: none;
transition: all 0.3s ease;
}
.quick-links a:hover {
opacity: 0.8;
text-decoration: underline;
}
.upload-section {
border: 2px dashed var(--border-color-primary);
border-radius: 12px;
padding: 20px;
background: var(--background-fill-secondary);
transition: all 0.3s ease;
}
.upload-section:hover {
border-color: var(--color-accent);
background: var(--background-fill-primary);
}
#vis_output {
min-height: 400px;
border-radius: 12px;
overflow: hidden;
}
#md_preview {
max-height: 600px;
min-height: 200px;
overflow: auto;
padding: 20px;
background: var(--background-fill-primary);
border-radius: 12px;
box-shadow: var(--shadow-drop);
}
#md_preview img {
display: block;
margin: 16px auto;
max-width: 100%;
height: auto;
border-radius: 8px;
}
.notice {
margin: 20px auto;
max-width: 1200px;
padding: 16px 20px;
border-left: 4px solid var(--color-accent);
border-radius: 8px;
background: var(--background-fill-secondary);
font-size: 14px;
line-height: 1.8;
}
.notice strong {
font-weight: 700;
color: var(--color-accent);
}
.notice ul {
margin-top: 8px;
padding-left: 20px;
}
.notice li {
margin: 8px 0;
}
.gradio-button-primary {
font-weight: 600 !important;
transition: all 0.3s ease !important;
}
.gradio-button-primary:hover {
transform: translateY(-2px);
box-shadow: var(--shadow-drop-lg) !important;
}
"""
321
+
322
# Delimiter pairs handed to the Markdown preview (latex_delimiters=...) for
# formula rendering. Each entry gives the opening and closing marker plus
# the 'display' flag Gradio uses to pick block-level versus inline math.
LATEX_DELIMS = [
    {'left': '$$', 'right': '$$', 'display': True},
    {'left': '$', 'right': '$', 'display': False},
    {'left': '\\(', 'right': '\\)', 'display': False},
    {'left': '\\[', 'right': '\\]', 'display': True},
]
345
+
346
+
347
# Gradio interface definition
def create_demo() -> gr.Blocks:
    """Assemble the Gradio Blocks UI for the OpenDoc demo.

    The page consists of a header, an upload column (image input, optional
    examples, analyze button, tips, ZIP download) and a result column with
    tabs for the Markdown preview, layout visualization, raw Markdown and
    JSON. The analyze button is wired to process_image().

    Returns:
        gr.Blocks: The assembled Blocks application (not launched here).
    """
    # Locate the example images; per the helper's contract it is expected
    # to fetch them when they are missing.
    example_img_dir = get_example_images_path(demo_type='doc')

    # Collect the example images in sorted order, if the directory exists.
    image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')
    example_images = []
    if os.path.exists(example_img_dir):
        example_images = sorted(
            os.path.join(example_img_dir, name)
            for name in os.listdir(example_img_dir)
            if name.lower().endswith(image_suffixes))

    with gr.Blocks(css=custom_css,
                   theme=gr.themes.Soft(),
                   title='OpenDoc-0.1B Demo') as demo:
        # Page header and quick links
        gr.HTML("""
<div class="app-header">
<h1>🚀 OpenDoc-0.1B</h1>
<p>Ultra-Lightweight Document Parsing System with 0.1B Parameters (built by <a href="https://github.com/Topdu/OpenOCR">OCR Team</a>, <a href="https://fvl.fudan.edu.cn">FVL Lab</a>)</p>
<p style="font-size: 0.95em; color: #888;">
Powered by <a href="https://www.paddleocr.ai/latest/version3.x/module_usage/layout_analysis.html" target="_blank">PP-DocLayoutV2</a> for layout analysis and <a href="https://arxiv.org/pdf/2512.21095" target="_blank">UniRec-0.1B</a> for unified recognition of text, formulas, and tables
</p>
</div>
<div class="quick-links">
<a href="https://github.com/Topdu/OpenOCR" target="_blank">📖 GitHub</a>
<a href="https://arxiv.org/pdf/2512.21095" target="_blank">📄 Paper</a>
<a href="https://huggingface.co/topdu/unirec-0.1b" target="_blank">🤗 Model</a>
</div>
""")

        with gr.Row():
            # Left column: input, examples, action button, tips, download
            with gr.Column(scale=4, elem_classes=['upload-section']):
                input_img = gr.Image(type='filepath',
                                     label='📤 Upload Document Image',
                                     height=400)

                # Examples are shown only when some were found.
                if example_images:
                    gr.Examples(
                        examples=example_images,
                        inputs=input_img,
                        label='📚 Example Documents'
                    )
                btn = gr.Button('🔍 Analyze Document',
                                variant='primary',
                                size='lg')
                gr.Markdown("""
### 💡 Tips
- Supports Chinese and English documents
- Best for reports, papers, magazines, and complex layouts
- Handles text, formulas, tables, and images
""")

                download_output = gr.File(label='📥 Download All Results (ZIP)',
                                          visible=True)

            # Right column: one tab per result view
            with gr.Column(scale=6):
                with gr.Tabs():
                    with gr.Tab('📝 Markdown Preview'):
                        output_md = gr.Markdown(
                            'Please upload an image and click "Analyze Document" to see results.',
                            latex_delimiters=LATEX_DELIMS,
                            elem_id='md_preview')
                    with gr.Tab('📊 Layout Visualization'):
                        output_vis = gr.Image(type='pil',
                                              label='Layout Analysis Results',
                                              elem_id='vis_output')

                    with gr.Tab('📄 Raw Markdown'):
                        output_md_raw = gr.Code(label='Markdown Source',
                                                language='markdown',
                                                lines=20)

                    with gr.Tab('🗂️ JSON Result'):
                        output_json = gr.Code(label='Structured Data',
                                              language='json')

        # Feature notice
        gr.HTML("""
<div class="notice">
<strong>✨ Key Features:</strong>
<ul>
<li><strong>Ultra-lightweight:</strong> Only 0.1B parameters, fast inference speed</li>
<li><strong>High accuracy:</strong> Achieves 90.57% on OmniDocBench (v1.5)</li>
<li><strong>Unified recognition:</strong> Handles text, formulas, and tables in one model</li>
<li><strong>Rich output:</strong> Provides Markdown, JSON, and visualization results</li>
</ul>
</div>
""")

        # Output components, in the order process_image() returns its values.
        result_components = [
            output_vis, output_md, output_json, download_output,
            output_md_raw
        ]
        btn.click(fn=process_image,
                  inputs=[input_img],
                  outputs=result_components)

    return demo
452
+
453
+
454
def launch_demo(
    layout_model_path=None,
    unirec_encoder_path=None,
    unirec_decoder_path=None,
    tokenizer_mapping_path=None,
    use_gpu=None,
    auto_download=True,
    share=False,
    server_port=7860,
    server_name='0.0.0.0'
):
    """Create the pipeline, build the UI and start the Gradio server.

    The pipeline is created before the UI so that a model loading failure
    is logged and re-raised without ever starting the server.

    Args:
        layout_model_path: Path to layout detection ONNX model (default: auto-download)
        unirec_encoder_path: Path to UniRec encoder ONNX model (default: auto-download)
        unirec_decoder_path: Path to UniRec decoder ONNX model (default: auto-download)
        tokenizer_mapping_path: Path to tokenizer mapping JSON (default: auto-download)
        use_gpu: Whether to use GPU. If None, auto-detect (default: None)
        auto_download: If True, automatically download missing models (default: True)
        share: Create a public share link (default: False)
        server_port: Server port (default: 7860)
        server_name: Server name (default: '0.0.0.0')

    Returns:
        gr.Blocks: Gradio demo instance

    Raises:
        Exception: Whatever get_pipeline() raised, after it has been logged.
    """
    global current_pipeline

    pipeline_options = {
        'layout_model_path': layout_model_path,
        'unirec_encoder_path': unirec_encoder_path,
        'unirec_decoder_path': unirec_decoder_path,
        'tokenizer_mapping_path': tokenizer_mapping_path,
        'use_gpu': use_gpu,
        'auto_download': auto_download,
    }

    # Create the pipeline from the given arguments before building the UI.
    try:
        current_pipeline = get_pipeline(**pipeline_options)
    except Exception as e:
        logger.error(f'Failed to initialize pipeline: {e}')
        raise e

    # Build the interface and launch it with the requested server settings.
    demo = create_demo()
    demo.launch(share=share,
                server_port=server_port,
                server_name=server_name)
    return demo
505
+
506
+
507
if __name__ == '__main__':
    # Command-line entry point: parse the options and launch the demo.
    parser = argparse.ArgumentParser(description='OpenDoc-0.1B ONNX Gradio Demo')

    # Model paths
    parser.add_argument('--layout-model',
                        type=str,
                        default=None,
                        help='Path to layout detection ONNX model (default: ~/.cache/openocr/PP_DoclayoutV2_onnx/PP-DoclayoutV2.onnx)')
    parser.add_argument('--encoder',
                        type=str,
                        default=None,
                        help='Path to UniRec encoder ONNX model (default: ~/.cache/openocr/unirec_0_1b_onnx/unirec_encoder.onnx)')
    parser.add_argument('--decoder',
                        type=str,
                        default=None,
                        help='Path to UniRec decoder ONNX model (default: ~/.cache/openocr/unirec_0_1b_onnx/unirec_decoder.onnx)')
    # The help path previously read '~/.cache/openocrunirec_0_1b_onnx/...';
    # the missing '/' is restored so it matches the encoder/decoder paths.
    parser.add_argument('--mapping',
                        type=str,
                        default=None,
                        help='Path to tokenizer mapping JSON (default: ~/.cache/openocr/unirec_0_1b_onnx/unirec_tokenizer_mapping.json)')

    # GPU settings
    parser.add_argument('--use-gpu',
                        type=str,
                        default='auto',
                        choices=['auto', 'true', 'false'],
                        help='Use GPU for inference (auto: auto-detect, true: force GPU, false: force CPU)')
    parser.add_argument('--no-auto-download',
                        action='store_true',
                        help='Disable automatic model download')

    # Gradio settings
    parser.add_argument('--share',
                        action='store_true',
                        help='Create a public link')
    parser.add_argument('--server-port',
                        type=int,
                        default=7860,
                        help='Server port')
    parser.add_argument('--server-name',
                        type=str,
                        default='0.0.0.0',
                        help='Server name')

    args = parser.parse_args()

    # Map the tri-state CLI string onto the None/True/False value that
    # launch_demo() expects; argparse `choices` guarantees the key exists.
    use_gpu = {'auto': None, 'true': True, 'false': False}[args.use_gpu]

    # Launch demo with parsed arguments
    launch_demo(
        layout_model_path=args.layout_model,
        unirec_encoder_path=args.encoder,
        unirec_decoder_path=args.decoder,
        tokenizer_mapping_path=args.mapping,
        use_gpu=use_gpu,
        auto_download=not args.no_auto_download,
        share=args.share,
        server_port=args.server_port,
        server_name=args.server_name
    )