openocr-python 0.0.9__py3-none-any.whl → 0.1.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openocr/__init__.py +35 -1
- openocr/configs/dataset/rec/evaluation.yaml +41 -0
- openocr/configs/dataset/rec/ltb.yaml +9 -0
- openocr/configs/dataset/rec/mjsynth.yaml +11 -0
- openocr/configs/dataset/rec/openvino.yaml +25 -0
- openocr/configs/dataset/rec/ost.yaml +17 -0
- openocr/configs/dataset/rec/synthtext.yaml +7 -0
- openocr/configs/dataset/rec/test.yaml +77 -0
- openocr/configs/dataset/rec/textocr.yaml +13 -0
- openocr/configs/dataset/rec/textocr_horizontal.yaml +13 -0
- openocr/configs/dataset/rec/union14m_b.yaml +47 -0
- openocr/configs/dataset/rec/union14m_l_filtered.yaml +35 -0
- openocr/configs/rec/cmer/cmer.yml +127 -0
- openocr/configs/rec/mdiff4str/svtrv2_mdiffdecoder_base.yml +152 -0
- openocr/configs/rec/mdiff4str/svtrv2_mdiffdecoder_small.yml +152 -0
- openocr/configs/rec/unirec/focalsvtr_ardecoder_unirec.yml +114 -0
- openocr/configs/rec/unirec/opendoc_pipeline.yml +105 -0
- openocr/demo_gradio.py +28 -8
- openocr/demo_opendoc.py +572 -0
- openocr/demo_unirec.py +392 -0
- openocr/opendet/losses/__init__.py +5 -7
- openocr/opendet/preprocess/crop_resize.py +2 -1
- openocr/openocr.py +685 -0
- openocr/openrec/losses/__init__.py +8 -3
- openocr/openrec/losses/cmer_loss.py +12 -0
- openocr/openrec/losses/mdiff_loss.py +11 -0
- openocr/openrec/losses/unirec_loss.py +12 -0
- openocr/openrec/metrics/__init__.py +4 -1
- openocr/openrec/metrics/rec_metric_cmer.py +328 -0
- openocr/openrec/modeling/cmer_modeling/modeling_cmer.py +643 -0
- openocr/openrec/modeling/decoders/__init__.py +1 -0
- openocr/openrec/modeling/decoders/ctc_decoder.py +1 -1
- openocr/openrec/modeling/decoders/dan_decoder.py +4 -4
- openocr/openrec/modeling/decoders/dptr_parseq_clip_b_decoder.py +1563 -1398
- openocr/openrec/modeling/decoders/mdiff_decoder.py +587 -0
- openocr/openrec/modeling/decoders/smtr_decoder.py +99 -48
- openocr/openrec/modeling/unirec_modeling/configuration_unirec.py +166 -0
- openocr/openrec/modeling/unirec_modeling/modeling_unirec.py +433 -0
- openocr/openrec/optimizer/__init__.py +4 -3
- openocr/openrec/optimizer/lr.py +49 -0
- openocr/openrec/postprocess/__init__.py +2 -0
- openocr/openrec/postprocess/abinet_postprocess.py +1 -1
- openocr/openrec/postprocess/ar_postprocess.py +1 -1
- openocr/openrec/postprocess/cmer_postprocess.py +86 -0
- openocr/openrec/postprocess/cppd_postprocess.py +1 -1
- openocr/openrec/postprocess/igtr_postprocess.py +1 -1
- openocr/openrec/postprocess/lister_postprocess.py +1 -1
- openocr/openrec/postprocess/mgp_postprocess.py +1 -1
- openocr/openrec/postprocess/nrtr_postprocess.py +2 -2
- openocr/openrec/postprocess/smtr_postprocess.py +1 -1
- openocr/openrec/postprocess/srn_postprocess.py +1 -1
- openocr/openrec/postprocess/unirec_postprocess.py +58 -0
- openocr/openrec/postprocess/visionlan_postprocess.py +1 -1
- openocr/openrec/preprocess/__init__.py +5 -0
- openocr/openrec/preprocess/ce_label_encode.py +1 -1
- openocr/openrec/preprocess/cmer_label_encode.py +1025 -0
- openocr/openrec/preprocess/ctc_label_encode.py +1 -1
- openocr/openrec/preprocess/dptr_label_encode.py +177 -157
- openocr/openrec/preprocess/igtr_label_encode.py +4 -2
- openocr/openrec/preprocess/mdiff_label_encode.py +312 -0
- openocr/openrec/preprocess/rec_aug.py +128 -2
- openocr/openrec/preprocess/resize.py +57 -0
- openocr/openrec/preprocess/unirec_label_encode.py +62 -0
- openocr/tools/data/__init__.py +78 -55
- openocr/tools/data/cmer_web_dataset.py +310 -0
- openocr/tools/data/native_size_dataset.py +753 -0
- openocr/tools/data/native_size_sampler.py +158 -0
- openocr/tools/data/ratio_dataset_tvresize.py +2 -0
- openocr/tools/data/ratio_sampler.py +2 -1
- openocr/tools/download/download_dataset.py +38 -0
- openocr/tools/download/utils.py +28 -0
- openocr/tools/download_example_images.py +236 -0
- openocr/tools/engine/trainer.py +155 -39
- openocr/tools/eval_rec_all_ch.py +2 -2
- openocr/tools/infer_det.py +20 -2
- openocr/tools/infer_doc.py +898 -0
- openocr/tools/infer_doc_onnx.py +1172 -0
- openocr/tools/infer_e2e.py +27 -10
- openocr/tools/infer_rec.py +64 -15
- openocr/tools/infer_unirec_onnx.py +730 -0
- openocr/tools/to_markdown.py +468 -0
- openocr/tools/utils/ckpt.py +17 -5
- openocr/tools/utils/opendoc_onnx_utils/utils.py +1052 -0
- openocr_python-0.1.0.dev0.dist-info/METADATA +324 -0
- {openocr_python-0.0.9.dist-info → openocr_python-0.1.0.dev0.dist-info}/RECORD +89 -45
- {openocr_python-0.0.9.dist-info → openocr_python-0.1.0.dev0.dist-info}/WHEEL +1 -1
- openocr_python-0.1.0.dev0.dist-info/entry_points.txt +2 -0
- openocr_python-0.0.9.dist-info/METADATA +0 -149
- /openocr_python-0.0.9.dist-info/LICENCE → /openocr_python-0.1.0.dev0.dist-info/licenses/LICENSE +0 -0
- {openocr_python-0.0.9.dist-info → openocr_python-0.1.0.dev0.dist-info}/top_level.txt +0 -0
openocr/demo_opendoc.py
ADDED
|
@@ -0,0 +1,572 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import uuid
|
|
3
|
+
import shutil
|
|
4
|
+
import re
|
|
5
|
+
import base64
|
|
6
|
+
import argparse
|
|
7
|
+
import gradio as gr
|
|
8
|
+
from PIL import Image
|
|
9
|
+
|
|
10
|
+
from tools.infer_doc_onnx import OpenDocONNX
|
|
11
|
+
from tools.utils.logging import get_logger
|
|
12
|
+
from tools.download_example_images import get_example_images_path
|
|
13
|
+
|
|
14
|
+
logger = get_logger(name='opendoc_gradio')

# Process-wide OpenDocONNX instance; stays None until get_pipeline() builds it.
pipeline: OpenDocONNX | None = None


def get_pipeline(
    layout_model_path=None,
    unirec_encoder_path=None,
    unirec_decoder_path=None,
    tokenizer_mapping_path=None,
    use_gpu=None,
    auto_download=True
) -> OpenDocONNX:
    """Return the shared OpenDocONNX pipeline, constructing it on first use.

    The arguments are forwarded to ``OpenDocONNX`` only when no pipeline has
    been created yet. Once the module-level ``pipeline`` is set, later calls
    return that same instance and ignore whatever arguments they were given.

    Args:
        layout_model_path: Path to the layout detection ONNX model.
        unirec_encoder_path: Path to the UniRec encoder ONNX model.
        unirec_decoder_path: Path to the UniRec decoder ONNX model.
        tokenizer_mapping_path: Path to the tokenizer mapping file.
        use_gpu: Whether to use GPU. If None, auto-detect.
        auto_download: If True, automatically download missing models.

    Returns:
        OpenDocONNX: The initialized pipeline instance.
    """
    global pipeline

    # Already built: hand back the cached instance untouched.
    if pipeline is not None:
        return pipeline

    # Human-readable device label, used for the log line only.
    if use_gpu is None:
        gpu_info = 'GPU (auto-detect)'
    elif use_gpu:
        gpu_info = 'GPU'
    else:
        gpu_info = 'CPU'
    logger.info(f'Initializing OpenDoc ONNX pipeline on {gpu_info}...')

    pipeline = OpenDocONNX(
        layout_model_path=layout_model_path,
        unirec_encoder_path=unirec_encoder_path,
        unirec_decoder_path=unirec_decoder_path,
        tokenizer_mapping_path=tokenizer_mapping_path,
        use_gpu=use_gpu,
        auto_download=auto_download,
    )
    return pipeline
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# Ensure pipeline is initialized (will be done on first request)
|
|
57
|
+
current_pipeline = None
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def process_image(
|
|
61
|
+
image_path: str | None
|
|
62
|
+
) -> tuple[Image.Image | None, str, str, str | None, str, str]:
|
|
63
|
+
"""处理图片并进行OCR识别
|
|
64
|
+
|
|
65
|
+
Args:
|
|
66
|
+
image_path: 图片文件路径,None表示无图片
|
|
67
|
+
|
|
68
|
+
Returns:
|
|
69
|
+
tuple: (可视化图片, Markdown内容(base64图片), JSON内容, ZIP文件路径, 原始Markdown, Markdown内容(base64图片))
|
|
70
|
+
"""
|
|
71
|
+
global current_pipeline
|
|
72
|
+
|
|
73
|
+
if image_path is None:
|
|
74
|
+
return None, '', '', None, '', ''
|
|
75
|
+
|
|
76
|
+
# Initialize pipeline on first use
|
|
77
|
+
if current_pipeline is None:
|
|
78
|
+
current_pipeline = get_pipeline()
|
|
79
|
+
|
|
80
|
+
# Get original image name
|
|
81
|
+
base_name = os.path.splitext(os.path.basename(image_path))[0]
|
|
82
|
+
file_ext = os.path.splitext(image_path)[1] or '.jpg'
|
|
83
|
+
|
|
84
|
+
# Create a directory with image name for this request
|
|
85
|
+
output_base_dir = 'gradio_outputs'
|
|
86
|
+
os.makedirs(output_base_dir, exist_ok=True)
|
|
87
|
+
|
|
88
|
+
# Add timestamp to avoid conflicts if same filename is uploaded multiple times
|
|
89
|
+
timestamp = str(uuid.uuid4())[:8]
|
|
90
|
+
folder_name = f'{base_name}_{timestamp}'
|
|
91
|
+
tmp_dir = os.path.join(output_base_dir, folder_name)
|
|
92
|
+
os.makedirs(tmp_dir, exist_ok=True)
|
|
93
|
+
|
|
94
|
+
try:
|
|
95
|
+
# Copy and rename the input image
|
|
96
|
+
tmp_img_path = os.path.join(tmp_dir, f'{base_name}{file_ext}')
|
|
97
|
+
image = Image.open(image_path)
|
|
98
|
+
image.save(tmp_img_path)
|
|
99
|
+
|
|
100
|
+
# Predict
|
|
101
|
+
result = current_pipeline(
|
|
102
|
+
img_path=tmp_img_path,
|
|
103
|
+
merge_layout_blocks=True
|
|
104
|
+
)
|
|
105
|
+
logger.info(f'Pipeline result type: {type(result)}, has content: {bool(result)}')
|
|
106
|
+
if result:
|
|
107
|
+
logger.info(f'Result keys: {result.keys()}')
|
|
108
|
+
if 'recognition_results' in result:
|
|
109
|
+
logger.info(f'Recognition results count: {len(result["recognition_results"])}')
|
|
110
|
+
|
|
111
|
+
if not result:
|
|
112
|
+
logger.warning('Pipeline returned empty result')
|
|
113
|
+
return None, 'No results found.', '', None, '', ''
|
|
114
|
+
|
|
115
|
+
# Save results
|
|
116
|
+
logger.info(f'Saving results to: {tmp_dir}')
|
|
117
|
+
current_pipeline.save_visualization(result, tmp_dir)
|
|
118
|
+
logger.info('Visualization saved')
|
|
119
|
+
current_pipeline.save_to_markdown(result, tmp_dir)
|
|
120
|
+
logger.info('Markdown saved')
|
|
121
|
+
current_pipeline.save_to_json(result, tmp_dir)
|
|
122
|
+
logger.info('JSON saved')
|
|
123
|
+
|
|
124
|
+
# The save methods create a subdirectory with the image name
|
|
125
|
+
# Find the actual output directory
|
|
126
|
+
actual_output_dir = None
|
|
127
|
+
for item in os.listdir(tmp_dir):
|
|
128
|
+
item_path = os.path.join(tmp_dir, item)
|
|
129
|
+
if os.path.isdir(item_path):
|
|
130
|
+
actual_output_dir = item_path
|
|
131
|
+
break
|
|
132
|
+
|
|
133
|
+
if actual_output_dir is None:
|
|
134
|
+
# Fallback to tmp_dir if no subdirectory found
|
|
135
|
+
actual_output_dir = tmp_dir
|
|
136
|
+
|
|
137
|
+
logger.info(f'Actual output directory: {actual_output_dir}')
|
|
138
|
+
logger.info(f'Files in output dir: {os.listdir(actual_output_dir)}')
|
|
139
|
+
|
|
140
|
+
# Find the saved files
|
|
141
|
+
vis_img = None
|
|
142
|
+
for f in os.listdir(actual_output_dir):
|
|
143
|
+
if f.endswith('_vis.jpg'):
|
|
144
|
+
vis_img_path = os.path.join(actual_output_dir, f)
|
|
145
|
+
vis_img = Image.open(vis_img_path)
|
|
146
|
+
logger.info(f'Found visualization image: {vis_img_path}')
|
|
147
|
+
break
|
|
148
|
+
|
|
149
|
+
if vis_img is None:
|
|
150
|
+
logger.warning('No visualization image found')
|
|
151
|
+
|
|
152
|
+
markdown_content = ''
|
|
153
|
+
md_file_path = None
|
|
154
|
+
for f in os.listdir(actual_output_dir):
|
|
155
|
+
if f.endswith('.md'):
|
|
156
|
+
md_file_path = os.path.join(actual_output_dir, f)
|
|
157
|
+
with open(md_file_path, 'r', encoding='utf-8') as file:
|
|
158
|
+
markdown_content = file.read()
|
|
159
|
+
logger.info(f'Found markdown file: {md_file_path}, length: {len(markdown_content)}')
|
|
160
|
+
break
|
|
161
|
+
|
|
162
|
+
if not markdown_content:
|
|
163
|
+
logger.warning('No markdown content found')
|
|
164
|
+
|
|
165
|
+
# Convert relative image paths to base64 for proper display in Gradio
|
|
166
|
+
if markdown_content:
|
|
167
|
+
|
|
168
|
+
def replace_img_with_base64(match):
|
|
169
|
+
img_path = match.group(1)
|
|
170
|
+
full_img_path = os.path.join(actual_output_dir, img_path)
|
|
171
|
+
|
|
172
|
+
if os.path.exists(full_img_path):
|
|
173
|
+
try:
|
|
174
|
+
with open(full_img_path, 'rb') as img_file:
|
|
175
|
+
img_data = base64.b64encode(
|
|
176
|
+
img_file.read()).decode('utf-8')
|
|
177
|
+
# Determine image format
|
|
178
|
+
ext = os.path.splitext(full_img_path)[1].lower()
|
|
179
|
+
mime_type = 'image/jpeg' if ext in [
|
|
180
|
+
'.jpg', '.jpeg'
|
|
181
|
+
] else 'image/png'
|
|
182
|
+
# Replace src with base64 data URL
|
|
183
|
+
return match.group(0).replace(
|
|
184
|
+
f'src=\"{img_path}\"',
|
|
185
|
+
f'src=\"data:{mime_type};base64,{img_data}\"')
|
|
186
|
+
except Exception as e:
|
|
187
|
+
logger.warning(
|
|
188
|
+
f'Failed to convert image {img_path} to base64: {e}')
|
|
189
|
+
return match.group(0)
|
|
190
|
+
|
|
191
|
+
# Find all img tags and replace their src
|
|
192
|
+
markdown_content_show = re.sub(r'<img[^>]*src=\"([^\"]+)\"[^>]*>',
|
|
193
|
+
replace_img_with_base64,
|
|
194
|
+
markdown_content)
|
|
195
|
+
else:
|
|
196
|
+
markdown_content_show = markdown_content
|
|
197
|
+
|
|
198
|
+
json_content = ''
|
|
199
|
+
json_file_path = None
|
|
200
|
+
for f in os.listdir(actual_output_dir):
|
|
201
|
+
if f.endswith('.json'):
|
|
202
|
+
json_file_path = os.path.join(actual_output_dir, f)
|
|
203
|
+
with open(json_file_path, 'r', encoding='utf-8') as file:
|
|
204
|
+
json_content = file.read()
|
|
205
|
+
break
|
|
206
|
+
# Prepare all files in tmp_dir for download by creating a zip archive
|
|
207
|
+
zip_path = os.path.join(output_base_dir, f'{folder_name}.zip')
|
|
208
|
+
_ = shutil.make_archive(zip_path.replace('.zip', ''), 'zip', tmp_dir)
|
|
209
|
+
|
|
210
|
+
return vis_img, markdown_content_show, json_content, zip_path, markdown_content
|
|
211
|
+
|
|
212
|
+
except Exception as e:
|
|
213
|
+
logger.error(f'Prediction error: {str(e)}')
|
|
214
|
+
return None, f'Error during prediction: {str(e)}', '', None, '', ''
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
# Custom CSS with adaptive colors
|
|
218
|
+
custom_css = """
|
|
219
|
+
body, .gradio-container {
|
|
220
|
+
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", "Noto Sans", Helvetica, Arial, sans-serif;
|
|
221
|
+
}
|
|
222
|
+
.app-header {
|
|
223
|
+
text-align: center;
|
|
224
|
+
max-width: 1200px;
|
|
225
|
+
margin: 20px auto !important;
|
|
226
|
+
padding: 20px;
|
|
227
|
+
}
|
|
228
|
+
.app-header h1 {
|
|
229
|
+
font-size: 2.5em;
|
|
230
|
+
font-weight: 700;
|
|
231
|
+
margin-bottom: 10px;
|
|
232
|
+
}
|
|
233
|
+
.app-header p {
|
|
234
|
+
font-size: 1.1em;
|
|
235
|
+
opacity: 0.7;
|
|
236
|
+
line-height: 1.6;
|
|
237
|
+
}
|
|
238
|
+
.quick-links {
|
|
239
|
+
text-align: center;
|
|
240
|
+
padding: 12px 0;
|
|
241
|
+
border: 1px solid var(--border-color-primary);
|
|
242
|
+
border-radius: 12px;
|
|
243
|
+
margin: 16px auto;
|
|
244
|
+
max-width: 1200px;
|
|
245
|
+
background: var(--background-fill-secondary);
|
|
246
|
+
}
|
|
247
|
+
.quick-links a {
|
|
248
|
+
margin: 0 16px;
|
|
249
|
+
font-size: 15px;
|
|
250
|
+
font-weight: 600;
|
|
251
|
+
color: var(--link-text-color);
|
|
252
|
+
text-decoration: none;
|
|
253
|
+
transition: all 0.3s ease;
|
|
254
|
+
}
|
|
255
|
+
.quick-links a:hover {
|
|
256
|
+
opacity: 0.8;
|
|
257
|
+
text-decoration: underline;
|
|
258
|
+
}
|
|
259
|
+
.upload-section {
|
|
260
|
+
border: 2px dashed var(--border-color-primary);
|
|
261
|
+
border-radius: 12px;
|
|
262
|
+
padding: 20px;
|
|
263
|
+
background: var(--background-fill-secondary);
|
|
264
|
+
transition: all 0.3s ease;
|
|
265
|
+
}
|
|
266
|
+
.upload-section:hover {
|
|
267
|
+
border-color: var(--color-accent);
|
|
268
|
+
background: var(--background-fill-primary);
|
|
269
|
+
}
|
|
270
|
+
#vis_output {
|
|
271
|
+
min-height: 400px;
|
|
272
|
+
border-radius: 12px;
|
|
273
|
+
overflow: hidden;
|
|
274
|
+
}
|
|
275
|
+
#md_preview {
|
|
276
|
+
max-height: 600px;
|
|
277
|
+
min-height: 200px;
|
|
278
|
+
overflow: auto;
|
|
279
|
+
padding: 20px;
|
|
280
|
+
background: var(--background-fill-primary);
|
|
281
|
+
border-radius: 12px;
|
|
282
|
+
box-shadow: var(--shadow-drop);
|
|
283
|
+
}
|
|
284
|
+
#md_preview img {
|
|
285
|
+
display: block;
|
|
286
|
+
margin: 16px auto;
|
|
287
|
+
max-width: 100%;
|
|
288
|
+
height: auto;
|
|
289
|
+
border-radius: 8px;
|
|
290
|
+
}
|
|
291
|
+
.notice {
|
|
292
|
+
margin: 20px auto;
|
|
293
|
+
max-width: 1200px;
|
|
294
|
+
padding: 16px 20px;
|
|
295
|
+
border-left: 4px solid var(--color-accent);
|
|
296
|
+
border-radius: 8px;
|
|
297
|
+
background: var(--background-fill-secondary);
|
|
298
|
+
font-size: 14px;
|
|
299
|
+
line-height: 1.8;
|
|
300
|
+
}
|
|
301
|
+
.notice strong {
|
|
302
|
+
font-weight: 700;
|
|
303
|
+
color: var(--color-accent);
|
|
304
|
+
}
|
|
305
|
+
.notice ul {
|
|
306
|
+
margin-top: 8px;
|
|
307
|
+
padding-left: 20px;
|
|
308
|
+
}
|
|
309
|
+
.notice li {
|
|
310
|
+
margin: 8px 0;
|
|
311
|
+
}
|
|
312
|
+
.gradio-button-primary {
|
|
313
|
+
font-weight: 600 !important;
|
|
314
|
+
transition: all 0.3s ease !important;
|
|
315
|
+
}
|
|
316
|
+
.gradio-button-primary:hover {
|
|
317
|
+
transform: translateY(-2px);
|
|
318
|
+
box-shadow: var(--shadow-drop-lg) !important;
|
|
319
|
+
}
|
|
320
|
+
"""
|
|
321
|
+
|
|
322
|
+
# LaTeX delimiters for formula rendering
|
|
323
|
+
LATEX_DELIMS = [
|
|
324
|
+
{
|
|
325
|
+
'left': '$$',
|
|
326
|
+
'right': '$$',
|
|
327
|
+
'display': True
|
|
328
|
+
},
|
|
329
|
+
{
|
|
330
|
+
'left': '$',
|
|
331
|
+
'right': '$',
|
|
332
|
+
'display': False
|
|
333
|
+
},
|
|
334
|
+
{
|
|
335
|
+
'left': '\\(',
|
|
336
|
+
'right': '\\)',
|
|
337
|
+
'display': False
|
|
338
|
+
},
|
|
339
|
+
{
|
|
340
|
+
'left': '\\[',
|
|
341
|
+
'right': '\\]',
|
|
342
|
+
'display': True
|
|
343
|
+
},
|
|
344
|
+
]
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
# Gradio interface definition
def create_demo() -> gr.Blocks:
    """Build the Gradio Blocks UI for the OpenDoc demo.

    The page has a header, an upload column (image input, optional example
    gallery, analyze button, tips, ZIP download) and a results column with
    tabs for the Markdown preview, layout visualization, raw Markdown and
    JSON. The analyze button is wired to ``process_image``.

    Returns:
        gr.Blocks: The assembled (not yet launched) Blocks application.
    """
    # Resolve the example image directory (downloads the examples if needed).
    example_img_dir = get_example_images_path(demo_type='doc')

    # Collect example images, sorted by path; empty if the directory is absent.
    image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')
    example_images = []
    if os.path.exists(example_img_dir):
        example_images = sorted(
            os.path.join(example_img_dir, entry)
            for entry in os.listdir(example_img_dir)
            if entry.lower().endswith(image_suffixes))

    header_html = """
    <div class="app-header">
        <h1>🚀 OpenDoc-0.1B</h1>
        <p>Ultra-Lightweight Document Parsing System with 0.1B Parameters (built by <a href="https://github.com/Topdu/OpenOCR">OCR Team</a>, <a href="https://fvl.fudan.edu.cn">FVL Lab</a>)</p>
        <p style="font-size: 0.95em; color: #888;">
            Powered by <a href="https://www.paddleocr.ai/latest/version3.x/module_usage/layout_analysis.html" target="_blank">PP-DocLayoutV2</a> for layout analysis and <a href="https://arxiv.org/pdf/2512.21095" target="_blank">UniRec-0.1B</a> for unified recognition of text, formulas, and tables
        </p>
    </div>
    <div class="quick-links">
        <a href="https://github.com/Topdu/OpenOCR" target="_blank">📖 GitHub</a>
        <a href="https://arxiv.org/pdf/2512.21095" target="_blank">📄 Paper</a>
        <a href="https://huggingface.co/topdu/unirec-0.1b" target="_blank">🤗 Model</a>
    </div>
    """

    tips_markdown = """
    ### 💡 Tips
    - Supports Chinese and English documents
    - Best for reports, papers, magazines, and complex layouts
    - Handles text, formulas, tables, and images
    """

    notice_html = """
    <div class="notice">
        <strong>✨ Key Features:</strong>
        <ul>
            <li><strong>Ultra-lightweight:</strong> Only 0.1B parameters, fast inference speed</li>
            <li><strong>High accuracy:</strong> Achieves 90.57% on OmniDocBench (v1.5)</li>
            <li><strong>Unified recognition:</strong> Handles text, formulas, and tables in one model</li>
            <li><strong>Rich output:</strong> Provides Markdown, JSON, and visualization results</li>
        </ul>
    </div>
    """

    with gr.Blocks(css=custom_css,
                   theme=gr.themes.Soft(),
                   title='OpenDoc-0.1B Demo') as demo:
        # Header
        gr.HTML(header_html)

        with gr.Row():
            # Left: input side
            with gr.Column(scale=4, elem_classes=['upload-section']):
                input_img = gr.Image(type='filepath',
                                     label='📤 Upload Document Image',
                                     height=400)

                # Example gallery only when example images were found
                if example_images:
                    gr.Examples(
                        examples=example_images,
                        inputs=input_img,
                        label='📚 Example Documents'
                    )
                btn = gr.Button('🔍 Analyze Document',
                                variant='primary',
                                size='lg')
                gr.Markdown(tips_markdown)

                download_output = gr.File(
                    label='📥 Download All Results (ZIP)', visible=True)

            # Right: one tab per output representation
            with gr.Column(scale=6):
                with gr.Tabs():
                    with gr.Tab('📝 Markdown Preview'):
                        output_md = gr.Markdown(
                            'Please upload an image and click "Analyze Document" to see results.',
                            latex_delimiters=LATEX_DELIMS,
                            elem_id='md_preview')
                    with gr.Tab('📊 Layout Visualization'):
                        output_vis = gr.Image(type='pil',
                                              label='Layout Analysis Results',
                                              elem_id='vis_output')

                    with gr.Tab('📄 Raw Markdown'):
                        output_md_raw = gr.Code(label='Markdown Source',
                                                language='markdown',
                                                lines=20)

                    with gr.Tab('🗂️ JSON Result'):
                        output_json = gr.Code(label='Structured Data',
                                              language='json')

        # Feature notice
        gr.HTML(notice_html)

        # The order of outputs must match the order of values returned by
        # process_image.
        btn.click(fn=process_image,
                  inputs=[input_img],
                  outputs=[
                      output_vis, output_md, output_json, download_output,
                      output_md_raw
                  ])

    return demo
|
|
452
|
+
|
|
453
|
+
|
|
454
|
+
def launch_demo(
    layout_model_path=None,
    unirec_encoder_path=None,
    unirec_decoder_path=None,
    tokenizer_mapping_path=None,
    use_gpu=None,
    auto_download=True,
    share=False,
    server_port=7860,
    server_name='0.0.0.0'
):
    """Launch OpenDoc ONNX Gradio demo with default configuration.

    The pipeline is obtained via ``get_pipeline`` before the UI is built, so
    model loading problems surface at startup instead of on the first
    request. If a pipeline already exists in this process, ``get_pipeline``
    returns it and the model arguments given here have no effect.

    Args:
        layout_model_path: Path to layout detection ONNX model (default: auto-download)
        unirec_encoder_path: Path to UniRec encoder ONNX model (default: auto-download)
        unirec_decoder_path: Path to UniRec decoder ONNX model (default: auto-download)
        tokenizer_mapping_path: Path to tokenizer mapping JSON (default: auto-download)
        use_gpu: Whether to use GPU. If None, auto-detect (default: None)
        auto_download: If True, automatically download missing models (default: True)
        share: Create a public share link (default: False)
        server_port: Server port (default: 7860)
        server_name: Server name (default: '0.0.0.0')

    Returns:
        gr.Blocks: Gradio demo instance

    Raises:
        Exception: Whatever ``get_pipeline`` raises is logged and re-raised.
    """
    global current_pipeline

    # Initialize pipeline with arguments
    try:
        current_pipeline = get_pipeline(
            layout_model_path=layout_model_path,
            unirec_encoder_path=unirec_encoder_path,
            unirec_decoder_path=unirec_decoder_path,
            tokenizer_mapping_path=tokenizer_mapping_path,
            use_gpu=use_gpu,
            auto_download=auto_download
        )
    except Exception as e:
        # Log with traceback, then re-raise the active exception unchanged.
        logger.exception(f'Failed to initialize pipeline: {e}')
        raise

    demo = create_demo()
    # Launch with settings from arguments
    demo.launch(
        share=share,
        server_port=server_port,
        server_name=server_name
    )
    return demo
|
|
505
|
+
|
|
506
|
+
|
|
507
|
+
if __name__ == '__main__':
    # Command-line entry point: parse options and start the demo server.
    parser = argparse.ArgumentParser(description='OpenDoc-0.1B ONNX Gradio Demo')

    # Model paths
    parser.add_argument('--layout-model',
                        type=str,
                        default=None,
                        help='Path to layout detection ONNX model (default: ~/.cache/openocr/PP_DoclayoutV2_onnx/PP-DoclayoutV2.onnx)')
    parser.add_argument('--encoder',
                        type=str,
                        default=None,
                        help='Path to UniRec encoder ONNX model (default: ~/.cache/openocr/unirec_0_1b_onnx/unirec_encoder.onnx)')
    parser.add_argument('--decoder',
                        type=str,
                        default=None,
                        help='Path to UniRec decoder ONNX model (default: ~/.cache/openocr/unirec_0_1b_onnx/unirec_decoder.onnx)')
    # Help text fix: the default path was missing the '/' after 'openocr'.
    parser.add_argument('--mapping',
                        type=str,
                        default=None,
                        help='Path to tokenizer mapping JSON (default: ~/.cache/openocr/unirec_0_1b_onnx/unirec_tokenizer_mapping.json)')

    # GPU settings
    parser.add_argument('--use-gpu',
                        type=str,
                        default='auto',
                        choices=['auto', 'true', 'false'],
                        help='Use GPU for inference (auto: auto-detect, true: force GPU, false: force CPU)')
    parser.add_argument('--no-auto-download',
                        action='store_true',
                        help='Disable automatic model download')

    # Gradio settings
    parser.add_argument('--share',
                        action='store_true',
                        help='Create a public link')
    parser.add_argument('--server-port',
                        type=int,
                        default=7860,
                        help='Server port')
    parser.add_argument('--server-name',
                        type=str,
                        default='0.0.0.0',
                        help='Server name')

    args = parser.parse_args()

    # Map the tri-state CLI value to the None/True/False that launch_demo
    # expects; argparse's `choices` guarantees the key is present.
    use_gpu = {'auto': None, 'true': True, 'false': False}[args.use_gpu]

    # Launch demo with parsed arguments
    launch_demo(
        layout_model_path=args.layout_model,
        unirec_encoder_path=args.encoder,
        unirec_decoder_path=args.decoder,
        tokenizer_mapping_path=args.mapping,
        use_gpu=use_gpu,
        auto_download=not args.no_auto_download,
        share=args.share,
        server_port=args.server_port,
        server_name=args.server_name
    )
|