pyxllib 0.3.96__py3-none-any.whl → 0.3.197__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyxllib/algo/geo.py +12 -0
- pyxllib/algo/intervals.py +1 -1
- pyxllib/algo/matcher.py +78 -0
- pyxllib/algo/pupil.py +187 -19
- pyxllib/algo/specialist.py +2 -1
- pyxllib/algo/stat.py +38 -2
- {pyxlpr → pyxllib/autogui}/__init__.py +1 -1
- pyxllib/autogui/activewin.py +246 -0
- pyxllib/autogui/all.py +9 -0
- pyxllib/{ext/autogui → autogui}/autogui.py +40 -11
- pyxllib/autogui/uiautolib.py +362 -0
- pyxllib/autogui/wechat.py +827 -0
- pyxllib/autogui/wechat_msg.py +421 -0
- pyxllib/autogui/wxautolib.py +84 -0
- pyxllib/cv/slidercaptcha.py +137 -0
- pyxllib/data/echarts.py +123 -12
- pyxllib/data/jsonlib.py +89 -0
- pyxllib/data/pglib.py +514 -30
- pyxllib/data/sqlite.py +231 -4
- pyxllib/ext/JLineViewer.py +14 -1
- pyxllib/ext/drissionlib.py +277 -0
- pyxllib/ext/kq5034lib.py +0 -1594
- pyxllib/ext/robustprocfile.py +497 -0
- pyxllib/ext/unixlib.py +6 -5
- pyxllib/ext/utools.py +108 -95
- pyxllib/ext/webhook.py +32 -14
- pyxllib/ext/wjxlib.py +88 -0
- pyxllib/ext/wpsapi.py +124 -0
- pyxllib/ext/xlwork.py +9 -0
- pyxllib/ext/yuquelib.py +1003 -71
- pyxllib/file/docxlib.py +1 -1
- pyxllib/file/libreoffice.py +165 -0
- pyxllib/file/movielib.py +9 -0
- pyxllib/file/packlib/__init__.py +112 -75
- pyxllib/file/pdflib.py +1 -1
- pyxllib/file/pupil.py +1 -1
- pyxllib/file/specialist/dirlib.py +1 -1
- pyxllib/file/specialist/download.py +10 -3
- pyxllib/file/specialist/filelib.py +266 -55
- pyxllib/file/xlsxlib.py +205 -50
- pyxllib/file/xlsyncfile.py +341 -0
- pyxllib/prog/cachetools.py +64 -0
- pyxllib/prog/filelock.py +42 -0
- pyxllib/prog/multiprogs.py +940 -0
- pyxllib/prog/newbie.py +9 -2
- pyxllib/prog/pupil.py +129 -60
- pyxllib/prog/specialist/__init__.py +176 -2
- pyxllib/prog/specialist/bc.py +5 -2
- pyxllib/prog/specialist/browser.py +11 -2
- pyxllib/prog/specialist/datetime.py +68 -0
- pyxllib/prog/specialist/tictoc.py +12 -13
- pyxllib/prog/specialist/xllog.py +5 -5
- pyxllib/prog/xlosenv.py +7 -0
- pyxllib/text/airscript.js +744 -0
- pyxllib/text/charclasslib.py +17 -5
- pyxllib/text/jiebalib.py +6 -3
- pyxllib/text/jinjalib.py +32 -0
- pyxllib/text/jsa_ai_prompt.md +271 -0
- pyxllib/text/jscode.py +159 -4
- pyxllib/text/nestenv.py +1 -1
- pyxllib/text/newbie.py +12 -0
- pyxllib/text/pupil/common.py +26 -0
- pyxllib/text/specialist/ptag.py +2 -2
- pyxllib/text/templates/echart_base.html +11 -0
- pyxllib/text/templates/highlight_code.html +17 -0
- pyxllib/text/templates/latex_editor.html +103 -0
- pyxllib/text/xmllib.py +76 -14
- pyxllib/xl.py +2 -1
- pyxllib-0.3.197.dist-info/METADATA +48 -0
- pyxllib-0.3.197.dist-info/RECORD +126 -0
- {pyxllib-0.3.96.dist-info → pyxllib-0.3.197.dist-info}/WHEEL +1 -2
- pyxllib/ext/autogui/__init__.py +0 -8
- pyxllib-0.3.96.dist-info/METADATA +0 -51
- pyxllib-0.3.96.dist-info/RECORD +0 -333
- pyxllib-0.3.96.dist-info/top_level.txt +0 -2
- pyxlpr/ai/__init__.py +0 -5
- pyxlpr/ai/clientlib.py +0 -1281
- pyxlpr/ai/specialist.py +0 -286
- pyxlpr/ai/torch_app.py +0 -172
- pyxlpr/ai/xlpaddle.py +0 -655
- pyxlpr/ai/xltorch.py +0 -705
- pyxlpr/data/__init__.py +0 -11
- pyxlpr/data/coco.py +0 -1325
- pyxlpr/data/datacls.py +0 -365
- pyxlpr/data/datasets.py +0 -200
- pyxlpr/data/gptlib.py +0 -1291
- pyxlpr/data/icdar/__init__.py +0 -96
- pyxlpr/data/icdar/deteval.py +0 -377
- pyxlpr/data/icdar/icdar2013.py +0 -341
- pyxlpr/data/icdar/iou.py +0 -340
- pyxlpr/data/icdar/rrc_evaluation_funcs_1_1.py +0 -463
- pyxlpr/data/imtextline.py +0 -473
- pyxlpr/data/labelme.py +0 -866
- pyxlpr/data/removeline.py +0 -179
- pyxlpr/data/specialist.py +0 -57
- pyxlpr/eval/__init__.py +0 -85
- pyxlpr/paddleocr.py +0 -776
- pyxlpr/ppocr/__init__.py +0 -15
- pyxlpr/ppocr/configs/rec/multi_language/generate_multi_language_configs.py +0 -226
- pyxlpr/ppocr/data/__init__.py +0 -135
- pyxlpr/ppocr/data/imaug/ColorJitter.py +0 -26
- pyxlpr/ppocr/data/imaug/__init__.py +0 -67
- pyxlpr/ppocr/data/imaug/copy_paste.py +0 -170
- pyxlpr/ppocr/data/imaug/east_process.py +0 -437
- pyxlpr/ppocr/data/imaug/gen_table_mask.py +0 -244
- pyxlpr/ppocr/data/imaug/iaa_augment.py +0 -114
- pyxlpr/ppocr/data/imaug/label_ops.py +0 -789
- pyxlpr/ppocr/data/imaug/make_border_map.py +0 -184
- pyxlpr/ppocr/data/imaug/make_pse_gt.py +0 -106
- pyxlpr/ppocr/data/imaug/make_shrink_map.py +0 -126
- pyxlpr/ppocr/data/imaug/operators.py +0 -433
- pyxlpr/ppocr/data/imaug/pg_process.py +0 -906
- pyxlpr/ppocr/data/imaug/randaugment.py +0 -143
- pyxlpr/ppocr/data/imaug/random_crop_data.py +0 -239
- pyxlpr/ppocr/data/imaug/rec_img_aug.py +0 -533
- pyxlpr/ppocr/data/imaug/sast_process.py +0 -777
- pyxlpr/ppocr/data/imaug/text_image_aug/__init__.py +0 -17
- pyxlpr/ppocr/data/imaug/text_image_aug/augment.py +0 -120
- pyxlpr/ppocr/data/imaug/text_image_aug/warp_mls.py +0 -168
- pyxlpr/ppocr/data/lmdb_dataset.py +0 -115
- pyxlpr/ppocr/data/pgnet_dataset.py +0 -104
- pyxlpr/ppocr/data/pubtab_dataset.py +0 -107
- pyxlpr/ppocr/data/simple_dataset.py +0 -372
- pyxlpr/ppocr/losses/__init__.py +0 -61
- pyxlpr/ppocr/losses/ace_loss.py +0 -52
- pyxlpr/ppocr/losses/basic_loss.py +0 -135
- pyxlpr/ppocr/losses/center_loss.py +0 -88
- pyxlpr/ppocr/losses/cls_loss.py +0 -30
- pyxlpr/ppocr/losses/combined_loss.py +0 -67
- pyxlpr/ppocr/losses/det_basic_loss.py +0 -208
- pyxlpr/ppocr/losses/det_db_loss.py +0 -80
- pyxlpr/ppocr/losses/det_east_loss.py +0 -63
- pyxlpr/ppocr/losses/det_pse_loss.py +0 -149
- pyxlpr/ppocr/losses/det_sast_loss.py +0 -121
- pyxlpr/ppocr/losses/distillation_loss.py +0 -272
- pyxlpr/ppocr/losses/e2e_pg_loss.py +0 -140
- pyxlpr/ppocr/losses/kie_sdmgr_loss.py +0 -113
- pyxlpr/ppocr/losses/rec_aster_loss.py +0 -99
- pyxlpr/ppocr/losses/rec_att_loss.py +0 -39
- pyxlpr/ppocr/losses/rec_ctc_loss.py +0 -44
- pyxlpr/ppocr/losses/rec_enhanced_ctc_loss.py +0 -70
- pyxlpr/ppocr/losses/rec_nrtr_loss.py +0 -30
- pyxlpr/ppocr/losses/rec_sar_loss.py +0 -28
- pyxlpr/ppocr/losses/rec_srn_loss.py +0 -47
- pyxlpr/ppocr/losses/table_att_loss.py +0 -109
- pyxlpr/ppocr/metrics/__init__.py +0 -44
- pyxlpr/ppocr/metrics/cls_metric.py +0 -45
- pyxlpr/ppocr/metrics/det_metric.py +0 -82
- pyxlpr/ppocr/metrics/distillation_metric.py +0 -73
- pyxlpr/ppocr/metrics/e2e_metric.py +0 -86
- pyxlpr/ppocr/metrics/eval_det_iou.py +0 -274
- pyxlpr/ppocr/metrics/kie_metric.py +0 -70
- pyxlpr/ppocr/metrics/rec_metric.py +0 -75
- pyxlpr/ppocr/metrics/table_metric.py +0 -50
- pyxlpr/ppocr/modeling/architectures/__init__.py +0 -32
- pyxlpr/ppocr/modeling/architectures/base_model.py +0 -88
- pyxlpr/ppocr/modeling/architectures/distillation_model.py +0 -60
- pyxlpr/ppocr/modeling/backbones/__init__.py +0 -54
- pyxlpr/ppocr/modeling/backbones/det_mobilenet_v3.py +0 -268
- pyxlpr/ppocr/modeling/backbones/det_resnet_vd.py +0 -246
- pyxlpr/ppocr/modeling/backbones/det_resnet_vd_sast.py +0 -285
- pyxlpr/ppocr/modeling/backbones/e2e_resnet_vd_pg.py +0 -265
- pyxlpr/ppocr/modeling/backbones/kie_unet_sdmgr.py +0 -186
- pyxlpr/ppocr/modeling/backbones/rec_mobilenet_v3.py +0 -138
- pyxlpr/ppocr/modeling/backbones/rec_mv1_enhance.py +0 -258
- pyxlpr/ppocr/modeling/backbones/rec_nrtr_mtb.py +0 -48
- pyxlpr/ppocr/modeling/backbones/rec_resnet_31.py +0 -210
- pyxlpr/ppocr/modeling/backbones/rec_resnet_aster.py +0 -143
- pyxlpr/ppocr/modeling/backbones/rec_resnet_fpn.py +0 -307
- pyxlpr/ppocr/modeling/backbones/rec_resnet_vd.py +0 -286
- pyxlpr/ppocr/modeling/heads/__init__.py +0 -54
- pyxlpr/ppocr/modeling/heads/cls_head.py +0 -52
- pyxlpr/ppocr/modeling/heads/det_db_head.py +0 -118
- pyxlpr/ppocr/modeling/heads/det_east_head.py +0 -121
- pyxlpr/ppocr/modeling/heads/det_pse_head.py +0 -37
- pyxlpr/ppocr/modeling/heads/det_sast_head.py +0 -128
- pyxlpr/ppocr/modeling/heads/e2e_pg_head.py +0 -253
- pyxlpr/ppocr/modeling/heads/kie_sdmgr_head.py +0 -206
- pyxlpr/ppocr/modeling/heads/multiheadAttention.py +0 -163
- pyxlpr/ppocr/modeling/heads/rec_aster_head.py +0 -393
- pyxlpr/ppocr/modeling/heads/rec_att_head.py +0 -202
- pyxlpr/ppocr/modeling/heads/rec_ctc_head.py +0 -88
- pyxlpr/ppocr/modeling/heads/rec_nrtr_head.py +0 -826
- pyxlpr/ppocr/modeling/heads/rec_sar_head.py +0 -402
- pyxlpr/ppocr/modeling/heads/rec_srn_head.py +0 -280
- pyxlpr/ppocr/modeling/heads/self_attention.py +0 -406
- pyxlpr/ppocr/modeling/heads/table_att_head.py +0 -246
- pyxlpr/ppocr/modeling/necks/__init__.py +0 -32
- pyxlpr/ppocr/modeling/necks/db_fpn.py +0 -111
- pyxlpr/ppocr/modeling/necks/east_fpn.py +0 -188
- pyxlpr/ppocr/modeling/necks/fpn.py +0 -138
- pyxlpr/ppocr/modeling/necks/pg_fpn.py +0 -314
- pyxlpr/ppocr/modeling/necks/rnn.py +0 -92
- pyxlpr/ppocr/modeling/necks/sast_fpn.py +0 -284
- pyxlpr/ppocr/modeling/necks/table_fpn.py +0 -110
- pyxlpr/ppocr/modeling/transforms/__init__.py +0 -28
- pyxlpr/ppocr/modeling/transforms/stn.py +0 -135
- pyxlpr/ppocr/modeling/transforms/tps.py +0 -308
- pyxlpr/ppocr/modeling/transforms/tps_spatial_transformer.py +0 -156
- pyxlpr/ppocr/optimizer/__init__.py +0 -61
- pyxlpr/ppocr/optimizer/learning_rate.py +0 -228
- pyxlpr/ppocr/optimizer/lr_scheduler.py +0 -49
- pyxlpr/ppocr/optimizer/optimizer.py +0 -160
- pyxlpr/ppocr/optimizer/regularizer.py +0 -52
- pyxlpr/ppocr/postprocess/__init__.py +0 -55
- pyxlpr/ppocr/postprocess/cls_postprocess.py +0 -33
- pyxlpr/ppocr/postprocess/db_postprocess.py +0 -234
- pyxlpr/ppocr/postprocess/east_postprocess.py +0 -143
- pyxlpr/ppocr/postprocess/locality_aware_nms.py +0 -200
- pyxlpr/ppocr/postprocess/pg_postprocess.py +0 -52
- pyxlpr/ppocr/postprocess/pse_postprocess/__init__.py +0 -15
- pyxlpr/ppocr/postprocess/pse_postprocess/pse/__init__.py +0 -29
- pyxlpr/ppocr/postprocess/pse_postprocess/pse/setup.py +0 -14
- pyxlpr/ppocr/postprocess/pse_postprocess/pse_postprocess.py +0 -118
- pyxlpr/ppocr/postprocess/rec_postprocess.py +0 -654
- pyxlpr/ppocr/postprocess/sast_postprocess.py +0 -355
- pyxlpr/ppocr/tools/__init__.py +0 -14
- pyxlpr/ppocr/tools/eval.py +0 -83
- pyxlpr/ppocr/tools/export_center.py +0 -77
- pyxlpr/ppocr/tools/export_model.py +0 -129
- pyxlpr/ppocr/tools/infer/predict_cls.py +0 -151
- pyxlpr/ppocr/tools/infer/predict_det.py +0 -300
- pyxlpr/ppocr/tools/infer/predict_e2e.py +0 -169
- pyxlpr/ppocr/tools/infer/predict_rec.py +0 -414
- pyxlpr/ppocr/tools/infer/predict_system.py +0 -204
- pyxlpr/ppocr/tools/infer/utility.py +0 -629
- pyxlpr/ppocr/tools/infer_cls.py +0 -83
- pyxlpr/ppocr/tools/infer_det.py +0 -134
- pyxlpr/ppocr/tools/infer_e2e.py +0 -122
- pyxlpr/ppocr/tools/infer_kie.py +0 -153
- pyxlpr/ppocr/tools/infer_rec.py +0 -146
- pyxlpr/ppocr/tools/infer_table.py +0 -107
- pyxlpr/ppocr/tools/program.py +0 -596
- pyxlpr/ppocr/tools/test_hubserving.py +0 -117
- pyxlpr/ppocr/tools/train.py +0 -163
- pyxlpr/ppocr/tools/xlprog.py +0 -748
- pyxlpr/ppocr/utils/EN_symbol_dict.txt +0 -94
- pyxlpr/ppocr/utils/__init__.py +0 -24
- pyxlpr/ppocr/utils/dict/ar_dict.txt +0 -117
- pyxlpr/ppocr/utils/dict/arabic_dict.txt +0 -162
- pyxlpr/ppocr/utils/dict/be_dict.txt +0 -145
- pyxlpr/ppocr/utils/dict/bg_dict.txt +0 -140
- pyxlpr/ppocr/utils/dict/chinese_cht_dict.txt +0 -8421
- pyxlpr/ppocr/utils/dict/cyrillic_dict.txt +0 -163
- pyxlpr/ppocr/utils/dict/devanagari_dict.txt +0 -167
- pyxlpr/ppocr/utils/dict/en_dict.txt +0 -63
- pyxlpr/ppocr/utils/dict/fa_dict.txt +0 -136
- pyxlpr/ppocr/utils/dict/french_dict.txt +0 -136
- pyxlpr/ppocr/utils/dict/german_dict.txt +0 -143
- pyxlpr/ppocr/utils/dict/hi_dict.txt +0 -162
- pyxlpr/ppocr/utils/dict/it_dict.txt +0 -118
- pyxlpr/ppocr/utils/dict/japan_dict.txt +0 -4399
- pyxlpr/ppocr/utils/dict/ka_dict.txt +0 -153
- pyxlpr/ppocr/utils/dict/korean_dict.txt +0 -3688
- pyxlpr/ppocr/utils/dict/latin_dict.txt +0 -185
- pyxlpr/ppocr/utils/dict/mr_dict.txt +0 -153
- pyxlpr/ppocr/utils/dict/ne_dict.txt +0 -153
- pyxlpr/ppocr/utils/dict/oc_dict.txt +0 -96
- pyxlpr/ppocr/utils/dict/pu_dict.txt +0 -130
- pyxlpr/ppocr/utils/dict/rs_dict.txt +0 -91
- pyxlpr/ppocr/utils/dict/rsc_dict.txt +0 -134
- pyxlpr/ppocr/utils/dict/ru_dict.txt +0 -125
- pyxlpr/ppocr/utils/dict/ta_dict.txt +0 -128
- pyxlpr/ppocr/utils/dict/table_dict.txt +0 -277
- pyxlpr/ppocr/utils/dict/table_structure_dict.txt +0 -2759
- pyxlpr/ppocr/utils/dict/te_dict.txt +0 -151
- pyxlpr/ppocr/utils/dict/ug_dict.txt +0 -114
- pyxlpr/ppocr/utils/dict/uk_dict.txt +0 -142
- pyxlpr/ppocr/utils/dict/ur_dict.txt +0 -137
- pyxlpr/ppocr/utils/dict/xi_dict.txt +0 -110
- pyxlpr/ppocr/utils/dict90.txt +0 -90
- pyxlpr/ppocr/utils/e2e_metric/Deteval.py +0 -574
- pyxlpr/ppocr/utils/e2e_metric/polygon_fast.py +0 -83
- pyxlpr/ppocr/utils/e2e_utils/extract_batchsize.py +0 -87
- pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_fast.py +0 -457
- pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_slow.py +0 -592
- pyxlpr/ppocr/utils/e2e_utils/pgnet_pp_utils.py +0 -162
- pyxlpr/ppocr/utils/e2e_utils/visual.py +0 -162
- pyxlpr/ppocr/utils/en_dict.txt +0 -95
- pyxlpr/ppocr/utils/gen_label.py +0 -81
- pyxlpr/ppocr/utils/ic15_dict.txt +0 -36
- pyxlpr/ppocr/utils/iou.py +0 -54
- pyxlpr/ppocr/utils/logging.py +0 -69
- pyxlpr/ppocr/utils/network.py +0 -84
- pyxlpr/ppocr/utils/ppocr_keys_v1.txt +0 -6623
- pyxlpr/ppocr/utils/profiler.py +0 -110
- pyxlpr/ppocr/utils/save_load.py +0 -150
- pyxlpr/ppocr/utils/stats.py +0 -72
- pyxlpr/ppocr/utils/utility.py +0 -80
- pyxlpr/ppstructure/__init__.py +0 -13
- pyxlpr/ppstructure/predict_system.py +0 -187
- pyxlpr/ppstructure/table/__init__.py +0 -13
- pyxlpr/ppstructure/table/eval_table.py +0 -72
- pyxlpr/ppstructure/table/matcher.py +0 -192
- pyxlpr/ppstructure/table/predict_structure.py +0 -136
- pyxlpr/ppstructure/table/predict_table.py +0 -221
- pyxlpr/ppstructure/table/table_metric/__init__.py +0 -16
- pyxlpr/ppstructure/table/table_metric/parallel.py +0 -51
- pyxlpr/ppstructure/table/table_metric/table_metric.py +0 -247
- pyxlpr/ppstructure/table/tablepyxl/__init__.py +0 -13
- pyxlpr/ppstructure/table/tablepyxl/style.py +0 -283
- pyxlpr/ppstructure/table/tablepyxl/tablepyxl.py +0 -118
- pyxlpr/ppstructure/utility.py +0 -71
- pyxlpr/xlai.py +0 -10
- /pyxllib/{ext/autogui → autogui}/virtualkey.py +0 -0
- {pyxllib-0.3.96.dist-info → pyxllib-0.3.197.dist-info/licenses}/LICENSE +0 -0
pyxlpr/ppocr/utils/profiler.py
DELETED
@@ -1,110 +0,0 @@
|
|
1
|
-
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
|
15
|
-
import sys
|
16
|
-
import paddle
|
17
|
-
|
18
|
-
# A global variable to record the number of calling times for profiler
|
19
|
-
# functions. It is used to specify the tracing range of training steps.
|
20
|
-
_profiler_step_id = 0
|
21
|
-
|
22
|
-
# A global variable to avoid parsing from string every time.
|
23
|
-
_profiler_options = None
|
24
|
-
|
25
|
-
|
26
|
-
class ProfilerOptions(object):
|
27
|
-
'''
|
28
|
-
Use a string to initialize a ProfilerOptions.
|
29
|
-
The string should be in the format: "key1=value1;key2=value;key3=value3".
|
30
|
-
For example:
|
31
|
-
"profile_path=model.profile"
|
32
|
-
"batch_range=[50, 60]; profile_path=model.profile"
|
33
|
-
"batch_range=[50, 60]; tracer_option=OpDetail; profile_path=model.profile"
|
34
|
-
ProfilerOptions supports following key-value pair:
|
35
|
-
batch_range - a integer list, e.g. [100, 110].
|
36
|
-
state - a string, the optional values are 'CPU', 'GPU' or 'All'.
|
37
|
-
sorted_key - a string, the optional values are 'calls', 'total',
|
38
|
-
'max', 'min' or 'ave.
|
39
|
-
tracer_option - a string, the optional values are 'Default', 'OpDetail',
|
40
|
-
'AllOpDetail'.
|
41
|
-
profile_path - a string, the path to save the serialized profile data,
|
42
|
-
which can be used to generate a timeline.
|
43
|
-
exit_on_finished - a boolean.
|
44
|
-
'''
|
45
|
-
|
46
|
-
def __init__(self, options_str):
|
47
|
-
assert isinstance(options_str, str)
|
48
|
-
|
49
|
-
self._options = {
|
50
|
-
'batch_range': [10, 20],
|
51
|
-
'state': 'All',
|
52
|
-
'sorted_key': 'total',
|
53
|
-
'tracer_option': 'Default',
|
54
|
-
'profile_path': '/tmp/profile',
|
55
|
-
'exit_on_finished': True
|
56
|
-
}
|
57
|
-
self._parse_from_string(options_str)
|
58
|
-
|
59
|
-
def _parse_from_string(self, options_str):
|
60
|
-
for kv in options_str.replace(' ', '').split(';'):
|
61
|
-
key, value = kv.split('=')
|
62
|
-
if key == 'batch_range':
|
63
|
-
value_list = value.replace('[', '').replace(']', '').split(',')
|
64
|
-
value_list = list(map(int, value_list))
|
65
|
-
if len(value_list) >= 2 and value_list[0] >= 0 and value_list[
|
66
|
-
1] > value_list[0]:
|
67
|
-
self._options[key] = value_list
|
68
|
-
elif key == 'exit_on_finished':
|
69
|
-
self._options[key] = value.lower() in ("yes", "true", "t", "1")
|
70
|
-
elif key in [
|
71
|
-
'state', 'sorted_key', 'tracer_option', 'profile_path'
|
72
|
-
]:
|
73
|
-
self._options[key] = value
|
74
|
-
|
75
|
-
def __getitem__(self, name):
|
76
|
-
if self._options.get(name, None) is None:
|
77
|
-
raise ValueError(
|
78
|
-
"ProfilerOptions does not have an option named %s." % name)
|
79
|
-
return self._options[name]
|
80
|
-
|
81
|
-
|
82
|
-
def add_profiler_step(options_str=None):
|
83
|
-
'''
|
84
|
-
Enable the operator-level timing using PaddlePaddle's profiler.
|
85
|
-
The profiler uses a independent variable to count the profiler steps.
|
86
|
-
One call of this function is treated as a profiler step.
|
87
|
-
|
88
|
-
Args:
|
89
|
-
profiler_options - a string to initialize the ProfilerOptions.
|
90
|
-
Default is None, and the profiler is disabled.
|
91
|
-
'''
|
92
|
-
if options_str is None:
|
93
|
-
return
|
94
|
-
|
95
|
-
global _profiler_step_id
|
96
|
-
global _profiler_options
|
97
|
-
|
98
|
-
if _profiler_options is None:
|
99
|
-
_profiler_options = ProfilerOptions(options_str)
|
100
|
-
|
101
|
-
if _profiler_step_id == _profiler_options['batch_range'][0]:
|
102
|
-
paddle.utils.profiler.start_profiler(
|
103
|
-
_profiler_options['state'], _profiler_options['tracer_option'])
|
104
|
-
elif _profiler_step_id == _profiler_options['batch_range'][1]:
|
105
|
-
paddle.utils.profiler.stop_profiler(_profiler_options['sorted_key'],
|
106
|
-
_profiler_options['profile_path'])
|
107
|
-
if _profiler_options['exit_on_finished']:
|
108
|
-
sys.exit(0)
|
109
|
-
|
110
|
-
_profiler_step_id += 1
|
pyxlpr/ppocr/utils/save_load.py
DELETED
@@ -1,150 +0,0 @@
|
|
1
|
-
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
|
15
|
-
from __future__ import absolute_import
|
16
|
-
from __future__ import division
|
17
|
-
from __future__ import print_function
|
18
|
-
|
19
|
-
import errno
|
20
|
-
import os
|
21
|
-
import pickle
|
22
|
-
import six
|
23
|
-
|
24
|
-
import paddle
|
25
|
-
|
26
|
-
from pyxlpr.ppocr.utils.logging import get_logger
|
27
|
-
|
28
|
-
__all__ = ['load_model']
|
29
|
-
|
30
|
-
|
31
|
-
def _mkdir_if_not_exist(path, logger):
|
32
|
-
"""
|
33
|
-
mkdir if not exists, ignore the exception when multiprocess mkdir together
|
34
|
-
"""
|
35
|
-
if not os.path.exists(path):
|
36
|
-
try:
|
37
|
-
os.makedirs(path)
|
38
|
-
except OSError as e:
|
39
|
-
if e.errno == errno.EEXIST and os.path.isdir(path):
|
40
|
-
logger.warning(
|
41
|
-
'be happy if some process has already created {}'.format(
|
42
|
-
path))
|
43
|
-
else:
|
44
|
-
raise OSError('Failed to mkdir {}'.format(path))
|
45
|
-
|
46
|
-
|
47
|
-
def load_model(config, model, optimizer=None):
|
48
|
-
"""
|
49
|
-
load model from checkpoint or pretrained_model
|
50
|
-
"""
|
51
|
-
logger = get_logger()
|
52
|
-
global_config = config['Global']
|
53
|
-
checkpoints = global_config.get('checkpoints')
|
54
|
-
pretrained_model = global_config.get('pretrained_model')
|
55
|
-
best_model_dict = {}
|
56
|
-
if checkpoints:
|
57
|
-
if checkpoints.endswith('.pdparams'):
|
58
|
-
checkpoints = checkpoints.replace('.pdparams', '')
|
59
|
-
assert os.path.exists(checkpoints + ".pdparams"), \
|
60
|
-
"The {}.pdparams does not exists!".format(checkpoints)
|
61
|
-
|
62
|
-
# load params from trained model
|
63
|
-
params = paddle.load(checkpoints + '.pdparams')
|
64
|
-
state_dict = model.state_dict()
|
65
|
-
new_state_dict = {}
|
66
|
-
for key, value in state_dict.items():
|
67
|
-
if key not in params:
|
68
|
-
logger.warning("{} not in loaded params {} !".format(
|
69
|
-
key, params.keys()))
|
70
|
-
continue
|
71
|
-
pre_value = params[key]
|
72
|
-
if list(value.shape) == list(pre_value.shape):
|
73
|
-
new_state_dict[key] = pre_value
|
74
|
-
else:
|
75
|
-
logger.warning(
|
76
|
-
"The shape of model params {} {} not matched with loaded params shape {} !".
|
77
|
-
format(key, value.shape, pre_value.shape))
|
78
|
-
model.set_state_dict(new_state_dict)
|
79
|
-
|
80
|
-
if optimizer is not None:
|
81
|
-
if os.path.exists(checkpoints + '.pdopt'):
|
82
|
-
optim_dict = paddle.load(checkpoints + '.pdopt')
|
83
|
-
optimizer.set_state_dict(optim_dict)
|
84
|
-
else:
|
85
|
-
logger.warning(
|
86
|
-
"{}.pdopt is not exists, params of optimizer is not loaded".
|
87
|
-
format(checkpoints))
|
88
|
-
|
89
|
-
if os.path.exists(checkpoints + '.states'):
|
90
|
-
with open(checkpoints + '.states', 'rb') as f:
|
91
|
-
states_dict = pickle.load(f) if six.PY2 else pickle.load(
|
92
|
-
f, encoding='latin1')
|
93
|
-
best_model_dict = states_dict.get('best_model_dict', {})
|
94
|
-
if 'epoch' in states_dict:
|
95
|
-
best_model_dict['start_epoch'] = states_dict['epoch'] + 1
|
96
|
-
logger.info("resume from {}".format(checkpoints))
|
97
|
-
elif pretrained_model:
|
98
|
-
load_pretrained_params(model, pretrained_model)
|
99
|
-
else:
|
100
|
-
logger.info('train from scratch')
|
101
|
-
return best_model_dict
|
102
|
-
|
103
|
-
|
104
|
-
def load_pretrained_params(model, path):
|
105
|
-
logger = get_logger()
|
106
|
-
if path.endswith('.pdparams'):
|
107
|
-
path = path.replace('.pdparams', '')
|
108
|
-
assert os.path.exists(path + ".pdparams"), \
|
109
|
-
"The {}.pdparams does not exists!".format(path)
|
110
|
-
|
111
|
-
params = paddle.load(path + '.pdparams')
|
112
|
-
state_dict = model.state_dict()
|
113
|
-
new_state_dict = {}
|
114
|
-
for k1 in params.keys():
|
115
|
-
if k1 not in state_dict.keys():
|
116
|
-
logger.warning("The pretrained params {} not in model".format(k1))
|
117
|
-
else:
|
118
|
-
if list(state_dict[k1].shape) == list(params[k1].shape):
|
119
|
-
new_state_dict[k1] = params[k1]
|
120
|
-
else:
|
121
|
-
logger.warning(
|
122
|
-
"The shape of model params {} {} not matched with loaded params {} {} !".
|
123
|
-
format(k1, state_dict[k1].shape, k1, params[k1].shape))
|
124
|
-
model.set_state_dict(new_state_dict)
|
125
|
-
logger.info("load pretrain successful from {}".format(path))
|
126
|
-
return model
|
127
|
-
|
128
|
-
|
129
|
-
def save_model(model,
|
130
|
-
optimizer,
|
131
|
-
model_path,
|
132
|
-
logger,
|
133
|
-
is_best=False,
|
134
|
-
prefix='ppocr',
|
135
|
-
**kwargs):
|
136
|
-
"""
|
137
|
-
save model to the target path
|
138
|
-
"""
|
139
|
-
_mkdir_if_not_exist(model_path, logger)
|
140
|
-
model_prefix = os.path.join(model_path, prefix)
|
141
|
-
paddle.save(model.state_dict(), model_prefix + '.pdparams')
|
142
|
-
paddle.save(optimizer.state_dict(), model_prefix + '.pdopt')
|
143
|
-
|
144
|
-
# save metric and config
|
145
|
-
with open(model_prefix + '.states', 'wb') as f:
|
146
|
-
pickle.dump(kwargs, f, protocol=2)
|
147
|
-
if is_best:
|
148
|
-
logger.info('save best model is to {}'.format(model_prefix))
|
149
|
-
else:
|
150
|
-
logger.info("save model in {}".format(model_prefix))
|
pyxlpr/ppocr/utils/stats.py
DELETED
@@ -1,72 +0,0 @@
|
|
1
|
-
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
|
15
|
-
import collections
|
16
|
-
import numpy as np
|
17
|
-
import datetime
|
18
|
-
|
19
|
-
__all__ = ['TrainingStats', 'Time']
|
20
|
-
|
21
|
-
|
22
|
-
class SmoothedValue(object):
|
23
|
-
"""Track a series of values and provide access to smoothed values over a
|
24
|
-
window or the global series average.
|
25
|
-
"""
|
26
|
-
|
27
|
-
def __init__(self, window_size):
|
28
|
-
self.deque = collections.deque(maxlen=window_size)
|
29
|
-
|
30
|
-
def add_value(self, value):
|
31
|
-
self.deque.append(value)
|
32
|
-
|
33
|
-
def get_median_value(self):
|
34
|
-
return np.median(self.deque)
|
35
|
-
|
36
|
-
|
37
|
-
def Time():
|
38
|
-
return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')
|
39
|
-
|
40
|
-
|
41
|
-
class TrainingStats(object):
|
42
|
-
def __init__(self, window_size, stats_keys):
|
43
|
-
self.window_size = window_size
|
44
|
-
self.smoothed_losses_and_metrics = {
|
45
|
-
key: SmoothedValue(window_size)
|
46
|
-
for key in stats_keys
|
47
|
-
}
|
48
|
-
|
49
|
-
def update(self, stats):
|
50
|
-
for k, v in stats.items():
|
51
|
-
if k not in self.smoothed_losses_and_metrics:
|
52
|
-
self.smoothed_losses_and_metrics[k] = SmoothedValue(
|
53
|
-
self.window_size)
|
54
|
-
self.smoothed_losses_and_metrics[k].add_value(v)
|
55
|
-
|
56
|
-
def get(self, extras=None):
|
57
|
-
stats = collections.OrderedDict()
|
58
|
-
if extras:
|
59
|
-
for k, v in extras.items():
|
60
|
-
stats[k] = v
|
61
|
-
for k, v in self.smoothed_losses_and_metrics.items():
|
62
|
-
stats[k] = round(v.get_median_value(), 6)
|
63
|
-
|
64
|
-
return stats
|
65
|
-
|
66
|
-
def log(self, extras=None):
|
67
|
-
d = self.get(extras)
|
68
|
-
strs = []
|
69
|
-
for k, v in d.items():
|
70
|
-
strs.append('{}: {:x<6f}'.format(k, v))
|
71
|
-
strs = ', '.join(strs)
|
72
|
-
return strs
|
pyxlpr/ppocr/utils/utility.py
DELETED
@@ -1,80 +0,0 @@
|
|
1
|
-
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
|
15
|
-
import logging
|
16
|
-
import os
|
17
|
-
import imghdr
|
18
|
-
import cv2
|
19
|
-
|
20
|
-
|
21
|
-
def print_dict(d, logger, delimiter=0):
|
22
|
-
"""
|
23
|
-
Recursively visualize a dict and
|
24
|
-
indenting acrrording by the relationship of keys.
|
25
|
-
"""
|
26
|
-
for k, v in sorted(d.items()):
|
27
|
-
if isinstance(v, dict):
|
28
|
-
logger.info("{}{} : ".format(delimiter * " ", str(k)))
|
29
|
-
print_dict(v, logger, delimiter + 4)
|
30
|
-
elif isinstance(v, list) and len(v) >= 1 and isinstance(v[0], dict):
|
31
|
-
logger.info("{}{} : ".format(delimiter * " ", str(k)))
|
32
|
-
for value in v:
|
33
|
-
print_dict(value, logger, delimiter + 4)
|
34
|
-
else:
|
35
|
-
logger.info("{}{} : {}".format(delimiter * " ", k, v))
|
36
|
-
|
37
|
-
|
38
|
-
def get_check_global_params(mode):
|
39
|
-
check_params = ['use_gpu', 'max_text_length', 'image_shape', \
|
40
|
-
'image_shape', 'character_type', 'loss_type']
|
41
|
-
if mode == "train_eval":
|
42
|
-
check_params = check_params + [ \
|
43
|
-
'train_batch_size_per_card', 'test_batch_size_per_card']
|
44
|
-
elif mode == "test":
|
45
|
-
check_params = check_params + ['test_batch_size_per_card']
|
46
|
-
return check_params
|
47
|
-
|
48
|
-
|
49
|
-
def get_image_file_list(img_file):
|
50
|
-
imgs_lists = []
|
51
|
-
if img_file is None or not os.path.exists(img_file):
|
52
|
-
raise Exception("not found any img file in {}".format(img_file))
|
53
|
-
|
54
|
-
img_end = {'jpg', 'bmp', 'png', 'jpeg', 'rgb', 'tif', 'tiff', 'gif', 'GIF'}
|
55
|
-
if os.path.isfile(img_file) and imghdr.what(img_file) in img_end:
|
56
|
-
imgs_lists.append(img_file)
|
57
|
-
elif os.path.isdir(img_file):
|
58
|
-
for single_file in os.listdir(img_file):
|
59
|
-
file_path = os.path.join(img_file, single_file)
|
60
|
-
if os.path.isfile(file_path) and imghdr.what(file_path) in img_end:
|
61
|
-
imgs_lists.append(file_path)
|
62
|
-
if len(imgs_lists) == 0:
|
63
|
-
raise Exception("not found any img file in {}".format(img_file))
|
64
|
-
imgs_lists = sorted(imgs_lists)
|
65
|
-
return imgs_lists
|
66
|
-
|
67
|
-
|
68
|
-
def check_and_read_gif(img_path):
|
69
|
-
if os.path.basename(img_path)[-3:] in ['gif', 'GIF']:
|
70
|
-
gif = cv2.VideoCapture(img_path)
|
71
|
-
ret, frame = gif.read()
|
72
|
-
if not ret:
|
73
|
-
logger = logging.getLogger('ppocr')
|
74
|
-
logger.info("Cannot read {}. This gif image maybe corrupted.")
|
75
|
-
return None, False
|
76
|
-
if len(frame.shape) == 2 or frame.shape[-1] == 1:
|
77
|
-
frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
|
78
|
-
imgvalue = frame[:, :, ::-1]
|
79
|
-
return imgvalue, True
|
80
|
-
return None, False
|
pyxlpr/ppstructure/__init__.py
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
@@ -1,187 +0,0 @@
|
|
1
|
-
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
|
15
|
-
import os
|
16
|
-
import sys
|
17
|
-
import subprocess
|
18
|
-
|
19
|
-
__dir__ = os.path.dirname(os.path.abspath(__file__))
|
20
|
-
sys.path.append(__dir__)
|
21
|
-
sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
|
22
|
-
|
23
|
-
os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
|
24
|
-
import cv2
|
25
|
-
import numpy as np
|
26
|
-
import time
|
27
|
-
import logging
|
28
|
-
|
29
|
-
from pyxlpr.ppocr.utils.utility import get_image_file_list, check_and_read_gif
|
30
|
-
from pyxlpr.ppocr.utils.logging import get_logger
|
31
|
-
from pyxlpr.ppocr.tools.infer.predict_system import TextSystem
|
32
|
-
from pyxlpr.ppstructure.table.predict_table import TableSystem, to_excel
|
33
|
-
from pyxlpr.ppstructure.utility import parse_args, draw_structure_result
|
34
|
-
|
35
|
-
logger = get_logger()
|
36
|
-
|
37
|
-
|
38
|
-
class OCRSystem(object):
|
39
|
-
def __init__(self, args):
|
40
|
-
self.mode = args.mode
|
41
|
-
if self.mode == 'structure':
|
42
|
-
import layoutparser as lp
|
43
|
-
# args.det_limit_type = 'resize_long'
|
44
|
-
args.drop_score = 0
|
45
|
-
if not args.show_log:
|
46
|
-
logger.setLevel(logging.INFO)
|
47
|
-
self.text_system = TextSystem(args)
|
48
|
-
self.table_system = TableSystem(args,
|
49
|
-
self.text_system.text_detector,
|
50
|
-
self.text_system.text_recognizer)
|
51
|
-
|
52
|
-
config_path = None
|
53
|
-
model_path = None
|
54
|
-
if os.path.isdir(args.layout_path_model):
|
55
|
-
model_path = args.layout_path_model
|
56
|
-
else:
|
57
|
-
config_path = args.layout_path_model
|
58
|
-
self.table_layout = lp.PaddleDetectionLayoutModel(
|
59
|
-
config_path=config_path,
|
60
|
-
model_path=model_path,
|
61
|
-
threshold=0.5,
|
62
|
-
enable_mkldnn=args.enable_mkldnn,
|
63
|
-
enforce_cpu=not args.use_gpu,
|
64
|
-
thread_num=args.cpu_threads)
|
65
|
-
self.use_angle_cls = args.use_angle_cls
|
66
|
-
self.drop_score = args.drop_score
|
67
|
-
elif self.mode == 'vqa':
|
68
|
-
from pyxlpr.ppstructure.vqa.infer_ser_e2e import SerPredictor, draw_ser_results
|
69
|
-
self.vqa_engine = SerPredictor(args)
|
70
|
-
|
71
|
-
def __call__(self, img):
|
72
|
-
if self.mode == 'structure':
|
73
|
-
ori_im = img.copy()
|
74
|
-
layout_res = self.table_layout.detect(img[..., ::-1])
|
75
|
-
res_list = []
|
76
|
-
for region in layout_res:
|
77
|
-
x1, y1, x2, y2 = region.coordinates
|
78
|
-
x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
|
79
|
-
roi_img = ori_im[y1:y2, x1:x2, :]
|
80
|
-
if region.type == 'Table':
|
81
|
-
res = self.table_system(roi_img)
|
82
|
-
else:
|
83
|
-
filter_boxes, filter_rec_res = self.text_system(roi_img)
|
84
|
-
filter_boxes = [x + [x1, y1] for x in filter_boxes]
|
85
|
-
filter_boxes = [
|
86
|
-
x.reshape(-1).tolist() for x in filter_boxes
|
87
|
-
]
|
88
|
-
# remove style char
|
89
|
-
style_token = [
|
90
|
-
'<strike>', '<strike>', '<sup>', '</sub>', '<b>',
|
91
|
-
'</b>', '<sub>', '</sup>', '<overline>', '</overline>',
|
92
|
-
'<underline>', '</underline>', '<i>', '</i>'
|
93
|
-
]
|
94
|
-
filter_rec_res_tmp = []
|
95
|
-
for rec_res in filter_rec_res:
|
96
|
-
rec_str, rec_conf = rec_res
|
97
|
-
for token in style_token:
|
98
|
-
if token in rec_str:
|
99
|
-
rec_str = rec_str.replace(token, '')
|
100
|
-
filter_rec_res_tmp.append((rec_str, rec_conf))
|
101
|
-
res = (filter_boxes, filter_rec_res_tmp)
|
102
|
-
res_list.append({
|
103
|
-
'type': region.type,
|
104
|
-
'bbox': [x1, y1, x2, y2],
|
105
|
-
'img': roi_img,
|
106
|
-
'res': res
|
107
|
-
})
|
108
|
-
elif self.mode == 'vqa':
|
109
|
-
res_list, _ = self.vqa_engine(img)
|
110
|
-
return res_list
|
111
|
-
|
112
|
-
|
113
|
-
def save_structure_res(res, save_folder, img_name):
|
114
|
-
excel_save_folder = os.path.join(save_folder, img_name)
|
115
|
-
os.makedirs(excel_save_folder, exist_ok=True)
|
116
|
-
# save res
|
117
|
-
with open(
|
118
|
-
os.path.join(excel_save_folder, 'res.txt'), 'w',
|
119
|
-
encoding='utf8') as f:
|
120
|
-
for region in res:
|
121
|
-
if region['type'] == 'Table':
|
122
|
-
excel_path = os.path.join(excel_save_folder,
|
123
|
-
'{}.xlsx'.format(region['bbox']))
|
124
|
-
to_excel(region['res'], excel_path)
|
125
|
-
if region['type'] == 'Figure':
|
126
|
-
roi_img = region['img']
|
127
|
-
img_path = os.path.join(excel_save_folder,
|
128
|
-
'{}.jpg'.format(region['bbox']))
|
129
|
-
cv2.imwrite(img_path, roi_img)
|
130
|
-
else:
|
131
|
-
for box, rec_res in zip(region['res'][0], region['res'][1]):
|
132
|
-
f.write('{}\t{}\n'.format(
|
133
|
-
np.array(box).reshape(-1).tolist(), rec_res))
|
134
|
-
|
135
|
-
|
136
|
-
def main(args):
|
137
|
-
image_file_list = get_image_file_list(args.image_dir)
|
138
|
-
image_file_list = image_file_list
|
139
|
-
image_file_list = image_file_list[args.process_id::args.total_process_num]
|
140
|
-
|
141
|
-
structure_sys = OCRSystem(args)
|
142
|
-
img_num = len(image_file_list)
|
143
|
-
save_folder = os.path.join(args.output, structure_sys.mode)
|
144
|
-
os.makedirs(save_folder, exist_ok=True)
|
145
|
-
|
146
|
-
for i, image_file in enumerate(image_file_list):
|
147
|
-
logger.info("[{}/{}] {}".format(i, img_num, image_file))
|
148
|
-
img, flag = check_and_read_gif(image_file)
|
149
|
-
img_name = os.path.basename(image_file).split('.')[0]
|
150
|
-
|
151
|
-
if not flag:
|
152
|
-
img = cv2.imread(image_file)
|
153
|
-
if img is None:
|
154
|
-
logger.error("error in loading image:{}".format(image_file))
|
155
|
-
continue
|
156
|
-
starttime = time.time()
|
157
|
-
res = structure_sys(img)
|
158
|
-
|
159
|
-
if structure_sys.mode == 'structure':
|
160
|
-
save_structure_res(res, save_folder, img_name)
|
161
|
-
draw_img = draw_structure_result(img, res, args.vis_font_path)
|
162
|
-
img_save_path = os.path.join(save_folder, img_name, 'show.jpg')
|
163
|
-
elif structure_sys.mode == 'vqa':
|
164
|
-
draw_img = draw_ser_results(img, res, args.vis_font_path)
|
165
|
-
img_save_path = os.path.join(save_folder, img_name + '.jpg')
|
166
|
-
cv2.imwrite(img_save_path, draw_img)
|
167
|
-
logger.info('result save to {}'.format(img_save_path))
|
168
|
-
elapse = time.time() - starttime
|
169
|
-
logger.info("Predict time : {:.3f}s".format(elapse))
|
170
|
-
|
171
|
-
|
172
|
-
if __name__ == "__main__":
|
173
|
-
args = parse_args()
|
174
|
-
if args.use_mp:
|
175
|
-
p_list = []
|
176
|
-
total_process_num = args.total_process_num
|
177
|
-
for process_id in range(total_process_num):
|
178
|
-
cmd = [sys.executable, "-u"] + sys.argv + [
|
179
|
-
"--process_id={}".format(process_id),
|
180
|
-
"--use_mp={}".format(False)
|
181
|
-
]
|
182
|
-
p = subprocess.Popen(cmd, stdout=sys.stdout, stderr=sys.stdout)
|
183
|
-
p_list.append(p)
|
184
|
-
for p in p_list:
|
185
|
-
p.wait()
|
186
|
-
else:
|
187
|
-
main(args)
|
@@ -1,13 +0,0 @@
|
|
1
|
-
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|