pyxllib 0.3.96__py3-none-any.whl → 0.3.197__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyxllib/algo/geo.py +12 -0
- pyxllib/algo/intervals.py +1 -1
- pyxllib/algo/matcher.py +78 -0
- pyxllib/algo/pupil.py +187 -19
- pyxllib/algo/specialist.py +2 -1
- pyxllib/algo/stat.py +38 -2
- {pyxlpr → pyxllib/autogui}/__init__.py +1 -1
- pyxllib/autogui/activewin.py +246 -0
- pyxllib/autogui/all.py +9 -0
- pyxllib/{ext/autogui → autogui}/autogui.py +40 -11
- pyxllib/autogui/uiautolib.py +362 -0
- pyxllib/autogui/wechat.py +827 -0
- pyxllib/autogui/wechat_msg.py +421 -0
- pyxllib/autogui/wxautolib.py +84 -0
- pyxllib/cv/slidercaptcha.py +137 -0
- pyxllib/data/echarts.py +123 -12
- pyxllib/data/jsonlib.py +89 -0
- pyxllib/data/pglib.py +514 -30
- pyxllib/data/sqlite.py +231 -4
- pyxllib/ext/JLineViewer.py +14 -1
- pyxllib/ext/drissionlib.py +277 -0
- pyxllib/ext/kq5034lib.py +0 -1594
- pyxllib/ext/robustprocfile.py +497 -0
- pyxllib/ext/unixlib.py +6 -5
- pyxllib/ext/utools.py +108 -95
- pyxllib/ext/webhook.py +32 -14
- pyxllib/ext/wjxlib.py +88 -0
- pyxllib/ext/wpsapi.py +124 -0
- pyxllib/ext/xlwork.py +9 -0
- pyxllib/ext/yuquelib.py +1003 -71
- pyxllib/file/docxlib.py +1 -1
- pyxllib/file/libreoffice.py +165 -0
- pyxllib/file/movielib.py +9 -0
- pyxllib/file/packlib/__init__.py +112 -75
- pyxllib/file/pdflib.py +1 -1
- pyxllib/file/pupil.py +1 -1
- pyxllib/file/specialist/dirlib.py +1 -1
- pyxllib/file/specialist/download.py +10 -3
- pyxllib/file/specialist/filelib.py +266 -55
- pyxllib/file/xlsxlib.py +205 -50
- pyxllib/file/xlsyncfile.py +341 -0
- pyxllib/prog/cachetools.py +64 -0
- pyxllib/prog/filelock.py +42 -0
- pyxllib/prog/multiprogs.py +940 -0
- pyxllib/prog/newbie.py +9 -2
- pyxllib/prog/pupil.py +129 -60
- pyxllib/prog/specialist/__init__.py +176 -2
- pyxllib/prog/specialist/bc.py +5 -2
- pyxllib/prog/specialist/browser.py +11 -2
- pyxllib/prog/specialist/datetime.py +68 -0
- pyxllib/prog/specialist/tictoc.py +12 -13
- pyxllib/prog/specialist/xllog.py +5 -5
- pyxllib/prog/xlosenv.py +7 -0
- pyxllib/text/airscript.js +744 -0
- pyxllib/text/charclasslib.py +17 -5
- pyxllib/text/jiebalib.py +6 -3
- pyxllib/text/jinjalib.py +32 -0
- pyxllib/text/jsa_ai_prompt.md +271 -0
- pyxllib/text/jscode.py +159 -4
- pyxllib/text/nestenv.py +1 -1
- pyxllib/text/newbie.py +12 -0
- pyxllib/text/pupil/common.py +26 -0
- pyxllib/text/specialist/ptag.py +2 -2
- pyxllib/text/templates/echart_base.html +11 -0
- pyxllib/text/templates/highlight_code.html +17 -0
- pyxllib/text/templates/latex_editor.html +103 -0
- pyxllib/text/xmllib.py +76 -14
- pyxllib/xl.py +2 -1
- pyxllib-0.3.197.dist-info/METADATA +48 -0
- pyxllib-0.3.197.dist-info/RECORD +126 -0
- {pyxllib-0.3.96.dist-info → pyxllib-0.3.197.dist-info}/WHEEL +1 -2
- pyxllib/ext/autogui/__init__.py +0 -8
- pyxllib-0.3.96.dist-info/METADATA +0 -51
- pyxllib-0.3.96.dist-info/RECORD +0 -333
- pyxllib-0.3.96.dist-info/top_level.txt +0 -2
- pyxlpr/ai/__init__.py +0 -5
- pyxlpr/ai/clientlib.py +0 -1281
- pyxlpr/ai/specialist.py +0 -286
- pyxlpr/ai/torch_app.py +0 -172
- pyxlpr/ai/xlpaddle.py +0 -655
- pyxlpr/ai/xltorch.py +0 -705
- pyxlpr/data/__init__.py +0 -11
- pyxlpr/data/coco.py +0 -1325
- pyxlpr/data/datacls.py +0 -365
- pyxlpr/data/datasets.py +0 -200
- pyxlpr/data/gptlib.py +0 -1291
- pyxlpr/data/icdar/__init__.py +0 -96
- pyxlpr/data/icdar/deteval.py +0 -377
- pyxlpr/data/icdar/icdar2013.py +0 -341
- pyxlpr/data/icdar/iou.py +0 -340
- pyxlpr/data/icdar/rrc_evaluation_funcs_1_1.py +0 -463
- pyxlpr/data/imtextline.py +0 -473
- pyxlpr/data/labelme.py +0 -866
- pyxlpr/data/removeline.py +0 -179
- pyxlpr/data/specialist.py +0 -57
- pyxlpr/eval/__init__.py +0 -85
- pyxlpr/paddleocr.py +0 -776
- pyxlpr/ppocr/__init__.py +0 -15
- pyxlpr/ppocr/configs/rec/multi_language/generate_multi_language_configs.py +0 -226
- pyxlpr/ppocr/data/__init__.py +0 -135
- pyxlpr/ppocr/data/imaug/ColorJitter.py +0 -26
- pyxlpr/ppocr/data/imaug/__init__.py +0 -67
- pyxlpr/ppocr/data/imaug/copy_paste.py +0 -170
- pyxlpr/ppocr/data/imaug/east_process.py +0 -437
- pyxlpr/ppocr/data/imaug/gen_table_mask.py +0 -244
- pyxlpr/ppocr/data/imaug/iaa_augment.py +0 -114
- pyxlpr/ppocr/data/imaug/label_ops.py +0 -789
- pyxlpr/ppocr/data/imaug/make_border_map.py +0 -184
- pyxlpr/ppocr/data/imaug/make_pse_gt.py +0 -106
- pyxlpr/ppocr/data/imaug/make_shrink_map.py +0 -126
- pyxlpr/ppocr/data/imaug/operators.py +0 -433
- pyxlpr/ppocr/data/imaug/pg_process.py +0 -906
- pyxlpr/ppocr/data/imaug/randaugment.py +0 -143
- pyxlpr/ppocr/data/imaug/random_crop_data.py +0 -239
- pyxlpr/ppocr/data/imaug/rec_img_aug.py +0 -533
- pyxlpr/ppocr/data/imaug/sast_process.py +0 -777
- pyxlpr/ppocr/data/imaug/text_image_aug/__init__.py +0 -17
- pyxlpr/ppocr/data/imaug/text_image_aug/augment.py +0 -120
- pyxlpr/ppocr/data/imaug/text_image_aug/warp_mls.py +0 -168
- pyxlpr/ppocr/data/lmdb_dataset.py +0 -115
- pyxlpr/ppocr/data/pgnet_dataset.py +0 -104
- pyxlpr/ppocr/data/pubtab_dataset.py +0 -107
- pyxlpr/ppocr/data/simple_dataset.py +0 -372
- pyxlpr/ppocr/losses/__init__.py +0 -61
- pyxlpr/ppocr/losses/ace_loss.py +0 -52
- pyxlpr/ppocr/losses/basic_loss.py +0 -135
- pyxlpr/ppocr/losses/center_loss.py +0 -88
- pyxlpr/ppocr/losses/cls_loss.py +0 -30
- pyxlpr/ppocr/losses/combined_loss.py +0 -67
- pyxlpr/ppocr/losses/det_basic_loss.py +0 -208
- pyxlpr/ppocr/losses/det_db_loss.py +0 -80
- pyxlpr/ppocr/losses/det_east_loss.py +0 -63
- pyxlpr/ppocr/losses/det_pse_loss.py +0 -149
- pyxlpr/ppocr/losses/det_sast_loss.py +0 -121
- pyxlpr/ppocr/losses/distillation_loss.py +0 -272
- pyxlpr/ppocr/losses/e2e_pg_loss.py +0 -140
- pyxlpr/ppocr/losses/kie_sdmgr_loss.py +0 -113
- pyxlpr/ppocr/losses/rec_aster_loss.py +0 -99
- pyxlpr/ppocr/losses/rec_att_loss.py +0 -39
- pyxlpr/ppocr/losses/rec_ctc_loss.py +0 -44
- pyxlpr/ppocr/losses/rec_enhanced_ctc_loss.py +0 -70
- pyxlpr/ppocr/losses/rec_nrtr_loss.py +0 -30
- pyxlpr/ppocr/losses/rec_sar_loss.py +0 -28
- pyxlpr/ppocr/losses/rec_srn_loss.py +0 -47
- pyxlpr/ppocr/losses/table_att_loss.py +0 -109
- pyxlpr/ppocr/metrics/__init__.py +0 -44
- pyxlpr/ppocr/metrics/cls_metric.py +0 -45
- pyxlpr/ppocr/metrics/det_metric.py +0 -82
- pyxlpr/ppocr/metrics/distillation_metric.py +0 -73
- pyxlpr/ppocr/metrics/e2e_metric.py +0 -86
- pyxlpr/ppocr/metrics/eval_det_iou.py +0 -274
- pyxlpr/ppocr/metrics/kie_metric.py +0 -70
- pyxlpr/ppocr/metrics/rec_metric.py +0 -75
- pyxlpr/ppocr/metrics/table_metric.py +0 -50
- pyxlpr/ppocr/modeling/architectures/__init__.py +0 -32
- pyxlpr/ppocr/modeling/architectures/base_model.py +0 -88
- pyxlpr/ppocr/modeling/architectures/distillation_model.py +0 -60
- pyxlpr/ppocr/modeling/backbones/__init__.py +0 -54
- pyxlpr/ppocr/modeling/backbones/det_mobilenet_v3.py +0 -268
- pyxlpr/ppocr/modeling/backbones/det_resnet_vd.py +0 -246
- pyxlpr/ppocr/modeling/backbones/det_resnet_vd_sast.py +0 -285
- pyxlpr/ppocr/modeling/backbones/e2e_resnet_vd_pg.py +0 -265
- pyxlpr/ppocr/modeling/backbones/kie_unet_sdmgr.py +0 -186
- pyxlpr/ppocr/modeling/backbones/rec_mobilenet_v3.py +0 -138
- pyxlpr/ppocr/modeling/backbones/rec_mv1_enhance.py +0 -258
- pyxlpr/ppocr/modeling/backbones/rec_nrtr_mtb.py +0 -48
- pyxlpr/ppocr/modeling/backbones/rec_resnet_31.py +0 -210
- pyxlpr/ppocr/modeling/backbones/rec_resnet_aster.py +0 -143
- pyxlpr/ppocr/modeling/backbones/rec_resnet_fpn.py +0 -307
- pyxlpr/ppocr/modeling/backbones/rec_resnet_vd.py +0 -286
- pyxlpr/ppocr/modeling/heads/__init__.py +0 -54
- pyxlpr/ppocr/modeling/heads/cls_head.py +0 -52
- pyxlpr/ppocr/modeling/heads/det_db_head.py +0 -118
- pyxlpr/ppocr/modeling/heads/det_east_head.py +0 -121
- pyxlpr/ppocr/modeling/heads/det_pse_head.py +0 -37
- pyxlpr/ppocr/modeling/heads/det_sast_head.py +0 -128
- pyxlpr/ppocr/modeling/heads/e2e_pg_head.py +0 -253
- pyxlpr/ppocr/modeling/heads/kie_sdmgr_head.py +0 -206
- pyxlpr/ppocr/modeling/heads/multiheadAttention.py +0 -163
- pyxlpr/ppocr/modeling/heads/rec_aster_head.py +0 -393
- pyxlpr/ppocr/modeling/heads/rec_att_head.py +0 -202
- pyxlpr/ppocr/modeling/heads/rec_ctc_head.py +0 -88
- pyxlpr/ppocr/modeling/heads/rec_nrtr_head.py +0 -826
- pyxlpr/ppocr/modeling/heads/rec_sar_head.py +0 -402
- pyxlpr/ppocr/modeling/heads/rec_srn_head.py +0 -280
- pyxlpr/ppocr/modeling/heads/self_attention.py +0 -406
- pyxlpr/ppocr/modeling/heads/table_att_head.py +0 -246
- pyxlpr/ppocr/modeling/necks/__init__.py +0 -32
- pyxlpr/ppocr/modeling/necks/db_fpn.py +0 -111
- pyxlpr/ppocr/modeling/necks/east_fpn.py +0 -188
- pyxlpr/ppocr/modeling/necks/fpn.py +0 -138
- pyxlpr/ppocr/modeling/necks/pg_fpn.py +0 -314
- pyxlpr/ppocr/modeling/necks/rnn.py +0 -92
- pyxlpr/ppocr/modeling/necks/sast_fpn.py +0 -284
- pyxlpr/ppocr/modeling/necks/table_fpn.py +0 -110
- pyxlpr/ppocr/modeling/transforms/__init__.py +0 -28
- pyxlpr/ppocr/modeling/transforms/stn.py +0 -135
- pyxlpr/ppocr/modeling/transforms/tps.py +0 -308
- pyxlpr/ppocr/modeling/transforms/tps_spatial_transformer.py +0 -156
- pyxlpr/ppocr/optimizer/__init__.py +0 -61
- pyxlpr/ppocr/optimizer/learning_rate.py +0 -228
- pyxlpr/ppocr/optimizer/lr_scheduler.py +0 -49
- pyxlpr/ppocr/optimizer/optimizer.py +0 -160
- pyxlpr/ppocr/optimizer/regularizer.py +0 -52
- pyxlpr/ppocr/postprocess/__init__.py +0 -55
- pyxlpr/ppocr/postprocess/cls_postprocess.py +0 -33
- pyxlpr/ppocr/postprocess/db_postprocess.py +0 -234
- pyxlpr/ppocr/postprocess/east_postprocess.py +0 -143
- pyxlpr/ppocr/postprocess/locality_aware_nms.py +0 -200
- pyxlpr/ppocr/postprocess/pg_postprocess.py +0 -52
- pyxlpr/ppocr/postprocess/pse_postprocess/__init__.py +0 -15
- pyxlpr/ppocr/postprocess/pse_postprocess/pse/__init__.py +0 -29
- pyxlpr/ppocr/postprocess/pse_postprocess/pse/setup.py +0 -14
- pyxlpr/ppocr/postprocess/pse_postprocess/pse_postprocess.py +0 -118
- pyxlpr/ppocr/postprocess/rec_postprocess.py +0 -654
- pyxlpr/ppocr/postprocess/sast_postprocess.py +0 -355
- pyxlpr/ppocr/tools/__init__.py +0 -14
- pyxlpr/ppocr/tools/eval.py +0 -83
- pyxlpr/ppocr/tools/export_center.py +0 -77
- pyxlpr/ppocr/tools/export_model.py +0 -129
- pyxlpr/ppocr/tools/infer/predict_cls.py +0 -151
- pyxlpr/ppocr/tools/infer/predict_det.py +0 -300
- pyxlpr/ppocr/tools/infer/predict_e2e.py +0 -169
- pyxlpr/ppocr/tools/infer/predict_rec.py +0 -414
- pyxlpr/ppocr/tools/infer/predict_system.py +0 -204
- pyxlpr/ppocr/tools/infer/utility.py +0 -629
- pyxlpr/ppocr/tools/infer_cls.py +0 -83
- pyxlpr/ppocr/tools/infer_det.py +0 -134
- pyxlpr/ppocr/tools/infer_e2e.py +0 -122
- pyxlpr/ppocr/tools/infer_kie.py +0 -153
- pyxlpr/ppocr/tools/infer_rec.py +0 -146
- pyxlpr/ppocr/tools/infer_table.py +0 -107
- pyxlpr/ppocr/tools/program.py +0 -596
- pyxlpr/ppocr/tools/test_hubserving.py +0 -117
- pyxlpr/ppocr/tools/train.py +0 -163
- pyxlpr/ppocr/tools/xlprog.py +0 -748
- pyxlpr/ppocr/utils/EN_symbol_dict.txt +0 -94
- pyxlpr/ppocr/utils/__init__.py +0 -24
- pyxlpr/ppocr/utils/dict/ar_dict.txt +0 -117
- pyxlpr/ppocr/utils/dict/arabic_dict.txt +0 -162
- pyxlpr/ppocr/utils/dict/be_dict.txt +0 -145
- pyxlpr/ppocr/utils/dict/bg_dict.txt +0 -140
- pyxlpr/ppocr/utils/dict/chinese_cht_dict.txt +0 -8421
- pyxlpr/ppocr/utils/dict/cyrillic_dict.txt +0 -163
- pyxlpr/ppocr/utils/dict/devanagari_dict.txt +0 -167
- pyxlpr/ppocr/utils/dict/en_dict.txt +0 -63
- pyxlpr/ppocr/utils/dict/fa_dict.txt +0 -136
- pyxlpr/ppocr/utils/dict/french_dict.txt +0 -136
- pyxlpr/ppocr/utils/dict/german_dict.txt +0 -143
- pyxlpr/ppocr/utils/dict/hi_dict.txt +0 -162
- pyxlpr/ppocr/utils/dict/it_dict.txt +0 -118
- pyxlpr/ppocr/utils/dict/japan_dict.txt +0 -4399
- pyxlpr/ppocr/utils/dict/ka_dict.txt +0 -153
- pyxlpr/ppocr/utils/dict/korean_dict.txt +0 -3688
- pyxlpr/ppocr/utils/dict/latin_dict.txt +0 -185
- pyxlpr/ppocr/utils/dict/mr_dict.txt +0 -153
- pyxlpr/ppocr/utils/dict/ne_dict.txt +0 -153
- pyxlpr/ppocr/utils/dict/oc_dict.txt +0 -96
- pyxlpr/ppocr/utils/dict/pu_dict.txt +0 -130
- pyxlpr/ppocr/utils/dict/rs_dict.txt +0 -91
- pyxlpr/ppocr/utils/dict/rsc_dict.txt +0 -134
- pyxlpr/ppocr/utils/dict/ru_dict.txt +0 -125
- pyxlpr/ppocr/utils/dict/ta_dict.txt +0 -128
- pyxlpr/ppocr/utils/dict/table_dict.txt +0 -277
- pyxlpr/ppocr/utils/dict/table_structure_dict.txt +0 -2759
- pyxlpr/ppocr/utils/dict/te_dict.txt +0 -151
- pyxlpr/ppocr/utils/dict/ug_dict.txt +0 -114
- pyxlpr/ppocr/utils/dict/uk_dict.txt +0 -142
- pyxlpr/ppocr/utils/dict/ur_dict.txt +0 -137
- pyxlpr/ppocr/utils/dict/xi_dict.txt +0 -110
- pyxlpr/ppocr/utils/dict90.txt +0 -90
- pyxlpr/ppocr/utils/e2e_metric/Deteval.py +0 -574
- pyxlpr/ppocr/utils/e2e_metric/polygon_fast.py +0 -83
- pyxlpr/ppocr/utils/e2e_utils/extract_batchsize.py +0 -87
- pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_fast.py +0 -457
- pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_slow.py +0 -592
- pyxlpr/ppocr/utils/e2e_utils/pgnet_pp_utils.py +0 -162
- pyxlpr/ppocr/utils/e2e_utils/visual.py +0 -162
- pyxlpr/ppocr/utils/en_dict.txt +0 -95
- pyxlpr/ppocr/utils/gen_label.py +0 -81
- pyxlpr/ppocr/utils/ic15_dict.txt +0 -36
- pyxlpr/ppocr/utils/iou.py +0 -54
- pyxlpr/ppocr/utils/logging.py +0 -69
- pyxlpr/ppocr/utils/network.py +0 -84
- pyxlpr/ppocr/utils/ppocr_keys_v1.txt +0 -6623
- pyxlpr/ppocr/utils/profiler.py +0 -110
- pyxlpr/ppocr/utils/save_load.py +0 -150
- pyxlpr/ppocr/utils/stats.py +0 -72
- pyxlpr/ppocr/utils/utility.py +0 -80
- pyxlpr/ppstructure/__init__.py +0 -13
- pyxlpr/ppstructure/predict_system.py +0 -187
- pyxlpr/ppstructure/table/__init__.py +0 -13
- pyxlpr/ppstructure/table/eval_table.py +0 -72
- pyxlpr/ppstructure/table/matcher.py +0 -192
- pyxlpr/ppstructure/table/predict_structure.py +0 -136
- pyxlpr/ppstructure/table/predict_table.py +0 -221
- pyxlpr/ppstructure/table/table_metric/__init__.py +0 -16
- pyxlpr/ppstructure/table/table_metric/parallel.py +0 -51
- pyxlpr/ppstructure/table/table_metric/table_metric.py +0 -247
- pyxlpr/ppstructure/table/tablepyxl/__init__.py +0 -13
- pyxlpr/ppstructure/table/tablepyxl/style.py +0 -283
- pyxlpr/ppstructure/table/tablepyxl/tablepyxl.py +0 -118
- pyxlpr/ppstructure/utility.py +0 -71
- pyxlpr/xlai.py +0 -10
- /pyxllib/{ext/autogui → autogui}/virtualkey.py +0 -0
- {pyxllib-0.3.96.dist-info → pyxllib-0.3.197.dist-info/licenses}/LICENSE +0 -0
pyxlpr/ppocr/__init__.py
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
|
15
|
-
from pathlib import Path
|
@@ -1,226 +0,0 @@
|
|
1
|
-
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
|
15
|
-
import yaml
|
16
|
-
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
17
|
-
import os.path
|
18
|
-
import logging
|
19
|
-
logging.basicConfig(level=logging.INFO)
|
20
|
-
|
21
|
-
# Language codes accepted by -l/--language, mapped to the human-readable
# names that are printed in that option's help text.
support_list = {
    'it': 'italian',
    'xi': 'spanish',
    'pu': 'portuguese',
    'ru': 'russian',
    'ar': 'arabic',
    'ta': 'tamil',
    'ug': 'uyghur',
    'fa': 'persian',
    'ur': 'urdu',
    'rs': 'serbian latin',
    'oc': 'occitan',
    'rsc': 'serbian cyrillic',
    'bg': 'bulgarian',
    'uk': 'ukranian',
    'be': 'belarusian',
    'te': 'telugu',
    'ka': 'kannada',
    'chinese_cht': 'chinese tradition',
    'hi': 'hindi',
    'mr': 'marathi',
    'ne': 'nepali',
}

# Codes grouped by family. A code found in one of these lists is collapsed to
# the family name ("latin", "arabic", ...) by ArgsParser._set_language.
latin_lang = [
    'af', 'az', 'bs', 'cs', 'cy', 'da', 'de', 'es', 'et', 'fr', 'ga', 'hr',
    'hu', 'id', 'is', 'it', 'ku', 'la', 'lt', 'lv', 'mi', 'ms', 'mt', 'nl',
    'no', 'oc', 'pi', 'pl', 'pt', 'ro', 'rs_latin', 'sk', 'sl', 'sq', 'sv',
    'sw', 'tl', 'tr', 'uz', 'vi', 'latin'
]
arabic_lang = ['ar', 'fa', 'ug', 'ur']
cyrillic_lang = [
    'ru', 'rs_cyrillic', 'be', 'bg', 'uk', 'mn', 'abq', 'ady', 'kbd', 'ava',
    'dar', 'inh', 'che', 'lbe', 'lez', 'tab', 'cyrillic'
]
devanagari_lang = [
    'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new', 'gom',
    'sa', 'bgc', 'devanagari'
]
multi_lang = latin_lang + arabic_lang + cyrillic_lang + devanagari_lang

# The base config is looked up relative to the current working directory, so
# the script has to be started from the directory that contains it.
assert (os.path.isfile("./rec_multi_language_lite_train.yml")
        ), ("Loss basic configuration file rec_multi_language_lite_train.yml."
            "You can download it from "
            "https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/configs/rec/multi_language/")

# NOTE(review): yaml.Loader can construct arbitrary Python objects. That is
# acceptable for a trusted local config; consider yaml.SafeLoader if this file
# may ever come from an untrusted source.
# The file is opened in a ``with`` block so the handle is closed after parsing.
with open("./rec_multi_language_lite_train.yml", 'rb') as base_config_file:
    global_config = yaml.load(base_config_file, Loader=yaml.Loader)
project_path = os.path.abspath(os.path.join(os.getcwd(), "../../../"))
|
70
|
-
|
71
|
-
|
72
|
-
class ArgsParser(ArgumentParser):
    """Command-line parser for the multi-language config generator.

    On top of declaring the options, ``parse_args`` post-processes two of
    them: ``--opt`` is turned into a dict of overrides, and ``--language`` is
    reduced to a single language/family name while the language-specific
    entries of the module-level ``global_config`` are rewritten.
    """

    def __init__(self):
        super(ArgsParser, self).__init__(
            formatter_class=RawDescriptionHelpFormatter)
        self.add_argument(
            "-o", "--opt", nargs='+', help="set configuration options")
        self.add_argument(
            "-l",
            "--language",
            nargs='+',
            help="set language type, support {}".format(support_list))
        self.add_argument(
            "--train",
            type=str,
            help="you can use this command to change the train dataset default path"
        )
        self.add_argument(
            "--val",
            type=str,
            help="you can use this command to change the eval dataset default path"
        )
        self.add_argument(
            "--dict",
            type=str,
            help="you can use this command to change the dictionary default path"
        )
        self.add_argument(
            "--data_dir",
            type=str,
            help="you can use this command to change the dataset default root path"
        )

    def parse_args(self, argv=None):
        """Parse ``argv`` and return the namespace with ``opt`` as a dict and
        ``language`` as the resolved language/family name."""
        args = super(ArgsParser, self).parse_args(argv)
        args.opt = self._parse_opt(args.opt)
        args.language = self._set_language(args.language)
        return args

    def _parse_opt(self, opts):
        """Convert ``key=value`` strings into a ``{key: parsed_value}`` dict.

        Returns an empty dict when ``opts`` is ``None`` or empty. A string
        without ``=`` raises ``ValueError`` from the tuple unpacking.
        """
        config = {}
        if not opts:
            return config
        for s in opts:
            s = s.strip()
            # Split on the first '=' only, so values may themselves contain '='.
            k, v = s.split('=', 1)
            # NOTE(review): yaml.Loader is not a safe loader; values come from
            # the operator's own command line here.
            config[k] = yaml.load(v, Loader=yaml.Loader)
        return config

    def _set_language(self, type):
        """Resolve the ``--language`` values and point ``global_config`` at
        the matching dictionary, output directory and label files.

        Only the first value is used. Returns the resolved name: the family
        name for codes listed in one of the ``*_lang`` lists, otherwise the
        code itself.
        """
        # Check before indexing: when -l is omitted argparse leaves the value
        # as None, and indexing it would raise TypeError instead of this hint.
        assert (type), "please use -l or --language to choose language type"
        lang = type[0]
        assert (
            lang in support_list.keys() or lang in multi_lang
        ), ("the sub_keys(-l or --language) can only be one of support list: \n{},\nbut get: {}, "
            "please check your running command").format(multi_lang, type)
        if lang in latin_lang:
            lang = "latin"
        elif lang in arabic_lang:
            lang = "arabic"
        elif lang in cyrillic_lang:
            lang = "cyrillic"
        elif lang in devanagari_lang:
            lang = "devanagari"
        global_config['Global'][
            'character_dict_path'] = 'ppocr/utils/dict/{}_dict.txt'.format(lang)
        global_config['Global'][
            'save_model_dir'] = './output/rec_{}_lite'.format(lang)
        global_config['Train']['dataset'][
            'label_file_list'] = ["train_data/{}_train.txt".format(lang)]
        global_config['Eval']['dataset'][
            'label_file_list'] = ["train_data/{}_val.txt".format(lang)]
        global_config['Global']['character_type'] = lang
        # The dictionary path is checked relative to project_path.
        assert (
            os.path.isfile(
                os.path.join(project_path, global_config['Global'][
                    'character_dict_path']))
        ), ("Loss default dictionary file {}_dict.txt.You can download it from "
            "https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/ppocr/utils/dict/").format(
                lang)
        return lang
|
152
|
-
|
153
|
-
|
154
|
-
def merge_config(config):
    """
    Apply override entries to the module-level ``global_config`` in place.

    A key without a dot either updates the existing top-level dict (when the
    new value is a dict and the key already exists) or replaces/creates the
    top-level entry. A dotted key such as ``A.b.c`` walks down
    ``global_config['A']['b']`` and assigns ``c`` there; its first component
    must already exist in ``global_config``.

    Args:
        config (dict): Overrides to merge.

    Returns nothing; ``global_config`` is modified directly.
    """
    for key, value in config.items():
        if "." in key:
            sub_keys = key.split('.')
            assert (
                sub_keys[0] in global_config
            ), "the sub_keys can only be one of global_config: {}, but get: {}, please check your running command".format(
                global_config.keys(), sub_keys[0])
            # Descend through every intermediate component, then assign the
            # value under the last one.
            node = global_config[sub_keys[0]]
            for part in sub_keys[1:-1]:
                node = node[part]
            node[sub_keys[-1]] = value
            continue

        if isinstance(value, dict) and key in global_config:
            global_config[key].update(value)
        else:
            global_config[key] = value
|
179
|
-
|
180
|
-
|
181
|
-
def loss_file(path):
    """Raise ``AssertionError`` unless ``path`` exists on disk.

    Args:
        path: File or directory path to check.

    Raises:
        AssertionError: If nothing exists at ``path``.
    """
    if not os.path.exists(path):
        # Raised explicitly rather than through ``assert`` so the check still
        # runs under ``python -O``; the exception type is unchanged.
        raise AssertionError(
            "There is no such file:{},Please do not forget to put in the specified file".format(
                path))
|
186
|
-
|
187
|
-
|
188
|
-
if __name__ == '__main__':
    # Parse the command line and fold the -o overrides into global_config.
    FLAGS = ArgsParser().parse_args()
    merge_config(FLAGS.opt)

    # Remove a previously generated config of the same name before writing.
    save_file_path = 'rec_{}_lite_train.yml'.format(FLAGS.language)
    if os.path.isfile(save_file_path):
        os.remove(save_file_path)

    # Explicit path options replace the per-language defaults. Each one is
    # stored as given and then checked for existence relative to project_path.
    if FLAGS.train:
        global_config['Train']['dataset']['label_file_list'] = [FLAGS.train]
        loss_file(os.path.join(project_path, FLAGS.train))
    if FLAGS.val:
        global_config['Eval']['dataset']['label_file_list'] = [FLAGS.val]
        loss_file(os.path.join(project_path, FLAGS.val))
    if FLAGS.dict:
        global_config['Global']['character_dict_path'] = FLAGS.dict
        loss_file(os.path.join(project_path, FLAGS.dict))
    if FLAGS.data_dir:
        global_config['Eval']['dataset']['data_dir'] = FLAGS.data_dir
        global_config['Train']['dataset']['data_dir'] = FLAGS.data_dir
        loss_file(os.path.join(project_path, FLAGS.data_dir))

    with open(save_file_path, 'w') as out_file:
        yaml.dump(
            dict(global_config),
            out_file,
            default_flow_style=False,
            sort_keys=False)

    # Report the settings that ended up in the generated file.
    logging.info("Project path is :{}".format(project_path))
    logging.info("Train list path set to :{}".format(
        global_config['Train']['dataset']['label_file_list'][0]))
    logging.info("Eval list path set to :{}".format(
        global_config['Eval']['dataset']['label_file_list'][0]))
    logging.info("Dataset root path set to :{}".format(
        global_config['Eval']['dataset']['data_dir']))
    logging.info("Dict path set to :{}".format(
        global_config['Global']['character_dict_path']))
    logging.info(
        "Config file set to :configs/rec/multi_language/{}".format(
            save_file_path))
|
pyxlpr/ppocr/data/__init__.py
DELETED
@@ -1,135 +0,0 @@
|
|
1
|
-
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
|
15
|
-
from __future__ import absolute_import
|
16
|
-
from __future__ import division
|
17
|
-
from __future__ import print_function
|
18
|
-
from __future__ import unicode_literals
|
19
|
-
|
20
|
-
import os
|
21
|
-
import sys
|
22
|
-
import numpy as np
|
23
|
-
import paddle
|
24
|
-
import signal
|
25
|
-
import random
|
26
|
-
|
27
|
-
__dir__ = os.path.dirname(os.path.abspath(__file__))
|
28
|
-
sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
|
29
|
-
|
30
|
-
import copy
|
31
|
-
from paddle.io import Dataset, DataLoader, BatchSampler, DistributedBatchSampler
|
32
|
-
import paddle.distributed as dist
|
33
|
-
|
34
|
-
from pyxlpr.ppocr.data.imaug import transform, create_operators
|
35
|
-
# 可以在ppocr/data目录下新增脚本,添加自己的数据格式
|
36
|
-
# 个人想法,到时候新增的数据格式类,都统一放到一个文件xl_dataset里,方便整理和分享
|
37
|
-
from pyxlpr.ppocr.data.simple_dataset import * # 这里扩展了一些自己的基础数据格式
|
38
|
-
from pyxlpr.ppocr.data.lmdb_dataset import LMDBDataSet
|
39
|
-
from pyxlpr.ppocr.data.pgnet_dataset import PGDataSet
|
40
|
-
from pyxlpr.ppocr.data.pubtab_dataset import PubTabDataSet
|
41
|
-
|
42
|
-
# 或者不在ppocr.data这里加也可以,重点是能导入特定接口范式的类,让这里eval能取到即可。
|
43
|
-
|
44
|
-
__all__ = ['build_dataloader', 'transform', 'create_operators']
|
45
|
-
|
46
|
-
|
47
|
-
def term_mp(sig_num, frame):
    """Signal handler: report the exit, then SIGKILL the whole process group.

    ``sig_num`` and ``frame`` are the standard handler arguments and are not
    used.
    """
    main_pid = os.getpid()
    group_id = os.getpgid(main_pid)
    print("main proc {} exit, kill process group {}".format(main_pid, group_id))
    os.killpg(group_id, signal.SIGKILL)
|
54
|
-
|
55
|
-
|
56
|
-
def build_dataloader(config, mode, device, logger, seed=None):
    """Build the paddle ``DataLoader`` for one phase of a run.

    Args:
        config (dict): Full configuration. ``config[mode]['dataset']['name']``
            selects the dataset class, and ``config[mode]['loader']`` supplies
            ``batch_size_per_card``, ``drop_last``, ``shuffle``,
            ``num_workers`` and, optionally, ``use_shared_memory``. A deep
            copy is taken before use.
        mode (str): One of ``'Train'``, ``'Eval'`` or ``'Test'``.
        device: Passed to ``DataLoader`` as ``places``.
        logger: Forwarded to the dataset constructor.
        seed: Forwarded to the dataset constructor.

    Returns:
        The constructed ``DataLoader``.

    Raises:
        AssertionError: If ``mode`` or the dataset name is not supported.

    Side effect: installs ``term_mp`` as the SIGINT and SIGTERM handler.
    """
    config = copy.deepcopy(config)

    # Validate the mode before it is used as a config key; otherwise an
    # unsupported mode surfaces as a bare KeyError instead of this message.
    assert mode in ['Train', 'Eval', 'Test'
                    ], "Mode should be Train, Eval or Test."

    support_dict = [
        'SimpleDataSet', 'LMDBDataSet', 'PGDataSet', 'PubTabDataSet', 'XlSimpleDataSet'
    ]
    module_name = config[mode]['dataset']['name']
    assert module_name in support_dict, Exception(
        'DataSet only support {}'.format(support_dict))

    # eval is not a security risk here: module_name has just been checked
    # against the fixed support_dict whitelist.
    # Custom dataset classes: apart from this constructor signature, there do
    # not seem to be any other framework-level constraints.
    dataset = eval(module_name)(config, mode, logger, seed)
    loader_config = config[mode]['loader']
    # Required loader parameters.
    batch_size = loader_config['batch_size_per_card']
    drop_last = loader_config['drop_last']
    shuffle = loader_config['shuffle']
    num_workers = loader_config['num_workers']
    # Optional parameter. Note the strategy differs from d2: d2 usually puts
    # such a parameter into the initial default config dict, whereas ppocr
    # decides the default here inside the function.
    # use_shared_memory (bool): whether to use shared memory to speed up
    # putting data into the inter-process queue. Only effective in
    # multi-process mode (num_workers > 0); make sure the machine has enough
    # shared memory (e.g. the size of /dev/shm/ on Linux) before enabling it.
    # Defaults to True.
    use_shared_memory = loader_config.get('use_shared_memory', True)
    if mode == "Train":
        # Training distributes batches with the multi-card sampler; a single
        # card also works, as a special case of the multi-card mechanism.
        # Distribute data to multiple cards
        batch_sampler = DistributedBatchSampler(
            dataset=dataset,
            batch_size=batch_size,
            shuffle=shuffle,
            drop_last=drop_last)
    else:
        # Outside training, always process on a single card.
        # Distribute data to single card
        batch_sampler = BatchSampler(
            dataset=dataset,
            batch_size=batch_size,
            shuffle=shuffle,
            drop_last=drop_last)

    data_loader = DataLoader(
        dataset=dataset,
        batch_sampler=batch_sampler,
        places=device,
        num_workers=num_workers,
        return_list=True,
        use_shared_memory=use_shared_memory)

    # Background reading on the signal module:
    # https://www.jianshu.com/p/e0a69beb98bb
    # support exit using ctrl+c
    signal.signal(signal.SIGINT, term_mp)
    signal.signal(signal.SIGTERM, term_mp)

    return data_loader
|
118
|
-
|
119
|
-
|
120
|
-
def build_dataset(config, mode, logger, seed=None):
    """ckz: build only the dataset, for callers that do not need a loader.

    Args:
        config (dict): Full configuration; ``config[mode]['dataset']['name']``
            selects the dataset class. A deep copy is taken before use.
        mode (str): One of ``'Train'``, ``'Eval'`` or ``'Test'``.
        logger: Forwarded to the dataset constructor.
        seed: Forwarded to the dataset constructor.

    Returns:
        The constructed dataset instance.

    Raises:
        AssertionError: If ``mode`` or the dataset name is not supported.
    """
    config = copy.deepcopy(config)

    # Validate the mode before it is used as a config key; otherwise an
    # unsupported mode surfaces as a bare KeyError instead of this message.
    assert mode in ['Train', 'Eval', 'Test'
                    ], "Mode should be Train, Eval or Test."

    support_dict = [
        'SimpleDataSet', 'LMDBDataSet', 'PGDataSet', 'PubTabDataSet', 'XlSimpleDataSet'
    ]
    module_name = config[mode]['dataset']['name']
    assert module_name in support_dict, Exception(
        'DataSet only support {}'.format(support_dict))

    # eval only ever sees a name from the support_dict whitelist checked above.
    dataset = eval(module_name)(config, mode, logger, seed)

    return dataset
|
@@ -1,26 +0,0 @@
|
|
1
|
-
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
from paddle.vision.transforms import ColorJitter as pp_ColorJitter
|
15
|
-
|
16
|
-
__all__ = ['ColorJitter']
|
17
|
-
|
18
|
-
class ColorJitter(object):
    """Data-pipeline operator that runs paddle's ``ColorJitter`` transform on
    the ``'image'`` entry of a sample dict."""

    def __init__(self, brightness=0, contrast=0, saturation=0, hue=0, **kwargs):
        # Any extra keyword arguments are accepted and ignored.
        jitter = pp_ColorJitter(brightness, contrast, saturation, hue)
        self.aug = jitter

    def __call__(self, data):
        """Replace ``data['image']`` with its jittered version and return the
        same dict."""
        data['image'] = self.aug(data['image'])
        return data
|
@@ -1,67 +0,0 @@
|
|
1
|
-
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
from __future__ import absolute_import
|
15
|
-
from __future__ import division
|
16
|
-
from __future__ import print_function
|
17
|
-
from __future__ import unicode_literals
|
18
|
-
|
19
|
-
from .iaa_augment import IaaAugment
|
20
|
-
from .make_border_map import MakeBorderMap
|
21
|
-
from .make_shrink_map import MakeShrinkMap
|
22
|
-
from .random_crop_data import EastRandomCropData, RandomCropImgMask
|
23
|
-
from .make_pse_gt import MakePseGt
|
24
|
-
|
25
|
-
from .rec_img_aug import RecAug, RecResizeImg, ClsResizeImg, SRNRecResizeImg, NRTRRecResizeImg, SARRecResizeImg
|
26
|
-
from .randaugment import RandAugment
|
27
|
-
from .copy_paste import CopyPaste
|
28
|
-
from .ColorJitter import ColorJitter
|
29
|
-
from .operators import * # 操作器的定义方式,可以参考这里的DecodeImage实现
|
30
|
-
from .label_ops import *
|
31
|
-
|
32
|
-
from .east_process import *
|
33
|
-
from .sast_process import *
|
34
|
-
from .pg_process import *
|
35
|
-
from .gen_table_mask import *
|
36
|
-
|
37
|
-
|
38
|
-
def transform(data, ops=None):
    """Feed ``data`` through every operator in ``ops``, in order.

    Each operator receives the previous operator's result. As soon as one
    of them returns None the remaining operators are skipped and None is
    returned. With ``ops`` left as None, ``data`` is returned untouched.
    """
    for step in ([] if ops is None else ops):
        data = step(data)
        if data is None:
            # A None result means "drop this sample": stop processing here.
            break
    return data
|
47
|
-
|
48
|
-
|
49
|
-
def create_operators(op_param_list, global_config=None):
    """
    Create operators based on the config.

    Args:
        op_param_list(list): a list of single-key dicts ``{op_name: params}``.
            ``params`` is a mapping of keyword arguments for the operator,
            or None for "no arguments".
        global_config(dict, optional): keyword arguments merged into every
            operator's params; on a key clash the global value wins.

    Returns:
        list: the instantiated operators, in the order of ``op_param_list``.

    Raises:
        AssertionError: if ``op_param_list`` is not a list, or an entry is
            not a dict with exactly one key.
    """
    assert isinstance(op_param_list, list), ('operator config should be a list')
    ops = []
    for operator in op_param_list:
        assert isinstance(operator,
                          dict) and len(operator) == 1, "yaml format error"
        op_name, op_args = next(iter(operator.items()))
        # Work on a copy: merging global_config must not write back into the
        # caller's config, otherwise the global keys would leak into it and
        # persist across later calls.
        param = {} if op_args is None else dict(op_args)
        if global_config is not None:
            param.update(global_config)
        # NOTE(review): eval() resolves op_name against this module's
        # namespace and will execute arbitrary expressions. Only feed it
        # operator names from trusted configuration files.
        op = eval(op_name)(**param)
        ops.append(op)
    return ops
|
@@ -1,170 +0,0 @@
|
|
1
|
-
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
import copy
|
15
|
-
import cv2
|
16
|
-
import random
|
17
|
-
import numpy as np
|
18
|
-
from PIL import Image
|
19
|
-
from shapely.geometry import Polygon
|
20
|
-
|
21
|
-
from pyxlpr.ppocr.data.imaug.iaa_augment import IaaAugment
|
22
|
-
from pyxlpr.ppocr.data.imaug.random_crop_data import is_poly_outside_rect
|
23
|
-
from pyxlpr.ppocr.tools.infer.utility import get_rotate_crop_image
|
24
|
-
|
25
|
-
|
26
|
-
class CopyPaste(object):
    """Copy-paste augmentation operator.

    Crops text regions out of an extra sample (``data['ext_data'][0]``),
    rotates each crop by a random angle and pastes it onto the current
    image, appending the pasted boxes to ``data['polys']`` and their tags to
    ``data['ignore_tags']``.

    Args:
        objects_paste_ratio: fraction of the extra sample's polygons to try
            to paste (the resulting count is kept within 1..30).
        limit_paste: when True, a paste position is only accepted if the
            pasted box's bounding rectangle does not touch an existing polygon.
        **kwargs: accepted so extra config keys do not raise; not used.
    """

    def __init__(self, objects_paste_ratio=0.2, limit_paste=True, **kwargs):
        self.ext_data_num = 1
        self.objects_paste_ratio = objects_paste_ratio
        self.limit_paste = limit_paste
        augmenter_args = [{'type': 'Resize', 'args': {'size': [0.5, 3]}}]
        self.aug = IaaAugment(augmenter_args)

    def __call__(self, data):
        """Paste crops from the extra sample into ``data`` and return ``data``.

        Reads ``image``, ``polys``, ``ignore_tags`` and ``ext_data`` from
        ``data``; overwrites ``image``, ``polys`` and ``ignore_tags``.
        """
        poly_shape = data['polys'].shape
        point_num = poly_shape[1]
        src_img = data['image']
        src_polys = data['polys'].tolist()
        src_ignores = data['ignore_tags'].tolist()
        ext_data = data['ext_data'][0]
        ext_image = ext_data['image']
        ext_polys = ext_data['polys']
        ext_ignores = ext_data['ignore_tags']

        # Only polygons that are not flagged as ignored are paste candidates.
        indexs = [i for i, ignored in enumerate(ext_ignores) if not ignored]
        select_num = max(
            1, min(int(self.objects_paste_ratio * len(ext_polys)), 30))

        random.shuffle(indexs)
        select_idxs = indexs[:select_num]
        select_polys = ext_polys[select_idxs]
        select_ignores = ext_ignores[select_idxs]

        src_img = cv2.cvtColor(src_img, cv2.COLOR_BGR2RGB)
        ext_image = cv2.cvtColor(ext_image, cv2.COLOR_BGR2RGB)
        src_img = Image.fromarray(src_img).convert('RGBA')
        for poly, tag in zip(select_polys, select_ignores):
            box_img = get_rotate_crop_image(ext_image, poly)

            src_img, box = self.paste_img(src_img, box_img, src_polys)
            if box is not None:
                box = box.tolist()
                # Pad by repeating the last point so the pasted box has as
                # many points as the existing polygons.
                for _ in range(len(box), point_num):
                    box.append(box[-1])
                src_polys.append(box)
                src_ignores.append(tag)
        src_img = cv2.cvtColor(np.array(src_img), cv2.COLOR_RGB2BGR)
        h, w = src_img.shape[:2]
        src_polys = np.array(src_polys)
        if src_polys.size == 0:
            # np.array([]) is 1-D; restore the trailing dimensions of the
            # input polygons so the clipping below does not raise IndexError
            # when there are no polygons at all.
            src_polys = src_polys.reshape((0,) + tuple(poly_shape[1:]))
        src_polys[:, :, 0] = np.clip(src_polys[:, :, 0], 0, w)
        src_polys[:, :, 1] = np.clip(src_polys[:, :, 1], 0, h)
        data['image'] = src_img
        data['polys'] = src_polys
        data['ignore_tags'] = np.array(src_ignores)
        return data

    def paste_img(self, src_img, box_img, src_polys):
        """Rotate ``box_img`` by a random angle and paste it onto ``src_img``.

        Returns:
            ``(src_img, box)`` where ``box`` holds the pasted corner points,
            or ``(src_img, None)`` when the rotated crop is larger than the
            image or no position was selected.
        """
        box_img_pil = Image.fromarray(box_img).convert('RGBA')
        src_w, src_h = src_img.size
        box_w, box_h = box_img_pil.size

        angle = np.random.randint(0, 360)
        box = np.array([[[0, 0], [box_w, 0], [box_w, box_h], [0, box_h]]])
        box = rotate_bbox(box_img, box, angle)[0]
        box_img_pil = box_img_pil.rotate(angle, expand=1)
        box_w, box_h = box_img_pil.width, box_img_pil.height
        if src_w - box_w < 0 or src_h - box_h < 0:
            return src_img, None

        paste_x, paste_y = self.select_coord(src_polys, box, src_w - box_w,
                                             src_h - box_h)
        if paste_x is None:
            return src_img, None
        box[:, 0] += paste_x
        box[:, 1] += paste_y
        # Use the alpha band as the paste mask.
        alpha = box_img_pil.split()[-1]
        src_img.paste(box_img_pil, (paste_x, paste_y), mask=alpha)

        return src_img, box

    def select_coord(self, src_polys, box, endx, endy):
        """Pick a random paste offset with x in [0, endx] and y in [0, endy].

        With ``limit_paste`` enabled, up to 50 random offsets are tried and
        the first one for which every polygon in ``src_polys`` is reported
        outside the shifted bounding rectangle of ``box`` is returned;
        ``(None, None)`` is returned if all attempts fail. Otherwise a single
        random offset is returned without any check.
        """
        if not self.limit_paste:
            paste_x = random.randint(0, endx)
            paste_y = random.randint(0, endy)
            return paste_x, paste_y

        xmin, ymin = box[:, 0].min(), box[:, 1].min()
        xmax, ymax = box[:, 0].max(), box[:, 1].max()
        for _ in range(50):
            paste_x = random.randint(0, endx)
            paste_y = random.randint(0, endy)
            xmin1 = xmin + paste_x
            xmax1 = xmax + paste_x
            ymin1 = ymin + paste_y
            ymax1 = ymax + paste_y

            overlaps = any(
                not is_poly_outside_rect(poly, xmin1, ymin1, xmax1 - xmin1,
                                         ymax1 - ymin1)
                for poly in src_polys)
            if not overlaps:
                return paste_x, paste_y
        return None, None
|
125
|
-
|
126
|
-
|
127
|
-
def get_union(pD, pG):
    """Return the area of the union of the polygons built from ``pD`` and ``pG``."""
    first = Polygon(pD)
    second = Polygon(pG)
    merged = first.union(second)
    return merged.area
|
129
|
-
|
130
|
-
|
131
|
-
def get_intersection_over_union(pD, pG):
    """Return intersection area divided by union area for two polygons.

    Args:
        pD: point sequence of the first polygon (passed to ``get_intersection``
            and ``get_union``).
        pG: point sequence of the second polygon.

    Returns:
        The ratio intersection / union, or 0.0 when the union area is zero
        (for example when both polygons are degenerate), instead of raising
        ZeroDivisionError.
    """
    union_area = get_union(pD, pG)
    if union_area == 0:
        # Nothing is covered by either polygon, so there is no overlap.
        return 0.0
    return get_intersection(pD, pG) / union_area
|
133
|
-
|
134
|
-
|
135
|
-
def get_intersection(pD, pG):
    """Return the area of the overlap of the polygons built from ``pD`` and ``pG``."""
    first = Polygon(pD)
    second = Polygon(pG)
    overlap = first.intersection(second)
    return overlap.area
|
137
|
-
|
138
|
-
|
139
|
-
def rotate_bbox(img, text_polys, angle, scale=1):
    """
    Apply a rotation (with canvas expansion) to the corners of each box.

    from https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/augment.py

    Args:
        img: np.ndarray; only its first two shape entries (height, width) are read
        text_polys: np.ndarray N*4*2
        angle: rotation angle in degrees
        scale: scale factor handed to cv2.getRotationMatrix2D

    Returns:
        np.ndarray (float32) with the four transformed corner points of every box
    """
    h, w = img.shape[0], img.shape[1]

    # Size of the axis-aligned canvas that contains the rotated w x h image.
    rangle = np.deg2rad(angle)
    sin_a, cos_a = np.sin(rangle), np.cos(rangle)
    nw = abs(sin_a * h) + abs(cos_a * w)
    nh = abs(cos_a * h) + abs(sin_a * w)

    # Rotation about the centre of the new canvas, then shift the translation
    # column by the rotated offset between the old and the new canvas.
    rot_mat = cv2.getRotationMatrix2D((nw * 0.5, nh * 0.5), angle, scale)
    shift = np.dot(rot_mat, np.array([(nw - w) * 0.5, (nh - h) * 0.5, 0]))
    rot_mat[0, 2] += shift[0]
    rot_mat[1, 2] += shift[1]

    # ---------------------- rotate box ----------------------
    rotated = []
    for bbox in text_polys:
        corners = [
            np.dot(rot_mat, np.array([bbox[k, 0], bbox[k, 1], 1]))
            for k in range(4)
        ]
        rotated.append(corners)
    return np.array(rotated, dtype=np.float32)
|