pyxllib 0.3.96__py3-none-any.whl → 0.3.197__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyxllib/algo/geo.py +12 -0
- pyxllib/algo/intervals.py +1 -1
- pyxllib/algo/matcher.py +78 -0
- pyxllib/algo/pupil.py +187 -19
- pyxllib/algo/specialist.py +2 -1
- pyxllib/algo/stat.py +38 -2
- {pyxlpr → pyxllib/autogui}/__init__.py +1 -1
- pyxllib/autogui/activewin.py +246 -0
- pyxllib/autogui/all.py +9 -0
- pyxllib/{ext/autogui → autogui}/autogui.py +40 -11
- pyxllib/autogui/uiautolib.py +362 -0
- pyxllib/autogui/wechat.py +827 -0
- pyxllib/autogui/wechat_msg.py +421 -0
- pyxllib/autogui/wxautolib.py +84 -0
- pyxllib/cv/slidercaptcha.py +137 -0
- pyxllib/data/echarts.py +123 -12
- pyxllib/data/jsonlib.py +89 -0
- pyxllib/data/pglib.py +514 -30
- pyxllib/data/sqlite.py +231 -4
- pyxllib/ext/JLineViewer.py +14 -1
- pyxllib/ext/drissionlib.py +277 -0
- pyxllib/ext/kq5034lib.py +0 -1594
- pyxllib/ext/robustprocfile.py +497 -0
- pyxllib/ext/unixlib.py +6 -5
- pyxllib/ext/utools.py +108 -95
- pyxllib/ext/webhook.py +32 -14
- pyxllib/ext/wjxlib.py +88 -0
- pyxllib/ext/wpsapi.py +124 -0
- pyxllib/ext/xlwork.py +9 -0
- pyxllib/ext/yuquelib.py +1003 -71
- pyxllib/file/docxlib.py +1 -1
- pyxllib/file/libreoffice.py +165 -0
- pyxllib/file/movielib.py +9 -0
- pyxllib/file/packlib/__init__.py +112 -75
- pyxllib/file/pdflib.py +1 -1
- pyxllib/file/pupil.py +1 -1
- pyxllib/file/specialist/dirlib.py +1 -1
- pyxllib/file/specialist/download.py +10 -3
- pyxllib/file/specialist/filelib.py +266 -55
- pyxllib/file/xlsxlib.py +205 -50
- pyxllib/file/xlsyncfile.py +341 -0
- pyxllib/prog/cachetools.py +64 -0
- pyxllib/prog/filelock.py +42 -0
- pyxllib/prog/multiprogs.py +940 -0
- pyxllib/prog/newbie.py +9 -2
- pyxllib/prog/pupil.py +129 -60
- pyxllib/prog/specialist/__init__.py +176 -2
- pyxllib/prog/specialist/bc.py +5 -2
- pyxllib/prog/specialist/browser.py +11 -2
- pyxllib/prog/specialist/datetime.py +68 -0
- pyxllib/prog/specialist/tictoc.py +12 -13
- pyxllib/prog/specialist/xllog.py +5 -5
- pyxllib/prog/xlosenv.py +7 -0
- pyxllib/text/airscript.js +744 -0
- pyxllib/text/charclasslib.py +17 -5
- pyxllib/text/jiebalib.py +6 -3
- pyxllib/text/jinjalib.py +32 -0
- pyxllib/text/jsa_ai_prompt.md +271 -0
- pyxllib/text/jscode.py +159 -4
- pyxllib/text/nestenv.py +1 -1
- pyxllib/text/newbie.py +12 -0
- pyxllib/text/pupil/common.py +26 -0
- pyxllib/text/specialist/ptag.py +2 -2
- pyxllib/text/templates/echart_base.html +11 -0
- pyxllib/text/templates/highlight_code.html +17 -0
- pyxllib/text/templates/latex_editor.html +103 -0
- pyxllib/text/xmllib.py +76 -14
- pyxllib/xl.py +2 -1
- pyxllib-0.3.197.dist-info/METADATA +48 -0
- pyxllib-0.3.197.dist-info/RECORD +126 -0
- {pyxllib-0.3.96.dist-info → pyxllib-0.3.197.dist-info}/WHEEL +1 -2
- pyxllib/ext/autogui/__init__.py +0 -8
- pyxllib-0.3.96.dist-info/METADATA +0 -51
- pyxllib-0.3.96.dist-info/RECORD +0 -333
- pyxllib-0.3.96.dist-info/top_level.txt +0 -2
- pyxlpr/ai/__init__.py +0 -5
- pyxlpr/ai/clientlib.py +0 -1281
- pyxlpr/ai/specialist.py +0 -286
- pyxlpr/ai/torch_app.py +0 -172
- pyxlpr/ai/xlpaddle.py +0 -655
- pyxlpr/ai/xltorch.py +0 -705
- pyxlpr/data/__init__.py +0 -11
- pyxlpr/data/coco.py +0 -1325
- pyxlpr/data/datacls.py +0 -365
- pyxlpr/data/datasets.py +0 -200
- pyxlpr/data/gptlib.py +0 -1291
- pyxlpr/data/icdar/__init__.py +0 -96
- pyxlpr/data/icdar/deteval.py +0 -377
- pyxlpr/data/icdar/icdar2013.py +0 -341
- pyxlpr/data/icdar/iou.py +0 -340
- pyxlpr/data/icdar/rrc_evaluation_funcs_1_1.py +0 -463
- pyxlpr/data/imtextline.py +0 -473
- pyxlpr/data/labelme.py +0 -866
- pyxlpr/data/removeline.py +0 -179
- pyxlpr/data/specialist.py +0 -57
- pyxlpr/eval/__init__.py +0 -85
- pyxlpr/paddleocr.py +0 -776
- pyxlpr/ppocr/__init__.py +0 -15
- pyxlpr/ppocr/configs/rec/multi_language/generate_multi_language_configs.py +0 -226
- pyxlpr/ppocr/data/__init__.py +0 -135
- pyxlpr/ppocr/data/imaug/ColorJitter.py +0 -26
- pyxlpr/ppocr/data/imaug/__init__.py +0 -67
- pyxlpr/ppocr/data/imaug/copy_paste.py +0 -170
- pyxlpr/ppocr/data/imaug/east_process.py +0 -437
- pyxlpr/ppocr/data/imaug/gen_table_mask.py +0 -244
- pyxlpr/ppocr/data/imaug/iaa_augment.py +0 -114
- pyxlpr/ppocr/data/imaug/label_ops.py +0 -789
- pyxlpr/ppocr/data/imaug/make_border_map.py +0 -184
- pyxlpr/ppocr/data/imaug/make_pse_gt.py +0 -106
- pyxlpr/ppocr/data/imaug/make_shrink_map.py +0 -126
- pyxlpr/ppocr/data/imaug/operators.py +0 -433
- pyxlpr/ppocr/data/imaug/pg_process.py +0 -906
- pyxlpr/ppocr/data/imaug/randaugment.py +0 -143
- pyxlpr/ppocr/data/imaug/random_crop_data.py +0 -239
- pyxlpr/ppocr/data/imaug/rec_img_aug.py +0 -533
- pyxlpr/ppocr/data/imaug/sast_process.py +0 -777
- pyxlpr/ppocr/data/imaug/text_image_aug/__init__.py +0 -17
- pyxlpr/ppocr/data/imaug/text_image_aug/augment.py +0 -120
- pyxlpr/ppocr/data/imaug/text_image_aug/warp_mls.py +0 -168
- pyxlpr/ppocr/data/lmdb_dataset.py +0 -115
- pyxlpr/ppocr/data/pgnet_dataset.py +0 -104
- pyxlpr/ppocr/data/pubtab_dataset.py +0 -107
- pyxlpr/ppocr/data/simple_dataset.py +0 -372
- pyxlpr/ppocr/losses/__init__.py +0 -61
- pyxlpr/ppocr/losses/ace_loss.py +0 -52
- pyxlpr/ppocr/losses/basic_loss.py +0 -135
- pyxlpr/ppocr/losses/center_loss.py +0 -88
- pyxlpr/ppocr/losses/cls_loss.py +0 -30
- pyxlpr/ppocr/losses/combined_loss.py +0 -67
- pyxlpr/ppocr/losses/det_basic_loss.py +0 -208
- pyxlpr/ppocr/losses/det_db_loss.py +0 -80
- pyxlpr/ppocr/losses/det_east_loss.py +0 -63
- pyxlpr/ppocr/losses/det_pse_loss.py +0 -149
- pyxlpr/ppocr/losses/det_sast_loss.py +0 -121
- pyxlpr/ppocr/losses/distillation_loss.py +0 -272
- pyxlpr/ppocr/losses/e2e_pg_loss.py +0 -140
- pyxlpr/ppocr/losses/kie_sdmgr_loss.py +0 -113
- pyxlpr/ppocr/losses/rec_aster_loss.py +0 -99
- pyxlpr/ppocr/losses/rec_att_loss.py +0 -39
- pyxlpr/ppocr/losses/rec_ctc_loss.py +0 -44
- pyxlpr/ppocr/losses/rec_enhanced_ctc_loss.py +0 -70
- pyxlpr/ppocr/losses/rec_nrtr_loss.py +0 -30
- pyxlpr/ppocr/losses/rec_sar_loss.py +0 -28
- pyxlpr/ppocr/losses/rec_srn_loss.py +0 -47
- pyxlpr/ppocr/losses/table_att_loss.py +0 -109
- pyxlpr/ppocr/metrics/__init__.py +0 -44
- pyxlpr/ppocr/metrics/cls_metric.py +0 -45
- pyxlpr/ppocr/metrics/det_metric.py +0 -82
- pyxlpr/ppocr/metrics/distillation_metric.py +0 -73
- pyxlpr/ppocr/metrics/e2e_metric.py +0 -86
- pyxlpr/ppocr/metrics/eval_det_iou.py +0 -274
- pyxlpr/ppocr/metrics/kie_metric.py +0 -70
- pyxlpr/ppocr/metrics/rec_metric.py +0 -75
- pyxlpr/ppocr/metrics/table_metric.py +0 -50
- pyxlpr/ppocr/modeling/architectures/__init__.py +0 -32
- pyxlpr/ppocr/modeling/architectures/base_model.py +0 -88
- pyxlpr/ppocr/modeling/architectures/distillation_model.py +0 -60
- pyxlpr/ppocr/modeling/backbones/__init__.py +0 -54
- pyxlpr/ppocr/modeling/backbones/det_mobilenet_v3.py +0 -268
- pyxlpr/ppocr/modeling/backbones/det_resnet_vd.py +0 -246
- pyxlpr/ppocr/modeling/backbones/det_resnet_vd_sast.py +0 -285
- pyxlpr/ppocr/modeling/backbones/e2e_resnet_vd_pg.py +0 -265
- pyxlpr/ppocr/modeling/backbones/kie_unet_sdmgr.py +0 -186
- pyxlpr/ppocr/modeling/backbones/rec_mobilenet_v3.py +0 -138
- pyxlpr/ppocr/modeling/backbones/rec_mv1_enhance.py +0 -258
- pyxlpr/ppocr/modeling/backbones/rec_nrtr_mtb.py +0 -48
- pyxlpr/ppocr/modeling/backbones/rec_resnet_31.py +0 -210
- pyxlpr/ppocr/modeling/backbones/rec_resnet_aster.py +0 -143
- pyxlpr/ppocr/modeling/backbones/rec_resnet_fpn.py +0 -307
- pyxlpr/ppocr/modeling/backbones/rec_resnet_vd.py +0 -286
- pyxlpr/ppocr/modeling/heads/__init__.py +0 -54
- pyxlpr/ppocr/modeling/heads/cls_head.py +0 -52
- pyxlpr/ppocr/modeling/heads/det_db_head.py +0 -118
- pyxlpr/ppocr/modeling/heads/det_east_head.py +0 -121
- pyxlpr/ppocr/modeling/heads/det_pse_head.py +0 -37
- pyxlpr/ppocr/modeling/heads/det_sast_head.py +0 -128
- pyxlpr/ppocr/modeling/heads/e2e_pg_head.py +0 -253
- pyxlpr/ppocr/modeling/heads/kie_sdmgr_head.py +0 -206
- pyxlpr/ppocr/modeling/heads/multiheadAttention.py +0 -163
- pyxlpr/ppocr/modeling/heads/rec_aster_head.py +0 -393
- pyxlpr/ppocr/modeling/heads/rec_att_head.py +0 -202
- pyxlpr/ppocr/modeling/heads/rec_ctc_head.py +0 -88
- pyxlpr/ppocr/modeling/heads/rec_nrtr_head.py +0 -826
- pyxlpr/ppocr/modeling/heads/rec_sar_head.py +0 -402
- pyxlpr/ppocr/modeling/heads/rec_srn_head.py +0 -280
- pyxlpr/ppocr/modeling/heads/self_attention.py +0 -406
- pyxlpr/ppocr/modeling/heads/table_att_head.py +0 -246
- pyxlpr/ppocr/modeling/necks/__init__.py +0 -32
- pyxlpr/ppocr/modeling/necks/db_fpn.py +0 -111
- pyxlpr/ppocr/modeling/necks/east_fpn.py +0 -188
- pyxlpr/ppocr/modeling/necks/fpn.py +0 -138
- pyxlpr/ppocr/modeling/necks/pg_fpn.py +0 -314
- pyxlpr/ppocr/modeling/necks/rnn.py +0 -92
- pyxlpr/ppocr/modeling/necks/sast_fpn.py +0 -284
- pyxlpr/ppocr/modeling/necks/table_fpn.py +0 -110
- pyxlpr/ppocr/modeling/transforms/__init__.py +0 -28
- pyxlpr/ppocr/modeling/transforms/stn.py +0 -135
- pyxlpr/ppocr/modeling/transforms/tps.py +0 -308
- pyxlpr/ppocr/modeling/transforms/tps_spatial_transformer.py +0 -156
- pyxlpr/ppocr/optimizer/__init__.py +0 -61
- pyxlpr/ppocr/optimizer/learning_rate.py +0 -228
- pyxlpr/ppocr/optimizer/lr_scheduler.py +0 -49
- pyxlpr/ppocr/optimizer/optimizer.py +0 -160
- pyxlpr/ppocr/optimizer/regularizer.py +0 -52
- pyxlpr/ppocr/postprocess/__init__.py +0 -55
- pyxlpr/ppocr/postprocess/cls_postprocess.py +0 -33
- pyxlpr/ppocr/postprocess/db_postprocess.py +0 -234
- pyxlpr/ppocr/postprocess/east_postprocess.py +0 -143
- pyxlpr/ppocr/postprocess/locality_aware_nms.py +0 -200
- pyxlpr/ppocr/postprocess/pg_postprocess.py +0 -52
- pyxlpr/ppocr/postprocess/pse_postprocess/__init__.py +0 -15
- pyxlpr/ppocr/postprocess/pse_postprocess/pse/__init__.py +0 -29
- pyxlpr/ppocr/postprocess/pse_postprocess/pse/setup.py +0 -14
- pyxlpr/ppocr/postprocess/pse_postprocess/pse_postprocess.py +0 -118
- pyxlpr/ppocr/postprocess/rec_postprocess.py +0 -654
- pyxlpr/ppocr/postprocess/sast_postprocess.py +0 -355
- pyxlpr/ppocr/tools/__init__.py +0 -14
- pyxlpr/ppocr/tools/eval.py +0 -83
- pyxlpr/ppocr/tools/export_center.py +0 -77
- pyxlpr/ppocr/tools/export_model.py +0 -129
- pyxlpr/ppocr/tools/infer/predict_cls.py +0 -151
- pyxlpr/ppocr/tools/infer/predict_det.py +0 -300
- pyxlpr/ppocr/tools/infer/predict_e2e.py +0 -169
- pyxlpr/ppocr/tools/infer/predict_rec.py +0 -414
- pyxlpr/ppocr/tools/infer/predict_system.py +0 -204
- pyxlpr/ppocr/tools/infer/utility.py +0 -629
- pyxlpr/ppocr/tools/infer_cls.py +0 -83
- pyxlpr/ppocr/tools/infer_det.py +0 -134
- pyxlpr/ppocr/tools/infer_e2e.py +0 -122
- pyxlpr/ppocr/tools/infer_kie.py +0 -153
- pyxlpr/ppocr/tools/infer_rec.py +0 -146
- pyxlpr/ppocr/tools/infer_table.py +0 -107
- pyxlpr/ppocr/tools/program.py +0 -596
- pyxlpr/ppocr/tools/test_hubserving.py +0 -117
- pyxlpr/ppocr/tools/train.py +0 -163
- pyxlpr/ppocr/tools/xlprog.py +0 -748
- pyxlpr/ppocr/utils/EN_symbol_dict.txt +0 -94
- pyxlpr/ppocr/utils/__init__.py +0 -24
- pyxlpr/ppocr/utils/dict/ar_dict.txt +0 -117
- pyxlpr/ppocr/utils/dict/arabic_dict.txt +0 -162
- pyxlpr/ppocr/utils/dict/be_dict.txt +0 -145
- pyxlpr/ppocr/utils/dict/bg_dict.txt +0 -140
- pyxlpr/ppocr/utils/dict/chinese_cht_dict.txt +0 -8421
- pyxlpr/ppocr/utils/dict/cyrillic_dict.txt +0 -163
- pyxlpr/ppocr/utils/dict/devanagari_dict.txt +0 -167
- pyxlpr/ppocr/utils/dict/en_dict.txt +0 -63
- pyxlpr/ppocr/utils/dict/fa_dict.txt +0 -136
- pyxlpr/ppocr/utils/dict/french_dict.txt +0 -136
- pyxlpr/ppocr/utils/dict/german_dict.txt +0 -143
- pyxlpr/ppocr/utils/dict/hi_dict.txt +0 -162
- pyxlpr/ppocr/utils/dict/it_dict.txt +0 -118
- pyxlpr/ppocr/utils/dict/japan_dict.txt +0 -4399
- pyxlpr/ppocr/utils/dict/ka_dict.txt +0 -153
- pyxlpr/ppocr/utils/dict/korean_dict.txt +0 -3688
- pyxlpr/ppocr/utils/dict/latin_dict.txt +0 -185
- pyxlpr/ppocr/utils/dict/mr_dict.txt +0 -153
- pyxlpr/ppocr/utils/dict/ne_dict.txt +0 -153
- pyxlpr/ppocr/utils/dict/oc_dict.txt +0 -96
- pyxlpr/ppocr/utils/dict/pu_dict.txt +0 -130
- pyxlpr/ppocr/utils/dict/rs_dict.txt +0 -91
- pyxlpr/ppocr/utils/dict/rsc_dict.txt +0 -134
- pyxlpr/ppocr/utils/dict/ru_dict.txt +0 -125
- pyxlpr/ppocr/utils/dict/ta_dict.txt +0 -128
- pyxlpr/ppocr/utils/dict/table_dict.txt +0 -277
- pyxlpr/ppocr/utils/dict/table_structure_dict.txt +0 -2759
- pyxlpr/ppocr/utils/dict/te_dict.txt +0 -151
- pyxlpr/ppocr/utils/dict/ug_dict.txt +0 -114
- pyxlpr/ppocr/utils/dict/uk_dict.txt +0 -142
- pyxlpr/ppocr/utils/dict/ur_dict.txt +0 -137
- pyxlpr/ppocr/utils/dict/xi_dict.txt +0 -110
- pyxlpr/ppocr/utils/dict90.txt +0 -90
- pyxlpr/ppocr/utils/e2e_metric/Deteval.py +0 -574
- pyxlpr/ppocr/utils/e2e_metric/polygon_fast.py +0 -83
- pyxlpr/ppocr/utils/e2e_utils/extract_batchsize.py +0 -87
- pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_fast.py +0 -457
- pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_slow.py +0 -592
- pyxlpr/ppocr/utils/e2e_utils/pgnet_pp_utils.py +0 -162
- pyxlpr/ppocr/utils/e2e_utils/visual.py +0 -162
- pyxlpr/ppocr/utils/en_dict.txt +0 -95
- pyxlpr/ppocr/utils/gen_label.py +0 -81
- pyxlpr/ppocr/utils/ic15_dict.txt +0 -36
- pyxlpr/ppocr/utils/iou.py +0 -54
- pyxlpr/ppocr/utils/logging.py +0 -69
- pyxlpr/ppocr/utils/network.py +0 -84
- pyxlpr/ppocr/utils/ppocr_keys_v1.txt +0 -6623
- pyxlpr/ppocr/utils/profiler.py +0 -110
- pyxlpr/ppocr/utils/save_load.py +0 -150
- pyxlpr/ppocr/utils/stats.py +0 -72
- pyxlpr/ppocr/utils/utility.py +0 -80
- pyxlpr/ppstructure/__init__.py +0 -13
- pyxlpr/ppstructure/predict_system.py +0 -187
- pyxlpr/ppstructure/table/__init__.py +0 -13
- pyxlpr/ppstructure/table/eval_table.py +0 -72
- pyxlpr/ppstructure/table/matcher.py +0 -192
- pyxlpr/ppstructure/table/predict_structure.py +0 -136
- pyxlpr/ppstructure/table/predict_table.py +0 -221
- pyxlpr/ppstructure/table/table_metric/__init__.py +0 -16
- pyxlpr/ppstructure/table/table_metric/parallel.py +0 -51
- pyxlpr/ppstructure/table/table_metric/table_metric.py +0 -247
- pyxlpr/ppstructure/table/tablepyxl/__init__.py +0 -13
- pyxlpr/ppstructure/table/tablepyxl/style.py +0 -283
- pyxlpr/ppstructure/table/tablepyxl/tablepyxl.py +0 -118
- pyxlpr/ppstructure/utility.py +0 -71
- pyxlpr/xlai.py +0 -10
- /pyxllib/{ext/autogui → autogui}/virtualkey.py +0 -0
- {pyxllib-0.3.96.dist-info → pyxllib-0.3.197.dist-info/licenses}/LICENSE +0 -0
@@ -1,72 +0,0 @@
|
|
1
|
-
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
import os
|
15
|
-
import sys
|
16
|
-
__dir__ = os.path.dirname(os.path.abspath(__file__))
|
17
|
-
sys.path.append(__dir__)
|
18
|
-
sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
|
19
|
-
|
20
|
-
import cv2
|
21
|
-
import json
|
22
|
-
from tqdm import tqdm
|
23
|
-
from pyxlpr.ppstructure.table.table_metric import TEDS
|
24
|
-
from pyxlpr.ppstructure.table.predict_table import TableSystem
|
25
|
-
from pyxlpr.ppstructure.utility import init_args
|
26
|
-
from pyxlpr.ppocr.utils.logging import get_logger
|
27
|
-
|
28
|
-
logger = get_logger()
|
29
|
-
|
30
|
-
|
31
|
-
def parse_args():
|
32
|
-
parser = init_args()
|
33
|
-
parser.add_argument("--gt_path", type=str)
|
34
|
-
return parser.parse_args()
|
35
|
-
|
36
|
-
def main(gt_path, img_root, args):
|
37
|
-
teds = TEDS(n_jobs=16)
|
38
|
-
|
39
|
-
text_sys = TableSystem(args)
|
40
|
-
jsons_gt = json.load(open(gt_path)) # gt
|
41
|
-
pred_htmls = []
|
42
|
-
gt_htmls = []
|
43
|
-
for img_name in tqdm(jsons_gt):
|
44
|
-
# read image
|
45
|
-
img = cv2.imread(os.path.join(img_root,img_name))
|
46
|
-
pred_html = text_sys(img)
|
47
|
-
pred_htmls.append(pred_html)
|
48
|
-
|
49
|
-
gt_structures, gt_bboxes, gt_contents = jsons_gt[img_name]
|
50
|
-
gt_html, gt = get_gt_html(gt_structures, gt_contents)
|
51
|
-
gt_htmls.append(gt_html)
|
52
|
-
scores = teds.batch_evaluate_html(gt_htmls, pred_htmls)
|
53
|
-
logger.info('teds:', sum(scores) / len(scores))
|
54
|
-
|
55
|
-
|
56
|
-
def get_gt_html(gt_structures, gt_contents):
|
57
|
-
end_html = []
|
58
|
-
td_index = 0
|
59
|
-
for tag in gt_structures:
|
60
|
-
if '</td>' in tag:
|
61
|
-
if gt_contents[td_index] != []:
|
62
|
-
end_html.extend(gt_contents[td_index])
|
63
|
-
end_html.append(tag)
|
64
|
-
td_index += 1
|
65
|
-
else:
|
66
|
-
end_html.append(tag)
|
67
|
-
return ''.join(end_html), end_html
|
68
|
-
|
69
|
-
|
70
|
-
if __name__ == '__main__':
|
71
|
-
args = parse_args()
|
72
|
-
main(args.gt_path,args.image_dir, args)
|
@@ -1,192 +0,0 @@
|
|
1
|
-
import json
|
2
|
-
def distance(box_1, box_2):
|
3
|
-
x1, y1, x2, y2 = box_1
|
4
|
-
x3, y3, x4, y4 = box_2
|
5
|
-
dis = abs(x3 - x1) + abs(y3 - y1) + abs(x4- x2) + abs(y4 - y2)
|
6
|
-
dis_2 = abs(x3 - x1) + abs(y3 - y1)
|
7
|
-
dis_3 = abs(x4- x2) + abs(y4 - y2)
|
8
|
-
return dis + min(dis_2, dis_3)
|
9
|
-
|
10
|
-
def compute_iou(rec1, rec2):
|
11
|
-
"""
|
12
|
-
computing IoU
|
13
|
-
:param rec1: (y0, x0, y1, x1), which reflects
|
14
|
-
(top, left, bottom, right)
|
15
|
-
:param rec2: (y0, x0, y1, x1)
|
16
|
-
:return: scala value of IoU
|
17
|
-
"""
|
18
|
-
# computing area of each rectangles
|
19
|
-
S_rec1 = (rec1[2] - rec1[0]) * (rec1[3] - rec1[1])
|
20
|
-
S_rec2 = (rec2[2] - rec2[0]) * (rec2[3] - rec2[1])
|
21
|
-
|
22
|
-
# computing the sum_area
|
23
|
-
sum_area = S_rec1 + S_rec2
|
24
|
-
|
25
|
-
# find the each edge of intersect rectangle
|
26
|
-
left_line = max(rec1[1], rec2[1])
|
27
|
-
right_line = min(rec1[3], rec2[3])
|
28
|
-
top_line = max(rec1[0], rec2[0])
|
29
|
-
bottom_line = min(rec1[2], rec2[2])
|
30
|
-
|
31
|
-
# judge if there is an intersect
|
32
|
-
if left_line >= right_line or top_line >= bottom_line:
|
33
|
-
return 0.0
|
34
|
-
else:
|
35
|
-
intersect = (right_line - left_line) * (bottom_line - top_line)
|
36
|
-
return (intersect / (sum_area - intersect))*1.0
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
def matcher_merge(ocr_bboxes, pred_bboxes):
|
41
|
-
all_dis = []
|
42
|
-
ious = []
|
43
|
-
matched = {}
|
44
|
-
for i, gt_box in enumerate(ocr_bboxes):
|
45
|
-
distances = []
|
46
|
-
for j, pred_box in enumerate(pred_bboxes):
|
47
|
-
# compute l1 distence and IOU between two boxes
|
48
|
-
distances.append((distance(gt_box, pred_box), 1. - compute_iou(gt_box, pred_box)))
|
49
|
-
sorted_distances = distances.copy()
|
50
|
-
# select nearest cell
|
51
|
-
sorted_distances = sorted(sorted_distances, key = lambda item: (item[1], item[0]))
|
52
|
-
if distances.index(sorted_distances[0]) not in matched.keys():
|
53
|
-
matched[distances.index(sorted_distances[0])] = [i]
|
54
|
-
else:
|
55
|
-
matched[distances.index(sorted_distances[0])].append(i)
|
56
|
-
return matched#, sum(ious) / len(ious)
|
57
|
-
|
58
|
-
def complex_num(pred_bboxes):
|
59
|
-
complex_nums = []
|
60
|
-
for bbox in pred_bboxes:
|
61
|
-
distances = []
|
62
|
-
temp_ious = []
|
63
|
-
for pred_bbox in pred_bboxes:
|
64
|
-
if bbox != pred_bbox:
|
65
|
-
distances.append(distance(bbox, pred_bbox))
|
66
|
-
temp_ious.append(compute_iou(bbox, pred_bbox))
|
67
|
-
complex_nums.append(temp_ious[distances.index(min(distances))])
|
68
|
-
return sum(complex_nums) / len(complex_nums)
|
69
|
-
|
70
|
-
def get_rows(pred_bboxes):
|
71
|
-
pre_bbox = pred_bboxes[0]
|
72
|
-
res = []
|
73
|
-
step = 0
|
74
|
-
for i in range(len(pred_bboxes)):
|
75
|
-
bbox = pred_bboxes[i]
|
76
|
-
if bbox[1] - pre_bbox[1] > 2 or bbox[0] - pre_bbox[0] < 0:
|
77
|
-
break
|
78
|
-
else:
|
79
|
-
res.append(bbox)
|
80
|
-
step += 1
|
81
|
-
for i in range(step):
|
82
|
-
pred_bboxes.pop(0)
|
83
|
-
return res, pred_bboxes
|
84
|
-
def refine_rows(pred_bboxes): # 微调整行的框,使在一条水平线上
|
85
|
-
ys_1 = []
|
86
|
-
ys_2 = []
|
87
|
-
for box in pred_bboxes:
|
88
|
-
ys_1.append(box[1])
|
89
|
-
ys_2.append(box[3])
|
90
|
-
min_y_1 = sum(ys_1) / len(ys_1)
|
91
|
-
min_y_2 = sum(ys_2) / len(ys_2)
|
92
|
-
re_boxes = []
|
93
|
-
for box in pred_bboxes:
|
94
|
-
box[1] = min_y_1
|
95
|
-
box[3] = min_y_2
|
96
|
-
re_boxes.append(box)
|
97
|
-
return re_boxes
|
98
|
-
|
99
|
-
def matcher_refine_row(gt_bboxes, pred_bboxes):
|
100
|
-
before_refine_pred_bboxes = pred_bboxes.copy()
|
101
|
-
pred_bboxes = []
|
102
|
-
while(len(before_refine_pred_bboxes) != 0):
|
103
|
-
row_bboxes, before_refine_pred_bboxes = get_rows(before_refine_pred_bboxes)
|
104
|
-
print(row_bboxes)
|
105
|
-
pred_bboxes.extend(refine_rows(row_bboxes))
|
106
|
-
all_dis = []
|
107
|
-
ious = []
|
108
|
-
matched = {}
|
109
|
-
for i, gt_box in enumerate(gt_bboxes):
|
110
|
-
distances = []
|
111
|
-
#temp_ious = []
|
112
|
-
for j, pred_box in enumerate(pred_bboxes):
|
113
|
-
distances.append(distance(gt_box, pred_box))
|
114
|
-
#temp_ious.append(compute_iou(gt_box, pred_box))
|
115
|
-
#all_dis.append(min(distances))
|
116
|
-
#ious.append(temp_ious[distances.index(min(distances))])
|
117
|
-
if distances.index(min(distances)) not in matched.keys():
|
118
|
-
matched[distances.index(min(distances))] = [i]
|
119
|
-
else:
|
120
|
-
matched[distances.index(min(distances))].append(i)
|
121
|
-
return matched#, sum(ious) / len(ious)
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
#先挑选出一行,再进行匹配
|
126
|
-
def matcher_structure_1(gt_bboxes, pred_bboxes_rows, pred_bboxes):
|
127
|
-
gt_box_index = 0
|
128
|
-
delete_gt_bboxes = gt_bboxes.copy()
|
129
|
-
match_bboxes_ready = []
|
130
|
-
matched = {}
|
131
|
-
while(len(delete_gt_bboxes) != 0):
|
132
|
-
row_bboxes, delete_gt_bboxes = get_rows(delete_gt_bboxes)
|
133
|
-
row_bboxes = sorted(row_bboxes, key = lambda key: key[0])
|
134
|
-
if len(pred_bboxes_rows) > 0:
|
135
|
-
match_bboxes_ready.extend(pred_bboxes_rows.pop(0))
|
136
|
-
print(row_bboxes)
|
137
|
-
for i, gt_box in enumerate(row_bboxes):
|
138
|
-
#print(gt_box)
|
139
|
-
pred_distances = []
|
140
|
-
distances = []
|
141
|
-
for pred_bbox in pred_bboxes:
|
142
|
-
pred_distances.append(distance(gt_box, pred_bbox))
|
143
|
-
for j, pred_box in enumerate(match_bboxes_ready):
|
144
|
-
distances.append(distance(gt_box, pred_box))
|
145
|
-
index = pred_distances.index(min(distances))
|
146
|
-
#print('index', index)
|
147
|
-
if index not in matched.keys():
|
148
|
-
matched[index] = [gt_box_index]
|
149
|
-
else:
|
150
|
-
matched[index].append(gt_box_index)
|
151
|
-
gt_box_index += 1
|
152
|
-
return matched
|
153
|
-
|
154
|
-
def matcher_structure(gt_bboxes, pred_bboxes_rows, pred_bboxes):
|
155
|
-
'''
|
156
|
-
gt_bboxes: 排序后
|
157
|
-
pred_bboxes:
|
158
|
-
'''
|
159
|
-
pre_bbox = gt_bboxes[0]
|
160
|
-
matched = {}
|
161
|
-
match_bboxes_ready = []
|
162
|
-
match_bboxes_ready.extend(pred_bboxes_rows.pop(0))
|
163
|
-
for i, gt_box in enumerate(gt_bboxes):
|
164
|
-
|
165
|
-
pred_distances = []
|
166
|
-
for pred_bbox in pred_bboxes:
|
167
|
-
pred_distances.append(distance(gt_box, pred_bbox))
|
168
|
-
distances = []
|
169
|
-
gap_pre = gt_box[1] - pre_bbox[1]
|
170
|
-
gap_pre_1 = gt_box[0] - pre_bbox[2]
|
171
|
-
#print(gap_pre, len(pred_bboxes_rows))
|
172
|
-
if (gap_pre_1 < 0 and len(pred_bboxes_rows) > 0):
|
173
|
-
match_bboxes_ready.extend(pred_bboxes_rows.pop(0))
|
174
|
-
if len(pred_bboxes_rows) == 1:
|
175
|
-
match_bboxes_ready.extend(pred_bboxes_rows.pop(0))
|
176
|
-
if len(match_bboxes_ready) == 0 and len(pred_bboxes_rows) > 0:
|
177
|
-
match_bboxes_ready.extend(pred_bboxes_rows.pop(0))
|
178
|
-
if len(match_bboxes_ready) == 0 and len(pred_bboxes_rows) == 0:
|
179
|
-
break
|
180
|
-
#print(match_bboxes_ready)
|
181
|
-
for j, pred_box in enumerate(match_bboxes_ready):
|
182
|
-
distances.append(distance(gt_box, pred_box))
|
183
|
-
index = pred_distances.index(min(distances))
|
184
|
-
#print(gt_box, index)
|
185
|
-
#match_bboxes_ready.pop(distances.index(min(distances)))
|
186
|
-
print(gt_box, match_bboxes_ready[distances.index(min(distances))])
|
187
|
-
if index not in matched.keys():
|
188
|
-
matched[index] = [i]
|
189
|
-
else:
|
190
|
-
matched[index].append(i)
|
191
|
-
pre_bbox = gt_box
|
192
|
-
return matched
|
@@ -1,136 +0,0 @@
|
|
1
|
-
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
import os
|
15
|
-
import sys
|
16
|
-
|
17
|
-
__dir__ = os.path.dirname(os.path.abspath(__file__))
|
18
|
-
sys.path.append(__dir__)
|
19
|
-
sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
|
20
|
-
|
21
|
-
os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
|
22
|
-
|
23
|
-
import cv2
|
24
|
-
import numpy as np
|
25
|
-
import time
|
26
|
-
|
27
|
-
import pyxlpr.ppocr.tools.infer.utility as utility
|
28
|
-
from pyxlpr.ppocr.data import create_operators, transform
|
29
|
-
from pyxlpr.ppocr.postprocess import build_post_process
|
30
|
-
from pyxlpr.ppocr.utils.logging import get_logger
|
31
|
-
from pyxlpr.ppocr.utils.utility import get_image_file_list, check_and_read_gif
|
32
|
-
from pyxlpr.ppstructure.utility import parse_args
|
33
|
-
|
34
|
-
logger = get_logger()
|
35
|
-
|
36
|
-
|
37
|
-
class TableStructurer(object):
|
38
|
-
def __init__(self, args):
|
39
|
-
pre_process_list = [{
|
40
|
-
'ResizeTableImage': {
|
41
|
-
'max_len': args.table_max_len
|
42
|
-
}
|
43
|
-
}, {
|
44
|
-
'NormalizeImage': {
|
45
|
-
'std': [0.229, 0.224, 0.225],
|
46
|
-
'mean': [0.485, 0.456, 0.406],
|
47
|
-
'scale': '1./255.',
|
48
|
-
'order': 'hwc'
|
49
|
-
}
|
50
|
-
}, {
|
51
|
-
'PaddingTableImage': None
|
52
|
-
}, {
|
53
|
-
'ToCHWImage': None
|
54
|
-
}, {
|
55
|
-
'KeepKeys': {
|
56
|
-
'keep_keys': ['image']
|
57
|
-
}
|
58
|
-
}]
|
59
|
-
postprocess_params = {
|
60
|
-
'name': 'TableLabelDecode',
|
61
|
-
"character_type": args.table_char_type,
|
62
|
-
"character_dict_path": args.table_char_dict_path,
|
63
|
-
}
|
64
|
-
|
65
|
-
self.preprocess_op = create_operators(pre_process_list)
|
66
|
-
self.postprocess_op = build_post_process(postprocess_params)
|
67
|
-
self.predictor, self.input_tensor, self.output_tensors, self.config = \
|
68
|
-
utility.create_predictor(args, 'table', logger)
|
69
|
-
|
70
|
-
def __call__(self, img):
|
71
|
-
ori_im = img.copy()
|
72
|
-
data = {'image': img}
|
73
|
-
data = transform(data, self.preprocess_op)
|
74
|
-
img = data[0]
|
75
|
-
if img is None:
|
76
|
-
return None, 0
|
77
|
-
img = np.expand_dims(img, axis=0)
|
78
|
-
img = img.copy()
|
79
|
-
starttime = time.time()
|
80
|
-
|
81
|
-
self.input_tensor.copy_from_cpu(img)
|
82
|
-
self.predictor.run()
|
83
|
-
outputs = []
|
84
|
-
for output_tensor in self.output_tensors:
|
85
|
-
output = output_tensor.copy_to_cpu()
|
86
|
-
outputs.append(output)
|
87
|
-
|
88
|
-
preds = {}
|
89
|
-
preds['structure_probs'] = outputs[1]
|
90
|
-
preds['loc_preds'] = outputs[0]
|
91
|
-
|
92
|
-
post_result = self.postprocess_op(preds)
|
93
|
-
|
94
|
-
structure_str_list = post_result['structure_str_list']
|
95
|
-
res_loc = post_result['res_loc']
|
96
|
-
imgh, imgw = ori_im.shape[0:2]
|
97
|
-
res_loc_final = []
|
98
|
-
for rno in range(len(res_loc[0])):
|
99
|
-
x0, y0, x1, y1 = res_loc[0][rno]
|
100
|
-
left = max(int(imgw * x0), 0)
|
101
|
-
top = max(int(imgh * y0), 0)
|
102
|
-
right = min(int(imgw * x1), imgw - 1)
|
103
|
-
bottom = min(int(imgh * y1), imgh - 1)
|
104
|
-
res_loc_final.append([left, top, right, bottom])
|
105
|
-
|
106
|
-
structure_str_list = structure_str_list[0][:-1]
|
107
|
-
structure_str_list = ['<html>', '<body>', '<table>'] + structure_str_list + ['</table>', '</body>', '</html>']
|
108
|
-
|
109
|
-
elapse = time.time() - starttime
|
110
|
-
return (structure_str_list, res_loc_final), elapse
|
111
|
-
|
112
|
-
|
113
|
-
def main(args):
|
114
|
-
image_file_list = get_image_file_list(args.image_dir)
|
115
|
-
table_structurer = TableStructurer(args)
|
116
|
-
count = 0
|
117
|
-
total_time = 0
|
118
|
-
for image_file in image_file_list:
|
119
|
-
img, flag = check_and_read_gif(image_file)
|
120
|
-
if not flag:
|
121
|
-
img = cv2.imread(image_file)
|
122
|
-
if img is None:
|
123
|
-
logger.info("error in loading image:{}".format(image_file))
|
124
|
-
continue
|
125
|
-
structure_res, elapse = table_structurer(img)
|
126
|
-
|
127
|
-
logger.info("result: {}".format(structure_res))
|
128
|
-
|
129
|
-
if count > 0:
|
130
|
-
total_time += elapse
|
131
|
-
count += 1
|
132
|
-
logger.info("Predict time of {}: {}".format(image_file, elapse))
|
133
|
-
|
134
|
-
|
135
|
-
if __name__ == "__main__":
|
136
|
-
main(parse_args())
|
@@ -1,221 +0,0 @@
|
|
1
|
-
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
|
15
|
-
import os
|
16
|
-
import sys
|
17
|
-
import subprocess
|
18
|
-
|
19
|
-
__dir__ = os.path.dirname(os.path.abspath(__file__))
|
20
|
-
sys.path.append(__dir__)
|
21
|
-
sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
|
22
|
-
sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
|
23
|
-
|
24
|
-
os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
|
25
|
-
import cv2
|
26
|
-
import copy
|
27
|
-
import numpy as np
|
28
|
-
import time
|
29
|
-
import pyxlpr.ppocr.tools.infer.predict_rec as predict_rec
|
30
|
-
import pyxlpr.ppocr.tools.infer.predict_det as predict_det
|
31
|
-
from pyxlpr.ppocr.utils.utility import get_image_file_list, check_and_read_gif
|
32
|
-
from pyxlpr.ppocr.utils.logging import get_logger
|
33
|
-
from pyxlpr.ppstructure.table.matcher import distance, compute_iou
|
34
|
-
from pyxlpr.ppstructure.utility import parse_args
|
35
|
-
import pyxlpr.ppstructure.table.predict_structure as predict_strture
|
36
|
-
|
37
|
-
logger = get_logger()
|
38
|
-
|
39
|
-
|
40
|
-
def expand(pix, det_box, shape):
|
41
|
-
x0, y0, x1, y1 = det_box
|
42
|
-
# print(shape)
|
43
|
-
h, w, c = shape
|
44
|
-
tmp_x0 = x0 - pix
|
45
|
-
tmp_x1 = x1 + pix
|
46
|
-
tmp_y0 = y0 - pix
|
47
|
-
tmp_y1 = y1 + pix
|
48
|
-
x0_ = tmp_x0 if tmp_x0 >= 0 else 0
|
49
|
-
x1_ = tmp_x1 if tmp_x1 <= w else w
|
50
|
-
y0_ = tmp_y0 if tmp_y0 >= 0 else 0
|
51
|
-
y1_ = tmp_y1 if tmp_y1 <= h else h
|
52
|
-
return x0_, y0_, x1_, y1_
|
53
|
-
|
54
|
-
|
55
|
-
class TableSystem(object):
    """Table recognition pipeline: structure prediction + text detection/recognition.

    Calling an instance on an image runs the table structurer, the text
    detector and the text recognizer, then merges their outputs into an HTML
    string describing the table.
    """

    def __init__(self, args, text_detector=None, text_recognizer=None):
        """Build the three sub-models.

        Args:
            args: Options object forwarded to the model constructors.
            text_detector: Optional ready-made detector; when None a
                ``predict_det.TextDetector(args)`` is created.
            text_recognizer: Optional ready-made recognizer; when None a
                ``predict_rec.TextRecognizer(args)`` is created.
        """
        self.text_detector = predict_det.TextDetector(args) if text_detector is None else text_detector
        self.text_recognizer = predict_rec.TextRecognizer(args) if text_recognizer is None else text_recognizer
        self.table_structurer = predict_strture.TableStructurer(args)

    def __call__(self, img):
        """Recognize the table in ``img`` and return it as HTML.

        Args:
            img: Image array; it is copied before being handed to the models
                and sliced as ``img[y0:y1, x0:x1, :]``, so it must be 3-D.

        Returns:
            The predicted HTML string, or None when the text detector returns
            None instead of a box array.
        """
        ori_im = img.copy()
        structure_res, elapse = self.table_structurer(copy.deepcopy(img))
        dt_boxes, elapse = self.text_detector(copy.deepcopy(img))
        # Guard before the boxes are used; the check used to sit after
        # sorted_boxes()/len() and could never trigger.
        if dt_boxes is None:
            return None
        dt_boxes = sorted_boxes(dt_boxes)

        # Collapse each detected polygon to its bounding rectangle, padded by 1px.
        r_boxes = []
        for box in dt_boxes:
            xs, ys = box[:, 0], box[:, 1]
            r_boxes.append([xs.min() - 1, ys.min() - 1, xs.max() + 1, ys.max() + 1])
        dt_boxes = np.array(r_boxes)

        logger.debug("dt_boxes num : {}, elapse : {}".format(
            len(dt_boxes), elapse))

        # Crop every text region (with a 2px margin) for the recognizer.
        img_crop_list = []
        for det_box in dt_boxes:
            x0, y0, x1, y1 = expand(2, det_box, ori_im.shape)
            img_crop_list.append(ori_im[int(y0):int(y1), int(x0):int(x1), :])
        rec_res, elapse = self.text_recognizer(img_crop_list)
        logger.debug("rec_res num : {}, elapse : {}".format(
            len(rec_res), elapse))

        pred_html, pred = self.rebuild_table(structure_res, dt_boxes, rec_res)
        return pred_html

    def rebuild_table(self, structure_res, dt_boxes, rec_res):
        """Combine structure, boxes and recognized text into HTML.

        Args:
            structure_res: Pair ``(pred_structures, pred_bboxes)``.
            dt_boxes: Text boxes as ``[x_min, y_min, x_max, y_max]`` rows.
            rec_res: Recognition results, one entry per text box.

        Returns:
            Tuple ``(html_string, html_parts)`` as produced by ``get_pred_html``.
        """
        pred_structures, pred_bboxes = structure_res
        matched_index = self.match_result(dt_boxes, pred_bboxes)
        pred_html, pred = self.get_pred_html(pred_structures, matched_index, rec_res)
        return pred_html, pred

    def match_result(self, dt_boxes, pred_bboxes):
        """Assign every text box to its closest predicted cell.

        Args:
            dt_boxes: Detected text boxes.
            pred_bboxes: Cell boxes predicted by the structure model.

        Returns:
            Dict mapping a cell index to the list of text-box indices assigned
            to it, in text-box order. Empty when there are no cells.
        """
        matched = {}
        if len(pred_bboxes) == 0:
            # Nothing to match against; avoids indexing an empty candidate list.
            return matched
        for i, gt_box in enumerate(dt_boxes):
            # Cost per cell: (1 - IoU) first, L1 distance as the tie-breaker.
            costs = [(1. - compute_iou(gt_box, pred_box), distance(gt_box, pred_box))
                     for pred_box in pred_bboxes]
            # min() keeps the first index on ties, like the former
            # sorted(...)[0] + list.index(...) lookup.
            best = min(range(len(costs)), key=costs.__getitem__)
            matched.setdefault(best, []).append(i)
        return matched

    def get_pred_html(self, pred_structures, matched_index, ocr_contents):
        """Insert recognized text into the predicted HTML structure.

        Args:
            pred_structures: Sequence of HTML structure tokens.
            matched_index: Dict from cell index to the text-box indices in it,
                as returned by ``match_result``.
            ocr_contents: Recognition results; element ``[0]`` of each entry is
                used as the text (presumably ``(text, score)`` pairs — confirm
                against the recognizer).

        Returns:
            Tuple ``(html, parts)``: ``parts`` is the list of emitted pieces
            (cell text is added character by character) and ``html`` is
            ``''.join(parts)``.
        """
        end_html = []
        td_index = 0
        for tag in pred_structures:
            if '</td>' not in tag:
                end_html.append(tag)
                continue

            cell = matched_index.get(td_index)
            if cell:
                multi = len(cell) > 1
                # Look at the text of the first fragment. Testing membership on
                # the whole result entry never matched a "<b>..." string, so the
                # bold wrapper was dropped while the inner tags were stripped.
                b_with = multi and '<b>' in ocr_contents[cell[0]][0]
                if b_with:
                    end_html.extend('<b>')
                last = len(cell) - 1
                for i, ocr_idx in enumerate(cell):
                    content = ocr_contents[ocr_idx][0]
                    if multi:
                        if len(content) == 0:
                            continue
                        if content[0] == ' ':
                            content = content[1:]
                        # Fragments of one cell share a single outer <b>...</b>,
                        # so the per-fragment markers are removed here
                        # (assumes a leading "<b>" and a trailing "</b>").
                        if '<b>' in content:
                            content = content[3:]
                        if '</b>' in content:
                            content = content[:-4]
                        if len(content) == 0:
                            continue
                        # Separate fragments with exactly one space.
                        if i != last and ' ' != content[-1]:
                            content += ' '
                    end_html.extend(content)
                if b_with:
                    end_html.extend('</b>')

            end_html.append(tag)
            td_index += 1
        return ''.join(end_html), end_html
|
152
|
-
|
153
|
-
|
154
|
-
def sorted_boxes(dt_boxes):
    """Sort text boxes in reading order: top to bottom, then left to right.

    Boxes are first ordered by the ``(y, x)`` of their first point. Boxes whose
    first-point ``y`` differs by less than 10 are then treated as one line and
    ordered by ``x``.

    args:
        dt_boxes(array): detected text boxes, each indexable as
            ``box[0][0]`` (x) and ``box[0][1]`` (y) for its first point
    return:
        list of the same boxes in sorted order
    """
    _boxes = sorted(dt_boxes, key=lambda box: (box[0][1], box[0][0]))

    # Insertion-style pass: move each box left while it belongs to the same
    # line as its predecessor and starts further left. A single forward pass of
    # adjacent swaps leaves runs of three or more boxes on one line unsorted.
    for i in range(len(_boxes) - 1):
        for j in range(i, -1, -1):
            same_line = abs(_boxes[j + 1][0][1] - _boxes[j][0][1]) < 10
            if same_line and _boxes[j + 1][0][0] < _boxes[j][0][0]:
                _boxes[j], _boxes[j + 1] = _boxes[j + 1], _boxes[j]
            else:
                break
    return _boxes
|
173
|
-
|
174
|
-
|
175
|
-
def to_excel(html_table, excel_path):
    """Write an HTML table to an Excel file through ``tablepyxl``.

    Args:
        html_table: HTML passed to ``tablepyxl.document_to_xl``.
        excel_path: Destination path passed to ``tablepyxl.document_to_xl``.
    """
    # Imported inside the function, so tablepyxl is only needed when this runs.
    import tablepyxl.tablepyxl as html_to_xl
    html_to_xl.document_to_xl(html_table, excel_path)
|
178
|
-
|
179
|
-
|
180
|
-
def main(args):
    """Run table recognition on this process's share of images and save Excel files.

    Args:
        args: Parsed options. Reads ``image_dir``, ``process_id``,
            ``total_process_num`` and ``output``; the object is also passed to
            ``TableSystem``.
    """
    image_file_list = get_image_file_list(args.image_dir)
    # Each worker takes every total_process_num-th file, offset by its id.
    image_file_list = image_file_list[args.process_id::args.total_process_num]
    os.makedirs(args.output, exist_ok=True)

    text_sys = TableSystem(args)
    img_num = len(image_file_list)
    for i, image_file in enumerate(image_file_list):
        logger.info("[{}/{}] {}".format(i, img_num, image_file))
        img, flag = check_and_read_gif(image_file)
        # Strip only the final extension: splitting on the first dot mapped
        # "a.v1.png" and "a.v2.png" to the same "a.xlsx".
        stem = os.path.splitext(os.path.basename(image_file))[0]
        excel_path = os.path.join(args.output, stem + '.xlsx')
        if not flag:
            img = cv2.imread(image_file)
        if img is None:
            logger.error("error in loading image:{}".format(image_file))
            continue
        starttime = time.time()
        pred_html = text_sys(img)

        to_excel(pred_html, excel_path)
        logger.info('excel saved to {}'.format(excel_path))
        logger.info(pred_html)
        # The reported time covers prediction plus writing the Excel file.
        elapse = time.time() - starttime
        logger.info("Predict time : {:.3f}s".format(elapse))
|
204
|
-
|
205
|
-
|
206
|
-
# Script entry point: run in-process, or fan out one subprocess per worker id.
if __name__ == "__main__":
    args = parse_args()
    if not args.use_mp:
        main(args)
    else:
        workers = []
        for process_id in range(args.total_process_num):
            # Re-run this script with the same argv, pinned to one process id
            # and with multiprocessing turned off.
            cmd = [
                sys.executable, "-u", *sys.argv,
                "--process_id={}".format(process_id),
                "--use_mp={}".format(False),
            ]
            workers.append(subprocess.Popen(cmd, stdout=sys.stdout, stderr=sys.stdout))
        for worker in workers:
            worker.wait()
|
@@ -1,16 +0,0 @@
|
|
1
|
-
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
|
15
|
-
__all__ = ['TEDS']
|
16
|
-
from .table_metric import TEDS
|
@@ -1,51 +0,0 @@
|
|
1
|
-
from tqdm import tqdm
|
2
|
-
from concurrent.futures import ProcessPoolExecutor, as_completed
|
3
|
-
|
4
|
-
|
5
|
-
def parallel_process(array, function, n_jobs=16, use_kwargs=False, front_num=0):
    """
    Map ``function`` over ``array`` in worker processes, showing progress bars.
    Args:
        array (array-like): Items to process; it is sliced, so it must support slicing.
        function (function): Callable applied to every item of array
        n_jobs (int, default=16): Number of worker processes; 1 runs everything
            serially in the current process
        use_kwargs (boolean, default=False): When True each item is unpacked as
            keyword arguments, i.e. ``function(**item)``
        front_num (int, default=0): Number of leading items processed serially
            before the remaining work starts
    Returns:
        list: results of the leading ``front_num`` items followed by the results
            of the remaining items in input order. In the multi-process path an
            item whose call raised contributes the exception object instead.
    """

    def _run(item):
        # In-process call honouring use_kwargs.
        return function(**item) if use_kwargs else function(item)

    # Leading items run in this process, before the rest is dispatched.
    head = [_run(item) for item in array[:front_num]] if front_num > 0 else []
    remaining = array[front_num:]

    # Single job: plain serial loop.
    if n_jobs == 1:
        return head + [_run(item) for item in tqdm(remaining)]

    with ProcessPoolExecutor(max_workers=n_jobs) as pool:
        if use_kwargs:
            futures = [pool.submit(function, **item) for item in remaining]
        else:
            futures = [pool.submit(function, item) for item in remaining]
        progress_options = {
            'total': len(futures),
            'unit': 'it',
            'unit_scale': True,
            'leave': True
        }
        # Consume completions only to advance the progress bar.
        for _ in tqdm(as_completed(futures), **progress_options):
            pass

    # Gather in submission order; a failed task yields its exception object.
    results = []
    for _, future in tqdm(enumerate(futures)):
        try:
            results.append(future.result())
        except Exception as exc:
            results.append(exc)
    return head + results
|