pyxllib 0.3.96__py3-none-any.whl → 0.3.197__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyxllib/algo/geo.py +12 -0
- pyxllib/algo/intervals.py +1 -1
- pyxllib/algo/matcher.py +78 -0
- pyxllib/algo/pupil.py +187 -19
- pyxllib/algo/specialist.py +2 -1
- pyxllib/algo/stat.py +38 -2
- {pyxlpr → pyxllib/autogui}/__init__.py +1 -1
- pyxllib/autogui/activewin.py +246 -0
- pyxllib/autogui/all.py +9 -0
- pyxllib/{ext/autogui → autogui}/autogui.py +40 -11
- pyxllib/autogui/uiautolib.py +362 -0
- pyxllib/autogui/wechat.py +827 -0
- pyxllib/autogui/wechat_msg.py +421 -0
- pyxllib/autogui/wxautolib.py +84 -0
- pyxllib/cv/slidercaptcha.py +137 -0
- pyxllib/data/echarts.py +123 -12
- pyxllib/data/jsonlib.py +89 -0
- pyxllib/data/pglib.py +514 -30
- pyxllib/data/sqlite.py +231 -4
- pyxllib/ext/JLineViewer.py +14 -1
- pyxllib/ext/drissionlib.py +277 -0
- pyxllib/ext/kq5034lib.py +0 -1594
- pyxllib/ext/robustprocfile.py +497 -0
- pyxllib/ext/unixlib.py +6 -5
- pyxllib/ext/utools.py +108 -95
- pyxllib/ext/webhook.py +32 -14
- pyxllib/ext/wjxlib.py +88 -0
- pyxllib/ext/wpsapi.py +124 -0
- pyxllib/ext/xlwork.py +9 -0
- pyxllib/ext/yuquelib.py +1003 -71
- pyxllib/file/docxlib.py +1 -1
- pyxllib/file/libreoffice.py +165 -0
- pyxllib/file/movielib.py +9 -0
- pyxllib/file/packlib/__init__.py +112 -75
- pyxllib/file/pdflib.py +1 -1
- pyxllib/file/pupil.py +1 -1
- pyxllib/file/specialist/dirlib.py +1 -1
- pyxllib/file/specialist/download.py +10 -3
- pyxllib/file/specialist/filelib.py +266 -55
- pyxllib/file/xlsxlib.py +205 -50
- pyxllib/file/xlsyncfile.py +341 -0
- pyxllib/prog/cachetools.py +64 -0
- pyxllib/prog/filelock.py +42 -0
- pyxllib/prog/multiprogs.py +940 -0
- pyxllib/prog/newbie.py +9 -2
- pyxllib/prog/pupil.py +129 -60
- pyxllib/prog/specialist/__init__.py +176 -2
- pyxllib/prog/specialist/bc.py +5 -2
- pyxllib/prog/specialist/browser.py +11 -2
- pyxllib/prog/specialist/datetime.py +68 -0
- pyxllib/prog/specialist/tictoc.py +12 -13
- pyxllib/prog/specialist/xllog.py +5 -5
- pyxllib/prog/xlosenv.py +7 -0
- pyxllib/text/airscript.js +744 -0
- pyxllib/text/charclasslib.py +17 -5
- pyxllib/text/jiebalib.py +6 -3
- pyxllib/text/jinjalib.py +32 -0
- pyxllib/text/jsa_ai_prompt.md +271 -0
- pyxllib/text/jscode.py +159 -4
- pyxllib/text/nestenv.py +1 -1
- pyxllib/text/newbie.py +12 -0
- pyxllib/text/pupil/common.py +26 -0
- pyxllib/text/specialist/ptag.py +2 -2
- pyxllib/text/templates/echart_base.html +11 -0
- pyxllib/text/templates/highlight_code.html +17 -0
- pyxllib/text/templates/latex_editor.html +103 -0
- pyxllib/text/xmllib.py +76 -14
- pyxllib/xl.py +2 -1
- pyxllib-0.3.197.dist-info/METADATA +48 -0
- pyxllib-0.3.197.dist-info/RECORD +126 -0
- {pyxllib-0.3.96.dist-info → pyxllib-0.3.197.dist-info}/WHEEL +1 -2
- pyxllib/ext/autogui/__init__.py +0 -8
- pyxllib-0.3.96.dist-info/METADATA +0 -51
- pyxllib-0.3.96.dist-info/RECORD +0 -333
- pyxllib-0.3.96.dist-info/top_level.txt +0 -2
- pyxlpr/ai/__init__.py +0 -5
- pyxlpr/ai/clientlib.py +0 -1281
- pyxlpr/ai/specialist.py +0 -286
- pyxlpr/ai/torch_app.py +0 -172
- pyxlpr/ai/xlpaddle.py +0 -655
- pyxlpr/ai/xltorch.py +0 -705
- pyxlpr/data/__init__.py +0 -11
- pyxlpr/data/coco.py +0 -1325
- pyxlpr/data/datacls.py +0 -365
- pyxlpr/data/datasets.py +0 -200
- pyxlpr/data/gptlib.py +0 -1291
- pyxlpr/data/icdar/__init__.py +0 -96
- pyxlpr/data/icdar/deteval.py +0 -377
- pyxlpr/data/icdar/icdar2013.py +0 -341
- pyxlpr/data/icdar/iou.py +0 -340
- pyxlpr/data/icdar/rrc_evaluation_funcs_1_1.py +0 -463
- pyxlpr/data/imtextline.py +0 -473
- pyxlpr/data/labelme.py +0 -866
- pyxlpr/data/removeline.py +0 -179
- pyxlpr/data/specialist.py +0 -57
- pyxlpr/eval/__init__.py +0 -85
- pyxlpr/paddleocr.py +0 -776
- pyxlpr/ppocr/__init__.py +0 -15
- pyxlpr/ppocr/configs/rec/multi_language/generate_multi_language_configs.py +0 -226
- pyxlpr/ppocr/data/__init__.py +0 -135
- pyxlpr/ppocr/data/imaug/ColorJitter.py +0 -26
- pyxlpr/ppocr/data/imaug/__init__.py +0 -67
- pyxlpr/ppocr/data/imaug/copy_paste.py +0 -170
- pyxlpr/ppocr/data/imaug/east_process.py +0 -437
- pyxlpr/ppocr/data/imaug/gen_table_mask.py +0 -244
- pyxlpr/ppocr/data/imaug/iaa_augment.py +0 -114
- pyxlpr/ppocr/data/imaug/label_ops.py +0 -789
- pyxlpr/ppocr/data/imaug/make_border_map.py +0 -184
- pyxlpr/ppocr/data/imaug/make_pse_gt.py +0 -106
- pyxlpr/ppocr/data/imaug/make_shrink_map.py +0 -126
- pyxlpr/ppocr/data/imaug/operators.py +0 -433
- pyxlpr/ppocr/data/imaug/pg_process.py +0 -906
- pyxlpr/ppocr/data/imaug/randaugment.py +0 -143
- pyxlpr/ppocr/data/imaug/random_crop_data.py +0 -239
- pyxlpr/ppocr/data/imaug/rec_img_aug.py +0 -533
- pyxlpr/ppocr/data/imaug/sast_process.py +0 -777
- pyxlpr/ppocr/data/imaug/text_image_aug/__init__.py +0 -17
- pyxlpr/ppocr/data/imaug/text_image_aug/augment.py +0 -120
- pyxlpr/ppocr/data/imaug/text_image_aug/warp_mls.py +0 -168
- pyxlpr/ppocr/data/lmdb_dataset.py +0 -115
- pyxlpr/ppocr/data/pgnet_dataset.py +0 -104
- pyxlpr/ppocr/data/pubtab_dataset.py +0 -107
- pyxlpr/ppocr/data/simple_dataset.py +0 -372
- pyxlpr/ppocr/losses/__init__.py +0 -61
- pyxlpr/ppocr/losses/ace_loss.py +0 -52
- pyxlpr/ppocr/losses/basic_loss.py +0 -135
- pyxlpr/ppocr/losses/center_loss.py +0 -88
- pyxlpr/ppocr/losses/cls_loss.py +0 -30
- pyxlpr/ppocr/losses/combined_loss.py +0 -67
- pyxlpr/ppocr/losses/det_basic_loss.py +0 -208
- pyxlpr/ppocr/losses/det_db_loss.py +0 -80
- pyxlpr/ppocr/losses/det_east_loss.py +0 -63
- pyxlpr/ppocr/losses/det_pse_loss.py +0 -149
- pyxlpr/ppocr/losses/det_sast_loss.py +0 -121
- pyxlpr/ppocr/losses/distillation_loss.py +0 -272
- pyxlpr/ppocr/losses/e2e_pg_loss.py +0 -140
- pyxlpr/ppocr/losses/kie_sdmgr_loss.py +0 -113
- pyxlpr/ppocr/losses/rec_aster_loss.py +0 -99
- pyxlpr/ppocr/losses/rec_att_loss.py +0 -39
- pyxlpr/ppocr/losses/rec_ctc_loss.py +0 -44
- pyxlpr/ppocr/losses/rec_enhanced_ctc_loss.py +0 -70
- pyxlpr/ppocr/losses/rec_nrtr_loss.py +0 -30
- pyxlpr/ppocr/losses/rec_sar_loss.py +0 -28
- pyxlpr/ppocr/losses/rec_srn_loss.py +0 -47
- pyxlpr/ppocr/losses/table_att_loss.py +0 -109
- pyxlpr/ppocr/metrics/__init__.py +0 -44
- pyxlpr/ppocr/metrics/cls_metric.py +0 -45
- pyxlpr/ppocr/metrics/det_metric.py +0 -82
- pyxlpr/ppocr/metrics/distillation_metric.py +0 -73
- pyxlpr/ppocr/metrics/e2e_metric.py +0 -86
- pyxlpr/ppocr/metrics/eval_det_iou.py +0 -274
- pyxlpr/ppocr/metrics/kie_metric.py +0 -70
- pyxlpr/ppocr/metrics/rec_metric.py +0 -75
- pyxlpr/ppocr/metrics/table_metric.py +0 -50
- pyxlpr/ppocr/modeling/architectures/__init__.py +0 -32
- pyxlpr/ppocr/modeling/architectures/base_model.py +0 -88
- pyxlpr/ppocr/modeling/architectures/distillation_model.py +0 -60
- pyxlpr/ppocr/modeling/backbones/__init__.py +0 -54
- pyxlpr/ppocr/modeling/backbones/det_mobilenet_v3.py +0 -268
- pyxlpr/ppocr/modeling/backbones/det_resnet_vd.py +0 -246
- pyxlpr/ppocr/modeling/backbones/det_resnet_vd_sast.py +0 -285
- pyxlpr/ppocr/modeling/backbones/e2e_resnet_vd_pg.py +0 -265
- pyxlpr/ppocr/modeling/backbones/kie_unet_sdmgr.py +0 -186
- pyxlpr/ppocr/modeling/backbones/rec_mobilenet_v3.py +0 -138
- pyxlpr/ppocr/modeling/backbones/rec_mv1_enhance.py +0 -258
- pyxlpr/ppocr/modeling/backbones/rec_nrtr_mtb.py +0 -48
- pyxlpr/ppocr/modeling/backbones/rec_resnet_31.py +0 -210
- pyxlpr/ppocr/modeling/backbones/rec_resnet_aster.py +0 -143
- pyxlpr/ppocr/modeling/backbones/rec_resnet_fpn.py +0 -307
- pyxlpr/ppocr/modeling/backbones/rec_resnet_vd.py +0 -286
- pyxlpr/ppocr/modeling/heads/__init__.py +0 -54
- pyxlpr/ppocr/modeling/heads/cls_head.py +0 -52
- pyxlpr/ppocr/modeling/heads/det_db_head.py +0 -118
- pyxlpr/ppocr/modeling/heads/det_east_head.py +0 -121
- pyxlpr/ppocr/modeling/heads/det_pse_head.py +0 -37
- pyxlpr/ppocr/modeling/heads/det_sast_head.py +0 -128
- pyxlpr/ppocr/modeling/heads/e2e_pg_head.py +0 -253
- pyxlpr/ppocr/modeling/heads/kie_sdmgr_head.py +0 -206
- pyxlpr/ppocr/modeling/heads/multiheadAttention.py +0 -163
- pyxlpr/ppocr/modeling/heads/rec_aster_head.py +0 -393
- pyxlpr/ppocr/modeling/heads/rec_att_head.py +0 -202
- pyxlpr/ppocr/modeling/heads/rec_ctc_head.py +0 -88
- pyxlpr/ppocr/modeling/heads/rec_nrtr_head.py +0 -826
- pyxlpr/ppocr/modeling/heads/rec_sar_head.py +0 -402
- pyxlpr/ppocr/modeling/heads/rec_srn_head.py +0 -280
- pyxlpr/ppocr/modeling/heads/self_attention.py +0 -406
- pyxlpr/ppocr/modeling/heads/table_att_head.py +0 -246
- pyxlpr/ppocr/modeling/necks/__init__.py +0 -32
- pyxlpr/ppocr/modeling/necks/db_fpn.py +0 -111
- pyxlpr/ppocr/modeling/necks/east_fpn.py +0 -188
- pyxlpr/ppocr/modeling/necks/fpn.py +0 -138
- pyxlpr/ppocr/modeling/necks/pg_fpn.py +0 -314
- pyxlpr/ppocr/modeling/necks/rnn.py +0 -92
- pyxlpr/ppocr/modeling/necks/sast_fpn.py +0 -284
- pyxlpr/ppocr/modeling/necks/table_fpn.py +0 -110
- pyxlpr/ppocr/modeling/transforms/__init__.py +0 -28
- pyxlpr/ppocr/modeling/transforms/stn.py +0 -135
- pyxlpr/ppocr/modeling/transforms/tps.py +0 -308
- pyxlpr/ppocr/modeling/transforms/tps_spatial_transformer.py +0 -156
- pyxlpr/ppocr/optimizer/__init__.py +0 -61
- pyxlpr/ppocr/optimizer/learning_rate.py +0 -228
- pyxlpr/ppocr/optimizer/lr_scheduler.py +0 -49
- pyxlpr/ppocr/optimizer/optimizer.py +0 -160
- pyxlpr/ppocr/optimizer/regularizer.py +0 -52
- pyxlpr/ppocr/postprocess/__init__.py +0 -55
- pyxlpr/ppocr/postprocess/cls_postprocess.py +0 -33
- pyxlpr/ppocr/postprocess/db_postprocess.py +0 -234
- pyxlpr/ppocr/postprocess/east_postprocess.py +0 -143
- pyxlpr/ppocr/postprocess/locality_aware_nms.py +0 -200
- pyxlpr/ppocr/postprocess/pg_postprocess.py +0 -52
- pyxlpr/ppocr/postprocess/pse_postprocess/__init__.py +0 -15
- pyxlpr/ppocr/postprocess/pse_postprocess/pse/__init__.py +0 -29
- pyxlpr/ppocr/postprocess/pse_postprocess/pse/setup.py +0 -14
- pyxlpr/ppocr/postprocess/pse_postprocess/pse_postprocess.py +0 -118
- pyxlpr/ppocr/postprocess/rec_postprocess.py +0 -654
- pyxlpr/ppocr/postprocess/sast_postprocess.py +0 -355
- pyxlpr/ppocr/tools/__init__.py +0 -14
- pyxlpr/ppocr/tools/eval.py +0 -83
- pyxlpr/ppocr/tools/export_center.py +0 -77
- pyxlpr/ppocr/tools/export_model.py +0 -129
- pyxlpr/ppocr/tools/infer/predict_cls.py +0 -151
- pyxlpr/ppocr/tools/infer/predict_det.py +0 -300
- pyxlpr/ppocr/tools/infer/predict_e2e.py +0 -169
- pyxlpr/ppocr/tools/infer/predict_rec.py +0 -414
- pyxlpr/ppocr/tools/infer/predict_system.py +0 -204
- pyxlpr/ppocr/tools/infer/utility.py +0 -629
- pyxlpr/ppocr/tools/infer_cls.py +0 -83
- pyxlpr/ppocr/tools/infer_det.py +0 -134
- pyxlpr/ppocr/tools/infer_e2e.py +0 -122
- pyxlpr/ppocr/tools/infer_kie.py +0 -153
- pyxlpr/ppocr/tools/infer_rec.py +0 -146
- pyxlpr/ppocr/tools/infer_table.py +0 -107
- pyxlpr/ppocr/tools/program.py +0 -596
- pyxlpr/ppocr/tools/test_hubserving.py +0 -117
- pyxlpr/ppocr/tools/train.py +0 -163
- pyxlpr/ppocr/tools/xlprog.py +0 -748
- pyxlpr/ppocr/utils/EN_symbol_dict.txt +0 -94
- pyxlpr/ppocr/utils/__init__.py +0 -24
- pyxlpr/ppocr/utils/dict/ar_dict.txt +0 -117
- pyxlpr/ppocr/utils/dict/arabic_dict.txt +0 -162
- pyxlpr/ppocr/utils/dict/be_dict.txt +0 -145
- pyxlpr/ppocr/utils/dict/bg_dict.txt +0 -140
- pyxlpr/ppocr/utils/dict/chinese_cht_dict.txt +0 -8421
- pyxlpr/ppocr/utils/dict/cyrillic_dict.txt +0 -163
- pyxlpr/ppocr/utils/dict/devanagari_dict.txt +0 -167
- pyxlpr/ppocr/utils/dict/en_dict.txt +0 -63
- pyxlpr/ppocr/utils/dict/fa_dict.txt +0 -136
- pyxlpr/ppocr/utils/dict/french_dict.txt +0 -136
- pyxlpr/ppocr/utils/dict/german_dict.txt +0 -143
- pyxlpr/ppocr/utils/dict/hi_dict.txt +0 -162
- pyxlpr/ppocr/utils/dict/it_dict.txt +0 -118
- pyxlpr/ppocr/utils/dict/japan_dict.txt +0 -4399
- pyxlpr/ppocr/utils/dict/ka_dict.txt +0 -153
- pyxlpr/ppocr/utils/dict/korean_dict.txt +0 -3688
- pyxlpr/ppocr/utils/dict/latin_dict.txt +0 -185
- pyxlpr/ppocr/utils/dict/mr_dict.txt +0 -153
- pyxlpr/ppocr/utils/dict/ne_dict.txt +0 -153
- pyxlpr/ppocr/utils/dict/oc_dict.txt +0 -96
- pyxlpr/ppocr/utils/dict/pu_dict.txt +0 -130
- pyxlpr/ppocr/utils/dict/rs_dict.txt +0 -91
- pyxlpr/ppocr/utils/dict/rsc_dict.txt +0 -134
- pyxlpr/ppocr/utils/dict/ru_dict.txt +0 -125
- pyxlpr/ppocr/utils/dict/ta_dict.txt +0 -128
- pyxlpr/ppocr/utils/dict/table_dict.txt +0 -277
- pyxlpr/ppocr/utils/dict/table_structure_dict.txt +0 -2759
- pyxlpr/ppocr/utils/dict/te_dict.txt +0 -151
- pyxlpr/ppocr/utils/dict/ug_dict.txt +0 -114
- pyxlpr/ppocr/utils/dict/uk_dict.txt +0 -142
- pyxlpr/ppocr/utils/dict/ur_dict.txt +0 -137
- pyxlpr/ppocr/utils/dict/xi_dict.txt +0 -110
- pyxlpr/ppocr/utils/dict90.txt +0 -90
- pyxlpr/ppocr/utils/e2e_metric/Deteval.py +0 -574
- pyxlpr/ppocr/utils/e2e_metric/polygon_fast.py +0 -83
- pyxlpr/ppocr/utils/e2e_utils/extract_batchsize.py +0 -87
- pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_fast.py +0 -457
- pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_slow.py +0 -592
- pyxlpr/ppocr/utils/e2e_utils/pgnet_pp_utils.py +0 -162
- pyxlpr/ppocr/utils/e2e_utils/visual.py +0 -162
- pyxlpr/ppocr/utils/en_dict.txt +0 -95
- pyxlpr/ppocr/utils/gen_label.py +0 -81
- pyxlpr/ppocr/utils/ic15_dict.txt +0 -36
- pyxlpr/ppocr/utils/iou.py +0 -54
- pyxlpr/ppocr/utils/logging.py +0 -69
- pyxlpr/ppocr/utils/network.py +0 -84
- pyxlpr/ppocr/utils/ppocr_keys_v1.txt +0 -6623
- pyxlpr/ppocr/utils/profiler.py +0 -110
- pyxlpr/ppocr/utils/save_load.py +0 -150
- pyxlpr/ppocr/utils/stats.py +0 -72
- pyxlpr/ppocr/utils/utility.py +0 -80
- pyxlpr/ppstructure/__init__.py +0 -13
- pyxlpr/ppstructure/predict_system.py +0 -187
- pyxlpr/ppstructure/table/__init__.py +0 -13
- pyxlpr/ppstructure/table/eval_table.py +0 -72
- pyxlpr/ppstructure/table/matcher.py +0 -192
- pyxlpr/ppstructure/table/predict_structure.py +0 -136
- pyxlpr/ppstructure/table/predict_table.py +0 -221
- pyxlpr/ppstructure/table/table_metric/__init__.py +0 -16
- pyxlpr/ppstructure/table/table_metric/parallel.py +0 -51
- pyxlpr/ppstructure/table/table_metric/table_metric.py +0 -247
- pyxlpr/ppstructure/table/tablepyxl/__init__.py +0 -13
- pyxlpr/ppstructure/table/tablepyxl/style.py +0 -283
- pyxlpr/ppstructure/table/tablepyxl/tablepyxl.py +0 -118
- pyxlpr/ppstructure/utility.py +0 -71
- pyxlpr/xlai.py +0 -10
- /pyxllib/{ext/autogui → autogui}/virtualkey.py +0 -0
- {pyxllib-0.3.96.dist-info → pyxllib-0.3.197.dist-info/licenses}/LICENSE +0 -0
pyxlpr/data/imtextline.py
DELETED
@@ -1,473 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# @Author : 陈坤泽
|
4
|
-
# @Email : 877362867@qq.com
|
5
|
-
# @Date : 2020/11/17
|
6
|
-
|
7
|
-
""" 图片文本行标注相关处理
|
8
|
-
"""
|
9
|
-
|
10
|
-
from pyxllib.xlcv import *
|
11
|
-
|
12
|
-
from functools import reduce
|
13
|
-
|
14
|
-
from shapely.geometry import MultiPolygon
|
15
|
-
|
16
|
-
from pyxllib.algo.geo import split_vector_interval
|
17
|
-
from pyxllib.algo.disjoint import disjoint_set
|
18
|
-
from pyxllib.algo.shapelylib import ShapelyPolygon
|
19
|
-
|
20
|
-
|
21
|
-
class TextlineShape:
    """A single text-line annotation treated as a geometric shape.

    The base class only provides geometric relations between boxes
    (same line / same column / overlap tests, merging, ordering).
    Subclass it to attach extra payload such as labels.
    """

    def __init__(self, box, *, shrink_bound=False):
        """Build the shape and cache its bounds and centroid.

        :param box: any value ``ShapelyPolygon.gen`` can turn into a polygon
        :param shrink_bound: for strongly slanted boxes the axis-aligned
            bounding rectangle is too generous; when set, the bounds are
            rescaled with ``bound_scale`` by the ratio
            ``polygon area / bounding-rectangle area`` so the slanted box
            can be handled like a plain rectangle.

        Detailed docs: https://www.yuque.com/xlpr/pyxllib/textlineshape
        """
        self.polygon = ShapelyPolygon.gen(box)
        self.bounds = self.polygon.bounds
        if shrink_bound:
            b = self.bounds
            total_area = (b[2] - b[0]) * (b[3] - b[1])
            # A degenerate (zero-area) bounding rectangle cannot be scaled
            # by an area ratio; keep the bounds untouched instead of
            # failing with ZeroDivisionError.
            if total_area:
                self.bounds = bound_scale(self.bounds, self.polygon.area / total_area)

        self.minx, self.maxx = self.bounds[0], self.bounds[2]
        self.width = self.maxx - self.minx
        self.miny, self.maxy = self.bounds[1], self.bounds[3]
        self.height = self.maxy - self.miny
        self.centroid = self.polygon.centroid

    def in_the_same_line(self, other):
        """Return True when either centroid lies strictly inside the other's vertical extent."""
        return bool(other.miny < self.centroid.y < other.maxy
                    or self.miny < other.centroid.y < self.maxy)

    def in_the_same_column(self, other):
        """Return True when either centroid lies strictly inside the other's horizontal extent."""
        return bool(other.minx < self.centroid.x < other.maxx
                    or self.minx < other.centroid.x < self.maxx)

    def is_lr_intersect(self, other, gap=5):
        """Horizontal overlap test.

        True when the left or right edge of ``self`` falls inside the
        horizontal extent of ``other`` widened by ``gap`` on both sides.
        """
        lo, hi = other.minx - gap, other.maxx + gap
        return bool(lo <= self.minx <= hi or lo <= self.maxx <= hi)

    def is_tb_intersect(self, other, gap=5):
        """Vertical overlap test.

        True when the top or bottom edge of ``self`` falls inside the
        vertical extent of ``other`` widened by ``gap`` on both sides.
        """
        # A task-specific rule (gap = min(50, half of the smaller height))
        # used to live here; it belongs in the task code, not in this
        # generic helper, so ``gap`` stays a plain parameter.
        lo, hi = other.miny - gap, other.maxy + gap
        return bool(lo <= self.miny <= hi or lo <= self.maxy <= hi)

    def is_intersect(self, other):
        """Return whether the two polygons intersect.

        :param other: another ``TextlineShape`` (its ``polygon`` is used) or
            a raw geometry object, which is passed through unchanged.
        """
        # The geometry test must receive a geometry, not the wrapper object.
        geom = getattr(other, 'polygon', other)
        return self.polygon.intersects(geom)

    def __add__(self, other):
        """Merge two text lines into the bounding rectangle that covers both."""
        box = rect2polygon(MultiPolygon([self.polygon, other.polygon]).bounds)
        return TextlineShape(box)

    def __lt__(self, other):
        """Ordering rule: left-to-right inside one line, otherwise top-to-bottom."""
        if self.in_the_same_line(other):
            return self.centroid.x < other.centroid.x
        return self.centroid.y < other.centroid.y

    @classmethod
    def merge(cls, shapes):
        """Merge the shapes of one image that intersect each other.

        :param shapes: iterable of shapes belonging to the same image
        :return: one merged shape per group of mutually intersecting shapes
        """
        # 1 group the boxes by intersection
        shape_groups = disjoint_set(shapes, lambda x, y: x.is_intersect(y))

        # 2 fold every group, in sorted order, into a single shape
        return [reduce(lambda x, y: x + y, sorted(group)) for group in shape_groups]
|
116
|
-
|
117
|
-
|
118
|
-
def im_textline_split(im, maxsplit=None, minwidth=3):
    """Baseline column splitter for clean images (dark text on a light, barely slanted background).

    Task-specific data usually needs its own customised variant of this
    function.

    :param im: image accepted by ``xlcv.read`` (read with flag ``0`` —
        presumably grayscale, TODO confirm)
    :param maxsplit: forwarded to ``split_vector_interval``
    :param minwidth: forwarded to ``split_vector_interval``
    :return: whatever ``split_vector_interval`` returns for the per-column
        count of below-mean pixels
    """
    gray = xlcv.read(im, 0)
    # Direct binarisation against the global mean only works on clean
    # images: gap columns must contain essentially no text pixels.
    ink_mask = gray < np.mean(gray)
    column_ink = ink_mask.sum(axis=0)
    return split_vector_interval(column_ink, maxsplit=maxsplit, minwidth=minwidth)
|
129
|
-
|
130
|
-
|
131
|
-
def merge_labels_by_widths(labels, widths, sep=' ', width_func=None):
    """Merge ``labels`` into ``len(widths)`` pieces guided by the relative ``widths``.

    Companion of image splitting: when an image is cut into segments, the
    text usually has to be partitioned the same way.

    The same routine can also split a string: to cut ``'abcdefg'`` into two
    parts weighted ``[20, 30]``, pass ``list('abcdefg')`` as ``labels``.

    The caller's ``labels`` sequence is never modified.

    :param labels: sequence of strings
    :param widths: reference widths (weights), one per output piece
    :param sep: separator used whenever labels are joined
    :param width_func: callable measuring the width of one label;
        defaults to ``strwidth``
    :return: list of strings, joined so that their widths follow the
        reference widths as closely as the greedy pass allows
    :raises AssertionError: when the widths sum to zero

    >>> merge_labels_by_widths(['aa', 'bbb', 'c', 'ccc'], [10,10,20])
    ['aa', 'bbb', 'c ccc']
    >>> merge_labels_by_widths(['a', 'a', 'b', 'b'], [13, 10, 10])
    ['a a', 'b', 'b']
    >>> merge_labels_by_widths(['a', 'a', 'b', 'b'], [10, 10, 10])
    ['a', 'a', 'b b']
    >>> merge_labels_by_widths(['a', 'b', 'c'], [11, 12, 13])
    ['a', 'b', 'c']
    >>> merge_labels_by_widths(['a'], [10, 12])  # fewer labels than widths: pad with ''
    ['a', '']
    >>> merge_labels_by_widths([''], [10, 12])
    ['', '']
    """
    if width_func is None:
        width_func = strwidth

    # 1 bring both sides to the same unit
    labels = list(labels)  # private copy: padding must not leak to the caller
    if len(labels) < len(widths):
        labels += [''] * (len(widths) - len(labels))
    label_widths = [width_func(x) for x in labels]
    n_label = len(labels)
    assert sum(widths), 'widths必须要有权重值'
    r = sum(label_widths) / sum(widths)
    widths = [r * w for w in widths]

    # 2 greedy merge: keep absorbing the next label while that moves the
    #   accumulated width closer to the target width of the current slot
    need_merge = n_label - len(widths)  # number of joins still required
    i, k, new_labels = 0, 0, []
    for w in widths:
        if k < need_merge:
            label_width = label_widths[i]
            j = i + 1
            while (j < n_label and k < need_merge
                   and abs(label_width + label_widths[j] - w) < abs(label_width - w)):
                label_width += label_widths[j]
                j += 1
                k += 1
            new_labels.append(sep.join(labels[i:j]))
            i = j
        elif k == need_merge:
            # every required join is done: the rest maps one-to-one
            new_labels += labels[i:]
            i = n_label
            break

    # labels the greedy pass never consumed are appended to the last piece,
    # using the caller's separator like every other join
    if i < n_label:
        new_labels[-1] = sep.join([new_labels[-1]] + labels[i:])

    return new_labels
|
190
|
-
|
191
|
-
|
192
|
-
class TextlineAnnotation(TextlineShape):
    """A COCO-style annotation dict wrapped as a text-line shape."""

    def __init__(self, anno):
        """
        :param anno: annotation dict; its ``'bbox'`` (xywh) defines the shape
        """
        super().__init__(xywh2ltrb(anno['bbox']))
        self.anno = anno

    def __add__(self, other):
        """Merge two annotations; attributes of ``self`` are the baseline.

        Neither operand's annotation dict (nor its ``segmentation`` list)
        is modified.
        """
        anno, anno2 = self.anno.copy(), other.anno

        # merged bbox
        anno['bbox'] = ltrb2xywh(MultiPolygon([self.polygon, other.polygon]).bounds)

        # merged segmentation: ``anno`` is only a shallow copy, so build a
        # new list instead of extending the one still owned by self.anno
        if anno2['segmentation']:
            anno['segmentation'] = [*anno['segmentation'], *anno2['segmentation']]

        # merged label
        if 'label' in anno or 'label' in anno2:
            text = anno2.get('label', '')
            if text:
                text = ' ' + text
            anno['label'] = anno.get('label', '') + text

        return TextlineAnnotation(anno)

    @classmethod
    def merge(cls, annotations):
        """Merge annotations that sit on the same text line and are horizontally close or overlapping.

        :param annotations: list of annotation dicts
        :return: list of merged annotation dicts
        """
        # 1 wrap as shapes
        shapes = [cls(x) for x in annotations]

        # 2 group the boxes
        shape_groups = disjoint_set(shapes, lambda x, y: x.in_the_same_line(y) and x.is_lr_intersect(y))

        # 3 fold every group, in sorted order, into one shape
        new_shapes = [reduce(lambda x, y: x + y, sorted(group)) for group in shape_groups]

        # 4 back to plain annotation dicts
        return [x.anno for x in new_shapes]

    @classmethod
    def split(cls, im, annotations, split_func=im_textline_split):
        """Split text-line annotations of image ``im`` along blank background gaps.

        :param im: image data
        :param annotations: COCO-style annotation dicts
        :param split_func: projection analysis function; called with the
            sub-image of one annotation, it must return the column
            intervals that contain text

        When an annotation is cut into several spans its ``'label'`` text
        is split along with it (that path reads ``anno['label']``).

        :return: new list of annotation dicts. Some images cannot be
            processed sensibly; in that case ``[]`` is returned and the
            image is best discarded.
        """
        new_annos = []
        for anno in annotations:
            x, _, w, _ = anno['bbox']
            _, t, _, b = xywh2ltrb(anno['bbox'])
            subim = xlcv.get_sub(im, xywh2ltrb(anno['bbox']))
            spans = split_func(subim)
            # widen each span by 3px on both sides (clamped to the box) and
            # convert to absolute coordinates of the full image
            spans = [[x + max(span[0] - 3, 0), x + min(span[1] + 3, w)] for span in spans]

            if len(spans) == 0:
                # usually a misplaced, low-quality box:
                # drop the annotations of the whole image
                return []
            elif len(spans) == 1:
                l, r = spans[0]
                a = copy.copy(anno)
                a['bbox'] = ltrb2xywh([l, t, r, b])
                new_annos.append(a)
            else:  # several spans: the label has to be split as well
                labels = anno['label'].split()
                if len(labels) > len(spans):
                    labels = merge_labels_by_widths(labels, [(span[1] - span[0]) for span in spans])
                elif len(labels) < len(spans):
                    # every occurrence of this case should be inspected
                    get_xllog().warning(DPrint.format({'$异常': 'len(labels)<len(spans)',
                                                       'labels': labels, 'spans': spans}))
                    # keep the original box for now
                    new_annos.append(anno)
                    continue

                for span, label in zip(spans, labels):
                    l, r = span
                    a = copy.copy(anno)
                    a['bbox'] = ltrb2xywh([l, t, r, b])
                    a['label'] = label
                    new_annos.append(a)

        return new_annos
|
296
|
-
|
297
|
-
|
298
|
-
class TextlineSpliter:
    """Column-projection splitter for text-line images.

    Works as-is for datasets such as TextString2016 and Casia.
    """

    @classmethod
    def spliter(cls, im, maxsplit=None, minwidth=3):
        """Core interface: clean images, dark text on light background, little slant.

        Override this method for other kinds of data (see EnglishWordTLS).

        :param im: image path or np.ndarray
        :param maxsplit: maximum number of sub-intervals to produce;
            when unset, every position that qualifies is cut
        :param minwidth: minimum width required of every cut position
        :return: [(l, r), (l, r), ...] left/right interval of each text segment

        Detailed docs: https://www.yuque.com/xlpr/data/cx6xm5
        """
        img = xlcv.read(im, 0)
        # Direct binarisation against the global mean only works on clean
        # images: gap columns must contain essentially no text pixels.
        bi = img < np.mean(img)
        # the per-column count is lowered by 2 before interval detection
        vec = bi.sum(axis=0) - 2
        return split_vector_interval(vec, maxsplit=maxsplit, minwidth=minwidth)

    @classmethod
    def split_img(cls, file, maxsplit=None, minwidth=3):
        """Cut an image into its text segments.

        :param file: np.ndarray image, PIL image, or image path
        :param maxsplit: see ``spliter``
        :param minwidth: see ``spliter``
        :return: list of np.ndarray sub-images, one per interval
        """
        img = xlcv.read(file)
        return [img[:, l:r + 1] for l, r in cls.spliter(img, maxsplit, minwidth)]

    @classmethod
    def spliter_img(cls, file, maxsplit=None, minwidth=3):
        """Visualise the split of one image by drawing the interval borders.

        :param file: image accepted by ``xlcv.read`` (e.g. an absolute path)
        :return: image with one vertical line per interval border
        """
        im = xlcv.read(file, 0)
        cols = cls.spliter(im, maxsplit=maxsplit, minwidth=minwidth)

        # flattened borders: l0, r0, l1, r1, ... -> one vertical line each
        lines = [[c, 0, c, im.shape[0] - 1] for c in np.array(cols, dtype=int).reshape(-1)]
        # borders of even-numbered intervals use [0, 0, 255], those of
        # odd-numbered intervals use [255, 0, 0]
        # (red / blue per the original notes — presumably BGR, TODO confirm)
        im2 = im
        for offset, color in ((0, [0, 0, 255]), (1, [0, 0, 255]),
                              (2, [255, 0, 0]), (3, [255, 0, 0])):
            im2 = xlcv.lines(im2, lines[offset::4], color)

        return im2

    @classmethod
    def show_spliter_imgs(cls, dir_state, *, save=None, show=True):
        """Run ``spliter_img`` over the images of ``dir_state`` via ``ImagesDir.debug_func``.

        :param dir_state: images to process (e.g. a random sample)
        :param save: where the results are saved
        :param show: whether to imshow the result images
        """
        ImagesDir.debug_func(dir_state,
                             lambda img_file: cls.spliter_img(img_file, maxsplit=None, minwidth=3),
                             save=save,
                             show=show)

    @classmethod
    def relabel_labelfile(cls, p, maxsplit=None, minwidth=3, imgdir='images'):
        """Append a column of interval coordinates to every line of one label file.

        Each non-blank line starts with an image file name; the detected
        intervals are appended as a space-separated, tab-delimited column.
        Blank lines are skipped. The result is written next to ``p`` under
        a stem suffixed with ``+text_interval-minw=<minwidth>``.

        :param p: label file object providing ``read``, ``parent``, ``stem``
            and ``with_stem``
        :param imgdir: name of the sub-directory that holds the images
        """
        res = []
        for line in p.read().splitlines():
            line = line.split(maxsplit=1)
            if not line:
                # a blank line carries no image name; indexing it would raise
                continue
            im = xlcv.read(p.parent / f'{imgdir}/{line[0]}', 0)
            cols = cls.spliter(im, maxsplit, minwidth)
            line.append(' '.join(map(str, np.array(cols, dtype=int).reshape(-1))))
            res.append('\t'.join(line))
        content = '\n'.join(res)
        p.with_stem(p.stem + f'+text_interval-minw={minwidth}').write(content, if_exists='replace')

    @classmethod
    def relabel_labelfiles(cls, root, maxsplit=None, minwidth=3, imgdir='images'):
        """Relabel the val/test/train label files under ``root``.

        :param root: root directory
        :param imgdir: name of the sub-directory that holds the images
        """
        root = Dir(root)
        for name in ('val.txt', 'test.txt', 'train.txt'):
            cls.relabel_labelfile(root / name, maxsplit, minwidth, imgdir)

    @classmethod
    def split_labelfiles(cls, src, dst, minwidth=3, imgdir='images'):
        """Cut every multi-word image of the label files in ``src`` into per-word images in ``dst``.

        :param src: source dataset directory
        :param dst: destination dataset directory
        :param minwidth: see ``spliter``
        :param imgdir: name of the sub-directory that holds the images
        """

        def func(name):
            """Convert all images listed in one label file and write the new label file.

            p     original .txt label file
            p_im  original image
            q     label file after splitting
            q_im  image after splitting
            """
            p, q = File(name, src), File(name, dst)
            if not p:
                return
            res = []
            for line in p.read().splitlines():
                # image file name, then the words it contains
                line = line.split(maxsplit=1)
                if len(line) < 2:
                    continue

                p_im = File(p.parent / f'{imgdir}/{line[0]}')
                words = line[1].split()

                if len(words) < 2:
                    # a single word: copy the image unchanged
                    q_im = File(f'{imgdir}/{p_im.name}', dst)
                    p_im.copy(q_im)
                    res.append(f'{q_im.name}\t{words[0]}')
                else:
                    # cut the image, then emit one label line per piece
                    imgs = cls.split_img(p_im, len(words), minwidth)
                    for k, im in enumerate(imgs):
                        q_im = File(f'{imgdir}/{p_im.stem}_{k}', dst, suffix=p_im.suffix)
                        xlcv.write(im, q_im, if_exists='replace')
                        res.append(f'{q_im.name}\t{words[k]}')
            content = '\n'.join(res)
            q.write(content, if_exists='replace')

        src, dst = Dir(src), Dir(dst)
        for name in ['val.txt', 'test.txt', 'train.txt']:
            func(name)
|
431
|
-
|
432
|
-
|
433
|
-
class EnglishWordTLS(TextlineSpliter):
    """Splitter variant for noisy images with dark text on a light background."""

    @classmethod
    def spliter(cls, img, maxsplit=None, minwidth=3):
        """Same contract as ``TextlineSpliter.spliter``, tuned for noisy backgrounds.

        Only the middle third of the rows is projected onto the columns.
        """
        gray = xlcv.read(img, 0)
        height, _ = gray.shape
        top, bottom = int(height / 3), int(2 * height / 3)
        # column means over the vertical middle third only
        band_profile = gray[top:bottom].mean(axis=0)
        # flip around the overall mean so text columns become positive and
        # background negative; the constant 5 is the original author's
        # allowance for dark noise specks in the background
        # NOTE(review): the original note speaks of subtracting a bit more,
        # while the code adds 5 — confirm the intended sign.
        score = band_profile.mean() - band_profile + 5
        return split_vector_interval(score, maxsplit=maxsplit, minwidth=minwidth)
|
444
|
-
|
445
|
-
|
446
|
-
class TLSMain:
    """Ad-hoc driver methods, one per dataset, with hard-coded dataset locations."""

    def textstring2016(self):
        """Relabel the TextString2016 label files."""
        dataset_root = r'D:\datasets\TextString2016'
        TextlineSpliter.relabel_labelfiles(dataset_root, minwidth=3)

    def casia(self):
        """Relabel the three CASIA-HWDB2.x line datasets."""
        os.chdir('/home/datasets/textGroup/casia/offlinehw/CASIA-HWDB2.x_pngImg_line')
        subsets = (
            'CASIA-HWDB2.0_savePTTSImg_line',
            'CASIA-HWDB2.1_savePTTSImg_line',
            'CASIA-HWDB2.2_savePTTSImg_line',
        )
        for subset in subsets:
            TextlineSpliter.relabel_labelfiles(subset, minwidth=3)

    def english_word(self):
        """Relabel the english-word dataset with the noise-tolerant splitter."""
        EnglishWordTLS.relabel_labelfiles(r'D:\datasets\english-word', minwidth=10, imgdir='total')

    def sroie(self):
        """Render the split of 10 sampled SROIE2019 patch images into a temp location without showing them."""
        path = Dir('SROIE2019/task1train_626p_repo/task1train_626p_patch/')
        root = Dir(path, '/home/datasets/textGroup')
        sampled = root.select('images/*.png').sample(10)
        target = File(path / 'temp', '/home/datasets/textGroup')
        TextlineSpliter.show_spliter_imgs(sampled, save=target, show=False)
|
469
|
-
|
470
|
-
|
471
|
-
if __name__ == '__main__':
    # Script entry point: nothing is executed yet; the empty body is kept
    # inside a TicToc context (presumably a timing helper — TODO confirm).
    with TicToc(__name__):
        pass
|