pyxllib 0.3.96__py3-none-any.whl → 0.3.197__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306)
  1. pyxllib/algo/geo.py +12 -0
  2. pyxllib/algo/intervals.py +1 -1
  3. pyxllib/algo/matcher.py +78 -0
  4. pyxllib/algo/pupil.py +187 -19
  5. pyxllib/algo/specialist.py +2 -1
  6. pyxllib/algo/stat.py +38 -2
  7. {pyxlpr → pyxllib/autogui}/__init__.py +1 -1
  8. pyxllib/autogui/activewin.py +246 -0
  9. pyxllib/autogui/all.py +9 -0
  10. pyxllib/{ext/autogui → autogui}/autogui.py +40 -11
  11. pyxllib/autogui/uiautolib.py +362 -0
  12. pyxllib/autogui/wechat.py +827 -0
  13. pyxllib/autogui/wechat_msg.py +421 -0
  14. pyxllib/autogui/wxautolib.py +84 -0
  15. pyxllib/cv/slidercaptcha.py +137 -0
  16. pyxllib/data/echarts.py +123 -12
  17. pyxllib/data/jsonlib.py +89 -0
  18. pyxllib/data/pglib.py +514 -30
  19. pyxllib/data/sqlite.py +231 -4
  20. pyxllib/ext/JLineViewer.py +14 -1
  21. pyxllib/ext/drissionlib.py +277 -0
  22. pyxllib/ext/kq5034lib.py +0 -1594
  23. pyxllib/ext/robustprocfile.py +497 -0
  24. pyxllib/ext/unixlib.py +6 -5
  25. pyxllib/ext/utools.py +108 -95
  26. pyxllib/ext/webhook.py +32 -14
  27. pyxllib/ext/wjxlib.py +88 -0
  28. pyxllib/ext/wpsapi.py +124 -0
  29. pyxllib/ext/xlwork.py +9 -0
  30. pyxllib/ext/yuquelib.py +1003 -71
  31. pyxllib/file/docxlib.py +1 -1
  32. pyxllib/file/libreoffice.py +165 -0
  33. pyxllib/file/movielib.py +9 -0
  34. pyxllib/file/packlib/__init__.py +112 -75
  35. pyxllib/file/pdflib.py +1 -1
  36. pyxllib/file/pupil.py +1 -1
  37. pyxllib/file/specialist/dirlib.py +1 -1
  38. pyxllib/file/specialist/download.py +10 -3
  39. pyxllib/file/specialist/filelib.py +266 -55
  40. pyxllib/file/xlsxlib.py +205 -50
  41. pyxllib/file/xlsyncfile.py +341 -0
  42. pyxllib/prog/cachetools.py +64 -0
  43. pyxllib/prog/filelock.py +42 -0
  44. pyxllib/prog/multiprogs.py +940 -0
  45. pyxllib/prog/newbie.py +9 -2
  46. pyxllib/prog/pupil.py +129 -60
  47. pyxllib/prog/specialist/__init__.py +176 -2
  48. pyxllib/prog/specialist/bc.py +5 -2
  49. pyxllib/prog/specialist/browser.py +11 -2
  50. pyxllib/prog/specialist/datetime.py +68 -0
  51. pyxllib/prog/specialist/tictoc.py +12 -13
  52. pyxllib/prog/specialist/xllog.py +5 -5
  53. pyxllib/prog/xlosenv.py +7 -0
  54. pyxllib/text/airscript.js +744 -0
  55. pyxllib/text/charclasslib.py +17 -5
  56. pyxllib/text/jiebalib.py +6 -3
  57. pyxllib/text/jinjalib.py +32 -0
  58. pyxllib/text/jsa_ai_prompt.md +271 -0
  59. pyxllib/text/jscode.py +159 -4
  60. pyxllib/text/nestenv.py +1 -1
  61. pyxllib/text/newbie.py +12 -0
  62. pyxllib/text/pupil/common.py +26 -0
  63. pyxllib/text/specialist/ptag.py +2 -2
  64. pyxllib/text/templates/echart_base.html +11 -0
  65. pyxllib/text/templates/highlight_code.html +17 -0
  66. pyxllib/text/templates/latex_editor.html +103 -0
  67. pyxllib/text/xmllib.py +76 -14
  68. pyxllib/xl.py +2 -1
  69. pyxllib-0.3.197.dist-info/METADATA +48 -0
  70. pyxllib-0.3.197.dist-info/RECORD +126 -0
  71. {pyxllib-0.3.96.dist-info → pyxllib-0.3.197.dist-info}/WHEEL +1 -2
  72. pyxllib/ext/autogui/__init__.py +0 -8
  73. pyxllib-0.3.96.dist-info/METADATA +0 -51
  74. pyxllib-0.3.96.dist-info/RECORD +0 -333
  75. pyxllib-0.3.96.dist-info/top_level.txt +0 -2
  76. pyxlpr/ai/__init__.py +0 -5
  77. pyxlpr/ai/clientlib.py +0 -1281
  78. pyxlpr/ai/specialist.py +0 -286
  79. pyxlpr/ai/torch_app.py +0 -172
  80. pyxlpr/ai/xlpaddle.py +0 -655
  81. pyxlpr/ai/xltorch.py +0 -705
  82. pyxlpr/data/__init__.py +0 -11
  83. pyxlpr/data/coco.py +0 -1325
  84. pyxlpr/data/datacls.py +0 -365
  85. pyxlpr/data/datasets.py +0 -200
  86. pyxlpr/data/gptlib.py +0 -1291
  87. pyxlpr/data/icdar/__init__.py +0 -96
  88. pyxlpr/data/icdar/deteval.py +0 -377
  89. pyxlpr/data/icdar/icdar2013.py +0 -341
  90. pyxlpr/data/icdar/iou.py +0 -340
  91. pyxlpr/data/icdar/rrc_evaluation_funcs_1_1.py +0 -463
  92. pyxlpr/data/imtextline.py +0 -473
  93. pyxlpr/data/labelme.py +0 -866
  94. pyxlpr/data/removeline.py +0 -179
  95. pyxlpr/data/specialist.py +0 -57
  96. pyxlpr/eval/__init__.py +0 -85
  97. pyxlpr/paddleocr.py +0 -776
  98. pyxlpr/ppocr/__init__.py +0 -15
  99. pyxlpr/ppocr/configs/rec/multi_language/generate_multi_language_configs.py +0 -226
  100. pyxlpr/ppocr/data/__init__.py +0 -135
  101. pyxlpr/ppocr/data/imaug/ColorJitter.py +0 -26
  102. pyxlpr/ppocr/data/imaug/__init__.py +0 -67
  103. pyxlpr/ppocr/data/imaug/copy_paste.py +0 -170
  104. pyxlpr/ppocr/data/imaug/east_process.py +0 -437
  105. pyxlpr/ppocr/data/imaug/gen_table_mask.py +0 -244
  106. pyxlpr/ppocr/data/imaug/iaa_augment.py +0 -114
  107. pyxlpr/ppocr/data/imaug/label_ops.py +0 -789
  108. pyxlpr/ppocr/data/imaug/make_border_map.py +0 -184
  109. pyxlpr/ppocr/data/imaug/make_pse_gt.py +0 -106
  110. pyxlpr/ppocr/data/imaug/make_shrink_map.py +0 -126
  111. pyxlpr/ppocr/data/imaug/operators.py +0 -433
  112. pyxlpr/ppocr/data/imaug/pg_process.py +0 -906
  113. pyxlpr/ppocr/data/imaug/randaugment.py +0 -143
  114. pyxlpr/ppocr/data/imaug/random_crop_data.py +0 -239
  115. pyxlpr/ppocr/data/imaug/rec_img_aug.py +0 -533
  116. pyxlpr/ppocr/data/imaug/sast_process.py +0 -777
  117. pyxlpr/ppocr/data/imaug/text_image_aug/__init__.py +0 -17
  118. pyxlpr/ppocr/data/imaug/text_image_aug/augment.py +0 -120
  119. pyxlpr/ppocr/data/imaug/text_image_aug/warp_mls.py +0 -168
  120. pyxlpr/ppocr/data/lmdb_dataset.py +0 -115
  121. pyxlpr/ppocr/data/pgnet_dataset.py +0 -104
  122. pyxlpr/ppocr/data/pubtab_dataset.py +0 -107
  123. pyxlpr/ppocr/data/simple_dataset.py +0 -372
  124. pyxlpr/ppocr/losses/__init__.py +0 -61
  125. pyxlpr/ppocr/losses/ace_loss.py +0 -52
  126. pyxlpr/ppocr/losses/basic_loss.py +0 -135
  127. pyxlpr/ppocr/losses/center_loss.py +0 -88
  128. pyxlpr/ppocr/losses/cls_loss.py +0 -30
  129. pyxlpr/ppocr/losses/combined_loss.py +0 -67
  130. pyxlpr/ppocr/losses/det_basic_loss.py +0 -208
  131. pyxlpr/ppocr/losses/det_db_loss.py +0 -80
  132. pyxlpr/ppocr/losses/det_east_loss.py +0 -63
  133. pyxlpr/ppocr/losses/det_pse_loss.py +0 -149
  134. pyxlpr/ppocr/losses/det_sast_loss.py +0 -121
  135. pyxlpr/ppocr/losses/distillation_loss.py +0 -272
  136. pyxlpr/ppocr/losses/e2e_pg_loss.py +0 -140
  137. pyxlpr/ppocr/losses/kie_sdmgr_loss.py +0 -113
  138. pyxlpr/ppocr/losses/rec_aster_loss.py +0 -99
  139. pyxlpr/ppocr/losses/rec_att_loss.py +0 -39
  140. pyxlpr/ppocr/losses/rec_ctc_loss.py +0 -44
  141. pyxlpr/ppocr/losses/rec_enhanced_ctc_loss.py +0 -70
  142. pyxlpr/ppocr/losses/rec_nrtr_loss.py +0 -30
  143. pyxlpr/ppocr/losses/rec_sar_loss.py +0 -28
  144. pyxlpr/ppocr/losses/rec_srn_loss.py +0 -47
  145. pyxlpr/ppocr/losses/table_att_loss.py +0 -109
  146. pyxlpr/ppocr/metrics/__init__.py +0 -44
  147. pyxlpr/ppocr/metrics/cls_metric.py +0 -45
  148. pyxlpr/ppocr/metrics/det_metric.py +0 -82
  149. pyxlpr/ppocr/metrics/distillation_metric.py +0 -73
  150. pyxlpr/ppocr/metrics/e2e_metric.py +0 -86
  151. pyxlpr/ppocr/metrics/eval_det_iou.py +0 -274
  152. pyxlpr/ppocr/metrics/kie_metric.py +0 -70
  153. pyxlpr/ppocr/metrics/rec_metric.py +0 -75
  154. pyxlpr/ppocr/metrics/table_metric.py +0 -50
  155. pyxlpr/ppocr/modeling/architectures/__init__.py +0 -32
  156. pyxlpr/ppocr/modeling/architectures/base_model.py +0 -88
  157. pyxlpr/ppocr/modeling/architectures/distillation_model.py +0 -60
  158. pyxlpr/ppocr/modeling/backbones/__init__.py +0 -54
  159. pyxlpr/ppocr/modeling/backbones/det_mobilenet_v3.py +0 -268
  160. pyxlpr/ppocr/modeling/backbones/det_resnet_vd.py +0 -246
  161. pyxlpr/ppocr/modeling/backbones/det_resnet_vd_sast.py +0 -285
  162. pyxlpr/ppocr/modeling/backbones/e2e_resnet_vd_pg.py +0 -265
  163. pyxlpr/ppocr/modeling/backbones/kie_unet_sdmgr.py +0 -186
  164. pyxlpr/ppocr/modeling/backbones/rec_mobilenet_v3.py +0 -138
  165. pyxlpr/ppocr/modeling/backbones/rec_mv1_enhance.py +0 -258
  166. pyxlpr/ppocr/modeling/backbones/rec_nrtr_mtb.py +0 -48
  167. pyxlpr/ppocr/modeling/backbones/rec_resnet_31.py +0 -210
  168. pyxlpr/ppocr/modeling/backbones/rec_resnet_aster.py +0 -143
  169. pyxlpr/ppocr/modeling/backbones/rec_resnet_fpn.py +0 -307
  170. pyxlpr/ppocr/modeling/backbones/rec_resnet_vd.py +0 -286
  171. pyxlpr/ppocr/modeling/heads/__init__.py +0 -54
  172. pyxlpr/ppocr/modeling/heads/cls_head.py +0 -52
  173. pyxlpr/ppocr/modeling/heads/det_db_head.py +0 -118
  174. pyxlpr/ppocr/modeling/heads/det_east_head.py +0 -121
  175. pyxlpr/ppocr/modeling/heads/det_pse_head.py +0 -37
  176. pyxlpr/ppocr/modeling/heads/det_sast_head.py +0 -128
  177. pyxlpr/ppocr/modeling/heads/e2e_pg_head.py +0 -253
  178. pyxlpr/ppocr/modeling/heads/kie_sdmgr_head.py +0 -206
  179. pyxlpr/ppocr/modeling/heads/multiheadAttention.py +0 -163
  180. pyxlpr/ppocr/modeling/heads/rec_aster_head.py +0 -393
  181. pyxlpr/ppocr/modeling/heads/rec_att_head.py +0 -202
  182. pyxlpr/ppocr/modeling/heads/rec_ctc_head.py +0 -88
  183. pyxlpr/ppocr/modeling/heads/rec_nrtr_head.py +0 -826
  184. pyxlpr/ppocr/modeling/heads/rec_sar_head.py +0 -402
  185. pyxlpr/ppocr/modeling/heads/rec_srn_head.py +0 -280
  186. pyxlpr/ppocr/modeling/heads/self_attention.py +0 -406
  187. pyxlpr/ppocr/modeling/heads/table_att_head.py +0 -246
  188. pyxlpr/ppocr/modeling/necks/__init__.py +0 -32
  189. pyxlpr/ppocr/modeling/necks/db_fpn.py +0 -111
  190. pyxlpr/ppocr/modeling/necks/east_fpn.py +0 -188
  191. pyxlpr/ppocr/modeling/necks/fpn.py +0 -138
  192. pyxlpr/ppocr/modeling/necks/pg_fpn.py +0 -314
  193. pyxlpr/ppocr/modeling/necks/rnn.py +0 -92
  194. pyxlpr/ppocr/modeling/necks/sast_fpn.py +0 -284
  195. pyxlpr/ppocr/modeling/necks/table_fpn.py +0 -110
  196. pyxlpr/ppocr/modeling/transforms/__init__.py +0 -28
  197. pyxlpr/ppocr/modeling/transforms/stn.py +0 -135
  198. pyxlpr/ppocr/modeling/transforms/tps.py +0 -308
  199. pyxlpr/ppocr/modeling/transforms/tps_spatial_transformer.py +0 -156
  200. pyxlpr/ppocr/optimizer/__init__.py +0 -61
  201. pyxlpr/ppocr/optimizer/learning_rate.py +0 -228
  202. pyxlpr/ppocr/optimizer/lr_scheduler.py +0 -49
  203. pyxlpr/ppocr/optimizer/optimizer.py +0 -160
  204. pyxlpr/ppocr/optimizer/regularizer.py +0 -52
  205. pyxlpr/ppocr/postprocess/__init__.py +0 -55
  206. pyxlpr/ppocr/postprocess/cls_postprocess.py +0 -33
  207. pyxlpr/ppocr/postprocess/db_postprocess.py +0 -234
  208. pyxlpr/ppocr/postprocess/east_postprocess.py +0 -143
  209. pyxlpr/ppocr/postprocess/locality_aware_nms.py +0 -200
  210. pyxlpr/ppocr/postprocess/pg_postprocess.py +0 -52
  211. pyxlpr/ppocr/postprocess/pse_postprocess/__init__.py +0 -15
  212. pyxlpr/ppocr/postprocess/pse_postprocess/pse/__init__.py +0 -29
  213. pyxlpr/ppocr/postprocess/pse_postprocess/pse/setup.py +0 -14
  214. pyxlpr/ppocr/postprocess/pse_postprocess/pse_postprocess.py +0 -118
  215. pyxlpr/ppocr/postprocess/rec_postprocess.py +0 -654
  216. pyxlpr/ppocr/postprocess/sast_postprocess.py +0 -355
  217. pyxlpr/ppocr/tools/__init__.py +0 -14
  218. pyxlpr/ppocr/tools/eval.py +0 -83
  219. pyxlpr/ppocr/tools/export_center.py +0 -77
  220. pyxlpr/ppocr/tools/export_model.py +0 -129
  221. pyxlpr/ppocr/tools/infer/predict_cls.py +0 -151
  222. pyxlpr/ppocr/tools/infer/predict_det.py +0 -300
  223. pyxlpr/ppocr/tools/infer/predict_e2e.py +0 -169
  224. pyxlpr/ppocr/tools/infer/predict_rec.py +0 -414
  225. pyxlpr/ppocr/tools/infer/predict_system.py +0 -204
  226. pyxlpr/ppocr/tools/infer/utility.py +0 -629
  227. pyxlpr/ppocr/tools/infer_cls.py +0 -83
  228. pyxlpr/ppocr/tools/infer_det.py +0 -134
  229. pyxlpr/ppocr/tools/infer_e2e.py +0 -122
  230. pyxlpr/ppocr/tools/infer_kie.py +0 -153
  231. pyxlpr/ppocr/tools/infer_rec.py +0 -146
  232. pyxlpr/ppocr/tools/infer_table.py +0 -107
  233. pyxlpr/ppocr/tools/program.py +0 -596
  234. pyxlpr/ppocr/tools/test_hubserving.py +0 -117
  235. pyxlpr/ppocr/tools/train.py +0 -163
  236. pyxlpr/ppocr/tools/xlprog.py +0 -748
  237. pyxlpr/ppocr/utils/EN_symbol_dict.txt +0 -94
  238. pyxlpr/ppocr/utils/__init__.py +0 -24
  239. pyxlpr/ppocr/utils/dict/ar_dict.txt +0 -117
  240. pyxlpr/ppocr/utils/dict/arabic_dict.txt +0 -162
  241. pyxlpr/ppocr/utils/dict/be_dict.txt +0 -145
  242. pyxlpr/ppocr/utils/dict/bg_dict.txt +0 -140
  243. pyxlpr/ppocr/utils/dict/chinese_cht_dict.txt +0 -8421
  244. pyxlpr/ppocr/utils/dict/cyrillic_dict.txt +0 -163
  245. pyxlpr/ppocr/utils/dict/devanagari_dict.txt +0 -167
  246. pyxlpr/ppocr/utils/dict/en_dict.txt +0 -63
  247. pyxlpr/ppocr/utils/dict/fa_dict.txt +0 -136
  248. pyxlpr/ppocr/utils/dict/french_dict.txt +0 -136
  249. pyxlpr/ppocr/utils/dict/german_dict.txt +0 -143
  250. pyxlpr/ppocr/utils/dict/hi_dict.txt +0 -162
  251. pyxlpr/ppocr/utils/dict/it_dict.txt +0 -118
  252. pyxlpr/ppocr/utils/dict/japan_dict.txt +0 -4399
  253. pyxlpr/ppocr/utils/dict/ka_dict.txt +0 -153
  254. pyxlpr/ppocr/utils/dict/korean_dict.txt +0 -3688
  255. pyxlpr/ppocr/utils/dict/latin_dict.txt +0 -185
  256. pyxlpr/ppocr/utils/dict/mr_dict.txt +0 -153
  257. pyxlpr/ppocr/utils/dict/ne_dict.txt +0 -153
  258. pyxlpr/ppocr/utils/dict/oc_dict.txt +0 -96
  259. pyxlpr/ppocr/utils/dict/pu_dict.txt +0 -130
  260. pyxlpr/ppocr/utils/dict/rs_dict.txt +0 -91
  261. pyxlpr/ppocr/utils/dict/rsc_dict.txt +0 -134
  262. pyxlpr/ppocr/utils/dict/ru_dict.txt +0 -125
  263. pyxlpr/ppocr/utils/dict/ta_dict.txt +0 -128
  264. pyxlpr/ppocr/utils/dict/table_dict.txt +0 -277
  265. pyxlpr/ppocr/utils/dict/table_structure_dict.txt +0 -2759
  266. pyxlpr/ppocr/utils/dict/te_dict.txt +0 -151
  267. pyxlpr/ppocr/utils/dict/ug_dict.txt +0 -114
  268. pyxlpr/ppocr/utils/dict/uk_dict.txt +0 -142
  269. pyxlpr/ppocr/utils/dict/ur_dict.txt +0 -137
  270. pyxlpr/ppocr/utils/dict/xi_dict.txt +0 -110
  271. pyxlpr/ppocr/utils/dict90.txt +0 -90
  272. pyxlpr/ppocr/utils/e2e_metric/Deteval.py +0 -574
  273. pyxlpr/ppocr/utils/e2e_metric/polygon_fast.py +0 -83
  274. pyxlpr/ppocr/utils/e2e_utils/extract_batchsize.py +0 -87
  275. pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_fast.py +0 -457
  276. pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_slow.py +0 -592
  277. pyxlpr/ppocr/utils/e2e_utils/pgnet_pp_utils.py +0 -162
  278. pyxlpr/ppocr/utils/e2e_utils/visual.py +0 -162
  279. pyxlpr/ppocr/utils/en_dict.txt +0 -95
  280. pyxlpr/ppocr/utils/gen_label.py +0 -81
  281. pyxlpr/ppocr/utils/ic15_dict.txt +0 -36
  282. pyxlpr/ppocr/utils/iou.py +0 -54
  283. pyxlpr/ppocr/utils/logging.py +0 -69
  284. pyxlpr/ppocr/utils/network.py +0 -84
  285. pyxlpr/ppocr/utils/ppocr_keys_v1.txt +0 -6623
  286. pyxlpr/ppocr/utils/profiler.py +0 -110
  287. pyxlpr/ppocr/utils/save_load.py +0 -150
  288. pyxlpr/ppocr/utils/stats.py +0 -72
  289. pyxlpr/ppocr/utils/utility.py +0 -80
  290. pyxlpr/ppstructure/__init__.py +0 -13
  291. pyxlpr/ppstructure/predict_system.py +0 -187
  292. pyxlpr/ppstructure/table/__init__.py +0 -13
  293. pyxlpr/ppstructure/table/eval_table.py +0 -72
  294. pyxlpr/ppstructure/table/matcher.py +0 -192
  295. pyxlpr/ppstructure/table/predict_structure.py +0 -136
  296. pyxlpr/ppstructure/table/predict_table.py +0 -221
  297. pyxlpr/ppstructure/table/table_metric/__init__.py +0 -16
  298. pyxlpr/ppstructure/table/table_metric/parallel.py +0 -51
  299. pyxlpr/ppstructure/table/table_metric/table_metric.py +0 -247
  300. pyxlpr/ppstructure/table/tablepyxl/__init__.py +0 -13
  301. pyxlpr/ppstructure/table/tablepyxl/style.py +0 -283
  302. pyxlpr/ppstructure/table/tablepyxl/tablepyxl.py +0 -118
  303. pyxlpr/ppstructure/utility.py +0 -71
  304. pyxlpr/xlai.py +0 -10
  305. /pyxllib/{ext/autogui → autogui}/virtualkey.py +0 -0
  306. {pyxllib-0.3.96.dist-info → pyxllib-0.3.197.dist-info/licenses}/LICENSE +0 -0
pyxlpr/data/labelme.py DELETED
@@ -1,866 +0,0 @@
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- # @Author : 陈坤泽
- # @Email : 877362867@qq.com
- # @Date : 2020/08/15 00:59
-
- import os
- from tqdm import tqdm
- import json
- import ujson
- import copy
- from collections import Counter
-
- import numpy as np
-
- from pyxllib.prog.newbie import round_int
- from pyxllib.prog.pupil import DictTool
- from pyxllib.prog.specialist import get_xllog, Iterate
- from pyxllib.file.specialist import PathGroups, get_encoding, XlPath
- from pyxllib.prog.specialist import mtqdm
- from pyxllib.cv.expert import xlpil
- from pyxllib.algo.geo import ltrb2xywh, rect_bounds, warp_points, resort_quad_points, rect2polygon, get_warp_mat
-
-
- def __0_basic():
-     """ Notes for each sub-module can be written here """
-
-
- class BasicLabelDataset:
-     """ Basic operations for datasets with one annotation file per image """
-
-     def __init__(self, root, relpath2data=None, *, reads=True, prt=False, fltr=None, slt=None, extdata=None):
-         """
-         :param root: root directory of the data
-         :param dict[str, readed_data] relpath2data: {relpath: data1, 'a/1.txt': data2, ...}
-             If no concrete data values are passed in, the data values are initialized automatically from the directory contents
-
-             relpath is the relative path string of the corresponding XlPath annotation file
-             data1, data2 are the parsed annotation data, stored in different formats depending on the case
-                 json files keep the in-memory json object structure
-                 txt files may be parsed into a somewhat structured form
-         :param extdata: extra/extended information to store
-         :param fltr: short for filter, a PathGroups filtering rule, usually used for image matching
-             None: no filter rule; groups are kept even when no file in the slt format exists
-             a string rule such as 'json': uses select_group_which_hassuffix, the group must contain the given suffix
-             judge(k, v): a custom function rule
-         :param slt: short for select, the annotation file suffix to select
-             If slt is passed, this Basic base class only presets the file parameter; the data part is set to None and must be read explicitly later
-
-         >> BasicLabelData('textGroup/aabb', {'a.json': ..., 'a/1.json': ...})
-         >> BasicLabelData('textGroup/aabb', slt='json')
-         >> BasicLabelData('textGroup/aabb', fltr='jpg', slt='json')  # only json files that have a matching jpg image
-         >> BasicLabelData('textGroup/aabb', fltr='jpg|png', slt='json')
-         """
-
-         # 1 basic setup
-         root = XlPath(root)
-         self.root, self.rp2data, self.extdata = root, relpath2data or {}, extdata or {}
-         self.pathgs = None
-
-         if relpath2data is not None or slt is None:
-             return
-
-         # 2 if no default data was given and slt was passed, read annotations via the default file-association rules
-         relpath2data = {}
-         gs = PathGroups.groupby(XlPath(root).rglob_files())
-         if isinstance(fltr, str):
-             gs = gs.select_group_which_hassuffix(fltr)
-         elif callable(fltr):
-             gs = gs.select_group(fltr)
-         self.pathgs = gs
-
-         # 3 read the data
-         for stem, suffixs in tqdm(gs.data.items(), f'{self.__class__.__name__}读取数据', disable=not prt):
-             f = XlPath(stem + f'.{slt}')
-             if reads and f.exists():
-                 # dprint(f)  # an empty json raises: json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
-                 relpath2data[f.relpath(self.root)] = f.read_auto()
-             else:
-                 relpath2data[f.relpath(self.root)] = None
-
-         self.rp2data = relpath2data
-
-     def __len__(self):
-         return len(self.rp2data)
-
-     def read(self, relpath, **kwargs):
-         """
-         :param relpath: must be a slash-style relative path such as 'a/1.txt', 'b/2.json'
-         """
-         self.rp2data[relpath] = (self.root / relpath).read_auto(**kwargs)
-
-     def reads(self, prt=False, **kwargs):
-         """ For performance, initialization does not read the data by default; call reads to actually load it """
-         for k in tqdm(self.rp2data.keys(), f'读取{self.__class__.__name__}数据', disable=not prt):
-             self.rp2data[k] = (self.root / k).read_auto(**kwargs)
-
-     def write(self, relpath, **kwargs):
-         """
-         :param relpath: must be a slash-style relative path such as 'a/1.txt', 'b/2.json'
-         """
-         data = self.rp2data[relpath]
-         file = self.root / relpath
-         if file.is_file():  # if the file exists, keep its original encoding
-             with open(str(file), 'rb') as f:
-                 bstr = f.read()
-             encoding = get_encoding(bstr)
-             kwargs['encoding'] = encoding
-             kwargs['if_exists'] = 'replace'
-             file.write_auto(data, **kwargs)
-         else:  # otherwise just write it out
-             file.write_auto(data, **kwargs)
-
-     def writes(self, *, max_workers=8, print_mode=False, **kwargs):
-         """ Rewrite every annotation file
-
-         Either the data was modified in memory and has to be written back,
-         or the in-memory data was converted from coco or another format and new annotation files have to be generated
-         """
-         mtqdm(lambda x: self.write(x, **kwargs), self.rp2data.keys(), desc=f'{self.__class__.__name__}写入标注数据',
-               max_workers=max_workers, disable=not print_mode)
-
-
- def __1_labelme():
-     """ """
-
-
- # I display these in the order "red, orange, yellow, green, blue, indigo, purple"
- LABEL_COLORMAP7 = [(0, 0, 0), (255, 0, 0), (255, 125, 0), (255, 255, 0),
-                    (0, 255, 0), (0, 0, 255), (0, 255, 255), (255, 0, 255)]
-
-
- def is_labelme_json_data(data):
-     """ Whether data is in the labelme annotation format
-     :param data: dict
-     :return: True or False
-     """
-     if not isinstance(data, dict):
-         return False
-     has_keys = set('version flags shapes imagePath imageData imageHeight imageWidth'.split())
-     return not (has_keys - data.keys())
-
-
- def reduce_labelme_jsonfile(jsonpath):
-     """ Remove imageData """
-     p = str(jsonpath)
-
-     with open(p, 'rb') as f:
-         bstr = f.read()
-     encoding = get_encoding(bstr)
-     data = ujson.loads(bstr.decode(encoding=encoding))
-
-     if is_labelme_json_data(data) and data['imageData']:
-         data['imageData'] = None
-         XlPath(p).write_json(data, encoding=encoding, if_exists='replace')
-
-
- def reduce_labelme_dir(d, print_mode=False):
-     """ Slim down all labelme json files in a directory """
-
-     def printf(*args, **kwargs):
-         if print_mode:
-             print(*args, **kwargs)
-
-     i = 0
-     for f in XlPath(d).rglob_files('*.json'):
-         data = f.read_json()
-         if data.get('imageData'):
-             data['imageData'] = None
-             f.write_json(data)
-             i += 1
-             printf(i, f)
-
-
- class ToLabelmeJson:
-     """ Convert an annotation format into the labelme form
-
-     It is best to initialize with the image path, which provides some useful related information
-     Then implement a custom get_data interface that initializes self.data; after it runs, the dict data can be read from self.data
-     Custom shapes can be made by modifying the get_shape function if needed
-     Call write to save to a file
-
-     document: https://www.yuque.com/xlpr/pyxllib/ks5h4o
-     """
-
-     # Other people may be using this high-level interface of my library, so it should not raise unexpected warnings, at least until this functionality is split out of my own library first
-     # @deprecated(reason='use LabelmeData instead')
-     def __init__(self, imgpath):
-         """
-         :param imgpath: optional image path; strongly recommended, otherwise the generated label json will lack the image width/height
-         """
-         self.imgpath = XlPath(imgpath)
-         # read the image data; some complex conversion rules may need the original image data
-         if self.imgpath:
-             # usually only the size is needed; reading with pil is faster and avoids loading the image rgb data
-             self.img = xlpil.read(self.imgpath)
-         else:
-             self.img = None
-         self.data = self.get_data_base()  # the dict data stored in the json
-
-     def get_data(self, infile):
-         """ Format conversion interface; subclasses must implement this method themselves
-
-         :param infile: the annotation data to parse
-         """
-         raise NotImplementedError('get_data方法必须在子类中实现')
-
-     def get_data_base(self, name='', height=0, width=0):
-         """ Get the skeleton of a labelme annotation file (this is the standard structure; it can also be customized)
-
-         If no image was given at initialization, values such as name can be passed in here
-         """
-         # 1 default attributes, plus the image name and size
-         if self.imgpath:
-             name = self.imgpath.name
-             height, width = self.img.height, self.img.width
-         # 2 build the structural skeleton
-         data = {'version': '4.5.6',
-                 'flags': {},
-                 'shapes': [],
-                 'imagePath': name,
-                 'imageData': None,
-                 'imageWidth': width,
-                 'imageHeight': height,
-                 }
-         return data
-
-     def get_shape(self, label, points, shape_type=None, dtype=None, group_id=None, **kwargs):
-         """ The most basic add-shape helper
-
-         :param shape_type: inferred from the number of points; usually polygon by default
-             formats that must be specified manually: line, circle
-         :param dtype: can reset the numeric type used to store points; floats by default, converting to int is more compact
-         :param group_id: originally meant for grouping, but its value is appended to the label in parentheses, which allows some special tricks during visualization
-         """
-         # 1 normalize the point set format
-         points = np.array(points, dtype=dtype).reshape(-1, 2).tolist()
-         # 2 determine the shape type
-         if shape_type is None:
-             m = len(points)
-             if m == 1:
-                 shape_type = 'point'
-             elif m == 2:
-                 shape_type = 'rectangle'
-             elif m >= 3:
-                 shape_type = 'polygon'
-             else:
-                 raise ValueError
-         # 3 create the annotation
-         shape = {'flags': {},
-                  'group_id': group_id,
-                  'label': str(label),
-                  'points': points,
-                  'shape_type': shape_type}
-         shape.update(kwargs)
-         return shape
-
-     def get_shape2(self, **kwargs):
-         """ A purely dict-based interface """
-         label = kwargs.get('label', '')
-         points = kwargs['points']  # this field is required
-         kw = copy.deepcopy(kwargs)
-         del kw['label']
-         del kw['points']
-         return self.get_shape(label, points, **kw)
-
-     def add_shape(self, *args, **kwargs):
-         self.data['shapes'].append(self.get_shape(*args, **kwargs))
-
-     def add_shape2(self, **kwargs):
-         self.data['shapes'].append(self.get_shape2(**kwargs))
-
-     def write(self, dst=None, if_exists='replace'):
-         """
-         :param dst: write the json file to the target path dst; defaults to a same-named json file next to self.imgpath
-         :return: the path of the written file
-         """
-         if dst is None and self.imgpath:
-             dst = self.imgpath.with_suffix('.json')
-         # the stdlib json supports indent=None, but ujson requires an explicit indent=0
-         return XlPath(dst).write_auto(self.data, if_exists=if_exists, indent=0)
-
-     @classmethod
-     def create_json(cls, imgpath, annotation):
-         """ Input an image path p and the corresponding annotation data (usually a txt file in the same directory) """
-         try:
-             obj = cls(imgpath)
-         except TypeError as e:  # log an error for anything that cannot be parsed
-             get_xllog().exception(e)
-             return
-         obj.get_data(annotation)
-         obj.write()  # save the json file to the image's directory
-
-     @classmethod
-     def main_normal(cls, imdir, labeldir=None, label_file_suffix='.txt'):
-         """ A higher-level wrapper: given a directory, annotate all images in it directly
-
-         :param imdir: image directory
-         :param labeldir: annotation data directory, defaults to the same directory as imdir
-         :return:
-         """
-         ims = XlPath(imdir).rglob_images()
-         if not labeldir: labeldir = imdir
-         txts = [(XlPath(labeldir) / (f.stem + label_file_suffix)) for f in ims]
-         cls.main_pair(ims, txts)
-
-     @classmethod
-     def main_pair(cls, images, labels):
-         """ Process images and labels paired one to one """
-         Iterate(zip(images, labels)).run(lambda x: cls.create_json(x[0], x[1]),
-                                          pinterval='20%', max_workers=8)
-
-
- class Quad2Labelme(ToLabelmeJson):
-     """ Convert quadrilateral-style annotations to labelme """
-
-     def get_data(self, infile):
-         lines = XlPath(infile).read_text().splitlines()
-         for line in lines:
-             # this is usually what to change: the parsing rule for each line
-             vals = line.split(',', maxsplit=8)
-             if len(vals) < 9: continue
-             pts = [int(v) for v in vals[:8]]  # the point set
-             label = vals[-1]  # the annotated text
-             # get_shape also has a shape_type parameter that can be set
-             # a 2-point rectangle or a polygon with 3+ points is detected automatically, no need to set shape_type
-             self.add_shape(label, pts)
-
-
- class LabelmeDict:
-     """ Dict data in the labelme format
-
-     The member functions here basically all operate in place
-     """
-
-     @classmethod
-     def gen_data(cls, imfile=None, **kwargs):
-         """ The main skeleton structure
-         :param imfile: an image path can be passed in
-         """
-         # 1 initialization when an image path is given
-         if imfile:
-             file = XlPath(imfile)
-             name = file.name
-             img = xlpil.read(file)
-             height, width = img.height, img.width
-         else:
-             name, height, width = '', 0, 0
-
-         # 2 field values
-         data = {'version': '5.1.7',
-                 'flags': {},
-                 'shapes': [],
-                 'imagePath': name,
-                 'imageData': None,
-                 'imageWidth': width,
-                 'imageHeight': height,
-                 }
-         if kwargs:
-             data.update(kwargs)
-         return data
-
-     @classmethod
-     def gen_ocr_data(cls, imfile=None, **kwargs):
-         """ Supports calling PaddleOCR for pre-recognition
-
-         This interface is kept for convenience; PaddleOCR.labelme_ocr is recommended for batch recognition instead
-         """
-         from paddleocr import PaddleOCR
-         ppocr = PaddleOCR.get_paddleocr()
-
-         data = cls.gen_data(imfile, **kwargs)
-         lines = ppocr.ocr(str(imfile))
-         for line in lines:
-             pts, [text, score] = line
-             pts = [[int(p[0]), int(p[1])] for p in pts]  # convert to int
-             sp = cls.gen_shape({'text': text, 'score': round(float(score), 4)}, pts)
-             data['shapes'].append(sp)
-         return data
-
-     @classmethod
-     def gen_shape(cls, label, points, shape_type=None, dtype=None, group_id=None, **kwargs):
-         """ The most basic add-shape helper
-
-         :param label: dict input is supported and will be encoded as a json-format string
-         :param shape_type: inferred from the number of points; usually polygon by default
-             formats that must be specified manually: line, circle
-         :param dtype: can reset the numeric type used to store points; floats by default, converting to int is more compact
-         :param group_id: originally meant for grouping, but its value is appended to the label in parentheses, which allows some special tricks during visualization
-         """
-         # 1 normalize the point set format
-         points = np.array(points, dtype=dtype).reshape(-1, 2).tolist()
-         # 2 determine the shape type
-         if shape_type is None:
-             m = len(points)
-             if m == 1:
-                 shape_type = 'point'
-             elif m == 2:
-                 shape_type = 'rectangle'
-             elif m >= 3:
-                 shape_type = 'polygon'
-             else:
-                 raise ValueError
-         # 3 create the annotation
-         if isinstance(label, dict):
-             label = json.dumps(label, ensure_ascii=False)
-         shape = {'flags': {},
-                  'group_id': group_id,
-                  'label': str(label),
-                  'points': points,
-                  'shape_type': shape_type}
-         shape.update(kwargs)
-         return shape
-
-     @classmethod
-     def gen_shape2(cls, **kwargs):
-         """ A purely dict-based interface """
-         label = kwargs.get('label', '')
-         points = kwargs['points']  # this field is required
-         kw = copy.deepcopy(kwargs)
-         if 'label' in kw:
-             del kw['label']
-         if 'points' in kw:
-             del kw['points']
-         return cls.gen_shape(label, points, **kw)
-
-     @classmethod
-     def reduce(cls, lmdict, *, inplace=True):
-         if not inplace:
-             lmdict = copy.deepcopy(lmdict)
-
-         lmdict['imageData'] = None
-         return lmdict
-
-     @classmethod
-     def refine_structure(cls, old_json_path, *, old_img_path=None,
-                          new_stem_name=None, new_img_suffix=None):
-         """ Reset a labelme annotation file; a fairly comprehensive adjustment and optimization interface
-
-         :param old_json_path: original json path
-         :param old_img_path: original image path; optional, derived from old_json_path
-         :param new_stem_name: new stem name; when omitted, the json's stem is used
-         :param new_img_suffix: whether to change the image suffix, commonly used to unify image formats
-             when omitted, the suffix of the image that was found is used; if no image is found, the imagePath suffix is used
-         """
-         from pyxllib.cv.expert import xlcv
-
-         # 1 parse the parameters
-         old_json_path = XlPath(old_json_path)
-         parent = old_json_path.parent
-         lmdict = old_json_path.read_json()
-
-         if old_img_path is None:
-             old_img_path = parent / lmdict['imagePath']
-             if not old_img_path.is_file():
-                 # if the imagePath image does not exist, derive it from the json name; if that still does not exist, fall back to the imagePath suffix
-                 try:
-                     old_img_path = next(parent.glob_images(f'{old_json_path.stem}.*'))
-                 except StopIteration:
-                     old_img_path = parent / (old_json_path.stem + XlPath(lmdict['imagePath']).suffix)
-
-         if new_stem_name is None:
-             new_stem_name = old_json_path.stem
-
-         if new_img_suffix is None:
-             new_img_suffix = old_img_path.suffix
-
-         # 2 rename and reset
-         new_json_path = parent / (new_stem_name + '.json')
-         new_img_path = parent / (new_stem_name + new_img_suffix)
-
-         # optimize the json data
-         cls.reduce(lmdict)
-         lmdict['imagePath'] = new_img_path.name
-         new_json_path.write_json(lmdict)
-         if new_json_path.as_posix() != old_json_path.as_posix():
-             old_json_path.delete()
-
-         # TODO trim over-long floats in points? does xllabelme already do this by default?
-
-         # optimize the image
-         if old_img_path.is_file():
-             xlcv.write(xlcv.read(old_img_path), new_img_path)
-             if new_img_path.as_posix() != old_img_path.as_posix():
-                 old_img_path.delete()
-
-     @classmethod
-     def flip_points(cls, lmdict, direction, *, inplace=True):
-         """
-         :param direction: the rotation direction for points
-             1 means rotate 90 degrees clockwise, 2 means 180 degrees clockwise, ...
-             -1 means rotate 90 degrees counter-clockwise, ...
-         :return:
-         """
-         if not inplace:
-             lmdict = copy.deepcopy(lmdict)
-
-         w, h = lmdict['imageWidth'], lmdict['imageHeight']
-         pts = [[[0, 0], [w, 0], [w, h], [0, h]],
-                [[h, 0], [h, w], [0, w], [0, 0]],
-                [[w, h], [0, h], [0, 0], [w, 0]],
-                [[0, w], [0, 0], [h, 0], [h, w]]]
-         warp_mat = get_warp_mat(pts[0], pts[direction % 4])
-
-         if direction % 2:
-             lmdict['imageWidth'], lmdict['imageHeight'] = lmdict['imageHeight'], lmdict['imageWidth']
-         shapes = lmdict['shapes']
-         for i, shape in enumerate(shapes):
-             pts = [warp_points(x, warp_mat)[0].tolist() for x in shape['points']]
-             if shape['shape_type'] == 'rectangle':
-                 pts = resort_quad_points(rect2polygon(pts))
-                 shape['points'] = [pts[0], pts[2]]
-             elif shape['shape_type'] == 'polygon' and len(pts) == 4:
-                 shape['points'] = resort_quad_points(pts)
-             else:  # other shapes are left untouched for now, without raising an error
-                 pass
-         return lmdict
-
-     @classmethod
-     def flip_img_and_json(cls, impath, direction):
-         """ Rotate impath; a matching json, if present, is handled automatically
-         demo_flip_labelme demonstrates how to use this image/labelme flipping feature
-
-         :param XlPath impath: image path
-         :param direction: marks the image's current orientation: 0 normal, 1 flipped right, 2 flipped down, 3 flipped left
-             clockwise 0123 describes the current image orientation
-             this parameter could even be None, calling a model to detect it when not given, but that model is not very accurate, so this feature is not implemented for now
-         """
-         # rotate the image
-         im = xlpil.read(impath)
-         im = xlpil.flip_direction(im, direction)
-         xlpil.write(im, impath)
-
-         # the json part
-         jsonpath = impath.with_suffix('.json')
-         if jsonpath.exists():
-             lmdict = jsonpath.read_json('utf8')  # must be in labelme format; other formats are not supported
-             cls.flip_points(lmdict, -direction)  # in-place by default
-             jsonpath.write_json(lmdict, 'utf8')
-
-     @classmethod
-     def update_labelattr(cls, lmdict, *, points=False, inplace=True):
-         """
-
-         :param points: whether to update geometric info such as points and bbox in labelattr
-             and to add points when there is no geometric info at all
-         """
-         if not inplace:
-             lmdict = copy.deepcopy(lmdict)
-
-         for shape in lmdict['shapes']:
-             # 1 attribute dict; at least initialize a label attribute first
-             labelattr = DictTool.json_loads(shape['label'], 'label')
-             # 2 fill in the other extended attribute values
-             keys = set(shape.keys())
-             stdkeys = set('label,points,group_id,shape_type,flags'.split(','))
-             for k in (keys - stdkeys):
-                 labelattr[k] = shape[k]
-                 del shape[k]  # the original extended field has to be removed
-
-             # 3 handle geometric info such as points
-             if points:
-                 if 'bbox' in labelattr:
-                     labelattr['bbox'] = ltrb2xywh(rect_bounds(shape['points']))
-                 else:
-                     labelattr['points'] = shape['points']
-
-             # + write back to the shape
-             shape['label'] = json.dumps(labelattr, ensure_ascii=False)
-         return lmdict
-
-     @classmethod
-     def to_quad_pts(cls, shape):
-         """ Turn one shape annotation into a 4-point quadrilateral """
-         pts = shape['points']
-         t = shape['shape_type']
-         if t == 'rectangle':
-             return rect2polygon(pts)
-         elif t == 'polygon':
-             if len(pts) != 4:
-                 # use the bounding rectangle as a fallback for now
-                 xs = [p[0] for p in pts]
-                 ys = [p[1] for p in pts]
-                 r = [(min(xs), min(ys)), (max(xs), max(ys))]
-                 pts = rect2polygon(r)
-             return pts
-         else:
-             raise NotImplementedError(f'{t}')
-
-
- class LabelmeDataset(BasicLabelDataset):
-     def __init__(self, root, relpath2data=None, *, reads=True, prt=False, fltr='json', slt='json', extdata=None):
-         """
-         :param root: root directory of the files
-         :param relpath2data: {jsonfile: lmdict, ...}, where each lmdict is standard labelme file content
-             If no concrete data values are passed in, the data values are initialized automatically from the directory contents
-
-         2021-06-02 (Wed) 16:26: for engineering and other reasons, the is_labelme_json_data check was removed;
-             try to select the correct json files via the fltr and slt mechanisms instead
-         """
-         super().__init__(root, relpath2data, reads=reads, prt=prt, fltr=fltr, slt=slt, extdata=extdata)
-
-         # existing data has already been read; empty labelme annotations have to be filled in here
-         if self.pathgs:
-             for stem, suffixs in tqdm(self.pathgs.data.items(), f'{self.__class__.__name__}优化数据', disable=not prt):
-                 f = XlPath(stem + f'.{slt}')
-                 if reads and not f.exists():
-                     self.rp2data[f.relpath(self.root)] = LabelmeDict.gen_data(XlPath.init(stem, suffix=suffixs[0]))
-
-         # clean up rp2data by dropping dicts that are not actually labelme
-         rp2data = {}
-         for k, v in self.rp2data.items():
-             if is_labelme_json_data(v):
-                 rp2data[k] = v
-         self.rp2data = rp2data
-
-     def reduces(self):
-         """ Remove the imageData field values """
-         for lmdict in self.rp2data.values():
-             LabelmeDict.reduce(lmdict)
-
-     def refine_structures(self, *, img_suffix=None):
-         """ Reset the whole labelme dataset
-
-         :param img_suffix: whether to unify the image suffix, e.g. .jpg
-
-         Different scenarios have different issues; understand this function's logic and what kind of problems it solves before calling it
-         """
-         # some dicts may have a wrong imagePath; this method can fix that
-         for jsonfile in tqdm(self.rp2data.keys(), desc='labelme字典优化'):
-             LabelmeDict.refine_structure(self.root / jsonfile, new_img_suffix=img_suffix)
-
-     def update_labelattrs(self, *, points=False):
-         """ Upgrade shape['label'] to a dict type
-
-         can handle issues such as content_class in the old real-estate annotations
-         """
-         for jsonfile, lmdict in self.rp2data.items():
-             LabelmeDict.update_labelattr(lmdict, points=points)
-
-     def to_excel(self, savepath):
-         """ Convert to a dataframe table for inspection
-
-         There are too many details here; labelme can first be converted to coco and then coco to excel.
-         coco assigns ids to images and boxes and can show some extra attributes.
-         """
-         from pyxlpr.data.coco import CocoParser
-         gt_dict = self.to_coco_gt_dict()
-         CocoParser(gt_dict).to_excel(savepath)
-
-     @classmethod
-     def plot(self, img, lmdict):
-         """ Draw the annotations onto a static image """
-         raise NotImplementedError
-
-     def to_coco_gt_dict(self, categories=None):
-         """ Convert labelme to the coco gt annotation format
-
-         There are two main situations
-             1. raw data was converted to labelme annotations and is converted to coco for the first time; ids and related data can all be regenerated
-                 raw_data --visualize--> labelme --export--> coco
-             2. the data was originally coco, was converted to labelme and edited, and now has to go back to coco; original values should be preserved as much as possible
-                 coco --> labelme --manual edits--> labelme' --> coco'
-                 in this case special markers are added when converting coco to labelme, to make converting back easier
-             3. cases 1 and 2 can be chained, giving repeated labelme <-> coco round trips
-
-         :param categories: the categories
-             by default only one category {'id': 0, 'name': 'text', 'supercategory'} is set
-             customization of the category_id of all annotations is supported
-         :return: gt_dict
-             note: if you care about the file or ann order, manipulate self.data yourself before calling this to_coco function
-             for requirements on image_id and annotation_id, process further with CocoData
-         """
-         from pyxlpr.data.coco import CocoGtData
-
-         if not categories:
-             if 'categories' in self.extdata:
-                 # labelme converted from coco stores the original categories
-                 categories = self.extdata['categories']
-             else:
-                 categories = [{'id': 0, 'name': 'text', 'supercategory': ''}]
-
-         # 1 first pass: structural processing  jsonfile, lmdict --> data(image, shapes)
-         img_id, ann_id, data = 0, 0, []
-         for jsonfile, lmdict in self.rp2data.items():
-             # 1.0 upgrade labels to dict type
-             lmdict = LabelmeDict.update_labelattr(lmdict, points=True)
-
-             for sp in lmdict['shapes']:  # convert label to a dict
-                 sp['label'] = json.loads(sp['label'])
-
-             # 1.1 find the image entry among the shapes
-             image = None
-             # 1.1.1 xltype='image'
-             for sp in filter(lambda x: x.get('xltype', None) == 'image', lmdict['shapes']):
-                 image = DictTool.json_loads(sp['label'])
-                 if not image:
-                     raise ValueError(sp['label'])
-                 # TODO remove fields such as coco_eval?
-                 del image['xltype']
-                 break
-             # 1.1.2 if the shapes contain no image-level annotation, generate one
-             if image is None:
-                 # TODO prepend the relative path to file_name?
-                 image = CocoGtData.gen_image(-1, lmdict['imagePath'],
-                                              lmdict['imageHeight'], lmdict['imageWidth'])
-             img_id = max(img_id, image.get('id', -1))
-
-             # 1.2 iterate over the shapes
-             shapes = []
-             for sp in lmdict['shapes']:
-                 label = sp['label']
-                 if 'xltype' not in label:
-                     # a normal annotation box
-                     d = sp['label'].copy()
-                     # DictTool.isub_(d, '')
-                     ann_id = max(ann_id, d.get('id', -1))
-                     shapes.append(d)
-                 elif label['xltype'] == 'image':
-                     # image, image-level annotation data; already handled above, skip here
-                     pass
-                 elif label['xltype'] == 'seg':
-                     # seg, a derived segmentation box, can be dropped when converting back to coco
-                     pass
-                 else:
-                     raise ValueError
-             data.append([image, shapes])
-
-         # 2 second pass: handle ids and similar issues
-         images, annotations = [], []
-         for image, shapes in data:
-             # 2.1 image
-             if image.get('id', -1) == -1:
-                 img_id += 1
-                 image['id'] = img_id
-             images.append(image)
-
-             # 2.2 annotations
-             for sp in shapes:
-                 sp['image_id'] = img_id
-                 if sp.get('id', -1) == -1:
-                     ann_id += 1
-                     sp['id'] = ann_id
-                 # if a box has no category, a default one is set (external business code is strongly advised to set category_id itself)
-                 if 'category_id' not in sp:
-                     sp['category_id'] = categories[0]['id']
-                 DictTool.isub(sp, ['category_name'])
-                 ann = CocoGtData.gen_annotation(**sp)
-                 annotations.append(ann)
-
-         # 3 result
-         gt_dict = CocoGtData.gen_gt_dict(images, annotations, categories)
-         return gt_dict
-
-     def to_ppdet(self, outfile=None, print_mode=True):
-         """ Convert to Paddle's text detection format
-
-         images are stored as relative paths, by default relative to self's root parameter
-         """
-         lines = []
-
-         # 1 convert to one annotation line per image
-         for jsonfile, lmdict in tqdm(self.rp2data.items(), disable=not print_mode):
-             shapes = []  # the annotation list in pp format
-
-             for sp in lmdict['shapes']:
-                 attrs = DictTool.json_loads(sp['label'], 'text')
-                 d = {'transcription': attrs['text'],
-                      'points': round_int(LabelmeDict.to_quad_pts(sp), ndim=2)}
-                 shapes.append(d)
-             imfile = os.path.split(jsonfile)[0] + f'/{lmdict["imagePath"]}'
-             lines.append(f'{imfile}\t{json.dumps(shapes, ensure_ascii=False)}')
-
-         # 2 output
-         content = '\n'.join(lines)
-         if outfile:
-             XlPath(outfile).write_text(content)
-         return content
-
-     def get_char_count_dict(self):
-         """ A helper needed for text recognition: check which characters appear
-
-         return dict: a dict where k is a character that appears and v is its count, sorted from most to least frequent
-             roughly a Counter-like structure
-         """
-         texts = []
-         for lmdict in self.rp2data.values():
-             for sp in lmdict['shapes']:
-                 text = DictTool.json_loads(sp['label'], 'text')['text']
-                 texts.append(text)
-         ct = Counter(''.join(texts))
-         return {k: v for k, v in ct.most_common()}
-
-     def check_char_set(self, refdict=None):
-         """ Check the text character set of this labelme dataset for characters not in paddleocr's recognition dictionary
-         """
-         from pyxllib.algo.specialist import DictCmper
-         from pyxlpr.ppocr.utils import get_dict_content
-
-         # 0 compute the dictionaries
-         if refdict is None:
-             d1 = get_dict_content('ppocr_keys_v1.txt')
-             refdict = {k: 1 for k in d1.split('\n')}
-
-         d2 = self.get_char_count_dict()
-
-         print('1 整体统计信息')
-         dc = DictCmper({'refdict': refdict, 'chars': d2})
-         print(dc.pair_summary())
-
-         print('2 新增字符及出现数量(如果只是多出空白字符,可以统一转空格处理)')
-         keys = set(list(d2.keys())) - set(list(refdict.keys()))
-         sorted(keys, key=lambda k: -d2[k])
-         for k in keys:
-             print(repr(k), d2[k])
-
-         # 3 return all new non-blank characters
-         return {k for k in keys if k.strip()}
-
-     def to_pprec(self, image_dir, txt_path, *, reset=False):
-         """ Convert to Paddle's text recognition format
-
-         :param image_dir: directory where the exported text-line data goes
-         :param txt_path: path of the annotation file
-         :param reset: reset the target directory if it already exists
-         """
-         pass
-
-
- class ItemFormula:
-     """ labelme data processing algorithms prepared for the m2302 Chinese Academy of Sciences question bank
-     If this were written into labelme itself, other scripts could not reuse it, so the core algorithm functionality lives here
-
-     line_id is computed dynamically by looking at the difference from the previous shape; no static algorithm is provided here for now.
-     """
-
-     @classmethod
-     def check_label(cls, shapes):
-         """ Check the data for anomalies """
-         # whether every "删除" (delete) item is handwritten with an empty text
-
-     @classmethod
-     def joint_label(cls, shapes):
-         """ Join the labels of the shapes into one document
-
-         :return: [line1, line2, ...]
-         """
-         last_line_id = 1
-         paper_text = []
-         line_text = []
-         for sp in shapes:
-             label = json.loads(sp['label'])
-             if label['line_id'] != last_line_id:
-                 last_line_id = label['line_id']
-                 paper_text.append(' '.join(line_text))
-                 line_text = []
-
-             if label['content_class'] == '公式':
-                 t = '$' + label['text'] + '$'
-             else:
-                 t = label['text']
-             line_text.append(t)
-
-         paper_text.append(' '.join(line_text))
-
-         return paper_text
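
For readers who relied on the removed pyxlpr/data/labelme.py, the labelme JSON it produced is plain data and can be rebuilt without the library. The sketch below is illustrative only: the helper names gen_labelme_dict and gen_shape, the demo.jpg file name and the 640x480 size are made up here, and the field values simply mirror what LabelmeDict.gen_data and LabelmeDict.gen_shape in the deleted code above wrote out.

import json

def gen_labelme_dict(image_name, width, height, shapes=()):
    # mirrors LabelmeDict.gen_data: the top-level labelme file skeleton
    return {'version': '5.1.7', 'flags': {}, 'shapes': list(shapes),
            'imagePath': image_name, 'imageData': None,
            'imageWidth': width, 'imageHeight': height}

def gen_shape(label, points, shape_type=None):
    # mirrors LabelmeDict.gen_shape: infer the shape type from the point count
    if shape_type is None:
        shape_type = {1: 'point', 2: 'rectangle'}.get(len(points), 'polygon')
    if isinstance(label, dict):  # dict labels are stored as JSON strings
        label = json.dumps(label, ensure_ascii=False)
    return {'flags': {}, 'group_id': None, 'label': str(label),
            'points': [[float(x), float(y)] for x, y in points],
            'shape_type': shape_type}

# hypothetical usage: one OCR-style rectangle annotation on a 640x480 image
record = gen_labelme_dict('demo.jpg', 640, 480,
                          [gen_shape({'text': 'hello', 'score': 0.98}, [[10, 20], [200, 60]])])
print(json.dumps(record, ensure_ascii=False, indent=2))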