PyPI - pyxllib - Versions diffs - 0.3.96__py3-none-any.whl → 0.3.197__py3-none-any.whl - Mend

pyxllib 0.3.96-py3-none-any.whl → 0.3.197-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (306)

pyxllib/algo/geo.py +12 -0
pyxllib/algo/intervals.py +1 -1
pyxllib/algo/matcher.py +78 -0
pyxllib/algo/pupil.py +187 -19
pyxllib/algo/specialist.py +2 -1
pyxllib/algo/stat.py +38 -2
{pyxlpr → pyxllib/autogui}/__init__.py +1 -1
pyxllib/autogui/activewin.py +246 -0
pyxllib/autogui/all.py +9 -0
pyxllib/{ext/autogui → autogui}/autogui.py +40 -11
pyxllib/autogui/uiautolib.py +362 -0
pyxllib/autogui/wechat.py +827 -0
pyxllib/autogui/wechat_msg.py +421 -0
pyxllib/autogui/wxautolib.py +84 -0
pyxllib/cv/slidercaptcha.py +137 -0
pyxllib/data/echarts.py +123 -12
pyxllib/data/jsonlib.py +89 -0
pyxllib/data/pglib.py +514 -30
pyxllib/data/sqlite.py +231 -4
pyxllib/ext/JLineViewer.py +14 -1
pyxllib/ext/drissionlib.py +277 -0
pyxllib/ext/kq5034lib.py +0 -1594
pyxllib/ext/robustprocfile.py +497 -0
pyxllib/ext/unixlib.py +6 -5
pyxllib/ext/utools.py +108 -95
pyxllib/ext/webhook.py +32 -14
pyxllib/ext/wjxlib.py +88 -0
pyxllib/ext/wpsapi.py +124 -0
pyxllib/ext/xlwork.py +9 -0
pyxllib/ext/yuquelib.py +1003 -71
pyxllib/file/docxlib.py +1 -1
pyxllib/file/libreoffice.py +165 -0
pyxllib/file/movielib.py +9 -0
pyxllib/file/packlib/__init__.py +112 -75
pyxllib/file/pdflib.py +1 -1
pyxllib/file/pupil.py +1 -1
pyxllib/file/specialist/dirlib.py +1 -1
pyxllib/file/specialist/download.py +10 -3
pyxllib/file/specialist/filelib.py +266 -55
pyxllib/file/xlsxlib.py +205 -50
pyxllib/file/xlsyncfile.py +341 -0
pyxllib/prog/cachetools.py +64 -0
pyxllib/prog/filelock.py +42 -0
pyxllib/prog/multiprogs.py +940 -0
pyxllib/prog/newbie.py +9 -2
pyxllib/prog/pupil.py +129 -60
pyxllib/prog/specialist/__init__.py +176 -2
pyxllib/prog/specialist/bc.py +5 -2
pyxllib/prog/specialist/browser.py +11 -2
pyxllib/prog/specialist/datetime.py +68 -0
pyxllib/prog/specialist/tictoc.py +12 -13
pyxllib/prog/specialist/xllog.py +5 -5
pyxllib/prog/xlosenv.py +7 -0
pyxllib/text/airscript.js +744 -0
pyxllib/text/charclasslib.py +17 -5
pyxllib/text/jiebalib.py +6 -3
pyxllib/text/jinjalib.py +32 -0
pyxllib/text/jsa_ai_prompt.md +271 -0
pyxllib/text/jscode.py +159 -4
pyxllib/text/nestenv.py +1 -1
pyxllib/text/newbie.py +12 -0
pyxllib/text/pupil/common.py +26 -0
pyxllib/text/specialist/ptag.py +2 -2
pyxllib/text/templates/echart_base.html +11 -0
pyxllib/text/templates/highlight_code.html +17 -0
pyxllib/text/templates/latex_editor.html +103 -0
pyxllib/text/xmllib.py +76 -14
pyxllib/xl.py +2 -1
pyxllib-0.3.197.dist-info/METADATA +48 -0
pyxllib-0.3.197.dist-info/RECORD +126 -0
{pyxllib-0.3.96.dist-info → pyxllib-0.3.197.dist-info}/WHEEL +1 -2
pyxllib/ext/autogui/__init__.py +0 -8
pyxllib-0.3.96.dist-info/METADATA +0 -51
pyxllib-0.3.96.dist-info/RECORD +0 -333
pyxllib-0.3.96.dist-info/top_level.txt +0 -2
pyxlpr/ai/__init__.py +0 -5
pyxlpr/ai/clientlib.py +0 -1281
pyxlpr/ai/specialist.py +0 -286
pyxlpr/ai/torch_app.py +0 -172
pyxlpr/ai/xlpaddle.py +0 -655
pyxlpr/ai/xltorch.py +0 -705
pyxlpr/data/__init__.py +0 -11
pyxlpr/data/coco.py +0 -1325
pyxlpr/data/datacls.py +0 -365
pyxlpr/data/datasets.py +0 -200
pyxlpr/data/gptlib.py +0 -1291
pyxlpr/data/icdar/__init__.py +0 -96
pyxlpr/data/icdar/deteval.py +0 -377
pyxlpr/data/icdar/icdar2013.py +0 -341
pyxlpr/data/icdar/iou.py +0 -340
pyxlpr/data/icdar/rrc_evaluation_funcs_1_1.py +0 -463
pyxlpr/data/imtextline.py +0 -473
pyxlpr/data/labelme.py +0 -866
pyxlpr/data/removeline.py +0 -179
pyxlpr/data/specialist.py +0 -57
pyxlpr/eval/__init__.py +0 -85
pyxlpr/paddleocr.py +0 -776
pyxlpr/ppocr/__init__.py +0 -15
pyxlpr/ppocr/configs/rec/multi_language/generate_multi_language_configs.py +0 -226
pyxlpr/ppocr/data/__init__.py +0 -135
pyxlpr/ppocr/data/imaug/ColorJitter.py +0 -26
pyxlpr/ppocr/data/imaug/__init__.py +0 -67
pyxlpr/ppocr/data/imaug/copy_paste.py +0 -170
pyxlpr/ppocr/data/imaug/east_process.py +0 -437
pyxlpr/ppocr/data/imaug/gen_table_mask.py +0 -244
pyxlpr/ppocr/data/imaug/iaa_augment.py +0 -114
pyxlpr/ppocr/data/imaug/label_ops.py +0 -789
pyxlpr/ppocr/data/imaug/make_border_map.py +0 -184
pyxlpr/ppocr/data/imaug/make_pse_gt.py +0 -106
pyxlpr/ppocr/data/imaug/make_shrink_map.py +0 -126
pyxlpr/ppocr/data/imaug/operators.py +0 -433
pyxlpr/ppocr/data/imaug/pg_process.py +0 -906
pyxlpr/ppocr/data/imaug/randaugment.py +0 -143
pyxlpr/ppocr/data/imaug/random_crop_data.py +0 -239
pyxlpr/ppocr/data/imaug/rec_img_aug.py +0 -533
pyxlpr/ppocr/data/imaug/sast_process.py +0 -777
pyxlpr/ppocr/data/imaug/text_image_aug/__init__.py +0 -17
pyxlpr/ppocr/data/imaug/text_image_aug/augment.py +0 -120
pyxlpr/ppocr/data/imaug/text_image_aug/warp_mls.py +0 -168
pyxlpr/ppocr/data/lmdb_dataset.py +0 -115
pyxlpr/ppocr/data/pgnet_dataset.py +0 -104
pyxlpr/ppocr/data/pubtab_dataset.py +0 -107
pyxlpr/ppocr/data/simple_dataset.py +0 -372
pyxlpr/ppocr/losses/__init__.py +0 -61
pyxlpr/ppocr/losses/ace_loss.py +0 -52
pyxlpr/ppocr/losses/basic_loss.py +0 -135
pyxlpr/ppocr/losses/center_loss.py +0 -88
pyxlpr/ppocr/losses/cls_loss.py +0 -30
pyxlpr/ppocr/losses/combined_loss.py +0 -67
pyxlpr/ppocr/losses/det_basic_loss.py +0 -208
pyxlpr/ppocr/losses/det_db_loss.py +0 -80
pyxlpr/ppocr/losses/det_east_loss.py +0 -63
pyxlpr/ppocr/losses/det_pse_loss.py +0 -149
pyxlpr/ppocr/losses/det_sast_loss.py +0 -121
pyxlpr/ppocr/losses/distillation_loss.py +0 -272
pyxlpr/ppocr/losses/e2e_pg_loss.py +0 -140
pyxlpr/ppocr/losses/kie_sdmgr_loss.py +0 -113
pyxlpr/ppocr/losses/rec_aster_loss.py +0 -99
pyxlpr/ppocr/losses/rec_att_loss.py +0 -39
pyxlpr/ppocr/losses/rec_ctc_loss.py +0 -44
pyxlpr/ppocr/losses/rec_enhanced_ctc_loss.py +0 -70
pyxlpr/ppocr/losses/rec_nrtr_loss.py +0 -30
pyxlpr/ppocr/losses/rec_sar_loss.py +0 -28
pyxlpr/ppocr/losses/rec_srn_loss.py +0 -47
pyxlpr/ppocr/losses/table_att_loss.py +0 -109
pyxlpr/ppocr/metrics/__init__.py +0 -44
pyxlpr/ppocr/metrics/cls_metric.py +0 -45
pyxlpr/ppocr/metrics/det_metric.py +0 -82
pyxlpr/ppocr/metrics/distillation_metric.py +0 -73
pyxlpr/ppocr/metrics/e2e_metric.py +0 -86
pyxlpr/ppocr/metrics/eval_det_iou.py +0 -274
pyxlpr/ppocr/metrics/kie_metric.py +0 -70
pyxlpr/ppocr/metrics/rec_metric.py +0 -75
pyxlpr/ppocr/metrics/table_metric.py +0 -50
pyxlpr/ppocr/modeling/architectures/__init__.py +0 -32
pyxlpr/ppocr/modeling/architectures/base_model.py +0 -88
pyxlpr/ppocr/modeling/architectures/distillation_model.py +0 -60
pyxlpr/ppocr/modeling/backbones/__init__.py +0 -54
pyxlpr/ppocr/modeling/backbones/det_mobilenet_v3.py +0 -268
pyxlpr/ppocr/modeling/backbones/det_resnet_vd.py +0 -246
pyxlpr/ppocr/modeling/backbones/det_resnet_vd_sast.py +0 -285
pyxlpr/ppocr/modeling/backbones/e2e_resnet_vd_pg.py +0 -265
pyxlpr/ppocr/modeling/backbones/kie_unet_sdmgr.py +0 -186
pyxlpr/ppocr/modeling/backbones/rec_mobilenet_v3.py +0 -138
pyxlpr/ppocr/modeling/backbones/rec_mv1_enhance.py +0 -258
pyxlpr/ppocr/modeling/backbones/rec_nrtr_mtb.py +0 -48
pyxlpr/ppocr/modeling/backbones/rec_resnet_31.py +0 -210
pyxlpr/ppocr/modeling/backbones/rec_resnet_aster.py +0 -143
pyxlpr/ppocr/modeling/backbones/rec_resnet_fpn.py +0 -307
pyxlpr/ppocr/modeling/backbones/rec_resnet_vd.py +0 -286
pyxlpr/ppocr/modeling/heads/__init__.py +0 -54
pyxlpr/ppocr/modeling/heads/cls_head.py +0 -52
pyxlpr/ppocr/modeling/heads/det_db_head.py +0 -118
pyxlpr/ppocr/modeling/heads/det_east_head.py +0 -121
pyxlpr/ppocr/modeling/heads/det_pse_head.py +0 -37
pyxlpr/ppocr/modeling/heads/det_sast_head.py +0 -128
pyxlpr/ppocr/modeling/heads/e2e_pg_head.py +0 -253
pyxlpr/ppocr/modeling/heads/kie_sdmgr_head.py +0 -206
pyxlpr/ppocr/modeling/heads/multiheadAttention.py +0 -163
pyxlpr/ppocr/modeling/heads/rec_aster_head.py +0 -393
pyxlpr/ppocr/modeling/heads/rec_att_head.py +0 -202
pyxlpr/ppocr/modeling/heads/rec_ctc_head.py +0 -88
pyxlpr/ppocr/modeling/heads/rec_nrtr_head.py +0 -826
pyxlpr/ppocr/modeling/heads/rec_sar_head.py +0 -402
pyxlpr/ppocr/modeling/heads/rec_srn_head.py +0 -280
pyxlpr/ppocr/modeling/heads/self_attention.py +0 -406
pyxlpr/ppocr/modeling/heads/table_att_head.py +0 -246
pyxlpr/ppocr/modeling/necks/__init__.py +0 -32
pyxlpr/ppocr/modeling/necks/db_fpn.py +0 -111
pyxlpr/ppocr/modeling/necks/east_fpn.py +0 -188
pyxlpr/ppocr/modeling/necks/fpn.py +0 -138
pyxlpr/ppocr/modeling/necks/pg_fpn.py +0 -314
pyxlpr/ppocr/modeling/necks/rnn.py +0 -92
pyxlpr/ppocr/modeling/necks/sast_fpn.py +0 -284
pyxlpr/ppocr/modeling/necks/table_fpn.py +0 -110
pyxlpr/ppocr/modeling/transforms/__init__.py +0 -28
pyxlpr/ppocr/modeling/transforms/stn.py +0 -135
pyxlpr/ppocr/modeling/transforms/tps.py +0 -308
pyxlpr/ppocr/modeling/transforms/tps_spatial_transformer.py +0 -156
pyxlpr/ppocr/optimizer/__init__.py +0 -61
pyxlpr/ppocr/optimizer/learning_rate.py +0 -228
pyxlpr/ppocr/optimizer/lr_scheduler.py +0 -49
pyxlpr/ppocr/optimizer/optimizer.py +0 -160
pyxlpr/ppocr/optimizer/regularizer.py +0 -52
pyxlpr/ppocr/postprocess/__init__.py +0 -55
pyxlpr/ppocr/postprocess/cls_postprocess.py +0 -33
pyxlpr/ppocr/postprocess/db_postprocess.py +0 -234
pyxlpr/ppocr/postprocess/east_postprocess.py +0 -143
pyxlpr/ppocr/postprocess/locality_aware_nms.py +0 -200
pyxlpr/ppocr/postprocess/pg_postprocess.py +0 -52
pyxlpr/ppocr/postprocess/pse_postprocess/__init__.py +0 -15
pyxlpr/ppocr/postprocess/pse_postprocess/pse/__init__.py +0 -29
pyxlpr/ppocr/postprocess/pse_postprocess/pse/setup.py +0 -14
pyxlpr/ppocr/postprocess/pse_postprocess/pse_postprocess.py +0 -118
pyxlpr/ppocr/postprocess/rec_postprocess.py +0 -654
pyxlpr/ppocr/postprocess/sast_postprocess.py +0 -355
pyxlpr/ppocr/tools/__init__.py +0 -14
pyxlpr/ppocr/tools/eval.py +0 -83
pyxlpr/ppocr/tools/export_center.py +0 -77
pyxlpr/ppocr/tools/export_model.py +0 -129
pyxlpr/ppocr/tools/infer/predict_cls.py +0 -151
pyxlpr/ppocr/tools/infer/predict_det.py +0 -300
pyxlpr/ppocr/tools/infer/predict_e2e.py +0 -169
pyxlpr/ppocr/tools/infer/predict_rec.py +0 -414
pyxlpr/ppocr/tools/infer/predict_system.py +0 -204
pyxlpr/ppocr/tools/infer/utility.py +0 -629
pyxlpr/ppocr/tools/infer_cls.py +0 -83
pyxlpr/ppocr/tools/infer_det.py +0 -134
pyxlpr/ppocr/tools/infer_e2e.py +0 -122
pyxlpr/ppocr/tools/infer_kie.py +0 -153
pyxlpr/ppocr/tools/infer_rec.py +0 -146
pyxlpr/ppocr/tools/infer_table.py +0 -107
pyxlpr/ppocr/tools/program.py +0 -596
pyxlpr/ppocr/tools/test_hubserving.py +0 -117
pyxlpr/ppocr/tools/train.py +0 -163
pyxlpr/ppocr/tools/xlprog.py +0 -748
pyxlpr/ppocr/utils/EN_symbol_dict.txt +0 -94
pyxlpr/ppocr/utils/__init__.py +0 -24
pyxlpr/ppocr/utils/dict/ar_dict.txt +0 -117
pyxlpr/ppocr/utils/dict/arabic_dict.txt +0 -162
pyxlpr/ppocr/utils/dict/be_dict.txt +0 -145
pyxlpr/ppocr/utils/dict/bg_dict.txt +0 -140
pyxlpr/ppocr/utils/dict/chinese_cht_dict.txt +0 -8421
pyxlpr/ppocr/utils/dict/cyrillic_dict.txt +0 -163
pyxlpr/ppocr/utils/dict/devanagari_dict.txt +0 -167
pyxlpr/ppocr/utils/dict/en_dict.txt +0 -63
pyxlpr/ppocr/utils/dict/fa_dict.txt +0 -136
pyxlpr/ppocr/utils/dict/french_dict.txt +0 -136
pyxlpr/ppocr/utils/dict/german_dict.txt +0 -143
pyxlpr/ppocr/utils/dict/hi_dict.txt +0 -162
pyxlpr/ppocr/utils/dict/it_dict.txt +0 -118
pyxlpr/ppocr/utils/dict/japan_dict.txt +0 -4399
pyxlpr/ppocr/utils/dict/ka_dict.txt +0 -153
pyxlpr/ppocr/utils/dict/korean_dict.txt +0 -3688
pyxlpr/ppocr/utils/dict/latin_dict.txt +0 -185
pyxlpr/ppocr/utils/dict/mr_dict.txt +0 -153
pyxlpr/ppocr/utils/dict/ne_dict.txt +0 -153
pyxlpr/ppocr/utils/dict/oc_dict.txt +0 -96
pyxlpr/ppocr/utils/dict/pu_dict.txt +0 -130
pyxlpr/ppocr/utils/dict/rs_dict.txt +0 -91
pyxlpr/ppocr/utils/dict/rsc_dict.txt +0 -134
pyxlpr/ppocr/utils/dict/ru_dict.txt +0 -125
pyxlpr/ppocr/utils/dict/ta_dict.txt +0 -128
pyxlpr/ppocr/utils/dict/table_dict.txt +0 -277
pyxlpr/ppocr/utils/dict/table_structure_dict.txt +0 -2759
pyxlpr/ppocr/utils/dict/te_dict.txt +0 -151
pyxlpr/ppocr/utils/dict/ug_dict.txt +0 -114
pyxlpr/ppocr/utils/dict/uk_dict.txt +0 -142
pyxlpr/ppocr/utils/dict/ur_dict.txt +0 -137
pyxlpr/ppocr/utils/dict/xi_dict.txt +0 -110
pyxlpr/ppocr/utils/dict90.txt +0 -90
pyxlpr/ppocr/utils/e2e_metric/Deteval.py +0 -574
pyxlpr/ppocr/utils/e2e_metric/polygon_fast.py +0 -83
pyxlpr/ppocr/utils/e2e_utils/extract_batchsize.py +0 -87
pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_fast.py +0 -457
pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_slow.py +0 -592
pyxlpr/ppocr/utils/e2e_utils/pgnet_pp_utils.py +0 -162
pyxlpr/ppocr/utils/e2e_utils/visual.py +0 -162
pyxlpr/ppocr/utils/en_dict.txt +0 -95
pyxlpr/ppocr/utils/gen_label.py +0 -81
pyxlpr/ppocr/utils/ic15_dict.txt +0 -36
pyxlpr/ppocr/utils/iou.py +0 -54
pyxlpr/ppocr/utils/logging.py +0 -69
pyxlpr/ppocr/utils/network.py +0 -84
pyxlpr/ppocr/utils/ppocr_keys_v1.txt +0 -6623
pyxlpr/ppocr/utils/profiler.py +0 -110
pyxlpr/ppocr/utils/save_load.py +0 -150
pyxlpr/ppocr/utils/stats.py +0 -72
pyxlpr/ppocr/utils/utility.py +0 -80
pyxlpr/ppstructure/__init__.py +0 -13
pyxlpr/ppstructure/predict_system.py +0 -187
pyxlpr/ppstructure/table/__init__.py +0 -13
pyxlpr/ppstructure/table/eval_table.py +0 -72
pyxlpr/ppstructure/table/matcher.py +0 -192
pyxlpr/ppstructure/table/predict_structure.py +0 -136
pyxlpr/ppstructure/table/predict_table.py +0 -221
pyxlpr/ppstructure/table/table_metric/__init__.py +0 -16
pyxlpr/ppstructure/table/table_metric/parallel.py +0 -51
pyxlpr/ppstructure/table/table_metric/table_metric.py +0 -247
pyxlpr/ppstructure/table/tablepyxl/__init__.py +0 -13
pyxlpr/ppstructure/table/tablepyxl/style.py +0 -283
pyxlpr/ppstructure/table/tablepyxl/tablepyxl.py +0 -118
pyxlpr/ppstructure/utility.py +0 -71
pyxlpr/xlai.py +0 -10
pyxllib/{ext/autogui → autogui}/virtualkey.py +0 -0
{pyxllib-0.3.96.dist-info → pyxllib-0.3.197.dist-info/licenses}/LICENSE +0 -0

pyxlpr/ppstructure/table/table_metric/table_metric.py DELETED Viewed

@@ -1,247 +0,0 @@
-# Copyright 2020 IBM
-# Author: peter.zhong@au1.ibm.com
-#
-# This is free software; you can redistribute it and/or modify
-# it under the terms of the Apache 2.0 License.
-#
-# This software is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# Apache 2.0 License for more details.
-import distance
-from apted import APTED, Config
-from apted.helpers import Tree
-from lxml import etree, html
-from collections import deque
-from .parallel import parallel_process
-from tqdm import tqdm
-class TableTree(Tree):
-    def __init__(self, tag, colspan=None, rowspan=None, content=None, *children):
-        self.tag = tag
-        self.colspan = colspan
-        self.rowspan = rowspan
-        self.content = content
-        self.children = list(children)
-    def bracket(self):
-        """Show tree using brackets notation"""
-        if self.tag == 'td':
-            result = '"tag": %s, "colspan": %d, "rowspan": %d, "text": %s' % \
-                     (self.tag, self.colspan, self.rowspan, self.content)
-        else:
-            result = '"tag": %s' % self.tag
-        for child in self.children:
-            result += child.bracket()
-        return "{{{}}}".format(result)
-class CustomConfig(Config):
-    @staticmethod
-    def maximum(*sequences):
-        """Get maximum possible value
-        """
-        return max(map(len, sequences))
-    def normalized_distance(self, *sequences):
-        """Get distance from 0 to 1
-        """
-        return float(distance.levenshtein(*sequences)) / self.maximum(*sequences)
-    def rename(self, node1, node2):
-        """Compares attributes of trees"""
-        #print(node1.tag)
-        if (node1.tag != node2.tag) or (node1.colspan != node2.colspan) or (node1.rowspan != node2.rowspan):
-            return 1.
-        if node1.tag == 'td':
-            if node1.content or node2.content:
-                #print(node1.content, )
-                return self.normalized_distance(node1.content, node2.content)
-        return 0.
-class CustomConfig_del_short(Config):
-    @staticmethod
-    def maximum(*sequences):
-        """Get maximum possible value
-        """
-        return max(map(len, sequences))
-    def normalized_distance(self, *sequences):
-        """Get distance from 0 to 1
-        """
-        return float(distance.levenshtein(*sequences)) / self.maximum(*sequences)
-    def rename(self, node1, node2):
-        """Compares attributes of trees"""
-        if (node1.tag != node2.tag) or (node1.colspan != node2.colspan) or (node1.rowspan != node2.rowspan):
-            return 1.
-        if node1.tag == 'td':
-            if node1.content or node2.content:
-                #print('before')
-                #print(node1.content, node2.content)
-                #print('after')
-                node1_content = node1.content
-                node2_content = node2.content
-                if len(node1_content) < 3:
-                    node1_content = ['####']
-                if len(node2_content) < 3:
-                    node2_content = ['####']
-                return self.normalized_distance(node1_content, node2_content)
-        return 0.
-class CustomConfig_del_block(Config):
-    @staticmethod
-    def maximum(*sequences):
-        """Get maximum possible value
-        """
-        return max(map(len, sequences))
-    def normalized_distance(self, *sequences):
-        """Get distance from 0 to 1
-        """
-        return float(distance.levenshtein(*sequences)) / self.maximum(*sequences)
-    def rename(self, node1, node2):
-        """Compares attributes of trees"""
-        if (node1.tag != node2.tag) or (node1.colspan != node2.colspan) or (node1.rowspan != node2.rowspan):
-            return 1.
-        if node1.tag == 'td':
-            if node1.content or node2.content:
-                node1_content = node1.content
-                node2_content = node2.content
-                while ' '  in node1_content:
-                    print(node1_content.index(' '))
-                    node1_content.pop(node1_content.index(' '))
-                while ' ' in node2_content:
-                    print(node2_content.index(' '))
-                    node2_content.pop(node2_content.index(' '))
-                return self.normalized_distance(node1_content, node2_content)
-        return 0.
-class TEDS(object):
-    ''' Tree Edit Distance basead Similarity
-    '''
-    def __init__(self, structure_only=False, n_jobs=1, ignore_nodes=None):
-        assert isinstance(n_jobs, int) and (
-            n_jobs >= 1), 'n_jobs must be an integer greather than 1'
-        self.structure_only = structure_only
-        self.n_jobs = n_jobs
-        self.ignore_nodes = ignore_nodes
-        self.__tokens__ = []
-    def tokenize(self, node):
-        ''' Tokenizes table cells
-        '''
-        self.__tokens__.append('<%s>' % node.tag)
-        if node.text is not None:
-            self.__tokens__ += list(node.text)
-        for n in node.getchildren():
-            self.tokenize(n)
-        if node.tag != 'unk':
-            self.__tokens__.append('</%s>' % node.tag)
-        if node.tag != 'td' and node.tail is not None:
-            self.__tokens__ += list(node.tail)
-    def load_html_tree(self, node, parent=None):
-        ''' Converts HTML tree to the format required by apted
-        '''
-        global __tokens__
-        if node.tag == 'td':
-            if self.structure_only:
-                cell = []
-            else:
-                self.__tokens__ = []
-                self.tokenize(node)
-                cell = self.__tokens__[1:-1].copy()
-            new_node = TableTree(node.tag,
-                                 int(node.attrib.get('colspan', '1')),
-                                 int(node.attrib.get('rowspan', '1')),
-                                 cell, *deque())
-        else:
-            new_node = TableTree(node.tag, None, None, None, *deque())
-        if parent is not None:
-            parent.children.append(new_node)
-        if node.tag != 'td':
-            for n in node.getchildren():
-                self.load_html_tree(n, new_node)
-        if parent is None:
-            return new_node
-    def evaluate(self, pred, true):
-        ''' Computes TEDS score between the prediction and the ground truth of a
-            given sample
-        '''
-        if (not pred) or (not true):
-            return 0.0
-        parser = html.HTMLParser(remove_comments=True, encoding='utf-8')
-        pred = html.fromstring(pred, parser=parser)
-        true = html.fromstring(true, parser=parser)
-        if pred.xpath('body/table') and true.xpath('body/table'):
-            pred = pred.xpath('body/table')[0]
-            true = true.xpath('body/table')[0]
-            if self.ignore_nodes:
-                etree.strip_tags(pred, *self.ignore_nodes)
-                etree.strip_tags(true, *self.ignore_nodes)
-            n_nodes_pred = len(pred.xpath(".//*"))
-            n_nodes_true = len(true.xpath(".//*"))
-            n_nodes = max(n_nodes_pred, n_nodes_true)
-            tree_pred = self.load_html_tree(pred)
-            tree_true = self.load_html_tree(true)
-            distance = APTED(tree_pred, tree_true,
-                             CustomConfig()).compute_edit_distance()
-            return 1.0 - (float(distance) / n_nodes)
-        else:
-            return 0.0
-    def batch_evaluate(self, pred_json, true_json):
-        ''' Computes TEDS score between the prediction and the ground truth of
-            a batch of samples
-            @params pred_json: {'FILENAME': 'HTML CODE', ...}
-            @params true_json: {'FILENAME': {'html': 'HTML CODE'}, ...}
-            @output: {'FILENAME': 'TEDS SCORE', ...}
-        '''
-        samples = true_json.keys()
-        if self.n_jobs == 1:
-            scores = [self.evaluate(pred_json.get(
-                filename, ''), true_json[filename]['html']) for filename in tqdm(samples)]
-        else:
-            inputs = [{'pred': pred_json.get(
-                filename, ''), 'true': true_json[filename]['html']} for filename in samples]
-            scores = parallel_process(
-                inputs, self.evaluate, use_kwargs=True, n_jobs=self.n_jobs, front_num=1)
-        scores = dict(zip(samples, scores))
-        return scores
-    def batch_evaluate_html(self, pred_htmls, true_htmls):
-        ''' Computes TEDS score between the prediction and the ground truth of
-            a batch of samples
-        '''
-        if self.n_jobs == 1:
-            scores = [self.evaluate(pred_html, true_html) for (
-                pred_html, true_html) in zip(pred_htmls, true_htmls)]
-        else:
-            inputs = [{"pred": pred_html, "true": true_html} for(
-                pred_html, true_html) in zip(pred_htmls, true_htmls)]
-            scores = parallel_process(
-                inputs, self.evaluate, use_kwargs=True, n_jobs=self.n_jobs, front_num=1)
-        return scores
-if __name__ == '__main__':
-    import json
-    import pprint
-    with open('sample_pred.json') as fp:
-        pred_json = json.load(fp)
-    with open('sample_gt.json') as fp:
-        true_json = json.load(fp)
-    teds = TEDS(n_jobs=4)
-    scores = teds.batch_evaluate(pred_json, true_json)
-    pp = pprint.PrettyPrinter()
-    pp.pprint(scores)

pyxlpr/ppstructure/table/tablepyxl/__init__.py DELETED Viewed

@@ -1,13 +0,0 @@
-# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.

pyxlpr/ppstructure/table/tablepyxl/style.py DELETED Viewed

@@ -1,283 +0,0 @@
-# This is where we handle translating css styles into openpyxl styles
-# and cascading those from parent to child in the dom.
-from openpyxl.cell import cell
-from openpyxl.styles import Font, Alignment, PatternFill, NamedStyle, Border, Side, Color
-from openpyxl.styles.fills import FILL_SOLID
-from openpyxl.styles.numbers import FORMAT_CURRENCY_USD_SIMPLE, FORMAT_PERCENTAGE
-from openpyxl.styles.colors import BLACK
-FORMAT_DATE_MMDDYYYY = 'mm/dd/yyyy'
-def colormap(color):
-    """
-    Convenience for looking up known colors
-    """
-    cmap = {'black': BLACK}
-    return cmap.get(color, color)
-def style_string_to_dict(style):
-    """
-    Convert css style string to a python dictionary
-    """
-    def clean_split(string, delim):
-        return (s.strip() for s in string.split(delim))
-    styles = [clean_split(s, ":") for s in style.split(";") if ":" in s]
-    return dict(styles)
-def get_side(style, name):
-    return {'border_style': style.get('border-{}-style'.format(name)),
-            'color': colormap(style.get('border-{}-color'.format(name)))}
-known_styles = {}
-def style_dict_to_named_style(style_dict, number_format=None):
-    """
-    Change css style (stored in a python dictionary) to openpyxl NamedStyle
-    """
-    style_and_format_string = str({
-        'style_dict': style_dict,
-        'parent': style_dict.parent,
-        'number_format': number_format,
-    })
-    if style_and_format_string not in known_styles:
-        # Font
-        font = Font(bold=style_dict.get('font-weight') == 'bold',
-                    color=style_dict.get_color('color', None),
-                    size=style_dict.get('font-size'))
-        # Alignment
-        alignment = Alignment(horizontal=style_dict.get('text-align', 'general'),
-                              vertical=style_dict.get('vertical-align'),
-                              wrap_text=style_dict.get('white-space', 'nowrap') == 'normal')
-        # Fill
-        bg_color = style_dict.get_color('background-color')
-        fg_color = style_dict.get_color('foreground-color', Color())
-        fill_type = style_dict.get('fill-type')
-        if bg_color and bg_color != 'transparent':
-            fill = PatternFill(fill_type=fill_type or FILL_SOLID,
-                               start_color=bg_color,
-                               end_color=fg_color)
-        else:
-            fill = PatternFill()
-        # Border
-        border = Border(left=Side(**get_side(style_dict, 'left')),
-                        right=Side(**get_side(style_dict, 'right')),
-                        top=Side(**get_side(style_dict, 'top')),
-                        bottom=Side(**get_side(style_dict, 'bottom')),
-                        diagonal=Side(**get_side(style_dict, 'diagonal')),
-                        diagonal_direction=None,
-                        outline=Side(**get_side(style_dict, 'outline')),
-                        vertical=None,
-                        horizontal=None)
-        name = 'Style {}'.format(len(known_styles) + 1)
-        pyxl_style = NamedStyle(name=name, font=font, fill=fill, alignment=alignment, border=border,
-                                number_format=number_format)
-        known_styles[style_and_format_string] = pyxl_style
-    return known_styles[style_and_format_string]
-class StyleDict(dict):
-    """
-    It's like a dictionary, but it looks for items in the parent dictionary
-    """
-    def __init__(self, *args, **kwargs):
-        self.parent = kwargs.pop('parent', None)
-        super(StyleDict, self).__init__(*args, **kwargs)
-    def __getitem__(self, item):
-        if item in self:
-            return super(StyleDict, self).__getitem__(item)
-        elif self.parent:
-            return self.parent[item]
-        else:
-            raise KeyError('{} not found'.format(item))
-    def __hash__(self):
-        return hash(tuple([(k, self.get(k)) for k in self._keys()]))
-    # Yielding the keys avoids creating unnecessary data structures
-    # and happily works with both python2 and python3 where the
-    # .keys() method is a dictionary_view in python3 and a list in python2.
-    def _keys(self):
-        yielded = set()
-        for k in self.keys():
-            yielded.add(k)
-            yield k
-        if self.parent:
-            for k in self.parent._keys():
-                if k not in yielded:
-                    yielded.add(k)
-                    yield k
-    def get(self, k, d=None):
-        try:
-            return self[k]
-        except KeyError:
-            return d
-    def get_color(self, k, d=None):
-        """
-        Strip leading # off colors if necessary
-        """
-        color = self.get(k, d)
-        if hasattr(color, 'startswith') and color.startswith('#'):
-            color = color[1:]
-            if len(color) == 3:  # Premailers reduces colors like #00ff00 to #0f0, openpyxl doesn't like that
-                color = ''.join(2 * c for c in color)
-        return color
-class Element(object):
-    """
-    Our base class for representing an html element along with a cascading style.
-    The element is created along with a parent so that the StyleDict that we store
-    can point to the parent's StyleDict.
-    """
-    def __init__(self, element, parent=None):
-        self.element = element
-        self.number_format = None
-        parent_style = parent.style_dict if parent else None
-        self.style_dict = StyleDict(style_string_to_dict(element.get('style', '')), parent=parent_style)
-        self._style_cache = None
-    def style(self):
-        """
-        Turn the css styles for this element into an openpyxl NamedStyle.
-        """
-        if not self._style_cache:
-            self._style_cache = style_dict_to_named_style(self.style_dict, number_format=self.number_format)
-        return self._style_cache
-    def get_dimension(self, dimension_key):
-        """
-        Extracts the dimension from the style dict of the Element and returns it as a float.
-        """
-        dimension = self.style_dict.get(dimension_key)
-        if dimension:
-            if dimension[-2:] in ['px', 'em', 'pt', 'in', 'cm']:
-                dimension = dimension[:-2]
-            dimension = float(dimension)
-        return dimension
-class Table(Element):
-    """
-    The concrete implementations of Elements are semantically named for the types of elements we are interested in.
-    This defines a very concrete tree structure for html tables that we expect to deal with. I prefer this compared to
-    allowing Element to have an arbitrary number of children and dealing with an abstract element tree.
-    """
-    def __init__(self, table):
-        """
-        takes an html table object (from lxml)
-        """
-        super(Table, self).__init__(table)
-        table_head = table.find('thead')
-        self.head = TableHead(table_head, parent=self) if table_head is not None else None
-        table_body = table.find('tbody')
-        self.body = TableBody(table_body if table_body is not None else table, parent=self)
-class TableHead(Element):
-    """
-    This class maps to the `<th>` element of the html table.
-    """
-    def __init__(self, head, parent=None):
-        super(TableHead, self).__init__(head, parent=parent)
-        self.rows = [TableRow(tr, parent=self) for tr in head.findall('tr')]
-class TableBody(Element):
-    """
-    This class maps to the `<tbody>` element of the html table.
-    """
-    def __init__(self, body, parent=None):
-        super(TableBody, self).__init__(body, parent=parent)
-        self.rows = [TableRow(tr, parent=self) for tr in body.findall('tr')]
-class TableRow(Element):
-    """
-    This class maps to the `<tr>` element of the html table.
-    """
-    def __init__(self, tr, parent=None):
-        super(TableRow, self).__init__(tr, parent=parent)
-        self.cells = [TableCell(cell, parent=self) for cell in tr.findall('th') + tr.findall('td')]
-def element_to_string(el):
-    return _element_to_string(el).strip()
-def _element_to_string(el):
-    string = ''
-    for x in el.iterchildren():
-        string += '\n' + _element_to_string(x)
-    text = el.text.strip() if el.text else ''
-    tail = el.tail.strip() if el.tail else ''
-    return text + string + '\n' + tail
-class TableCell(Element):
-    """
-    This class maps to the `<td>` element of the html table.
-    """
-    CELL_TYPES = {'TYPE_STRING', 'TYPE_FORMULA', 'TYPE_NUMERIC', 'TYPE_BOOL', 'TYPE_CURRENCY', 'TYPE_PERCENTAGE',
-                  'TYPE_NULL', 'TYPE_INLINE', 'TYPE_ERROR', 'TYPE_FORMULA_CACHE_STRING', 'TYPE_INTEGER'}
-    def __init__(self, cell, parent=None):
-        super(TableCell, self).__init__(cell, parent=parent)
-        self.value = element_to_string(cell)
-        self.number_format = self.get_number_format()
-    def data_type(self):
-        cell_types = self.CELL_TYPES & set(self.element.get('class', '').split())
-        if cell_types:
-            if 'TYPE_FORMULA' in cell_types:
-                # Make sure TYPE_FORMULA takes precedence over the other classes in the set.
-                cell_type = 'TYPE_FORMULA'
-            elif cell_types & {'TYPE_CURRENCY', 'TYPE_INTEGER', 'TYPE_PERCENTAGE'}:
-                cell_type = 'TYPE_NUMERIC'
-            else:
-                cell_type = cell_types.pop()
-        else:
-            cell_type = 'TYPE_STRING'
-        return getattr(cell, cell_type)
-    def get_number_format(self):
-        if 'TYPE_CURRENCY' in self.element.get('class', '').split():
-            return FORMAT_CURRENCY_USD_SIMPLE
-        if 'TYPE_INTEGER' in self.element.get('class', '').split():
-            return '#,##0'
-        if 'TYPE_PERCENTAGE' in self.element.get('class', '').split():
-            return FORMAT_PERCENTAGE
-        if 'TYPE_DATE' in self.element.get('class', '').split():
-            return FORMAT_DATE_MMDDYYYY
-        if self.data_type() == cell.TYPE_NUMERIC:
-            try:
-                int(self.value)
-            except ValueError:
-                return '#,##0.##'
-            else:
-                return '#,##0'
-    def format(self, cell):
-        cell.style = self.style()
-        data_type = self.data_type()
-        if data_type:
-            cell.data_type = data_type

pyxlpr/ppstructure/table/tablepyxl/tablepyxl.py DELETED Viewed

@@ -1,118 +0,0 @@
-# Do imports like python3 so our package works for 2 and 3
-from __future__ import absolute_import
-from lxml import html
-from openpyxl import Workbook
-from openpyxl.utils import get_column_letter
-from premailer import Premailer
-from tablepyxl.style import Table
-def string_to_int(s):
-    if s.isdigit():
-        return int(s)
-    return 0
-def get_Tables(doc):
-    tree = html.fromstring(doc)
-    comments = tree.xpath('//comment()')
-    for comment in comments:
-        comment.drop_tag()
-    return [Table(table) for table in tree.xpath('//table')]
-def write_rows(worksheet, elem, row, column=1):
-    """
-    Writes every tr child element of elem to a row in the worksheet
-    returns the next row after all rows are written
-    """
-    from openpyxl.cell.cell import MergedCell
-    initial_column = column
-    for table_row in elem.rows:
-        for table_cell in table_row.cells:
-            cell = worksheet.cell(row=row, column=column)
-            while isinstance(cell, MergedCell):
-                column += 1
-                cell = worksheet.cell(row=row, column=column)
-            colspan = string_to_int(table_cell.element.get("colspan", "1"))
-            rowspan = string_to_int(table_cell.element.get("rowspan", "1"))
-            if rowspan > 1 or colspan > 1:
-                worksheet.merge_cells(start_row=row, start_column=column,
-                                      end_row=row + rowspan - 1, end_column=column + colspan - 1)
-            cell.value = table_cell.value
-            table_cell.format(cell)
-            min_width = table_cell.get_dimension('min-width')
-            max_width = table_cell.get_dimension('max-width')
-            if colspan == 1:
-                # Initially, when iterating for the first time through the loop, the width of all the cells is None.
-                # As we start filling in contents, the initial width of the cell (which can be retrieved by:
-                # worksheet.column_dimensions[get_column_letter(column)].width) is equal to the width of the previous
-                # cell in the same column (i.e. width of A2 = width of A1)
-                width = max(worksheet.column_dimensions[get_column_letter(column)].width or 0, len(table_cell.value) + 2)
-                if max_width and width > max_width:
-                    width = max_width
-                elif min_width and width < min_width:
-                    width = min_width
-                worksheet.column_dimensions[get_column_letter(column)].width = width
-            column += colspan
-        row += 1
-        column = initial_column
-    return row
-def table_to_sheet(table, wb):
-    """
-    Takes a table and workbook and writes the table to a new sheet.
-    The sheet title will be the same as the table attribute name.
-    """
-    ws = wb.create_sheet(title=table.element.get('name'))
-    insert_table(table, ws, 1, 1)
-def document_to_workbook(doc, wb=None, base_url=None):
-    """
-    Takes a string representation of an html document and writes one sheet for
-    every table in the document.
-    The workbook is returned
-    """
-    if not wb:
-        wb = Workbook()
-        wb.remove(wb.active)
-    inline_styles_doc = Premailer(doc, base_url=base_url, remove_classes=False).transform()
-    tables = get_Tables(inline_styles_doc)
-    for table in tables:
-        table_to_sheet(table, wb)
-    return wb
-def document_to_xl(doc, filename, base_url=None):
-    """
-    Takes a string representation of an html document and writes one sheet for
-    every table in the document. The workbook is written out to a file called filename
-    """
-    wb = document_to_workbook(doc, base_url=base_url)
-    wb.save(filename)
-def insert_table(table, worksheet, column, row):
-    if table.head:
-        row = write_rows(worksheet, table.head, row, column)
-    if table.body:
-        row = write_rows(worksheet, table.body, row, column)
-def insert_table_at_cell(table, cell):
-    """
-    Inserts a table at the location of an openpyxl Cell object.
-    """
-    ws = cell.parent
-    column, row = cell.column, cell.row
-    insert_table(table, ws, column, row)

pyxllib 0.3.96__py3-none-any.whl → 0.3.197__py3-none-any.whl

pyxllib 0.3.96py3-none-any.whl → 0.3.197py3-none-any.whl