pyxllib 0.3.96__py3-none-any.whl → 0.3.197__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyxllib/algo/geo.py +12 -0
- pyxllib/algo/intervals.py +1 -1
- pyxllib/algo/matcher.py +78 -0
- pyxllib/algo/pupil.py +187 -19
- pyxllib/algo/specialist.py +2 -1
- pyxllib/algo/stat.py +38 -2
- {pyxlpr → pyxllib/autogui}/__init__.py +1 -1
- pyxllib/autogui/activewin.py +246 -0
- pyxllib/autogui/all.py +9 -0
- pyxllib/{ext/autogui → autogui}/autogui.py +40 -11
- pyxllib/autogui/uiautolib.py +362 -0
- pyxllib/autogui/wechat.py +827 -0
- pyxllib/autogui/wechat_msg.py +421 -0
- pyxllib/autogui/wxautolib.py +84 -0
- pyxllib/cv/slidercaptcha.py +137 -0
- pyxllib/data/echarts.py +123 -12
- pyxllib/data/jsonlib.py +89 -0
- pyxllib/data/pglib.py +514 -30
- pyxllib/data/sqlite.py +231 -4
- pyxllib/ext/JLineViewer.py +14 -1
- pyxllib/ext/drissionlib.py +277 -0
- pyxllib/ext/kq5034lib.py +0 -1594
- pyxllib/ext/robustprocfile.py +497 -0
- pyxllib/ext/unixlib.py +6 -5
- pyxllib/ext/utools.py +108 -95
- pyxllib/ext/webhook.py +32 -14
- pyxllib/ext/wjxlib.py +88 -0
- pyxllib/ext/wpsapi.py +124 -0
- pyxllib/ext/xlwork.py +9 -0
- pyxllib/ext/yuquelib.py +1003 -71
- pyxllib/file/docxlib.py +1 -1
- pyxllib/file/libreoffice.py +165 -0
- pyxllib/file/movielib.py +9 -0
- pyxllib/file/packlib/__init__.py +112 -75
- pyxllib/file/pdflib.py +1 -1
- pyxllib/file/pupil.py +1 -1
- pyxllib/file/specialist/dirlib.py +1 -1
- pyxllib/file/specialist/download.py +10 -3
- pyxllib/file/specialist/filelib.py +266 -55
- pyxllib/file/xlsxlib.py +205 -50
- pyxllib/file/xlsyncfile.py +341 -0
- pyxllib/prog/cachetools.py +64 -0
- pyxllib/prog/filelock.py +42 -0
- pyxllib/prog/multiprogs.py +940 -0
- pyxllib/prog/newbie.py +9 -2
- pyxllib/prog/pupil.py +129 -60
- pyxllib/prog/specialist/__init__.py +176 -2
- pyxllib/prog/specialist/bc.py +5 -2
- pyxllib/prog/specialist/browser.py +11 -2
- pyxllib/prog/specialist/datetime.py +68 -0
- pyxllib/prog/specialist/tictoc.py +12 -13
- pyxllib/prog/specialist/xllog.py +5 -5
- pyxllib/prog/xlosenv.py +7 -0
- pyxllib/text/airscript.js +744 -0
- pyxllib/text/charclasslib.py +17 -5
- pyxllib/text/jiebalib.py +6 -3
- pyxllib/text/jinjalib.py +32 -0
- pyxllib/text/jsa_ai_prompt.md +271 -0
- pyxllib/text/jscode.py +159 -4
- pyxllib/text/nestenv.py +1 -1
- pyxllib/text/newbie.py +12 -0
- pyxllib/text/pupil/common.py +26 -0
- pyxllib/text/specialist/ptag.py +2 -2
- pyxllib/text/templates/echart_base.html +11 -0
- pyxllib/text/templates/highlight_code.html +17 -0
- pyxllib/text/templates/latex_editor.html +103 -0
- pyxllib/text/xmllib.py +76 -14
- pyxllib/xl.py +2 -1
- pyxllib-0.3.197.dist-info/METADATA +48 -0
- pyxllib-0.3.197.dist-info/RECORD +126 -0
- {pyxllib-0.3.96.dist-info → pyxllib-0.3.197.dist-info}/WHEEL +1 -2
- pyxllib/ext/autogui/__init__.py +0 -8
- pyxllib-0.3.96.dist-info/METADATA +0 -51
- pyxllib-0.3.96.dist-info/RECORD +0 -333
- pyxllib-0.3.96.dist-info/top_level.txt +0 -2
- pyxlpr/ai/__init__.py +0 -5
- pyxlpr/ai/clientlib.py +0 -1281
- pyxlpr/ai/specialist.py +0 -286
- pyxlpr/ai/torch_app.py +0 -172
- pyxlpr/ai/xlpaddle.py +0 -655
- pyxlpr/ai/xltorch.py +0 -705
- pyxlpr/data/__init__.py +0 -11
- pyxlpr/data/coco.py +0 -1325
- pyxlpr/data/datacls.py +0 -365
- pyxlpr/data/datasets.py +0 -200
- pyxlpr/data/gptlib.py +0 -1291
- pyxlpr/data/icdar/__init__.py +0 -96
- pyxlpr/data/icdar/deteval.py +0 -377
- pyxlpr/data/icdar/icdar2013.py +0 -341
- pyxlpr/data/icdar/iou.py +0 -340
- pyxlpr/data/icdar/rrc_evaluation_funcs_1_1.py +0 -463
- pyxlpr/data/imtextline.py +0 -473
- pyxlpr/data/labelme.py +0 -866
- pyxlpr/data/removeline.py +0 -179
- pyxlpr/data/specialist.py +0 -57
- pyxlpr/eval/__init__.py +0 -85
- pyxlpr/paddleocr.py +0 -776
- pyxlpr/ppocr/__init__.py +0 -15
- pyxlpr/ppocr/configs/rec/multi_language/generate_multi_language_configs.py +0 -226
- pyxlpr/ppocr/data/__init__.py +0 -135
- pyxlpr/ppocr/data/imaug/ColorJitter.py +0 -26
- pyxlpr/ppocr/data/imaug/__init__.py +0 -67
- pyxlpr/ppocr/data/imaug/copy_paste.py +0 -170
- pyxlpr/ppocr/data/imaug/east_process.py +0 -437
- pyxlpr/ppocr/data/imaug/gen_table_mask.py +0 -244
- pyxlpr/ppocr/data/imaug/iaa_augment.py +0 -114
- pyxlpr/ppocr/data/imaug/label_ops.py +0 -789
- pyxlpr/ppocr/data/imaug/make_border_map.py +0 -184
- pyxlpr/ppocr/data/imaug/make_pse_gt.py +0 -106
- pyxlpr/ppocr/data/imaug/make_shrink_map.py +0 -126
- pyxlpr/ppocr/data/imaug/operators.py +0 -433
- pyxlpr/ppocr/data/imaug/pg_process.py +0 -906
- pyxlpr/ppocr/data/imaug/randaugment.py +0 -143
- pyxlpr/ppocr/data/imaug/random_crop_data.py +0 -239
- pyxlpr/ppocr/data/imaug/rec_img_aug.py +0 -533
- pyxlpr/ppocr/data/imaug/sast_process.py +0 -777
- pyxlpr/ppocr/data/imaug/text_image_aug/__init__.py +0 -17
- pyxlpr/ppocr/data/imaug/text_image_aug/augment.py +0 -120
- pyxlpr/ppocr/data/imaug/text_image_aug/warp_mls.py +0 -168
- pyxlpr/ppocr/data/lmdb_dataset.py +0 -115
- pyxlpr/ppocr/data/pgnet_dataset.py +0 -104
- pyxlpr/ppocr/data/pubtab_dataset.py +0 -107
- pyxlpr/ppocr/data/simple_dataset.py +0 -372
- pyxlpr/ppocr/losses/__init__.py +0 -61
- pyxlpr/ppocr/losses/ace_loss.py +0 -52
- pyxlpr/ppocr/losses/basic_loss.py +0 -135
- pyxlpr/ppocr/losses/center_loss.py +0 -88
- pyxlpr/ppocr/losses/cls_loss.py +0 -30
- pyxlpr/ppocr/losses/combined_loss.py +0 -67
- pyxlpr/ppocr/losses/det_basic_loss.py +0 -208
- pyxlpr/ppocr/losses/det_db_loss.py +0 -80
- pyxlpr/ppocr/losses/det_east_loss.py +0 -63
- pyxlpr/ppocr/losses/det_pse_loss.py +0 -149
- pyxlpr/ppocr/losses/det_sast_loss.py +0 -121
- pyxlpr/ppocr/losses/distillation_loss.py +0 -272
- pyxlpr/ppocr/losses/e2e_pg_loss.py +0 -140
- pyxlpr/ppocr/losses/kie_sdmgr_loss.py +0 -113
- pyxlpr/ppocr/losses/rec_aster_loss.py +0 -99
- pyxlpr/ppocr/losses/rec_att_loss.py +0 -39
- pyxlpr/ppocr/losses/rec_ctc_loss.py +0 -44
- pyxlpr/ppocr/losses/rec_enhanced_ctc_loss.py +0 -70
- pyxlpr/ppocr/losses/rec_nrtr_loss.py +0 -30
- pyxlpr/ppocr/losses/rec_sar_loss.py +0 -28
- pyxlpr/ppocr/losses/rec_srn_loss.py +0 -47
- pyxlpr/ppocr/losses/table_att_loss.py +0 -109
- pyxlpr/ppocr/metrics/__init__.py +0 -44
- pyxlpr/ppocr/metrics/cls_metric.py +0 -45
- pyxlpr/ppocr/metrics/det_metric.py +0 -82
- pyxlpr/ppocr/metrics/distillation_metric.py +0 -73
- pyxlpr/ppocr/metrics/e2e_metric.py +0 -86
- pyxlpr/ppocr/metrics/eval_det_iou.py +0 -274
- pyxlpr/ppocr/metrics/kie_metric.py +0 -70
- pyxlpr/ppocr/metrics/rec_metric.py +0 -75
- pyxlpr/ppocr/metrics/table_metric.py +0 -50
- pyxlpr/ppocr/modeling/architectures/__init__.py +0 -32
- pyxlpr/ppocr/modeling/architectures/base_model.py +0 -88
- pyxlpr/ppocr/modeling/architectures/distillation_model.py +0 -60
- pyxlpr/ppocr/modeling/backbones/__init__.py +0 -54
- pyxlpr/ppocr/modeling/backbones/det_mobilenet_v3.py +0 -268
- pyxlpr/ppocr/modeling/backbones/det_resnet_vd.py +0 -246
- pyxlpr/ppocr/modeling/backbones/det_resnet_vd_sast.py +0 -285
- pyxlpr/ppocr/modeling/backbones/e2e_resnet_vd_pg.py +0 -265
- pyxlpr/ppocr/modeling/backbones/kie_unet_sdmgr.py +0 -186
- pyxlpr/ppocr/modeling/backbones/rec_mobilenet_v3.py +0 -138
- pyxlpr/ppocr/modeling/backbones/rec_mv1_enhance.py +0 -258
- pyxlpr/ppocr/modeling/backbones/rec_nrtr_mtb.py +0 -48
- pyxlpr/ppocr/modeling/backbones/rec_resnet_31.py +0 -210
- pyxlpr/ppocr/modeling/backbones/rec_resnet_aster.py +0 -143
- pyxlpr/ppocr/modeling/backbones/rec_resnet_fpn.py +0 -307
- pyxlpr/ppocr/modeling/backbones/rec_resnet_vd.py +0 -286
- pyxlpr/ppocr/modeling/heads/__init__.py +0 -54
- pyxlpr/ppocr/modeling/heads/cls_head.py +0 -52
- pyxlpr/ppocr/modeling/heads/det_db_head.py +0 -118
- pyxlpr/ppocr/modeling/heads/det_east_head.py +0 -121
- pyxlpr/ppocr/modeling/heads/det_pse_head.py +0 -37
- pyxlpr/ppocr/modeling/heads/det_sast_head.py +0 -128
- pyxlpr/ppocr/modeling/heads/e2e_pg_head.py +0 -253
- pyxlpr/ppocr/modeling/heads/kie_sdmgr_head.py +0 -206
- pyxlpr/ppocr/modeling/heads/multiheadAttention.py +0 -163
- pyxlpr/ppocr/modeling/heads/rec_aster_head.py +0 -393
- pyxlpr/ppocr/modeling/heads/rec_att_head.py +0 -202
- pyxlpr/ppocr/modeling/heads/rec_ctc_head.py +0 -88
- pyxlpr/ppocr/modeling/heads/rec_nrtr_head.py +0 -826
- pyxlpr/ppocr/modeling/heads/rec_sar_head.py +0 -402
- pyxlpr/ppocr/modeling/heads/rec_srn_head.py +0 -280
- pyxlpr/ppocr/modeling/heads/self_attention.py +0 -406
- pyxlpr/ppocr/modeling/heads/table_att_head.py +0 -246
- pyxlpr/ppocr/modeling/necks/__init__.py +0 -32
- pyxlpr/ppocr/modeling/necks/db_fpn.py +0 -111
- pyxlpr/ppocr/modeling/necks/east_fpn.py +0 -188
- pyxlpr/ppocr/modeling/necks/fpn.py +0 -138
- pyxlpr/ppocr/modeling/necks/pg_fpn.py +0 -314
- pyxlpr/ppocr/modeling/necks/rnn.py +0 -92
- pyxlpr/ppocr/modeling/necks/sast_fpn.py +0 -284
- pyxlpr/ppocr/modeling/necks/table_fpn.py +0 -110
- pyxlpr/ppocr/modeling/transforms/__init__.py +0 -28
- pyxlpr/ppocr/modeling/transforms/stn.py +0 -135
- pyxlpr/ppocr/modeling/transforms/tps.py +0 -308
- pyxlpr/ppocr/modeling/transforms/tps_spatial_transformer.py +0 -156
- pyxlpr/ppocr/optimizer/__init__.py +0 -61
- pyxlpr/ppocr/optimizer/learning_rate.py +0 -228
- pyxlpr/ppocr/optimizer/lr_scheduler.py +0 -49
- pyxlpr/ppocr/optimizer/optimizer.py +0 -160
- pyxlpr/ppocr/optimizer/regularizer.py +0 -52
- pyxlpr/ppocr/postprocess/__init__.py +0 -55
- pyxlpr/ppocr/postprocess/cls_postprocess.py +0 -33
- pyxlpr/ppocr/postprocess/db_postprocess.py +0 -234
- pyxlpr/ppocr/postprocess/east_postprocess.py +0 -143
- pyxlpr/ppocr/postprocess/locality_aware_nms.py +0 -200
- pyxlpr/ppocr/postprocess/pg_postprocess.py +0 -52
- pyxlpr/ppocr/postprocess/pse_postprocess/__init__.py +0 -15
- pyxlpr/ppocr/postprocess/pse_postprocess/pse/__init__.py +0 -29
- pyxlpr/ppocr/postprocess/pse_postprocess/pse/setup.py +0 -14
- pyxlpr/ppocr/postprocess/pse_postprocess/pse_postprocess.py +0 -118
- pyxlpr/ppocr/postprocess/rec_postprocess.py +0 -654
- pyxlpr/ppocr/postprocess/sast_postprocess.py +0 -355
- pyxlpr/ppocr/tools/__init__.py +0 -14
- pyxlpr/ppocr/tools/eval.py +0 -83
- pyxlpr/ppocr/tools/export_center.py +0 -77
- pyxlpr/ppocr/tools/export_model.py +0 -129
- pyxlpr/ppocr/tools/infer/predict_cls.py +0 -151
- pyxlpr/ppocr/tools/infer/predict_det.py +0 -300
- pyxlpr/ppocr/tools/infer/predict_e2e.py +0 -169
- pyxlpr/ppocr/tools/infer/predict_rec.py +0 -414
- pyxlpr/ppocr/tools/infer/predict_system.py +0 -204
- pyxlpr/ppocr/tools/infer/utility.py +0 -629
- pyxlpr/ppocr/tools/infer_cls.py +0 -83
- pyxlpr/ppocr/tools/infer_det.py +0 -134
- pyxlpr/ppocr/tools/infer_e2e.py +0 -122
- pyxlpr/ppocr/tools/infer_kie.py +0 -153
- pyxlpr/ppocr/tools/infer_rec.py +0 -146
- pyxlpr/ppocr/tools/infer_table.py +0 -107
- pyxlpr/ppocr/tools/program.py +0 -596
- pyxlpr/ppocr/tools/test_hubserving.py +0 -117
- pyxlpr/ppocr/tools/train.py +0 -163
- pyxlpr/ppocr/tools/xlprog.py +0 -748
- pyxlpr/ppocr/utils/EN_symbol_dict.txt +0 -94
- pyxlpr/ppocr/utils/__init__.py +0 -24
- pyxlpr/ppocr/utils/dict/ar_dict.txt +0 -117
- pyxlpr/ppocr/utils/dict/arabic_dict.txt +0 -162
- pyxlpr/ppocr/utils/dict/be_dict.txt +0 -145
- pyxlpr/ppocr/utils/dict/bg_dict.txt +0 -140
- pyxlpr/ppocr/utils/dict/chinese_cht_dict.txt +0 -8421
- pyxlpr/ppocr/utils/dict/cyrillic_dict.txt +0 -163
- pyxlpr/ppocr/utils/dict/devanagari_dict.txt +0 -167
- pyxlpr/ppocr/utils/dict/en_dict.txt +0 -63
- pyxlpr/ppocr/utils/dict/fa_dict.txt +0 -136
- pyxlpr/ppocr/utils/dict/french_dict.txt +0 -136
- pyxlpr/ppocr/utils/dict/german_dict.txt +0 -143
- pyxlpr/ppocr/utils/dict/hi_dict.txt +0 -162
- pyxlpr/ppocr/utils/dict/it_dict.txt +0 -118
- pyxlpr/ppocr/utils/dict/japan_dict.txt +0 -4399
- pyxlpr/ppocr/utils/dict/ka_dict.txt +0 -153
- pyxlpr/ppocr/utils/dict/korean_dict.txt +0 -3688
- pyxlpr/ppocr/utils/dict/latin_dict.txt +0 -185
- pyxlpr/ppocr/utils/dict/mr_dict.txt +0 -153
- pyxlpr/ppocr/utils/dict/ne_dict.txt +0 -153
- pyxlpr/ppocr/utils/dict/oc_dict.txt +0 -96
- pyxlpr/ppocr/utils/dict/pu_dict.txt +0 -130
- pyxlpr/ppocr/utils/dict/rs_dict.txt +0 -91
- pyxlpr/ppocr/utils/dict/rsc_dict.txt +0 -134
- pyxlpr/ppocr/utils/dict/ru_dict.txt +0 -125
- pyxlpr/ppocr/utils/dict/ta_dict.txt +0 -128
- pyxlpr/ppocr/utils/dict/table_dict.txt +0 -277
- pyxlpr/ppocr/utils/dict/table_structure_dict.txt +0 -2759
- pyxlpr/ppocr/utils/dict/te_dict.txt +0 -151
- pyxlpr/ppocr/utils/dict/ug_dict.txt +0 -114
- pyxlpr/ppocr/utils/dict/uk_dict.txt +0 -142
- pyxlpr/ppocr/utils/dict/ur_dict.txt +0 -137
- pyxlpr/ppocr/utils/dict/xi_dict.txt +0 -110
- pyxlpr/ppocr/utils/dict90.txt +0 -90
- pyxlpr/ppocr/utils/e2e_metric/Deteval.py +0 -574
- pyxlpr/ppocr/utils/e2e_metric/polygon_fast.py +0 -83
- pyxlpr/ppocr/utils/e2e_utils/extract_batchsize.py +0 -87
- pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_fast.py +0 -457
- pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_slow.py +0 -592
- pyxlpr/ppocr/utils/e2e_utils/pgnet_pp_utils.py +0 -162
- pyxlpr/ppocr/utils/e2e_utils/visual.py +0 -162
- pyxlpr/ppocr/utils/en_dict.txt +0 -95
- pyxlpr/ppocr/utils/gen_label.py +0 -81
- pyxlpr/ppocr/utils/ic15_dict.txt +0 -36
- pyxlpr/ppocr/utils/iou.py +0 -54
- pyxlpr/ppocr/utils/logging.py +0 -69
- pyxlpr/ppocr/utils/network.py +0 -84
- pyxlpr/ppocr/utils/ppocr_keys_v1.txt +0 -6623
- pyxlpr/ppocr/utils/profiler.py +0 -110
- pyxlpr/ppocr/utils/save_load.py +0 -150
- pyxlpr/ppocr/utils/stats.py +0 -72
- pyxlpr/ppocr/utils/utility.py +0 -80
- pyxlpr/ppstructure/__init__.py +0 -13
- pyxlpr/ppstructure/predict_system.py +0 -187
- pyxlpr/ppstructure/table/__init__.py +0 -13
- pyxlpr/ppstructure/table/eval_table.py +0 -72
- pyxlpr/ppstructure/table/matcher.py +0 -192
- pyxlpr/ppstructure/table/predict_structure.py +0 -136
- pyxlpr/ppstructure/table/predict_table.py +0 -221
- pyxlpr/ppstructure/table/table_metric/__init__.py +0 -16
- pyxlpr/ppstructure/table/table_metric/parallel.py +0 -51
- pyxlpr/ppstructure/table/table_metric/table_metric.py +0 -247
- pyxlpr/ppstructure/table/tablepyxl/__init__.py +0 -13
- pyxlpr/ppstructure/table/tablepyxl/style.py +0 -283
- pyxlpr/ppstructure/table/tablepyxl/tablepyxl.py +0 -118
- pyxlpr/ppstructure/utility.py +0 -71
- pyxlpr/xlai.py +0 -10
- /pyxllib/{ext/autogui → autogui}/virtualkey.py +0 -0
- {pyxllib-0.3.96.dist-info → pyxllib-0.3.197.dist-info/licenses}/LICENSE +0 -0
pyxllib/data/pglib.py
CHANGED
@@ -34,24 +34,46 @@ import json
 import json
 import textwrap
 import datetime
+import re
 
 from tqdm import tqdm
 
 import psycopg
 import psycopg.rows
 
-from pyxllib.prog.newbie import round_int
-from pyxllib.prog.pupil import utc_now, utc_timestamp
+from pyxllib.prog.newbie import round_int, human_readable_number
+from pyxllib.prog.pupil import utc_now, utc_timestamp, is_valid_identifier
 from pyxllib.prog.specialist import XlOsEnv
-from pyxllib.
-from pyxllib.
+from pyxllib.algo.pupil import ValuesStat2
+from pyxllib.file.specialist import get_etag, StreamJsonlWriter
+from pyxllib.data.sqlite import SqlBase, SqlBuilder
 
 
 class Connection(psycopg.Connection, SqlBase):
 
+    def __init__(self, *args, **kwargs):
+        psycopg.Connection.__init__(self, *args, **kwargs)
+        SqlBase.__init__(self, *args, **kwargs)
+
+    def __del__(self):
+        self.close()
+
     def __1_库(self):
         pass
 
+    def get_db_activities(self, datname=None):
+        """
+        Retrieve activity information for the current database.
+
+        :param datname: in theory this field should be auto-detectable, but there is no time to dig into that right now, so pass it in manually for now
+        """
+        sql = SqlBuilder('pg_stat_activity')
+        sql.select('pid', 'datname', 'usename', 'state', 'query', 'age(now(), query_start) AS "query_age"')
+        sql.where("state = 'active'")
+        if datname:
+            sql.where(f"datname = '{datname}'")
+        return self.exec2dict(sql.build_select()).fetchall()
+
     def __2_表格(self):
         pass
 
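The new `Connection` subclass wires `SqlBase` helpers into psycopg's own connection type. A minimal usage sketch, assuming a hypothetical DSN; `Connection.connect` here is the stock psycopg 3 classmethod the subclass inherits, not something added in this diff:

    from pyxllib.data.pglib import Connection

    # hypothetical DSN; connect() is inherited from psycopg.Connection
    conn = Connection.connect('postgresql://user:pass@localhost:5432/mydb')
    for act in conn.get_db_activities(datname='mydb'):
        print(act['pid'], act['usename'], act['query_age'])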
@@ -84,6 +106,49 @@ class Connection(psycopg.Connection, SqlBase):
         self.execute(f"COMMENT ON COLUMN {table_name}.{col_name} IS $zyzf${comment}$zyzf$")
         self.commit()
 
+    def reset_table_item_id(self, table_name, item_id_name=None, counter_name=None):
+        """ Reset the id values of a table's rows, and reset the sequence counter too
+
+        :param item_id_name: the table's auto-increment id column name; inferred by a naming rule of my own style
+        :param counter_name: the sequence (counter) name; when present it is reset too
+        """
+        # 1 renumber the rows
+        if item_id_name is None:
+            m = re.match(r'(.+?)(_table)?$', table_name)
+            item_id_name = m.group(1) + '_id'
+
+        sql = f"""WITH cte AS (
+            SELECT {item_id_name}, ROW_NUMBER() OVER (ORDER BY {item_id_name}) AS new_{item_id_name}
+            FROM {table_name}
+        )
+        UPDATE {table_name}
+        SET {item_id_name} = cte.new_{item_id_name}
+        FROM cte
+        WHERE {table_name}.{item_id_name} = cte.{item_id_name}"""
+        self.execute(sql)  # todo this SQL pattern seems to hit a bug occasionally
+        self.commit()
+
+        # 2 reset the counter
+        if counter_name is None:
+            counter_name = f'{table_name}_{item_id_name}_seq'
+
+        # find the current maximum id value
+        max_id = self.exec2one(f'SELECT MAX({item_id_name}) FROM {table_name}')
+        # self.execute(f'ALTER SEQUENCE {counter_name} RESTART WITH {max_id + 1}')
+        # check whether the sequence exists; if not, create it, then reset it
+        # if created, also bind it to the given table and column
+        self.execute(f"""
+        DO $$
+        BEGIN
+        IF NOT EXISTS (SELECT FROM pg_class WHERE relkind = 'S' AND relname = '{counter_name}') THEN
+            EXECUTE format('CREATE SEQUENCE %I', '{counter_name}');
+            EXECUTE format('ALTER TABLE %I ALTER COLUMN %I SET DEFAULT nextval(''%I'')', '{table_name}', '{item_id_name}', '{counter_name}');
+        END IF;
+        EXECUTE format('ALTER SEQUENCE %I RESTART WITH %s', '{counter_name}', {max_id + 1});
+        END $$;
+        """)
+        self.commit()
+
     def __3_execute(self):
         pass
 
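`reset_table_item_id` infers its column and sequence names from the table name: the regex strips an optional `_table` suffix and appends `_id`. A standalone sketch of just that inference rule, mirroring the code above:

    import re

    def infer_names(table_name):
        # mirrors the inference inside reset_table_item_id
        m = re.match(r'(.+?)(_table)?$', table_name)
        item_id_name = m.group(1) + '_id'
        counter_name = f'{table_name}_{item_id_name}_seq'
        return item_id_name, counter_name

    print(infer_names('files'))        # -> ('files_id', 'files_files_id_seq')
    print(infer_names('files_table'))  # -> ('files_id', 'files_table_files_id_seq')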
@@ -99,6 +164,43 @@ class Connection(psycopg.Connection, SqlBase):
         # cur.close()
         return data
 
+    def exec2dict_batch(self, sql, batch_size=1000, use_offset=None, **kwargs):
+        """ A version that returns the data in batches
+
+        :param use_offset: whether to paginate with OFFSET; decided automatically from whether the sql contains a WHERE, but it is sometimes best to specify it explicitly to avoid mistakes
+            if each round of outer processing mutates the database, so that the WHERE rule stays the same but the data itself changes, OFFSET should be off
+            and each round simply fetches whatever currently satisfies the condition
+            in that case this function also has to run commit_all itself
+            otherwise, for a plain traversal with no WHERE, or a WHERE whose matching data never changes, OFFSET should be used
+        :return:
+            1st value: an iterator that still appears to yield one row at a time, while actually fetching in batch_size chunks behind the scenes
+            2nd value: the total number of rows
+        """
+        if not isinstance(sql, SqlBuilder):
+            raise ValueError('for now this only works together with SqlBuilder')
+
+        if use_offset is None:
+            use_offset = not sql._where
+
+        num = self.exec2one(sql.build_count())
+        offset = 0
+
+        def yield_row():
+            nonlocal offset
+            while True:
+                sql2 = sql.copy()
+                if not use_offset:  # without OFFSET, commit all pending sql first, so the data is fully updated before fetching
+                    self.commit_all()
+                sql2.limit(batch_size, offset)
+                rows = self.exec2dict(sql2.build_select(), **kwargs).fetchall()
+                if use_offset:
+                    offset += len(rows)
+                if not rows:
+                    break
+                yield from rows
+
+        return yield_row(), num
+
     exec_dict = exec2dict
 
     def __4_数据类型(self):
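Since `exec2dict_batch` returns the iterator together with the total count, it pairs naturally with a progress bar. A usage sketch, assuming a hypothetical `files` table and a placeholder `handle` function:

    from tqdm import tqdm
    from pyxllib.data.sqlite import SqlBuilder

    sql = SqlBuilder('files')
    sql.select('*')
    rows, total = conn.exec2dict_batch(sql, batch_size=1000, use_offset=True)
    for row in tqdm(rows, total=total):  # yields dicts one by one, fetched in 1000-row chunks
        handle(row)  # placeholder for per-row processing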
@@ -107,7 +209,7 @@ class Connection(psycopg.Connection, SqlBase):
     @classmethod
     def cvt_type(cls, val):
         if isinstance(val, (dict, list)):
-            val = json.dumps(val, ensure_ascii=False)
+            val = json.dumps(val, ensure_ascii=False, default=str)
         # note: list/array types adapt to python automatically on both read and write
         return val
 
@@ -121,15 +223,17 @@ class Connection(psycopg.Connection, SqlBase):
             return 'boolean'
         elif isinstance(val, float):
             return 'float4'
-        elif isinstance(val, dict):
+        elif isinstance(val, (dict, list)):
             return 'jsonb'
+        elif isinstance(val, datetime.datetime):
+            return 'timestamp'
         else:  # other types such as list can be stored as text via json.dumps or str
             return 'text'
 
     def __5_增删改查(self):
         pass
 
-    def insert_row(self, table_name, cols, *, on_conflict='DO NOTHING', commit=
+    def insert_row(self, table_name, cols, *, on_conflict='DO NOTHING', commit=False):
         """ [Create] insert a new row
 
         :param dict cols: the values to insert, given as a dict
@@ -140,6 +244,10 @@ class Connection(psycopg.Connection, SqlBase):
             more complex handling rules can be written too, see http://postgres.cn/docs/12/sql-insert.html
             e.g. here, when the inserted id conflicts, host_name gets replaced, and nick_name can also be set to 'abc'
             note that the leading (id) part is required
+
+            Note: a common need is to get the row's id back after inserting, but that requires knowing the table's auto-increment id column name,
+                and even then the inserted id is hard to recover; assuming the freshest id is the largest is unsafe and risky.
+                Better to compute the table's max id yourself beforehand and do the increment manually, so you know the id of the row you insert.
         """
         ks = ','.join(cols.keys())
         vs = ','.join(['%s'] * (len(cols.keys())))
@@ -153,11 +261,9 @@ class Connection(psycopg.Connection, SqlBase):
             on_conflict = f'ON CONFLICT {on_conflict}'
             query += f' {on_conflict}'
 
-        self.
-        if commit:
-            self.commit()
+        self.commit_base(commit, query, params)
 
-    def insert_rows(self, table_name, keys, ls, *, on_conflict='DO NOTHING', commit=
+    def insert_rows(self, table_name, keys, ls, *, on_conflict='DO NOTHING', commit=False):
         """ [Create] insert new rows
 
         :param str keys: the column names to insert, one comma-separated string
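With the `commit` default now `False`, both insert methods defer the commit to `commit_base` unless asked. A hedged sketch reusing the ON CONFLICT form described in the docstring above; the `hosts` table and its columns are illustrative:

    # assumes a hosts table with a unique id column
    conn.insert_row('hosts',
                    {'id': 1, 'host_name': 'xlpr1'},
                    on_conflict="(id) DO UPDATE SET host_name=EXCLUDED.host_name",
                    commit=True)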
@@ -179,9 +285,199 @@ class Connection(psycopg.Connection, SqlBase):
             on_conflict = f'ON CONFLICT {on_conflict}'
             query += f' {on_conflict}'
 
-        self.
-
-
+        self.commit_base(commit, query, params)
+
+    def __6_高级统计(self):
+        pass
+
+    def get_column_valuesstat(self, table_name, column, filter_condition=None,
+                              percentile_count=5,
+                              by_data=False, data_type=None):
+        """ Get a ValuesStat2 object describing the statistical profile of one column of a table
+
+        :param table_name: table name
+        :param column: the column the statistics are computed on
+            (not necessarily a plain column name)
+        :param percentile_count: number of percentile points, e.g. 3 computes only the median
+        :param by_data: whether to pull the raw data
+            (by default only summary statistics are computed, without the raw data)
+        """
+
+        def init_from_db_data():
+            sql = SqlBuilder(table_name)
+            if filter_condition:
+                sql.where(filter_condition)
+            values = self.exec2col(sql.build_select(column))
+            if data_type == 'numeric':
+                values = [x and float(x) for x in values]
+            vs = ValuesStat2(raw_values=values, data_type=data_type)
+
+            if data_type == 'text' and is_valid_identifier(column):
+                vs0 = self.get_column_valuesstat(table_name, column, filter_condition=filter_condition,
+                                                 percentile_count=percentile_count, by_data=False)
+                vs.n = vs0.n
+                vs.dist = vs0.dist
+
+            return vs
+
+        def init_from_db():
+            # 1 build the base SQL query
+            sql = SqlBuilder(table_name)
+            sql.select("COUNT(*) AS total_count")
+            sql.select(f"COUNT({column}) AS non_null_count")
+            sql.select(f"MIN({column}) AS min_value")
+            sql.select(f"MAX({column}) AS max_value")
+            if data_type and 'timestamp' in data_type:
+                percentile_type = 'PERCENTILE_DISC'
+                # todo timestamps could get a "generalized" mean/stddev too, but that needs the full data pulled and computed locally
+            elif data_type == 'text':
+                percentile_type = 'PERCENTILE_DISC'
+            else:  # the default case: ordinary numeric types
+                sql.select(f"SUM({column}) AS total_sum")
+                sql.select(f"AVG({column}) AS average")
+                sql.select(f"STDDEV({column}) AS standard_deviation")
+                percentile_type = 'PERCENTILE_CONT'
+
+            percentiles = []
+            # add percentile computations dynamically based on percentile_count
+            if percentile_count > 2:
+                step = 1 / (percentile_count - 1)
+                percentiles = [(i * step) for i in range(1, percentile_count - 1)]
+                for p in percentiles:
+                    sql.select(f"{percentile_type}({p:.2f}) WITHIN GROUP (ORDER BY {column}) "
+                               f"AS percentile_{int(p * 100)}")
+
+            if filter_condition:
+                sql.where(filter_condition)
+
+            row = self.exec2dict(sql.build_select()).fetchone()
+
+            # 2 assemble the statistics
+            x = ValuesStat2(data_type=data_type)
+            x.raw_n = row['total_count']
+            x.n = row['non_null_count']
+            if not x.n:
+                return x
+
+            x.sum = row.get('total_sum', None)
+            x.mean = row.get('average', None)
+            x.std = row.get('standard_deviation', None)
+
+            # if percentiles were computed, fill in the distribution
+            x.dist = [row['min_value']] + [row[f"percentile_{int(p * 100)}"] for p in percentiles] + [row['max_value']]
+            if data_type == 'numeric':
+                x.dist = [float(x) for x in x.dist]
+
+            return x
+
+        data_type = data_type or self.get_column_data_type(table_name, column)
+
+        # if the column is not a plain identifier, force fetching the raw data
+        if not is_valid_identifier(column):
+            by_data = True
+
+        if by_data:
+            vs = init_from_db_data()
+        else:
+            vs = init_from_db()
+
+        return vs
+
+    def export_jsonl(self, file_path, table_name, key_col=None, batch_size=1000, print_mode=0):
+        """ Export a table to a local jsonl file
+
+        :param str|SqlBuilder table_name: table name
+            an SqlBuilder object is also accepted, for finer control over which data gets exported
+        :param file_path: output file path
+        :param batch_size: number of rows read, and written, per batch
+        :param key_col: column used as the key; when given, duplicates are skipped automatically
+            strongly recommended to set this
+            it does not actually have to be the primary key, any column with ordered values works
+
+        todo the brute-force version is easy to write; what I am unsure about is the caching scheme, plus issues like bytes-typed data getting rather large
+            a generic buffered file-writing facility is needed first
+        """
+        # 1 sql
+        if isinstance(table_name, str):
+            sql = SqlBuilder(table_name)
+            sql.select('*')
+        else:
+            sql = table_name
+            m = re.search(r'FROM (\w+)', sql.build_select())
+            table_name = m.group(1) if m else 'table'
+        assert isinstance(sql, SqlBuilder)
+
+        file_path = XlPath(file_path)
+        if key_col:
+            sql.order_by(key_col)
+            if file_path.is_file():
+                # read the existing data and find the largest key value
+                data = file_path.read_jsonl(batch_size=1000)
+                if data:
+                    max_val = max([x[key_col] for x in data]) if data else None
+                    if max_val is not None:
+                        sql.where(f'{key_col} > {max_val}')
+
+        # 2 fetch the data
+        file = StreamJsonlWriter(file_path, batch_size=batch_size)  # streaming writer
+        rows, total = self.exec2dict_batch(sql, batch_size=batch_size, use_offset=True)
+        for row in tqdm(rows, total=total, desc=f'exporting data from table {table_name}', disable=not print_mode):
+            file.append_line(row)
+        file.flush()
+
+    def check_db_tables_size(self, db_name=None):
+        """ Inspect the sizes of all tables in the given database """
+        from datetime import datetime
+        import pandas as pd
+
+        if db_name is None:
+            # use sql to get the database this connection is on
+            db_name = self.exec2one("SELECT current_database()")
+
+        data = []
+        tables = self.exec2col("SELECT table_name FROM information_schema.tables WHERE table_schema='public'")
+        for table_name in tables:
+            row = {
+                'database': db_name,
+                'table_name': table_name,
+            }
+            sz = self.exec2one(f"SELECT pg_total_relation_size('public.{table_name}')")
+            if not sz:
+                continue
+            lines = self.exec2one(f"SELECT COUNT(*) FROM {table_name}")
+            row['size'], row['lines'] = sz, lines
+            row['readable_size'] = human_readable_number(sz, 'KB')
+            row['perline_size'] = human_readable_number(sz / lines, 'KB') if lines else -1
+            row['update_time'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+            data.append(row)
+
+        df = pd.DataFrame.from_dict(data)
+        if len(df):
+            df.sort_values(['size'], ascending=False, inplace=True)
+            df.reset_index(drop=True, inplace=True)
+        return df
+
+    def check_multi_db_size(self, db_list):
+        """ This generally needs the postgres account, to have permission on all the databases """
+        from datetime import datetime
+        import pandas as pd
+
+        data = []
+        for db in db_list:
+            row = {
+                'name': db,
+            }
+            sz = self.exec2one(f"SELECT pg_database_size('{db}')")
+            row['size'] = sz
+            row['readable_size'] = human_readable_number(sz, 'KB')
+            row['update_time'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+
+            data.append(row)
+
+        df = pd.DataFrame.from_dict(data)
+        df.sort_values(['size'], ascending=False, inplace=True)
+        df.reset_index(drop=True, inplace=True)
+        return df
 
 
 """
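The new `__6_高级统计` block adds column statistics, jsonl export, and size reports. A usage sketch; the table and column names are hypothetical, only the method signatures come from the diff:

    # summary statistics of one column, computed inside PostgreSQL
    vs = conn.get_column_valuesstat('files', 'size', percentile_count=5)

    # incremental export; on rerun it continues past the largest file_id already in the file
    conn.export_jsonl('files.jsonl', 'files', key_col='file_id', print_mode=1)

    # per-table size report as a DataFrame sorted by size
    df = conn.check_db_tables_size()
    print(df[['table_name', 'readable_size', 'lines']].head())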
@@ -377,7 +673,7 @@ class XlprDb(Connection):
         self.insert_row('xlapi', {'input': input, 'output': output,
                                   'elapse_ms': elapse_ms, 'update_time': utc_timestamp(8)},
                         on_conflict=on_conflict)
-
+        self.commit()
         return self.execute('SELECT id FROM xlapi WHERE input=%s', (input,)).fetchone()[0]
 
     def insert_row2xlserver(self, request, xlapi_id=0, **kwargs):
@@ -387,7 +683,7 @@ class XlprDb(Connection):
               'xlapi_id': xlapi_id}
         kw.update(kwargs)
         print(kw)  # monitor who is using the api
-        self.insert_row('xlserver', kw)
+        self.insert_row('xlserver', kw, commit=True)
 
     def __3_host_trace相关可视化(self):
         """ TODO rename dbview to host_trace """
@@ -419,15 +715,16 @@ class XlprDb(Connection):
             if gpu:
                 status['gpu_memory'] = ssh.check_gpu_usage(print_mode=True)
             if disk:
-                #
-                status['disk_memory'] = ssh.check_disk_usage(print_mode=True, timeout=
+                # checking disk usage is very slow; it can be skipped on timeout (timeout set to 6 hours)
+                status['disk_memory'] = ssh.check_disk_usage(print_mode=True, timeout=60 * 60 * 6)
         except Exception as e:
             status = {'error': f'{str(type(e))[8:-2]}: {e}'}
         print(status)
 
         if status:
             self.insert_row('host_trace',
-                            {'host_name': host_name, 'status': status, 'update_time': utc_timestamp(8)}
+                            {'host_name': host_name, 'status': status, 'update_time': utc_timestamp(8)},
+                            commit=True)
         print()
 
     def _get_host_trace_total(self, mode, title, yaxis_name, date_trunc, recent, host_attr):
@@ -499,6 +796,101 @@ class XlprDb(Connection):
         def pretty_val(v):
             return round_int(v) if v > 100 else round(v, 2)
 
+        try:
+            chart = Line()
+            chart.set_title(title)
+            chart.options['xAxis'][0].update({'min': ls[0][0], 'type': 'time',
+                                              # 'minInterval': 3600 * 1000 * 24,
+                                              'name': 'time', 'nameGap': 50, 'nameLocation': 'middle'})
+            chart.options['yAxis'][0].update({'name': yaxis_name, 'nameGap': 50, 'nameLocation': 'middle'})
+            # the layout is tuned rather crudely for now; worth studying whether it can be made more automatic and flexible
+            chart.options['legend'][0].update({'top': '6%', 'icon': 'pin'})
+            chart.options['grid'] = [{'top': 55 + len(all_users_usaged) * 4, 'containLabel': True}]
+            chart.options['tooltip'].opts.update({'axisPointer': {'type': 'cross'}, 'trigger': 'item'})
+
+            chart.add_series(f'total{pretty_val(ls[0][1]):g}', to_list([x[1] for x in ls]), areaStyle={})
+            for user, usaged in all_users_usaged.most_common():
+                usaged = usaged / ((ls[-1][0] - ls[0][0]).total_seconds() / 3600 + 1e-9)
+                chart.add_series(f'{user}{pretty_val(usaged):g}',
+                                 to_list([x[2].get(user, 0) for x in ls]),
+                                 areaStyle={}, stack='Total', emphasis={'focus': 'series'})
+
+            return '<body>' + chart.render_embed() + '</body>', sum(all_users_usaged.values())
+        except Exception as e:
+            return str(e), 0
+
+    # cdx_edit
+    def _get_database_trace_total(self, title, yaxis_name, date_trunc, recent, link_name):
+        ls = self.execute(textwrap.dedent(f"""\
+        WITH cte1 AS(
+            SELECT link_name, jsonb_each(status::jsonb) AS db_data, date_trunc('{date_trunc}', update_time) ttime
+            FROM database_trace WHERE update_time > %s AND link_name = '{link_name}'
+        ), cte2 AS(
+            SELECT ttime, link_name, (db_data).key AS table_name, ((db_data).value->> '_total')::bigint AS total
+            FROM cte1
+        )
+        SELECT ttime, jsonb_object_agg(table_name, total) AS aggregated_json,SUM(total) as total
+        FROM cte2
+        GROUP BY ttime
+        ORDER BY ttime"""), ((utc_now(8) - recent).isoformat(timespec='seconds'),)).fetchall()
+        return self.database_create_stack_chart(title, ls, yaxis_name=yaxis_name)
+
+    def _get_database_trace_per_host(self, db, title, yaxis_name, date_trunc, recent, link_name):
+        ls = self.execute(textwrap.dedent(f"""\
+        WITH cte1 AS (
+            SELECT link_name, jsonb_each(status::jsonb) AS db_data, date_trunc('{date_trunc}', update_time) ttime
+            FROM database_trace WHERE update_time > %s AND link_name = '{link_name}'
+        ), cte2 AS (
+            SELECT ttime, link_name, (db_data).key AS table_name, (db_data).value AS size_text
+            FROM cte1
+        ), cte3 AS (
+            SELECT ttime, table_name, each.key AS key, each.value AS value
+            FROM cte2, jsonb_each_text(size_text) AS each(key, value)
+        )
+        SELECT ttime, jsonb_object_agg(key,
+            CASE
+                WHEN key = '_total' THEN NULL
+                ELSE (value::jsonb ->> 'size')::bigint -- Handle other keys as usual
+            END
+        ) FILTER (WHERE key != '_total') AS aggregated_result, -- make sure _total is not in aggregated_result
+        MAX(CASE WHEN key = '_total' THEN value::bigint ELSE NULL END) AS total -- extract the _total value separately
+        FROM cte3
+        WHERE (key = '_total' OR value::jsonb ? 'size') -- Ensure that '_total' is included
+            AND table_name = '{db}'
+        GROUP BY ttime
+        ORDER BY ttime"""), ((utc_now(8) - recent).isoformat(timespec='seconds'),)).fetchall()
+        return self.database_create_stack_chart(title, ls, yaxis_name=yaxis_name)
+
+    def database_create_stack_chart(self, title, ls, *, yaxis_name=''):
+        """ Build the stacked display chart
+
+        :param title: chart title
+        :param list ls: n*3; column 1 is the time, column 3 the total, column 2 the per-item breakdown
+        """
+        from pyecharts.charts import Line
+        all_database_usaged = Counter()
+        last_time = None
+        for x in ls:
+            hours = 0 if last_time is None else ((x[0] - last_time).total_seconds() / 3600)
+            last_time = x[0]
+            for k, v in x[1].items():
+                if k == '_total':
+                    continue
+                all_database_usaged[k] += v * hours
+
+        for i, x in enumerate(ls):
+            ct = Counter()
+            for k, v in x[1].items():
+                ct[k] += v
+            ls[i] = (x[0], ct, int(x[2]))
+
+        # 2 convert to a chart visualization
+        def to_list(values):
+            return [(x[0], v) for x, v in zip(ls, values)]
+
+        def pretty_val(v):
+            return round_int(v) if v > 100 else round(v, 2)
+
         chart = Line()
         chart.set_title(title)
         chart.options['xAxis'][0].update({'min': ls[0][0], 'type': 'time',
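The CTE queries above unpack a `database_trace.status` jsonb payload. Reconstructed from the SQL alone, not from any table definition shown here, one payload plausibly looks like this:

    # '_total' holds the overall byte count; the other keys are per-database
    # entries whose values carry at least a 'size' field (shape inferred from the SQL)
    status = {
        '_total': 123456789,
        'stdata': {'size': 98765432},
        'xlpr': {'size': 24681012},
    }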
@@ -507,24 +899,63 @@ class XlprDb(Connection):
         chart.options['yAxis'][0].update({'name': yaxis_name, 'nameGap': 50, 'nameLocation': 'middle'})
         # the layout is tuned rather crudely for now; worth studying whether it can be made more automatic and flexible
         chart.options['legend'][0].update({'top': '6%', 'icon': 'pin'})
-        chart.options['grid'] = [{'top': 55 + len(
+        chart.options['grid'] = [{'top': 55 + len(all_database_usaged) * 4, 'containLabel': True}]
         chart.options['tooltip'].opts.update({'axisPointer': {'type': 'cross'}, 'trigger': 'item'})
 
-        chart.add_series(f'total{pretty_val(ls[0][
-        for
+        # chart.add_series(f'total {pretty_val(ls[0][2] / 1024 / 1024 / 1024):g}',
+        #                  to_list([x[2] / 1024 / 1024 / 1024 for x in ls]), areaStyle={})
+        for database, usaged in all_database_usaged.most_common():
             usaged = usaged / ((ls[-1][0] - ls[0][0]).total_seconds() / 3600 + 1e-9)
-            chart.add_series(f'{
-                             to_list([x[
+            chart.add_series(f'{database} {pretty_val(usaged / 1024 / 1024 / 1024):g}',
+                             to_list([x[1].get(database, 0) / 1024 / 1024 / 1024 for x in ls]),
                              areaStyle={}, stack='Total', emphasis={'focus': 'series'})
+        return '<body>' + chart.render_embed() + '</body>'
+
+    def dbview_xldb1_memory(self, recent=datetime.timedelta(days=180), date_trunc='day'):
+        from pyxllib.data.echarts import render_echart_html
 
-
+        db_list = ['stdata', 'xlpr', 'st', 'ckz']
+        args = ['database size (GB)', date_trunc, recent, 'xldb1']
+        htmltexts = []
+
+        res = self._get_database_trace_total('xldb1 database usage overview', *args)
+        htmltexts.append(res)
+
+        data_stats = []
+        for idx, db in enumerate(db_list, start=1):
+            data_stats.append(self._get_database_trace_per_host(db, f'{db}', *args))
+        htmltexts += data_stats
+
+        self.commit()
+        h = render_echart_html('database_cdx', body='<br/>'.join(htmltexts))
+        return h
+
+    def dbview_xldb2_memory(self, recent=datetime.timedelta(days=180), date_trunc='day'):
+        from pyxllib.data.echarts import render_echart_html
+
+        db_list = ['ragdata', 'kq5034']
+        args = ['database size (GB)', date_trunc, recent, 'xldb2']
+        htmltexts = []
+
+        res = self._get_database_trace_total('xldb2 database usage overview', *args)
+        htmltexts.append(res)
+
+        data_stats = []
+        for idx, db in enumerate(db_list, start=1):
+            data_stats.append(self._get_database_trace_per_host(db, f'{db}', *args))
+        htmltexts += data_stats
+
+        self.commit()
+        h = render_echart_html('database_cdx', body='<br/>'.join(htmltexts))
+        return h
 
     def dbview_cpu(self, recent=datetime.timedelta(days=1), date_trunc='hour'):
         from pyxllib.data.echarts import render_echart_html
 
         args = ['CPU core count (e.g. 4 cores shows as 400%)', date_trunc, recent, 'sum(hosts.cpu_number)*100']
 
-        htmltexts = [
+        htmltexts = [
+            '<a target="_blank" href="https://www.yuque.com/xlpr/data/hnpb2g?singleDoc#"> "Server Monitoring" tool documentation </a>']
         res = self._get_host_trace_total('cpu', 'XLPR servers CPU usage overview', *args)
         htmltexts.append(res[0])
 
@@ -546,7 +977,8 @@ class XlprDb(Connection):
 
         args = ['memory (GB)', date_trunc, recent, 'sum(hosts.cpu_gb)']
 
-        htmltexts = [
+        htmltexts = [
+            '<a target="_blank" href="https://www.yuque.com/xlpr/data/hnpb2g?singleDoc#"> "Server Monitoring" tool documentation </a>']
         res = self._get_host_trace_total('cpu_memory', 'XLPR servers memory usage overview', *args)
         htmltexts.append(res[0])
 
@@ -563,14 +995,15 @@ class XlprDb(Connection):
         h = render_echart_html('cpu_memory', body='<br/>'.join(htmltexts))
         return h
 
-    def dbview_disk_memory(self, recent=datetime.timedelta(days=
+    def dbview_disk_memory(self, recent=datetime.timedelta(days=360), date_trunc='day'):
         """ Inspect recent disk usage
         """
         from pyxllib.data.echarts import render_echart_html
 
         args = ['disk (GB)', date_trunc, recent, 'sum(hosts.disk_gb)']
 
-        htmltexts = [
+        htmltexts = [
+            '<a target="_blank" href="https://www.yuque.com/xlpr/data/hnpb2g?singleDoc#"> "Server Monitoring" tool documentation </a>']
         res = self._get_host_trace_total('disk_memory', 'XLPR servers disk usage overview', *args)
         htmltexts.append(res[0])
         htmltexts.append('note: the xlpr4 (4-GPU) server computes /home size incorrectly with du, so it is not included<br/>')
@@ -597,7 +1030,8 @@ class XlprDb(Connection):
 
         args = ['GPU memory (GB)', date_trunc, recent, 'sum(hosts.gpu_gb)']
 
-        htmltexts = [
+        htmltexts = [
+            '<a target="_blank" href="https://www.yuque.com/xlpr/data/hnpb2g?singleDoc#"> "Server Monitoring" tool documentation </a>']
         res = self._get_host_trace_total('gpu_memory', 'XLPR eight servers GPU memory usage overview', *args)
         htmltexts.append(res[0])
 
@@ -641,3 +1075,53 @@ class XlprDb(Connection):
                     self.update_row('files', {'dhash': computed_dhash}, {'id': file_id})
                 progress_bar.update(1)
         self.commit()
+
+    def append_history(self, table_name, where, backup_keys, *,
+                       can_merge=None,
+                       update_time=None,
+                       commit=False):
+        """ Append a history record to a table; make sure the table has a jsonb-typed historys field
+
+        Every call makes a full backup of the key fields, with no advanced optimization,
+        so this only suits simple history-keeping scenarios; anything more complex needs its own custom solution.
+
+        :param table_name: table name
+        :param where: the rule identifying the record; make sure it selects exactly one row
+        :param backup_keys: the fields to back up
+        :param can_merge: in some cases the history need not be recorded redundantly; a rule for merging with the previous entry can be supplied
+            def can_merge(last, now):
+                "last is the previous dict record, now is the dict about to be recorded;
+                returning True replaces last with now instead of appending a new record"
+                ...
+
+        :param update_time: update timestamp; defaults to the current time when not given
+        """
+        # 1 fetch the existing history
+        ops = ' AND '.join([f'{k}=%s' for k in where.keys()])
+        historys = self.exec2one(f'SELECT historys FROM {table_name} WHERE {ops}', list(where.values())) or []
+        if historys:
+            status1 = historys[-1]
+        else:
+            status1 = {}
+
+        # 2 build the new record
+        if update_time is None:
+            update_time = utc_timestamp()
+        status2 = self.exec2dict(f'SELECT {",".join(backup_keys)} FROM {table_name} WHERE {ops}',
+                                 list(where.values())).fetchone()
+        status2['update_time'] = update_time
+
+        # 3 append the history record
+        if can_merge is None:
+            def can_merge(status1, status2):
+                for k in backup_keys:
+                    if status1.get(k) != status2.get(k):
+                        return False
+                return True
+
+        if historys and can_merge(status1, status2):
+            historys[-1] = status2
+        else:
+            historys.append(status2)
+
+        self.update_row(table_name, {'historys': historys}, where, commit=commit)
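`append_history` snapshots the chosen fields into the jsonb `historys` list, optionally collapsing consecutive equivalent entries via `can_merge`. A hedged usage sketch; the `tasks` table and its columns are hypothetical, only the method signature comes from the diff:

    # merge with the previous entry whenever the status did not change
    def can_merge(last, now):
        return last.get('status') == now.get('status')

    conn.append_history('tasks', {'task_id': 42}, ['status', 'owner'],
                        can_merge=can_merge, commit=True)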