pyxllib 0.3.96__py3-none-any.whl → 0.3.197__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyxllib/algo/geo.py +12 -0
- pyxllib/algo/intervals.py +1 -1
- pyxllib/algo/matcher.py +78 -0
- pyxllib/algo/pupil.py +187 -19
- pyxllib/algo/specialist.py +2 -1
- pyxllib/algo/stat.py +38 -2
- {pyxlpr → pyxllib/autogui}/__init__.py +1 -1
- pyxllib/autogui/activewin.py +246 -0
- pyxllib/autogui/all.py +9 -0
- pyxllib/{ext/autogui → autogui}/autogui.py +40 -11
- pyxllib/autogui/uiautolib.py +362 -0
- pyxllib/autogui/wechat.py +827 -0
- pyxllib/autogui/wechat_msg.py +421 -0
- pyxllib/autogui/wxautolib.py +84 -0
- pyxllib/cv/slidercaptcha.py +137 -0
- pyxllib/data/echarts.py +123 -12
- pyxllib/data/jsonlib.py +89 -0
- pyxllib/data/pglib.py +514 -30
- pyxllib/data/sqlite.py +231 -4
- pyxllib/ext/JLineViewer.py +14 -1
- pyxllib/ext/drissionlib.py +277 -0
- pyxllib/ext/kq5034lib.py +0 -1594
- pyxllib/ext/robustprocfile.py +497 -0
- pyxllib/ext/unixlib.py +6 -5
- pyxllib/ext/utools.py +108 -95
- pyxllib/ext/webhook.py +32 -14
- pyxllib/ext/wjxlib.py +88 -0
- pyxllib/ext/wpsapi.py +124 -0
- pyxllib/ext/xlwork.py +9 -0
- pyxllib/ext/yuquelib.py +1003 -71
- pyxllib/file/docxlib.py +1 -1
- pyxllib/file/libreoffice.py +165 -0
- pyxllib/file/movielib.py +9 -0
- pyxllib/file/packlib/__init__.py +112 -75
- pyxllib/file/pdflib.py +1 -1
- pyxllib/file/pupil.py +1 -1
- pyxllib/file/specialist/dirlib.py +1 -1
- pyxllib/file/specialist/download.py +10 -3
- pyxllib/file/specialist/filelib.py +266 -55
- pyxllib/file/xlsxlib.py +205 -50
- pyxllib/file/xlsyncfile.py +341 -0
- pyxllib/prog/cachetools.py +64 -0
- pyxllib/prog/filelock.py +42 -0
- pyxllib/prog/multiprogs.py +940 -0
- pyxllib/prog/newbie.py +9 -2
- pyxllib/prog/pupil.py +129 -60
- pyxllib/prog/specialist/__init__.py +176 -2
- pyxllib/prog/specialist/bc.py +5 -2
- pyxllib/prog/specialist/browser.py +11 -2
- pyxllib/prog/specialist/datetime.py +68 -0
- pyxllib/prog/specialist/tictoc.py +12 -13
- pyxllib/prog/specialist/xllog.py +5 -5
- pyxllib/prog/xlosenv.py +7 -0
- pyxllib/text/airscript.js +744 -0
- pyxllib/text/charclasslib.py +17 -5
- pyxllib/text/jiebalib.py +6 -3
- pyxllib/text/jinjalib.py +32 -0
- pyxllib/text/jsa_ai_prompt.md +271 -0
- pyxllib/text/jscode.py +159 -4
- pyxllib/text/nestenv.py +1 -1
- pyxllib/text/newbie.py +12 -0
- pyxllib/text/pupil/common.py +26 -0
- pyxllib/text/specialist/ptag.py +2 -2
- pyxllib/text/templates/echart_base.html +11 -0
- pyxllib/text/templates/highlight_code.html +17 -0
- pyxllib/text/templates/latex_editor.html +103 -0
- pyxllib/text/xmllib.py +76 -14
- pyxllib/xl.py +2 -1
- pyxllib-0.3.197.dist-info/METADATA +48 -0
- pyxllib-0.3.197.dist-info/RECORD +126 -0
- {pyxllib-0.3.96.dist-info → pyxllib-0.3.197.dist-info}/WHEEL +1 -2
- pyxllib/ext/autogui/__init__.py +0 -8
- pyxllib-0.3.96.dist-info/METADATA +0 -51
- pyxllib-0.3.96.dist-info/RECORD +0 -333
- pyxllib-0.3.96.dist-info/top_level.txt +0 -2
- pyxlpr/ai/__init__.py +0 -5
- pyxlpr/ai/clientlib.py +0 -1281
- pyxlpr/ai/specialist.py +0 -286
- pyxlpr/ai/torch_app.py +0 -172
- pyxlpr/ai/xlpaddle.py +0 -655
- pyxlpr/ai/xltorch.py +0 -705
- pyxlpr/data/__init__.py +0 -11
- pyxlpr/data/coco.py +0 -1325
- pyxlpr/data/datacls.py +0 -365
- pyxlpr/data/datasets.py +0 -200
- pyxlpr/data/gptlib.py +0 -1291
- pyxlpr/data/icdar/__init__.py +0 -96
- pyxlpr/data/icdar/deteval.py +0 -377
- pyxlpr/data/icdar/icdar2013.py +0 -341
- pyxlpr/data/icdar/iou.py +0 -340
- pyxlpr/data/icdar/rrc_evaluation_funcs_1_1.py +0 -463
- pyxlpr/data/imtextline.py +0 -473
- pyxlpr/data/labelme.py +0 -866
- pyxlpr/data/removeline.py +0 -179
- pyxlpr/data/specialist.py +0 -57
- pyxlpr/eval/__init__.py +0 -85
- pyxlpr/paddleocr.py +0 -776
- pyxlpr/ppocr/__init__.py +0 -15
- pyxlpr/ppocr/configs/rec/multi_language/generate_multi_language_configs.py +0 -226
- pyxlpr/ppocr/data/__init__.py +0 -135
- pyxlpr/ppocr/data/imaug/ColorJitter.py +0 -26
- pyxlpr/ppocr/data/imaug/__init__.py +0 -67
- pyxlpr/ppocr/data/imaug/copy_paste.py +0 -170
- pyxlpr/ppocr/data/imaug/east_process.py +0 -437
- pyxlpr/ppocr/data/imaug/gen_table_mask.py +0 -244
- pyxlpr/ppocr/data/imaug/iaa_augment.py +0 -114
- pyxlpr/ppocr/data/imaug/label_ops.py +0 -789
- pyxlpr/ppocr/data/imaug/make_border_map.py +0 -184
- pyxlpr/ppocr/data/imaug/make_pse_gt.py +0 -106
- pyxlpr/ppocr/data/imaug/make_shrink_map.py +0 -126
- pyxlpr/ppocr/data/imaug/operators.py +0 -433
- pyxlpr/ppocr/data/imaug/pg_process.py +0 -906
- pyxlpr/ppocr/data/imaug/randaugment.py +0 -143
- pyxlpr/ppocr/data/imaug/random_crop_data.py +0 -239
- pyxlpr/ppocr/data/imaug/rec_img_aug.py +0 -533
- pyxlpr/ppocr/data/imaug/sast_process.py +0 -777
- pyxlpr/ppocr/data/imaug/text_image_aug/__init__.py +0 -17
- pyxlpr/ppocr/data/imaug/text_image_aug/augment.py +0 -120
- pyxlpr/ppocr/data/imaug/text_image_aug/warp_mls.py +0 -168
- pyxlpr/ppocr/data/lmdb_dataset.py +0 -115
- pyxlpr/ppocr/data/pgnet_dataset.py +0 -104
- pyxlpr/ppocr/data/pubtab_dataset.py +0 -107
- pyxlpr/ppocr/data/simple_dataset.py +0 -372
- pyxlpr/ppocr/losses/__init__.py +0 -61
- pyxlpr/ppocr/losses/ace_loss.py +0 -52
- pyxlpr/ppocr/losses/basic_loss.py +0 -135
- pyxlpr/ppocr/losses/center_loss.py +0 -88
- pyxlpr/ppocr/losses/cls_loss.py +0 -30
- pyxlpr/ppocr/losses/combined_loss.py +0 -67
- pyxlpr/ppocr/losses/det_basic_loss.py +0 -208
- pyxlpr/ppocr/losses/det_db_loss.py +0 -80
- pyxlpr/ppocr/losses/det_east_loss.py +0 -63
- pyxlpr/ppocr/losses/det_pse_loss.py +0 -149
- pyxlpr/ppocr/losses/det_sast_loss.py +0 -121
- pyxlpr/ppocr/losses/distillation_loss.py +0 -272
- pyxlpr/ppocr/losses/e2e_pg_loss.py +0 -140
- pyxlpr/ppocr/losses/kie_sdmgr_loss.py +0 -113
- pyxlpr/ppocr/losses/rec_aster_loss.py +0 -99
- pyxlpr/ppocr/losses/rec_att_loss.py +0 -39
- pyxlpr/ppocr/losses/rec_ctc_loss.py +0 -44
- pyxlpr/ppocr/losses/rec_enhanced_ctc_loss.py +0 -70
- pyxlpr/ppocr/losses/rec_nrtr_loss.py +0 -30
- pyxlpr/ppocr/losses/rec_sar_loss.py +0 -28
- pyxlpr/ppocr/losses/rec_srn_loss.py +0 -47
- pyxlpr/ppocr/losses/table_att_loss.py +0 -109
- pyxlpr/ppocr/metrics/__init__.py +0 -44
- pyxlpr/ppocr/metrics/cls_metric.py +0 -45
- pyxlpr/ppocr/metrics/det_metric.py +0 -82
- pyxlpr/ppocr/metrics/distillation_metric.py +0 -73
- pyxlpr/ppocr/metrics/e2e_metric.py +0 -86
- pyxlpr/ppocr/metrics/eval_det_iou.py +0 -274
- pyxlpr/ppocr/metrics/kie_metric.py +0 -70
- pyxlpr/ppocr/metrics/rec_metric.py +0 -75
- pyxlpr/ppocr/metrics/table_metric.py +0 -50
- pyxlpr/ppocr/modeling/architectures/__init__.py +0 -32
- pyxlpr/ppocr/modeling/architectures/base_model.py +0 -88
- pyxlpr/ppocr/modeling/architectures/distillation_model.py +0 -60
- pyxlpr/ppocr/modeling/backbones/__init__.py +0 -54
- pyxlpr/ppocr/modeling/backbones/det_mobilenet_v3.py +0 -268
- pyxlpr/ppocr/modeling/backbones/det_resnet_vd.py +0 -246
- pyxlpr/ppocr/modeling/backbones/det_resnet_vd_sast.py +0 -285
- pyxlpr/ppocr/modeling/backbones/e2e_resnet_vd_pg.py +0 -265
- pyxlpr/ppocr/modeling/backbones/kie_unet_sdmgr.py +0 -186
- pyxlpr/ppocr/modeling/backbones/rec_mobilenet_v3.py +0 -138
- pyxlpr/ppocr/modeling/backbones/rec_mv1_enhance.py +0 -258
- pyxlpr/ppocr/modeling/backbones/rec_nrtr_mtb.py +0 -48
- pyxlpr/ppocr/modeling/backbones/rec_resnet_31.py +0 -210
- pyxlpr/ppocr/modeling/backbones/rec_resnet_aster.py +0 -143
- pyxlpr/ppocr/modeling/backbones/rec_resnet_fpn.py +0 -307
- pyxlpr/ppocr/modeling/backbones/rec_resnet_vd.py +0 -286
- pyxlpr/ppocr/modeling/heads/__init__.py +0 -54
- pyxlpr/ppocr/modeling/heads/cls_head.py +0 -52
- pyxlpr/ppocr/modeling/heads/det_db_head.py +0 -118
- pyxlpr/ppocr/modeling/heads/det_east_head.py +0 -121
- pyxlpr/ppocr/modeling/heads/det_pse_head.py +0 -37
- pyxlpr/ppocr/modeling/heads/det_sast_head.py +0 -128
- pyxlpr/ppocr/modeling/heads/e2e_pg_head.py +0 -253
- pyxlpr/ppocr/modeling/heads/kie_sdmgr_head.py +0 -206
- pyxlpr/ppocr/modeling/heads/multiheadAttention.py +0 -163
- pyxlpr/ppocr/modeling/heads/rec_aster_head.py +0 -393
- pyxlpr/ppocr/modeling/heads/rec_att_head.py +0 -202
- pyxlpr/ppocr/modeling/heads/rec_ctc_head.py +0 -88
- pyxlpr/ppocr/modeling/heads/rec_nrtr_head.py +0 -826
- pyxlpr/ppocr/modeling/heads/rec_sar_head.py +0 -402
- pyxlpr/ppocr/modeling/heads/rec_srn_head.py +0 -280
- pyxlpr/ppocr/modeling/heads/self_attention.py +0 -406
- pyxlpr/ppocr/modeling/heads/table_att_head.py +0 -246
- pyxlpr/ppocr/modeling/necks/__init__.py +0 -32
- pyxlpr/ppocr/modeling/necks/db_fpn.py +0 -111
- pyxlpr/ppocr/modeling/necks/east_fpn.py +0 -188
- pyxlpr/ppocr/modeling/necks/fpn.py +0 -138
- pyxlpr/ppocr/modeling/necks/pg_fpn.py +0 -314
- pyxlpr/ppocr/modeling/necks/rnn.py +0 -92
- pyxlpr/ppocr/modeling/necks/sast_fpn.py +0 -284
- pyxlpr/ppocr/modeling/necks/table_fpn.py +0 -110
- pyxlpr/ppocr/modeling/transforms/__init__.py +0 -28
- pyxlpr/ppocr/modeling/transforms/stn.py +0 -135
- pyxlpr/ppocr/modeling/transforms/tps.py +0 -308
- pyxlpr/ppocr/modeling/transforms/tps_spatial_transformer.py +0 -156
- pyxlpr/ppocr/optimizer/__init__.py +0 -61
- pyxlpr/ppocr/optimizer/learning_rate.py +0 -228
- pyxlpr/ppocr/optimizer/lr_scheduler.py +0 -49
- pyxlpr/ppocr/optimizer/optimizer.py +0 -160
- pyxlpr/ppocr/optimizer/regularizer.py +0 -52
- pyxlpr/ppocr/postprocess/__init__.py +0 -55
- pyxlpr/ppocr/postprocess/cls_postprocess.py +0 -33
- pyxlpr/ppocr/postprocess/db_postprocess.py +0 -234
- pyxlpr/ppocr/postprocess/east_postprocess.py +0 -143
- pyxlpr/ppocr/postprocess/locality_aware_nms.py +0 -200
- pyxlpr/ppocr/postprocess/pg_postprocess.py +0 -52
- pyxlpr/ppocr/postprocess/pse_postprocess/__init__.py +0 -15
- pyxlpr/ppocr/postprocess/pse_postprocess/pse/__init__.py +0 -29
- pyxlpr/ppocr/postprocess/pse_postprocess/pse/setup.py +0 -14
- pyxlpr/ppocr/postprocess/pse_postprocess/pse_postprocess.py +0 -118
- pyxlpr/ppocr/postprocess/rec_postprocess.py +0 -654
- pyxlpr/ppocr/postprocess/sast_postprocess.py +0 -355
- pyxlpr/ppocr/tools/__init__.py +0 -14
- pyxlpr/ppocr/tools/eval.py +0 -83
- pyxlpr/ppocr/tools/export_center.py +0 -77
- pyxlpr/ppocr/tools/export_model.py +0 -129
- pyxlpr/ppocr/tools/infer/predict_cls.py +0 -151
- pyxlpr/ppocr/tools/infer/predict_det.py +0 -300
- pyxlpr/ppocr/tools/infer/predict_e2e.py +0 -169
- pyxlpr/ppocr/tools/infer/predict_rec.py +0 -414
- pyxlpr/ppocr/tools/infer/predict_system.py +0 -204
- pyxlpr/ppocr/tools/infer/utility.py +0 -629
- pyxlpr/ppocr/tools/infer_cls.py +0 -83
- pyxlpr/ppocr/tools/infer_det.py +0 -134
- pyxlpr/ppocr/tools/infer_e2e.py +0 -122
- pyxlpr/ppocr/tools/infer_kie.py +0 -153
- pyxlpr/ppocr/tools/infer_rec.py +0 -146
- pyxlpr/ppocr/tools/infer_table.py +0 -107
- pyxlpr/ppocr/tools/program.py +0 -596
- pyxlpr/ppocr/tools/test_hubserving.py +0 -117
- pyxlpr/ppocr/tools/train.py +0 -163
- pyxlpr/ppocr/tools/xlprog.py +0 -748
- pyxlpr/ppocr/utils/EN_symbol_dict.txt +0 -94
- pyxlpr/ppocr/utils/__init__.py +0 -24
- pyxlpr/ppocr/utils/dict/ar_dict.txt +0 -117
- pyxlpr/ppocr/utils/dict/arabic_dict.txt +0 -162
- pyxlpr/ppocr/utils/dict/be_dict.txt +0 -145
- pyxlpr/ppocr/utils/dict/bg_dict.txt +0 -140
- pyxlpr/ppocr/utils/dict/chinese_cht_dict.txt +0 -8421
- pyxlpr/ppocr/utils/dict/cyrillic_dict.txt +0 -163
- pyxlpr/ppocr/utils/dict/devanagari_dict.txt +0 -167
- pyxlpr/ppocr/utils/dict/en_dict.txt +0 -63
- pyxlpr/ppocr/utils/dict/fa_dict.txt +0 -136
- pyxlpr/ppocr/utils/dict/french_dict.txt +0 -136
- pyxlpr/ppocr/utils/dict/german_dict.txt +0 -143
- pyxlpr/ppocr/utils/dict/hi_dict.txt +0 -162
- pyxlpr/ppocr/utils/dict/it_dict.txt +0 -118
- pyxlpr/ppocr/utils/dict/japan_dict.txt +0 -4399
- pyxlpr/ppocr/utils/dict/ka_dict.txt +0 -153
- pyxlpr/ppocr/utils/dict/korean_dict.txt +0 -3688
- pyxlpr/ppocr/utils/dict/latin_dict.txt +0 -185
- pyxlpr/ppocr/utils/dict/mr_dict.txt +0 -153
- pyxlpr/ppocr/utils/dict/ne_dict.txt +0 -153
- pyxlpr/ppocr/utils/dict/oc_dict.txt +0 -96
- pyxlpr/ppocr/utils/dict/pu_dict.txt +0 -130
- pyxlpr/ppocr/utils/dict/rs_dict.txt +0 -91
- pyxlpr/ppocr/utils/dict/rsc_dict.txt +0 -134
- pyxlpr/ppocr/utils/dict/ru_dict.txt +0 -125
- pyxlpr/ppocr/utils/dict/ta_dict.txt +0 -128
- pyxlpr/ppocr/utils/dict/table_dict.txt +0 -277
- pyxlpr/ppocr/utils/dict/table_structure_dict.txt +0 -2759
- pyxlpr/ppocr/utils/dict/te_dict.txt +0 -151
- pyxlpr/ppocr/utils/dict/ug_dict.txt +0 -114
- pyxlpr/ppocr/utils/dict/uk_dict.txt +0 -142
- pyxlpr/ppocr/utils/dict/ur_dict.txt +0 -137
- pyxlpr/ppocr/utils/dict/xi_dict.txt +0 -110
- pyxlpr/ppocr/utils/dict90.txt +0 -90
- pyxlpr/ppocr/utils/e2e_metric/Deteval.py +0 -574
- pyxlpr/ppocr/utils/e2e_metric/polygon_fast.py +0 -83
- pyxlpr/ppocr/utils/e2e_utils/extract_batchsize.py +0 -87
- pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_fast.py +0 -457
- pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_slow.py +0 -592
- pyxlpr/ppocr/utils/e2e_utils/pgnet_pp_utils.py +0 -162
- pyxlpr/ppocr/utils/e2e_utils/visual.py +0 -162
- pyxlpr/ppocr/utils/en_dict.txt +0 -95
- pyxlpr/ppocr/utils/gen_label.py +0 -81
- pyxlpr/ppocr/utils/ic15_dict.txt +0 -36
- pyxlpr/ppocr/utils/iou.py +0 -54
- pyxlpr/ppocr/utils/logging.py +0 -69
- pyxlpr/ppocr/utils/network.py +0 -84
- pyxlpr/ppocr/utils/ppocr_keys_v1.txt +0 -6623
- pyxlpr/ppocr/utils/profiler.py +0 -110
- pyxlpr/ppocr/utils/save_load.py +0 -150
- pyxlpr/ppocr/utils/stats.py +0 -72
- pyxlpr/ppocr/utils/utility.py +0 -80
- pyxlpr/ppstructure/__init__.py +0 -13
- pyxlpr/ppstructure/predict_system.py +0 -187
- pyxlpr/ppstructure/table/__init__.py +0 -13
- pyxlpr/ppstructure/table/eval_table.py +0 -72
- pyxlpr/ppstructure/table/matcher.py +0 -192
- pyxlpr/ppstructure/table/predict_structure.py +0 -136
- pyxlpr/ppstructure/table/predict_table.py +0 -221
- pyxlpr/ppstructure/table/table_metric/__init__.py +0 -16
- pyxlpr/ppstructure/table/table_metric/parallel.py +0 -51
- pyxlpr/ppstructure/table/table_metric/table_metric.py +0 -247
- pyxlpr/ppstructure/table/tablepyxl/__init__.py +0 -13
- pyxlpr/ppstructure/table/tablepyxl/style.py +0 -283
- pyxlpr/ppstructure/table/tablepyxl/tablepyxl.py +0 -118
- pyxlpr/ppstructure/utility.py +0 -71
- pyxlpr/xlai.py +0 -10
- /pyxllib/{ext/autogui → autogui}/virtualkey.py +0 -0
- {pyxllib-0.3.96.dist-info → pyxllib-0.3.197.dist-info/licenses}/LICENSE +0 -0
pyxllib/ext/yuquelib.py
CHANGED
@@ -3,23 +3,81 @@
 # @Author : 陈坤泽
 # @Email : 877362867@qq.com
 # @Date : 2024/01/01
-
+import html
+import re
+import time
+from enum import Enum
 
 import requests
+import urllib.parse
+
+from fastcore.basics import GetAttr
+from pprint import pprint
 
 from pyxllib.xl import *
 from pyxllib.algo.stat import *
-from
+from pyxllib.prog.newbie import SingletonForEveryInitArgs
+from pyxllib.text.pupil import UrlQueryBuilder
+from pyxllib.text.nestenv import NestEnv
+from pyxllib.text.xmllib import BeautifulSoup, XlBs4Tag
+from pyxllib.cv.xlcvlib import xlcv
+
+
+def __1_语雀主api():
+    pass
+
+
+def update_yuque_doc_by_dp(doc_url):
+    """ Yuque's document-update API does not trigger a refresh of the matching content in the PC client,
+    unless the document is opened and edited once in a browser; dp (DrissionPage) can automate that step here.
+
+    Note that the API for creating a doc and adding it to the TOC does take effect in the client immediately; only updates do not.
+    """
+    from DrissionPage import Chromium, ChromiumOptions
+    from pyxllib.ext.drissionlib import dp_check_quit
+
+    # 1 Open the browser: the account must have been logged in manually beforehand
+    co = ChromiumOptions()
+    # co.headless()  # headless mode is not reliable enough; generally keep it off
+    # co.set_argument('--window-size', '100,100')  # changing the window size does not work either
+    # only the fully normal mode triggers the update right away
+    browser = Chromium(co)
+
+    # 2 Confirm the url
+    if not doc_url.startswith('https://www.yuque.com/'):
+        doc_url = 'https://www.yuque.com/' + doc_url
+    tab = browser.new_tab(f'{doc_url}/edit')
+
+    # 3 Perform the document update
+    tab('t:button@@data-ne-type=clearFormat')  # locate an element to confirm the document has loaded
+    time.sleep(1)
+    tab.actions.type(' ')
+    time.sleep(1)
+    tab.actions.key_down('BACKSPACE')
+    time.sleep(2)
+    tab.actions.key_up('BACKSPACE')
+    time.sleep(5)
+    tab('t:button@@text():更新').click(by_js=True)
+    time.sleep(20)
+    tab.close()
+
+    # 4 Quit
+    dp_check_quit()
 
 
-class Yuque:
-
+class Yuque(metaclass=SingletonForEveryInitArgs):
+    """
+    https://www.yuque.com/yuque/developer/openapi
+    Yuque rate limits: at most 5000 requests per hour and 100 requests per second
+    """
+
+    def __init__(self, token=None, user_id=None):
         self.base_url = "https://www.yuque.com/api/v2"
         self.headers = {
-            "X-Auth-Token": token,
+            "X-Auth-Token": token or os.getenv('YUQUE_TOKEN'),
             "Content-Type": "application/json"
         }
-        self.
+        self._user_id = os.getenv('YUQUE_USER_ID') or user_id
 
     def get_user(self):
         """ Get user info
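Before the next hunk, a minimal usage sketch of the reworked client above. It assumes YUQUE_TOKEN (and optionally YUQUE_USER_ID) are exported in the environment, and that SingletonForEveryInitArgs caches one instance per distinct set of constructor arguments; the metaclass name suggests this, but its implementation is not part of this diff:

    from pyxllib.ext.yuquelib import Yuque, update_yuque_doc_by_dp

    yq = Yuque()                        # token falls back to os.getenv('YUQUE_TOKEN')
    print(yq.get_user()['data']['id'])  # raw json from GET /api/v2/user
    assert Yuque() is yq                # presumed singleton behaviour for identical init args
    # update_yuque_doc_by_dp('user/repo/doc')  # hypothetical url; needs a logged-in Chromium profile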
@@ -44,110 +102,204 @@ class Yuque:
             'work_id': ''}}
         """
         url = f"{self.base_url}/user"
-
-        return
+        resp = requests.get(url, headers=self.headers)
+        return resp.json()
+
+    @property
+    def user_id(self):
+        """ Many endpoints need the user ID; cache it here """
+        if self._user_id is None:
+            self._user_id = self.get_user()['data']['id']
+        return self._user_id
 
-    def
-
-        self.user_id = self.get_user()['data']['id']
-        return self.user_id
+    def __1_知识库操作(self):
+        pass
 
-    @run_once('id,str')
-    def get_repos(self):
+    @run_once('id,str')  # todo a better caching mechanism is needed; as implemented, a restart is required to refresh
+    def get_repos(self, return_mode=0):
         """ Get the list of a user's knowledge bases (repos)
 
+        :param int|str return_mode: return mode
+            0 (default), return the raw json structure
+            df, a DataFrame
+            nickname2id, a mapping from repo "namespace and nickname" to ID
         """
-
-
-
+        if return_mode == 0:
+            url = f"{self.base_url}/users/{self.user_id}/repos"
+            resp = requests.get(url, headers=self.headers)
+            return resp.json()
+        elif return_mode == 'df':
+            data = self.get_repos()
+            columns = ['id', 'name', 'items_count', 'namespace']
+
+            ls = []
+            for d in data['data']:
+                ls.append([d[col] for col in columns])
 
-
-
-
+            df = pd.DataFrame(ls, columns=columns)
+            return df
+        elif return_mode == 'nickname2id':  # mapping from namespace and name to id (note: not considered here)
+            data = self.get_repos()
+            names2id = {d['name']: d['id'] for d in data['data']}
+            # e.g. the "日志" repo's namespace is journal, and then journal -> 24363220
+            namespace2id = {d['namespace'].split('/')[-1]: d['id'] for d in data['data']}
+            names2id.update(namespace2id)
+            return names2id
+        else:
+            raise ValueError(f'不支持的return_mode={return_mode}')
+
+    def get_repo_id(self, repo_id):
+        """ repo_id also accepts a "nickname" to obtain the actual id
         """
-
-
+        if isinstance(repo_id, str) and not re.match(r'\d+$', repo_id):
+            repo_id = self.get_repos('nickname2id')[repo_id]
+        return repo_id
+
+    def get_repo_docs(self, repo_id, *, offset=0, limit=100, return_mode=0):
+        """ Get the list of documents in a repo
 
-
-
+        :param repo_id: the repo's ID or Namespace (e.g. for "日志" I changed it to "journal")
+        :param int offset: how many documents to skip before fetching
+        :param int limit: how many documents to return, 100 by default
+        :param int|str return_mode: return mode
+            private doc: https://www.yuque.com/code4101/journal/ztvg5qh5m3ga7gh7?inner=ubc5753c5
+            0 (default), return the raw json structure
+            -1 (df), a DataFrame
+        :return: the document list
+            the underlying API returns data sorted by creation time; here it is re-sorted by update time
         """
-
-
+        repo_id = self.get_repo_id(repo_id)
+        if return_mode == 0:
+            uqb = UrlQueryBuilder()
+            uqb.add_param('offset', offset)
+            uqb.add_param('limit', limit)
+            url = uqb.build_url(f"{self.base_url}/repos/{repo_id}/docs")
+            logger.info(url)
+            d = requests.get(url, headers=self.headers).json()
+            return d
+        elif return_mode in (-1, 'df'):
+            data = self.get_repo_docs(repo_id, offset=offset, limit=limit)
+            # sort by updated_at, descending
+            data['data'].sort(key=lambda x: x['updated_at'], reverse=True)
+            columns = ['id', 'title', 'word_count', 'description', 'updated_at']
 
-
-
-
+            ls = []
+            for d in data['data']:
+                ls.append([d.get(col) for col in columns])
 
-
-
+            # ls.sort(key=lambda x: x[0])  # id generally reflects creation order
+            df = pd.DataFrame(ls, columns=columns)
+            # convert df['updated_at'] values like '2024-08-07T06:13:10.000Z' to datetime in the UTC+8 timezone
+            df['updated_at'] = pd.to_datetime(df['updated_at']).dt.tz_convert('Asia/Shanghai')
+            # do not display the timezone
+            df['updated_at'] = df['updated_at'].dt.strftime('%Y-%m-%d %H:%M:%S')
+            return df
+        else:
+            raise ValueError(f'不支持的return_mode={return_mode}')
 
-    def
-
+    def __2_目录操作(self):
+        pass
 
-
-
-
-        url = f"{self.base_url}/repos/{repo_id}/
-
-
+    def get_repo_toc(self, repo_id):
+        """ Get the repo's table of contents """
+        repo_id = self.get_repo_id(repo_id)
+        url = f"{self.base_url}/repos/{repo_id}/toc"
+        resp = requests.get(url, headers=self.headers)
+        d = resp.json()
+        # logger.info(d)
+        return d['data']
+
+    def repo_toc_move(self, repo_id,
+                      cur_doc=None, dst_doc=None,
+                      *,
+                      insert_ahead=False,
+                      to_child=False):
+        """ Repo / TOC / move mode
 
-
-
+        :param str|dict cur_doc: the current node to move
+            note that when cur is moved to dst, cur's children move along with it by default
+
+            the input is usually a dict obtained from get_repo_toc
+            but if it is a str, url mode is assumed, and this function calls get_repo_toc itself
+        :param str|dict dst_doc: the target node to move to
+        :param insert_ahead: by default the node is inserted after the target;
+            enable this parameter to insert before it instead
+        :param to_child: insert as a child of the target node
+        :return: apparently returns the new full TOC
         """
-
-
+        repo_id = self.get_repo_id(f'{repo_id}')
+
+        # string inputs are located via the url; only the final doc id after the last '/' is needed, the prefix can be omitted
+        if isinstance(cur_doc, str) or isinstance(dst_doc, str):
+            toc = self.get_repo_toc(repo_id)  # the repo's full TOC
+            if isinstance(cur_doc, str):
+                cur_doc = next((d for d in toc if d['url'] == cur_doc.split('/')[-1]))
+            if isinstance(dst_doc, str):
+                dst_doc = next((d for d in toc if d['url'] == dst_doc.split('/')[-1]))
+
+        url = f"{self.base_url}/repos/{repo_id}/toc"
+        cfg = {
+            'node_uuid': cur_doc['uuid'],
+            'target_uuid': dst_doc['uuid'],
+            'action': 'prependNode' if insert_ahead else 'appendNode',
+            'action_mode': 'child' if to_child else 'sibling',
+        }
+        resp = requests.put(url, json=cfg, headers=self.headers)
+        return resp.json()
 
-
-
-            ls.append([d[col] for col in columns])
+    def __3_文档操作(self):
+        pass
 
-
-
-        return df
+    def ____1_获取文档(self):
+        pass
 
-    def
+    def _get_doc(self, repo_id, doc_id):
         """ Get a single document's details
 
         :param repo_id: the repo's ID or Namespace
         :param doc_id: the document's ID
         :return: the document's details
         """
+        repo_id = self.get_repo_id(repo_id)
         url = f"{self.base_url}/repos/{repo_id}/docs/{doc_id}"
-
-        return
+        resp = requests.get(url, headers=self.headers)
+        return resp.json()
 
-    def
+    def get_doc(self, doc_url, return_mode='md'):
         """ Get a document's details from its URL
 
-        :param
+        :param doc_url: the document's URL
+            the trailing repo and doc parts of the url are enough
         :param return_mode: return mode,
-
-
+            json, the raw json structure
+            md, return the document's main md content
+            title_and_md, return the document's title and md content
         :return: the document's details
         """
-        repo_slug, doc_slug =
-
-        if repo_slug not in name2id:
-            raise ValueError(f'知识库"{repo_slug}"不存在')
-        repo_id = name2id[repo_slug]
-        res = self.get_doc(repo_id, doc_slug)
+        repo_slug, doc_slug = doc_url.split('/')[-2:]
+        data = self._get_doc(repo_slug, doc_slug)['data']
 
-        if return_mode ==
-            return
-        elif return_mode ==
-            return
+        if return_mode == 'json':
+            return data
+        elif return_mode == 'md':
+            return data['body']
+        elif return_mode == 'title_and_md':
+            return data["title"], data["body"]
 
     def export_markdown(self, url, output_dir=None, post_mode=1):
         """ Export as an md file
 
-        :param url: the document's URL
-
+        :param str|list[str] url: the document's URL
+            a single document can be exported, or several documents' md files batch-exported as a package
+        :param output_dir: output directory
+            for a single document the filename is generated from the article title
+            for several documents the filenames can be specified explicitly
         :param post_mode: post-processing mode
             0, no processing
             1, apply moderate simplification
         """
         # 1 Get the content
-        data = self.
+        data = self.get_doc(url, return_mode='json')
         body = data['body']
         if post_mode == 0:
             pass
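A short sketch of the listing modes this hunk adds. The repo nickname 'journal' and the doc slug are hypothetical; get_repo_id resolves any non-numeric string through the nickname2id mapping built by get_repos:

    yq = Yuque()

    repos = yq.get_repos('df')             # one row per repo: id, name, items_count, namespace
    repo_id = yq.get_repo_id('journal')    # nickname or namespace tail -> numeric id
    docs = yq.get_repo_docs(repo_id, limit=20, return_mode='df')  # re-sorted, newest updates first
    md = yq.get_doc('journal/some-doc-slug', return_mode='md')    # slug is made up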
@@ -156,18 +308,798 @@ class Yuque:
 
         # 2 Write the file
         if output_dir is not None:
-            title2 = refinepath(data['title']
+            title2 = refinepath(data['title'])
             f = XlPath(output_dir) / f'{title2}.md'
             f.write_text(body)
 
         return body
 
+    def ____2_新建文档(self):
+        pass
+
+    def _to_doc_data(self, doc_data, md_cvt=True):
+        """ Normalize non-standard document content into the standard dict format
+
+        :param str|dict doc_data: text content (md, html, lake) or data expressed as a dict
+            the new (md | html | lake) content to upload can be passed directly; it is converted to {'body': content} automatically
+            note that whether the original is body_html or body_lake, it is always uploaded in the body field
+
+            other specific parameters:
+            slug adjusts the url path name
+            title adjusts the title
+            public adjusts visibility, 0: private, 1: public, 2: public within the organization
+            format sets the imported content format, markdown: Markdown, html: standard HTML, lake: Yuque Lake format
+
+        :param bool md_cvt: whether the md format needs converting
+            md content pushed straight back to Yuque loses line breaks; outside code blocks, \n has to be replaced
+        """
+        # 1 String to dict
+        if isinstance(doc_data, str):
+            doc_data = {'body': doc_data}
+
+        # 2 Detect the text content format
+        if 'format' not in doc_data:
+            m = re.match(r'<!doctype\s(\w+?)>', doc_data['body'], flags=re.IGNORECASE)
+            if m:
+                doc_data['format'] = m.group(1).lower()
+
+        # 3 md format needs extra handling
+        if doc_data.get('format', 'markdown') == 'markdown' and md_cvt:
+            ne = NestEnv(doc_data['body']).search(r'^```[^\n]*\n(.+?)\n^```',
+                                                  flags=re.MULTILINE | re.DOTALL).invert()
+            doc_data['body'] = ne.replace('\n', '\n\n')
+
+        return doc_data
+
+    def create_doc(self, repo_id, doc_data,
+                   *,
+                   dst_doc=None, insert_ahead=False, to_child=False,  # set the document's position
+                   ):
+        """ Create a single document and place it at a given position in the repo
+
+        Example usage:
+            yuque.create_doc('周刊摘录',
+                             {'title': '标题', 'body': '内容', 'slug': 'custom_slug/url'},
+                             dst_doc='目标文档的slug/url', to_child=True)
+        """
+        # 1 Create the document
+        repo_id = self.get_repo_id(repo_id)
+        url = f"{self.base_url}/repos/{repo_id}/docs"
+
+        doc_data = self._to_doc_data(doc_data)
+        out_data = requests.post(url, json=doc_data, headers=self.headers).json()['data']
+        doc_id = out_data['id']
+
+        # 2 Add the document to the TOC
+        url2 = f"{self.base_url}/repos/{repo_id}/toc"
+        in_data2 = {
+            "action": "prependNode",  # by default, add at the very top of the repo TOC
+            "action_mode": "child",
+            "doc_id": doc_id,
+        }
+        # the return value is the repo's new TOC
+        toc = requests.put(url2, json=in_data2, headers=self.headers).json()['data']
+
+        # 3 If a specific TOC position is requested
+        if dst_doc:
+            self.repo_toc_move(repo_id, toc[0], dst_doc, insert_ahead=insert_ahead, to_child=to_child)
+
+        # even if dst_doc moved the TOC position, the newly created document carries no position info anyway, so no re-fetch based on dst_doc is needed
+        return out_data
+
+    def ____3_更新文档(self):
+        pass
+
+    def _update_doc(self, repo_id, doc_id, doc_data):
+        """ Update a single document's details
+
+        :param repo_id: the repo's ID or Namespace
+        :param doc_id: the document's ID
+        :param dict doc_data: dict containing the document's updated content
+        :return: the updated document's details
+        """
+        repo_id = self.get_repo_id(repo_id)
+        url = f"{self.base_url}/repos/{repo_id}/docs/{doc_id}"
+        resp = requests.put(url, json=doc_data, headers=self.headers)
+        return resp.json()
+
+    def update_doc(self, doc_url, doc_data, *, return_mode='json', use_dp=False):
+        """ Update a document's details via its URL
+
+        :param doc_url: the document's URL
+        :param str|json doc_data: dict containing the updated content, see the _to_doc_data interface
+        :param use_dp: although the web side refreshes in real time after this Yuque update API, the PC client does not reload and re-render immediately.
+            So if needed, enable this parameter to brute-force the content refresh with a crawler.
+            In this mode, doc_url must include at least the username part of the url, i.e. something like 'user_id/repo_id/doc_id'
+        :param str return_mode: the return value is the updated document's content, but there seems to be a bug: the body returned here is not stored in md format
+            'md', return the updated document's main md content
+            'json', the raw json structure
+
+            relying on this return value is not recommended; simply re-fetch the document afterwards and you get normal md format
+        :return: the updated document's details
+        """
+        # 1 Basic setup
+        repo_slug, doc_slug = doc_url.split('/')[-2:]
+        doc_data = self._to_doc_data(doc_data)
+
+        # 2 Submit the document update
+        data = self._update_doc(repo_slug, doc_slug, doc_data)['data']
+
+        # 3 Use a crawler to simulate an edit in the browser, triggering the client update notification
+        if use_dp:
+            update_yuque_doc_by_dp(doc_url)
+
+        # 4 Build the return value
+        if return_mode == 'md':
+            return data['body']
+        elif return_mode == 'json':
+            return data
+
+    def ____4_删除文档(self):
+        pass
+
+    def _delete_doc(self, repo_id, doc_id):
+        """ Delete a document
+
+        This is a real deletion, not mere removal from the TOC,
+        although the document can be recovered from the recycle bin for a short while.
+
+        :param repo_id: the repo's ID or Namespace
+        :param doc_id: the document's ID
+        """
+        repo_id = self.get_repo_id(repo_id)
+        url = f"{self.base_url}/repos/{repo_id}/docs/{doc_id}"
+        resp = requests.delete(url, headers=self.headers)
+        return resp.json()
+
+    def delete_doc(self, doc_url):
+        repo_slug, doc_slug = doc_url.split('/')[-2:]
+        print(self._delete_doc(repo_slug, doc_slug))
+
+    def __4_内容操作(self):
+        pass
+
     def read_tables_from_doc(self, url, header=0):
         """ Read tables from a document
 
         :param url: the document's URL
         :return: list of tables
         """
-        res = self.
+        res = self.get_doc(url, return_mode='json')
         tables = pd.read_html(res['body_html'], header=header)
         return tables
+
+
+def __2_语雀lake格式结构化解析基础工具():
+    pass
+
+
+# Language types supported by Yuque code blocks
+class LakeCodeModes(Enum):
+    PLAIN = 'plain'
+    ABAP = 'abap'
+    AGDA = 'agda'
+    ARKTS = 'arkts'
+    ASM = 'z80'
+    BASH = 'bash'
+    BASIC = 'basic'
+    C = 'c'
+    CSHARP = 'csharp'
+    CPP = 'cpp'
+    CLOJURE = 'clojure'
+    CMAKE = 'cmake'
+    CSS = 'css'
+    CYPHER = 'cypher'
+    DART = 'dart'
+    DIFF = 'diff'
+    DOCKFILE = 'dockerfile'
+    ERLANG = 'erlang'
+    FSHARP = 'fsharp'
+    FORTRAN = 'fortran'
+    GIT = 'git'
+    GLSL = 'glsl'
+    GO = 'go'
+    GRAPHQL = 'graphql'
+    GROOVY = 'groovy'
+    HASKELL = 'haskell'
+    HTML = 'html'
+    HTTP = 'http'
+    JAVA = 'java'
+    JAVASCRIPT = 'javascript'
+    JSON = 'json'
+    JSX = 'jsx'
+    JULIA = 'julia'
+    KATEX = 'katex'
+    KOTLIN = 'kotlin'
+    LATEX = 'latex'
+    LESS = 'less'
+    LISP = 'lisp'
+    LUA = 'lua'
+    MAKEFILE = 'makefile'
+    MARKDOWN = 'markdown'
+    MATLAB = 'matlab'
+    NGINX = 'nginx'
+    OBJECTIVEC = 'objectivec'
+    OCAML = 'ocaml'
+    PASCAL = 'pascal'
+    PERL = 'perl'
+    PHP = 'php'
+    PLSQL = 'plsql'
+    POWERSHELL = 'powershell'
+    PROPERTIES = 'properties'
+    PROTOBUF = 'protobuf'
+    PYTHON = 'python'
+    R = 'r'
+    RUBY = 'ruby'
+    RUST = 'rust'
+    SASS = 'sass'
+    SCALA = 'scala'
+    SCHEME = 'scheme'
+    SHELL = 'shell'
+    SOLIDITY = 'solidity'
+    SQL = 'sql'
+    STEX = 'stex'
+    SWIFT = 'swift'
+    SYSTEMVERILOG = 'systemverilog'
+    TCL = 'tcl'
+    TOML = 'toml'
+    TSX = 'tsx'
+    TYPESCRIPT = 'typescript'
+    VBNET = 'vbnet'
+    VELOCITY = 'velocity'
+    VERILOG = 'verilog'
+    VUE = 'vue'
+    XML = 'xml'
+    YAML = 'yaml'
+
+    def __str__(self):
+        return self.value
+
+
+def encode_block_value(value_dict):
+    """ Encode a value dict into the html tag's value attribute form """
+    json_str = json.dumps(value_dict)
+    url_encoded = urllib.parse.quote(json_str)
+    final_str = "data:" + url_encoded
+    return final_str
+
+
+def decode_block_value(encoded_str):
+    """ Parse the contents of the value attribute """
+    encoded_data = encoded_str[5:]
+    decoded_str = urllib.parse.unquote(encoded_data)
+    return json.loads(decoded_str)
+
+
+class LakeBlockTypes(Enum):
+    HEADING = 'heading'  # heading
+    P = 'p'  # ordinary paragraph
+    OL = 'ol'  # ordered list
+    IMAGE = 'image'  # image
+    CODEBLOCK = 'codeblock'  # code block
+    SUMMARY = 'summary'  # collapse block title
+    COLLAPSE = 'collapse'  # collapse block
+    UNKNOWN = 'unknown'  # unknown type
+    STR = 'str'  # text; Yuque itself has no such type, it only arises from bs parsing
+
+    def __str__(self):
+        return self.value
+
+
+def check_block_type(tag):
+    """ This classification will be refined step by step as understanding of the Yuque document structure deepens """
+    match tag.tag_name:
+        case 'p' if tag.find('card'):
+            return LakeBlockTypes.IMAGE
+        case 'p':
+            return LakeBlockTypes.P
+        case 'card':
+            return LakeBlockTypes.CODEBLOCK
+        case 'ol':
+            return LakeBlockTypes.OL
+        case s if re.match(r'h\d+$', s):
+            return LakeBlockTypes.HEADING
+        case 'details':
+            return LakeBlockTypes.COLLAPSE
+        case 'summary':
+            return LakeBlockTypes.SUMMARY
+        case 'NavigableString':
+            return LakeBlockTypes.STR
+        case _:
+            return LakeBlockTypes.UNKNOWN
+
+
+def parse_blocks(childrens):
+    """ Get the raw array of paragraph blocks """
+    blocks = []
+    for c in childrens:
+        match check_block_type(c):
+            case LakeBlockTypes.P:
+                c = LakeP(c)
+            case LakeBlockTypes.HEADING:
+                c = LakeHeading(c)
+            case LakeBlockTypes.IMAGE:
+                c = LakeImage(c)
+            case LakeBlockTypes.CODEBLOCK:
+                c = LakeCodeBlock(c)
+            case LakeBlockTypes.COLLAPSE:
+                c = LakeCollapse(c)
+            case _:
+                c = LakeBlock(c)
+        blocks.append(c)
+    return blocks
+
+
+def print_blocks(blocks, indent=0):
+    """ Inspect the document's basic content """
+
+    def myprint(t):
+        print(indent * '\t' + t)
+
+    for i, b in enumerate(blocks):
+        match b.type:
+            case LakeBlockTypes.STR:
+                myprint(f'{i}、{b.type} {shorten(b.text, 200)}')
+            case LakeBlockTypes.CODEBLOCK | LakeBlockTypes.IMAGE:  # use | to match several types
+                myprint(f'{i}、{b.type} {shorten(b.prettify(), 100)}\n')
+                myprint(shorten(pprint.pformat(b.value_dict), 400))
+            case LakeBlockTypes.COLLAPSE:
+                myprint(f'{i}、{b.type} {shorten(b.prettify(), 100)}')
+                print_blocks(b.get_blocks(), indent=indent + 1)
+            case _:  # default case
+                myprint(f'{i}、{b.type} {shorten(b.prettify(), 200)}')
+    print()
+    return blocks
+
+
+def __3_语雀结构化模组():
+    pass
+
+
+class LakeBlock(GetAttr, XlBs4Tag):
+    """ Basic block type in a Yuque document """
+    _default = 'tag'
+
+    def __init__(self, tag):  # noqa
+        self.tag = tag
+        self.type = check_block_type(tag)
+
+    def is_foldable(self):
+        # not foldable by default
+        return False
+
+    def is_empty_line(self):
+        # not an empty line by default
+        return False
+
+    def __part功能(self):
+        pass
+
+    def _get_part_number(self, text):
+        m = re.match(r'^(\d+|\+)、', text)
+        if m:
+            t = m.group(1)
+            return t if t != '+' else True
+
+    def get_part_number(self):
+        """ [Personal] My diary entries generally use the "1、" "2、" "+、" pattern to separate content parts;
+        this function determines whether the current block starts such a part
+
+        :return: the matched number if found, True for '+', None otherwise
+            my note numbering currently never starts from 0 and has never used negative values, but this could be extended if needed.
+        """
+        # returns None by default
+        return
+
+    @classmethod
+    def _set_part_number(cls, tag, number):
+        """ Set the number in regular xml mode """
+        # 1 If there is no span, add one at the very front
+        if not tag.find('span'):
+            childrens = list(tag.children)
+            if childrens:
+                childrens[0].insert_html_before('<span></span>')
+            else:
+                tag.append_html('<span></span>')
+
+        # 2 Locate the first span
+        span = tag.span
+
+        # 3 Remove the existing number from the string
+        span.string = re.sub(r'^(\d+|\+)、', '', span.text)
+
+        # 4 Prefix the string with the new number
+        if number is not None:
+            span.string = f'{number}、{span.text}'
+
+        return span.string
+
+    def set_part_number(self, number):
+        """ [Personal] Update the current block's part number
+
+        if the current part has no number, a number marker is added
+        if number is set to None, the number marker is removed
+        """
+        # this feature is quite particular; implementing it separately in each node type is more reasonable
+        raise NotImplementedError
+
+    def is_part_end(self):
+        """ [Personal] Determine whether the current block ends a part """
+        return False
+
+
+class LakeHeading(LakeBlock):
+    def __init__(self, tag):  # noqa
+        super().__init__(tag)
+        self.type = LakeBlockTypes.HEADING
+
+    def is_part_end(self):
+        return True
+
+
+class LakeP(LakeBlock):
+    """ Yuque paragraph block """
+
+    def __init__(self, tag):  # noqa
+        super().__init__(tag)
+        self.type = LakeBlockTypes.P
+
+    def set_part_number(self, number):
+        return self._set_part_number(self, number)
+
+    def is_empty_line(self):
+        return self.text.strip() == ''
+
+    def is_part_end(self):
+        # when a new numbered block appears, the previous part has ended
+        return not self.get_part_number()
+
+    def get_part_number(self):
+        return self._get_part_number(self.text)
+
+
+class LakeImage(LakeBlock):
+    """ Image type in a Yuque document
+
+    The content structure is generally <p><card type="inline" name="image" value="data:..."></card></p>
+    where value can be parsed into a dict carrying the detailed data
+    """
+
+    def __init__(self, tag):  # noqa
+        super().__init__(tag)
+        self.type = LakeBlockTypes.IMAGE
+        self._value_dict = None
+
+    @property
+    def value_dict(self):
+        if self._value_dict is None:
+            self._value_dict = decode_block_value(self.tag.card.attrs['value'])
+        return self._value_dict
+
+    def update_value_dict(self):
+        """ Write the current value_dict back into the html tag """
+        self.tag.card.attrs['value'] = encode_block_value(self.value_dict)
+
+    def __初始化(self):
+        pass
+
+    @classmethod
+    def _init_from_src(cls, src):
+        """ Take an image url or a base64 value and turn it into a Yuque image node """
+        soup = BeautifulSoup('<p><card type="inline" name="image" value=""></card></p>', 'lxml')
+        soup.card.attrs['value'] = encode_block_value({'src': src})
+        return cls(soup)
+
+    @classmethod
+    def from_url(cls, url):
+        """ Initialize an image from a url location """
+        return cls._init_from_src(url)
+
+    @classmethod
+    def _reduce_img(cls, img, limit_size, suffix):
+        # 1 Initial read and compression
+        im = xlcv.read(img)
+        if suffix is None:
+            suffix = '.png' if min(xlcv.imsize(im)) > 50 else '.jpg'
+        im = xlcv.reduce_filesize(im, limit_size, suffix)
+
+        # 2 If a .jpg image is too small (shortest side under 50 pixels), it must be saved as .png instead
+        if suffix == '.jpg' and min(xlcv.imsize(im)) < 50:
+            suffix = '.png'
+
+        # 3
+        return im, suffix
+
+    @classmethod
+    def from_local_image(cls, img, *, limit_size=0.6 * 1024 * 1024, suffix=None):
+        """ Take a local image; local images must be converted to base64
+
+        :param limit_size: the whole document has a 1MB limit, so a single image can generally be given at most about 0.5MB
+        """
+        im, suffix = cls._reduce_img(img, limit_size, suffix)
+        buffer = xlcv.to_buffer(im, suffix, b64encode=True).decode('utf-8')
+        return cls._init_from_src(f'data:image/{suffix[1:]};base64,{buffer}')
+
+
+class LakeCodeBlock(LakeBlock):
+    """ Yuque code block """
+
+    def __init__(self, tag):  # noqa
+        super().__init__(tag)
+        self.type = LakeBlockTypes.CODEBLOCK
+        self._value_dict = None
+
+    @property
+    def value_dict(self):
+        if self._value_dict is None:
+            self._value_dict = decode_block_value(self.tag.attrs['value'])
+        return self._value_dict
+
+    def update_value_dict(self):
+        """ Write the current value_dict back into the html tag """
+        self.tag.attrs['value'] = encode_block_value(self.value_dict)
+
+    def is_foldable(self):
+        return True
+
+    def fold(self):
+        """ Fold the code block """
+        self.tag.value_dict['collapsed'] = True
+
+    def unfold(self):
+        """ Unfold the code block """
+        self.tag.value_dict['collapsed'] = False
+
+    def __part功能(self):
+        pass
+
+    def set_part_number(self, number):
+        title = self.value_dict['name']
+        title = re.sub(r'^(\d+|\+)、', '', title)
+        if number is not None:
+            title = f'{number}、{title}'
+        self.value_dict['name'] = title
+        self.update_value_dict()
+        return title
+
+    def is_part_end(self):
+        return not self.get_part_number()
+
+    def get_part_number(self):
+        return self._get_part_number(self.value_dict.get('name'))
+
+
+class LakeCollapse(LakeBlock):
+    """ Yuque collapse block """
+
+    def __init__(self, tag):  # noqa
+        super().__init__(tag)
+
+    @classmethod
+    def create(cls, summary='', blocks=None, *, open=True):
+        """ Create a collapse block """
+        # 1 details
+        summary = html.escape(summary)
+        summary = f'<summary><span>{summary}</span></summary>'
+        details = f'<details class="lake-collapse" open="{str(open).lower()}">{summary}</details>'
+
+        # 2 tag
+        details = BeautifulSoup(details, 'lxml').details
+        for b in blocks:
+            details.append_html(b.tag.prettify())
+        return cls(details)
+
+    def get_blocks(self):
+        """ Get the raw array of paragraph blocks """
+        return parse_blocks(self.tag.children)
+
+    def print_blocks(self, indent=1):
+        """ Inspect the document's basic content """
+        blocks = self.get_blocks()
+        print_blocks(blocks, indent=indent)
+
+    def add_block(self, node):
+        """ Append a new node at the end of the collapse block
+
+        :param node: the node to add, or html text
+        """
+        self.tag.append_html(node)
+
+    def is_foldable(self):
+        return True
+
+    def fold(self):
+        """ Fold the collapse block """
+        self.tag.attrs['open'] = 'false'
+
+    def unfold(self):
+        """ Unfold the collapse block """
+        self.tag.attrs['open'] = 'true'
+
+    def __part功能(self):
+        pass
+
+    def set_part_number(self, number):
+        return self._set_part_number(self.summary, number)
+
+    def is_part_end(self):
+        return not self.get_part_number()
+
+    def get_part_number(self):
+        return self._get_part_number(self.summary.text)
+
+
+# GetAttr apparently has to come first, so that attributes not found here go through the GetAttr mechanism first, while the latter can still provide IDE hints
+class LakeDoc(GetAttr, XlBs4Tag):
+    """ Yuque document type """
+    _default = 'soup'
+
+    def __init__(self, soup):  # noqa, this class's init intentionally differs from the parent's
+        # the original, complete html document content
+        self.soup: XlBs4Tag = soup
+        self.type = 'doc'
+
+    def __文档导入导出(self):
+        pass
+
+    @classmethod
+    def from_url(cls, url, *, yuque=None):
+        """ Take a Yuque note's url
+        """
+        yuque = yuque or Yuque()
+        data = yuque.get_doc(url, return_mode='json')
+        doc = LakeDoc.from_html(data['body_lake'])
+        return doc
+
+    @classmethod
+    def from_html(cls, lake_html_str='<body></body>'):
+        """
+
+        :param lake_html_str: needs at least a <body></body> structure.
+            fortunately bs has plenty of compatibility handling; basically any normal input gets the body structure filled in automatically, so little special-casing is needed here
+        :return:
+        """
+        if not lake_html_str.startswith('<!doctype lake>'):
+            lake_html_str = '<!doctype lake>' + lake_html_str
+        soup = BeautifulSoup(lake_html_str, 'lxml')
+        return cls(soup)
+
+    def to_lake_str(self):
+        """ Convert to a Yuque html-format string """
+        content = self.soup.prettify().replace('\n', '')
+        content = re.sub('^<!DOCTYPE lake>', '<!doctype lake>', content)
+        content = re.sub(r'\s{2,}', '', content)
+        return content
+
+    def to_url(self, url, *, yuque=None, use_dp=False):
+        """ Update the article content at the given url
+        """
+        yuque = yuque or Yuque()
+        yuque.update_doc(url, self.to_lake_str(), use_dp=use_dp)
+
+    def __其他功能(self):
+        pass
+
+    def get_blocks(self):
+        """ Get the raw array of paragraph blocks """
+        return parse_blocks(self.soup.body.children)
+
+    def print_blocks(self):
+        """ Inspect the document's basic content """
+        blocks = self.get_blocks()
+        return print_blocks(blocks)
+
+    def delete_lake_id(self):
+        """ Remove the id markers from all Yuque tags in the document """
+        for tag in self.soup.find_all(True):
+            for name in ['data-lake-id', 'id']:
+                if name in tag.attrs:
+                    del tag[name]
+
+    def add_block(self, node):
+        """ Append a new node at the end of the body
+
+        :param node: the node to add, or html text
+        """
+        self.soup.body.append_html(node)
+
+    def fold_blocks(self):
+        """ Fold all foldable blocks in the document """
+        for b in self.get_blocks():
+            if b.is_foldable():
+                b.fold()
+
+    def remove_empty_lines_between_collapses(self):
+        """ Remove all empty lines between foldable blocks in the document """
+        blocks = self.get_blocks()
+        collapse_indices = []
+        for idx, b in enumerate(blocks):
+            if b.is_foldable():
+                collapse_indices.append(idx)
+
+        to_remove = []
+
+        # check the blocks between each pair of adjacent foldable blocks
+        for i in range(len(collapse_indices) - 1):
+            prev_idx = collapse_indices[i]
+            current_idx = collapse_indices[i + 1]
+            start = prev_idx + 1
+            end = current_idx - 1
+
+            if start > end:
+                continue
+
+            # check whether all blocks in between are empty lines
+            all_empty = True
+            for j in range(start, end + 1):
+                block = blocks[j]
+                if not block.is_empty_line():
+                    all_empty = False
+                    break
+            if all_empty:
+                to_remove.extend(range(start, end + 1))
+
+        # delete in reverse order to avoid index shifts
+        for index in sorted(to_remove, reverse=True):
+            blocks[index].tag.decompose()
+
+    def __part系列功能(self):
+        """ Customizations geared to my personal note style """
+        pass
+
+    def reset_part_numbers(self):
+        """ Reset all part numbers in the document """
+        blocks = self.get_blocks()
+        cnt = 0
+        for b in blocks:
+            if b.type == LakeBlockTypes.HEADING:
+                cnt = 0
+            elif b.get_part_number() is not None:
+                cnt += 1
+                b.set_part_number(cnt)
+
+    @classmethod
+    def get_part_blocks(cls, blocks, start_idx):
+        """ Get all blocks of the given part """
+        # 1 First find the hard end
+        part_blocks = [blocks[start_idx]]
+        for b in blocks[start_idx + 1:]:
+            if b.type == LakeBlockTypes.HEADING:
+                break
+            elif b.get_part_number() is not None:
+                break
+            part_blocks.append(b)
+
+        # 2 Then trim the soft end, i.e. drop trailing empty lines
+        while part_blocks:
+            b = part_blocks[-1]
+            if b.is_empty_line():
+                part_blocks.pop()
+            else:
+                break
+
+        return part_blocks
+
+    def part_to_collapse(self):
+        """ Convert the part contents in the document into collapse blocks """
+        blocks = self.get_blocks()
+
+        i = 0
+        while i < len(blocks):
+            b = blocks[i]
+            if b.get_part_number() is None:
+                pass  # skip unnumbered blocks; not the start of a part
+            elif b.type == LakeBlockTypes.P:  # only numbers on paragraph-type blocks are handled
+                part_blocks = self.get_part_blocks(blocks, i)
+                summary = part_blocks[0].text
+                collapse = LakeCollapse.create(summary, part_blocks, open=False)
+                # none of the .tag below can be omitted
+                b.tag.insert_html_before(collapse.tag)
+                for b2 in part_blocks:
+                    b2.tag.decompose()
+                i += len(part_blocks) - 1
+            i += 1
+
+
+if __name__ == '__main__':
+    pass