pyxllib 0.3.197__py3-none-any.whl → 3.201.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyxllib/__init__.py +14 -21
- pyxllib/algo/__init__.py +8 -8
- pyxllib/algo/disjoint.py +54 -54
- pyxllib/algo/geo.py +537 -541
- pyxllib/algo/intervals.py +964 -964
- pyxllib/algo/matcher.py +389 -389
- pyxllib/algo/newbie.py +166 -166
- pyxllib/algo/pupil.py +629 -629
- pyxllib/algo/shapelylib.py +67 -67
- pyxllib/algo/specialist.py +241 -241
- pyxllib/algo/stat.py +494 -494
- pyxllib/algo/treelib.py +145 -149
- pyxllib/algo/unitlib.py +62 -66
- pyxllib/autogui/__init__.py +5 -5
- pyxllib/autogui/activewin.py +246 -246
- pyxllib/autogui/all.py +9 -9
- pyxllib/autogui/autogui.py +846 -852
- pyxllib/autogui/uiautolib.py +362 -362
- pyxllib/autogui/virtualkey.py +102 -102
- pyxllib/autogui/wechat.py +827 -827
- pyxllib/autogui/wechat_msg.py +421 -421
- pyxllib/autogui/wxautolib.py +84 -84
- pyxllib/cv/__init__.py +5 -5
- pyxllib/cv/expert.py +267 -267
- pyxllib/cv/imfile.py +159 -159
- pyxllib/cv/imhash.py +39 -39
- pyxllib/cv/pupil.py +9 -9
- pyxllib/cv/rgbfmt.py +1525 -1525
- pyxllib/cv/slidercaptcha.py +137 -137
- pyxllib/cv/trackbartools.py +251 -251
- pyxllib/cv/xlcvlib.py +1040 -1040
- pyxllib/cv/xlpillib.py +423 -423
- pyxllib/data/echarts.py +236 -240
- pyxllib/data/jsonlib.py +85 -89
- pyxllib/data/oss.py +72 -72
- pyxllib/data/pglib.py +1111 -1127
- pyxllib/data/sqlite.py +568 -568
- pyxllib/data/sqllib.py +297 -297
- pyxllib/ext/JLineViewer.py +505 -505
- pyxllib/ext/__init__.py +6 -6
- pyxllib/ext/demolib.py +251 -246
- pyxllib/ext/drissionlib.py +277 -277
- pyxllib/ext/kq5034lib.py +12 -12
- pyxllib/ext/qt.py +449 -449
- pyxllib/ext/robustprocfile.py +493 -497
- pyxllib/ext/seleniumlib.py +76 -76
- pyxllib/ext/tk.py +173 -173
- pyxllib/ext/unixlib.py +821 -827
- pyxllib/ext/utools.py +345 -351
- pyxllib/ext/webhook.py +124 -119
- pyxllib/ext/win32lib.py +40 -40
- pyxllib/ext/wjxlib.py +91 -88
- pyxllib/ext/wpsapi.py +124 -124
- pyxllib/ext/xlwork.py +9 -9
- pyxllib/ext/yuquelib.py +1110 -1105
- pyxllib/file/__init__.py +17 -17
- pyxllib/file/docxlib.py +757 -761
- pyxllib/file/gitlib.py +309 -309
- pyxllib/file/libreoffice.py +165 -165
- pyxllib/file/movielib.py +144 -148
- pyxllib/file/newbie.py +10 -10
- pyxllib/file/onenotelib.py +1469 -1469
- pyxllib/file/packlib/__init__.py +330 -330
- pyxllib/file/packlib/zipfile.py +2441 -2441
- pyxllib/file/pdflib.py +422 -426
- pyxllib/file/pupil.py +185 -185
- pyxllib/file/specialist/__init__.py +681 -685
- pyxllib/file/specialist/dirlib.py +799 -799
- pyxllib/file/specialist/download.py +193 -193
- pyxllib/file/specialist/filelib.py +2825 -2829
- pyxllib/file/xlsxlib.py +3122 -3131
- pyxllib/file/xlsyncfile.py +341 -341
- pyxllib/prog/__init__.py +5 -5
- pyxllib/prog/cachetools.py +58 -64
- pyxllib/prog/deprecatedlib.py +233 -233
- pyxllib/prog/filelock.py +42 -42
- pyxllib/prog/ipyexec.py +253 -253
- pyxllib/prog/multiprogs.py +940 -940
- pyxllib/prog/newbie.py +451 -451
- pyxllib/prog/pupil.py +1208 -1197
- pyxllib/prog/sitepackages.py +33 -33
- pyxllib/prog/specialist/__init__.py +348 -391
- pyxllib/prog/specialist/bc.py +203 -203
- pyxllib/prog/specialist/browser.py +497 -497
- pyxllib/prog/specialist/common.py +347 -347
- pyxllib/prog/specialist/datetime.py +198 -198
- pyxllib/prog/specialist/tictoc.py +240 -240
- pyxllib/prog/specialist/xllog.py +180 -180
- pyxllib/prog/xlosenv.py +110 -108
- pyxllib/stdlib/__init__.py +17 -17
- pyxllib/stdlib/tablepyxl/__init__.py +10 -10
- pyxllib/stdlib/tablepyxl/style.py +303 -303
- pyxllib/stdlib/tablepyxl/tablepyxl.py +130 -130
- pyxllib/text/__init__.py +8 -8
- pyxllib/text/ahocorasick.py +36 -39
- pyxllib/text/airscript.js +754 -744
- pyxllib/text/charclasslib.py +121 -121
- pyxllib/text/jiebalib.py +267 -267
- pyxllib/text/jinjalib.py +27 -32
- pyxllib/text/jsa_ai_prompt.md +271 -271
- pyxllib/text/jscode.py +922 -922
- pyxllib/text/latex/__init__.py +158 -158
- pyxllib/text/levenshtein.py +303 -303
- pyxllib/text/nestenv.py +1215 -1215
- pyxllib/text/newbie.py +300 -300
- pyxllib/text/pupil/__init__.py +8 -8
- pyxllib/text/pupil/common.py +1121 -1121
- pyxllib/text/pupil/xlalign.py +326 -326
- pyxllib/text/pycode.py +47 -47
- pyxllib/text/specialist/__init__.py +8 -8
- pyxllib/text/specialist/common.py +112 -112
- pyxllib/text/specialist/ptag.py +186 -186
- pyxllib/text/spellchecker.py +172 -172
- pyxllib/text/templates/echart_base.html +10 -10
- pyxllib/text/templates/highlight_code.html +16 -16
- pyxllib/text/templates/latex_editor.html +102 -102
- pyxllib/text/vbacode.py +17 -17
- pyxllib/text/xmllib.py +741 -747
- pyxllib/xl.py +42 -39
- pyxllib/xlcv.py +17 -17
- pyxllib-3.201.1.dist-info/METADATA +296 -0
- pyxllib-3.201.1.dist-info/RECORD +125 -0
- {pyxllib-0.3.197.dist-info → pyxllib-3.201.1.dist-info}/licenses/LICENSE +190 -190
- pyxllib/ext/old.py +0 -663
- pyxllib-0.3.197.dist-info/METADATA +0 -48
- pyxllib-0.3.197.dist-info/RECORD +0 -126
- {pyxllib-0.3.197.dist-info → pyxllib-3.201.1.dist-info}/WHEEL +0 -0
pyxllib/text/spellchecker.py
CHANGED
@@ -1,172 +1,172 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# @Author : 陈坤泽
|
4
|
-
# @Email : 877362867@qq.com
|
5
|
-
# @Date : 2021/06/06 17:00
|
6
|
-
|
7
|
-
from pyxllib.prog.pupil import check_install_package
|
8
|
-
|
9
|
-
# 拼写检查库,即词汇库
|
10
|
-
# spellchecker模块主要有两个类,SpellChecker和WordFrequency
|
11
|
-
# WordFrequency是一个词频类
|
12
|
-
# 一般导入SpellChecker就行了:from spellchecker import SpellChecker
|
13
|
-
check_install_package('pyspellchecker')
|
14
|
-
|
15
|
-
from spellchecker import SpellChecker
|
16
|
-
|
17
|
-
from pyxllib.prog.pupil import dprint
|
18
|
-
|
19
|
-
|
20
|
-
class MySpellChecker(SpellChecker):
|
21
|
-
"""
|
22
|
-
拼写检查
|
23
|
-
190923周一21:54,源自 完形填空ocr 识别项目
|
24
|
-
"""
|
25
|
-
|
26
|
-
def __init__(self, language="en", local_dictionary=None, distance=2, tokenizer=None, case_sensitive=False,
|
27
|
-
df=None):
|
28
|
-
from collections import defaultdict, Counter
|
29
|
-
|
30
|
-
# 1 原初始化功能
|
31
|
-
super(MySpellChecker, self).__init__(language=language, local_dictionary=local_dictionary,
|
32
|
-
distance=distance, tokenizer=tokenizer,
|
33
|
-
case_sensitive=case_sensitive)
|
34
|
-
|
35
|
-
# 2 自己要增加一个分析用的字典
|
36
|
-
self.checkdict = defaultdict(Counter)
|
37
|
-
for k, v in self.word_frequency._dictionary.items():
|
38
|
-
self.checkdict[k][k] = v
|
39
|
-
|
40
|
-
# 3 如果输入了一个df对象要进行更新
|
41
|
-
if df: self.update_by_dataframe(df)
|
42
|
-
|
43
|
-
def update_by_dataframe(self, df, weight_times=1):
|
44
|
-
"""
|
45
|
-
:param df: 这里的df有要求,是DataFrame对象,并且含有这些属性列:old、new、count
|
46
|
-
:param weight_times: 对要加的count乘以一个倍率
|
47
|
-
:return:
|
48
|
-
"""
|
49
|
-
# 1 是否要处理大小写
|
50
|
-
# 如果不区分大小写,需要对df先做预处理,全部转小写
|
51
|
-
# 而大小写不敏感的时候,self.word_frequency._dictionary在init时已经转小写,不用操心
|
52
|
-
if not self._case_sensitive:
|
53
|
-
df.loc[:, 'old'] = df.loc[:, 'old'].str.lower()
|
54
|
-
df.loc[:, 'new'] = df.loc[:, 'new'].str.lower()
|
55
|
-
|
56
|
-
# 2 df对self.word_frequency._dictionary、self.check的影响
|
57
|
-
d = self.word_frequency._dictionary
|
58
|
-
for index, row in df.iterrows():
|
59
|
-
old, new, count = row['old'].decode(), row['new'].decode(), row['count'] * weight_times
|
60
|
-
d[old] += count if old == new else -count
|
61
|
-
# if row['id']==300: dprint(old, new, count)
|
62
|
-
self.checkdict[old][new] += count
|
63
|
-
|
64
|
-
# 3 去除d中负值的key
|
65
|
-
self.word_frequency.remove_words([k for k in d.keys() if d[k] <= 0])
|
66
|
-
|
67
|
-
def _ensure_term(self, term):
|
68
|
-
if term not in self.checkdict:
|
69
|
-
d = {k: self.word_frequency._dictionary[k] for k in self.candidates(term)}
|
70
|
-
self.checkdict[term] = d
|
71
|
-
|
72
|
-
def correction(self, term):
|
73
|
-
# 1 本来就是正确的
|
74
|
-
w = term if self._case_sensitive else term.lower()
|
75
|
-
if w in self.word_frequency._dictionary: return term
|
76
|
-
|
77
|
-
# 2 如果是错的,且是没有记录的错误情况,则做一次候选项运算
|
78
|
-
self._ensure_term(w)
|
79
|
-
|
80
|
-
# 3 返回权重最大的结果
|
81
|
-
res = max(self.checkdict[w], key=self.checkdict[w].get)
|
82
|
-
val = self.checkdict[w].get(res)
|
83
|
-
if val <= 0: res = '^' + res # 是一个错误单词,但是没有推荐修改结果,就打一个^标记
|
84
|
-
return res
|
85
|
-
|
86
|
-
def correction_detail(self, term):
|
87
|
-
"""更加详细,给出所有候选项的纠正
|
88
|
-
|
89
|
-
>> a.correction_detail('d')
|
90
|
-
[('d', 9131), ('do', 1), ('old', 1)]
|
91
|
-
"""
|
92
|
-
w = term if self._case_sensitive else term.lower()
|
93
|
-
self._ensure_term(w)
|
94
|
-
ls = [(k, v) for k, v in self.checkdict[w].items()]
|
95
|
-
ls = sorted(ls, key=lambda x: x[1], reverse=True)
|
96
|
-
return ls
|
97
|
-
|
98
|
-
|
99
|
-
def demo_myspellchecker():
|
100
|
-
# 类的初始化大概要0.4秒
|
101
|
-
a = MySpellChecker()
|
102
|
-
|
103
|
-
# sql的加载更新大概要1秒
|
104
|
-
# hsql = HistudySQL('ckz', 'tr_develop')
|
105
|
-
# df = hsql.query('SELECT * FROM spell_check')
|
106
|
-
# a.update_by_dataframe(df)
|
107
|
-
|
108
|
-
# dprint(a.correction_detail('d'))
|
109
|
-
# dprint(a.correction_detail('wrod')) # wrod有很多种可能性,但word权重是最大的
|
110
|
-
# dprint(a.correction_detail('ckzckzckzckzckzckz')) # wrod有很多种可能性,但word权重是最大的
|
111
|
-
# dprint(a.correction('ckzckzckzckzckzckz')) # wrod有很多种可能性,但word权重是最大的
|
112
|
-
dprint(a.correction_detail('ike'))
|
113
|
-
dprint(a.correction_detail('dean'))
|
114
|
-
dprint(a.correction_detail('stud'))
|
115
|
-
dprint(a.correction_detail('U'))
|
116
|
-
|
117
|
-
|
118
|
-
def demo_spellchecker():
|
119
|
-
"""演示如何使用spellchecker库
|
120
|
-
官方介绍文档 pyspellchecker · PyPI: https://pypi.org/project/pyspellchecker/
|
121
|
-
190909周一15:58,from 陈坤泽
|
122
|
-
"""
|
123
|
-
# 1 创建对象
|
124
|
-
# 可以设置语言、大小写敏感、拼写检查的最大距离
|
125
|
-
# 默认'en'英语,大小写不敏感
|
126
|
-
spell = SpellChecker()
|
127
|
-
# 如果是英语,SpellChecker会自动加载语言包site-packages\spellchecker\resources\en.json.gz,大概12万个词汇,包括词频权重
|
128
|
-
d = spell.word_frequency # 这里的d是WordFrequency对象,其底层用了Counter类进行数据存储
|
129
|
-
dprint(d.unique_words, d.total_words) # 词汇数,权重总和
|
130
|
-
|
131
|
-
# 2 修改词频表 spell.word_frequency
|
132
|
-
dprint(d['ckz']) # 不存在的词汇直接输出0
|
133
|
-
d.add('ckz') # 可以添加ckz词汇的一次词频
|
134
|
-
d.load_words(['ckz', 'ckz', 'lyb']) # 可以批量添加词汇
|
135
|
-
dprint(d['ckz'], d['lyb']) # d['ckz']=3 d['lyb']=1
|
136
|
-
d.load_words(['ckz'] * 100 + ['lyb'] * 500) # 可以用这种技巧进行大权重的添加
|
137
|
-
dprint(d['ckz'], d['lyb']) # d['ckz']=103 d['lyb']=501
|
138
|
-
|
139
|
-
# 同理,去除也有remove和remove_words两种方法
|
140
|
-
d.remove('ckz')
|
141
|
-
# d.remove_words(['ckz', 'lyb']) # 不过注意不能删除已经不存在的key('ckz'),否则会报KeyError
|
142
|
-
dprint(d['ckz'], d['lyb']) # d['ckz']=0 d['lyb']=501
|
143
|
-
# remove是完全去除单词,如果只是要减权重可以访问底层的_dictionary对象操作
|
144
|
-
d._dictionary['lyb'] -= 100 # 当然不太建议直接访问下划线开头的成员变量~~
|
145
|
-
dprint(d['lyb']) # ['lyb']=401
|
146
|
-
|
147
|
-
# 还可以按阈值删除词频不超过设置阈值的词汇
|
148
|
-
d.remove_by_threshold(5)
|
149
|
-
|
150
|
-
# 3 spell的基本功能
|
151
|
-
# (1)用unknown可以找到可能拼写错误的单词,再用correction可以获得最佳修改意见
|
152
|
-
misspelled = spell.unknown(['something', 'is', 'hapenning', 'here'])
|
153
|
-
dprint(misspelled) # misspelled<set>={'hapenning'}
|
154
|
-
|
155
|
-
for word in misspelled:
|
156
|
-
# Get the one `most likely` answer
|
157
|
-
dprint(spell.correction(word)) # <str>='happening'
|
158
|
-
# Get a list of `likely` options
|
159
|
-
dprint(spell.candidates(word)) # <set>={'henning', 'happening', 'penning'}
|
160
|
-
|
161
|
-
# 注意默认的spell不区分大小写,如果词库存储了100次'ckz'
|
162
|
-
# 此时判断任意大小写形式组合的'CKZ'都是返回原值
|
163
|
-
# 例如 spell.correction('ckZ') => 'ckZ'
|
164
|
-
|
165
|
-
# (2)可以通过修改spell.word_frequency影响correction的计算结果
|
166
|
-
dprint(d['henning'], d['happening'], d['penning'])
|
167
|
-
# d['henning']<int>=53 d['happening']<int>=4538 d['penning']<int>=23
|
168
|
-
d._dictionary['henning'] += 10000
|
169
|
-
dprint(spell.correction('hapenning')) # <str>='henning'
|
170
|
-
|
171
|
-
# (3)词汇在整个字典里占的权重
|
172
|
-
dprint(spell.word_probability('henning')) # <float>=0.0001040741914298211
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : 陈坤泽
|
4
|
+
# @Email : 877362867@qq.com
|
5
|
+
# @Date : 2021/06/06 17:00
|
6
|
+
|
7
|
+
from pyxllib.prog.pupil import check_install_package
|
8
|
+
|
9
|
+
# 拼写检查库,即词汇库
|
10
|
+
# spellchecker模块主要有两个类,SpellChecker和WordFrequency
|
11
|
+
# WordFrequency是一个词频类
|
12
|
+
# 一般导入SpellChecker就行了:from spellchecker import SpellChecker
|
13
|
+
check_install_package('pyspellchecker')
|
14
|
+
|
15
|
+
from spellchecker import SpellChecker
|
16
|
+
|
17
|
+
from pyxllib.prog.pupil import dprint
|
18
|
+
|
19
|
+
|
20
|
+
class MySpellChecker(SpellChecker):
|
21
|
+
"""
|
22
|
+
拼写检查
|
23
|
+
190923周一21:54,源自 完形填空ocr 识别项目
|
24
|
+
"""
|
25
|
+
|
26
|
+
def __init__(self, language="en", local_dictionary=None, distance=2, tokenizer=None, case_sensitive=False,
|
27
|
+
df=None):
|
28
|
+
from collections import defaultdict, Counter
|
29
|
+
|
30
|
+
# 1 原初始化功能
|
31
|
+
super(MySpellChecker, self).__init__(language=language, local_dictionary=local_dictionary,
|
32
|
+
distance=distance, tokenizer=tokenizer,
|
33
|
+
case_sensitive=case_sensitive)
|
34
|
+
|
35
|
+
# 2 自己要增加一个分析用的字典
|
36
|
+
self.checkdict = defaultdict(Counter)
|
37
|
+
for k, v in self.word_frequency._dictionary.items():
|
38
|
+
self.checkdict[k][k] = v
|
39
|
+
|
40
|
+
# 3 如果输入了一个df对象要进行更新
|
41
|
+
if df: self.update_by_dataframe(df)
|
42
|
+
|
43
|
+
def update_by_dataframe(self, df, weight_times=1):
|
44
|
+
"""
|
45
|
+
:param df: 这里的df有要求,是DataFrame对象,并且含有这些属性列:old、new、count
|
46
|
+
:param weight_times: 对要加的count乘以一个倍率
|
47
|
+
:return:
|
48
|
+
"""
|
49
|
+
# 1 是否要处理大小写
|
50
|
+
# 如果不区分大小写,需要对df先做预处理,全部转小写
|
51
|
+
# 而大小写不敏感的时候,self.word_frequency._dictionary在init时已经转小写,不用操心
|
52
|
+
if not self._case_sensitive:
|
53
|
+
df.loc[:, 'old'] = df.loc[:, 'old'].str.lower()
|
54
|
+
df.loc[:, 'new'] = df.loc[:, 'new'].str.lower()
|
55
|
+
|
56
|
+
# 2 df对self.word_frequency._dictionary、self.check的影响
|
57
|
+
d = self.word_frequency._dictionary
|
58
|
+
for index, row in df.iterrows():
|
59
|
+
old, new, count = row['old'].decode(), row['new'].decode(), row['count'] * weight_times
|
60
|
+
d[old] += count if old == new else -count
|
61
|
+
# if row['id']==300: dprint(old, new, count)
|
62
|
+
self.checkdict[old][new] += count
|
63
|
+
|
64
|
+
# 3 去除d中负值的key
|
65
|
+
self.word_frequency.remove_words([k for k in d.keys() if d[k] <= 0])
|
66
|
+
|
67
|
+
def _ensure_term(self, term):
|
68
|
+
if term not in self.checkdict:
|
69
|
+
d = {k: self.word_frequency._dictionary[k] for k in self.candidates(term)}
|
70
|
+
self.checkdict[term] = d
|
71
|
+
|
72
|
+
def correction(self, term):
|
73
|
+
# 1 本来就是正确的
|
74
|
+
w = term if self._case_sensitive else term.lower()
|
75
|
+
if w in self.word_frequency._dictionary: return term
|
76
|
+
|
77
|
+
# 2 如果是错的,且是没有记录的错误情况,则做一次候选项运算
|
78
|
+
self._ensure_term(w)
|
79
|
+
|
80
|
+
# 3 返回权重最大的结果
|
81
|
+
res = max(self.checkdict[w], key=self.checkdict[w].get)
|
82
|
+
val = self.checkdict[w].get(res)
|
83
|
+
if val <= 0: res = '^' + res # 是一个错误单词,但是没有推荐修改结果,就打一个^标记
|
84
|
+
return res
|
85
|
+
|
86
|
+
def correction_detail(self, term):
|
87
|
+
"""更加详细,给出所有候选项的纠正
|
88
|
+
|
89
|
+
>> a.correction_detail('d')
|
90
|
+
[('d', 9131), ('do', 1), ('old', 1)]
|
91
|
+
"""
|
92
|
+
w = term if self._case_sensitive else term.lower()
|
93
|
+
self._ensure_term(w)
|
94
|
+
ls = [(k, v) for k, v in self.checkdict[w].items()]
|
95
|
+
ls = sorted(ls, key=lambda x: x[1], reverse=True)
|
96
|
+
return ls
|
97
|
+
|
98
|
+
|
99
|
+
def demo_myspellchecker():
|
100
|
+
# 类的初始化大概要0.4秒
|
101
|
+
a = MySpellChecker()
|
102
|
+
|
103
|
+
# sql的加载更新大概要1秒
|
104
|
+
# hsql = HistudySQL('ckz', 'tr_develop')
|
105
|
+
# df = hsql.query('SELECT * FROM spell_check')
|
106
|
+
# a.update_by_dataframe(df)
|
107
|
+
|
108
|
+
# dprint(a.correction_detail('d'))
|
109
|
+
# dprint(a.correction_detail('wrod')) # wrod有很多种可能性,但word权重是最大的
|
110
|
+
# dprint(a.correction_detail('ckzckzckzckzckzckz')) # wrod有很多种可能性,但word权重是最大的
|
111
|
+
# dprint(a.correction('ckzckzckzckzckzckz')) # wrod有很多种可能性,但word权重是最大的
|
112
|
+
dprint(a.correction_detail('ike'))
|
113
|
+
dprint(a.correction_detail('dean'))
|
114
|
+
dprint(a.correction_detail('stud'))
|
115
|
+
dprint(a.correction_detail('U'))
|
116
|
+
|
117
|
+
|
118
|
+
def demo_spellchecker():
|
119
|
+
"""演示如何使用spellchecker库
|
120
|
+
官方介绍文档 pyspellchecker · PyPI: https://pypi.org/project/pyspellchecker/
|
121
|
+
190909周一15:58,from 陈坤泽
|
122
|
+
"""
|
123
|
+
# 1 创建对象
|
124
|
+
# 可以设置语言、大小写敏感、拼写检查的最大距离
|
125
|
+
# 默认'en'英语,大小写不敏感
|
126
|
+
spell = SpellChecker()
|
127
|
+
# 如果是英语,SpellChecker会自动加载语言包site-packages\spellchecker\resources\en.json.gz,大概12万个词汇,包括词频权重
|
128
|
+
d = spell.word_frequency # 这里的d是WordFrequency对象,其底层用了Counter类进行数据存储
|
129
|
+
dprint(d.unique_words, d.total_words) # 词汇数,权重总和
|
130
|
+
|
131
|
+
# 2 修改词频表 spell.word_frequency
|
132
|
+
dprint(d['ckz']) # 不存在的词汇直接输出0
|
133
|
+
d.add('ckz') # 可以添加ckz词汇的一次词频
|
134
|
+
d.load_words(['ckz', 'ckz', 'lyb']) # 可以批量添加词汇
|
135
|
+
dprint(d['ckz'], d['lyb']) # d['ckz']=3 d['lyb']=1
|
136
|
+
d.load_words(['ckz'] * 100 + ['lyb'] * 500) # 可以用这种技巧进行大权重的添加
|
137
|
+
dprint(d['ckz'], d['lyb']) # d['ckz']=103 d['lyb']=501
|
138
|
+
|
139
|
+
# 同理,去除也有remove和remove_words两种方法
|
140
|
+
d.remove('ckz')
|
141
|
+
# d.remove_words(['ckz', 'lyb']) # 不过注意不能删除已经不存在的key('ckz'),否则会报KeyError
|
142
|
+
dprint(d['ckz'], d['lyb']) # d['ckz']=0 d['lyb']=501
|
143
|
+
# remove是完全去除单词,如果只是要减权重可以访问底层的_dictionary对象操作
|
144
|
+
d._dictionary['lyb'] -= 100 # 当然不太建议直接访问下划线开头的成员变量~~
|
145
|
+
dprint(d['lyb']) # ['lyb']=401
|
146
|
+
|
147
|
+
# 还可以按阈值删除词频不超过设置阈值的词汇
|
148
|
+
d.remove_by_threshold(5)
|
149
|
+
|
150
|
+
# 3 spell的基本功能
|
151
|
+
# (1)用unknown可以找到可能拼写错误的单词,再用correction可以获得最佳修改意见
|
152
|
+
misspelled = spell.unknown(['something', 'is', 'hapenning', 'here'])
|
153
|
+
dprint(misspelled) # misspelled<set>={'hapenning'}
|
154
|
+
|
155
|
+
for word in misspelled:
|
156
|
+
# Get the one `most likely` answer
|
157
|
+
dprint(spell.correction(word)) # <str>='happening'
|
158
|
+
# Get a list of `likely` options
|
159
|
+
dprint(spell.candidates(word)) # <set>={'henning', 'happening', 'penning'}
|
160
|
+
|
161
|
+
# 注意默认的spell不区分大小写,如果词库存储了100次'ckz'
|
162
|
+
# 此时判断任意大小写形式组合的'CKZ'都是返回原值
|
163
|
+
# 例如 spell.correction('ckZ') => 'ckZ'
|
164
|
+
|
165
|
+
# (2)可以通过修改spell.word_frequency影响correction的计算结果
|
166
|
+
dprint(d['henning'], d['happening'], d['penning'])
|
167
|
+
# d['henning']<int>=53 d['happening']<int>=4538 d['penning']<int>=23
|
168
|
+
d._dictionary['henning'] += 10000
|
169
|
+
dprint(spell.correction('hapenning')) # <str>='henning'
|
170
|
+
|
171
|
+
# (3)词汇在整个字典里占的权重
|
172
|
+
dprint(spell.word_probability('henning')) # <float>=0.0001040741914298211
|
@@ -1,11 +1,11 @@
|
|
1
|
-
<!DOCTYPE html>
|
2
|
-
<html lang="en">
|
3
|
-
<head>
|
4
|
-
<meta charset="UTF-8">
|
5
|
-
<title>{{ title }}</title>
|
6
|
-
<script type="text/javascript" src="https://assets.pyecharts.org/assets/echarts.min.js"></script>
|
7
|
-
</head>
|
8
|
-
<body>
|
9
|
-
{{ body }}
|
10
|
-
</body>
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html lang="en">
|
3
|
+
<head>
|
4
|
+
<meta charset="UTF-8">
|
5
|
+
<title>{{ title }}</title>
|
6
|
+
<script type="text/javascript" src="https://assets.pyecharts.org/assets/echarts.min.js"></script>
|
7
|
+
</head>
|
8
|
+
<body>
|
9
|
+
{{ body }}
|
10
|
+
</body>
|
11
11
|
</html>
|
@@ -1,17 +1,17 @@
|
|
1
|
-
<!DOCTYPE html>
|
2
|
-
<html>
|
3
|
-
<head>
|
4
|
-
<title>{{ title }}</title>
|
5
|
-
<script src="https://cdn.jsdelivr.net/gh/google/code-prettify@master/loader/run_prettify.js"></script>
|
6
|
-
{% if use_mathjax %}
|
7
|
-
<script src="https://a.cdn.histudy.com/lib/config/mathjax_config-klxx.js?v=1.1"></script>
|
8
|
-
<script type="text/javascript" async="" src="https://a.cdn.histudy.com/lib/mathjax/2.7.1/MathJax/MathJax.js?config=TeX-AMS-MML_SVG">
|
9
|
-
MathJax.Hub.Config(MATHJAX_KLXX_CONFIG);
|
10
|
-
</script>
|
11
|
-
{% else %}
|
12
|
-
{% endif%}
|
13
|
-
</head>
|
14
|
-
<body>
|
15
|
-
{{ body }}
|
16
|
-
</body>
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<title>{{ title }}</title>
|
5
|
+
<script src="https://cdn.jsdelivr.net/gh/google/code-prettify@master/loader/run_prettify.js"></script>
|
6
|
+
{% if use_mathjax %}
|
7
|
+
<script src="https://a.cdn.histudy.com/lib/config/mathjax_config-klxx.js?v=1.1"></script>
|
8
|
+
<script type="text/javascript" async="" src="https://a.cdn.histudy.com/lib/mathjax/2.7.1/MathJax/MathJax.js?config=TeX-AMS-MML_SVG">
|
9
|
+
MathJax.Hub.Config(MATHJAX_KLXX_CONFIG);
|
10
|
+
</script>
|
11
|
+
{% else %}
|
12
|
+
{% endif%}
|
13
|
+
</head>
|
14
|
+
<body>
|
15
|
+
{{ body }}
|
16
|
+
</body>
|
17
17
|
</html>
|
@@ -1,103 +1,103 @@
|
|
1
|
-
<!DOCTYPE html>
|
2
|
-
<html>
|
3
|
-
<head>
|
4
|
-
<meta charset="UTF-8">
|
5
|
-
<style>
|
6
|
-
body {
|
7
|
-
background-color: #A4C0A7;
|
8
|
-
}
|
9
|
-
|
10
|
-
topic {
|
11
|
-
display: block;
|
12
|
-
border-style: solid;
|
13
|
-
border-color: gray;
|
14
|
-
padding: 9px;
|
15
|
-
margin: 3px;
|
16
|
-
zoom: 1.5
|
17
|
-
}
|
18
|
-
|
19
|
-
textarea {
|
20
|
-
display: block;
|
21
|
-
border-style: solid;
|
22
|
-
background-color: #A4C0A7;
|
23
|
-
margin: 3px;
|
24
|
-
width: 99%;
|
25
|
-
border-color: gray;
|
26
|
-
zoom: 2
|
27
|
-
}
|
28
|
-
</style>
|
29
|
-
<title>LaTeX公式草稿 </title>
|
30
|
-
<script type="text/javascript" src="https://tr.histudy.com/static/js/load-mathjax.js"></script>
|
31
|
-
</head>
|
32
|
-
<body id="oliga">
|
33
|
-
|
34
|
-
<textarea id="mID1" rows="9" autofocus="autofocus" >
|
35
|
-
{{ text }}
|
36
|
-
</textarea>
|
37
|
-
|
38
|
-
<button accesskey="`" onclick="rTeX('mID1')" >
|
39
|
-
按【Alt + `】刷新渲染
|
40
|
-
</button>
|
41
|
-
<button accesskey="A" onclick="混合渲染()" >
|
42
|
-
按【Alt + A】 或 【F1】 刷新渲染混杂模式
|
43
|
-
</button>
|
44
|
-
按 【F4】 开启/关闭时时渲染,按 【F5】 刷新重置
|
45
|
-
|
46
|
-
<topic id="mID1renderer">
|
47
|
-
</topic>
|
48
|
-
|
49
|
-
</body>
|
50
|
-
|
51
|
-
<script>
|
52
|
-
|
53
|
-
function 公式渲染(s){ return ' ' + MathJax.tex2svg(s.replace(/\$/g, '')).firstChild.outerHTML + ' '}
|
54
|
-
|
55
|
-
function tabular转html(s){
|
56
|
-
s = s.replace(/\\begin\{tabular\}.+?}/g, '<table border=1 >\n<tr><td>').replace(/(\\hline|\\cline\{.+?\})/g, ' ')
|
57
|
-
s = s.replace(/\\\\\s*\\end\{tabular\}/g, '\\end\{tabular\}')
|
58
|
-
s = s.replace(/\\end\{tabular\}/g, '</td></tr>\n</table>')
|
59
|
-
s = s.replace(/\\\\/g, '</td></tr>\n <tr><td>').replace(/&/g, '</td><td>')
|
60
|
-
return s
|
61
|
-
}
|
62
|
-
|
63
|
-
function 混合渲染(){
|
64
|
-
t = document.getElementById('mID1').value
|
65
|
-
// t = t.replace(/\$.+?\$/g, 公式渲染).replace(/\n\n+/g, '\n<p/>')
|
66
|
-
t = t.replace(/\$\n?([^\n\$]+\n?)+\$/g, 公式渲染).replace(/\n\n+/g, '\n<p/>')
|
67
|
-
t = t.replace(/\\begin\{tabular\}[\S\s]+?\\end\{tabular\}/gm, tabular转html)
|
68
|
-
t = t.replace(/\\ce\{.+?\}/g, 公式渲染) // 不严谨
|
69
|
-
document.getElementById("mID1renderer").innerHTML = t
|
70
|
-
document.getElementById('mID1').focus()
|
71
|
-
}
|
72
|
-
|
73
|
-
时时渲染 = false
|
74
|
-
function relax(){console.log('休息')}
|
75
|
-
document.onkeyup = 混合渲染
|
76
|
-
|
77
|
-
document.onkeydown = function hotkey(keyboardPressed) {
|
78
|
-
var theKeyPressed = window.event.keyCode;
|
79
|
-
if (theKeyPressed == 112){console.log('按了F1,一次渲染'); 混合渲染() ; keyboardPressed.preventDefault(); return false;}
|
80
|
-
else if (theKeyPressed == 115){console.log('按了F4,开启动态渲染');
|
81
|
-
if (时时渲染) {时时渲染 = false; document.onkeyup = relax } else {时时渲染 = true; document.onkeyup = 混合渲染; 混合渲染()} ;
|
82
|
-
keyboardPressed.preventDefault(); return false;}
|
83
|
-
}
|
84
|
-
|
85
|
-
|
86
|
-
function rTeX(MathID) {
|
87
|
-
if (document.getElementById(MathID).value.includes('$')) {混合渲染(); alert('不用手工加美元符,会自动加,\n\n你加了美元符我就认为是《混合》型【公式+文本】'); return false}
|
88
|
-
document.getElementById(MathID + "renderer").innerHTML = MathJax.tex2svg(document.getElementById(MathID).value).firstChild.outerHTML
|
89
|
-
<!--let mathTopic = document.getElementById("da");-->
|
90
|
-
<!--MathJax.Hub.queue(["Typeset", MathJax.Hub]);-->
|
91
|
-
<!--MathJax.Hub.Typeset(["da"]);-->
|
92
|
-
document.getElementById(MathID).focus()
|
93
|
-
console.log('可以考虑用前后兄弟节点:previousSibling、nextSibling,相对节点')
|
94
|
-
}
|
95
|
-
|
96
|
-
function enableMathType() {
|
97
|
-
document.getElementById("MathLoader").src="https://tr.histudy.com/static/js/load-mathjax.js";
|
98
|
-
}
|
99
|
-
</script>
|
100
|
-
|
101
|
-
<script type="text/javascript" ID="MathLoader"></script>
|
102
|
-
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<meta charset="UTF-8">
|
5
|
+
<style>
|
6
|
+
body {
|
7
|
+
background-color: #A4C0A7;
|
8
|
+
}
|
9
|
+
|
10
|
+
topic {
|
11
|
+
display: block;
|
12
|
+
border-style: solid;
|
13
|
+
border-color: gray;
|
14
|
+
padding: 9px;
|
15
|
+
margin: 3px;
|
16
|
+
zoom: 1.5
|
17
|
+
}
|
18
|
+
|
19
|
+
textarea {
|
20
|
+
display: block;
|
21
|
+
border-style: solid;
|
22
|
+
background-color: #A4C0A7;
|
23
|
+
margin: 3px;
|
24
|
+
width: 99%;
|
25
|
+
border-color: gray;
|
26
|
+
zoom: 2
|
27
|
+
}
|
28
|
+
</style>
|
29
|
+
<title>LaTeX公式草稿 </title>
|
30
|
+
<script type="text/javascript" src="https://tr.histudy.com/static/js/load-mathjax.js"></script>
|
31
|
+
</head>
|
32
|
+
<body id="oliga">
|
33
|
+
|
34
|
+
<textarea id="mID1" rows="9" autofocus="autofocus" >
|
35
|
+
{{ text }}
|
36
|
+
</textarea>
|
37
|
+
|
38
|
+
<button accesskey="`" onclick="rTeX('mID1')" >
|
39
|
+
按【Alt + `】刷新渲染
|
40
|
+
</button>
|
41
|
+
<button accesskey="A" onclick="混合渲染()" >
|
42
|
+
按【Alt + A】 或 【F1】 刷新渲染混杂模式
|
43
|
+
</button>
|
44
|
+
按 【F4】 开启/关闭时时渲染,按 【F5】 刷新重置
|
45
|
+
|
46
|
+
<topic id="mID1renderer">
|
47
|
+
</topic>
|
48
|
+
|
49
|
+
</body>
|
50
|
+
|
51
|
+
<script>
|
52
|
+
|
53
|
+
function 公式渲染(s){ return ' ' + MathJax.tex2svg(s.replace(/\$/g, '')).firstChild.outerHTML + ' '}
|
54
|
+
|
55
|
+
function tabular转html(s){
|
56
|
+
s = s.replace(/\\begin\{tabular\}.+?}/g, '<table border=1 >\n<tr><td>').replace(/(\\hline|\\cline\{.+?\})/g, ' ')
|
57
|
+
s = s.replace(/\\\\\s*\\end\{tabular\}/g, '\\end\{tabular\}')
|
58
|
+
s = s.replace(/\\end\{tabular\}/g, '</td></tr>\n</table>')
|
59
|
+
s = s.replace(/\\\\/g, '</td></tr>\n <tr><td>').replace(/&/g, '</td><td>')
|
60
|
+
return s
|
61
|
+
}
|
62
|
+
|
63
|
+
function 混合渲染(){
|
64
|
+
t = document.getElementById('mID1').value
|
65
|
+
// t = t.replace(/\$.+?\$/g, 公式渲染).replace(/\n\n+/g, '\n<p/>')
|
66
|
+
t = t.replace(/\$\n?([^\n\$]+\n?)+\$/g, 公式渲染).replace(/\n\n+/g, '\n<p/>')
|
67
|
+
t = t.replace(/\\begin\{tabular\}[\S\s]+?\\end\{tabular\}/gm, tabular转html)
|
68
|
+
t = t.replace(/\\ce\{.+?\}/g, 公式渲染) // 不严谨
|
69
|
+
document.getElementById("mID1renderer").innerHTML = t
|
70
|
+
document.getElementById('mID1').focus()
|
71
|
+
}
|
72
|
+
|
73
|
+
时时渲染 = false
|
74
|
+
function relax(){console.log('休息')}
|
75
|
+
document.onkeyup = 混合渲染
|
76
|
+
|
77
|
+
document.onkeydown = function hotkey(keyboardPressed) {
|
78
|
+
var theKeyPressed = window.event.keyCode;
|
79
|
+
if (theKeyPressed == 112){console.log('按了F1,一次渲染'); 混合渲染() ; keyboardPressed.preventDefault(); return false;}
|
80
|
+
else if (theKeyPressed == 115){console.log('按了F4,开启动态渲染');
|
81
|
+
if (时时渲染) {时时渲染 = false; document.onkeyup = relax } else {时时渲染 = true; document.onkeyup = 混合渲染; 混合渲染()} ;
|
82
|
+
keyboardPressed.preventDefault(); return false;}
|
83
|
+
}
|
84
|
+
|
85
|
+
|
86
|
+
function rTeX(MathID) {
|
87
|
+
if (document.getElementById(MathID).value.includes('$')) {混合渲染(); alert('不用手工加美元符,会自动加,\n\n你加了美元符我就认为是《混合》型【公式+文本】'); return false}
|
88
|
+
document.getElementById(MathID + "renderer").innerHTML = MathJax.tex2svg(document.getElementById(MathID).value).firstChild.outerHTML
|
89
|
+
<!--let mathTopic = document.getElementById("da");-->
|
90
|
+
<!--MathJax.Hub.queue(["Typeset", MathJax.Hub]);-->
|
91
|
+
<!--MathJax.Hub.Typeset(["da"]);-->
|
92
|
+
document.getElementById(MathID).focus()
|
93
|
+
console.log('可以考虑用前后兄弟节点:previousSibling、nextSibling,相对节点')
|
94
|
+
}
|
95
|
+
|
96
|
+
function enableMathType() {
|
97
|
+
document.getElementById("MathLoader").src="https://tr.histudy.com/static/js/load-mathjax.js";
|
98
|
+
}
|
99
|
+
</script>
|
100
|
+
|
101
|
+
<script type="text/javascript" ID="MathLoader"></script>
|
102
|
+
|
103
103
|
</html>
|
pyxllib/text/vbacode.py
CHANGED
@@ -1,17 +1,17 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# @Author : 陈坤泽
|
4
|
-
# @Email : 877362867@qq.com
|
5
|
-
# @Date : 2023/10/20
|
6
|
-
|
7
|
-
import re
|
8
|
-
|
9
|
-
|
10
|
-
class VBACodeFixer:
|
11
|
-
@classmethod
|
12
|
-
def simplify_code(cls, code_text):
|
13
|
-
""" 代码简化,去掉一些冗余写法 """
|
14
|
-
code_text = re.sub(r'ActiveSheet\.(Range|Rows|Columns|Cells)', r'\1', code_text)
|
15
|
-
code_text = re.sub(r'(\w+)\.(Row|Column)\s*\+\s*\1\.\2s\.Count\s*-\s*1', r'\1.\2End', code_text)
|
16
|
-
|
17
|
-
return 1, code_text.strip()
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : 陈坤泽
|
4
|
+
# @Email : 877362867@qq.com
|
5
|
+
# @Date : 2023/10/20
|
6
|
+
|
7
|
+
import re
|
8
|
+
|
9
|
+
|
10
|
+
class VBACodeFixer:
|
11
|
+
@classmethod
|
12
|
+
def simplify_code(cls, code_text):
|
13
|
+
""" 代码简化,去掉一些冗余写法 """
|
14
|
+
code_text = re.sub(r'ActiveSheet\.(Range|Rows|Columns|Cells)', r'\1', code_text)
|
15
|
+
code_text = re.sub(r'(\w+)\.(Row|Column)\s*\+\s*\1\.\2s\.Count\s*-\s*1', r'\1.\2End', code_text)
|
16
|
+
|
17
|
+
return 1, code_text.strip()
|