pyxllib 0.0.43__py3-none-any.whl → 0.3.197__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyxllib/__init__.py +9 -2
- pyxllib/algo/__init__.py +8 -0
- pyxllib/algo/disjoint.py +54 -0
- pyxllib/algo/geo.py +541 -0
- pyxllib/{util/mathlib.py → algo/intervals.py} +172 -36
- pyxllib/algo/matcher.py +389 -0
- pyxllib/algo/newbie.py +166 -0
- pyxllib/algo/pupil.py +629 -0
- pyxllib/algo/shapelylib.py +67 -0
- pyxllib/algo/specialist.py +241 -0
- pyxllib/algo/stat.py +494 -0
- pyxllib/algo/treelib.py +149 -0
- pyxllib/algo/unitlib.py +66 -0
- pyxllib/autogui/__init__.py +5 -0
- pyxllib/autogui/activewin.py +246 -0
- pyxllib/autogui/all.py +9 -0
- pyxllib/autogui/autogui.py +852 -0
- pyxllib/autogui/uiautolib.py +362 -0
- pyxllib/autogui/virtualkey.py +102 -0
- pyxllib/autogui/wechat.py +827 -0
- pyxllib/autogui/wechat_msg.py +421 -0
- pyxllib/autogui/wxautolib.py +84 -0
- pyxllib/cv/__init__.py +1 -11
- pyxllib/cv/expert.py +267 -0
- pyxllib/cv/{imlib.py → imfile.py} +18 -83
- pyxllib/cv/imhash.py +39 -0
- pyxllib/cv/pupil.py +9 -0
- pyxllib/cv/rgbfmt.py +1525 -0
- pyxllib/cv/slidercaptcha.py +137 -0
- pyxllib/cv/trackbartools.py +163 -49
- pyxllib/cv/xlcvlib.py +1040 -0
- pyxllib/cv/xlpillib.py +423 -0
- pyxllib/data/__init__.py +0 -0
- pyxllib/data/echarts.py +240 -0
- pyxllib/data/jsonlib.py +89 -0
- pyxllib/{util/oss2_.py → data/oss.py} +11 -9
- pyxllib/data/pglib.py +1127 -0
- pyxllib/data/sqlite.py +568 -0
- pyxllib/{util → data}/sqllib.py +13 -31
- pyxllib/ext/JLineViewer.py +505 -0
- pyxllib/ext/__init__.py +6 -0
- pyxllib/{util → ext}/demolib.py +119 -35
- pyxllib/ext/drissionlib.py +277 -0
- pyxllib/ext/kq5034lib.py +12 -0
- pyxllib/{util/main.py → ext/old.py} +122 -284
- pyxllib/ext/qt.py +449 -0
- pyxllib/ext/robustprocfile.py +497 -0
- pyxllib/ext/seleniumlib.py +76 -0
- pyxllib/{util/tklib.py → ext/tk.py} +10 -11
- pyxllib/ext/unixlib.py +827 -0
- pyxllib/ext/utools.py +351 -0
- pyxllib/{util/webhooklib.py → ext/webhook.py} +45 -17
- pyxllib/ext/win32lib.py +40 -0
- pyxllib/ext/wjxlib.py +88 -0
- pyxllib/ext/wpsapi.py +124 -0
- pyxllib/ext/xlwork.py +9 -0
- pyxllib/ext/yuquelib.py +1105 -0
- pyxllib/file/__init__.py +17 -0
- pyxllib/file/docxlib.py +761 -0
- pyxllib/{util → file}/gitlib.py +40 -27
- pyxllib/file/libreoffice.py +165 -0
- pyxllib/file/movielib.py +148 -0
- pyxllib/file/newbie.py +10 -0
- pyxllib/file/onenotelib.py +1469 -0
- pyxllib/file/packlib/__init__.py +330 -0
- pyxllib/{util → file/packlib}/zipfile.py +598 -195
- pyxllib/file/pdflib.py +426 -0
- pyxllib/file/pupil.py +185 -0
- pyxllib/file/specialist/__init__.py +685 -0
- pyxllib/{basic/_5_dirlib.py → file/specialist/dirlib.py} +364 -93
- pyxllib/file/specialist/download.py +193 -0
- pyxllib/file/specialist/filelib.py +2829 -0
- pyxllib/file/xlsxlib.py +3131 -0
- pyxllib/file/xlsyncfile.py +341 -0
- pyxllib/prog/__init__.py +5 -0
- pyxllib/prog/cachetools.py +64 -0
- pyxllib/prog/deprecatedlib.py +233 -0
- pyxllib/prog/filelock.py +42 -0
- pyxllib/prog/ipyexec.py +253 -0
- pyxllib/prog/multiprogs.py +940 -0
- pyxllib/prog/newbie.py +451 -0
- pyxllib/prog/pupil.py +1197 -0
- pyxllib/{sitepackages.py → prog/sitepackages.py} +5 -3
- pyxllib/prog/specialist/__init__.py +391 -0
- pyxllib/prog/specialist/bc.py +203 -0
- pyxllib/prog/specialist/browser.py +497 -0
- pyxllib/prog/specialist/common.py +347 -0
- pyxllib/prog/specialist/datetime.py +199 -0
- pyxllib/prog/specialist/tictoc.py +240 -0
- pyxllib/prog/specialist/xllog.py +180 -0
- pyxllib/prog/xlosenv.py +108 -0
- pyxllib/stdlib/__init__.py +17 -0
- pyxllib/{util → stdlib}/tablepyxl/__init__.py +1 -3
- pyxllib/{util → stdlib}/tablepyxl/style.py +1 -1
- pyxllib/{util → stdlib}/tablepyxl/tablepyxl.py +2 -4
- pyxllib/text/__init__.py +8 -0
- pyxllib/text/ahocorasick.py +39 -0
- pyxllib/text/airscript.js +744 -0
- pyxllib/text/charclasslib.py +121 -0
- pyxllib/text/jiebalib.py +267 -0
- pyxllib/text/jinjalib.py +32 -0
- pyxllib/text/jsa_ai_prompt.md +271 -0
- pyxllib/text/jscode.py +922 -0
- pyxllib/text/latex/__init__.py +158 -0
- pyxllib/text/levenshtein.py +303 -0
- pyxllib/text/nestenv.py +1215 -0
- pyxllib/text/newbie.py +300 -0
- pyxllib/text/pupil/__init__.py +8 -0
- pyxllib/text/pupil/common.py +1121 -0
- pyxllib/text/pupil/xlalign.py +326 -0
- pyxllib/text/pycode.py +47 -0
- pyxllib/text/specialist/__init__.py +8 -0
- pyxllib/text/specialist/common.py +112 -0
- pyxllib/text/specialist/ptag.py +186 -0
- pyxllib/text/spellchecker.py +172 -0
- pyxllib/text/templates/echart_base.html +11 -0
- pyxllib/text/templates/highlight_code.html +17 -0
- pyxllib/text/templates/latex_editor.html +103 -0
- pyxllib/text/vbacode.py +17 -0
- pyxllib/text/xmllib.py +747 -0
- pyxllib/xl.py +39 -0
- pyxllib/xlcv.py +17 -0
- pyxllib-0.3.197.dist-info/METADATA +48 -0
- pyxllib-0.3.197.dist-info/RECORD +126 -0
- {pyxllib-0.0.43.dist-info → pyxllib-0.3.197.dist-info}/WHEEL +4 -5
- pyxllib/basic/_1_strlib.py +0 -945
- pyxllib/basic/_2_timelib.py +0 -488
- pyxllib/basic/_3_pathlib.py +0 -916
- pyxllib/basic/_4_loglib.py +0 -419
- pyxllib/basic/__init__.py +0 -54
- pyxllib/basic/arrow_.py +0 -250
- pyxllib/basic/chardet_.py +0 -66
- pyxllib/basic/dirlib.py +0 -529
- pyxllib/basic/dprint.py +0 -202
- pyxllib/basic/extension.py +0 -12
- pyxllib/basic/judge.py +0 -31
- pyxllib/basic/log.py +0 -204
- pyxllib/basic/pathlib_.py +0 -705
- pyxllib/basic/pytictoc.py +0 -102
- pyxllib/basic/qiniu_.py +0 -61
- pyxllib/basic/strlib.py +0 -761
- pyxllib/basic/timer.py +0 -132
- pyxllib/cv/cv.py +0 -834
- pyxllib/cv/cvlib/_1_geo.py +0 -543
- pyxllib/cv/cvlib/_2_cvprcs.py +0 -309
- pyxllib/cv/cvlib/_2_imgproc.py +0 -594
- pyxllib/cv/cvlib/_3_pilprcs.py +0 -80
- pyxllib/cv/cvlib/_4_cvimg.py +0 -211
- pyxllib/cv/cvlib/__init__.py +0 -10
- pyxllib/cv/debugtools.py +0 -82
- pyxllib/cv/fitz_.py +0 -300
- pyxllib/cv/installer.py +0 -42
- pyxllib/debug/_0_installer.py +0 -38
- pyxllib/debug/_1_typelib.py +0 -277
- pyxllib/debug/_2_chrome.py +0 -198
- pyxllib/debug/_3_showdir.py +0 -161
- pyxllib/debug/_4_bcompare.py +0 -140
- pyxllib/debug/__init__.py +0 -49
- pyxllib/debug/bcompare.py +0 -132
- pyxllib/debug/chrome.py +0 -198
- pyxllib/debug/installer.py +0 -38
- pyxllib/debug/showdir.py +0 -158
- pyxllib/debug/typelib.py +0 -278
- pyxllib/image/__init__.py +0 -12
- pyxllib/torch/__init__.py +0 -20
- pyxllib/torch/modellib.py +0 -37
- pyxllib/torch/trainlib.py +0 -344
- pyxllib/util/__init__.py +0 -20
- pyxllib/util/aip_.py +0 -141
- pyxllib/util/casiadb.py +0 -59
- pyxllib/util/excellib.py +0 -495
- pyxllib/util/filelib.py +0 -612
- pyxllib/util/jsondata.py +0 -27
- pyxllib/util/jsondata2.py +0 -92
- pyxllib/util/labelmelib.py +0 -139
- pyxllib/util/onepy/__init__.py +0 -29
- pyxllib/util/onepy/onepy.py +0 -574
- pyxllib/util/onepy/onmanager.py +0 -170
- pyxllib/util/pyautogui_.py +0 -219
- pyxllib/util/textlib.py +0 -1305
- pyxllib/util/unorder.py +0 -22
- pyxllib/util/xmllib.py +0 -639
- pyxllib-0.0.43.dist-info/METADATA +0 -39
- pyxllib-0.0.43.dist-info/RECORD +0 -80
- pyxllib-0.0.43.dist-info/top_level.txt +0 -1
- {pyxllib-0.0.43.dist-info → pyxllib-0.3.197.dist-info/licenses}/LICENSE +0 -0
pyxllib/text/jscode.py
ADDED
@@ -0,0 +1,922 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : 陈坤泽,梁奕本(js去注释部分)
|
4
|
+
# @Email : 877362867@qq.com, https://lyeebn.gitee.io/technology-shop/HeyBoss.html
|
5
|
+
# @Date : 2023/10/20
|
6
|
+
|
7
|
+
from collections import Counter
|
8
|
+
import re
|
9
|
+
import textwrap
|
10
|
+
import os
|
11
|
+
|
12
|
+
from jinja2 import Template
|
13
|
+
|
14
|
+
try:
|
15
|
+
import jsbeautifier
|
16
|
+
except ModuleNotFoundError:
|
17
|
+
pass
|
18
|
+
|
19
|
+
from pyxllib.file.specialist import XlPath
|
20
|
+
from pyxllib.prog.cachetools import xlcache
|
21
|
+
|
22
|
+
|
23
|
+
def __1_删注释功能():
    """Section marker: JavaScript comment removal.

    The code in this section removes comments from JavaScript source by
    scanning it the way a lexer would (strings, template literals, regex
    literals and comments are told apart), including comments nested inside
    ``${...}`` template expressions.

    Usage:
        1. A simple function (defined in this module as ``remove_js_comments``)
            from pyxllib.text.jscode import remove_js_comments
            remove_js_comments(jsSourceCodeAsString)

        2. Object
            from pyxllib.text.jscode import JSParser
            js = JSParser(jsSourceCode)
            jsc = js.clearComment()
    """
|
37
|
+
|
38
|
+
|
39
|
+
def 删注释周围留空(c, arr):  # stop-gap implementation
    """Collapse the whitespace left behind around every removed comment.

    For each offset the blanks (tab, space, vertical tab) and newlines on both
    sides are removed; if any newline was among them, exactly one newline is
    put back and the text resumes at the last newline found on the right, so
    that the indentation of the following line is kept.

    Offsets are processed from last to first so that earlier offsets stay
    valid while the string shrinks.  The list itself is only read: the
    previous version reversed it in place, which corrupted the caller's list
    and made a second call with the same list process offsets in the wrong
    order.

    Known limitation (from the original author): indentation after several
    consecutive comments is not preserved; this does not affect JS semantics.

    :param str c: text whose comments were already removed; carriage returns
        are expected to have been normalised to '\\n' by the caller
    :param arr: ascending offsets into ``c`` marking where a comment used to be
    :return str: the cleaned text
    """
    for i in reversed(arr):
        saw_newline = False
        left = resume_at = i
        right = i + 1
        len_c = len(c)

        # Walk left over blanks/newlines until a significant character.
        while len_c > left > 0:
            left -= 1
            ch = c[left]
            if ch == '\n':
                saw_newline = True
            elif ch not in '\t \v':
                break

        # Walk right the same way, remembering the last newline so the
        # indentation that follows it survives.
        while right < len_c:
            ch = c[right]
            if ch in '\n\r':
                saw_newline = True
                resume_at = right
            elif ch not in '\t \v':
                break
            right += 1

        c = c[:left + 1] + ('\n' if saw_newline else '') + c[resume_at:]
    return c
|
67
|
+
|
68
|
+
|
69
|
+
def 回溯区分正则除号(c):
    """Guess whether a '/' that follows the text ``c`` opens a regex literal.

    The text is scanned backwards from its end, ignoring blanks and line
    breaks, and the first significant character decides:

    * an operator or opening punctuation (``{ [ ( = , ; ? : & | ! * ~ % ^``)
      means a value is expected next, so the slash starts a regex;
    * ``+`` or ``-`` means regex, unless it is the second half of ``++`` /
      ``--`` (postfix operator after a variable), which means division;
    * anything else (identifier, digit, closing bracket, ...) means division.

    Text with no significant character at all counts as "regex".

    :param str c: the already-processed text preceding the slash
    :return bool: True for a regex literal, False for a division operator
    """
    ignorable = '\t \v' + '\n\r'
    value_expected_after = '{[(=,;?:&|!*~%^'

    for pos in range(len(c) - 1, -1, -1):
        ch = c[pos]
        if ch in ignorable:
            continue  # no verdict yet, keep looking further back
        if ch in value_expected_after:
            return True
        if ch == '+' or ch == '-':
            before = pos - 1
            doubled = before > 0 and c[before] == ch
            return not doubled
        return False
    return True
|
97
|
+
|
98
|
+
|
99
|
+
class JSParser:
    """Character-level scanner that strips comments from JavaScript source.

    ``clearComment`` walks ``jsSourceCode`` with ``indexPointer`` and copies
    every character that is not part of a ``//`` or ``/* */`` comment into
    ``jsWithoutComment``.  String literals, template literals (including their
    ``${...}`` expressions) and regex literals are consumed by dedicated
    methods so that slashes inside them are not mistaken for comments.

    Convention used by all scanning methods: the pointer is advanced at the
    top of each loop iteration, so on entry it rests on the character that
    triggered the call and on return it rests on the last character consumed.
    """

    def 普通引号(self, 号='"'):  # 号: the opening/closing delimiter (same meaning below)
        """Copy a single- or double-quoted string literal to the output.

        The caller has already copied the opening quote; this method copies
        everything up to and including the closing quote.

        :param str 号: the quote character that ends the literal
        :raises BaseException: if a newline is reached before the closing quote
        """
        while self.indexPointer < self.jsCodeLength:  # normally exits through the return below
            self.indexPointer += 1
            si = self.jsSourceCode[self.indexPointer]
            self.jsWithoutComment += si  # needed in every case below, including the implicit else
            if si == 号:
                return
            elif si == '\\':  # escape sequence: consume the escaped character as well (\n ' " ` ...)
                self.indexPointer += 1
                if self.jsSourceCode[
                    self.indexPointer + 1] == 号:  # no-op probe; an out-of-range index here would correspond to a syntax error in the source. TODO refine later
                    pass
                self.jsWithoutComment += self.jsSourceCode[self.indexPointer]  # the escaped character is always copied verbatim
            elif si == '\n':
                raise BaseException('原稿语法有误》缺右字符串定界:\a' + 号)

    def 反引号(self, 号='`'):
        """Copy a template literal to the output.

        Works like ``普通引号`` except that newlines are allowed and a ``${``
        hands control to ``反引号嵌套表达式允许多行注释`` for the embedded
        expression.

        :param str 号: the delimiter that ends the literal
        """
        while self.indexPointer < self.jsCodeLength:  # normally exits through the return below
            self.indexPointer += 1
            si = self.jsSourceCode[self.indexPointer]
            self.jsWithoutComment += si  # needed in every case below, including the implicit else
            if si == 号:
                return
            elif si == '\\':
                self.indexPointer += 1
                if self.jsSourceCode[
                    self.indexPointer + 1] == 号:  # no-op probe; an out-of-range index here would correspond to a syntax error in the source. TODO refine later
                    pass
                self.jsWithoutComment += self.jsSourceCode[self.indexPointer]  # the escaped character is always copied verbatim
            elif si == '$' and self.jsSourceCode[
                self.indexPointer + 1] == '{':  # the key difference from plain quotes; per the author's experiment "${" must be closed by "}" or JS reports an error, e.g. g = `2${2+3 7`
                self.indexPointer += 1
                self.jsWithoutComment += '{'
                self.反引号嵌套表达式允许多行注释()
                pass

    def 反引号嵌套表达式允许多行注释(self):  # TODO (author): could use a stack, or recurse into the main loop with '}' as the exit
        """Process the expression inside ``${ ... }`` of a template literal.

        Nested strings and template literals are copied by their own methods,
        a '/' is dispatched to ``反斜线然后呢`` (so comments inside the
        expression are removed), and the method returns at the first '}' met
        outside those nested constructs.
        """
        while self.indexPointer < self.jsCodeLength:  # normally exits through the return below
            self.indexPointer += 1
            si = self.jsSourceCode[self.indexPointer]
            if si == '}':
                self.jsWithoutComment += si
                return
            elif si in '"\'':  # nested quoted string, i.e. si == '"' or si == "'"
                self.jsWithoutComment += si
                self.普通引号(号=si)
            elif si == "`":  # nested template literal; a nested String.raw` is deliberately not handled
                self.jsWithoutComment += si
                self.反引号()
            elif si == '/':  # same dispatch as in the main loop
                self.反斜线然后呢()
            else:
                self.jsWithoutComment += si

    def 反斜线然后呢(self):  # shared by the main loop and by the ${...} expression handler
        """Decide what the '/' under the pointer starts and handle it.

        Looks at the next character: ``//`` is a line comment, ``/*`` a block
        comment; otherwise ``回溯区分正则除号`` inspects the output produced
        so far to choose between a regex literal and a division operator.
        (The method name literally says "backslash", but the character handled
        is the forward slash.)
        """
        偷看 = self.jsSourceCode[self.indexPointer + 1]  # peek at the next character
        if 偷看 == '/':
            self.单行注释()
            self.jsWithoutComment += self.注释占位
        elif 偷看 == '*':
            self.jsWithoutComment += self.注释占位
            self.indexPointer += 1
            self.多行注释()
        elif 回溯区分正则除号(
                self.jsWithoutComment):  # replaces an earlier regex-based lookbehind check
            self.jsWithoutComment += '/'
            self.正则()
        else:
            self.jsWithoutComment += '/'
            # Division needs no special handling.
            # Author's notes: division, regex and comment can coexist on one
            # line, e.g.  q = 1 / 2 + /2/ // ...   A division is preceded by a
            # value, which is hard to enumerate because of implicit type
            # conversion (e.g. '6' / 3); a regex is preceded by an operator or
            # opening punctuation, possibly across a line break.

    def 源反引号(self, 号='`'):
        """Copy the body of a ``String.raw`...` `` literal.

        Currently just delegates to ``反引号``.
        """
        return self.反引号(号=号)  # TODO (author): borrowed for now; differences (backslash handling) still to be considered
        pass

    def 单行注释(self):
        """Skip a ``//`` comment up to, but not including, the line break.

        Records the current output length in ``注释location`` and appends a
        newline to the output.
        """
        while (self.jsSourceCode[self.indexPointer] not in '\n\r') and self.indexPointer < self.jsCodeLength:
            self.indexPointer += 1
        self.注释location.append(len(self.jsWithoutComment))
        self.jsWithoutComment += '\n'  # the newline goes after the recorded removal point, not before it

    def 多行注释(self):
        """Skip a ``/* ... */`` comment.

        Entered with the pointer on the '*' of the opening ``/*``; leaves it
        on the '/' of the closing ``*/`` and records the current output length
        in ``注释location``.
        """
        self.indexPointer += 1  # step past the opening '*' so that "/*/" is not taken as a complete comment
        while not (self.jsSourceCode[self.indexPointer] == '*' and self.jsSourceCode[
            self.indexPointer + 1] == '/'): self.indexPointer += 1
        self.indexPointer += 1
        self.注释location.append(len(self.jsWithoutComment))

    def 正则(self):  # well-formed JS has no line break inside a regex literal
        """Copy a regex literal up to and including its closing '/'.

        A backslash causes the following character to be copied verbatim.

        :raises BaseException: if a line break is met inside the literal
        """
        while True:
            self.indexPointer += 1  # TODO (author): placement of this increment
            si = self.jsSourceCode[self.indexPointer]
            self.jsWithoutComment += si
            if si == '/':
                return
            elif si in '\n\r':
                raise BaseException('正则还能折行?你是哪个老师教的')
            elif self.jsSourceCode[self.indexPointer] == '\\':
                self.indexPointer += 1  # TODO (author): placement of this increment
                self.jsWithoutComment += self.jsSourceCode[self.indexPointer]

    def __init__(self, jsSourceCode):
        """Store the source text and reset the scanner state.

        Only source text is accepted; opening files is left to the caller.

        :param str jsSourceCode: JavaScript source code
        """
        self.jsSourceCode = jsSourceCode.replace('\r', '\n') + '\n '  # '\r' becomes '\n'; two padding characters guard look-ahead reads against running off the end
        self.jsCodeLength = len(jsSourceCode)  # length without the padding
        self.indexPointer = -1  # cursor; incremented before each read
        self.注释占位 = ''  # placeholder inserted where a comment was removed (currently empty)
        self.注释location = []  # output offsets at which comments were removed
        self.jsWithoutComment = False  # False until clearComment has run, afterwards the resulting str

    def clearComment(self):
        """Return the source with comments removed.

        The result is computed on the first call and cached in
        ``jsWithoutComment``; later calls return the cached string (the cache
        test is a truthiness check, so an empty result is not treated as
        cached).  Besides removing comments, directly consecutive newlines
        are collapsed into one, whitespace around removed comments is tidied
        by ``删注释周围留空`` and the final text is stripped.

        :return str: the comment-free source
        """
        if self.jsWithoutComment: return self.jsWithoutComment  # reuse the cached result; note the attribute changes type from bool to str
        # First call only.
        self.jsWithoutComment = ''
        while self.indexPointer < self.jsCodeLength:  # the pointer is advanced at the top of each iteration so it always matches the character being handled
            self.indexPointer += 1
            si = self.jsSourceCode[self.indexPointer]
            if si == '/':  # regex, line/block comment, or division
                self.反斜线然后呢()
                continue  # every remaining case appends si itself. TODO (author): merge
            elif si in '"\'':  # si == '"' or si == "'"
                self.jsWithoutComment += si
                self.普通引号(号=si)
            elif si == "`":
                self.jsWithoutComment += si
                self.反引号()
            elif si == "S" and self.jsCodeLength - self.indexPointer > 11 and self.jsSourceCode[
                                                                              self.indexPointer:self.indexPointer + 11] == 'String.raw`':  # raw template literal
                self.jsWithoutComment += si
                self.indexPointer += 10;
                self.jsWithoutComment += 'tring.raw`'  # done in two steps so the first line matches the other branches
                self.源反引号()
            elif si == "\n":  # collapse runs of directly consecutive newlines; only reached outside strings and comments
                tmp = self.indexPointer
                while tmp < self.jsCodeLength:
                    tmp += 1
                    if self.jsSourceCode[tmp] == '\n':
                        self.indexPointer = tmp
                    elif self.jsSourceCode[tmp] in '\t ':
                        break
                    else:
                        break
                self.jsWithoutComment += si
            else:
                self.jsWithoutComment += si
                # Nothing else needs special treatment here.
                pass
        self.jsWithoutComment = 删注释周围留空(self.jsWithoutComment, self.注释location).strip()
        return self.jsWithoutComment
|
265
|
+
|
266
|
+
|
267
|
+
def remove_js_comments(jsSourceCode):
    """Public helper: strip the comments from JavaScript source in one call.

    Equivalent to building a ``JSParser`` for the text and calling its
    ``clearComment`` method.

    :param str jsSourceCode: JavaScript source code
    :return str: the source without comments
    """
    return JSParser(jsSourceCode).clearComment()
|
270
|
+
|
271
|
+
|
272
|
+
def __2_类js的as处理功能():
    """Section marker: helpers for AirScript, a JavaScript-like language.

    Note dated 2025-03-06 (Thu) 15:23: ``airscript_head`` is probably the old
    version of the JSA tool code, whereas ``get_airscript_head2`` is probably
    the accessor for the new version of the JSA tool code.

    :return:
    """
    pass
|
281
|
+
|
282
|
+
|
283
|
+
airscript_head = r"""
|
284
|
+
// 0 基础组件代码(可以放在功能代码之前,也能放在最后面)
|
285
|
+
|
286
|
+
// 根据提供的 pattern 在 range 中寻找 cell
|
287
|
+
// 如果没有提供 range,默认在 ActiveSheet.UsedRange 中寻找
|
288
|
+
function findCell(pattern, range = ActiveSheet.UsedRange) {
|
289
|
+
const cell = range.Find(pattern, range, xlValues, xlWhole)
|
290
|
+
return cell
|
291
|
+
}
|
292
|
+
|
293
|
+
function levenshteinDistance(a, b) {
|
294
|
+
const matrix = [];
|
295
|
+
|
296
|
+
let i;
|
297
|
+
for (i = 0; i <= b.length; i++) {
|
298
|
+
matrix[i] = [i];
|
299
|
+
}
|
300
|
+
|
301
|
+
let j;
|
302
|
+
for (j = 0; j <= a.length; j++) {
|
303
|
+
matrix[0][j] = j;
|
304
|
+
}
|
305
|
+
|
306
|
+
for (i = 1; i <= b.length; i++) {
|
307
|
+
for (j = 1; j <= a.length; j++) {
|
308
|
+
if (b.charAt(i - 1) === a.charAt(j - 1)) {
|
309
|
+
matrix[i][j] = matrix[i - 1][j - 1];
|
310
|
+
} else {
|
311
|
+
matrix[i][j] = Math.min(matrix[i - 1][j - 1] + 1, Math.min(matrix[i][j - 1] + 1, matrix[i - 1][j] + 1));
|
312
|
+
}
|
313
|
+
}
|
314
|
+
}
|
315
|
+
|
316
|
+
return matrix[b.length][a.length];
|
317
|
+
}
|
318
|
+
|
319
|
+
// 根据提供的 pattern 在 range 中寻找 column
|
320
|
+
// 如果没有提供 range,默认在 ActiveSheet.UsedRange 中寻找
|
321
|
+
function findColumn(pattern, range = ActiveSheet.UsedRange) {
|
322
|
+
let cell = findCell(pattern, range); // 首先尝试精确匹配
|
323
|
+
if (!cell) { // 如果精确匹配失败,尝试模糊匹配
|
324
|
+
let minDistance = Infinity;
|
325
|
+
let minDistanceColumn;
|
326
|
+
for (let i = 1; i <= range.Columns.Count; i++) {
|
327
|
+
let columnName = range.Cells(1, i).Value;
|
328
|
+
let distance = levenshteinDistance(pattern, columnName);
|
329
|
+
if (distance < minDistance) {
|
330
|
+
minDistance = distance;
|
331
|
+
minDistanceColumn = i;
|
332
|
+
}
|
333
|
+
}
|
334
|
+
return minDistanceColumn;
|
335
|
+
}
|
336
|
+
if (cell) { return cell.Column }
|
337
|
+
}
|
338
|
+
|
339
|
+
// 根据提供的 pattern 在 range 中寻找 row
|
340
|
+
// 如果没有提供 range,默认在 ActiveSheet.UsedRange 中寻找
|
341
|
+
function findRow(pattern, range = ActiveSheet.UsedRange) {
|
342
|
+
const cell = findCell(pattern, range)
|
343
|
+
if (cell) { return cell.Row }
|
344
|
+
}
|
345
|
+
|
346
|
+
// 判断一个 cells 集合是否为空
|
347
|
+
function isEmpty(cells) {
|
348
|
+
for (let i = 1; i <= cells.Count; i++) {
|
349
|
+
if (cells.Item(i).Text) {
|
350
|
+
return false;
|
351
|
+
}
|
352
|
+
}
|
353
|
+
return true;
|
354
|
+
}
|
355
|
+
|
356
|
+
// 获取实际使用的区域
|
357
|
+
function getUsedRange(maxRows = 500, maxColumns = 100, startFromA1 = true) {
|
358
|
+
/* 允许通过"表格上下文"信息,调整这里数据行的上限500行,或者列上限100列
|
359
|
+
注意,如果分析预设的表格数据在这个限定参数内可以不改
|
360
|
+
只有表格未知,或者明确数据量超过设置时,需要重新调整这里的参数
|
361
|
+
调整的时候千万不要故意凑的刚刚好,可以设置一定的冗余区间
|
362
|
+
比如数据说有4101条,那么这里阈值设置为5000也是可以的,比较保险。
|
363
|
+
*/
|
364
|
+
|
365
|
+
// 默认获得的区间,有可能是有冗余的空行,所以还要进一步优化
|
366
|
+
let usedRange = ActiveSheet.UsedRange;
|
367
|
+
|
368
|
+
let lastRow = Math.min(usedRange.Rows.Count, maxRows);
|
369
|
+
let lastColumn = Math.min(usedRange.Columns.Count, maxColumns);
|
370
|
+
|
371
|
+
let firstRow = 1;
|
372
|
+
let firstColumn = 1;
|
373
|
+
|
374
|
+
// 找到最后一个非空行
|
375
|
+
for (; lastRow >= firstRow; lastRow--) {
|
376
|
+
if (!isEmpty(usedRange.Rows(lastRow).Cells)) {
|
377
|
+
break;
|
378
|
+
}
|
379
|
+
}
|
380
|
+
|
381
|
+
// 找到最后一个非空列
|
382
|
+
for (; lastColumn >= firstColumn; lastColumn--) {
|
383
|
+
if (!isEmpty(usedRange.Columns(lastColumn).Cells)) {
|
384
|
+
break;
|
385
|
+
}
|
386
|
+
}
|
387
|
+
|
388
|
+
// 如果表格不是从"A1"开始,找到第一个非空行和非空列
|
389
|
+
if (!startFromA1) {
|
390
|
+
for (; firstRow <= lastRow; firstRow++) {
|
391
|
+
if (!isEmpty(usedRange.Rows(firstRow).Cells)) {
|
392
|
+
break;
|
393
|
+
}
|
394
|
+
}
|
395
|
+
|
396
|
+
for (; firstColumn <= lastColumn; firstColumn++) {
|
397
|
+
if (!isEmpty(usedRange.Columns(firstColumn).Cells)) {
|
398
|
+
break;
|
399
|
+
}
|
400
|
+
}
|
401
|
+
}
|
402
|
+
|
403
|
+
// 创建一个新的 Range 对象,它只包含非空的行和列
|
404
|
+
let newUsedRange = ActiveSheet.Range(
|
405
|
+
usedRange.Cells(firstRow, firstColumn),
|
406
|
+
usedRange.Cells(lastRow, lastColumn)
|
407
|
+
);
|
408
|
+
|
409
|
+
return newUsedRange; // 返回新的实际数据区域
|
410
|
+
}
|
411
|
+
|
412
|
+
// 将 Excel 日期转换为 JavaScript 日期
|
413
|
+
function xlDateToJSDate(xlDate) {
|
414
|
+
return new Date((xlDate - 25569) * 24 * 3600 * 1000);
|
415
|
+
}
|
416
|
+
|
417
|
+
// 判断日期是否在本周
|
418
|
+
function isCurrentWeek(date) {
|
419
|
+
const today = new Date();
|
420
|
+
today.setHours(0, 0, 0, 0); // 把时间设为午夜以准确地比较日期
|
421
|
+
const firstDayOfWeek = new Date(today.setDate(today.getDate() - today.getDay()));
|
422
|
+
const lastDayOfWeek = new Date(today.setDate(today.getDate() - today.getDay() + 6));
|
423
|
+
return date >= firstDayOfWeek && date <= lastDayOfWeek;
|
424
|
+
}
|
425
|
+
|
426
|
+
// 判断日期是否在当前月份
|
427
|
+
function isCurrentMonth(date) {
|
428
|
+
const currentDate = new Date();
|
429
|
+
currentDate.setHours(0, 0, 0, 0); // 把时间设为午夜stdcode以准确地比较日期
|
430
|
+
return date.getMonth() === currentDate.getMonth() && date.getFullYear() === currentDate.getFullYear();
|
431
|
+
}
|
432
|
+
|
433
|
+
// 判断日期是否在下周
|
434
|
+
function isNextWeek(date) {
|
435
|
+
const today = new Date();
|
436
|
+
today.setHours(0, 0, 0, 0); // 把时间设为午夜以准确地比较日期
|
437
|
+
const nextWeek = new Date(today.getFullYear(), today.getMonth(), today.getDate() + 7);
|
438
|
+
return date > today && date <= nextWeek;
|
439
|
+
}
|
440
|
+
|
441
|
+
// 判断日期是否在下个月
|
442
|
+
function isNextMonth(date) {
|
443
|
+
const today = new Date();
|
444
|
+
today.setHours(0, 0, 0, 0); // 把时间设为午夜以准确地比较日期
|
445
|
+
const nextMonth = new Date(today.getFullYear(), today.getMonth() + 1, 1);
|
446
|
+
const endDateOfNextMonth = new Date(today.getFullYear(), today.getMonth() + 2, 0);
|
447
|
+
return date >= nextMonth && date <= endDateOfNextMonth;
|
448
|
+
}
|
449
|
+
""".strip()
|
450
|
+
|
451
|
+
|
452
|
+
@xlcache()
|
453
|
+
def get_airscript_head2(definitions=False):
    """Load ``airscript.js`` from this package and fill in its placeholders.

    The raw file is a template rather than directly runnable code: the
    placeholders ``JSA_POST_HOST_URL``, ``JSA_POST_TOKEN`` and
    ``JSA_POST_DEFAULT_HOST`` are rendered from the environment variables of
    the same names.

    :param bool definitions:
        False: return the whole rendered code
        True: return the rendered code split into definitions by
            ``extract_definitions_with_comments``
    :return: the rendered text, or the result of the splitting helper
    """
    template_text = (XlPath(__file__).parent / 'airscript.js').read_text().strip()
    placeholder_names = ('JSA_POST_HOST_URL', 'JSA_POST_TOKEN', 'JSA_POST_DEFAULT_HOST')
    context = {key: os.getenv(key) for key in placeholder_names}
    rendered = Template(template_text).render(context)
    if definitions:
        return extract_definitions_with_comments(rendered + '\n')
    return rendered
|
471
|
+
|
472
|
+
|
473
|
+
class AirScriptCodeFixer:
|
474
|
+
@classmethod
|
475
|
+
def fix_colors(cls, code_text):
    """Repair common mistakes in how generated code assigns colors.

    Steps:
      1. Reject code that refers to ``Color<x>name`` not preceded by a dot
         (e.g. ``Color.Red``).
      2. Rewrite every numeric assignment ``.Color = <int>`` to
         ``RGB(r, g, b)``, reading the integer as red + 256*green +
         65536*blue.  Previously only the first such assignment in the text
         was converted.
      3. Rewrite quoted color names and 6-digit hex strings to ``RGB(...)``.
      4. Reject code that still assigns a quoted string to ``Color``.

    :param str code_text: AirScript/JS code
    :return tuple: ``(status, text)``; status is 0 when the code is rejected
        and 1 otherwise
    """
    # 1 Color usages that cannot be repaired.
    # NOTE(review): the '.' after "Color" is unescaped and therefore matches
    # any character - confirm whether r'Color\.\w+' was intended.
    if re.search(r'(?<!\.)\b(Color.\w+)\b', code_text):
        return 0, code_text

    # 2 Unlike VBA, a plain number cannot be assigned to a color.
    def number_to_rgb(m):
        color_number = int(m.group(2))
        red = color_number % 256
        green = (color_number // 256) % 256
        blue = (color_number // 256 // 256) % 256
        return f'{m.group(1)}RGB({red}, {green}, {blue})'

    code_text = re.sub(r'(\.Color\s*=\s*)(\d+)', number_to_rgb, code_text)

    # 3 Quoted color names / hex codes that can be translated.
    configs = {
        '红色': 'RGB(255, 0, 0)',
        '黄色': 'RGB(255, 255, 0)',
        '绿色': 'RGB(0, 255, 0)',
        '蓝色': 'RGB(0, 0, 255)',
        '灰色': 'RGB(128, 128, 128)',
        'red': 'RGB(255, 0, 0)',
        'yellow': 'RGB(255, 255, 0)',
        'green': 'RGB(0, 255, 0)',
        'blue': 'RGB(0, 0, 255)',
        'black': 'RGB(0, 0, 0)',
        'gray': 'RGB(128, 128, 128)',
        'grey': 'RGB(128, 128, 128)',
        'purple': 'RGB(128, 0, 128)',
        'pink': 'RGB(255, 192, 203)',
        'orange': 'RGB(255, 128, 0)',
    }

    def replace_color_fmt(m):
        t1, t2 = m.groups()
        t2 = t2.strip('"\'').lower()
        if t2 in configs:
            return f'{t1}{configs[t2]}'
        elif m2 := re.search(r'[a-fA-F0-9]{6}', t2):
            hex_digits = m2.group(0)
            return (f'{t1}RGB({int(hex_digits[:2], 16)}, '
                    f'{int(hex_digits[2:4], 16)}, '
                    f'{int(hex_digits[4:], 16)})')
        # Unknown value: leave the assignment untouched so step 4 rejects it.
        return t1 + m.group(2)

    text = re.sub(r'''(\bColor\s*=\s*)(['"].+?['"])''', replace_color_fmt, code_text)

    # 4 Anything still assigning a quoted string could not be repaired.
    if re.search(r'''\bColor\s*=\s*['"]''', text):
        return 0, text

    return 1, text
|
532
|
+
|
533
|
+
@classmethod
|
534
|
+
def fix_miscellaneous(cls, code_text):
    """Apply a fixed sequence of small textual repairs to generated code.

    The repairs run in the order listed below and the status is always 1.

    :param str code_text: AirScript/JS code
    :return tuple: ``(1, repaired_text)``
    """
    repairs = (
        # "Cannot convert a Symbol value to a string": usually caused by
        # '+=' on an Excel object, so expand it to "x = x + y" (531 cases).
        lambda s: re.sub(r'(\s+)((?:.+)Value2?)\s+(?:\+=)\s+(.+)', r'\1\2 = \2 + \3', s),
        # Wrong call form of the Find interface (8 cases).
        lambda s: s.replace('.Range.Find(', '.Find('),
        # Sort interface: a last argument of -1/0/false becomes 2 (328 cases).
        lambda s: re.sub(r'(\.Sort\(.*?,\s+)(-1|0|false)\)', r'\g<1>2)', s),
        # Data validation formulas sometimes carry a redundant nested pair of quotes.
        lambda s: re.sub(r'''(Formula\d:\s*')"(.+?)"''', r'\1\2', s),
        # 2023-09-07: the enum value must not be written as a string.
        lambda s: re.sub(r'''(['"`])(xlCellTypeVisible)\1''', r'\2', s),
        # 2023-11-06: Range("A") / Range("3") become Range("A:A") / Range("3:3").
        lambda s: re.sub(r'Range\(("|\')([A-Z]+|\d+)("|\')\)', r'Range(\1\2:\2\1)', s),
    )

    repaired = code_text
    for repair in repairs:
        repaired = repair(repaired)
    return 1, repaired
|
557
|
+
|
558
|
+
@classmethod
|
559
|
+
def delete_error_record(cls, code_text):
    """Pass-through step: accept the text unchanged with status 1.

    :param str code_text: AirScript/JS code
    :return tuple: ``(1, code_text)``
    """
    status = 1
    return status, code_text
|
561
|
+
|
562
|
+
@classmethod
|
563
|
+
def check_assistant_content(cls, code_text):
    """Tally which expressions precede ``.Columns`` in the code.

    Every dotted prefix ending in ``.Columns`` is counted into the
    module-level ``count_target`` accumulator; the text is returned
    unchanged.  (Author's remark: ``ActiveSheet`` is normally all that is
    needed in front of ``Columns``.)

    :param str code_text: AirScript/JS code
    :return tuple: ``(1, code_text)``
    """
    global count_target
    column_refs = re.findall(r'[a-zA-Z_\d\.]+\.Columns', code_text)
    count_target += Counter(ref.strip() for ref in column_refs)
    return 1, code_text
|
573
|
+
|
574
|
+
@classmethod
|
575
|
+
def simplify_advtools(cls, code_text):
    """Replace calls to the advanced helper functions with plain API calls.

    * ``getUsedRange()``          -> ``ActiveSheet.UsedRange``
    * ``findCell('x'[, rng])``    -> ``ActiveSheet.UsedRange.Find('x')``
    * ``findColumn('x'[, rng])``  -> ``ActiveSheet.UsedRange.Find('x').Column``
    * ``findRow('x'[, rng])``     -> ``ActiveSheet.UsedRange.Find('x').Row``

    The optional range argument is dropped in all three ``find*`` rewrites.
    The previous ``findRow`` pattern had its closing parenthesis misplaced,
    so the range argument leaked into the ``Find(...)`` call, unlike the
    other two helpers.

    :param str code_text: AirScript/JS code
    :return tuple: ``(1, simplified_text)``
    """
    text = code_text.replace('getUsedRange()', 'ActiveSheet.UsedRange')
    # helper name -> attribute appended after the Find(...) call
    for helper, suffix in (('findCell', ''), ('findColumn', '.Column'), ('findRow', '.Row')):
        text = re.sub(rf'''{helper}\(((['"]).+?\2)(, [a-zA-Z]+)?\)''',
                      rf'ActiveSheet.UsedRange.Find(\1){suffix}', text)
    return 1, text
|
587
|
+
|
588
|
+
@classmethod
|
589
|
+
def simplify_code(cls, code_text, indent=4):
    """Shorten redundant spellings, then pretty-print the code.

    The default indentation is 4; the original author suggests 2 for the
    training stage.

    :param str code_text: AirScript/JS code
    :param int indent: indentation width handed to jsbeautifier
    :return tuple: ``(1, simplified_and_beautified_text)``
    """
    # 1 Simplification rules, applied in this order.
    #   (is_regex, what to find, replacement)
    rewrites = (
        (True, r'Application\.(WorksheetFunction|ActiveWorkbook|ActiveSheet|Sheets|Range|Workbook)', r'\1'),
        (True, r'Workbook\.(Sheets)', r'\1'),
        (True, r'ActiveSheet\.(Range|Rows|Columns|Cells)', r'\1'),
        (True, r'(\w+)\.(Row|Column)\s*\+\s*\1\.\2s\.Count\s*-\s*1', r'\1.\2End'),
        (True, r'\bvar\b', 'let'),
        (False, 'Sheets.Item(', 'Sheets('),
        (True, r'Application.Enum.\w+.(\w+)', r'\1'),
    )
    simplified = code_text
    for is_regex, old, new in rewrites:
        if is_regex:
            simplified = re.sub(old, new, simplified)
        else:
            simplified = simplified.replace(old, new)

    # 2 Pretty-printing.
    options = jsbeautifier.default_options()
    options.indent_size = indent
    beautified = jsbeautifier.beautify(simplified, options)

    return 1, beautified.strip()
|
610
|
+
|
611
|
+
@classmethod
|
612
|
+
def simplify_code2(cls, code_text, indent=4):
    """Hook for a stronger simplification level.

    Intended for rules one wants to keep in annotated data but drop for
    training.  At present it runs ``simplify_code`` and returns only the
    text, without the status.

    :param str code_text: AirScript/JS code
    :param int indent: indentation width passed on to ``simplify_code``
    :return str: the simplified code
    """
    _status, simplified = cls.simplify_code(code_text, indent)
    return simplified
|
616
|
+
|
617
|
+
@classmethod
|
618
|
+
def fix_stdcode(cls, code_text):
    """Prepend only those standard components that the code actually lacks.

    After ``simplify_advtools`` has run, every well-known helper or variable
    that is used but not declared gets its definition prepended: helper
    functions are taken from ``airscript_head``, variables from fixed
    one-line declarations.

    Changes from the previous version:
      * the fallback for ``lastDataRow`` declared ``lastRow`` instead, which
        left the missing name undefined; it now declares ``lastDataRow``;
      * the malformed ``(?:<=^|\\b)`` prefix of the declaration regex is
        written as the equivalent ``\\b`` (its first alternative could never
        match).

    :param str code_text: AirScript/JS code
    :return tuple: ``(status, text)``; status is 0 when a well-known name is
        used but no definition for it is available, otherwise 1
    """
    # 1 Collect declared and used identifiers.
    _, text = cls.simplify_advtools(code_text)

    defined_vars = set(re.findall(r'\b(?:var|let|const|function)\s+(\w+)(?:\s+|\()', text))
    used_vars = set(re.findall(r'(?<!\.)\b(\w+)\b', text))

    # 2 Top-level functions available in the component header: name -> source.
    js_funcs = {
        name: func
        for func, name in re.findall(r"(function\s+(\w+).+?^\})", airscript_head,
                                     re.MULTILINE | re.DOTALL)
    }

    # 3 Add the missing definitions.  An empty fallback means the definition
    #   has to come from the component header.
    fallback_code = {
        'xlDateToJSDate': '',
        'isCurrentWeek': '',
        'isCurrentMonth': '',
        'isNextWeek': '',
        'isNextMonth': '',
        'usedRange': 'const usedRange = ActiveSheet.UsedRange;',
        'headerRows': 'const headerRows = usedRange.Rows("1:1");',
        'firstDataRow': 'const firstDataRow = headerRows.RowEnd + 1;',
        'lastDataRow': 'const lastDataRow = usedRange.RowEnd;',
    }
    pre_additional_code = []
    for name, code in fallback_code.items():
        if name not in used_vars or name in defined_vars:
            continue
        code = js_funcs.get(name, code)
        if not code:
            # Used without any available definition: discard this sample.
            return 0, text
        pre_additional_code.append(code)
        used_vars.remove(name)

    # If some 'xxxColumn' variable is used without being declared, the
    # collected definitions are not prepended (the status stays 1).
    undeclared_column = any(
        name.endswith('Column') and name not in defined_vars for name in used_vars
    )
    if pre_additional_code and not undeclared_column:
        text = '\n'.join(pre_additional_code) + '\n' + text
    return 1, text
|
665
|
+
|
666
|
+
@classmethod
def pre_proc(cls, code_text):
    """Strip a literal backslash-``n`` pair found at the start of any line.

    :param str code_text: raw snippet
    :return: ``(1, cleaned_text)``
    """
    leading_escaped_newline = re.compile(r'^\\n', re.MULTILINE)
    return 1, leading_escaped_newline.sub('', code_text)
|
670
|
+
|
671
|
+
@classmethod
def fix_loc_head(cls, code_text):
    """Restore a missing section-1 header comment.

    When the snippet has a ``// 2 ... 业务功能`` header but no
    ``// 1 ... 定位`` header, a section-1 header is prepended, reusing the
    separator characters captured from the section-2 header.

    :param str code_text: snippet to inspect
    :return: ``(1, text)``
    """
    business_head = re.search(r'//\s*2([\.\s]+)业务功能', code_text)
    locate_head = re.search(r'//\s*1([\.\s]+)定位', code_text)
    if business_head is not None and locate_head is None:
        separator = business_head.group(1)
        code_text = ''.join(['// 1', separator, '定位\n', code_text])
    return 1, code_text
|
679
|
+
|
680
|
+
@classmethod
def remove_stdcode(cls, code_text):
    """Drop the fixed helper code that surrounds the business logic.

    Everything before a ``// 1 ... 定位`` header is removed, and so is
    everything from the ``// 0 基础组件代码...`` marker to the end of the text.

    :param str code_text: snippet that may carry the helper head/tail
    :return: ``(1, text)``
    """
    before_locate = re.compile(r'(.*?)(//\s*1[\.\s]+定位)', re.DOTALL)
    helper_tail = re.compile(r'// 0 基础组件代码(可以放在功能代码之前,也能放在最后面).+?$', re.DOTALL)

    trimmed = before_locate.sub(r'\2', code_text)
    trimmed = helper_tail.sub('', trimmed)
    return 1, trimmed
|
687
|
+
|
688
|
+
@classmethod
def fix_texts(cls, code_text):
    """Replace placeholder tags in the text with their plain wording.

    :param str code_text: text to normalise
    :return: ``(1, text)``
    """
    replacements = (
        ('<表格结构信息描述>', '表格摘要'),
        ('<孩子:表格摘要>', '表格摘要'),
    )
    result = code_text
    for placeholder, wording in replacements:
        result = result.replace(placeholder, wording)
    return 1, result
|
695
|
+
|
696
|
+
@classmethod
def fix_base(cls, code_text):
    """Run the basic repair pipeline, including comment removal.

    Each step takes the current text and returns ``(status, text)``.  The
    pipeline stops at the first step that reports a falsy status.

    :param str code_text: snippet to repair
    :return: ``(status, text)`` from the last step that ran
    """
    pipeline = (
        cls.simplify_code,
        cls.fix_colors,
        cls.fix_miscellaneous,
        cls.advanced_remove_comments_regex,
    )
    text = code_text
    for step in pipeline:
        status, text = step(text)
        if not status:
            break
    return status, text
|
709
|
+
|
710
|
+
@classmethod
def fix_base2(cls, code_text):
    """Run the basic repair pipeline while keeping comments in place.

    Each step takes the current text and returns ``(status, text)``.  The
    pipeline stops at the first step that reports a falsy status.

    :param str code_text: snippet to repair
    :return: ``(status, text)`` from the last step that ran
    """
    pipeline = (
        cls.simplify_code,
        cls.fix_colors,
        cls.fix_miscellaneous,
    )
    text = code_text
    for step in pipeline:
        status, text = step(text)
        if not status:
            break
    return status, text
|
722
|
+
|
723
|
+
@classmethod
def fix_all(cls, code_text):
    """Run the full repair pipeline, ending with helper-definition completion.

    Each step takes the current text and returns ``(status, text)``.  The
    pipeline stops at the first step that reports a falsy status.
    ``cls.advanced_remove_comments_regex`` is intentionally not part of this
    pipeline.

    :param str code_text: snippet to repair
    :return: ``(status, text)`` from the last step that ran
    """
    old_text = code_text  # kept for ad-hoc before/after diffing while debugging
    pipeline = (
        cls.simplify_code,
        cls.fix_colors,
        cls.fix_miscellaneous,
        cls.fix_stdcode,
    )
    text = code_text
    for step in pipeline:
        status, text = step(text)
        if not status:
            break
    return status, text
|
741
|
+
|
742
|
+
@classmethod
def format_hanging_indent(cls, text):
    r"""Tidy text whose first line is flush left but whose later lines hang.

    Surrounding whitespace is stripped, the first line is kept as is, and
    the common leading indentation of all following lines is removed.
    Input that has only one line (or is empty) is returned stripped, without
    an extra trailing newline.

    :param str text: input text
    :return: ``(1, reformatted_text)``

    >>> AirScriptCodeFixer.format_hanging_indent('a;\n    b;\n      c;')
    (1, 'a;\nb;\n  c;')
    >>> AirScriptCodeFixer.format_hanging_indent('  only one line  ')
    (1, 'only one line')
    """
    first_line, newline, remaining_text = text.strip().partition('\n')
    if not newline:
        # Nothing follows the first line, so there is nothing to dedent and
        # no separator to add back.
        return 1, first_line
    return 1, first_line + '\n' + textwrap.dedent(remaining_text)
|
758
|
+
|
759
|
+
@classmethod
def remove_comments_regex(cls, js_code):
    """Strip JavaScript comments with plain regexes (quick, not rigorous).

    Comment markers inside string literals are not protected.  Unlike its
    sibling methods, this one returns the bare string, not a tuple.

    :param str js_code: JavaScript source
    :return str: source with comments removed
    """
    passes = (
        # Whole-line comments first, so their line breaks disappear too.
        (r'^\s*/\*.*?\*/\n?', re.DOTALL | re.MULTILINE),
        (r'^\s*//.*\n?', re.MULTILINE),
        # Then any remaining block comments ...
        (r'\s*/\*.*?\*/', re.DOTALL),
        # ... and trailing single-line comments.
        (r'\s*//.*', 0),
    )
    for comment_pattern, flags in passes:
        js_code = re.sub(comment_pattern, '', js_code, flags=flags)
    return js_code
|
770
|
+
|
771
|
+
@classmethod
def advanced_remove_comments_regex(cls, js_code):
    """Remove JavaScript comments while leaving string literals untouched.

    A single alternation matches quoted strings before comments, so comment
    markers that occur inside a string are consumed as part of that string
    and preserved.

    :param str js_code: JavaScript source
    :return: ``(1, source_without_comments)``
    """
    # Single- or double-quoted string, allowing backslash escapes.
    quoted_string = r'(?:"[^"\\]*(?:\\.[^"\\]*)*"|\'[^\'\\]*(?:\\.[^\'\\]*)*\')'
    line_comment = r'\/\/[^\n]*'
    block_comment = r'\/\*.*?\*\/'
    tokens = re.compile('|'.join((quoted_string, line_comment, block_comment)), re.DOTALL)

    def keep_strings_only(found):
        token = found.group(0)
        # Strings are echoed back; anything else matched is a comment.
        return token if token[0] in ('"', "'") else ''

    return 1, tokens.sub(keep_strings_only, js_code)
|
792
|
+
|
793
|
+
@classmethod
def remove_js_comment(cls, js_code):
    """Remove comments from JavaScript, falling back to regexes on failure.

    ``remove_js_comments`` (defined outside this block) is tried first; if it
    raises, the quick ``cls.remove_comments_regex`` fallback is used instead.

    :param str js_code: JavaScript source
    :return: ``(1, source_without_comments)``
    """
    try:
        js_code2 = remove_js_comments(js_code)
    except Exception:
        # Only ordinary errors trigger the fallback; KeyboardInterrupt and
        # SystemExit must propagate instead of being swallowed.
        js_code2 = cls.remove_comments_regex(js_code)
    return 1, js_code2
|
800
|
+
|
801
|
+
|
802
|
+
def __3_js代码结构解析():
    """Section marker: JavaScript code-structure parsing. Does nothing."""
|
804
|
+
|
805
|
+
|
806
|
+
def extract_definitions_with_comments(js_code):
    """Split JavaScript source into top-level definitions, comments included.

    This is a regex implementation.  A brace-delimited body is considered
    finished at the first line that consists solely of ``}``, so a body that
    contains such a line internally will be cut short.

    :param str js_code: JavaScript source
    :return dict: identifier -> stripped text of its definition (with any
        comments directly preceding it), in order of appearance; a repeated
        identifier keeps the last definition
    """
    # Capture groups:
    #   1. comments placed right before the declaration
    #   2. the declaration keyword (var/let/const/function/class)
    #   3. the declared identifier
    #   4. either the end of a line that does not end with `{`, or a `{ ... }`
    #      body that ends with a line containing only `}`
    pattern = r"""
        # 第1组:前缀注释
        ( (?:(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)|(?://[^\n]*))\s* )*
        # 第2组:声明部分
        ( \b(?:var|let|const|function|class)\b\s+ )
        # 第3组:变量名或函数名
        (\w+).*?
        # 第4组:不以{结尾的行,或者已{结尾的行且后续有配对的}
        ( (?:[^\{]\n) | (?:\{[\s\S]+?(?<=\n\}\n)) )
    """
    # Every alternative of group 4 needs a terminating newline; without this
    # a final definition not followed by one would be silently dropped.
    if not js_code.endswith('\n'):
        js_code += '\n'

    definitions = {}
    for match in re.finditer(pattern, js_code, flags=re.VERBOSE):
        identifier = match.group(3).strip()
        definitions[identifier] = match.group(0).strip()
    return definitions
|
830
|
+
|
831
|
+
|
832
|
+
def find_identifiers_in_code(code):
    """Collect every word-like token in the code, using a regex.

    No parsing is done: tokens inside comments and strings, as well as pure
    numbers, are included.  A syntax-tree approach (esprima) was rejected
    because it has too many problems with non-standard code and would also
    filter out comments.

    :param str code: source text
    :return set: distinct ``\\w+`` tokens
    """
    return {found.group(1) for found in re.finditer(r'\b(\w+)\b', code)}
|
839
|
+
|
840
|
+
|
841
|
+
def find_direct_dependencies(definitions):
    """Work out which other definitions each definition refers to directly.

    The tokens found in each definition's code by
    ``find_identifiers_in_code`` are intersected with the set of defined
    names; a definition is never listed as depending on itself.

    :param dict definitions: name -> code; only names inside this mapping
        are considered as possible dependencies
    :return dict: name -> list of directly referenced names
    """
    known_names = set(definitions.keys())
    dependencies = {}
    for name, snippet in definitions.items():
        referenced = find_identifiers_in_code(snippet).intersection(known_names)
        dependencies[name] = list(referenced - {name})
    return dependencies
|
857
|
+
|
858
|
+
|
859
|
+
def assemble_dependencies_from_jstools(cur_code, jstools=None, place_tail=False, old_jsa=False):
    """Attach to ``cur_code`` every tool definition it depends on.

    Tool definitions come from ``get_airscript_head2(True)`` when ``jstools``
    is None, otherwise they are parsed out of ``jstools``.  Definitions named
    in ``cur_code`` are selected together with everything they reference,
    transitively, and emitted in the order of the definitions mapping.

    :param str cur_code: the code to complete
    :param str jstools: source of the tool library, or None for the default
    :param bool place_tail: put the tool code after ``cur_code`` (preceded by
        an empty marker function) so the business logic is seen first.  Most
        helper functions can live at the end, but helpers implemented as
        variable definitions cannot.
    :param bool old_jsa: for every ``as1_xxx`` definition, register it under
        the name ``xxx`` as well, with the ``as1_`` prefix removed from its
        ``function`` declaration
    :return str: the combined code, parts separated by blank lines
    """
    # 1 Load the tool definitions.
    wanted_names = find_identifiers_in_code(cur_code)

    if jstools is None:
        definitions = get_airscript_head2(True)
    else:
        definitions = extract_definitions_with_comments(jstools)

    if old_jsa:
        legacy_names = [name for name in definitions if name.startswith('as1_')]
        for legacy_name in legacy_names:
            definitions[legacy_name[4:]] = re.sub(r'(function\s+)as1_', r'\1',
                                                  definitions[legacy_name])

    # 2 Walk the dependency graph from the names used in the input.
    graph = find_direct_dependencies(definitions)
    reachable = set()
    pending = [name for name in definitions if name in wanted_names]
    while pending:
        name = pending.pop()
        if name in reachable:
            continue
        reachable.add(name)
        pending.extend(graph[name])

    # 3 Join the pieces, keeping the definitions in their original order.
    tool_snippets = [snippet for name, snippet in definitions.items() if name in reachable]
    if place_tail:
        pieces = [cur_code, '\n\nfunction __x_工具代码() {\n}'] + tool_snippets
    else:
        pieces = tool_snippets + [cur_code]

    return "\n\n".join(pieces)
|
915
|
+
|
916
|
+
|
917
|
+
if __name__ == '__main__':
    # 1 To inspect the dictionary the regex extracts from the helper source,
    #   call get_airscript_head2(True) here instead.

    # 2 Show which dependencies get pulled in when runIsolatedPyScript is used.
    assembled = assemble_dependencies_from_jstools('runIsolatedPyScript')
    print(assembled)
|