b28-cli 1.6.6 → 1.6.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.MD +16 -9
- package/dist/ExtractFileOrigin.js +96 -6
- package/dist/cmd.js +251 -158
- package/dist/debug/test.js +28 -11
- package/dist/extract/vue/html-parser.js +118 -24
- package/dist/extractOrigin/extract.js +48 -103
- package/dist/extractOrigin/extract_html_ori.js +54 -32
- package/dist/extractOrigin/extract_js_ori.js +32 -387
- package/dist/handle.js +3 -1
- package/dist/index.js +0 -0
- package/dist/util/config.js +11 -1
- package/dist/util/index.js +17 -2
- package/index.js +0 -0
- package/package.json +5 -2
- package/test.js +182 -0
package/dist/debug/test.js
CHANGED
|
@@ -212,8 +212,22 @@ function transAllFile() {
|
|
|
212
212
|
});
|
|
213
213
|
}
|
|
214
214
|
|
|
215
|
+
function transOrigin() {
|
|
216
|
+
(0, _index2.default)({
|
|
217
|
+
commandType: _config.COMMAD.ORIGINAL_CODE,
|
|
218
|
+
baseProPath: "D:/Desktop/trans/test/web",
|
|
219
|
+
baseProOutPath: "D:/Desktop/trans/test",
|
|
220
|
+
enPath: "D:/Desktop/trans/test/server/multilang_en_EN.c",
|
|
221
|
+
otherLangPath: "D:/Desktop/trans/test/server/multilang_ru_RU.c,D:/Desktop/trans/test/server/multilang_zh_CN.c"
|
|
222
|
+
// ignoreCode:"",
|
|
223
|
+
// templateExp: /<%([^\n]*?)%>/g
|
|
224
|
+
}).then(function (data) {
|
|
225
|
+
var t = data;
|
|
226
|
+
});
|
|
227
|
+
}
|
|
228
|
+
|
|
215
229
|
module.exports = function () {
|
|
216
|
-
var command = "
|
|
230
|
+
var command = "origin";
|
|
217
231
|
switch (command) {
|
|
218
232
|
case "check":
|
|
219
233
|
check();
|
|
@@ -255,19 +269,19 @@ module.exports = function () {
|
|
|
255
269
|
// // onlyZH: true,
|
|
256
270
|
// // baseReadPath: "C:/Users/lenovo/Desktop/src",
|
|
257
271
|
// // baseOutPath: "C:/Users/lenovo/Desktop"
|
|
258
|
-
//
|
|
259
|
-
//
|
|
260
|
-
// baseReadPath: "D:/project/GNEUI/SourceCodes/Trunk/GNEUIv1.0/A27",
|
|
261
|
-
// baseOutPath: "C:/Users/lenovo/Desktop"
|
|
272
|
+
// baseReadPath: "./test/vue/debug/file",
|
|
273
|
+
// baseOutPath: "./test/vue/debug"
|
|
274
|
+
// // baseReadPath: "D:/project/GNEUI/SourceCodes/Trunk/GNEUIv1.0/A27",
|
|
275
|
+
// // baseOutPath: "C:/Users/lenovo/Desktop"
|
|
262
276
|
// });
|
|
263
277
|
(0, _index2.default)({
|
|
264
278
|
commandType: _config.COMMAD.TRANSLATE,
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
baseTranslatePath: "C:/Users/lenovo/Desktop/src",
|
|
269
|
-
baseTransOutPath: "C:/Users/lenovo/Desktop/out",
|
|
270
|
-
languagePath: "C:/Users/lenovo/Desktop/lang.xlsx",
|
|
279
|
+
baseTranslatePath: "./test/vue/debug/file",
|
|
280
|
+
baseTransOutPath: "./test/vue/debug/out",
|
|
281
|
+
languagePath: "./test/vue/debug/lang.xlsx",
|
|
282
|
+
// baseTranslatePath: "C:/Users/lenovo/Desktop/src",
|
|
283
|
+
// baseTransOutPath: "C:/Users/lenovo/Desktop/out",
|
|
284
|
+
// languagePath: "C:/Users/lenovo/Desktop/lang.xlsx",
|
|
271
285
|
hongPath: "",
|
|
272
286
|
sheetName: "",
|
|
273
287
|
// keyName: "CN",
|
|
@@ -279,5 +293,8 @@ module.exports = function () {
|
|
|
279
293
|
case "getWord":
|
|
280
294
|
getWords();
|
|
281
295
|
break;
|
|
296
|
+
case "origin":
|
|
297
|
+
transOrigin();
|
|
298
|
+
break;
|
|
282
299
|
}
|
|
283
300
|
};
|
|
@@ -118,6 +118,7 @@ function parseText(text, delimiters // 纯文本插入分隔符。默认为["{{"
|
|
|
118
118
|
function parseExp(tocken) {
|
|
119
119
|
var offset = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
120
120
|
var needMerge = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : false;
|
|
121
|
+
var isAbsolute = arguments.length > 3 && arguments[3] !== undefined ? arguments[3] : false;
|
|
121
122
|
|
|
122
123
|
var ast = void 0,
|
|
123
124
|
start = tocken.start + offset;
|
|
@@ -128,7 +129,12 @@ function parseExp(tocken) {
|
|
|
128
129
|
plugins: ["objectRestSpread"]
|
|
129
130
|
});
|
|
130
131
|
} catch (err) {
|
|
131
|
-
|
|
132
|
+
// 处理解析报错/未能解析的语法
|
|
133
|
+
return {
|
|
134
|
+
value: tocken.value,
|
|
135
|
+
start: tocken.start + offset,
|
|
136
|
+
isAbsolute: isAbsolute
|
|
137
|
+
};
|
|
132
138
|
}
|
|
133
139
|
|
|
134
140
|
ast = ast.program;
|
|
@@ -243,6 +249,72 @@ function parseExp(tocken) {
|
|
|
243
249
|
}
|
|
244
250
|
}
|
|
245
251
|
|
|
252
|
+
function filter(keys, key, index) {
|
|
253
|
+
return keys.some(function (v) {
|
|
254
|
+
if (v === key || key.lastIndexOf(v) !== -1 && key.lastIndexOf(v) === index - v.length) return true;
|
|
255
|
+
});
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
function getTockensFromStr(content, offset) {
|
|
259
|
+
var regqutoe = new RegExp(/((["'])(?:\\.|[^\\\n])*?\2)/g); //获取""和''内的内容包括引号
|
|
260
|
+
var matchKeyWord = ["_("];
|
|
261
|
+
var maxBackLen = 25; //定义最长回溯长度,一般js里的关键字长度不会超过25
|
|
262
|
+
|
|
263
|
+
var ret = [];
|
|
264
|
+
|
|
265
|
+
//对于已注释的代码不进行提取操作
|
|
266
|
+
//去除 /* xxx */ 或者// 注释的代码段
|
|
267
|
+
// content = content.replace(
|
|
268
|
+
// /((\/\*)((.|\s)*?)(\*\/))|((\/\/)((?:\\.|[^\\\n])*?)(\n))/g,
|
|
269
|
+
// ""
|
|
270
|
+
// );
|
|
271
|
+
//end
|
|
272
|
+
|
|
273
|
+
content.replace(regqutoe, function (matchString) {
|
|
274
|
+
var tocken = {
|
|
275
|
+
start: arguments[3] + offset,
|
|
276
|
+
end: arguments[3] + offset + matchString.length,
|
|
277
|
+
value: matchString,
|
|
278
|
+
isTrans: false,
|
|
279
|
+
type: "string"
|
|
280
|
+
};
|
|
281
|
+
|
|
282
|
+
if (arguments[3] >= 2 && content.slice(arguments[3] - 2, arguments[3]) === "_(") {
|
|
283
|
+
tocken.isTrans = true;
|
|
284
|
+
tocken.start += 1;
|
|
285
|
+
tocken.end -= 1;
|
|
286
|
+
tocken.type = "";
|
|
287
|
+
}
|
|
288
|
+
matchString = matchString.slice(1, matchString.length - 1);
|
|
289
|
+
if (matchString.trim().length > 0) {
|
|
290
|
+
if (/[\u4e00-\u9fa5]/.test(matchString)) {
|
|
291
|
+
//是否含有中文
|
|
292
|
+
if (/_\(['"].*?['"][\),]/.test(matchString)) {
|
|
293
|
+
//说明是在引号中间包裹的_("")
|
|
294
|
+
//分为带参数,不带参数
|
|
295
|
+
//双引号 单引号的四种情况
|
|
296
|
+
matchString = matchString.split("_('").pop().split("_(\"").pop().split("',")[0].split("\",")[0].split("\")")[0].split("')")[0];
|
|
297
|
+
}
|
|
298
|
+
tocken.value = matchString;
|
|
299
|
+
ret.push(tocken);
|
|
300
|
+
} else {
|
|
301
|
+
if (matchString.trim().length > 1 && /[a-z]/i.test(matchString)) {
|
|
302
|
+
var backLength = arguments[3] >= maxBackLen ? maxBackLen : arguments[3]; //计算回溯长度,一般js里的关键字长度不会超过25
|
|
303
|
+
|
|
304
|
+
var backStr = content.substr(arguments[3] - backLength, backLength);
|
|
305
|
+
if (filter(matchKeyWord, backStr, backLength) || matchString.indexOf(" ") > -1 && !/^[#\.\=\+\-\*\/]/.test(matchString.trim())) {
|
|
306
|
+
tocken.value = matchString;
|
|
307
|
+
//回溯查找
|
|
308
|
+
ret.push(tocken);
|
|
309
|
+
}
|
|
310
|
+
}
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
});
|
|
314
|
+
|
|
315
|
+
return ret;
|
|
316
|
+
}
|
|
317
|
+
|
|
246
318
|
/**
|
|
247
319
|
* 解析parseExp的结果,即{{}}模板表达式内的ast,提取词条或者合并词条
|
|
248
320
|
* 如果文本中存在翻译函数,则直接处理翻译函数,其它词条文本会被处理,但是不会进行任何的参数合并操作,只处理词条
|
|
@@ -252,8 +324,17 @@ function listModuleTockens(tockens, text) {
|
|
|
252
324
|
args: [],
|
|
253
325
|
text: "",
|
|
254
326
|
isTrans: false,
|
|
255
|
-
trans: []
|
|
327
|
+
trans: [],
|
|
328
|
+
stringTrans: false
|
|
256
329
|
};
|
|
330
|
+
// 处理未能解析的词条
|
|
331
|
+
if (Object.prototype.toString.call(tockens) === "[object Object]") {
|
|
332
|
+
outData.stringTrans = true;
|
|
333
|
+
// 解析当前字符串,找出是否有需要处理的词条
|
|
334
|
+
outData.trans = getTockensFromStr(text, tockens.isAbsolute ? tockens.start : 0);
|
|
335
|
+
return outData;
|
|
336
|
+
}
|
|
337
|
+
|
|
257
338
|
for (var i = 0, l = tockens.length; i < l; i++) {
|
|
258
339
|
var tocken = tockens[i];
|
|
259
340
|
|
|
@@ -514,16 +595,18 @@ function parseTemplate(template, target) {
|
|
|
514
595
|
|
|
515
596
|
parseHTML(template, {
|
|
516
597
|
start: function start(attrs) {
|
|
598
|
+
var _this = this;
|
|
599
|
+
|
|
517
600
|
var word = "";
|
|
518
601
|
attrs.forEach(function (attr) {
|
|
519
602
|
// 指令默认已添加翻译函数,未添加翻译函数代表不提取
|
|
520
603
|
if (attr.directive) {
|
|
521
604
|
// value的偏移量,计算value的ast时,start index需要偏移到value的起始位置
|
|
522
605
|
var _offset = template.slice(attr.start, attr.end).indexOf(attr.value);
|
|
523
|
-
var tockens = parseExp(attr, _offset);
|
|
606
|
+
var tockens = parseExp(attr, _offset, false, true);
|
|
524
607
|
var outData = listModuleTockens(tockens, attr.value);
|
|
525
608
|
// 对于v-bind指令,只有当指令内容包含翻译函数时才会进行处理,其它情况不进行处理
|
|
526
|
-
if (outData.isTrans) {
|
|
609
|
+
if (outData.isTrans || outData.stringTrans) {
|
|
527
610
|
outData.trans.forEach(function (item) {
|
|
528
611
|
word = "";
|
|
529
612
|
if (item.isTrans) {
|
|
@@ -542,39 +625,50 @@ function parseTemplate(template, target) {
|
|
|
542
625
|
}
|
|
543
626
|
});
|
|
544
627
|
}
|
|
545
|
-
} else {
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
628
|
+
} else if (/^v-/.test(attr.name)) {
|
|
629
|
+
// v-指令解析 +2为="的长度
|
|
630
|
+
var attrVal = template.slice(attr.start, attr.end);
|
|
631
|
+
var start = attr.start + attrVal.indexOf(attr.value);
|
|
632
|
+
_this.chars(attr.value, start, attr.end - 1, true);
|
|
633
|
+
} else if (_util.chineseRE.test(attr.value)) {
|
|
634
|
+
word = getWord(attr.value);
|
|
635
|
+
if (word && word !== attr.value) {
|
|
636
|
+
langs.push({
|
|
637
|
+
start: attr.start,
|
|
638
|
+
end: attr.end,
|
|
639
|
+
value: word,
|
|
640
|
+
name: attr.name,
|
|
641
|
+
needBind: true
|
|
642
|
+
});
|
|
557
643
|
}
|
|
558
644
|
}
|
|
559
645
|
});
|
|
560
646
|
},
|
|
561
647
|
|
|
562
648
|
// 处理文本元素textnode
|
|
563
|
-
chars: function chars(text, start, end) {
|
|
649
|
+
chars: function chars(text, start, end, isDirective) {
|
|
564
650
|
if (!text.trim()) {
|
|
565
651
|
return;
|
|
566
652
|
}
|
|
567
653
|
|
|
568
654
|
// 文本直接添加,指令则为对象
|
|
569
655
|
var textArr = [];
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
656
|
+
var tockens = void 0,
|
|
657
|
+
needMerge = void 0;
|
|
658
|
+
if (isDirective) {
|
|
659
|
+
tockens = [{
|
|
660
|
+
value: text,
|
|
661
|
+
directive: true,
|
|
662
|
+
start: start,
|
|
663
|
+
end: end
|
|
664
|
+
}];
|
|
665
|
+
} else {
|
|
666
|
+
var outExp = parseText(text, delimiters);
|
|
667
|
+
tockens = outExp.tockens;
|
|
668
|
+
needMerge = outExp.needMerge;
|
|
669
|
+
}
|
|
574
670
|
|
|
575
671
|
// 文本内容中包含指令
|
|
576
|
-
|
|
577
|
-
|
|
578
672
|
if (tockens) {
|
|
579
673
|
var _ret = function () {
|
|
580
674
|
// 解析指令表达式转成AST
|
|
@@ -595,7 +689,7 @@ function parseTemplate(template, target) {
|
|
|
595
689
|
// 解析指令ast和字符串,输出最终的翻译表达式
|
|
596
690
|
for (var i = 0, l = textArr.length; i < l; i++) {
|
|
597
691
|
var tocken = textArr[i];
|
|
598
|
-
if (hasTrans && ((typeof tocken === "undefined" ? "undefined" : (0, _typeof3.default)(tocken)) !== "object" || !tocken.isTrans)) {
|
|
692
|
+
if (hasTrans && ((typeof tocken === "undefined" ? "undefined" : (0, _typeof3.default)(tocken)) !== "object" || !tocken.isTrans) && !tocken.stringTrans) {
|
|
599
693
|
continue;
|
|
600
694
|
}
|
|
601
695
|
if (typeof tocken === "string") {
|
|
@@ -20,7 +20,7 @@ function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { de
|
|
|
20
20
|
* 词条提取、翻译基类
|
|
21
21
|
*/
|
|
22
22
|
var Extract = function () {
|
|
23
|
-
function Extract(option) {
|
|
23
|
+
function Extract(option, words) {
|
|
24
24
|
(0, _classCallCheck3.default)(this, Extract);
|
|
25
25
|
|
|
26
26
|
this.option = Object.assign({}, {
|
|
@@ -29,45 +29,43 @@ var Extract = function () {
|
|
|
29
29
|
ignoreCode: /<!--\s*hide|-->/g,
|
|
30
30
|
// 将对应的词条全部修改为'/**<%%>**/window.MS'
|
|
31
31
|
// templateExp: /(\=|\+|\-|\*|\/|\s|\(|\[|\{)\s*<%.*?%>/g
|
|
32
|
-
templateExp:
|
|
32
|
+
templateExp: /<%\s*multilang\(([^\n]*?)\);*\s*%>/g ///<%([^\n]*?)%>/g,只能存在一个捕获组
|
|
33
33
|
// 自定义不提词条规则,可以是正则也可以是function
|
|
34
|
-
customRules: []
|
|
34
|
+
// customRules: []
|
|
35
35
|
}, option);
|
|
36
|
-
this.init();
|
|
36
|
+
this.init(words);
|
|
37
37
|
}
|
|
38
38
|
|
|
39
|
-
Extract.prototype.init = function init() {
|
|
40
|
-
|
|
41
|
-
|
|
39
|
+
Extract.prototype.init = function init(words) {
|
|
40
|
+
this.words = words;
|
|
42
41
|
// 记录当前的文件路径
|
|
43
42
|
this.curFilePath = "";
|
|
44
|
-
// 提取的词条,去除了重复项,当为翻译模式时间,只存储未被翻译的词条
|
|
45
|
-
this.words = [];
|
|
46
43
|
// 是否正在处理文件
|
|
47
44
|
this.isWorking = false;
|
|
48
45
|
// 待处理文件列表
|
|
49
46
|
this.handleList = [];
|
|
50
47
|
this.ignoreRE = _config.IGNORE_REGEXP.slice(0);
|
|
51
48
|
this.ignoreFuns = _config.IGNORE_FUNCTIONS.slice(0);
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
49
|
+
this.suspectLine = [];
|
|
50
|
+
|
|
51
|
+
// let customRules = this.option.customRules;
|
|
52
|
+
// if (Array.isArray(customRules)) {
|
|
53
|
+
// customRules.forEach(item => {
|
|
54
|
+
// if (typeof item === "function") {
|
|
55
|
+
// this.ignoreFuns.push(item);
|
|
56
|
+
// } else {
|
|
57
|
+
// this.ignoreRE.push(item);
|
|
58
|
+
// }
|
|
59
|
+
// });
|
|
60
|
+
// } else if (typeof customRules === "function") {
|
|
61
|
+
// this.ignoreFuns.push(customRules);
|
|
62
|
+
// } else if (customRules) {
|
|
63
|
+
// this.ignoreRE.push(string2Regexp(customRules));
|
|
64
|
+
// }
|
|
67
65
|
};
|
|
68
66
|
|
|
69
67
|
Extract.prototype.handleFile = function handleFile(filePath) {
|
|
70
|
-
var
|
|
68
|
+
var _this = this;
|
|
71
69
|
|
|
72
70
|
// log(`开始提取文件-${filePath}`);
|
|
73
71
|
this.isWorking = true;
|
|
@@ -75,17 +73,31 @@ var Extract = function () {
|
|
|
75
73
|
return (0, _index.loadFile)(filePath).then(function (data) {
|
|
76
74
|
// 写入文件
|
|
77
75
|
(0, _index.log)("\u6DFB\u52A0\u7FFB\u8BD1\u51FD\u6570-" + filePath);
|
|
78
|
-
return
|
|
76
|
+
return _this.transNode(data);
|
|
79
77
|
}).then(function (AST) {
|
|
80
|
-
return
|
|
78
|
+
return _this.scanNode(AST);
|
|
81
79
|
}).then(function (fileData) {
|
|
82
|
-
(0, _index.writeTextFile)(_path2.default.resolve(
|
|
83
|
-
|
|
84
|
-
|
|
80
|
+
(0, _index.writeTextFile)(_path2.default.resolve(_this.option.baseWritePath, _path2.default.relative(_this.option.baseReadPath, _this.curFilePath)), fileData);
|
|
81
|
+
_this.complete();
|
|
82
|
+
// 记录可能需要人工二次审核的词条
|
|
83
|
+
var matchs = fileData.match(/(alert|confirm)\(.*?\)/gi);
|
|
84
|
+
if (matchs) {
|
|
85
|
+
matchs = matchs.filter(function (item) {
|
|
86
|
+
return !(/(alert|confirm)\((.).*?\2\)/gi.test(item) || /_\(.*?\)/g.test(item) || !/\s/g.test(item));
|
|
87
|
+
});
|
|
88
|
+
if (matchs.length > 0) {
|
|
89
|
+
var _suspectLine;
|
|
90
|
+
|
|
91
|
+
(_suspectLine = _this.suspectLine).push.apply(_suspectLine, ["#--#--#--#--#--# " + _this.curFilePath + " #--#--#--#--#--#"].concat(matchs));
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
return _this.startTrans();
|
|
85
96
|
}).catch(function (error) {
|
|
86
|
-
|
|
87
|
-
(0, _index.log)("\u6587\u4EF6[" + filePath + "]\u5904\u7406\u51FA\u9519
|
|
88
|
-
|
|
97
|
+
_this.copyFile(filePath);
|
|
98
|
+
(0, _index.log)("\u6587\u4EF6[" + filePath + "]\u5904\u7406\u51FA\u9519", _index.LOG_TYPE.ERROR);
|
|
99
|
+
(0, _index.log)(error, _index.LOG_TYPE.ERROR);
|
|
100
|
+
return _this.startTrans();
|
|
89
101
|
});
|
|
90
102
|
};
|
|
91
103
|
|
|
@@ -98,92 +110,25 @@ var Extract = function () {
|
|
|
98
110
|
return Promise.resolve(data);
|
|
99
111
|
};
|
|
100
112
|
|
|
101
|
-
Extract.prototype.setAttr = function setAttr(attr, value) {
|
|
102
|
-
if (Object.prototype.toString.call(attr) === "[object Object]") {
|
|
103
|
-
for (var key in attr) {
|
|
104
|
-
this.setSingleAttr(key, attr[key]);
|
|
105
|
-
}
|
|
106
|
-
} else {
|
|
107
|
-
this.setSingleAttr(attr, value);
|
|
108
|
-
}
|
|
109
|
-
};
|
|
110
|
-
|
|
111
|
-
Extract.prototype.setSingleAttr = function setSingleAttr(attr, value) {
|
|
112
|
-
this.option[attr] = value;
|
|
113
|
-
};
|
|
114
|
-
|
|
115
113
|
Extract.prototype.startTrans = function startTrans() {
|
|
116
114
|
// 当一个文件执行完成,立即执行下一个指令
|
|
117
115
|
if (this.handleList.length > 0) {
|
|
118
116
|
return this.handleFile(this.handleList.shift());
|
|
119
117
|
}
|
|
120
|
-
return Promise.resolve(
|
|
118
|
+
return Promise.resolve(this.suspectLine);
|
|
121
119
|
};
|
|
122
120
|
|
|
123
121
|
Extract.prototype.addTask = function addTask(filePath) {
|
|
124
122
|
this.handleList.push(filePath);
|
|
125
123
|
};
|
|
126
124
|
|
|
127
|
-
Extract.prototype.
|
|
128
|
-
|
|
129
|
-
this.words.push(word);
|
|
130
|
-
}
|
|
131
|
-
};
|
|
132
|
-
|
|
133
|
-
Extract.prototype.addWords = function addWords(words) {
|
|
134
|
-
var _this3 = this;
|
|
135
|
-
|
|
136
|
-
words.forEach(function (word) {
|
|
137
|
-
_this3.addWord(word);
|
|
138
|
-
});
|
|
139
|
-
};
|
|
140
|
-
|
|
141
|
-
Extract.prototype.getWord = function getWord(val) {
|
|
142
|
-
if (!val || /^\s*$/.test(val)) {
|
|
143
|
-
return "";
|
|
144
|
-
}
|
|
145
|
-
|
|
146
|
-
// 经过处理的字符串
|
|
147
|
-
if (/\{%s\}/i.test(val)) {
|
|
148
|
-
return val;
|
|
149
|
-
}
|
|
150
|
-
|
|
151
|
-
var skip = this.ignoreRE.some(function (item) {
|
|
152
|
-
return item.test(val);
|
|
153
|
-
});
|
|
154
|
-
if (skip) {
|
|
155
|
-
return "";
|
|
156
|
-
}
|
|
157
|
-
|
|
158
|
-
for (var i = 0, l = this.ignoreFuns.length; i < l; i++) {
|
|
159
|
-
var fun = this.ignoreFuns[i],
|
|
160
|
-
str = val.replace(/(^\s+)|(\s+$)/g, "");
|
|
161
|
-
|
|
162
|
-
if (typeof fun === "function") {
|
|
163
|
-
if (skip = fun(str)) {
|
|
164
|
-
break;
|
|
165
|
-
}
|
|
166
|
-
}
|
|
167
|
-
}
|
|
168
|
-
if (skip) {
|
|
169
|
-
return "";
|
|
170
|
-
}
|
|
171
|
-
|
|
172
|
-
var addValue = "";
|
|
173
|
-
|
|
174
|
-
//中英文都提取
|
|
175
|
-
if (/[a-z]/i.test(val) || /[\u4e00-\u9fa5]/.test(val)) {
|
|
176
|
-
addValue = val;
|
|
177
|
-
}
|
|
178
|
-
|
|
179
|
-
return addValue;
|
|
125
|
+
Extract.prototype.transWord = function transWord(val) {
|
|
126
|
+
return this.words[val];
|
|
180
127
|
};
|
|
181
128
|
|
|
182
129
|
Extract.prototype.complete = function complete() {
|
|
183
130
|
this.isWorking = false;
|
|
184
131
|
this.option.onComplete && this.option.onComplete(this.curFilePath, this.words);
|
|
185
|
-
// 重置提取的词条
|
|
186
|
-
this.words = [];
|
|
187
132
|
};
|
|
188
133
|
|
|
189
134
|
return Extract;
|
|
@@ -35,20 +35,23 @@ var JSDOM = jsdom.JSDOM;
|
|
|
35
35
|
var ExtractHTML = function (_Extract) {
|
|
36
36
|
(0, _inherits3.default)(ExtractHTML, _Extract);
|
|
37
37
|
|
|
38
|
-
function ExtractHTML(option) {
|
|
38
|
+
function ExtractHTML(option, words) {
|
|
39
39
|
(0, _classCallCheck3.default)(this, ExtractHTML);
|
|
40
40
|
|
|
41
|
-
var _this = (0, _possibleConstructorReturn3.default)(this, _Extract.call(this, option));
|
|
41
|
+
var _this = (0, _possibleConstructorReturn3.default)(this, _Extract.call(this, option, words));
|
|
42
42
|
|
|
43
43
|
_this.extractJS = new _extract_js_ori2.default({
|
|
44
44
|
ignoreCode: _this.option.ignoreCode,
|
|
45
45
|
templateExp: _this.option.templateExp
|
|
46
|
-
});
|
|
46
|
+
}, words);
|
|
47
47
|
_this.jsHandleList = [];
|
|
48
48
|
return _this;
|
|
49
49
|
}
|
|
50
50
|
|
|
51
51
|
ExtractHTML.prototype.transNode = function transNode(html) {
|
|
52
|
+
var _this2 = this;
|
|
53
|
+
|
|
54
|
+
html = html.replace(/\s*<!--\s*<(\/?)script>\s*-->/ig, '');
|
|
52
55
|
this.oldHtml = html;
|
|
53
56
|
this.scripts = [];
|
|
54
57
|
this.getHeaderTag(html);
|
|
@@ -59,7 +62,7 @@ var ExtractHTML = function (_Extract) {
|
|
|
59
62
|
var dom = new JSDOM(html, {
|
|
60
63
|
virtualConsole: virtualConsole
|
|
61
64
|
});
|
|
62
|
-
var document = dom.window.document;
|
|
65
|
+
var document = _this2.document = dom.window.document;
|
|
63
66
|
resolve(document);
|
|
64
67
|
} catch (err) {
|
|
65
68
|
reject(err);
|
|
@@ -76,7 +79,7 @@ var ExtractHTML = function (_Extract) {
|
|
|
76
79
|
|
|
77
80
|
|
|
78
81
|
ExtractHTML.prototype.scanNode = function scanNode(document) {
|
|
79
|
-
var
|
|
82
|
+
var _this3 = this;
|
|
80
83
|
|
|
81
84
|
// 遍历各节点
|
|
82
85
|
this.listNode(document.documentElement);
|
|
@@ -84,26 +87,46 @@ var ExtractHTML = function (_Extract) {
|
|
|
84
87
|
return this.nextJsTask().then(function () {
|
|
85
88
|
// 通过正则替换,为了规避jsdom对html中的特殊字符串进行编码
|
|
86
89
|
var outHtml = document.documentElement.innerHTML;
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
90
|
+
return _this3.handleHtml(outHtml);
|
|
91
|
+
});
|
|
92
|
+
};
|
|
93
|
+
|
|
94
|
+
ExtractHTML.prototype.handleHtml = function handleHtml(htmlCode) {
|
|
95
|
+
var _this4 = this;
|
|
91
96
|
|
|
92
|
-
|
|
97
|
+
var oldHtml = this.oldHtml;
|
|
98
|
+
var match = htmlCode.match(/<script\b[^>]*>[\s\S]*?<\/script>/gi);
|
|
99
|
+
htmlCode = oldHtml.replace(/<script\b[^>]*>[\s\S]*?<\/script>/gi, function () {
|
|
100
|
+
return match.shift();
|
|
101
|
+
});
|
|
102
|
+
|
|
103
|
+
var templateExp = this.option.templateExp;
|
|
104
|
+
|
|
105
|
+
return htmlCode.replace(templateExp, function (match, p1, index) {
|
|
106
|
+
var reg = (0, _index.escapeRegExp)(match);
|
|
107
|
+
var tempReg = templateExp.toString();
|
|
108
|
+
reg += templateExp.source;
|
|
109
|
+
reg = new RegExp(reg, tempReg.slice(tempReg.lastIndexOf("/") + 1));
|
|
110
|
+
var word = _this4.transWord(p1);
|
|
111
|
+
if (reg.test(htmlCode) && word && !/\s/.test(htmlCode[index + match.length])) {
|
|
112
|
+
return "word ";
|
|
113
|
+
}
|
|
114
|
+
return word || match;
|
|
93
115
|
});
|
|
94
116
|
};
|
|
95
117
|
|
|
96
118
|
ExtractHTML.prototype.handleJsTask = function handleJsTask(child) {
|
|
97
|
-
var
|
|
119
|
+
var _this5 = this;
|
|
98
120
|
|
|
99
|
-
return this.extractJS.transNode(child.
|
|
100
|
-
return
|
|
121
|
+
return this.extractJS.transNode(child.innerHTML, true).then(function (AST) {
|
|
122
|
+
return _this5.extractJS.scanNode(AST);
|
|
101
123
|
}).then(function (fileData) {
|
|
102
124
|
// 写入文件
|
|
103
|
-
child.
|
|
104
|
-
return
|
|
105
|
-
}).catch(function () {
|
|
106
|
-
|
|
125
|
+
child.innerHTML = fileData;
|
|
126
|
+
return _this5.nextJsTask();
|
|
127
|
+
}).catch(function (e) {
|
|
128
|
+
(0, _index.log)(e, _index.LOG_TYPE.ERROR);
|
|
129
|
+
return _this5.nextJsTask();
|
|
107
130
|
});
|
|
108
131
|
};
|
|
109
132
|
|
|
@@ -126,25 +149,24 @@ var ExtractHTML = function (_Extract) {
|
|
|
126
149
|
|
|
127
150
|
var firstChild = element.firstChild,
|
|
128
151
|
nextSibling = element.nextSibling,
|
|
129
|
-
nodeType = element.nodeType,
|
|
130
152
|
nodeName = element.nodeName.toLowerCase();
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
}
|
|
137
|
-
} else {
|
|
138
|
-
// 处理子节点
|
|
139
|
-
if (firstChild) {
|
|
140
|
-
this.listNode(firstChild);
|
|
141
|
-
}
|
|
153
|
+
|
|
154
|
+
if (nodeName == "script") {
|
|
155
|
+
this.addJsTask(element);
|
|
156
|
+
nextSibling && this.listNode(nextSibling);
|
|
157
|
+
return;
|
|
142
158
|
}
|
|
143
159
|
|
|
144
|
-
//
|
|
145
|
-
if (
|
|
146
|
-
this.listNode(nextSibling);
|
|
160
|
+
// noscript内的文本不处理
|
|
161
|
+
if (nodeName === "noscript" || nodeName == "style") {
|
|
162
|
+
nextSibling && this.listNode(nextSibling);
|
|
163
|
+
return;
|
|
147
164
|
}
|
|
165
|
+
// 处理子节点
|
|
166
|
+
firstChild && this.listNode(firstChild);
|
|
167
|
+
|
|
168
|
+
// 处理兄弟节点
|
|
169
|
+
nextSibling && this.listNode(nextSibling);
|
|
148
170
|
};
|
|
149
171
|
|
|
150
172
|
return ExtractHTML;
|