wikiparser-node 0.3.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/config/default.json +13 -17
- package/config/llwiki.json +11 -79
- package/config/moegirl.json +7 -1
- package/config/zhwiki.json +1269 -0
- package/index.js +130 -97
- package/lib/element.js +410 -518
- package/lib/node.js +493 -115
- package/lib/ranges.js +27 -19
- package/lib/text.js +175 -0
- package/lib/title.js +14 -6
- package/mixin/attributeParent.js +70 -24
- package/mixin/fixedToken.js +18 -10
- package/mixin/hidden.js +6 -4
- package/mixin/sol.js +39 -12
- package/package.json +17 -4
- package/parser/brackets.js +18 -18
- package/parser/commentAndExt.js +16 -14
- package/parser/converter.js +14 -13
- package/parser/externalLinks.js +12 -11
- package/parser/hrAndDoubleUnderscore.js +24 -14
- package/parser/html.js +8 -7
- package/parser/links.js +13 -13
- package/parser/list.js +12 -11
- package/parser/magicLinks.js +11 -10
- package/parser/quotes.js +6 -5
- package/parser/selector.js +175 -0
- package/parser/table.js +31 -24
- package/src/arg.js +91 -43
- package/src/atom/hidden.js +5 -2
- package/src/atom/index.js +17 -9
- package/src/attribute.js +210 -101
- package/src/converter.js +78 -43
- package/src/converterFlags.js +104 -45
- package/src/converterRule.js +136 -78
- package/src/extLink.js +81 -27
- package/src/gallery.js +63 -20
- package/src/heading.js +58 -20
- package/src/html.js +138 -48
- package/src/imageParameter.js +93 -58
- package/src/index.js +314 -186
- package/src/link/category.js +22 -54
- package/src/link/file.js +83 -32
- package/src/link/galleryImage.js +21 -7
- package/src/link/index.js +170 -81
- package/src/magicLink.js +64 -14
- package/src/nowiki/comment.js +36 -10
- package/src/nowiki/dd.js +37 -22
- package/src/nowiki/doubleUnderscore.js +21 -7
- package/src/nowiki/hr.js +11 -7
- package/src/nowiki/index.js +16 -9
- package/src/nowiki/list.js +2 -2
- package/src/nowiki/noinclude.js +8 -4
- package/src/nowiki/quote.js +38 -7
- package/src/onlyinclude.js +24 -7
- package/src/parameter.js +102 -62
- package/src/syntax.js +23 -20
- package/src/table/index.js +282 -174
- package/src/table/td.js +112 -61
- package/src/table/tr.js +135 -74
- package/src/tagPair/ext.js +30 -23
- package/src/tagPair/include.js +26 -11
- package/src/tagPair/index.js +72 -29
- package/src/transclude.js +235 -127
- package/tool/index.js +42 -32
- package/util/debug.js +21 -18
- package/util/diff.js +76 -0
- package/util/lint.js +40 -0
- package/util/string.js +56 -26
- package/.eslintrc.json +0 -319
- package/errors/README +0 -1
- package/jsconfig.json +0 -7
- package/printed/README +0 -1
- package/typings/element.d.ts +0 -28
- package/typings/index.d.ts +0 -52
- package/typings/node.d.ts +0 -23
- package/typings/parser.d.ts +0 -9
- package/typings/table.d.ts +0 -14
- package/typings/token.d.ts +0 -22
- package/typings/tool.d.ts +0 -10
package/parser/brackets.js
CHANGED
|
@@ -1,18 +1,20 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
3
|
const {removeComment} = require('../util/string'),
|
|
4
|
-
|
|
4
|
+
Parser = require('..');
|
|
5
5
|
|
|
6
6
|
/**
|
|
7
|
-
*
|
|
7
|
+
* 解析花括号
|
|
8
|
+
* @param {string} text wikitext
|
|
8
9
|
* @param {accum} accum
|
|
10
|
+
* @throws TranscludeToken.constructor()
|
|
9
11
|
*/
|
|
10
12
|
const parseBrackets = (text, config = Parser.getConfig(), accum = []) => {
|
|
11
|
-
const source = '^(\0\\d+c\
|
|
13
|
+
const source = '^(\0\\d+c\x7F)*={1,6}|\\[\\[|\\{{2,}|-\\{(?!\\{)',
|
|
12
14
|
/** @type {BracketExecArray[]} */ stack = [],
|
|
13
|
-
closes = {'=': '\n', '{': '}{2,}|\\|', '-': '}-', '[': ']]'},
|
|
15
|
+
closes = {'=': '\n', '{': '\\}{2,}|\\|', '-': '\\}-', '[': '\\]\\]'},
|
|
14
16
|
/** @type {Record<string, string>} */ marks = {'!': '!', '!!': '+', '(!': '{', '!)': '}', '!-': '-', '=': '~'};
|
|
15
|
-
let regex = RegExp(source, '
|
|
17
|
+
let regex = new RegExp(source, 'gmu'),
|
|
16
18
|
/** @type {BracketExecArray} */ mt = regex.exec(text),
|
|
17
19
|
moreBraces = text.includes('}}'),
|
|
18
20
|
lastIndex;
|
|
@@ -24,17 +26,17 @@ const parseBrackets = (text, config = Parser.getConfig(), accum = []) => {
|
|
|
24
26
|
}
|
|
25
27
|
const {0: syntax, index: curIndex} = mt ?? {0: '\n', index: text.length},
|
|
26
28
|
/** @type {BracketExecArray} */ top = stack.pop() ?? {},
|
|
27
|
-
{0: open, index, parts} = top,
|
|
28
|
-
innerEqual = syntax === '=' &&
|
|
29
|
-
if (
|
|
29
|
+
{0: open, index, parts, findEqual: topFindEqual, pos: topPos} = top,
|
|
30
|
+
innerEqual = syntax === '=' && topFindEqual;
|
|
31
|
+
if (syntax === ']]' || syntax === '}-') { // 情形1:闭合内链或转换
|
|
30
32
|
lastIndex = curIndex + 2;
|
|
31
33
|
} else if (syntax === '\n') { // 情形2:闭合标题
|
|
32
34
|
lastIndex = curIndex + 1;
|
|
33
35
|
const {pos, findEqual} = stack.at(-1) ?? {};
|
|
34
36
|
if (!pos || findEqual || removeComment(text.slice(pos, index)) !== '') {
|
|
35
|
-
const rmt = /^(={1,6})(.+)\1((?:\s|\0\d+c\
|
|
37
|
+
const rmt = /^(={1,6})(.+)\1((?:\s|\0\d+c\x7F)*)$/u.exec(text.slice(index, curIndex));
|
|
36
38
|
if (rmt) {
|
|
37
|
-
text = `${text.slice(0, index)}\0${accum.length}h\
|
|
39
|
+
text = `${text.slice(0, index)}\0${accum.length}h\x7F${text.slice(curIndex)}`;
|
|
38
40
|
lastIndex = index + 4 + String(accum.length).length;
|
|
39
41
|
const HeadingToken = require('../src/heading');
|
|
40
42
|
new HeadingToken(rmt[1].length, rmt.slice(2), config, accum);
|
|
@@ -42,7 +44,7 @@ const parseBrackets = (text, config = Parser.getConfig(), accum = []) => {
|
|
|
42
44
|
}
|
|
43
45
|
} else if (syntax === '|' || innerEqual) { // 情形3:模板内部,含行首单个'='
|
|
44
46
|
lastIndex = curIndex + 1;
|
|
45
|
-
parts.at(-1).push(text.slice(
|
|
47
|
+
parts.at(-1).push(text.slice(topPos, curIndex));
|
|
46
48
|
if (syntax === '|') {
|
|
47
49
|
parts.push([]);
|
|
48
50
|
}
|
|
@@ -54,9 +56,8 @@ const parseBrackets = (text, config = Parser.getConfig(), accum = []) => {
|
|
|
54
56
|
rest = open.length - close.length,
|
|
55
57
|
{length} = accum;
|
|
56
58
|
lastIndex = curIndex + close.length; // 这不是最终的lastIndex
|
|
57
|
-
parts.at(-1).push(text.slice(
|
|
58
|
-
|
|
59
|
-
const ch = close.length === 2 ? marks[removeComment(parts[0][0])] ?? 't' : 't';
|
|
59
|
+
parts.at(-1).push(text.slice(topPos, curIndex));
|
|
60
|
+
const ch = close.length === 2 ? marks[removeComment(parts[0][0])] ?? 't' : 't'; // 标记{{!}}等
|
|
60
61
|
let skip = false;
|
|
61
62
|
if (close.length === 3) {
|
|
62
63
|
const ArgToken = require('../src/arg');
|
|
@@ -75,8 +76,7 @@ const parseBrackets = (text, config = Parser.getConfig(), accum = []) => {
|
|
|
75
76
|
}
|
|
76
77
|
}
|
|
77
78
|
if (!skip) {
|
|
78
|
-
|
|
79
|
-
text = `${text.slice(0, index + rest)}\0${length}${ch}\x7f${text.slice(lastIndex)}`;
|
|
79
|
+
text = `${text.slice(0, index + rest)}\0${length}${ch}\x7F${text.slice(lastIndex)}`;
|
|
80
80
|
lastIndex = index + rest + 3 + String(length).length;
|
|
81
81
|
if (rest > 1) {
|
|
82
82
|
stack.push({0: open.slice(0, rest), index, pos: index + rest, parts: [[]]});
|
|
@@ -98,10 +98,10 @@ const parseBrackets = (text, config = Parser.getConfig(), accum = []) => {
|
|
|
98
98
|
stack.pop();
|
|
99
99
|
curTop = stack.at(-1);
|
|
100
100
|
}
|
|
101
|
-
regex = RegExp(source + (curTop
|
|
101
|
+
regex = new RegExp(source + (curTop
|
|
102
102
|
? `|${closes[curTop[0][0]]}${curTop.findEqual ? '|=' : ''}`
|
|
103
103
|
: ''
|
|
104
|
-
), '
|
|
104
|
+
), 'gmu');
|
|
105
105
|
regex.lastIndex = lastIndex;
|
|
106
106
|
mt = regex.exec(text);
|
|
107
107
|
}
|
package/parser/commentAndExt.js
CHANGED
|
@@ -1,49 +1,51 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
const
|
|
3
|
+
const Parser = require('..');
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
|
-
*
|
|
6
|
+
* 解析HTML注释和扩展标签
|
|
7
|
+
* @param {string} text wikitext
|
|
7
8
|
* @param {accum} accum
|
|
9
|
+
* @param {boolean} includeOnly 是否嵌入
|
|
8
10
|
*/
|
|
9
11
|
const parseCommentAndExt = (text, config = Parser.getConfig(), accum = [], includeOnly = false) => {
|
|
10
|
-
const onlyinclude = /<onlyinclude>(.*?)<\/onlyinclude>/
|
|
12
|
+
const onlyinclude = /<onlyinclude>(.*?)<\/onlyinclude>/gsu;
|
|
11
13
|
if (includeOnly && text.search(onlyinclude) !== -1) { // `<onlyinclude>`拥有最高优先级
|
|
12
|
-
return text.
|
|
13
|
-
const str = `\0${accum.length}e\
|
|
14
|
-
|
|
14
|
+
return text.replaceAll(onlyinclude, /** @param {string} inner */ (_, inner) => {
|
|
15
|
+
const str = `\0${accum.length}e\x7F`;
|
|
16
|
+
const OnlyincludeToken = require('../src/onlyinclude');
|
|
15
17
|
new OnlyincludeToken(inner, config, accum);
|
|
16
18
|
return str;
|
|
17
|
-
}).
|
|
19
|
+
}).replaceAll(/(?<=^|\0\d+e\x7F).*?(?=$|\0\d+e\x7F)/gsu, substr => {
|
|
18
20
|
if (substr === '') {
|
|
19
21
|
return '';
|
|
20
22
|
}
|
|
21
23
|
const NoincludeToken = require('../src/nowiki/noinclude');
|
|
22
24
|
new NoincludeToken(substr, config, accum);
|
|
23
|
-
return `\0${accum.length - 1}c\
|
|
25
|
+
return `\0${accum.length - 1}c\x7F`;
|
|
24
26
|
});
|
|
25
27
|
}
|
|
26
28
|
const ext = config.ext.join('|'),
|
|
27
29
|
includeRegex = includeOnly ? 'includeonly' : '(?:no|only)include',
|
|
28
30
|
noincludeRegex = includeOnly ? 'noinclude' : 'includeonly',
|
|
29
|
-
regex = RegExp(
|
|
31
|
+
regex = new RegExp(
|
|
30
32
|
'<!--.*?(?:-->|$)|' // comment
|
|
31
33
|
+ `<${includeRegex}(?:\\s[^>]*?)?>|</${includeRegex}\\s*>|` // <includeonly>
|
|
32
34
|
+ `<(${ext})(\\s[^>]*?)?(?:/>|>(.*?)</(\\1\\s*)>)|` // 扩展标签
|
|
33
35
|
+ `<(${noincludeRegex})(\\s[^>]*?)?(?:/>|>(.*?)(?:</(\\5\\s*)>|$))`, // <noinclude>
|
|
34
|
-
'
|
|
36
|
+
'gisu',
|
|
35
37
|
);
|
|
36
|
-
return text.
|
|
38
|
+
return text.replaceAll(
|
|
37
39
|
regex,
|
|
38
40
|
/** @type {function(...string): string} */
|
|
39
41
|
(substr, name, attr, inner, closing, include, includeAttr, includeInner, includeClosing) => {
|
|
40
|
-
const str = `\0${accum.length}${name ? 'e' : 'c'}\
|
|
42
|
+
const str = `\0${accum.length}${name ? 'e' : 'c'}\x7F`;
|
|
41
43
|
if (name) {
|
|
42
44
|
const ExtToken = require('../src/tagPair/ext');
|
|
43
45
|
new ExtToken(name, attr, inner, closing, config, accum);
|
|
44
46
|
} else if (substr.startsWith('<!--')) {
|
|
45
|
-
const CommentToken = require('../src/nowiki/comment')
|
|
46
|
-
|
|
47
|
+
const CommentToken = require('../src/nowiki/comment');
|
|
48
|
+
const closed = substr.endsWith('-->');
|
|
47
49
|
new CommentToken(substr.slice(4, closed ? -3 : undefined), closed, config, accum);
|
|
48
50
|
} else if (include) {
|
|
49
51
|
const IncludeToken = require('../src/tagPair/include');
|
package/parser/converter.js
CHANGED
|
@@ -1,32 +1,33 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
const
|
|
3
|
+
const Parser = require('..');
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
|
-
*
|
|
6
|
+
* 解析语言变体转换
|
|
7
|
+
* @param {string} wikitext wikitext
|
|
7
8
|
* @param {accum} accum
|
|
8
9
|
*/
|
|
9
|
-
const parseConverter = (
|
|
10
|
-
const ConverterToken = require('../src/converter')
|
|
11
|
-
|
|
12
|
-
regex2 = /-\{|\}-/
|
|
10
|
+
const parseConverter = (wikitext, config = Parser.getConfig(), accum = []) => {
|
|
11
|
+
const ConverterToken = require('../src/converter');
|
|
12
|
+
const regex1 = /-\{/gu,
|
|
13
|
+
regex2 = /-\{|\}-/gu,
|
|
13
14
|
/** @type {RegExpExecArray[]} */ stack = [];
|
|
14
15
|
let regex = regex1,
|
|
15
|
-
mt = regex.exec(
|
|
16
|
+
mt = regex.exec(wikitext);
|
|
16
17
|
while (mt) {
|
|
17
18
|
const {0: syntax, index} = mt;
|
|
18
19
|
if (syntax === '}-') {
|
|
19
20
|
const top = stack.pop(),
|
|
20
21
|
{length} = accum,
|
|
21
|
-
str =
|
|
22
|
+
str = wikitext.slice(top.index + 2, index),
|
|
22
23
|
i = str.indexOf('|'),
|
|
23
24
|
[flags, text] = i === -1 ? [[], str] : [str.slice(0, i).split(';'), str.slice(i + 1)],
|
|
24
|
-
temp = text.
|
|
25
|
+
temp = text.replaceAll(/(&[#a-z\d]+);/giu, '$1\x01'),
|
|
25
26
|
variants = `(?:${config.variants.join('|')})`,
|
|
26
|
-
rules = temp.split(RegExp(`;(?=\\s*(?:${variants}|[^;]*?=>\\s*${variants})\\s*:)
|
|
27
|
+
rules = temp.split(new RegExp(`;(?=\\s*(?:${variants}|[^;]*?=>\\s*${variants})\\s*:)`, 'u'))
|
|
27
28
|
.map(rule => rule.replaceAll('\x01', ';'));
|
|
28
29
|
new ConverterToken(flags, rules, config, accum);
|
|
29
|
-
|
|
30
|
+
wikitext = `${wikitext.slice(0, top.index)}\0${length}v\x7F${wikitext.slice(index + 2)}`;
|
|
30
31
|
if (stack.length === 0) {
|
|
31
32
|
regex = regex1;
|
|
32
33
|
}
|
|
@@ -35,9 +36,9 @@ const parseConverter = (firstChild, config = Parser.getConfig(), accum = []) =>
|
|
|
35
36
|
stack.push(mt);
|
|
36
37
|
regex = regex2;
|
|
37
38
|
}
|
|
38
|
-
mt = regex.exec(
|
|
39
|
+
mt = regex.exec(wikitext);
|
|
39
40
|
}
|
|
40
|
-
return
|
|
41
|
+
return wikitext;
|
|
41
42
|
};
|
|
42
43
|
|
|
43
44
|
Parser.parsers.parseConverter = __filename;
|
package/parser/externalLinks.js
CHANGED
|
@@ -1,28 +1,29 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
3
|
const {extUrlChar} = require('../util/string'),
|
|
4
|
-
|
|
4
|
+
Parser = require('..');
|
|
5
5
|
|
|
6
6
|
/**
|
|
7
|
-
*
|
|
7
|
+
* 解析外部链接
|
|
8
|
+
* @param {string} wikitext wikitext
|
|
8
9
|
* @param {accum} accum
|
|
9
10
|
*/
|
|
10
|
-
const parseExternalLinks = (
|
|
11
|
-
const ExtLinkToken = require('../src/extLink')
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
return
|
|
11
|
+
const parseExternalLinks = (wikitext, config = Parser.getConfig(), accum = []) => {
|
|
12
|
+
const ExtLinkToken = require('../src/extLink');
|
|
13
|
+
const regex = new RegExp(
|
|
14
|
+
`\\[((?:${config.protocol}|//)${extUrlChar})(\\p{Zs}*)([^\\]\x01-\x08\x0A-\x1F\uFFFD]*)\\]`,
|
|
15
|
+
'giu',
|
|
16
|
+
);
|
|
17
|
+
return wikitext.replaceAll(regex, /** @type {function(...string): string} */ (_, url, space, text) => {
|
|
17
18
|
const {length} = accum,
|
|
18
|
-
mt = /&[lg]t
|
|
19
|
+
mt = /&[lg]t;/u.exec(url);
|
|
19
20
|
if (mt) {
|
|
20
21
|
url = url.slice(0, mt.index);
|
|
21
22
|
space = '';
|
|
22
23
|
text = `${url.slice(mt.index)}${space}${text}`;
|
|
23
24
|
}
|
|
24
25
|
new ExtLinkToken(url, space, text, config, accum);
|
|
25
|
-
return `\0${length}w\
|
|
26
|
+
return `\0${length}w\x7F`;
|
|
26
27
|
});
|
|
27
28
|
};
|
|
28
29
|
|
|
@@ -1,25 +1,35 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
const
|
|
3
|
+
const Parser = require('..'),
|
|
4
|
+
AstText = require('../lib/text'),
|
|
5
|
+
Token = require('../src');
|
|
4
6
|
|
|
5
7
|
/**
|
|
6
|
-
*
|
|
8
|
+
* 解析\<hr\>和状态开关
|
|
9
|
+
* @param {Token & {firstChild: AstText}} root 根节点
|
|
7
10
|
* @param {accum} accum
|
|
8
11
|
*/
|
|
9
|
-
const parseHrAndDoubleUnderscore = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
12
|
+
const parseHrAndDoubleUnderscore = ({firstChild: {data}, type, name}, config = Parser.getConfig(), accum = []) => {
|
|
10
13
|
const HrToken = require('../src/nowiki/hr'),
|
|
11
|
-
DoubleUnderscoreToken = require('../src/nowiki/doubleUnderscore')
|
|
12
|
-
|
|
13
|
-
|
|
14
|
+
DoubleUnderscoreToken = require('../src/nowiki/doubleUnderscore');
|
|
15
|
+
const {doubleUnderscore} = config;
|
|
16
|
+
if (type !== 'root' && (type !== 'ext-inner' || name !== 'poem')) {
|
|
17
|
+
data = `\0${data}`;
|
|
18
|
+
}
|
|
19
|
+
data = data.replaceAll(/^((?:\0\d+c\x7F)*)(-{4,})/gmu, (_, lead, m) => {
|
|
14
20
|
new HrToken(m.length, config, accum);
|
|
15
|
-
return `${lead}\0${accum.length - 1}r\
|
|
16
|
-
}).
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
21
|
+
return `${lead}\0${accum.length - 1}r\x7F`;
|
|
22
|
+
}).replaceAll(
|
|
23
|
+
new RegExp(`__(${doubleUnderscore.flat().join('|')})__`, 'giu'),
|
|
24
|
+
/** @param {string} p1 */ (m, p1) => {
|
|
25
|
+
if (doubleUnderscore[0].includes(p1.toLowerCase()) || doubleUnderscore[1].includes(p1)) {
|
|
26
|
+
new DoubleUnderscoreToken(p1, config, accum);
|
|
27
|
+
return `\0${accum.length - 1}u\x7F`;
|
|
28
|
+
}
|
|
29
|
+
return m;
|
|
30
|
+
},
|
|
31
|
+
);
|
|
32
|
+
return type === 'root' || type === 'ext-inner' && name === 'poem' ? data : data.slice(1);
|
|
23
33
|
};
|
|
24
34
|
|
|
25
35
|
Parser.parsers.parseHrAndDoubleUnderscore = __filename;
|
package/parser/html.js
CHANGED
|
@@ -1,15 +1,16 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
const
|
|
3
|
+
const Parser = require('..');
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
|
-
*
|
|
6
|
+
* 解析HTML标签
|
|
7
|
+
* @param {string} wikitext wikitext
|
|
7
8
|
* @param {accum} accum
|
|
8
9
|
*/
|
|
9
|
-
const parseHtml = (
|
|
10
|
-
const regex = /^(\/?)([a-z][^\s/>]*)(\s[^>]*?)?(\/?>)([^<]*)$/
|
|
10
|
+
const parseHtml = (wikitext, config = Parser.getConfig(), accum = []) => {
|
|
11
|
+
const regex = /^(\/?)([a-z][^\s/>]*)(\s[^>]*?)?(\/?>)([^<]*)$/iu,
|
|
11
12
|
elements = config.html.flat(),
|
|
12
|
-
bits =
|
|
13
|
+
bits = wikitext.split('<');
|
|
13
14
|
let text = bits.shift();
|
|
14
15
|
for (const x of bits) {
|
|
15
16
|
const mt = regex.exec(x),
|
|
@@ -19,8 +20,8 @@ const parseHtml = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
|
19
20
|
text += `<${x}`;
|
|
20
21
|
continue;
|
|
21
22
|
}
|
|
23
|
+
const AttributeToken = require('../src/attribute');
|
|
22
24
|
const [, slash,, params = '', brace, rest] = mt,
|
|
23
|
-
AttributeToken = require('../src/attribute'),
|
|
24
25
|
attr = new AttributeToken(params, 'html-attr', name, config, accum),
|
|
25
26
|
itemprop = attr.getAttr('itemprop');
|
|
26
27
|
if (name === 'meta' && (itemprop === undefined || attr.getAttr('content') === undefined)
|
|
@@ -30,7 +31,7 @@ const parseHtml = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
|
30
31
|
accum.pop();
|
|
31
32
|
continue;
|
|
32
33
|
}
|
|
33
|
-
text += `\0${accum.length}x\
|
|
34
|
+
text += `\0${accum.length}x\x7F${rest}`;
|
|
34
35
|
const HtmlToken = require('../src/html');
|
|
35
36
|
new HtmlToken(t, attr, slash === '/', brace === '/>', config, accum);
|
|
36
37
|
}
|
package/parser/links.js
CHANGED
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
const
|
|
4
|
-
Token = require('../src');
|
|
3
|
+
const Parser = require('..');
|
|
5
4
|
|
|
6
5
|
/**
|
|
7
|
-
*
|
|
6
|
+
* 解析内部链接
|
|
7
|
+
* @param {string} wikitext wikitext
|
|
8
8
|
* @param {accum} accum
|
|
9
9
|
*/
|
|
10
|
-
const parseLinks = (
|
|
11
|
-
const parseQuotes = require('./quotes.js')
|
|
12
|
-
|
|
13
|
-
regexImg = /^([^\n<>[\]{}|]+)\|(.*)$/
|
|
14
|
-
regexExt = RegExp(`^\\s*(?:${config.protocol})`, '
|
|
15
|
-
bits =
|
|
10
|
+
const parseLinks = (wikitext, config = Parser.getConfig(), accum = []) => {
|
|
11
|
+
const parseQuotes = require('./quotes.js');
|
|
12
|
+
const regex = /^([^\n<>[\]{}|]+)(?:\|(.*?[^\]]))?\]\](.*)$/su,
|
|
13
|
+
regexImg = /^([^\n<>[\]{}|]+)\|(.*)$/su,
|
|
14
|
+
regexExt = new RegExp(`^\\s*(?:${config.protocol})`, 'iu'),
|
|
15
|
+
bits = wikitext.split('[[');
|
|
16
16
|
let s = bits.shift();
|
|
17
17
|
for (let i = 0; i < bits.length; i++) {
|
|
18
18
|
let mightBeImg, link, text, after;
|
|
@@ -20,7 +20,7 @@ const parseLinks = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
|
20
20
|
m = regex.exec(x);
|
|
21
21
|
if (m) {
|
|
22
22
|
[, link, text, after] = m;
|
|
23
|
-
if (after
|
|
23
|
+
if (after[0] === ']' && text?.includes('[')) {
|
|
24
24
|
text += ']';
|
|
25
25
|
after = after.slice(1);
|
|
26
26
|
}
|
|
@@ -31,7 +31,7 @@ const parseLinks = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
|
31
31
|
[, link, text] = m2;
|
|
32
32
|
}
|
|
33
33
|
}
|
|
34
|
-
if (link === undefined || regexExt.test(link) || /\0\d+[exhbru]\
|
|
34
|
+
if (link === undefined || regexExt.test(link) || /\0\d+[exhbru]\x7F/u.test(link)) {
|
|
35
35
|
s += `[[${x}`;
|
|
36
36
|
continue;
|
|
37
37
|
}
|
|
@@ -41,7 +41,7 @@ const parseLinks = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
|
41
41
|
page = decodeURIComponent(link);
|
|
42
42
|
} catch {}
|
|
43
43
|
}
|
|
44
|
-
const force = link.trim()
|
|
44
|
+
const force = link.trim()[0] === ':';
|
|
45
45
|
if (force && mightBeImg) {
|
|
46
46
|
s += `[[${x}`;
|
|
47
47
|
continue;
|
|
@@ -79,7 +79,7 @@ const parseLinks = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
|
79
79
|
}
|
|
80
80
|
}
|
|
81
81
|
text &&= parseQuotes(text, config, accum);
|
|
82
|
-
s += `\0${accum.length}l\
|
|
82
|
+
s += `\0${accum.length}l\x7F${after}`;
|
|
83
83
|
let LinkToken = require('../src/link');
|
|
84
84
|
if (!force) {
|
|
85
85
|
if (!interwiki && ns === 6) {
|
package/parser/list.js
CHANGED
|
@@ -1,26 +1,27 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
const
|
|
3
|
+
const Parser = require('..');
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
|
-
*
|
|
6
|
+
* 解析列表
|
|
7
|
+
* @param {string} text wikitext
|
|
7
8
|
* @param {accum} accum
|
|
8
9
|
*/
|
|
9
10
|
const parseList = (text, config = Parser.getConfig(), accum = []) => {
|
|
10
|
-
const mt = /^((?:\0\d+c\
|
|
11
|
+
const mt = /^((?:\0\d+c\x7F)*)([;:*#]+)/u.exec(text);
|
|
11
12
|
if (!mt) {
|
|
12
13
|
return text;
|
|
13
14
|
}
|
|
14
|
-
const ListToken = require('../src/nowiki/list')
|
|
15
|
-
|
|
16
|
-
text = `${comment}\0${accum.length}d\
|
|
15
|
+
const ListToken = require('../src/nowiki/list');
|
|
16
|
+
const [total, comment, prefix] = mt;
|
|
17
|
+
text = `${comment}\0${accum.length}d\x7F${text.slice(total.length)}`;
|
|
17
18
|
new ListToken(prefix, config, accum);
|
|
18
19
|
let dt = prefix.split(';').length - 1;
|
|
19
20
|
if (!dt) {
|
|
20
21
|
return text;
|
|
21
22
|
}
|
|
22
23
|
const DdToken = require('../src/nowiki/dd');
|
|
23
|
-
let regex = /:+|-\{/
|
|
24
|
+
let regex = /:+|-\{/gu,
|
|
24
25
|
ex = regex.exec(text),
|
|
25
26
|
lc = 0;
|
|
26
27
|
while (ex && dt) {
|
|
@@ -28,16 +29,16 @@ const parseList = (text, config = Parser.getConfig(), accum = []) => {
|
|
|
28
29
|
if (syntax[0] === ':') {
|
|
29
30
|
if (syntax.length >= dt) {
|
|
30
31
|
new DdToken(':'.repeat(dt), config, accum);
|
|
31
|
-
return `${text.slice(0, index)}\0${accum.length - 1}d\
|
|
32
|
+
return `${text.slice(0, index)}\0${accum.length - 1}d\x7F${text.slice(index + dt)}`;
|
|
32
33
|
}
|
|
33
|
-
text = `${text.slice(0, index)}\0${accum.length}d\
|
|
34
|
+
text = `${text.slice(0, index)}\0${accum.length}d\x7F${text.slice(regex.lastIndex)}`;
|
|
34
35
|
dt -= syntax.length;
|
|
35
36
|
regex.lastIndex = index + 4 + String(accum.length).length;
|
|
36
37
|
new DdToken(syntax, config, accum);
|
|
37
38
|
} else if (syntax === '-{') {
|
|
38
39
|
if (!lc) {
|
|
39
40
|
const {lastIndex} = regex;
|
|
40
|
-
regex = /-\{|\}-/
|
|
41
|
+
regex = /-\{|\}-/gu;
|
|
41
42
|
regex.lastIndex = lastIndex;
|
|
42
43
|
}
|
|
43
44
|
lc++;
|
|
@@ -45,7 +46,7 @@ const parseList = (text, config = Parser.getConfig(), accum = []) => {
|
|
|
45
46
|
lc--;
|
|
46
47
|
if (!lc) {
|
|
47
48
|
const {lastIndex} = regex;
|
|
48
|
-
regex = /:+|-\{/
|
|
49
|
+
regex = /:+|-\{/gu;
|
|
49
50
|
regex.lastIndex = lastIndex;
|
|
50
51
|
}
|
|
51
52
|
}
|
package/parser/magicLinks.js
CHANGED
|
@@ -1,28 +1,29 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
3
|
const {extUrlChar} = require('../util/string'),
|
|
4
|
-
|
|
4
|
+
Parser = require('..');
|
|
5
5
|
|
|
6
6
|
/**
|
|
7
|
-
*
|
|
7
|
+
* 解析自由外链
|
|
8
|
+
* @param {string} wikitext wikitext
|
|
8
9
|
* @param {accum} accum
|
|
9
10
|
*/
|
|
10
|
-
const parseMagicLinks = (
|
|
11
|
-
const MagicLinkToken = require('../src/magicLink')
|
|
12
|
-
|
|
13
|
-
return
|
|
11
|
+
const parseMagicLinks = (wikitext, config = Parser.getConfig(), accum = []) => {
|
|
12
|
+
const MagicLinkToken = require('../src/magicLink');
|
|
13
|
+
const regex = new RegExp(`\\b(?:${config.protocol})(${extUrlChar})`, 'giu');
|
|
14
|
+
return wikitext.replaceAll(regex, /** @param {string} p1 */ (m, p1) => {
|
|
14
15
|
let trail = '',
|
|
15
16
|
url = m;
|
|
16
|
-
const m2 = /&(?:lt|gt|nbsp|#x0*(?:3[ce]|a0)|#0*(?:6[02]|160));/
|
|
17
|
+
const m2 = /&(?:lt|gt|nbsp|#x0*(?:3[ce]|a0)|#0*(?:6[02]|160));/iu.exec(url);
|
|
17
18
|
if (m2) {
|
|
18
19
|
trail = url.slice(m2.index);
|
|
19
20
|
url = url.slice(0, m2.index);
|
|
20
21
|
}
|
|
21
|
-
const sep = RegExp(`[,;.:!?${url.includes('(') ? '' : ')'}]
|
|
22
|
+
const sep = new RegExp(`[,;.:!?${url.includes('(') ? '' : ')'}]+$`, 'u'),
|
|
22
23
|
sepChars = sep.exec(url);
|
|
23
24
|
if (sepChars) {
|
|
24
25
|
let correction = 0;
|
|
25
|
-
if (sepChars[0]
|
|
26
|
+
if (sepChars[0][0] === ';' && /&(?:[a-z]+|#x[\da-f]+|#\d+)$/iu.test(url.slice(0, sepChars.index))) {
|
|
26
27
|
correction = 1;
|
|
27
28
|
}
|
|
28
29
|
trail = `${url.slice(sepChars.index + correction)}${trail}`;
|
|
@@ -32,7 +33,7 @@ const parseMagicLinks = (firstChild, config = Parser.getConfig(), accum = []) =>
|
|
|
32
33
|
return m;
|
|
33
34
|
}
|
|
34
35
|
new MagicLinkToken(url, false, config, accum);
|
|
35
|
-
return `\0${accum.length - 1}w\
|
|
36
|
+
return `\0${accum.length - 1}w\x7F${trail}`;
|
|
36
37
|
});
|
|
37
38
|
};
|
|
38
39
|
|
package/parser/quotes.js
CHANGED
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
const
|
|
3
|
+
const Parser = require('..');
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
|
-
*
|
|
6
|
+
* 解析单引号
|
|
7
|
+
* @param {string} text wikitext
|
|
7
8
|
* @param {accum} accum
|
|
8
9
|
*/
|
|
9
10
|
const parseQuotes = (text, config = Parser.getConfig(), accum = []) => {
|
|
10
|
-
const arr = text.split(/('{2,})/),
|
|
11
|
+
const arr = text.split(/('{2,})/u),
|
|
11
12
|
{length} = arr;
|
|
12
13
|
if (length === 1) {
|
|
13
14
|
return text;
|
|
@@ -16,7 +17,7 @@ const parseQuotes = (text, config = Parser.getConfig(), accum = []) => {
|
|
|
16
17
|
nItalic = 0,
|
|
17
18
|
firstSingle, firstMulti, firstSpace;
|
|
18
19
|
for (let i = 1; i < length; i += 2) {
|
|
19
|
-
const len = arr[i]
|
|
20
|
+
const {length: len} = arr[i];
|
|
20
21
|
switch (len) {
|
|
21
22
|
case 2:
|
|
22
23
|
nItalic++;
|
|
@@ -54,7 +55,7 @@ const parseQuotes = (text, config = Parser.getConfig(), accum = []) => {
|
|
|
54
55
|
const QuoteToken = require('../src/nowiki/quote');
|
|
55
56
|
for (let i = 1; i < length; i += 2) {
|
|
56
57
|
new QuoteToken(arr[i].length, config, accum);
|
|
57
|
-
arr[i] = `\0${accum.length - 1}q\
|
|
58
|
+
arr[i] = `\0${accum.length - 1}q\x7F`;
|
|
58
59
|
}
|
|
59
60
|
return arr.join('');
|
|
60
61
|
};
|