wikiparser-node 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +472 -34
- package/README.md +1 -1
- package/config/default.json +58 -30
- package/config/llwiki.json +22 -90
- package/config/moegirl.json +51 -13
- package/config/zhwiki.json +1269 -0
- package/index.js +114 -104
- package/lib/element.js +448 -440
- package/lib/node.js +335 -115
- package/lib/ranges.js +27 -18
- package/lib/text.js +146 -0
- package/lib/title.js +13 -5
- package/mixin/attributeParent.js +70 -24
- package/mixin/fixedToken.js +14 -6
- package/mixin/hidden.js +6 -4
- package/mixin/sol.js +27 -10
- package/package.json +9 -3
- package/parser/brackets.js +22 -17
- package/parser/commentAndExt.js +18 -16
- package/parser/converter.js +14 -13
- package/parser/externalLinks.js +12 -11
- package/parser/hrAndDoubleUnderscore.js +23 -14
- package/parser/html.js +10 -9
- package/parser/links.js +15 -14
- package/parser/list.js +12 -11
- package/parser/magicLinks.js +12 -11
- package/parser/quotes.js +6 -5
- package/parser/selector.js +175 -0
- package/parser/table.js +25 -18
- package/printed/example.json +120 -0
- package/src/arg.js +56 -32
- package/src/atom/hidden.js +5 -2
- package/src/atom/index.js +17 -9
- package/src/attribute.js +182 -100
- package/src/converter.js +68 -41
- package/src/converterFlags.js +67 -45
- package/src/converterRule.js +117 -65
- package/src/extLink.js +66 -18
- package/src/gallery.js +42 -15
- package/src/heading.js +34 -15
- package/src/html.js +97 -35
- package/src/imageParameter.js +83 -54
- package/src/index.js +299 -178
- package/src/link/category.js +20 -52
- package/src/link/file.js +59 -28
- package/src/link/galleryImage.js +21 -7
- package/src/link/index.js +146 -60
- package/src/magicLink.js +34 -12
- package/src/nowiki/comment.js +22 -10
- package/src/nowiki/dd.js +37 -22
- package/src/nowiki/doubleUnderscore.js +16 -7
- package/src/nowiki/hr.js +11 -7
- package/src/nowiki/index.js +16 -9
- package/src/nowiki/list.js +2 -2
- package/src/nowiki/noinclude.js +8 -4
- package/src/nowiki/quote.js +11 -7
- package/src/onlyinclude.js +19 -7
- package/src/parameter.js +65 -38
- package/src/syntax.js +26 -20
- package/src/table/index.js +260 -165
- package/src/table/td.js +98 -52
- package/src/table/tr.js +102 -58
- package/src/tagPair/ext.js +27 -19
- package/src/tagPair/include.js +16 -11
- package/src/tagPair/index.js +64 -29
- package/src/transclude.js +170 -93
- package/test/api.js +83 -0
- package/test/real.js +133 -0
- package/test/test.js +28 -0
- package/test/util.js +80 -0
- package/tool/index.js +41 -31
- package/typings/api.d.ts +13 -0
- package/typings/array.d.ts +28 -0
- package/typings/event.d.ts +24 -0
- package/typings/index.d.ts +46 -4
- package/typings/node.d.ts +15 -9
- package/typings/parser.d.ts +7 -0
- package/typings/tool.d.ts +3 -2
- package/util/debug.js +21 -18
- package/util/string.js +40 -27
- package/typings/element.d.ts +0 -28
package/parser/commentAndExt.js
CHANGED
|
@@ -1,26 +1,28 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
const
|
|
3
|
+
const Parser = require('..');
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
|
-
*
|
|
6
|
+
* 解析HTML注释和扩展标签
|
|
7
|
+
* @param {string} text wikitext
|
|
7
8
|
* @param {accum} accum
|
|
9
|
+
* @param {boolean} includeOnly 是否嵌入
|
|
8
10
|
*/
|
|
9
11
|
const parseCommentAndExt = (text, config = Parser.getConfig(), accum = [], includeOnly = false) => {
|
|
10
|
-
const onlyinclude = /<onlyinclude>(.*?)<\/onlyinclude>/
|
|
11
|
-
if (includeOnly &&
|
|
12
|
-
return text.
|
|
13
|
-
const str = `\
|
|
14
|
-
|
|
12
|
+
const onlyinclude = /<onlyinclude>(.*?)<\/onlyinclude>/gsu;
|
|
13
|
+
if (includeOnly && text.search(onlyinclude) !== -1) { // `<onlyinclude>`拥有最高优先级
|
|
14
|
+
return text.replaceAll(onlyinclude, /** @param {string} inner */ (_, inner) => {
|
|
15
|
+
const str = `\0${accum.length}e\x7F`;
|
|
16
|
+
const OnlyincludeToken = require('../src/onlyinclude');
|
|
15
17
|
new OnlyincludeToken(inner, config, accum);
|
|
16
18
|
return str;
|
|
17
|
-
}).
|
|
19
|
+
}).replaceAll(/(?<=^|\0\d+e\x7F).*?(?=$|\0\d+e\x7F)/gsu, substr => {
|
|
18
20
|
if (substr === '') {
|
|
19
21
|
return '';
|
|
20
22
|
}
|
|
21
23
|
const NoincludeToken = require('../src/nowiki/noinclude');
|
|
22
24
|
new NoincludeToken(substr, config, accum);
|
|
23
|
-
return `\
|
|
25
|
+
return `\0${accum.length - 1}c\x7F`;
|
|
24
26
|
});
|
|
25
27
|
}
|
|
26
28
|
const ext = config.ext.join('|'),
|
|
@@ -28,22 +30,22 @@ const parseCommentAndExt = (text, config = Parser.getConfig(), accum = [], inclu
|
|
|
28
30
|
noincludeRegex = includeOnly ? 'noinclude' : 'includeonly',
|
|
29
31
|
regex = new RegExp(
|
|
30
32
|
'<!--.*?(?:-->|$)|' // comment
|
|
31
|
-
+ `<${includeRegex}(?:\\s
|
|
32
|
-
+ `<(${ext})(\\s
|
|
33
|
-
+ `<(${noincludeRegex})(\\s
|
|
34
|
-
'
|
|
33
|
+
+ `<${includeRegex}(?:\\s[^>]*?)?>|</${includeRegex}\\s*>|` // <includeonly>
|
|
34
|
+
+ `<(${ext})(\\s[^>]*?)?(?:/>|>(.*?)</(\\1\\s*)>)|` // 扩展标签
|
|
35
|
+
+ `<(${noincludeRegex})(\\s[^>]*?)?(?:/>|>(.*?)(?:</(\\5\\s*)>|$))`, // <noinclude>
|
|
36
|
+
'gisu',
|
|
35
37
|
);
|
|
36
38
|
return text.replace(
|
|
37
39
|
regex,
|
|
38
40
|
/** @type {function(...string): string} */
|
|
39
41
|
(substr, name, attr, inner, closing, include, includeAttr, includeInner, includeClosing) => {
|
|
40
|
-
const str = `\
|
|
42
|
+
const str = `\0${accum.length}${name ? 'e' : 'c'}\x7F`;
|
|
41
43
|
if (name) {
|
|
42
44
|
const ExtToken = require('../src/tagPair/ext');
|
|
43
45
|
new ExtToken(name, attr, inner, closing, config, accum);
|
|
44
46
|
} else if (substr.startsWith('<!--')) {
|
|
45
|
-
const CommentToken = require('../src/nowiki/comment')
|
|
46
|
-
|
|
47
|
+
const CommentToken = require('../src/nowiki/comment');
|
|
48
|
+
const closed = substr.endsWith('-->');
|
|
47
49
|
new CommentToken(substr.slice(4, closed ? -3 : undefined), closed, config, accum);
|
|
48
50
|
} else if (include) {
|
|
49
51
|
const IncludeToken = require('../src/tagPair/include');
|
package/parser/converter.js
CHANGED
|
@@ -1,32 +1,33 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
const
|
|
3
|
+
const Parser = require('..');
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
|
-
*
|
|
6
|
+
* 解析语言变体转换
|
|
7
|
+
* @param {string} wikitext wikitext
|
|
7
8
|
* @param {accum} accum
|
|
8
9
|
*/
|
|
9
|
-
const parseConverter = (
|
|
10
|
-
const ConverterToken = require('../src/converter')
|
|
11
|
-
|
|
12
|
-
regex2 =
|
|
10
|
+
const parseConverter = (wikitext, config = Parser.getConfig(), accum = []) => {
|
|
11
|
+
const ConverterToken = require('../src/converter');
|
|
12
|
+
const regex1 = /-\{/gu,
|
|
13
|
+
regex2 = /-\{|\}-/gu,
|
|
13
14
|
/** @type {RegExpExecArray[]} */ stack = [];
|
|
14
15
|
let regex = regex1,
|
|
15
|
-
mt = regex.exec(
|
|
16
|
+
mt = regex.exec(wikitext);
|
|
16
17
|
while (mt) {
|
|
17
18
|
const {0: syntax, index} = mt;
|
|
18
19
|
if (syntax === '}-') {
|
|
19
20
|
const top = stack.pop(),
|
|
20
21
|
{length} = accum,
|
|
21
|
-
str =
|
|
22
|
+
str = wikitext.slice(top.index + 2, index),
|
|
22
23
|
i = str.indexOf('|'),
|
|
23
24
|
[flags, text] = i === -1 ? [[], str] : [str.slice(0, i).split(';'), str.slice(i + 1)],
|
|
24
|
-
temp = text.replace(/(
|
|
25
|
+
temp = text.replace(/(&[#a-z\d]+);/iu, '$1\x01'),
|
|
25
26
|
variants = `(?:${config.variants.join('|')})`,
|
|
26
|
-
rules = temp.split(new RegExp(`;(?=\\s*(?:${variants}|[^;]*?=>\\s*${variants})\\s*:)
|
|
27
|
+
rules = temp.split(new RegExp(`;(?=\\s*(?:${variants}|[^;]*?=>\\s*${variants})\\s*:)`, 'u'))
|
|
27
28
|
.map(rule => rule.replaceAll('\x01', ';'));
|
|
28
29
|
new ConverterToken(flags, rules, config, accum);
|
|
29
|
-
|
|
30
|
+
wikitext = `${wikitext.slice(0, top.index)}\0${length}v\x7F${wikitext.slice(index + 2)}`;
|
|
30
31
|
if (stack.length === 0) {
|
|
31
32
|
regex = regex1;
|
|
32
33
|
}
|
|
@@ -35,9 +36,9 @@ const parseConverter = (firstChild, config = Parser.getConfig(), accum = []) =>
|
|
|
35
36
|
stack.push(mt);
|
|
36
37
|
regex = regex2;
|
|
37
38
|
}
|
|
38
|
-
mt = regex.exec(
|
|
39
|
+
mt = regex.exec(wikitext);
|
|
39
40
|
}
|
|
40
|
-
return
|
|
41
|
+
return wikitext;
|
|
41
42
|
};
|
|
42
43
|
|
|
43
44
|
Parser.parsers.parseConverter = __filename;
|
package/parser/externalLinks.js
CHANGED
|
@@ -1,28 +1,29 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
3
|
const {extUrlChar} = require('../util/string'),
|
|
4
|
-
|
|
4
|
+
Parser = require('..');
|
|
5
5
|
|
|
6
6
|
/**
|
|
7
|
-
*
|
|
7
|
+
* 解析外部链接
|
|
8
|
+
* @param {string} wikitext wikitext
|
|
8
9
|
* @param {accum} accum
|
|
9
10
|
*/
|
|
10
|
-
const parseExternalLinks = (
|
|
11
|
-
const ExtLinkToken = require('../src/extLink')
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
return
|
|
11
|
+
const parseExternalLinks = (wikitext, config = Parser.getConfig(), accum = []) => {
|
|
12
|
+
const ExtLinkToken = require('../src/extLink');
|
|
13
|
+
const regex = new RegExp(
|
|
14
|
+
`\\[((?:${config.protocol}|//)${extUrlChar})(\\p{Zs}*)([^\\]\x01-\x08\x0A-\x1F\uFFFD]*)\\]`,
|
|
15
|
+
'giu',
|
|
16
|
+
);
|
|
17
|
+
return wikitext.replace(regex, /** @type {function(...string): string} */ (_, url, space, text) => {
|
|
17
18
|
const {length} = accum,
|
|
18
|
-
mt =
|
|
19
|
+
mt = /&[lg]t;/u.exec(url);
|
|
19
20
|
if (mt) {
|
|
20
21
|
url = url.slice(0, mt.index);
|
|
21
22
|
space = '';
|
|
22
23
|
text = `${url.slice(mt.index)}${space}${text}`;
|
|
23
24
|
}
|
|
24
25
|
new ExtLinkToken(url, space, text, config, accum);
|
|
25
|
-
return `\
|
|
26
|
+
return `\0${length}w\x7F`;
|
|
26
27
|
});
|
|
27
28
|
};
|
|
28
29
|
|
|
package/parser/hrAndDoubleUnderscore.js
CHANGED
|
@@ -1,25 +1,34 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
const
|
|
3
|
+
const Parser = require('..'),
|
|
4
|
+
AstText = require('../lib/text');
|
|
4
5
|
|
|
5
6
|
/**
|
|
6
|
-
*
|
|
7
|
+
* 解析\<hr\>和状态开关
|
|
8
|
+
* @param {{firstChild: AstText, type: string}} root 根节点
|
|
7
9
|
* @param {accum} accum
|
|
8
10
|
*/
|
|
9
|
-
const parseHrAndDoubleUnderscore = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
11
|
+
const parseHrAndDoubleUnderscore = ({firstChild: {data}, type}, config = Parser.getConfig(), accum = []) => {
|
|
10
12
|
const HrToken = require('../src/nowiki/hr'),
|
|
11
|
-
DoubleUnderscoreToken = require('../src/nowiki/doubleUnderscore')
|
|
12
|
-
|
|
13
|
-
|
|
13
|
+
DoubleUnderscoreToken = require('../src/nowiki/doubleUnderscore');
|
|
14
|
+
const {doubleUnderscore} = config;
|
|
15
|
+
if (type !== 'root') {
|
|
16
|
+
data = `\0${data}`;
|
|
17
|
+
}
|
|
18
|
+
data = data.replaceAll(/^((?:\0\d+c\x7F)*)(-{4,})/gmu, (_, lead, m) => {
|
|
14
19
|
new HrToken(m.length, config, accum);
|
|
15
|
-
return
|
|
16
|
-
}).
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
20
|
+
return `${lead}\0${accum.length - 1}r\x7F`;
|
|
21
|
+
}).replaceAll(
|
|
22
|
+
new RegExp(`__(${doubleUnderscore.flat().join('|')})__`, 'giu'),
|
|
23
|
+
/** @param {string} p1 */ (m, p1) => {
|
|
24
|
+
if (doubleUnderscore[0].includes(p1.toLowerCase()) || doubleUnderscore[1].includes(p1)) {
|
|
25
|
+
new DoubleUnderscoreToken(p1, config, accum);
|
|
26
|
+
return `\0${accum.length - 1}u\x7F`;
|
|
27
|
+
}
|
|
28
|
+
return m;
|
|
29
|
+
},
|
|
30
|
+
);
|
|
31
|
+
return type === 'root' ? data : data.slice(1);
|
|
23
32
|
};
|
|
24
33
|
|
|
25
34
|
Parser.parsers.parseHrAndDoubleUnderscore = __filename;
|
package/parser/html.js
CHANGED
|
@@ -1,26 +1,27 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
const
|
|
3
|
+
const Parser = require('..');
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
|
-
*
|
|
6
|
+
* 解析HTML标签
|
|
7
|
+
* @param {string} wikitext wikitext
|
|
7
8
|
* @param {accum} accum
|
|
8
9
|
*/
|
|
9
|
-
const parseHtml = (
|
|
10
|
-
const regex = /^(\/?)([a-z][^\s/>]*)([^>]*?)(\/?>)([^<]*)$/
|
|
10
|
+
const parseHtml = (wikitext, config = Parser.getConfig(), accum = []) => {
|
|
11
|
+
const regex = /^(\/?)([a-z][^\s/>]*)(\s[^>]*?)?(\/?>)([^<]*)$/iu,
|
|
11
12
|
elements = config.html.flat(),
|
|
12
|
-
bits =
|
|
13
|
+
bits = wikitext.split('<');
|
|
13
14
|
let text = bits.shift();
|
|
14
15
|
for (const x of bits) {
|
|
15
|
-
const mt =
|
|
16
|
+
const mt = regex.exec(x),
|
|
16
17
|
t = mt?.[2],
|
|
17
18
|
name = t?.toLowerCase();
|
|
18
19
|
if (!mt || !elements.includes(name)) {
|
|
19
20
|
text += `<${x}`;
|
|
20
21
|
continue;
|
|
21
22
|
}
|
|
22
|
-
const
|
|
23
|
-
|
|
23
|
+
const AttributeToken = require('../src/attribute');
|
|
24
|
+
const [, slash,, params = '', brace, rest] = mt,
|
|
24
25
|
attr = new AttributeToken(params, 'html-attr', name, config, accum),
|
|
25
26
|
itemprop = attr.getAttr('itemprop');
|
|
26
27
|
if (name === 'meta' && (itemprop === undefined || attr.getAttr('content') === undefined)
|
|
@@ -30,7 +31,7 @@ const parseHtml = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
|
30
31
|
accum.pop();
|
|
31
32
|
continue;
|
|
32
33
|
}
|
|
33
|
-
text += `\
|
|
34
|
+
text += `\0${accum.length}x\x7F${rest}`;
|
|
34
35
|
const HtmlToken = require('../src/html');
|
|
35
36
|
new HtmlToken(t, attr, slash === '/', brace === '/>', config, accum);
|
|
36
37
|
}
|
package/parser/links.js
CHANGED
|
@@ -1,37 +1,38 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
const
|
|
3
|
+
const Parser = require('..'),
|
|
4
4
|
Token = require('../src');
|
|
5
5
|
|
|
6
6
|
/**
|
|
7
|
-
*
|
|
7
|
+
* 解析内部链接
|
|
8
|
+
* @param {string} wikitext wikitext
|
|
8
9
|
* @param {accum} accum
|
|
9
10
|
*/
|
|
10
|
-
const parseLinks = (
|
|
11
|
-
const parseQuotes = require('./quotes.js')
|
|
12
|
-
|
|
13
|
-
regexImg = /^([^\n<>[\]{}|]+)\|(.*)
|
|
14
|
-
regexExt = new RegExp(`^\\s*(?:${config.protocol})`, '
|
|
15
|
-
bits =
|
|
11
|
+
const parseLinks = (wikitext, config = Parser.getConfig(), accum = []) => {
|
|
12
|
+
const parseQuotes = require('./quotes.js');
|
|
13
|
+
const regex = /^([^\n<>[\]{}|]+)(?:\|(.*?[^\]]))?\]\](.*)$/su,
|
|
14
|
+
regexImg = /^([^\n<>[\]{}|]+)\|(.*)$/su,
|
|
15
|
+
regexExt = new RegExp(`^\\s*(?:${config.protocol})`, 'iu'),
|
|
16
|
+
bits = wikitext.split('[[');
|
|
16
17
|
let s = bits.shift();
|
|
17
18
|
for (let i = 0; i < bits.length; i++) {
|
|
18
19
|
let mightBeImg, link, text, after;
|
|
19
20
|
const x = bits[i],
|
|
20
|
-
m =
|
|
21
|
+
m = regex.exec(x);
|
|
21
22
|
if (m) {
|
|
22
23
|
[, link, text, after] = m;
|
|
23
|
-
if (after
|
|
24
|
+
if (after[0] === ']' && text?.includes('[')) {
|
|
24
25
|
text += ']';
|
|
25
26
|
after = after.slice(1);
|
|
26
27
|
}
|
|
27
28
|
} else {
|
|
28
|
-
const m2 =
|
|
29
|
+
const m2 = regexImg.exec(x);
|
|
29
30
|
if (m2) {
|
|
30
31
|
mightBeImg = true;
|
|
31
32
|
[, link, text] = m2;
|
|
32
33
|
}
|
|
33
34
|
}
|
|
34
|
-
if (link === undefined || regexExt.test(link) || /\
|
|
35
|
+
if (link === undefined || regexExt.test(link) || /\0\d+[exhbru]\x7F/u.test(link)) {
|
|
35
36
|
s += `[[${x}`;
|
|
36
37
|
continue;
|
|
37
38
|
}
|
|
@@ -41,7 +42,7 @@ const parseLinks = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
|
41
42
|
page = decodeURIComponent(link);
|
|
42
43
|
} catch {}
|
|
43
44
|
}
|
|
44
|
-
const force = link.trim()
|
|
45
|
+
const force = link.trim()[0] === ':';
|
|
45
46
|
if (force && mightBeImg) {
|
|
46
47
|
s += `[[${x}`;
|
|
47
48
|
continue;
|
|
@@ -79,7 +80,7 @@ const parseLinks = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
|
79
80
|
}
|
|
80
81
|
}
|
|
81
82
|
text &&= parseQuotes(text, config, accum);
|
|
82
|
-
s += `\
|
|
83
|
+
s += `\0${accum.length}l\x7F${after}`;
|
|
83
84
|
let LinkToken = require('../src/link');
|
|
84
85
|
if (!force) {
|
|
85
86
|
if (!interwiki && ns === 6) {
|
package/parser/list.js
CHANGED
|
@@ -1,26 +1,27 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
const
|
|
3
|
+
const Parser = require('..');
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
|
-
*
|
|
6
|
+
* 解析列表
|
|
7
|
+
* @param {string} text wikitext
|
|
7
8
|
* @param {accum} accum
|
|
8
9
|
*/
|
|
9
10
|
const parseList = (text, config = Parser.getConfig(), accum = []) => {
|
|
10
|
-
const mt =
|
|
11
|
+
const mt = /^((?:\0\d+c\x7F)*)([;:*#]+)/u.exec(text);
|
|
11
12
|
if (!mt) {
|
|
12
13
|
return text;
|
|
13
14
|
}
|
|
14
|
-
const ListToken = require('../src/nowiki/list')
|
|
15
|
-
|
|
16
|
-
text = `${comment}\
|
|
15
|
+
const ListToken = require('../src/nowiki/list');
|
|
16
|
+
const [total, comment, prefix] = mt;
|
|
17
|
+
text = `${comment}\0${accum.length}d\x7F${text.slice(total.length)}`;
|
|
17
18
|
new ListToken(prefix, config, accum);
|
|
18
19
|
let dt = prefix.split(';').length - 1;
|
|
19
20
|
if (!dt) {
|
|
20
21
|
return text;
|
|
21
22
|
}
|
|
22
23
|
const DdToken = require('../src/nowiki/dd');
|
|
23
|
-
let regex =
|
|
24
|
+
let regex = /:+|-\{/gu,
|
|
24
25
|
ex = regex.exec(text),
|
|
25
26
|
lc = 0;
|
|
26
27
|
while (ex && dt) {
|
|
@@ -28,16 +29,16 @@ const parseList = (text, config = Parser.getConfig(), accum = []) => {
|
|
|
28
29
|
if (syntax[0] === ':') {
|
|
29
30
|
if (syntax.length >= dt) {
|
|
30
31
|
new DdToken(':'.repeat(dt), config, accum);
|
|
31
|
-
return `${text.slice(0, index)}\
|
|
32
|
+
return `${text.slice(0, index)}\0${accum.length - 1}d\x7F${text.slice(index + dt)}`;
|
|
32
33
|
}
|
|
33
|
-
text = `${text.slice(0, index)}\
|
|
34
|
+
text = `${text.slice(0, index)}\0${accum.length}d\x7F${text.slice(regex.lastIndex)}`;
|
|
34
35
|
dt -= syntax.length;
|
|
35
36
|
regex.lastIndex = index + 4 + String(accum.length).length;
|
|
36
37
|
new DdToken(syntax, config, accum);
|
|
37
38
|
} else if (syntax === '-{') {
|
|
38
39
|
if (!lc) {
|
|
39
40
|
const {lastIndex} = regex;
|
|
40
|
-
regex =
|
|
41
|
+
regex = /-\{|\}-/gu;
|
|
41
42
|
regex.lastIndex = lastIndex;
|
|
42
43
|
}
|
|
43
44
|
lc++;
|
|
@@ -45,7 +46,7 @@ const parseList = (text, config = Parser.getConfig(), accum = []) => {
|
|
|
45
46
|
lc--;
|
|
46
47
|
if (!lc) {
|
|
47
48
|
const {lastIndex} = regex;
|
|
48
|
-
regex =
|
|
49
|
+
regex = /:+|-\{/gu;
|
|
49
50
|
regex.lastIndex = lastIndex;
|
|
50
51
|
}
|
|
51
52
|
}
|
package/parser/magicLinks.js
CHANGED
|
@@ -1,28 +1,29 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
3
|
const {extUrlChar} = require('../util/string'),
|
|
4
|
-
|
|
4
|
+
Parser = require('..');
|
|
5
5
|
|
|
6
6
|
/**
|
|
7
|
-
*
|
|
7
|
+
* 解析自由外链
|
|
8
|
+
* @param {string} wikitext wikitext
|
|
8
9
|
* @param {accum} accum
|
|
9
10
|
*/
|
|
10
|
-
const parseMagicLinks = (
|
|
11
|
-
const MagicLinkToken = require('../src/magicLink')
|
|
12
|
-
|
|
13
|
-
return
|
|
11
|
+
const parseMagicLinks = (wikitext, config = Parser.getConfig(), accum = []) => {
|
|
12
|
+
const MagicLinkToken = require('../src/magicLink');
|
|
13
|
+
const regex = new RegExp(`\\b(?:${config.protocol})(${extUrlChar})`, 'giu');
|
|
14
|
+
return wikitext.replace(regex, /** @param {string} p1 */ (m, p1) => {
|
|
14
15
|
let trail = '',
|
|
15
16
|
url = m;
|
|
16
|
-
const m2 =
|
|
17
|
+
const m2 = /&(?:lt|gt|nbsp|#x0*(?:3[ce]|a0)|#0*(?:6[02]|160));/iu.exec(url);
|
|
17
18
|
if (m2) {
|
|
18
19
|
trail = url.slice(m2.index);
|
|
19
20
|
url = url.slice(0, m2.index);
|
|
20
21
|
}
|
|
21
|
-
const sep = new RegExp(`[,;.:!?${url.includes('(') ? '' : ')'}]
|
|
22
|
-
sepChars =
|
|
22
|
+
const sep = new RegExp(`[,;.:!?${url.includes('(') ? '' : ')'}]+$`, 'u'),
|
|
23
|
+
sepChars = sep.exec(url);
|
|
23
24
|
if (sepChars) {
|
|
24
25
|
let correction = 0;
|
|
25
|
-
if (sepChars[0]
|
|
26
|
+
if (sepChars[0][0] === ';' && /&(?:[a-z]+|#x[\da-f]+|#\d+)$/iu.test(url.slice(0, sepChars.index))) {
|
|
26
27
|
correction = 1;
|
|
27
28
|
}
|
|
28
29
|
trail = `${url.slice(sepChars.index + correction)}${trail}`;
|
|
@@ -32,7 +33,7 @@ const parseMagicLinks = (firstChild, config = Parser.getConfig(), accum = []) =>
|
|
|
32
33
|
return m;
|
|
33
34
|
}
|
|
34
35
|
new MagicLinkToken(url, false, config, accum);
|
|
35
|
-
return `\
|
|
36
|
+
return `\0${accum.length - 1}w\x7F${trail}`;
|
|
36
37
|
});
|
|
37
38
|
};
|
|
38
39
|
|
package/parser/quotes.js
CHANGED
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
const
|
|
3
|
+
const Parser = require('..');
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
|
-
*
|
|
6
|
+
* 解析单引号
|
|
7
|
+
* @param {string} text wikitext
|
|
7
8
|
* @param {accum} accum
|
|
8
9
|
*/
|
|
9
10
|
const parseQuotes = (text, config = Parser.getConfig(), accum = []) => {
|
|
10
|
-
const arr = text.split(/('{2,})/),
|
|
11
|
+
const arr = text.split(/('{2,})/u),
|
|
11
12
|
{length} = arr;
|
|
12
13
|
if (length === 1) {
|
|
13
14
|
return text;
|
|
@@ -16,7 +17,7 @@ const parseQuotes = (text, config = Parser.getConfig(), accum = []) => {
|
|
|
16
17
|
nItalic = 0,
|
|
17
18
|
firstSingle, firstMulti, firstSpace;
|
|
18
19
|
for (let i = 1; i < length; i += 2) {
|
|
19
|
-
const len = arr[i]
|
|
20
|
+
const {length: len} = arr[i];
|
|
20
21
|
switch (len) {
|
|
21
22
|
case 2:
|
|
22
23
|
nItalic++;
|
|
@@ -54,7 +55,7 @@ const parseQuotes = (text, config = Parser.getConfig(), accum = []) => {
|
|
|
54
55
|
const QuoteToken = require('../src/nowiki/quote');
|
|
55
56
|
for (let i = 1; i < length; i += 2) {
|
|
56
57
|
new QuoteToken(arr[i].length, config, accum);
|
|
57
|
-
arr[i] = `\
|
|
58
|
+
arr[i] = `\0${accum.length - 1}q\x7F`;
|
|
58
59
|
}
|
|
59
60
|
return arr.join('');
|
|
60
61
|
};
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const Parser = require('..');
|
|
4
|
+
|
|
5
|
+
const /** @type {pseudo[]} */ simplePseudos = [
|
|
6
|
+
'root',
|
|
7
|
+
'first-child',
|
|
8
|
+
'first-of-type',
|
|
9
|
+
'last-child',
|
|
10
|
+
'last-of-type',
|
|
11
|
+
'only-child',
|
|
12
|
+
'only-of-type',
|
|
13
|
+
'empty',
|
|
14
|
+
'parent',
|
|
15
|
+
'header',
|
|
16
|
+
'hidden',
|
|
17
|
+
'visible',
|
|
18
|
+
'only-whitespace',
|
|
19
|
+
'local-link',
|
|
20
|
+
'read-only',
|
|
21
|
+
'read-write',
|
|
22
|
+
'invalid',
|
|
23
|
+
'required',
|
|
24
|
+
'optional',
|
|
25
|
+
],
|
|
26
|
+
/** @type {pseudo[]} */ complexPseudos = [
|
|
27
|
+
'is',
|
|
28
|
+
'not',
|
|
29
|
+
'nth-child',
|
|
30
|
+
'nth-of-type',
|
|
31
|
+
'nth-last-child',
|
|
32
|
+
'nth-last-of-type',
|
|
33
|
+
'contains',
|
|
34
|
+
'has',
|
|
35
|
+
'lang',
|
|
36
|
+
],
|
|
37
|
+
specialChars = [
|
|
38
|
+
['[', '&lbrack;'],
|
|
39
|
+
[']', '&rbrack;'],
|
|
40
|
+
['(', '&lpar;'],
|
|
41
|
+
[')', '&rpar;'],
|
|
42
|
+
['"', '&quot;'],
|
|
43
|
+
["'", '&apos;'],
|
|
44
|
+
[':', '&colon;'],
|
|
45
|
+
['\\', '&bsol;'],
|
|
46
|
+
['&', '&amp;'],
|
|
47
|
+
],
|
|
48
|
+
pseudoRegex = new RegExp(`:(${complexPseudos.join('|')})$`, 'u'),
|
|
49
|
+
regularRegex = /[[(,>+~]|\s+/u, // eslint-disable-line regexp/no-super-linear-move
|
|
50
|
+
attributeRegex = /^\s*(\w+)\s*(?:([~|^$*!]?=)\s*("[^"]*"|'[^']*'|[^\s[\]]+)(?:\s+(i))?\s*)?\]/u,
|
|
51
|
+
functionRegex = /^(\s*"[^"]*"\s*|\s*'[^']*'\s*|[^()]*)\)/u;
|
|
52
|
+
|
|
53
|
+
/**
|
|
54
|
+
* 清理转义符号
|
|
55
|
+
* @param {string} selector
|
|
56
|
+
*/
|
|
57
|
+
const sanitize = selector => {
|
|
58
|
+
for (const [c, escaped] of specialChars) {
|
|
59
|
+
selector = selector.replaceAll(`\\${c}`, escaped);
|
|
60
|
+
}
|
|
61
|
+
return selector;
|
|
62
|
+
};
|
|
63
|
+
|
|
64
|
+
/**
|
|
65
|
+
* 还原转义符号
|
|
66
|
+
* @param {string|undefined} selector
|
|
67
|
+
*/
|
|
68
|
+
const desanitize = selector => {
|
|
69
|
+
if (selector === undefined) {
|
|
70
|
+
return undefined;
|
|
71
|
+
}
|
|
72
|
+
for (const [c, escaped] of specialChars) {
|
|
73
|
+
selector = selector.replaceAll(escaped, c);
|
|
74
|
+
}
|
|
75
|
+
return selector.trim();
|
|
76
|
+
};
|
|
77
|
+
|
|
78
|
+
/**
|
|
79
|
+
* 去除首尾的引号
|
|
80
|
+
* @param {string|undefined} val 属性值或伪选择器函数的参数
|
|
81
|
+
*/
|
|
82
|
+
const deQuote = val => {
|
|
83
|
+
if (val === undefined) {
|
|
84
|
+
return undefined;
|
|
85
|
+
}
|
|
86
|
+
const quotes = /^(["']).*\1$/u.exec(val)?.[1];
|
|
87
|
+
return quotes ? val.slice(1, -1) : val;
|
|
88
|
+
};
|
|
89
|
+
|
|
90
|
+
/**
|
|
91
|
+
* 解析简单伪选择器
|
|
92
|
+
* @param {SelectorArray} step 当前顶部
|
|
93
|
+
* @param {string} str 不含属性和复杂伪选择器的语句
|
|
94
|
+
* @throws `SyntaxError` 非法的选择器
|
|
95
|
+
*/
|
|
96
|
+
const pushSimple = (step, str) => {
|
|
97
|
+
const pieces = str.trim().split(':'),
|
|
98
|
+
// eslint-disable-next-line unicorn/explicit-length-check
|
|
99
|
+
i = pieces.slice(1).findIndex(pseudo => simplePseudos.includes(pseudo)) + 1 || pieces.length;
|
|
100
|
+
if (pieces.slice(i).some(pseudo => !simplePseudos.includes(pseudo))) {
|
|
101
|
+
throw new SyntaxError(`非法的选择器!\n${str}\n可能需要将':'转义为'\\:'。`);
|
|
102
|
+
}
|
|
103
|
+
step.push(desanitize(pieces.slice(0, i).join(':')), ...pieces.slice(i).map(piece => `:${piece}`));
|
|
104
|
+
};
|
|
105
|
+
|
|
106
|
+
/**
|
|
107
|
+
* 解析选择器
|
|
108
|
+
* @param {string} selector
|
|
109
|
+
* @throws `SyntaxError` 非法的选择器
|
|
110
|
+
*/
|
|
111
|
+
const parseSelector = selector => {
|
|
112
|
+
selector = selector.trim();
|
|
113
|
+
const /** @type {SelectorArray[][]} */ stack = [[[]]];
|
|
114
|
+
let sanitized = sanitize(selector),
|
|
115
|
+
regex = regularRegex,
|
|
116
|
+
mt = regex.exec(sanitized),
|
|
117
|
+
[condition] = stack,
|
|
118
|
+
[step] = condition;
|
|
119
|
+
while (mt) {
|
|
120
|
+
let {0: syntax, index} = mt;
|
|
121
|
+
if (syntax.trim() === '') {
|
|
122
|
+
index += syntax.length;
|
|
123
|
+
const char = sanitized[index];
|
|
124
|
+
syntax = [',', '>', '+', '~'].includes(char) ? char : '';
|
|
125
|
+
}
|
|
126
|
+
if (syntax === ',') { // 情形1:并列
|
|
127
|
+
pushSimple(step, sanitized.slice(0, index));
|
|
128
|
+
condition = [[]];
|
|
129
|
+
[step] = condition;
|
|
130
|
+
stack.push(condition);
|
|
131
|
+
} else if (['>', '+', '~', ''].includes(syntax)) { // 情形2:关系
|
|
132
|
+
pushSimple(step, sanitized.slice(0, index));
|
|
133
|
+
if (!step.some(Boolean)) {
|
|
134
|
+
throw new SyntaxError(`非法的选择器!\n${selector}\n可能需要通用选择器'*'。`);
|
|
135
|
+
}
|
|
136
|
+
step.relation = syntax;
|
|
137
|
+
step = [];
|
|
138
|
+
condition.push(step);
|
|
139
|
+
} else if (syntax === '[') { // 情形3:属性开启
|
|
140
|
+
pushSimple(step, sanitized.slice(0, index));
|
|
141
|
+
regex = attributeRegex;
|
|
142
|
+
} else if (syntax.at(-1) === ']') { // 情形4:属性闭合
|
|
143
|
+
mt[3] = desanitize(deQuote(mt[3]));
|
|
144
|
+
step.push(mt.slice(1));
|
|
145
|
+
regex = regularRegex;
|
|
146
|
+
} else if (syntax === '(') { // 情形5:伪选择器开启
|
|
147
|
+
const pseudoExec = pseudoRegex.exec(sanitized.slice(0, index));
|
|
148
|
+
if (!pseudoExec) {
|
|
149
|
+
throw new SyntaxError(`非法的选择器!\n${desanitize(sanitized)}\n请检查伪选择器是否存在。`);
|
|
150
|
+
}
|
|
151
|
+
pushSimple(step, sanitized.slice(0, pseudoExec.index));
|
|
152
|
+
step.push(pseudoExec[1]); // 临时存放复杂伪选择器
|
|
153
|
+
regex = functionRegex;
|
|
154
|
+
} else { // 情形6:伪选择器闭合
|
|
155
|
+
const /** @type {pseudo} */ pseudo = step.pop();
|
|
156
|
+
mt.push(pseudo);
|
|
157
|
+
mt[1] = deQuote(mt[1]);
|
|
158
|
+
step.push(mt.slice(1));
|
|
159
|
+
regex = regularRegex;
|
|
160
|
+
}
|
|
161
|
+
sanitized = sanitized.slice(index + syntax.length);
|
|
162
|
+
if ([',', '>', '+', '~'].includes(syntax)) {
|
|
163
|
+
sanitized = sanitized.trim();
|
|
164
|
+
}
|
|
165
|
+
mt = regex.exec(sanitized);
|
|
166
|
+
}
|
|
167
|
+
if (regex !== regularRegex) {
|
|
168
|
+
throw new SyntaxError(`非法的选择器!\n${selector}\n检测到未闭合的'${regex === attributeRegex ? '[' : '('}'`);
|
|
169
|
+
}
|
|
170
|
+
pushSimple(step, sanitized);
|
|
171
|
+
return stack;
|
|
172
|
+
};
|
|
173
|
+
|
|
174
|
+
Parser.parsers.parseSelector = __filename;
|
|
175
|
+
module.exports = parseSelector;
|