wikiparser-node 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +47 -4
- package/config/default.json +45 -13
- package/config/llwiki.json +11 -11
- package/config/moegirl.json +44 -12
- package/index.js +11 -11
- package/lib/element.js +6 -6
- package/lib/ranges.js +1 -1
- package/lib/title.js +1 -1
- package/package.json +5 -2
- package/parser/brackets.js +11 -6
- package/parser/commentAndExt.js +9 -9
- package/parser/converter.js +5 -5
- package/parser/externalLinks.js +4 -4
- package/parser/hrAndDoubleUnderscore.js +4 -4
- package/parser/html.js +4 -4
- package/parser/links.js +7 -7
- package/parser/list.js +7 -7
- package/parser/magicLinks.js +5 -5
- package/parser/quotes.js +1 -1
- package/parser/table.js +8 -8
- package/src/attribute.js +5 -5
- package/src/converterFlags.js +6 -6
- package/src/extLink.js +1 -1
- package/src/gallery.js +16 -1
- package/src/heading.js +1 -1
- package/src/imageParameter.js +5 -5
- package/src/index.js +7 -7
- package/src/link/index.js +9 -9
- package/src/magicLink.js +3 -3
- package/src/nowiki/dd.js +1 -1
- package/src/syntax.js +3 -0
- package/src/table/index.js +2 -2
- package/src/table/td.js +5 -4
- package/src/table/tr.js +1 -1
- package/src/transclude.js +9 -7
- package/util/debug.js +1 -1
- package/util/string.js +5 -5
package/parser/commentAndExt.js
CHANGED
|
@@ -8,36 +8,36 @@ const /** @type {Parser} */ Parser = require('..');
|
|
|
8
8
|
*/
|
|
9
9
|
const parseCommentAndExt = (text, config = Parser.getConfig(), accum = [], includeOnly = false) => {
|
|
10
10
|
const onlyinclude = /<onlyinclude>(.*?)<\/onlyinclude>/gs;
|
|
11
|
-
if (includeOnly &&
|
|
11
|
+
if (includeOnly && text.search(onlyinclude) !== -1) { // `<onlyinclude>`拥有最高优先级
|
|
12
12
|
return text.replace(onlyinclude, /** @param {string} inner */ (_, inner) => {
|
|
13
|
-
const str = `\
|
|
13
|
+
const str = `\0${accum.length}e\x7f`,
|
|
14
14
|
OnlyincludeToken = require('../src/onlyinclude');
|
|
15
15
|
new OnlyincludeToken(inner, config, accum);
|
|
16
16
|
return str;
|
|
17
|
-
}).replace(/(?<=^|\
|
|
17
|
+
}).replace(/(?<=^|\0\d+e\x7f).*?(?=$|\0\d+e\x7f)/gs, substr => {
|
|
18
18
|
if (substr === '') {
|
|
19
19
|
return '';
|
|
20
20
|
}
|
|
21
21
|
const NoincludeToken = require('../src/nowiki/noinclude');
|
|
22
22
|
new NoincludeToken(substr, config, accum);
|
|
23
|
-
return `\
|
|
23
|
+
return `\0${accum.length - 1}c\x7f`;
|
|
24
24
|
});
|
|
25
25
|
}
|
|
26
26
|
const ext = config.ext.join('|'),
|
|
27
27
|
includeRegex = includeOnly ? 'includeonly' : '(?:no|only)include',
|
|
28
28
|
noincludeRegex = includeOnly ? 'noinclude' : 'includeonly',
|
|
29
|
-
regex =
|
|
29
|
+
regex = RegExp(
|
|
30
30
|
'<!--.*?(?:-->|$)|' // comment
|
|
31
|
-
+ `<${includeRegex}(?:\\s
|
|
32
|
-
+ `<(${ext})(\\s
|
|
33
|
-
+ `<(${noincludeRegex})(\\s
|
|
31
|
+
+ `<${includeRegex}(?:\\s[^>]*?)?>|</${includeRegex}\\s*>|` // <includeonly>
|
|
32
|
+
+ `<(${ext})(\\s[^>]*?)?(?:/>|>(.*?)</(\\1\\s*)>)|` // 扩展标签
|
|
33
|
+
+ `<(${noincludeRegex})(\\s[^>]*?)?(?:/>|>(.*?)(?:</(\\5\\s*)>|$))`, // <noinclude>
|
|
34
34
|
'gis',
|
|
35
35
|
);
|
|
36
36
|
return text.replace(
|
|
37
37
|
regex,
|
|
38
38
|
/** @type {function(...string): string} */
|
|
39
39
|
(substr, name, attr, inner, closing, include, includeAttr, includeInner, includeClosing) => {
|
|
40
|
-
const str = `\
|
|
40
|
+
const str = `\0${accum.length}${name ? 'e' : 'c'}\x7f`;
|
|
41
41
|
if (name) {
|
|
42
42
|
const ExtToken = require('../src/tagPair/ext');
|
|
43
43
|
new ExtToken(name, attr, inner, closing, config, accum);
|
package/parser/converter.js
CHANGED
|
@@ -8,8 +8,8 @@ const /** @type {Parser} */ Parser = require('..');
|
|
|
8
8
|
*/
|
|
9
9
|
const parseConverter = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
10
10
|
const ConverterToken = require('../src/converter'),
|
|
11
|
-
regex1 =
|
|
12
|
-
regex2 =
|
|
11
|
+
regex1 = /-\{/g,
|
|
12
|
+
regex2 = /-\{|\}-/g,
|
|
13
13
|
/** @type {RegExpExecArray[]} */ stack = [];
|
|
14
14
|
let regex = regex1,
|
|
15
15
|
mt = regex.exec(firstChild);
|
|
@@ -21,12 +21,12 @@ const parseConverter = (firstChild, config = Parser.getConfig(), accum = []) =>
|
|
|
21
21
|
str = firstChild.slice(top.index + 2, index),
|
|
22
22
|
i = str.indexOf('|'),
|
|
23
23
|
[flags, text] = i === -1 ? [[], str] : [str.slice(0, i).split(';'), str.slice(i + 1)],
|
|
24
|
-
temp = text.replace(/(
|
|
24
|
+
temp = text.replace(/(&[#a-z\d]+);/i, '$1\x01'),
|
|
25
25
|
variants = `(?:${config.variants.join('|')})`,
|
|
26
|
-
rules = temp.split(
|
|
26
|
+
rules = temp.split(RegExp(`;(?=\\s*(?:${variants}|[^;]*?=>\\s*${variants})\\s*:)`))
|
|
27
27
|
.map(rule => rule.replaceAll('\x01', ';'));
|
|
28
28
|
new ConverterToken(flags, rules, config, accum);
|
|
29
|
-
firstChild = `${firstChild.slice(0, top.index)}\
|
|
29
|
+
firstChild = `${firstChild.slice(0, top.index)}\0${length}v\x7f${firstChild.slice(index + 2)}`;
|
|
30
30
|
if (stack.length === 0) {
|
|
31
31
|
regex = regex1;
|
|
32
32
|
}
|
package/parser/externalLinks.js
CHANGED
|
@@ -9,20 +9,20 @@ const {extUrlChar} = require('../util/string'),
|
|
|
9
9
|
*/
|
|
10
10
|
const parseExternalLinks = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
11
11
|
const ExtLinkToken = require('../src/extLink'),
|
|
12
|
-
regex =
|
|
12
|
+
regex = RegExp(
|
|
13
13
|
`\\[((?:${config.protocol}|//)${extUrlChar})(\\p{Zs}*)([^\\]\x01-\x08\x0a-\x1f\ufffd]*)\\]`,
|
|
14
|
-
'
|
|
14
|
+
'giu',
|
|
15
15
|
);
|
|
16
16
|
return firstChild.replace(regex, /** @type {function(...string): string} */ (_, url, space, text) => {
|
|
17
17
|
const {length} = accum,
|
|
18
|
-
mt =
|
|
18
|
+
mt = /&[lg]t;/.exec(url);
|
|
19
19
|
if (mt) {
|
|
20
20
|
url = url.slice(0, mt.index);
|
|
21
21
|
space = '';
|
|
22
22
|
text = `${url.slice(mt.index)}${space}${text}`;
|
|
23
23
|
}
|
|
24
24
|
new ExtLinkToken(url, space, text, config, accum);
|
|
25
|
-
return `\
|
|
25
|
+
return `\0${length}w\x7f`;
|
|
26
26
|
});
|
|
27
27
|
};
|
|
28
28
|
|
package/parser/hrAndDoubleUnderscore.js
CHANGED
|
@@ -10,13 +10,13 @@ const parseHrAndDoubleUnderscore = (firstChild, config = Parser.getConfig(), acc
|
|
|
10
10
|
const HrToken = require('../src/nowiki/hr'),
|
|
11
11
|
DoubleUnderscoreToken = require('../src/nowiki/doubleUnderscore'),
|
|
12
12
|
{doubleUnderscore} = config;
|
|
13
|
-
return firstChild.replace(
|
|
13
|
+
return firstChild.replace(/^((?:\0\d+c\x7f)*)-{4,}/gm, (_, lead, m) => {
|
|
14
14
|
new HrToken(m.length, config, accum);
|
|
15
|
-
return
|
|
16
|
-
}).replace(
|
|
15
|
+
return `${lead}\0${accum.length - 1}r\x7f`;
|
|
16
|
+
}).replace(RegExp(`__(${doubleUnderscore.flat().join('|')})__`, 'gi'), /** @param {string} p1 */(m, p1) => {
|
|
17
17
|
if (doubleUnderscore[0].includes(p1.toLowerCase()) || doubleUnderscore[1].includes(p1)) {
|
|
18
18
|
new DoubleUnderscoreToken(p1, config, accum);
|
|
19
|
-
return `\
|
|
19
|
+
return `\0${accum.length - 1}u\x7f`;
|
|
20
20
|
}
|
|
21
21
|
return m;
|
|
22
22
|
});
|
package/parser/html.js
CHANGED
|
@@ -7,19 +7,19 @@ const /** @type {Parser} */ Parser = require('..');
|
|
|
7
7
|
* @param {accum} accum
|
|
8
8
|
*/
|
|
9
9
|
const parseHtml = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
10
|
-
const regex = /^(\/?)([a-z][^\s/>]*)([^>]*?)(\/?>)([^<]*)$/i,
|
|
10
|
+
const regex = /^(\/?)([a-z][^\s/>]*)(\s[^>]*?)?(\/?>)([^<]*)$/i,
|
|
11
11
|
elements = config.html.flat(),
|
|
12
12
|
bits = firstChild.split('<');
|
|
13
13
|
let text = bits.shift();
|
|
14
14
|
for (const x of bits) {
|
|
15
|
-
const mt =
|
|
15
|
+
const mt = regex.exec(x),
|
|
16
16
|
t = mt?.[2],
|
|
17
17
|
name = t?.toLowerCase();
|
|
18
18
|
if (!mt || !elements.includes(name)) {
|
|
19
19
|
text += `<${x}`;
|
|
20
20
|
continue;
|
|
21
21
|
}
|
|
22
|
-
const [, slash,, params, brace, rest] = mt,
|
|
22
|
+
const [, slash,, params = '', brace, rest] = mt,
|
|
23
23
|
AttributeToken = require('../src/attribute'),
|
|
24
24
|
attr = new AttributeToken(params, 'html-attr', name, config, accum),
|
|
25
25
|
itemprop = attr.getAttr('itemprop');
|
|
@@ -30,7 +30,7 @@ const parseHtml = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
|
30
30
|
accum.pop();
|
|
31
31
|
continue;
|
|
32
32
|
}
|
|
33
|
-
text += `\
|
|
33
|
+
text += `\0${accum.length}x\x7f${rest}`;
|
|
34
34
|
const HtmlToken = require('../src/html');
|
|
35
35
|
new HtmlToken(t, attr, slash === '/', brace === '/>', config, accum);
|
|
36
36
|
}
|
package/parser/links.js
CHANGED
|
@@ -9,15 +9,15 @@ const /** @type {Parser} */ Parser = require('..'),
|
|
|
9
9
|
*/
|
|
10
10
|
const parseLinks = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
11
11
|
const parseQuotes = require('./quotes.js'),
|
|
12
|
-
regex = /^([^\n<>[\]{}|]+)(?:\|(
|
|
13
|
-
regexImg = /^([^\n<>[\]{}|]+)\|(.*)
|
|
14
|
-
regexExt =
|
|
12
|
+
regex = /^([^\n<>[\]{}|]+)(?:\|(.*?[^\]]))?\]\](.*)$/s,
|
|
13
|
+
regexImg = /^([^\n<>[\]{}|]+)\|(.*)$/s,
|
|
14
|
+
regexExt = RegExp(`^\\s*(?:${config.protocol})`, 'i'),
|
|
15
15
|
bits = firstChild.split('[[');
|
|
16
16
|
let s = bits.shift();
|
|
17
17
|
for (let i = 0; i < bits.length; i++) {
|
|
18
18
|
let mightBeImg, link, text, after;
|
|
19
19
|
const x = bits[i],
|
|
20
|
-
m =
|
|
20
|
+
m = regex.exec(x);
|
|
21
21
|
if (m) {
|
|
22
22
|
[, link, text, after] = m;
|
|
23
23
|
if (after.startsWith(']') && text?.includes('[')) {
|
|
@@ -25,13 +25,13 @@ const parseLinks = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
|
25
25
|
after = after.slice(1);
|
|
26
26
|
}
|
|
27
27
|
} else {
|
|
28
|
-
const m2 =
|
|
28
|
+
const m2 = regexImg.exec(x);
|
|
29
29
|
if (m2) {
|
|
30
30
|
mightBeImg = true;
|
|
31
31
|
[, link, text] = m2;
|
|
32
32
|
}
|
|
33
33
|
}
|
|
34
|
-
if (link === undefined || regexExt.test(link) || /\
|
|
34
|
+
if (link === undefined || regexExt.test(link) || /\0\d+[exhbru]\x7f/.test(link)) {
|
|
35
35
|
s += `[[${x}`;
|
|
36
36
|
continue;
|
|
37
37
|
}
|
|
@@ -79,7 +79,7 @@ const parseLinks = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
|
79
79
|
}
|
|
80
80
|
}
|
|
81
81
|
text &&= parseQuotes(text, config, accum);
|
|
82
|
-
s += `\
|
|
82
|
+
s += `\0${accum.length}l\x7f${after}`;
|
|
83
83
|
let LinkToken = require('../src/link');
|
|
84
84
|
if (!force) {
|
|
85
85
|
if (!interwiki && ns === 6) {
|
package/parser/list.js
CHANGED
|
@@ -7,20 +7,20 @@ const /** @type {Parser} */ Parser = require('..');
|
|
|
7
7
|
* @param {accum} accum
|
|
8
8
|
*/
|
|
9
9
|
const parseList = (text, config = Parser.getConfig(), accum = []) => {
|
|
10
|
-
const mt =
|
|
10
|
+
const mt = /^((?:\0\d+c\x7f)*)([;:*#]+)/.exec(text);
|
|
11
11
|
if (!mt) {
|
|
12
12
|
return text;
|
|
13
13
|
}
|
|
14
14
|
const ListToken = require('../src/nowiki/list'),
|
|
15
15
|
[total, comment, prefix] = mt;
|
|
16
|
-
text = `${comment}\
|
|
16
|
+
text = `${comment}\0${accum.length}d\x7f${text.slice(total.length)}`;
|
|
17
17
|
new ListToken(prefix, config, accum);
|
|
18
18
|
let dt = prefix.split(';').length - 1;
|
|
19
19
|
if (!dt) {
|
|
20
20
|
return text;
|
|
21
21
|
}
|
|
22
22
|
const DdToken = require('../src/nowiki/dd');
|
|
23
|
-
let regex =
|
|
23
|
+
let regex = /:+|-\{/g,
|
|
24
24
|
ex = regex.exec(text),
|
|
25
25
|
lc = 0;
|
|
26
26
|
while (ex && dt) {
|
|
@@ -28,16 +28,16 @@ const parseList = (text, config = Parser.getConfig(), accum = []) => {
|
|
|
28
28
|
if (syntax[0] === ':') {
|
|
29
29
|
if (syntax.length >= dt) {
|
|
30
30
|
new DdToken(':'.repeat(dt), config, accum);
|
|
31
|
-
return `${text.slice(0, index)}\
|
|
31
|
+
return `${text.slice(0, index)}\0${accum.length - 1}d\x7f${text.slice(index + dt)}`;
|
|
32
32
|
}
|
|
33
|
-
text = `${text.slice(0, index)}\
|
|
33
|
+
text = `${text.slice(0, index)}\0${accum.length}d\x7f${text.slice(regex.lastIndex)}`;
|
|
34
34
|
dt -= syntax.length;
|
|
35
35
|
regex.lastIndex = index + 4 + String(accum.length).length;
|
|
36
36
|
new DdToken(syntax, config, accum);
|
|
37
37
|
} else if (syntax === '-{') {
|
|
38
38
|
if (!lc) {
|
|
39
39
|
const {lastIndex} = regex;
|
|
40
|
-
regex =
|
|
40
|
+
regex = /-\{|\}-/g;
|
|
41
41
|
regex.lastIndex = lastIndex;
|
|
42
42
|
}
|
|
43
43
|
lc++;
|
|
@@ -45,7 +45,7 @@ const parseList = (text, config = Parser.getConfig(), accum = []) => {
|
|
|
45
45
|
lc--;
|
|
46
46
|
if (!lc) {
|
|
47
47
|
const {lastIndex} = regex;
|
|
48
|
-
regex =
|
|
48
|
+
regex = /:+|-\{/g;
|
|
49
49
|
regex.lastIndex = lastIndex;
|
|
50
50
|
}
|
|
51
51
|
}
|
package/parser/magicLinks.js
CHANGED
|
@@ -9,17 +9,17 @@ const {extUrlChar} = require('../util/string'),
|
|
|
9
9
|
*/
|
|
10
10
|
const parseMagicLinks = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
11
11
|
const MagicLinkToken = require('../src/magicLink'),
|
|
12
|
-
regex =
|
|
12
|
+
regex = RegExp(`\\b(?:${config.protocol})(${extUrlChar})`, 'giu');
|
|
13
13
|
return firstChild.replace(regex, /** @param {string} p1 */ (m, p1) => {
|
|
14
14
|
let trail = '',
|
|
15
15
|
url = m;
|
|
16
|
-
const m2 =
|
|
16
|
+
const m2 = /&(?:lt|gt|nbsp|#x0*(?:3[ce]|a0)|#0*(?:6[02]|160));/i.exec(url);
|
|
17
17
|
if (m2) {
|
|
18
18
|
trail = url.slice(m2.index);
|
|
19
19
|
url = url.slice(0, m2.index);
|
|
20
20
|
}
|
|
21
|
-
const sep =
|
|
22
|
-
sepChars =
|
|
21
|
+
const sep = RegExp(`[,;.:!?${url.includes('(') ? '' : ')'}]+$`),
|
|
22
|
+
sepChars = sep.exec(url);
|
|
23
23
|
if (sepChars) {
|
|
24
24
|
let correction = 0;
|
|
25
25
|
if (sepChars[0].startsWith(';') && /&(?:[a-z]+|#x[\da-f]+|#\d+)$/i.test(url.slice(0, sepChars.index))) {
|
|
@@ -32,7 +32,7 @@ const parseMagicLinks = (firstChild, config = Parser.getConfig(), accum = []) =>
|
|
|
32
32
|
return m;
|
|
33
33
|
}
|
|
34
34
|
new MagicLinkToken(url, false, config, accum);
|
|
35
|
-
return `\
|
|
35
|
+
return `\0${accum.length - 1}w\x7f${trail}`;
|
|
36
36
|
});
|
|
37
37
|
};
|
|
38
38
|
|
package/parser/quotes.js
CHANGED
|
@@ -54,7 +54,7 @@ const parseQuotes = (text, config = Parser.getConfig(), accum = []) => {
|
|
|
54
54
|
const QuoteToken = require('../src/nowiki/quote');
|
|
55
55
|
for (let i = 1; i < length; i += 2) {
|
|
56
56
|
new QuoteToken(arr[i].length, config, accum);
|
|
57
|
-
arr[i] = `\
|
|
57
|
+
arr[i] = `\0${accum.length - 1}q\x7f`;
|
|
58
58
|
}
|
|
59
59
|
return arr.join('');
|
|
60
60
|
};
|
package/parser/table.js
CHANGED
|
@@ -32,9 +32,9 @@ const parseTable = ({firstChild, type}, config = Parser.getConfig(), accum = [])
|
|
|
32
32
|
};
|
|
33
33
|
for (const outLine of lines) {
|
|
34
34
|
let top = stack.pop();
|
|
35
|
-
const [spaces] =
|
|
35
|
+
const [spaces] = /^(?:\s|\0\d+c\x7f)*/.exec(outLine);
|
|
36
36
|
const line = outLine.slice(spaces.length),
|
|
37
|
-
matchesStart =
|
|
37
|
+
matchesStart = /^(:*)((?:\s|\0\d+c\x7f)*)(\{\||\{\0\d+!\x7f|\0\d+\{\x7f)(.*)$/.exec(line);
|
|
38
38
|
if (matchesStart) {
|
|
39
39
|
while (top && top.type !== 'td') {
|
|
40
40
|
top = stack.pop();
|
|
@@ -43,7 +43,7 @@ const parseTable = ({firstChild, type}, config = Parser.getConfig(), accum = [])
|
|
|
43
43
|
if (indent) {
|
|
44
44
|
new DdToken(indent, config, accum);
|
|
45
45
|
}
|
|
46
|
-
push(`\n${spaces}${indent && `\
|
|
46
|
+
push(`\n${spaces}${indent && `\0${accum.length - 1}d\x7f`}${moreSpaces}\0${accum.length}b\x7f`, top);
|
|
47
47
|
const table = new TableToken(tableSyntax, attr, config, accum);
|
|
48
48
|
stack.push(...top ? [top] : [], table);
|
|
49
49
|
continue;
|
|
@@ -51,9 +51,9 @@ const parseTable = ({firstChild, type}, config = Parser.getConfig(), accum = [])
|
|
|
51
51
|
out += `\n${outLine}`;
|
|
52
52
|
continue;
|
|
53
53
|
}
|
|
54
|
-
const matches
|
|
55
|
-
/^(?:(
|
|
56
|
-
|
|
54
|
+
const matches
|
|
55
|
+
= /^(?:(\|\}|\0\d+!\x7f\}|\0\d+\}\x7f)|(\|-+|\0\d+!\x7f-+|\0\d+-\x7f-*)(?!-)|(!|(?:\||\0\d+!\x7f)\+?))(.*)$/
|
|
56
|
+
.exec(line);
|
|
57
57
|
if (!matches) {
|
|
58
58
|
push(`\n${outLine}`, top);
|
|
59
59
|
stack.push(...top ? [top] : []);
|
|
@@ -81,8 +81,8 @@ const parseTable = ({firstChild, type}, config = Parser.getConfig(), accum = [])
|
|
|
81
81
|
top = stack.pop();
|
|
82
82
|
}
|
|
83
83
|
const regex = cell === '!'
|
|
84
|
-
? /!!|(?:\||\
|
|
85
|
-
: /(?:\||\
|
|
84
|
+
? /!!|(?:\||\0\d+!\x7f){2}|\0\d+\+\x7f/g
|
|
85
|
+
: /(?:\||\0\d+!\x7f){2}|\0\d+\+\x7f/g;
|
|
86
86
|
let mt = regex.exec(attr),
|
|
87
87
|
lastIndex = 0,
|
|
88
88
|
lastSyntax = `\n${spaces}${cell}`;
|
package/src/attribute.js
CHANGED
|
@@ -63,11 +63,11 @@ class AttributeToken extends Token {
|
|
|
63
63
|
token = Parser.run(() => new Token(string, config).parseOnce(0, include).parseOnce());
|
|
64
64
|
string = token.firstChild;
|
|
65
65
|
}
|
|
66
|
-
string = removeComment(string).replace(/\
|
|
66
|
+
string = removeComment(string).replace(/\0\d+~\x7f/g, '=');
|
|
67
67
|
const build = /** @param {string|boolean} str */ str =>
|
|
68
68
|
typeof str === 'boolean' || !token ? str : token.buildFromStr(str).map(String).join('');
|
|
69
69
|
for (const [, key,, quoted, unquoted] of string
|
|
70
|
-
.matchAll(/([^\s/][^\s/=]*)(?:\s*=\s*(?:(["'])(.*?)(?:\2|$)|(\S*)))?/
|
|
70
|
+
.matchAll(/([^\s/][^\s/=]*)(?:\s*=\s*(?:(["'])(.*?)(?:\2|$)|(\S*)))?/gs)
|
|
71
71
|
) {
|
|
72
72
|
if (!this.setAttr(build(key), build(quoted ?? unquoted ?? true), true)) {
|
|
73
73
|
this.#sanitized = false;
|
|
@@ -112,12 +112,12 @@ class AttributeToken extends Token {
|
|
|
112
112
|
if (this.type !== 'ext-attr') {
|
|
113
113
|
for (let [key, text] of this.#attr) {
|
|
114
114
|
let built = false;
|
|
115
|
-
if (key.includes('\
|
|
115
|
+
if (key.includes('\0')) {
|
|
116
116
|
this.#attr.delete(key);
|
|
117
117
|
key = this.buildFromStr(key).map(String).join('');
|
|
118
118
|
built = true;
|
|
119
119
|
}
|
|
120
|
-
if (typeof text === 'string' && text.includes('\
|
|
120
|
+
if (typeof text === 'string' && text.includes('\0')) {
|
|
121
121
|
text = this.buildFromStr(text).map(String).join('');
|
|
122
122
|
built = true;
|
|
123
123
|
}
|
|
@@ -184,7 +184,7 @@ class AttributeToken extends Token {
|
|
|
184
184
|
parsedKey = this.type !== 'ext-attr' && !init
|
|
185
185
|
? Parser.run(() => new Token(key, config).parseOnce(0, include).parseOnce().firstChild)
|
|
186
186
|
: key;
|
|
187
|
-
if (!/^(?:[\w:]|\
|
|
187
|
+
if (!/^(?:[\w:]|\0\d+[t!~{}+-]\x7f)(?:[\w:.-]|\0\d+[t!~{}+-]\x7f)*$/.test(parsedKey)) {
|
|
188
188
|
if (init) {
|
|
189
189
|
return false;
|
|
190
190
|
}
|
package/src/converterFlags.js
CHANGED
|
@@ -99,11 +99,16 @@ class ConverterFlagsToken extends Token {
|
|
|
99
99
|
return new Set(this.#flags);
|
|
100
100
|
}
|
|
101
101
|
|
|
102
|
+
/** @complexity `n` */
|
|
103
|
+
getUnknownFlags() {
|
|
104
|
+
return this.#flags.filter(flag => /\{\{[^{}]+\}\}/.test(flag));
|
|
105
|
+
}
|
|
106
|
+
|
|
102
107
|
/** @complexity `n` */
|
|
103
108
|
getEffectiveFlags() {
|
|
104
109
|
const {variants} = this.getAttribute('config'),
|
|
105
110
|
variantFlags = this.#flags.filter(flag => variants.includes(flag)),
|
|
106
|
-
unknownFlags = this
|
|
111
|
+
unknownFlags = this.getUnknownFlags();
|
|
107
112
|
if (variantFlags.length) {
|
|
108
113
|
return new Set([...variantFlags, ...unknownFlags]);
|
|
109
114
|
}
|
|
@@ -138,11 +143,6 @@ class ConverterFlagsToken extends Token {
|
|
|
138
143
|
return flags;
|
|
139
144
|
}
|
|
140
145
|
|
|
141
|
-
/** @complexity `n` */
|
|
142
|
-
getUnknownFlags() {
|
|
143
|
-
return [...this.getFlags()].filter(flag => /{{.+}}/.test(flag));
|
|
144
|
-
}
|
|
145
|
-
|
|
146
146
|
/** @param {string} flag */
|
|
147
147
|
hasFlag(flag) {
|
|
148
148
|
if (typeof flag !== 'string') {
|
package/src/extLink.js
CHANGED
|
@@ -53,7 +53,7 @@ class ExtLinkToken extends Token {
|
|
|
53
53
|
#correct() {
|
|
54
54
|
if (!this.#space && this.childNodes.length > 1
|
|
55
55
|
// 都替换成`<`肯定不对,但无妨
|
|
56
|
-
&& /^[^[\]<>"{\
|
|
56
|
+
&& /^[^[\]<>"{\0-\x1f\x7f\p{Zs}\ufffd]/u.test(this.lastElementChild.text().replace(/&[lg]t;/, '<'))
|
|
57
57
|
) {
|
|
58
58
|
this.#space = ' ';
|
|
59
59
|
}
|
package/src/gallery.js
CHANGED
|
@@ -20,7 +20,7 @@ class GalleryToken extends Token {
|
|
|
20
20
|
constructor(inner, config = Parser.getConfig(), accum = []) {
|
|
21
21
|
super(undefined, config, true, accum, {String: ':', GalleryImageToken: ':'});
|
|
22
22
|
for (const line of inner?.split('\n') ?? []) {
|
|
23
|
-
const matches =
|
|
23
|
+
const matches = /^([^|]+)(?:\|(.*))?/.exec(line);
|
|
24
24
|
if (!matches) {
|
|
25
25
|
this.appendChild(line);
|
|
26
26
|
continue;
|
|
@@ -58,6 +58,21 @@ class GalleryToken extends Token {
|
|
|
58
58
|
text() {
|
|
59
59
|
return text(this.children, '\n');
|
|
60
60
|
}
|
|
61
|
+
|
|
62
|
+
/** @param {string} file */
|
|
63
|
+
insertImage(file, i = this.childNodes.length) {
|
|
64
|
+
let title;
|
|
65
|
+
try {
|
|
66
|
+
title = this.normalizeTitle(decodeURIComponent(file), 6, true);
|
|
67
|
+
} catch {
|
|
68
|
+
title = this.normalizeTitle(file, 6, true);
|
|
69
|
+
}
|
|
70
|
+
if (!title.valid) {
|
|
71
|
+
throw new SyntaxError(`非法的文件名:${file}`);
|
|
72
|
+
}
|
|
73
|
+
const token = Parser.run(() => new GalleryImageToken(file, undefined, title, this.getAttribute('config')));
|
|
74
|
+
return this.insertAt(token, i);
|
|
75
|
+
}
|
|
61
76
|
}
|
|
62
77
|
|
|
63
78
|
Parser.classes.GalleryToken = __filename;
|
package/src/heading.js
CHANGED
package/src/imageParameter.js
CHANGED
|
@@ -22,7 +22,7 @@ class ImageParameterToken extends Token {
|
|
|
22
22
|
* @returns {T extends 'link' ? string|Symbol : boolean}
|
|
23
23
|
*/
|
|
24
24
|
static #validate(key, value, config = Parser.getConfig()) {
|
|
25
|
-
value = value.replace(/\
|
|
25
|
+
value = value.replace(/\0\d+t\x7f/g, '').trim();
|
|
26
26
|
if (key === 'width') {
|
|
27
27
|
return /^\d*(?:x\d*)?$/.test(value);
|
|
28
28
|
} else if (['alt', 'class', 'manualthumb', 'frameless', 'framed', 'thumbnail'].includes(key)) {
|
|
@@ -31,11 +31,11 @@ class ImageParameterToken extends Token {
|
|
|
31
31
|
if (!value) {
|
|
32
32
|
return this.#noLink;
|
|
33
33
|
}
|
|
34
|
-
const regex =
|
|
34
|
+
const regex = RegExp(`(?:${config.protocol}|//)${extUrlChar}(?=\0\\d+t\x7f|$)`, 'iu');
|
|
35
35
|
if (regex.test(value)) {
|
|
36
36
|
return value;
|
|
37
37
|
}
|
|
38
|
-
if (/^\[\[
|
|
38
|
+
if (/^\[\[.+\]\]$/.test(value)) {
|
|
39
39
|
value = value.slice(2, -2);
|
|
40
40
|
}
|
|
41
41
|
if (value.includes('%')) {
|
|
@@ -106,11 +106,11 @@ class ImageParameterToken extends Token {
|
|
|
106
106
|
constructor(str, config = Parser.getConfig(), accum = []) {
|
|
107
107
|
const regexes = Object.entries(config.img).map(
|
|
108
108
|
/** @returns {[string, string, RegExp]} */
|
|
109
|
-
([syntax, param]) => [syntax, param,
|
|
109
|
+
([syntax, param]) => [syntax, param, RegExp(`^(\\s*)${syntax.replace('$1', '(.*)')}(\\s*)$`)],
|
|
110
110
|
),
|
|
111
111
|
param = regexes.find(([,, regex]) => regex.test(str));
|
|
112
112
|
if (param) {
|
|
113
|
-
const mt =
|
|
113
|
+
const mt = param[2].exec(str);
|
|
114
114
|
if (mt.length === 4 && !ImageParameterToken.#validate(param[1], mt[2], config)) {
|
|
115
115
|
// pass
|
|
116
116
|
} else {
|
package/src/index.js
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
/*
|
|
4
4
|
* PHP解析器的步骤:
|
|
5
5
|
* -1. 替换签名和`{{subst:}}`,参见Parser::preSaveTransform;这在revision中不可能保留,可以跳过
|
|
6
|
-
* 0. 移除特定字符`\
|
|
6
|
+
* 0. 移除特定字符`\0`和`\x7f`,参见Parser::parse
|
|
7
7
|
* 1. 注释/扩展标签('<'相关),参见Preprocessor_Hash::buildDomTreeArrayFromText和Sanitizer::decodeTagAttributes
|
|
8
8
|
* 2. 模板/模板变量/标题,注意rightmost法则,以及`-{`和`[[`可以破坏`{{`或`{{{`语法,
|
|
9
9
|
* 参见Preprocessor_Hash::buildDomTreeArrayFromText
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
*/
|
|
20
20
|
|
|
21
21
|
/*
|
|
22
|
-
* \
|
|
22
|
+
* \0\d+.\x7f标记Token:
|
|
23
23
|
* e: ExtToken
|
|
24
24
|
* c: CommentToken、NoIncludeToken和IncludeToken
|
|
25
25
|
* !: `{{!}}`专用
|
|
@@ -52,7 +52,7 @@ class Token extends AstElement {
|
|
|
52
52
|
type = 'root';
|
|
53
53
|
/** 解析阶段,参见顶部注释。只对plain Token有意义。 */ #stage = 0;
|
|
54
54
|
#config;
|
|
55
|
-
/** 这个数组起两个作用:1. 数组中的Token会在build时替换`/\
|
|
55
|
+
/** 这个数组起两个作用:1. 数组中的Token会在build时替换`/\0\d+.\x7f/`标记;2. 数组中的Token会依次执行parseOnce和build方法。 */
|
|
56
56
|
#accum;
|
|
57
57
|
/** @type {Record<string, Ranges>} */ #acceptable;
|
|
58
58
|
#protectedChildren = new Ranges();
|
|
@@ -66,7 +66,7 @@ class Token extends AstElement {
|
|
|
66
66
|
constructor(wikitext, config = Parser.getConfig(), halfParsed = false, accum = [], acceptable = null) {
|
|
67
67
|
super();
|
|
68
68
|
if (typeof wikitext === 'string') {
|
|
69
|
-
this.appendChild(halfParsed ? wikitext : wikitext.replace(/[\
|
|
69
|
+
this.appendChild(halfParsed ? wikitext : wikitext.replace(/[\0\x7f]/g, ''));
|
|
70
70
|
}
|
|
71
71
|
this.#config = config;
|
|
72
72
|
this.#accum = accum;
|
|
@@ -474,7 +474,7 @@ class Token extends AstElement {
|
|
|
474
474
|
if (!Parser.debugging && externalUse('buildFromStr')) {
|
|
475
475
|
this.debugOnly('buildFromStr');
|
|
476
476
|
}
|
|
477
|
-
return str.split(/[\
|
|
477
|
+
return str.split(/[\0\x7f]/).map((s, i) => {
|
|
478
478
|
if (i % 2 === 0) {
|
|
479
479
|
return s;
|
|
480
480
|
} else if (!isNaN(s.at(-1))) {
|
|
@@ -494,7 +494,7 @@ class Token extends AstElement {
|
|
|
494
494
|
}
|
|
495
495
|
this.#stage = MAX_STAGE;
|
|
496
496
|
const {childNodes: {length}, firstChild} = this;
|
|
497
|
-
if (length !== 1 || typeof firstChild !== 'string' || !firstChild.includes('\
|
|
497
|
+
if (length !== 1 || typeof firstChild !== 'string' || !firstChild.includes('\0')) {
|
|
498
498
|
return this;
|
|
499
499
|
}
|
|
500
500
|
this.replaceChildren(...this.buildFromStr(firstChild));
|
|
@@ -556,7 +556,7 @@ class Token extends AstElement {
|
|
|
556
556
|
if (table instanceof TableToken && table.type !== 'td') {
|
|
557
557
|
table.normalize();
|
|
558
558
|
const [, child] = table.childNodes;
|
|
559
|
-
if (typeof child === 'string' && child.includes('\
|
|
559
|
+
if (typeof child === 'string' && child.includes('\0')) {
|
|
560
560
|
table.removeAt(1);
|
|
561
561
|
const inner = new Token(child, this.#config, true, this.#accum);
|
|
562
562
|
table.insertAt(inner, 1);
|
package/src/link/index.js
CHANGED
|
@@ -53,10 +53,10 @@ class LinkToken extends Token {
|
|
|
53
53
|
}
|
|
54
54
|
|
|
55
55
|
afterBuild() {
|
|
56
|
-
if (this.name.includes('\
|
|
56
|
+
if (this.name.includes('\0')) {
|
|
57
57
|
this.setAttribute('name', text(this.buildFromStr(this.name)));
|
|
58
58
|
}
|
|
59
|
-
if (this.fragment.includes('\
|
|
59
|
+
if (this.fragment.includes('\0')) {
|
|
60
60
|
this.setAttribute('fragment', text(this.buildFromStr(this.fragment)));
|
|
61
61
|
}
|
|
62
62
|
const that = this;
|
|
@@ -154,7 +154,7 @@ class LinkToken extends Token {
|
|
|
154
154
|
|
|
155
155
|
/** @param {string} fragment */
|
|
156
156
|
#setFragment(fragment, page = true) {
|
|
157
|
-
fragment = String(fragment).replace(/[<>[]
|
|
157
|
+
fragment = String(fragment).replace(/[<>[]#|=!\]/g, p => encodeURIComponent(p));
|
|
158
158
|
const include = this.getAttribute('include'),
|
|
159
159
|
config = this.getAttribute('config'),
|
|
160
160
|
root = Parser.parse(`[[${page ? `:${this.name}` : ''}#${fragment}]]`, include, 6, config),
|
|
@@ -213,19 +213,19 @@ class LinkToken extends Token {
|
|
|
213
213
|
if (/[#%]/.test(linkText)) {
|
|
214
214
|
throw new Error('Pipe trick 不能用于带有"#"或"%"的场合!');
|
|
215
215
|
}
|
|
216
|
-
const m1 =
|
|
216
|
+
const m1 = /^:?(?:[ \w\x80-\xff-]+:)?([^(]+)\(.+\)$/.exec(linkText);
|
|
217
217
|
if (m1) {
|
|
218
|
-
this.setLinkText(m1[1]);
|
|
218
|
+
this.setLinkText(m1[1].trim());
|
|
219
219
|
return;
|
|
220
220
|
}
|
|
221
|
-
const m2 =
|
|
221
|
+
const m2 = /^:?(?:[ \w\x80-\xff-]+:)?([^(]+)(.+)$/.exec(linkText);
|
|
222
222
|
if (m2) {
|
|
223
|
-
this.setLinkText(m2[1]);
|
|
223
|
+
this.setLinkText(m2[1].trim());
|
|
224
224
|
return;
|
|
225
225
|
}
|
|
226
|
-
const m3 =
|
|
226
|
+
const m3 = /^:?(?:[ \w\x80-\xff-]+:)?(.+?)(?:(?<!\()\(.+\))?(?:, |,|، )./.exec(linkText);
|
|
227
227
|
if (m3) {
|
|
228
|
-
this.setLinkText(m3[1]);
|
|
228
|
+
this.setLinkText(m3[1].trim());
|
|
229
229
|
return;
|
|
230
230
|
}
|
|
231
231
|
this.setLinkText(linkText);
|
package/src/magicLink.js
CHANGED
|
@@ -12,13 +12,13 @@ class MagicLinkToken extends Token {
|
|
|
12
12
|
#protocolRegex;
|
|
13
13
|
|
|
14
14
|
get protocol() {
|
|
15
|
-
return this.
|
|
15
|
+
return this.#protocolRegex.exec(this.text())?.[0];
|
|
16
16
|
}
|
|
17
17
|
set protocol(value) {
|
|
18
18
|
if (typeof value !== 'string') {
|
|
19
19
|
this.typeError('protocol', 'String');
|
|
20
20
|
}
|
|
21
|
-
if (!
|
|
21
|
+
if (!RegExp(`${this.#protocolRegex.source}$`, 'i').test(value)) {
|
|
22
22
|
throw new RangeError(`非法的外链协议:${value}`);
|
|
23
23
|
}
|
|
24
24
|
this.replaceChildren(this.text().replace(this.#protocolRegex, value));
|
|
@@ -33,7 +33,7 @@ class MagicLinkToken extends Token {
|
|
|
33
33
|
if (doubleSlash) {
|
|
34
34
|
this.type = 'ext-link-url';
|
|
35
35
|
}
|
|
36
|
-
this.#protocolRegex =
|
|
36
|
+
this.#protocolRegex = RegExp(`^(?:${config.protocol}${doubleSlash ? '|//' : ''})`, 'i');
|
|
37
37
|
}
|
|
38
38
|
|
|
39
39
|
afterBuild() {
|
package/src/nowiki/dd.js
CHANGED
|
@@ -32,7 +32,7 @@ class DdToken extends NowikiToken {
|
|
|
32
32
|
/** @param {string} str */
|
|
33
33
|
setText(str) {
|
|
34
34
|
const src = this.type === 'dd' ? ':' : ';:*#';
|
|
35
|
-
if (
|
|
35
|
+
if (RegExp(`[^${src}]`).test(str)) {
|
|
36
36
|
throw new RangeError(`${this.constructor.name} 仅能包含${src.split('').map(c => `"${c}"`).join('、')}!`);
|
|
37
37
|
}
|
|
38
38
|
this.#update(str);
|
package/src/syntax.js
CHANGED
|
@@ -19,6 +19,9 @@ class SyntaxToken extends Token {
|
|
|
19
19
|
* @param {acceptable} acceptable
|
|
20
20
|
*/
|
|
21
21
|
constructor(wikitext, pattern, type = 'plain', config = Parser.getConfig(), accum = [], acceptable = null) {
|
|
22
|
+
if (pattern.global) {
|
|
23
|
+
throw new RangeError(`SyntaxToken 的语法正则不能含有 g 修饰符:${pattern}`);
|
|
24
|
+
}
|
|
22
25
|
super(wikitext, config, true, accum, acceptable);
|
|
23
26
|
this.type = type;
|
|
24
27
|
this.#pattern = pattern;
|