wikiparser-node 0.2.3 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +98 -8
- package/config/default.json +45 -13
- package/config/llwiki.json +11 -11
- package/config/moegirl.json +44 -12
- package/index.js +11 -11
- package/lib/element.js +8 -8
- package/lib/node.js +2 -2
- package/lib/ranges.js +1 -1
- package/lib/title.js +7 -3
- package/mixin/attributeParent.js +2 -2
- package/mixin/fixedToken.js +1 -1
- package/mixin/hidden.js +1 -1
- package/mixin/sol.js +2 -2
- package/package.json +6 -3
- package/parser/brackets.js +11 -6
- package/parser/commentAndExt.js +9 -9
- package/parser/converter.js +5 -5
- package/parser/externalLinks.js +4 -4
- package/parser/hrAndDoubleUnderscore.js +4 -4
- package/parser/html.js +4 -4
- package/parser/links.js +9 -9
- package/parser/list.js +7 -7
- package/parser/magicLinks.js +5 -5
- package/parser/quotes.js +3 -3
- package/parser/table.js +8 -8
- package/src/attribute.js +5 -5
- package/src/converterFlags.js +6 -6
- package/src/converterRule.js +1 -1
- package/src/extLink.js +2 -1
- package/src/gallery.js +59 -11
- package/src/heading.js +1 -1
- package/src/imageParameter.js +5 -5
- package/src/index.js +7 -7
- package/src/link/category.js +1 -1
- package/src/link/file.js +1 -1
- package/src/link/galleryImage.js +47 -0
- package/src/link/index.js +15 -14
- package/src/magicLink.js +14 -4
- package/src/nowiki/dd.js +1 -1
- package/src/syntax.js +3 -0
- package/src/table/index.js +7 -7
- package/src/table/td.js +18 -15
- package/src/table/tr.js +4 -4
- package/src/tagPair/ext.js +11 -3
- package/src/transclude.js +9 -7
- package/util/debug.js +1 -1
- package/util/string.js +7 -7
- package/errors/2022-12-07T10:07:09.577Z +0 -1
- package/errors/2022-12-07T10:07:09.577Z.err +0 -11
- package/errors/2022-12-07T10:07:09.577Z.json +0 -5
- package/errors/2022-12-07T10:22:31.325Z +0 -1
- package/errors/2022-12-07T10:22:31.325Z.err +0 -11
- package/errors/2022-12-07T10:22:31.325Z.json +0 -5
package/lib/title.js
CHANGED
|
@@ -5,6 +5,8 @@ const {ucfirst} = require('../util/string'),
|
|
|
5
5
|
|
|
6
6
|
class Title {
|
|
7
7
|
title = '';
|
|
8
|
+
main = '';
|
|
9
|
+
prefix = '';
|
|
8
10
|
ns = 0;
|
|
9
11
|
interwiki = '';
|
|
10
12
|
fragment = '';
|
|
@@ -19,7 +21,7 @@ class Title {
|
|
|
19
21
|
namespace = '';
|
|
20
22
|
title = title.slice(1).trim();
|
|
21
23
|
}
|
|
22
|
-
const iw = Parser.isInterwiki(title, config);
|
|
24
|
+
const iw = defaultNs ? null : Parser.isInterwiki(title, config);
|
|
23
25
|
if (iw) {
|
|
24
26
|
this.interwiki = iw[1].toLowerCase();
|
|
25
27
|
title = title.slice(iw[0].length);
|
|
@@ -48,8 +50,10 @@ class Title {
|
|
|
48
50
|
this.fragment ||= fragment;
|
|
49
51
|
title = title.slice(0, i).trim();
|
|
50
52
|
}
|
|
51
|
-
this.
|
|
52
|
-
this.
|
|
53
|
+
this.main = ucfirst(title);
|
|
54
|
+
this.prefix = `${namespace}${namespace && ':'}`;
|
|
55
|
+
this.title = `${iw ? `${this.interwiki}:` : ''}${this.prefix}${this.main}`;
|
|
56
|
+
this.valid = Boolean(this.main || this.fragment) && !/\0\d+[eh!+-]\x7f|[<>[\]{}|]/.test(this.title);
|
|
53
57
|
}
|
|
54
58
|
}
|
|
55
59
|
|
package/mixin/attributeParent.js
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
3
|
const /** @type {Parser} */ Parser = require('..'),
|
|
4
|
-
AttributeToken = require('../src/attribute');
|
|
4
|
+
AttributeToken = require('../src/attribute');
|
|
5
5
|
|
|
6
6
|
/**
|
|
7
7
|
* @template T
|
|
8
8
|
* @param {T} constructor
|
|
9
9
|
* @returns {T}
|
|
10
10
|
*/
|
|
11
|
-
const attributeParent = (
|
|
11
|
+
const attributeParent = (ct, i = 0) => class extends ct {
|
|
12
12
|
/**
|
|
13
13
|
* @this {{children: AttributeToken[]}}
|
|
14
14
|
* @param {string} key
|
package/mixin/fixedToken.js
CHANGED
package/mixin/hidden.js
CHANGED
package/mixin/sol.js
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
3
|
const /** @type {Parser} */ Parser = require('..'),
|
|
4
|
-
Token = require('../src');
|
|
4
|
+
Token = require('../src');
|
|
5
5
|
|
|
6
6
|
/**
|
|
7
7
|
* @template T
|
|
8
8
|
* @param {T} constructor
|
|
9
9
|
* @returns {T}
|
|
10
10
|
*/
|
|
11
|
-
const sol =
|
|
11
|
+
const sol = ct => class extends ct {
|
|
12
12
|
/** @this {Token} */
|
|
13
13
|
prependNewLine() {
|
|
14
14
|
const {previousVisibleSibling = '', parentNode} = this;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "wikiparser-node",
|
|
3
|
-
"version": "0.2.3",
|
|
3
|
+
"version": "0.3.1",
|
|
4
4
|
"description": "A Node.js parser for MediaWiki markup with AST",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"mediawiki",
|
|
@@ -21,8 +21,11 @@
|
|
|
21
21
|
"test": "echo 'Error: no test specified' && exit 1"
|
|
22
22
|
},
|
|
23
23
|
"devDependencies": {
|
|
24
|
-
"
|
|
25
|
-
"
|
|
24
|
+
"@types/node": "^17.0.23",
|
|
25
|
+
"eslint": "^8.30.0",
|
|
26
|
+
"eslint-plugin-promise": "^6.1.1",
|
|
27
|
+
"eslint-plugin-n": "^15.6.0",
|
|
28
|
+
"eslint-plugin-regexp": "^1.11.0"
|
|
26
29
|
},
|
|
27
30
|
"engines": {
|
|
28
31
|
"node": "^18.4.0"
|
package/parser/brackets.js
CHANGED
|
@@ -8,15 +8,20 @@ const {removeComment} = require('../util/string'),
|
|
|
8
8
|
* @param {accum} accum
|
|
9
9
|
*/
|
|
10
10
|
const parseBrackets = (text, config = Parser.getConfig(), accum = []) => {
|
|
11
|
-
const source = '(
|
|
11
|
+
const source = '^(\0\\d+c\x7f)*={1,6}|\\[\\[|\\{{2,}|-\\{(?!\\{)',
|
|
12
12
|
/** @type {BracketExecArray[]} */ stack = [],
|
|
13
13
|
closes = {'=': '\n', '{': '}{2,}|\\|', '-': '}-', '[': ']]'},
|
|
14
14
|
/** @type {Record<string, string>} */ marks = {'!': '!', '!!': '+', '(!': '{', '!)': '}', '!-': '-', '=': '~'};
|
|
15
|
-
let regex =
|
|
15
|
+
let regex = RegExp(source, 'gm'),
|
|
16
16
|
/** @type {BracketExecArray} */ mt = regex.exec(text),
|
|
17
17
|
moreBraces = text.includes('}}'),
|
|
18
18
|
lastIndex;
|
|
19
19
|
while (mt || lastIndex <= text.length && stack.at(-1)?.[0]?.[0] === '=') {
|
|
20
|
+
if (mt?.[1]) {
|
|
21
|
+
const [, {length}] = mt;
|
|
22
|
+
mt[0] = mt[0].slice(length);
|
|
23
|
+
mt.index += length;
|
|
24
|
+
}
|
|
20
25
|
const {0: syntax, index: curIndex} = mt ?? {0: '\n', index: text.length},
|
|
21
26
|
/** @type {BracketExecArray} */ top = stack.pop() ?? {},
|
|
22
27
|
{0: open, index, parts} = top,
|
|
@@ -27,9 +32,9 @@ const parseBrackets = (text, config = Parser.getConfig(), accum = []) => {
|
|
|
27
32
|
lastIndex = curIndex + 1;
|
|
28
33
|
const {pos, findEqual} = stack.at(-1) ?? {};
|
|
29
34
|
if (!pos || findEqual || removeComment(text.slice(pos, index)) !== '') {
|
|
30
|
-
const rmt =
|
|
35
|
+
const rmt = /^(={1,6})(.+)\1((?:\s|\0\d+c\x7f)*)$/.exec(text.slice(index, curIndex));
|
|
31
36
|
if (rmt) {
|
|
32
|
-
text = `${text.slice(0, index)}\
|
|
37
|
+
text = `${text.slice(0, index)}\0${accum.length}h\x7f${text.slice(curIndex)}`;
|
|
33
38
|
lastIndex = index + 4 + String(accum.length).length;
|
|
34
39
|
const HeadingToken = require('../src/heading');
|
|
35
40
|
new HeadingToken(rmt[1].length, rmt.slice(2), config, accum);
|
|
@@ -71,7 +76,7 @@ const parseBrackets = (text, config = Parser.getConfig(), accum = []) => {
|
|
|
71
76
|
}
|
|
72
77
|
if (!skip) {
|
|
73
78
|
/* 标记{{!}}结束 */
|
|
74
|
-
text = `${text.slice(0, index + rest)}\
|
|
79
|
+
text = `${text.slice(0, index + rest)}\0${length}${ch}\x7f${text.slice(lastIndex)}`;
|
|
75
80
|
lastIndex = index + rest + 3 + String(length).length;
|
|
76
81
|
if (rest > 1) {
|
|
77
82
|
stack.push({0: open.slice(0, rest), index, pos: index + rest, parts: [[]]});
|
|
@@ -93,7 +98,7 @@ const parseBrackets = (text, config = Parser.getConfig(), accum = []) => {
|
|
|
93
98
|
stack.pop();
|
|
94
99
|
curTop = stack.at(-1);
|
|
95
100
|
}
|
|
96
|
-
regex =
|
|
101
|
+
regex = RegExp(source + (curTop
|
|
97
102
|
? `|${closes[curTop[0][0]]}${curTop.findEqual ? '|=' : ''}`
|
|
98
103
|
: ''
|
|
99
104
|
), 'gm');
|
package/parser/commentAndExt.js
CHANGED
|
@@ -8,36 +8,36 @@ const /** @type {Parser} */ Parser = require('..');
|
|
|
8
8
|
*/
|
|
9
9
|
const parseCommentAndExt = (text, config = Parser.getConfig(), accum = [], includeOnly = false) => {
|
|
10
10
|
const onlyinclude = /<onlyinclude>(.*?)<\/onlyinclude>/gs;
|
|
11
|
-
if (includeOnly &&
|
|
11
|
+
if (includeOnly && text.search(onlyinclude) !== -1) { // `<onlyinclude>`拥有最高优先级
|
|
12
12
|
return text.replace(onlyinclude, /** @param {string} inner */ (_, inner) => {
|
|
13
|
-
const str = `\
|
|
13
|
+
const str = `\0${accum.length}e\x7f`,
|
|
14
14
|
OnlyincludeToken = require('../src/onlyinclude');
|
|
15
15
|
new OnlyincludeToken(inner, config, accum);
|
|
16
16
|
return str;
|
|
17
|
-
}).replace(/(?<=^|\
|
|
17
|
+
}).replace(/(?<=^|\0\d+e\x7f).*?(?=$|\0\d+e\x7f)/gs, substr => {
|
|
18
18
|
if (substr === '') {
|
|
19
19
|
return '';
|
|
20
20
|
}
|
|
21
21
|
const NoincludeToken = require('../src/nowiki/noinclude');
|
|
22
22
|
new NoincludeToken(substr, config, accum);
|
|
23
|
-
return `\
|
|
23
|
+
return `\0${accum.length - 1}c\x7f`;
|
|
24
24
|
});
|
|
25
25
|
}
|
|
26
26
|
const ext = config.ext.join('|'),
|
|
27
27
|
includeRegex = includeOnly ? 'includeonly' : '(?:no|only)include',
|
|
28
28
|
noincludeRegex = includeOnly ? 'noinclude' : 'includeonly',
|
|
29
|
-
regex =
|
|
29
|
+
regex = RegExp(
|
|
30
30
|
'<!--.*?(?:-->|$)|' // comment
|
|
31
|
-
+ `<${includeRegex}(?:\\s
|
|
32
|
-
+ `<(${ext})(\\s
|
|
33
|
-
+ `<(${noincludeRegex})(\\s
|
|
31
|
+
+ `<${includeRegex}(?:\\s[^>]*?)?>|</${includeRegex}\\s*>|` // <includeonly>
|
|
32
|
+
+ `<(${ext})(\\s[^>]*?)?(?:/>|>(.*?)</(\\1\\s*)>)|` // 扩展标签
|
|
33
|
+
+ `<(${noincludeRegex})(\\s[^>]*?)?(?:/>|>(.*?)(?:</(\\5\\s*)>|$))`, // <noinclude>
|
|
34
34
|
'gis',
|
|
35
35
|
);
|
|
36
36
|
return text.replace(
|
|
37
37
|
regex,
|
|
38
38
|
/** @type {function(...string): string} */
|
|
39
39
|
(substr, name, attr, inner, closing, include, includeAttr, includeInner, includeClosing) => {
|
|
40
|
-
const str = `\
|
|
40
|
+
const str = `\0${accum.length}${name ? 'e' : 'c'}\x7f`;
|
|
41
41
|
if (name) {
|
|
42
42
|
const ExtToken = require('../src/tagPair/ext');
|
|
43
43
|
new ExtToken(name, attr, inner, closing, config, accum);
|
package/parser/converter.js
CHANGED
|
@@ -8,8 +8,8 @@ const /** @type {Parser} */ Parser = require('..');
|
|
|
8
8
|
*/
|
|
9
9
|
const parseConverter = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
10
10
|
const ConverterToken = require('../src/converter'),
|
|
11
|
-
regex1 =
|
|
12
|
-
regex2 =
|
|
11
|
+
regex1 = /-\{/g,
|
|
12
|
+
regex2 = /-\{|\}-/g,
|
|
13
13
|
/** @type {RegExpExecArray[]} */ stack = [];
|
|
14
14
|
let regex = regex1,
|
|
15
15
|
mt = regex.exec(firstChild);
|
|
@@ -21,12 +21,12 @@ const parseConverter = (firstChild, config = Parser.getConfig(), accum = []) =>
|
|
|
21
21
|
str = firstChild.slice(top.index + 2, index),
|
|
22
22
|
i = str.indexOf('|'),
|
|
23
23
|
[flags, text] = i === -1 ? [[], str] : [str.slice(0, i).split(';'), str.slice(i + 1)],
|
|
24
|
-
temp = text.replace(/(
|
|
24
|
+
temp = text.replace(/(&[#a-z\d]+);/i, '$1\x01'),
|
|
25
25
|
variants = `(?:${config.variants.join('|')})`,
|
|
26
|
-
rules = temp.split(
|
|
26
|
+
rules = temp.split(RegExp(`;(?=\\s*(?:${variants}|[^;]*?=>\\s*${variants})\\s*:)`))
|
|
27
27
|
.map(rule => rule.replaceAll('\x01', ';'));
|
|
28
28
|
new ConverterToken(flags, rules, config, accum);
|
|
29
|
-
firstChild = `${firstChild.slice(0, top.index)}\
|
|
29
|
+
firstChild = `${firstChild.slice(0, top.index)}\0${length}v\x7f${firstChild.slice(index + 2)}`;
|
|
30
30
|
if (stack.length === 0) {
|
|
31
31
|
regex = regex1;
|
|
32
32
|
}
|
package/parser/externalLinks.js
CHANGED
|
@@ -9,20 +9,20 @@ const {extUrlChar} = require('../util/string'),
|
|
|
9
9
|
*/
|
|
10
10
|
const parseExternalLinks = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
11
11
|
const ExtLinkToken = require('../src/extLink'),
|
|
12
|
-
regex =
|
|
12
|
+
regex = RegExp(
|
|
13
13
|
`\\[((?:${config.protocol}|//)${extUrlChar})(\\p{Zs}*)([^\\]\x01-\x08\x0a-\x1f\ufffd]*)\\]`,
|
|
14
|
-
'
|
|
14
|
+
'giu',
|
|
15
15
|
);
|
|
16
16
|
return firstChild.replace(regex, /** @type {function(...string): string} */ (_, url, space, text) => {
|
|
17
17
|
const {length} = accum,
|
|
18
|
-
mt =
|
|
18
|
+
mt = /&[lg]t;/.exec(url);
|
|
19
19
|
if (mt) {
|
|
20
20
|
url = url.slice(0, mt.index);
|
|
21
21
|
space = '';
|
|
22
22
|
text = `${url.slice(mt.index)}${space}${text}`;
|
|
23
23
|
}
|
|
24
24
|
new ExtLinkToken(url, space, text, config, accum);
|
|
25
|
-
return `\
|
|
25
|
+
return `\0${length}w\x7f`;
|
|
26
26
|
});
|
|
27
27
|
};
|
|
28
28
|
|
package/parser/hrAndDoubleUnderscore.js
CHANGED
|
@@ -10,13 +10,13 @@ const parseHrAndDoubleUnderscore = (firstChild, config = Parser.getConfig(), acc
|
|
|
10
10
|
const HrToken = require('../src/nowiki/hr'),
|
|
11
11
|
DoubleUnderscoreToken = require('../src/nowiki/doubleUnderscore'),
|
|
12
12
|
{doubleUnderscore} = config;
|
|
13
|
-
return firstChild.replace(
|
|
13
|
+
return firstChild.replace(/^((?:\0\d+c\x7f)*)-{4,}/gm, (_, lead, m) => {
|
|
14
14
|
new HrToken(m.length, config, accum);
|
|
15
|
-
return
|
|
16
|
-
}).replace(
|
|
15
|
+
return `${lead}\0${accum.length - 1}r\x7f`;
|
|
16
|
+
}).replace(RegExp(`__(${doubleUnderscore.flat().join('|')})__`, 'gi'), /** @param {string} p1 */(m, p1) => {
|
|
17
17
|
if (doubleUnderscore[0].includes(p1.toLowerCase()) || doubleUnderscore[1].includes(p1)) {
|
|
18
18
|
new DoubleUnderscoreToken(p1, config, accum);
|
|
19
|
-
return `\
|
|
19
|
+
return `\0${accum.length - 1}u\x7f`;
|
|
20
20
|
}
|
|
21
21
|
return m;
|
|
22
22
|
});
|
package/parser/html.js
CHANGED
|
@@ -7,19 +7,19 @@ const /** @type {Parser} */ Parser = require('..');
|
|
|
7
7
|
* @param {accum} accum
|
|
8
8
|
*/
|
|
9
9
|
const parseHtml = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
10
|
-
const regex = /^(\/?)([a-z][^\s/>]*)([^>]*?)(\/?>)([^<]*)$/i,
|
|
10
|
+
const regex = /^(\/?)([a-z][^\s/>]*)(\s[^>]*?)?(\/?>)([^<]*)$/i,
|
|
11
11
|
elements = config.html.flat(),
|
|
12
12
|
bits = firstChild.split('<');
|
|
13
13
|
let text = bits.shift();
|
|
14
14
|
for (const x of bits) {
|
|
15
|
-
const mt =
|
|
15
|
+
const mt = regex.exec(x),
|
|
16
16
|
t = mt?.[2],
|
|
17
17
|
name = t?.toLowerCase();
|
|
18
18
|
if (!mt || !elements.includes(name)) {
|
|
19
19
|
text += `<${x}`;
|
|
20
20
|
continue;
|
|
21
21
|
}
|
|
22
|
-
const [, slash,, params, brace, rest] = mt,
|
|
22
|
+
const [, slash,, params = '', brace, rest] = mt,
|
|
23
23
|
AttributeToken = require('../src/attribute'),
|
|
24
24
|
attr = new AttributeToken(params, 'html-attr', name, config, accum),
|
|
25
25
|
itemprop = attr.getAttr('itemprop');
|
|
@@ -30,7 +30,7 @@ const parseHtml = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
|
30
30
|
accum.pop();
|
|
31
31
|
continue;
|
|
32
32
|
}
|
|
33
|
-
text += `\
|
|
33
|
+
text += `\0${accum.length}x\x7f${rest}`;
|
|
34
34
|
const HtmlToken = require('../src/html');
|
|
35
35
|
new HtmlToken(t, attr, slash === '/', brace === '/>', config, accum);
|
|
36
36
|
}
|
package/parser/links.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
3
|
const /** @type {Parser} */ Parser = require('..'),
|
|
4
|
-
Token = require('../src');
|
|
4
|
+
Token = require('../src');
|
|
5
5
|
|
|
6
6
|
/**
|
|
7
7
|
* @param {string} firstChild
|
|
@@ -9,15 +9,15 @@ const /** @type {Parser} */ Parser = require('..'),
|
|
|
9
9
|
*/
|
|
10
10
|
const parseLinks = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
11
11
|
const parseQuotes = require('./quotes.js'),
|
|
12
|
-
regex = /^([^\n<>[\]{}|]+)(?:\|(
|
|
13
|
-
regexImg = /^([^\n<>[\]{}|]+)\|(.*)
|
|
14
|
-
regexExt =
|
|
12
|
+
regex = /^([^\n<>[\]{}|]+)(?:\|(.*?[^\]]))?\]\](.*)$/s,
|
|
13
|
+
regexImg = /^([^\n<>[\]{}|]+)\|(.*)$/s,
|
|
14
|
+
regexExt = RegExp(`^\\s*(?:${config.protocol})`, 'i'),
|
|
15
15
|
bits = firstChild.split('[[');
|
|
16
16
|
let s = bits.shift();
|
|
17
17
|
for (let i = 0; i < bits.length; i++) {
|
|
18
18
|
let mightBeImg, link, text, after;
|
|
19
19
|
const x = bits[i],
|
|
20
|
-
m =
|
|
20
|
+
m = regex.exec(x);
|
|
21
21
|
if (m) {
|
|
22
22
|
[, link, text, after] = m;
|
|
23
23
|
if (after.startsWith(']') && text?.includes('[')) {
|
|
@@ -25,13 +25,13 @@ const parseLinks = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
|
25
25
|
after = after.slice(1);
|
|
26
26
|
}
|
|
27
27
|
} else {
|
|
28
|
-
const m2 =
|
|
28
|
+
const m2 = regexImg.exec(x);
|
|
29
29
|
if (m2) {
|
|
30
30
|
mightBeImg = true;
|
|
31
31
|
[, link, text] = m2;
|
|
32
32
|
}
|
|
33
33
|
}
|
|
34
|
-
if (link === undefined || regexExt.test(link) || /\
|
|
34
|
+
if (link === undefined || regexExt.test(link) || /\0\d+[exhbru]\x7f/.test(link)) {
|
|
35
35
|
s += `[[${x}`;
|
|
36
36
|
continue;
|
|
37
37
|
}
|
|
@@ -78,8 +78,8 @@ const parseLinks = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
|
78
78
|
continue;
|
|
79
79
|
}
|
|
80
80
|
}
|
|
81
|
-
text
|
|
82
|
-
s += `\
|
|
81
|
+
text &&= parseQuotes(text, config, accum);
|
|
82
|
+
s += `\0${accum.length}l\x7f${after}`;
|
|
83
83
|
let LinkToken = require('../src/link');
|
|
84
84
|
if (!force) {
|
|
85
85
|
if (!interwiki && ns === 6) {
|
package/parser/list.js
CHANGED
|
@@ -7,20 +7,20 @@ const /** @type {Parser} */ Parser = require('..');
|
|
|
7
7
|
* @param {accum} accum
|
|
8
8
|
*/
|
|
9
9
|
const parseList = (text, config = Parser.getConfig(), accum = []) => {
|
|
10
|
-
const mt =
|
|
10
|
+
const mt = /^((?:\0\d+c\x7f)*)([;:*#]+)/.exec(text);
|
|
11
11
|
if (!mt) {
|
|
12
12
|
return text;
|
|
13
13
|
}
|
|
14
14
|
const ListToken = require('../src/nowiki/list'),
|
|
15
15
|
[total, comment, prefix] = mt;
|
|
16
|
-
text = `${comment}\
|
|
16
|
+
text = `${comment}\0${accum.length}d\x7f${text.slice(total.length)}`;
|
|
17
17
|
new ListToken(prefix, config, accum);
|
|
18
18
|
let dt = prefix.split(';').length - 1;
|
|
19
19
|
if (!dt) {
|
|
20
20
|
return text;
|
|
21
21
|
}
|
|
22
22
|
const DdToken = require('../src/nowiki/dd');
|
|
23
|
-
let regex =
|
|
23
|
+
let regex = /:+|-\{/g,
|
|
24
24
|
ex = regex.exec(text),
|
|
25
25
|
lc = 0;
|
|
26
26
|
while (ex && dt) {
|
|
@@ -28,16 +28,16 @@ const parseList = (text, config = Parser.getConfig(), accum = []) => {
|
|
|
28
28
|
if (syntax[0] === ':') {
|
|
29
29
|
if (syntax.length >= dt) {
|
|
30
30
|
new DdToken(':'.repeat(dt), config, accum);
|
|
31
|
-
return `${text.slice(0, index)}\
|
|
31
|
+
return `${text.slice(0, index)}\0${accum.length - 1}d\x7f${text.slice(index + dt)}`;
|
|
32
32
|
}
|
|
33
|
-
text = `${text.slice(0, index)}\
|
|
33
|
+
text = `${text.slice(0, index)}\0${accum.length}d\x7f${text.slice(regex.lastIndex)}`;
|
|
34
34
|
dt -= syntax.length;
|
|
35
35
|
regex.lastIndex = index + 4 + String(accum.length).length;
|
|
36
36
|
new DdToken(syntax, config, accum);
|
|
37
37
|
} else if (syntax === '-{') {
|
|
38
38
|
if (!lc) {
|
|
39
39
|
const {lastIndex} = regex;
|
|
40
|
-
regex =
|
|
40
|
+
regex = /-\{|\}-/g;
|
|
41
41
|
regex.lastIndex = lastIndex;
|
|
42
42
|
}
|
|
43
43
|
lc++;
|
|
@@ -45,7 +45,7 @@ const parseList = (text, config = Parser.getConfig(), accum = []) => {
|
|
|
45
45
|
lc--;
|
|
46
46
|
if (!lc) {
|
|
47
47
|
const {lastIndex} = regex;
|
|
48
|
-
regex =
|
|
48
|
+
regex = /:+|-\{/g;
|
|
49
49
|
regex.lastIndex = lastIndex;
|
|
50
50
|
}
|
|
51
51
|
}
|
package/parser/magicLinks.js
CHANGED
|
@@ -9,17 +9,17 @@ const {extUrlChar} = require('../util/string'),
|
|
|
9
9
|
*/
|
|
10
10
|
const parseMagicLinks = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
11
11
|
const MagicLinkToken = require('../src/magicLink'),
|
|
12
|
-
regex =
|
|
12
|
+
regex = RegExp(`\\b(?:${config.protocol})(${extUrlChar})`, 'giu');
|
|
13
13
|
return firstChild.replace(regex, /** @param {string} p1 */ (m, p1) => {
|
|
14
14
|
let trail = '',
|
|
15
15
|
url = m;
|
|
16
|
-
const m2 =
|
|
16
|
+
const m2 = /&(?:lt|gt|nbsp|#x0*(?:3[ce]|a0)|#0*(?:6[02]|160));/i.exec(url);
|
|
17
17
|
if (m2) {
|
|
18
18
|
trail = url.slice(m2.index);
|
|
19
19
|
url = url.slice(0, m2.index);
|
|
20
20
|
}
|
|
21
|
-
const sep =
|
|
22
|
-
sepChars =
|
|
21
|
+
const sep = RegExp(`[,;.:!?${url.includes('(') ? '' : ')'}]+$`),
|
|
22
|
+
sepChars = sep.exec(url);
|
|
23
23
|
if (sepChars) {
|
|
24
24
|
let correction = 0;
|
|
25
25
|
if (sepChars[0].startsWith(';') && /&(?:[a-z]+|#x[\da-f]+|#\d+)$/i.test(url.slice(0, sepChars.index))) {
|
|
@@ -32,7 +32,7 @@ const parseMagicLinks = (firstChild, config = Parser.getConfig(), accum = []) =>
|
|
|
32
32
|
return m;
|
|
33
33
|
}
|
|
34
34
|
new MagicLinkToken(url, false, config, accum);
|
|
35
|
-
return `\
|
|
35
|
+
return `\0${accum.length - 1}w\x7f${trail}`;
|
|
36
36
|
});
|
|
37
37
|
};
|
|
38
38
|
|
package/parser/quotes.js
CHANGED
|
@@ -35,8 +35,8 @@ const parseQuotes = (text, config = Parser.getConfig(), accum = []) => {
|
|
|
35
35
|
}
|
|
36
36
|
} else if (arr[i - 1].at(-2) === ' ') {
|
|
37
37
|
firstSingle = i;
|
|
38
|
-
} else
|
|
39
|
-
firstMulti
|
|
38
|
+
} else {
|
|
39
|
+
firstMulti ||= i;
|
|
40
40
|
}
|
|
41
41
|
break;
|
|
42
42
|
default:
|
|
@@ -54,7 +54,7 @@ const parseQuotes = (text, config = Parser.getConfig(), accum = []) => {
|
|
|
54
54
|
const QuoteToken = require('../src/nowiki/quote');
|
|
55
55
|
for (let i = 1; i < length; i += 2) {
|
|
56
56
|
new QuoteToken(arr[i].length, config, accum);
|
|
57
|
-
arr[i] = `\
|
|
57
|
+
arr[i] = `\0${accum.length - 1}q\x7f`;
|
|
58
58
|
}
|
|
59
59
|
return arr.join('');
|
|
60
60
|
};
|
package/parser/table.js
CHANGED
|
@@ -32,9 +32,9 @@ const parseTable = ({firstChild, type}, config = Parser.getConfig(), accum = [])
|
|
|
32
32
|
};
|
|
33
33
|
for (const outLine of lines) {
|
|
34
34
|
let top = stack.pop();
|
|
35
|
-
const [spaces] =
|
|
35
|
+
const [spaces] = /^(?:\s|\0\d+c\x7f)*/.exec(outLine);
|
|
36
36
|
const line = outLine.slice(spaces.length),
|
|
37
|
-
matchesStart =
|
|
37
|
+
matchesStart = /^(:*)((?:\s|\0\d+c\x7f)*)(\{\||\{\0\d+!\x7f|\0\d+\{\x7f)(.*)$/.exec(line);
|
|
38
38
|
if (matchesStart) {
|
|
39
39
|
while (top && top.type !== 'td') {
|
|
40
40
|
top = stack.pop();
|
|
@@ -43,7 +43,7 @@ const parseTable = ({firstChild, type}, config = Parser.getConfig(), accum = [])
|
|
|
43
43
|
if (indent) {
|
|
44
44
|
new DdToken(indent, config, accum);
|
|
45
45
|
}
|
|
46
|
-
push(`\n${spaces}${indent && `\
|
|
46
|
+
push(`\n${spaces}${indent && `\0${accum.length - 1}d\x7f`}${moreSpaces}\0${accum.length}b\x7f`, top);
|
|
47
47
|
const table = new TableToken(tableSyntax, attr, config, accum);
|
|
48
48
|
stack.push(...top ? [top] : [], table);
|
|
49
49
|
continue;
|
|
@@ -51,9 +51,9 @@ const parseTable = ({firstChild, type}, config = Parser.getConfig(), accum = [])
|
|
|
51
51
|
out += `\n${outLine}`;
|
|
52
52
|
continue;
|
|
53
53
|
}
|
|
54
|
-
const matches
|
|
55
|
-
/^(?:(
|
|
56
|
-
|
|
54
|
+
const matches
|
|
55
|
+
= /^(?:(\|\}|\0\d+!\x7f\}|\0\d+\}\x7f)|(\|-+|\0\d+!\x7f-+|\0\d+-\x7f-*)(?!-)|(!|(?:\||\0\d+!\x7f)\+?))(.*)$/
|
|
56
|
+
.exec(line);
|
|
57
57
|
if (!matches) {
|
|
58
58
|
push(`\n${outLine}`, top);
|
|
59
59
|
stack.push(...top ? [top] : []);
|
|
@@ -81,8 +81,8 @@ const parseTable = ({firstChild, type}, config = Parser.getConfig(), accum = [])
|
|
|
81
81
|
top = stack.pop();
|
|
82
82
|
}
|
|
83
83
|
const regex = cell === '!'
|
|
84
|
-
? /!!|(?:\||\
|
|
85
|
-
: /(?:\||\
|
|
84
|
+
? /!!|(?:\||\0\d+!\x7f){2}|\0\d+\+\x7f/g
|
|
85
|
+
: /(?:\||\0\d+!\x7f){2}|\0\d+\+\x7f/g;
|
|
86
86
|
let mt = regex.exec(attr),
|
|
87
87
|
lastIndex = 0,
|
|
88
88
|
lastSyntax = `\n${spaces}${cell}`;
|
package/src/attribute.js
CHANGED
|
@@ -63,11 +63,11 @@ class AttributeToken extends Token {
|
|
|
63
63
|
token = Parser.run(() => new Token(string, config).parseOnce(0, include).parseOnce());
|
|
64
64
|
string = token.firstChild;
|
|
65
65
|
}
|
|
66
|
-
string = removeComment(string).replace(/\
|
|
66
|
+
string = removeComment(string).replace(/\0\d+~\x7f/g, '=');
|
|
67
67
|
const build = /** @param {string|boolean} str */ str =>
|
|
68
68
|
typeof str === 'boolean' || !token ? str : token.buildFromStr(str).map(String).join('');
|
|
69
69
|
for (const [, key,, quoted, unquoted] of string
|
|
70
|
-
.matchAll(/([^\s/][^\s/=]*)(?:\s*=\s*(?:(["'])(.*?)(?:\2|$)|(\S*)))?/
|
|
70
|
+
.matchAll(/([^\s/][^\s/=]*)(?:\s*=\s*(?:(["'])(.*?)(?:\2|$)|(\S*)))?/gs)
|
|
71
71
|
) {
|
|
72
72
|
if (!this.setAttr(build(key), build(quoted ?? unquoted ?? true), true)) {
|
|
73
73
|
this.#sanitized = false;
|
|
@@ -112,12 +112,12 @@ class AttributeToken extends Token {
|
|
|
112
112
|
if (this.type !== 'ext-attr') {
|
|
113
113
|
for (let [key, text] of this.#attr) {
|
|
114
114
|
let built = false;
|
|
115
|
-
if (key.includes('\
|
|
115
|
+
if (key.includes('\0')) {
|
|
116
116
|
this.#attr.delete(key);
|
|
117
117
|
key = this.buildFromStr(key).map(String).join('');
|
|
118
118
|
built = true;
|
|
119
119
|
}
|
|
120
|
-
if (typeof text === 'string' && text.includes('\
|
|
120
|
+
if (typeof text === 'string' && text.includes('\0')) {
|
|
121
121
|
text = this.buildFromStr(text).map(String).join('');
|
|
122
122
|
built = true;
|
|
123
123
|
}
|
|
@@ -184,7 +184,7 @@ class AttributeToken extends Token {
|
|
|
184
184
|
parsedKey = this.type !== 'ext-attr' && !init
|
|
185
185
|
? Parser.run(() => new Token(key, config).parseOnce(0, include).parseOnce().firstChild)
|
|
186
186
|
: key;
|
|
187
|
-
if (!/^(?:[\w:]|\
|
|
187
|
+
if (!/^(?:[\w:]|\0\d+[t!~{}+-]\x7f)(?:[\w:.-]|\0\d+[t!~{}+-]\x7f)*$/.test(parsedKey)) {
|
|
188
188
|
if (init) {
|
|
189
189
|
return false;
|
|
190
190
|
}
|
package/src/converterFlags.js
CHANGED
|
@@ -99,11 +99,16 @@ class ConverterFlagsToken extends Token {
|
|
|
99
99
|
return new Set(this.#flags);
|
|
100
100
|
}
|
|
101
101
|
|
|
102
|
+
/** @complexity `n` */
|
|
103
|
+
getUnknownFlags() {
|
|
104
|
+
return this.#flags.filter(flag => /\{\{[^{}]+\}\}/.test(flag));
|
|
105
|
+
}
|
|
106
|
+
|
|
102
107
|
/** @complexity `n` */
|
|
103
108
|
getEffectiveFlags() {
|
|
104
109
|
const {variants} = this.getAttribute('config'),
|
|
105
110
|
variantFlags = this.#flags.filter(flag => variants.includes(flag)),
|
|
106
|
-
unknownFlags = this
|
|
111
|
+
unknownFlags = this.getUnknownFlags();
|
|
107
112
|
if (variantFlags.length) {
|
|
108
113
|
return new Set([...variantFlags, ...unknownFlags]);
|
|
109
114
|
}
|
|
@@ -138,11 +143,6 @@ class ConverterFlagsToken extends Token {
|
|
|
138
143
|
return flags;
|
|
139
144
|
}
|
|
140
145
|
|
|
141
|
-
/** @complexity `n` */
|
|
142
|
-
getUnknownFlags() {
|
|
143
|
-
return [...this.getFlags()].filter(flag => /{{.+}}/.test(flag));
|
|
144
|
-
}
|
|
145
|
-
|
|
146
146
|
/** @param {string} flag */
|
|
147
147
|
hasFlag(flag) {
|
|
148
148
|
if (typeof flag !== 'string') {
|
package/src/converterRule.js
CHANGED
|
@@ -47,7 +47,7 @@ class ConverterRuleToken extends Token {
|
|
|
47
47
|
|
|
48
48
|
cloneNode() {
|
|
49
49
|
const cloned = this.cloneChildren(),
|
|
50
|
-
placeholders = ['', ':', '
|
|
50
|
+
placeholders = ['', 'zh:', '=>zh:'],
|
|
51
51
|
placeholder = placeholders[cloned.length - 1],
|
|
52
52
|
token = Parser.run(() => new ConverterRuleToken(placeholder, placeholder, this.getAttribute('config')));
|
|
53
53
|
for (let i = 0; i < cloned.length; i++) {
|
package/src/extLink.js
CHANGED
|
@@ -47,12 +47,13 @@ class ExtLinkToken extends Token {
|
|
|
47
47
|
if (text) {
|
|
48
48
|
token.appendChild(text);
|
|
49
49
|
}
|
|
50
|
+
return token;
|
|
50
51
|
}
|
|
51
52
|
|
|
52
53
|
#correct() {
|
|
53
54
|
if (!this.#space && this.childNodes.length > 1
|
|
54
55
|
// 都替换成`<`肯定不对,但无妨
|
|
55
|
-
&& /^[^[\]<>"{\
|
|
56
|
+
&& /^[^[\]<>"{\0-\x1f\x7f\p{Zs}\ufffd]/u.test(this.lastElementChild.text().replace(/&[lg]t;/, '<'))
|
|
56
57
|
) {
|
|
57
58
|
this.#space = ' ';
|
|
58
59
|
}
|