wikiparser-node 0.8.0-b → 0.8.0-m
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/default.json +832 -0
- package/config/llwiki.json +630 -0
- package/config/minimum.json +142 -0
- package/config/moegirl.json +728 -0
- package/config/zhwiki.json +1269 -0
- package/index.js +79 -0
- package/lib/element.js +137 -0
- package/lib/node.js +226 -0
- package/lib/text.js +123 -0
- package/lib/title.js +60 -0
- package/mixin/hidden.js +18 -0
- package/package.json +9 -11
- package/parser/brackets.js +119 -0
- package/parser/commentAndExt.js +61 -0
- package/parser/converter.js +45 -0
- package/parser/externalLinks.js +32 -0
- package/parser/hrAndDoubleUnderscore.js +37 -0
- package/parser/html.js +41 -0
- package/parser/links.js +93 -0
- package/parser/list.js +58 -0
- package/parser/magicLinks.js +40 -0
- package/parser/quotes.js +63 -0
- package/parser/table.js +113 -0
- package/src/arg.js +89 -0
- package/src/atom/hidden.js +11 -0
- package/src/atom/index.js +26 -0
- package/src/attribute.js +277 -0
- package/src/attributes.js +150 -0
- package/src/converter.js +70 -0
- package/src/converterFlags.js +97 -0
- package/src/converterRule.js +75 -0
- package/src/extLink.js +60 -0
- package/src/gallery.js +101 -0
- package/src/hasNowiki/index.js +32 -0
- package/src/hasNowiki/pre.js +28 -0
- package/src/heading.js +83 -0
- package/src/html.js +133 -0
- package/src/imageParameter.js +106 -0
- package/src/imagemap.js +140 -0
- package/src/imagemapLink.js +29 -0
- package/src/index.js +407 -0
- package/src/link/category.js +13 -0
- package/src/link/file.js +125 -0
- package/src/link/galleryImage.js +62 -0
- package/src/link/index.js +125 -0
- package/src/magicLink.js +68 -0
- package/src/nested/choose.js +23 -0
- package/src/nested/combobox.js +22 -0
- package/src/nested/index.js +69 -0
- package/src/nested/references.js +22 -0
- package/src/nowiki/comment.js +47 -0
- package/src/nowiki/dd.js +13 -0
- package/src/nowiki/doubleUnderscore.js +26 -0
- package/src/nowiki/hr.js +22 -0
- package/src/nowiki/index.js +34 -0
- package/src/nowiki/list.js +13 -0
- package/src/nowiki/noinclude.js +14 -0
- package/src/nowiki/quote.js +55 -0
- package/src/onlyinclude.js +39 -0
- package/src/paramTag/index.js +66 -0
- package/src/paramTag/inputbox.js +32 -0
- package/src/parameter.js +97 -0
- package/src/syntax.js +23 -0
- package/src/table/index.js +46 -0
- package/src/table/td.js +119 -0
- package/src/table/tr.js +74 -0
- package/src/tagPair/ext.js +121 -0
- package/src/tagPair/include.js +26 -0
- package/src/tagPair/index.js +77 -0
- package/src/transclude.js +323 -0
- package/util/base.js +17 -0
- package/util/diff.js +76 -0
- package/util/lint.js +54 -0
- package/util/string.js +60 -0
- package/bundle/bundle.min.js +0 -38
- package/extensions/editor.css +0 -60
- package/extensions/editor.js +0 -317
- package/extensions/ui.css +0 -119
|
'use strict';

const {removeComment} = require('../util/string'),
	Parser = require('..'),
	HeadingToken = require('../src/heading'),
	TranscludeToken = require('../src/transclude'),
	ArgToken = require('../src/arg');

/**
 * Parse curly-brace syntax: templates `{{…}}`, arguments `{{{…}}}`, and
 * line-initial headings `=…=`. Each recognized span is replaced in `text`
 * by a placeholder of the form `\0<n><type>\x7F`, where `<n>` is the new
 * token's index in `accum` and `<type>` is a single letter (`h` heading,
 * `t` template, etc.).
 * @param {string} text wikitext
 * @param {accum} accum
 * @throws TranscludeToken.constructor()
 */
const parseBrackets = (text, config = Parser.getConfig(), accum = []) => {
	// Opening syntax: optional line-initial heading (unless excluded), `[[`, `{{`/`{{{…`, or `-{`
	const source = `${config.excludes.includes('heading') ? '' : '^(\0\\d+c\x7F)*={1,6}|'}\\[\\[|\\{{2,}|-\\{(?!\\{)`,
		/** @type {BracketExecArray[]} */ stack = [],
		// Closing syntax keyed by the first character of the opening syntax
		closes = {'=': '\n', '{': '\\}{2,}|\\|', '-': '\\}-', '[': '\\]\\]'},
		// Placeholder type letters for brace-escaping magic words such as {{!}}
		/** @type {Record<string, string>} */ marks = {'!': '!', '!!': '+', '(!': '{', '!)': '}', '!-': '-', '=': '~'};
	let regex = new RegExp(source, 'gmu'),
		/** @type {BracketExecArray} */ mt = regex.exec(text),
		moreBraces = text.includes('}}'),
		lastIndex; // deliberately left undefined before the first iteration
	// Continue while a match exists, or an unclosed heading is still on the stack
	while (mt || lastIndex <= text.length && stack.at(-1)?.[0]?.[0] === '=') {
		if (mt?.[1]) {
			// Drop the comment placeholders captured in front of a heading
			const [, {length}] = mt;
			mt[0] = mt[0].slice(length);
			mt.index += length;
		}
		const {0: syntax, index: curIndex} = mt ?? {0: '\n', index: text.length},
			/** @type {BracketExecArray} */ top = stack.pop() ?? {},
			{0: open, index, parts, findEqual: topFindEqual, pos: topPos} = top,
			innerEqual = syntax === '=' && topFindEqual;
		if (syntax === ']]' || syntax === '}-') { // case 1: close an internal link or a converter
			lastIndex = curIndex + 2;
		} else if (syntax === '\n') { // case 2: close a heading
			lastIndex = curIndex + 1;
			const {pos, findEqual} = stack.at(-1) ?? {};
			if (!pos || findEqual || removeComment(text.slice(pos, index)) !== '') {
				const rmt = /^(={1,6})(.+)\1((?:\s|\0\d+c\x7F)*)$/u.exec(text.slice(index, curIndex));
				if (rmt) {
					text = `${text.slice(0, index)}\0${accum.length}h\x7F${text.slice(curIndex)}`;
					// 4 = length of the literal `\0`, `h`, `\x7F` plus the trailing newline offset
					lastIndex = index + 4 + String(accum.length).length;
					new HeadingToken(rmt[1].length, rmt.slice(2), config, accum);
				}
			}
		} else if (syntax === '|' || innerEqual) { // case 3: inside a template, incl. a single line-initial '='
			lastIndex = curIndex + 1;
			parts.at(-1).push(text.slice(topPos, curIndex));
			if (syntax === '|') {
				parts.push([]);
			}
			top.pos = lastIndex;
			top.findEqual = syntax === '|';
			stack.push(top);
		} else if (syntax.startsWith('}}')) { // case 4: close a template or an argument
			const close = syntax.slice(0, Math.min(open.length, 3)),
				rest = open.length - close.length,
				{length} = accum;
			lastIndex = curIndex + close.length; // not the final lastIndex yet
			parts.at(-1).push(text.slice(topPos, curIndex));
			let skip = false,
				ch = 't';
			if (close.length === 3) {
				new ArgToken(parts.map(part => part.join('=')), config, accum);
			} else {
				const name = removeComment(parts[0][0]).trim();
				if (name in marks) {
					ch = marks[name]; // mark {{!}} and similar magic words
				} else if (/^(?:fullurl|canonicalurl|filepath):.|^server$/iu.test(name)) {
					ch = 'm';
				} else if (/^#vardefine:./iu.test(name)) {
					ch = 'c';
				}
				try {
					new TranscludeToken(parts[0][0], parts.slice(1), config, accum);
				} catch (e) {
					// Matched verbatim against the message thrown by TranscludeToken
					// ("illegal template name"); anything else is re-thrown.
					if (e instanceof Error && e.message.startsWith('非法的模板名称:')) {
						lastIndex = index + open.length;
						skip = true;
					} else {
						throw e;
					}
				}
			}
			if (!skip) {
				text = `${text.slice(0, index + rest)}\0${length}${ch}\x7F${text.slice(lastIndex)}`;
				lastIndex = index + rest + 3 + String(length).length;
				if (rest > 1) {
					// Leftover braces can still open another template/argument
					stack.push({0: open.slice(0, rest), index, pos: index + rest, parts: [[]]});
				} else if (rest === 1 && text[index - 1] === '-') {
					// A single leftover '{' preceded by '-' forms a converter opening
					stack.push({0: '-{', index: index - 1, pos: index + 1, parts: [[]]});
				}
			}
		} else { // case 5: opening syntax
			lastIndex = curIndex + syntax.length;
			if (syntax[0] === '{') {
				mt.pos = lastIndex;
				mt.parts = [[]];
			}
			stack.push(...'0' in top ? [top] : [], mt);
		}
		moreBraces &&= text.slice(lastIndex).includes('}}');
		let curTop = stack.at(-1);
		if (!moreBraces && curTop?.[0]?.[0] === '{') {
			// No '}}' remains anywhere ahead: abandon the pending brace opening
			stack.pop();
			curTop = stack.at(-1);
		}
		// Rebuild the regex so it also matches the closing syntax of the current top
		regex = new RegExp(source + (curTop
			? `|${closes[curTop[0][0]]}${curTop.findEqual ? '|=' : ''}`
			: ''
		), 'gmu');
		regex.lastIndex = lastIndex;
		mt = regex.exec(text);
	}
	return text;
};

module.exports = parseBrackets;
|
'use strict';

const Parser = require('..'),
	OnlyincludeToken = require('../src/onlyinclude'),
	NoincludeToken = require('../src/nowiki/noinclude'),
	IncludeToken = require('../src/tagPair/include'),
	ExtToken = require('../src/tagPair/ext'),
	CommentToken = require('../src/nowiki/comment');

/**
 * Parse HTML comments and extension tags, replacing each with a
 * `\0<n>e\x7F` (extension) or `\0<n>c\x7F` (comment/noinclude) placeholder.
 * @param {string} text wikitext
 * @param {accum} accum
 * @param {boolean} includeOnly whether the page is being transcluded
 */
const parseCommentAndExt = (text, config = Parser.getConfig(), accum = [], includeOnly = false) => {
	const onlyinclude = /<onlyinclude>(.*?)<\/onlyinclude>/gsu;
	if (includeOnly && text.search(onlyinclude) !== -1) { // `<onlyinclude>` takes the highest priority
		return text.replace(onlyinclude, /** @param {string} inner */ (_, inner) => {
			// The placeholder index must be read before the token is pushed
			const str = `\0${accum.length}e\x7F`;
			new OnlyincludeToken(inner, config, accum);
			return str;
		}).replace(/(^|\0\d+e\x7F)(.*?)(?=$|\0\d+e\x7F)/gsu, (_, lead, substr) => {
			// Everything outside <onlyinclude> sections is dropped from output
			if (substr === '') {
				return lead;
			}
			new NoincludeToken(substr, config, accum);
			return `${lead}\0${accum.length - 1}c\x7F`;
		});
	}
	const ext = config.ext.join('|'),
		// When transcluding, <includeonly> pairs are stripped and <noinclude>
		// content is kept verbatim; when viewing, the roles are swapped.
		includeRegex = includeOnly ? 'includeonly' : '(?:no|only)include',
		noincludeRegex = includeOnly ? 'noinclude' : 'includeonly',
		regex = new RegExp(
			'<!--.*?(?:-->|$)|' // comment (possibly unterminated)
			+ `<${includeRegex}(?:\\s[^>]*?)?>|</${includeRegex}\\s*>|` // <includeonly>
			+ `<(${ext})(\\s[^>]*?)?(?:/>|>(.*?)</(\\1\\s*)>)|` // extension tag
			+ `<(${noincludeRegex})(\\s[^>]*?)?(?:/>|>(.*?)(?:</(\\5\\s*)>|$))`, // <noinclude>
			'gisu',
		);
	return text.replace(
		regex,
		/** @type {function(...string): string} */
		(substr, name, attr, inner, closing, include, includeAttr, includeInner, includeClosing) => {
			const str = `\0${accum.length}${name ? 'e' : 'c'}\x7F`;
			if (name) {
				new ExtToken(name, attr, inner, closing, config, accum);
			} else if (substr.startsWith('<!--')) {
				const closed = substr.endsWith('-->');
				new CommentToken(substr.slice(4, closed ? -3 : undefined), closed, config, accum);
			} else if (include) {
				new IncludeToken(include, includeAttr, includeInner, includeClosing, config, accum);
			} else {
				new NoincludeToken(substr, config, accum);
			}
			return str;
		},
	);
};

module.exports = parseCommentAndExt;
|
|
'use strict';

const Parser = require('..'),
	ConverterToken = require('../src/converter');

/**
 * Parse language-variant conversion blocks (`-{flags|rules}-`),
 * replacing each with a `\0<n>v\x7F` placeholder.
 * @param {string} wikitext wikitext
 * @param {accum} accum
 */
const parseConverter = (wikitext, config = Parser.getConfig(), accum = []) => {
	// regex1 looks only for openings; regex2 also watches for the closing `}-`
	// once at least one block is open. Both are stateful (`g` flag) and their
	// `lastIndex` is managed manually below.
	const regex1 = /-\{/gu,
		regex2 = /-\{|\}-/gu,
		/** @type {RegExpExecArray[]} */ stack = [];
	let regex = regex1,
		mt = regex.exec(wikitext);
	while (mt) {
		const {0: syntax, index} = mt;
		if (syntax === '}-') {
			// Close the innermost open block
			const top = stack.pop(),
				{length} = accum,
				str = wikitext.slice(top.index + 2, index),
				i = str.indexOf('|'),
				// Text before the first '|' holds the ';'-separated flags
				[flags, text] = i === -1 ? [[], str] : [str.slice(0, i).split(';'), str.slice(i + 1)],
				// Shield HTML-entity semicolons with \x01 so they survive the rule split
				temp = text.replace(/(&[#a-z\d]+);/giu, '$1\x01'),
				variants = `(?:${config.variants.join('|')})`,
				// Split rules only at ';' followed by a `variant:` or `… => variant:` head
				rules = temp.split(new RegExp(`;(?=\\s*(?:${variants}|[^;]*?=>\\s*${variants})\\s*:)`, 'u'))
					.map(rule => rule.replaceAll('\x01', ';'));
			new ConverterToken(flags, rules, config, accum);
			wikitext = `${wikitext.slice(0, top.index)}\0${length}v\x7F${wikitext.slice(index + 2)}`;
			if (stack.length === 0) {
				regex = regex1;
			}
			// Resume scanning right after the inserted placeholder
			regex.lastIndex = top.index + 3 + String(length).length;
		} else {
			stack.push(mt);
			regex = regex2;
			regex.lastIndex = index + 2;
		}
		mt = regex.exec(wikitext);
	}
	return wikitext;
};

module.exports = parseConverter;
|
|
'use strict';

const {extUrlChar, extUrlCharFirst} = require('../util/string'),
	Parser = require('..'),
	ExtLinkToken = require('../src/extLink');

/**
 * Parse bracketed external links (`[url text]`), replacing each with a
 * `\0<n>w\x7F` placeholder.
 * @param {string} wikitext wikitext
 * @param {accum} accum
 */
const parseExternalLinks = (wikitext, config = Parser.getConfig(), accum = []) => {
	const regex = new RegExp(
		`\\[((?:(?:${config.protocol}|//)${extUrlCharFirst}|\0\\d+m\x7F)${
			extUrlChar
		})(\\p{Zs}*)([^\\]\x01-\x08\x0A-\x1F\uFFFD]*)\\]`,
		'giu',
	);
	return wikitext.replace(regex, /** @type {function(...string): string} */ (_, url, space, text) => {
		const {length} = accum,
			// An encoded '<' or '>' terminates the URL part
			mt = /&[lg]t;/u.exec(url);
		if (mt) {
			// Move the entity and everything after it (plus the separating
			// spaces) into the link text. BUGFIX: this must be computed BEFORE
			// truncating `url` and clearing `space`; the previous order sliced
			// the already-truncated URL, so the moved fragment was always empty
			// and the `&lt;…` tail was silently dropped.
			text = `${url.slice(mt.index)}${space}${text}`;
			url = url.slice(0, mt.index);
			space = '';
		}
		new ExtLinkToken(url, space, text, config, accum);
		return `\0${length}w\x7F`;
	});
};

module.exports = parseExternalLinks;
|
|
'use strict';

const Parser = require('..'),
	AstText = require('../lib/text'),
	Token = require('../src'),
	HrToken = require('../src/nowiki/hr'),
	DoubleUnderscoreToken = require('../src/nowiki/doubleUnderscore');

/**
 * Parse horizontal rules (`----`) and behavior switches (`__TOC__` etc.).
 * @param {Token & {firstChild: AstText}} root root node
 * @param {accum} accum
 */
const parseHrAndDoubleUnderscore = ({firstChild: {data}, type, name}, config = Parser.getConfig(), accum = []) => {
	// config.doubleUnderscore: [case-insensitive switches, case-sensitive switches]
	const {doubleUnderscore} = config,
		insensitive = new Set(doubleUnderscore[0]),
		sensitive = new Set(doubleUnderscore[1]);
	if (type !== 'root' && (type !== 'ext-inner' || name !== 'poem')) {
		// Non-root content does not start at the beginning of a line; prepend a
		// \0 sentinel so the `^`-anchored <hr> regex cannot match the first line.
		// It is stripped again by the final `.slice(1)`.
		data = `\0${data}`;
	}
	data = data.replace(/^((?:\0\d+c\x7F)*)(-{4,})/gmu, (_, lead, m) => {
		// `lead` carries any comment placeholders preceding the dashes
		new HrToken(m.length, config, accum);
		return `${lead}\0${accum.length - 1}r\x7F`;
	}).replace(
		new RegExp(`__(${doubleUnderscore.flat().join('|')})__`, 'giu'),
		/** @param {string} p1 */ (m, p1) => {
			// The regex is case-insensitive, so case-sensitive switches are
			// re-checked against the exact capture here.
			if (insensitive.has(p1.toLowerCase()) || sensitive.has(p1)) {
				new DoubleUnderscoreToken(p1, config, accum);
				return `\0${accum.length - 1}u\x7F`;
			}
			return m;
		},
	);
	return type === 'root' || type === 'ext-inner' && name === 'poem' ? data : data.slice(1);
};

module.exports = parseHrAndDoubleUnderscore;
|
package/parser/html.js
ADDED
|
'use strict';

const Parser = require('..'),
	AttributesToken = require('../src/attributes'),
	HtmlToken = require('../src/html');

/**
 * Parse HTML tags, replacing each recognized tag with a `\0<n>x\x7F`
 * placeholder. Unrecognized `<…` fragments are passed through verbatim.
 * @param {string} wikitext wikitext
 * @param {accum} accum
 */
const parseHtml = (wikitext, config = Parser.getConfig(), accum = []) => {
	// Matches the remainder of a tag after its leading '<':
	// optional '/', tag name, attribute text, '>' or '/>', trailing text
	const regex = /^(\/?)([a-z][^\s/>]*)((?:\s|\/(?!>))[^>]*?)?(\/?>)([^<]*)$/iu,
		elements = new Set(config.html.flat()),
		bits = wikitext.split('<');
	let text = bits.shift();
	for (const x of bits) {
		const mt = regex.exec(x),
			t = mt?.[2],
			name = t?.toLowerCase();
		if (!mt || !elements.has(name)) {
			// Not an allowed tag: restore the '<' consumed by split()
			text += `<${x}`;
			continue;
		}
		const [, slash,, params = '', brace, rest] = mt,
			attr = new AttributesToken(params, 'html-attrs', name, config, accum),
			itemprop = attr.getAttr('itemprop');
		// <meta>/<link> are only valid as microdata, i.e. with itemprop plus
		// content/href respectively
		if (name === 'meta' && (itemprop === undefined || attr.getAttr('content') === undefined)
			|| name === 'link' && (itemprop === undefined || attr.getAttr('href') === undefined)
		) {
			text += `<${x}`;
			// Undo the AttributesToken pushed by the constructor above
			accum.pop();
			continue;
		}
		text += `\0${accum.length}x\x7F${rest}`;
		new HtmlToken(t, attr, slash === '/', brace === '/>', config, accum);
	}
	return text;
};

module.exports = parseHtml;
|
package/parser/links.js
ADDED
|
'use strict';

const Parser = require('..'),
	LinkToken = require('../src/link'),
	FileToken = require('../src/link/file'),
	CategoryToken = require('../src/link/category');

/**
 * Parse internal links (`[[…]]`), including files and categories,
 * replacing each with a `\0<n>l\x7F` placeholder.
 * @param {string} wikitext wikitext
 * @param {accum} accum
 */
const parseLinks = (wikitext, config = Parser.getConfig(), accum = []) => {
	// Required lazily to avoid a circular dependency with ./quotes.js
	const parseQuotes = require('./quotes.js');
	// A fully closed link: target, optional '|' (or a {{!}} placeholder) + text, ']]', rest
	const regex = /^((?:(?!\0\d+!\x7F)[^\n<>[\]{}|])+)(?:(\||\0\d+!\x7F)(.*?[^\]]))?\]\](.*)$/su,
		// A possibly unclosed image link whose caption may contain further links
		regexImg = /^((?:(?!\0\d+!\x7F)[^\n<>[\]{}|])+)(\||\0\d+!\x7F)(.*)$/su,
		regexExt = new RegExp(`^\\s*(?:${config.protocol})`, 'iu'),
		bits = wikitext.split('[[');
	let s = bits.shift();
	for (let i = 0; i < bits.length; i++) {
		let mightBeImg, link, delimiter, text, after;
		const x = bits[i],
			m = regex.exec(x);
		if (m) {
			[, link, delimiter, text] = m;
			[,,,, after] = m;
			if (after[0] === ']' && text?.includes('[')) {
				// Re-balance a trailing ']' when the text contains an unmatched '['
				text += ']';
				after = after.slice(1);
			}
		} else {
			const m2 = regexImg.exec(x);
			if (m2) {
				mightBeImg = true;
				[, link, delimiter, text] = m2;
			}
		}
		// Reject: no match, an external-link protocol, or an embedded placeholder
		// of type e/x/h/b/r/u inside the target
		if (link === undefined || regexExt.test(link) || /\0\d+[exhbru]\x7F/u.test(link)) {
			s += `[[${x}`;
			continue;
		}
		const force = link.trim()[0] === ':'; // leading ':' forces a plain link
		if (force && mightBeImg) {
			s += `[[${x}`;
			continue;
		}
		const title = Parser.normalizeTitle(link, 0, false, config, true, true, true),
			{ns, valid} = title;
		if (!valid) {
			s += `[[${x}`;
			continue;
		} else if (mightBeImg) {
			if (ns !== 6) { // only File: links may span multiple lines via captions
				s += `[[${x}`;
				continue;
			}
			let found;
			// Scan forward for the ']]' that closes this image; inner `[[…]]`
			// pairs belong to the caption. NOTE: this advances the outer index.
			for (i++; i < bits.length; i++) {
				const next = bits[i],
					p = next.split(']]');
				if (p.length > 2) {
					// Two closings: the second one closes the image
					found = true;
					text += `[[${p[0]}]]${p[1]}`;
					after = p.slice(2).join(']]');
					break;
				} else if (p.length === 2) {
					// One closing: it pairs with the inner '[['; keep scanning
					text += `[[${p[0]}]]${p[1]}`;
				} else {
					text += `[[${next}`;
					break;
				}
			}
			text = parseLinks(text, config, accum);
			if (!found) {
				s += `[[${link}${delimiter}${text}`;
				continue;
			}
		}
		text &&= parseQuotes(text, config, accum);
		s += `\0${accum.length}l\x7F${after}`;
		let SomeLinkToken = LinkToken;
		if (!force) {
			if (ns === 6) {
				SomeLinkToken = FileToken;
			} else if (ns === 14) {
				SomeLinkToken = CategoryToken;
			}
		}
		new SomeLinkToken(link, text, title, config, accum, delimiter);
	}
	return s;
};

module.exports = parseLinks;
|
package/parser/list.js
ADDED
|
'use strict';

const Parser = require('..'),
	ListToken = require('../src/nowiki/list'),
	DdToken = require('../src/nowiki/dd');

/**
 * Parse list prefixes (`;:*#`) at the start of one line, plus the inline
 * `:` continuations that terminate each `;` definition term.
 * @param {string} text wikitext (a single line, possibly led by comment placeholders)
 * @param {accum} accum
 */
const parseList = (text, config = Parser.getConfig(), accum = []) => {
	const mt = /^((?:\0\d+c\x7F)*)([;:*#]+)/u.exec(text);
	if (!mt) {
		return text;
	}
	const [total, comment, prefix] = mt;
	text = `${comment}\0${accum.length}d\x7F${text.slice(total.length)}`;
	new ListToken(prefix, config, accum);
	// Each ';' in the prefix opens a <dt> awaiting an inline ':' to close it
	let dt = prefix.split(';').length - 1;
	if (!dt) {
		return text;
	}
	// ':' inside a `-{…}-` converter must be skipped; swap regexes when one opens
	let regex = /:+|-\{/gu,
		ex = regex.exec(text),
		lc = 0; // converter nesting depth
	while (ex && dt) {
		const {0: syntax, index} = ex;
		if (syntax[0] === ':') {
			if (syntax.length >= dt) {
				// Enough colons to close every remaining <dt>
				new DdToken(':'.repeat(dt), config, accum);
				return `${text.slice(0, index)}\0${accum.length - 1}d\x7F${text.slice(index + dt)}`;
			}
			text = `${text.slice(0, index)}\0${accum.length}d\x7F${text.slice(regex.lastIndex)}`;
			dt -= syntax.length;
			// Resume right after the inserted placeholder
			regex.lastIndex = index + 4 + String(accum.length).length;
			new DdToken(syntax, config, accum);
		} else if (syntax === '-{') {
			if (!lc) {
				const {lastIndex} = regex;
				regex = /-\{|\}-/gu;
				regex.lastIndex = lastIndex;
			}
			lc++;
		} else {
			lc--;
			if (!lc) {
				// Converter fully closed: go back to watching for ':'
				const {lastIndex} = regex;
				regex = /:+|-\{/gu;
				regex.lastIndex = lastIndex;
			}
		}
		ex = regex.exec(text);
	}
	return text;
};

module.exports = parseList;
|
|
'use strict';

const {extUrlChar, extUrlCharFirst} = require('../util/string'),
	Parser = require('..'),
	MagicLinkToken = require('../src/magicLink');

/**
 * Parse free (bare) external links, replacing each with a `\0<n>w\x7F`
 * placeholder. Trailing punctuation and HTML entities encoding <, > or
 * &nbsp; are excluded from the URL, mirroring MediaWiki behavior.
 * @param {string} wikitext wikitext
 * @param {accum} accum
 */
const parseMagicLinks = (wikitext, config = Parser.getConfig(), accum = []) => {
	// The URL must not be preceded by a word character
	const regex = new RegExp(`(^|[^\\p{L}\\d_])(?:${config.protocol})(${extUrlCharFirst}${extUrlChar})`, 'giu');
	return wikitext.replace(regex, /** @param {string} p1 */ (m, lead, p1) => {
		let trail = '',
			url = lead ? m.slice(1) : m;
		// Entities for '<', '>' and non-breaking space terminate the URL
		const m2 = /&(?:lt|gt|nbsp|#x0*(?:3[ce]|a0)|#0*(?:6[02]|160));/iu.exec(url);
		if (m2) {
			trail = url.slice(m2.index);
			url = url.slice(0, m2.index);
		}
		// Trailing punctuation is excluded; ')' only when the URL has no '('
		const sep = new RegExp(`[,;.:!?${url.includes('(') ? '' : ')'}]+$`, 'u'),
			sepChars = sep.exec(url);
		if (sepChars) {
			let correction = 0;
			// Keep a ';' that completes an HTML entity inside the URL
			if (sepChars[0][0] === ';' && /&(?:[a-z]+|#x[\da-f]+|#\d+)$/iu.test(url.slice(0, sepChars.index))) {
				correction = 1;
			}
			trail = `${url.slice(sepChars.index + correction)}${trail}`;
			url = url.slice(0, sepChars.index + correction);
		}
		// Give up if stripping left no more URL than the protocol itself
		if (trail.length >= p1.length) {
			return m;
		}
		new MagicLinkToken(url, false, config, accum);
		return `${lead}\0${accum.length - 1}w\x7F${trail}`;
	});
};

module.exports = parseMagicLinks;
|
package/parser/quotes.js
ADDED
|
'use strict';

const Parser = require('..'),
	QuoteToken = require('../src/nowiki/quote');

/**
 * Parse apostrophe markup (`''italic''`, `'''bold'''`) on a single line,
 * following MediaWiki's doQuotes balancing rules, and replace each run of
 * apostrophes with a `\0<n>q\x7F` placeholder.
 * @param {string} text wikitext
 * @param {accum} accum
 */
const parseQuotes = (text, config = Parser.getConfig(), accum = []) => {
	// Odd slots of `arr` hold apostrophe runs, even slots the text between them
	const arr = text.split(/('{2,})/u),
		{length} = arr;
	if (length === 1) {
		return text;
	}
	let nBold = 0,
		nItalic = 0,
		// Candidate bold runs for demotion, ranked by the preceding character:
		// single letter > multiple letters > space
		firstSingle, firstMulti, firstSpace;
	for (let i = 1; i < length; i += 2) {
		const {length: len} = arr[i];
		switch (len) {
			case 2:
				nItalic++;
				break;
			case 4:
				// 4 apostrophes: one is literal text, three make a bold
				arr[i - 1] += `'`;
				arr[i] = `'''`;
				// fall through
			case 3:
				nBold++;
				if (firstSingle) {
					break;
				} else if (arr[i - 1].endsWith(' ')) {
					if (!firstMulti && !firstSpace) {
						firstSpace = i;
					}
				} else if (arr[i - 1].at(-2) === ' ') {
					firstSingle = i;
				} else {
					firstMulti ||= i;
				}
				break;
			default:
				// 5+ apostrophes: the excess beyond 5 is literal text
				arr[i - 1] += `'`.repeat(len - 5);
				arr[i] = `'''''`;
				nItalic++;
				nBold++;
		}
	}
	if (nItalic % 2 === 1 && nBold % 2 === 1) {
		// Both unbalanced: demote one bold (''') to italic ('') so the line
		// balances, preferring the best-ranked candidate
		const i = firstSingle ?? firstMulti ?? firstSpace;
		arr[i] = `''`;
		arr[i - 1] += `'`;
	}
	for (let i = 1; i < length; i += 2) {
		new QuoteToken(arr[i].length, config, accum);
		arr[i] = `\0${accum.length - 1}q\x7F`;
	}
	return arr.join('');
};

module.exports = parseQuotes;
|