wikiparser-node 0.7.1-b → 0.8.0-m
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/default.json +832 -0
- package/config/llwiki.json +630 -0
- package/config/minimum.json +142 -0
- package/config/moegirl.json +728 -0
- package/config/zhwiki.json +1269 -0
- package/index.js +79 -0
- package/lib/element.js +137 -0
- package/lib/node.js +226 -0
- package/lib/text.js +123 -0
- package/lib/title.js +60 -0
- package/mixin/hidden.js +18 -0
- package/package.json +9 -11
- package/parser/brackets.js +119 -0
- package/parser/commentAndExt.js +61 -0
- package/parser/converter.js +45 -0
- package/parser/externalLinks.js +32 -0
- package/parser/hrAndDoubleUnderscore.js +37 -0
- package/parser/html.js +41 -0
- package/parser/links.js +93 -0
- package/parser/list.js +58 -0
- package/parser/magicLinks.js +40 -0
- package/parser/quotes.js +63 -0
- package/parser/table.js +113 -0
- package/src/arg.js +89 -0
- package/src/atom/hidden.js +11 -0
- package/src/atom/index.js +26 -0
- package/src/attribute.js +277 -0
- package/src/attributes.js +150 -0
- package/src/converter.js +70 -0
- package/src/converterFlags.js +97 -0
- package/src/converterRule.js +75 -0
- package/src/extLink.js +60 -0
- package/src/gallery.js +101 -0
- package/src/hasNowiki/index.js +32 -0
- package/src/hasNowiki/pre.js +28 -0
- package/src/heading.js +83 -0
- package/src/html.js +133 -0
- package/src/imageParameter.js +106 -0
- package/src/imagemap.js +140 -0
- package/src/imagemapLink.js +29 -0
- package/src/index.js +407 -0
- package/src/link/category.js +13 -0
- package/src/link/file.js +125 -0
- package/src/link/galleryImage.js +62 -0
- package/src/link/index.js +125 -0
- package/src/magicLink.js +68 -0
- package/src/nested/choose.js +23 -0
- package/src/nested/combobox.js +22 -0
- package/src/nested/index.js +69 -0
- package/src/nested/references.js +22 -0
- package/src/nowiki/comment.js +47 -0
- package/src/nowiki/dd.js +13 -0
- package/src/nowiki/doubleUnderscore.js +26 -0
- package/src/nowiki/hr.js +22 -0
- package/src/nowiki/index.js +34 -0
- package/src/nowiki/list.js +13 -0
- package/src/nowiki/noinclude.js +14 -0
- package/src/nowiki/quote.js +55 -0
- package/src/onlyinclude.js +39 -0
- package/src/paramTag/index.js +66 -0
- package/src/paramTag/inputbox.js +32 -0
- package/src/parameter.js +97 -0
- package/src/syntax.js +23 -0
- package/src/table/index.js +46 -0
- package/src/table/td.js +119 -0
- package/src/table/tr.js +74 -0
- package/src/tagPair/ext.js +121 -0
- package/src/tagPair/include.js +26 -0
- package/src/tagPair/index.js +77 -0
- package/src/transclude.js +323 -0
- package/util/base.js +17 -0
- package/util/diff.js +76 -0
- package/util/lint.js +54 -0
- package/util/string.js +60 -0
- package/bundle/bundle.min.js +0 -40
- package/extensions/editor.css +0 -60
- package/extensions/editor.js +0 -324
- package/extensions/ui.css +0 -119
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const {extUrlChar, extUrlCharFirst} = require('../util/string'),
|
|
4
|
+
Parser = require('..'),
|
|
5
|
+
Token = require('.');
|
|
6
|
+
|
|
7
|
+
/**
 * Image parameter (one `|`-separated argument of a file link).
 * @classdesc `{childNodes: ...(AstText|Token)}`
 */
class ImageParameterToken extends Token {
	/**
	 * Check whether the value of an image parameter is legal.
	 * @template {string} T
	 * @param {T} key parameter name
	 * @param {string} value parameter value
	 * @param config parser configuration (reads `protocol` and `variants`)
	 * @param {boolean} halfParsed forwarded to `Parser.normalizeTitle`
	 * @returns {boolean}
	 */
	static #validate(key, value, config = Parser.getConfig(), halfParsed = false) {
		// Drop `\0<n>t\x7F` placeholders before looking at the value.
		value = value.replace(/\0\d+t\x7F/gu, '').trim();
		switch (key) {
			case 'width':
				return /^\d*(?:x\d*)?$/u.test(value);
			case 'link': {
				if (!value) {
					return true;
				}
				const regex = new RegExp(`(?:(?:${config.protocol}|//)${extUrlCharFirst}|\0\\d+m\x7F)${
					extUrlChar
				}(?=\0\\d+t\x7F|$)`, 'iu');
				if (regex.test(value)) {
					return true;
				} else if (value.startsWith('[[') && value.endsWith(']]')) {
					value = value.slice(2, -2);
				}
				const title = Parser.normalizeTitle(value, 0, false, config, halfParsed, true, true);
				return title.valid;
			}
			case 'lang':
				return config.variants.includes(value);
			case 'alt':
			case 'class':
			case 'manualthumb':
				return true;
			default:
				// Any other parameter must be numeric (global `isNaN` coerces the string).
				return !isNaN(value);
		}
	}

	type = 'image-parameter';
	// Syntax template of the parameter, e.g. `alt=$1`; empty for a caption.
	#syntax = '';

	/**
	 * @param {string} str image parameter
	 * @param config parser configuration (reads `img` and `excludes`)
	 * @param {accum} accum
	 */
	constructor(str, config = Parser.getConfig(), accum = []) {
		const regexes = Object.entries(config.img).map(
				/** @returns {[string, string, RegExp]} */
				([syntax, param]) => [syntax, param, new RegExp(`^(\\s*)${syntax.replace('$1', '(.*)')}(\\s*)$`, 'u')],
			),
			param = regexes.find(([,, regex]) => regex.test(str));
		if (param) {
			const mt = param[2].exec(str);
			// 4 entries: the syntax contains `$1`, so the captured value must validate.
			// 3 entries: the syntax is a bare keyword without a value.
			if (mt.length !== 4 || ImageParameterToken.#validate(param[1], mt[2], config, true)) {
				if (mt.length === 3) {
					super(undefined, config, true, accum);
					this.#syntax = str;
				} else {
					super(mt[2], config, true, accum, {
					});
					this.#syntax = `${mt[1]}${param[0]}${mt[3]}`;
				}
				this.setAttribute('name', param[1]);
				return;
			}
		}
		// Anything else is treated as the caption; list parsing is disabled for it.
		super(str, {...config, excludes: [...config.excludes, 'list']}, true, accum);
		this.setAttribute('name', 'caption').setAttribute('stage', 7);
	}

	/** @override */
	isPlain() {
		return this.name === 'caption';
	}

	/**
	 * @override
	 */
	toString(selector) {
		const inner = super.toString(selector);
		// A replacer function is used so that `$$`, `$&`, `$1`... occurring in the
		// child text are inserted literally instead of being read as replacement patterns.
		return this.#syntax
			? this.#syntax.replace('$1', () => inner)
			: inner;
	}

	/** @override */
	text() {
		const inner = super.text();
		// Same literal-insertion concern as in `toString`.
		return this.#syntax ? this.#syntax.replace('$1', () => inner).trim() : inner.trim();
	}

	/** @override */
	getPadding() {
		// Offset of the `$1` placeholder inside the syntax template, or 0 when absent.
		return Math.max(0, this.#syntax.indexOf('$1'));
	}
}
|
|
105
|
+
|
|
106
|
+
module.exports = ImageParameterToken;
|
package/src/imagemap.js
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const {generateForSelf, generateForChild} = require('../util/lint'),
|
|
4
|
+
Parser = require('..'),
|
|
5
|
+
Token = require('.'),
|
|
6
|
+
NoincludeToken = require('./nowiki/noinclude'),
|
|
7
|
+
GalleryImageToken = require('./link/galleryImage'),
|
|
8
|
+
ImagemapLinkToken = require('./imagemapLink');
|
|
9
|
+
|
|
10
|
+
/**
 * Inner content of an `<imagemap>` extension tag.
 * @classdesc `{childNodes: ...NoincludeToken, GalleryImageToken, ...(NoincludeToken|ImagemapLinkToken|AstText)}`
 */
class ImagemapToken extends Token {
	type = 'ext-inner';
	name = 'imagemap';

	/**
	 * The image of the map, i.e. the first child of type `imagemap-image` (if any).
	 * @returns {GalleryImageToken}
	 */
	get image() {
		return this.childNodes.find(child => child.type === 'imagemap-image');
	}

	/**
	 * @param {string} inner wikitext inside the tag
	 * @param config parser configuration (reads `protocol`)
	 * @param {accum} accum
	 */
	constructor(inner, config = Parser.getConfig(), accum = []) {
		super(undefined, config, true, accum, {
		});
		if (!inner) {
			return;
		}
		const rows = inner.split('\n'),
			protocols = new Set(config.protocol.split('|')),
			SingleLineNoincludeToken = NoincludeToken;
		let imageFound = false,
			broken = false;
		for (const row of rows) {
			const stripped = row.trim();
			// Blank lines, `#` comment lines and everything after an invalid
			// image line go straight to the noinclude fallback below.
			if (!broken && stripped && stripped[0] !== '#') {
				if (!imageFound) {
					// The first meaningful line must be a file in namespace 6.
					const [file, ...rest] = row.split('|'),
						title = this.normalizeTitle(file, 0, true);
					if (title.valid && !title.interwiki && title.ns === 6) {
						const image = new GalleryImageToken(
							file, rest.length === 0 ? undefined : rest.join('|'), title, config, accum,
						);
						image.type = 'imagemap-image';
						super.insertAt(image);
						imageFound = true;
						continue;
					}
					broken = true;
				} else if (stripped.split(/[\t ]/u)[0] === 'desc') {
					// `desc ...` lines are kept as plain text.
					super.insertAt(row);
					continue;
				} else {
					const bracket = row.indexOf('[');
					if (bracket !== -1) {
						const tail = row.slice(bracket),
							internal = /^\[{2}([^|]+)(?:\|([^\]]+))?\]{2}[\w\s]*$/u.exec(tail);
						if (internal) {
							const title = this.normalizeTitle(internal[1], 0, true, false, true);
							if (title.valid) {
								super.insertAt(new ImagemapLinkToken(
									row.slice(0, bracket),
									[internal[1], internal[2], title],
									tail.slice(tail.indexOf(']]') + 2),
									config,
									accum,
								));
								continue;
							}
						} else if (
							protocols.has(tail.slice(1, tail.indexOf(':') + 1))
							|| protocols.has(tail.slice(1, tail.indexOf('//') + 2))
						) {
							const external = /^\[([^\]\s]+)(?:(\s+)(\S[^\]]*)?)?\][\w\s]*$/u.exec(tail);
							if (external) {
								super.insertAt(new ImagemapLinkToken(
									row.slice(0, bracket),
									external.slice(1),
									tail.slice(tail.indexOf(']') + 1),
									config,
									accum,
								));
								continue;
							}
						}
					}
				}
			}
			// Fallback: keep the line verbatim inside a noinclude token.
			super.insertAt(new SingleLineNoincludeToken(row, config, accum));
		}
	}

	/**
	 * @override
	 */
	toString(selector) {
		// Children correspond to lines, so they are joined with a newline.
		return super.toString(selector, '\n');
	}

	/** @override */
	text() {
		const joined = super.text('\n');
		// Collapse runs of newlines into a single one.
		return joined.replace(/\n{2,}/gu, '\n');
	}

	/** @override */
	getGaps() {
		return 1;
	}

	/**
	 * @override
	 * @param {number} start start position
	 */
	lint(start = 0) {
		const errors = super.lint(start),
			rect = {start, ...this.getRootNode().posFromIndex(start)};
		if (!this.image) {
			errors.push(generateForSelf(this, rect, '缺少图片的<imagemap>'));
			return errors;
		}
		// Report every non-empty, non-comment line that ended up in the noinclude fallback.
		for (const child of this.childNodes) {
			const str = String(child).trim();
			if (child.type === 'noinclude' && str && str[0] !== '#') {
				errors.push(generateForChild(child, rect, '无效的<imagemap>链接'));
			}
		}
		return errors;
	}
}
|
|
139
|
+
|
|
140
|
+
module.exports = ImagemapToken;
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const Title = require('../lib/title'),
|
|
4
|
+
Token = require('.'),
|
|
5
|
+
NoincludeToken = require('./nowiki/noinclude'),
|
|
6
|
+
LinkToken = require('./link'),
|
|
7
|
+
ExtLinkToken = require('./extLink');
|
|
8
|
+
|
|
9
|
+
/**
 * A link line inside `<imagemap>`.
 * @classdesc `{childNodes: [AstText, LinkToken|ExtLinkToken, NoincludeToken]}`
 */
class ImagemapLinkToken extends Token {
	type = 'imagemap-link';

	/**
	 * @param {string} pre text before the link
	 * @param {[string, string, string|Title]} linkStuff arguments of the internal or external link
	 * @param {string} post text after the link
	 * @param config parser configuration
	 * @param {accum} accum
	 */
	constructor(pre, linkStuff, post, config, accum) {
		// A `Title` in the third slot marks an internal link; otherwise it is external.
		let LinkClass = ExtLinkToken;
		if (linkStuff[2] instanceof Title) {
			LinkClass = LinkToken;
		}
		super(undefined, config, true, accum);
		const link = new LinkClass(...linkStuff, config, accum),
			trailing = new NoincludeToken(post, config, accum);
		this.append(pre, link, trailing);
	}
}
|
|
28
|
+
|
|
29
|
+
module.exports = ImagemapLinkToken;
|
package/src/index.js
ADDED
|
@@ -0,0 +1,407 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/*
|
|
4
|
+
* PHP解析器的步骤:
|
|
5
|
+
* -1. 替换签名和`{{subst:}}`,参见Parser::preSaveTransform;这在revision中不可能保留,可以跳过
|
|
6
|
+
* 0. 移除特定字符`\0`和`\x7F`,参见Parser::parse
|
|
7
|
+
* 1. 注释/扩展标签('<'相关),参见Preprocessor_Hash::buildDomTreeArrayFromText和Sanitizer::decodeTagAttributes
|
|
8
|
+
* 2. 模板/模板变量/标题,注意rightmost法则,以及`-{`和`[[`可以破坏`{{`或`{{{`语法,
|
|
9
|
+
* 参见Preprocessor_Hash::buildDomTreeArrayFromText
|
|
10
|
+
* 3. HTML标签(允许不匹配),参见Sanitizer::internalRemoveHtmlTags
|
|
11
|
+
* 4. 表格,参见Parser::handleTables
|
|
12
|
+
* 5. 水平线和状态开关,参见Parser::internalParse
|
|
13
|
+
* 6. 内链,含文件和分类,参见Parser::handleInternalLinks2
|
|
14
|
+
* 7. `'`,参见Parser::doQuotes
|
|
15
|
+
* 8. 外链,参见Parser::handleExternalLinks
|
|
16
|
+
* 9. ISBN、RFC(未来将废弃,不予支持)和自由外链,参见Parser::handleMagicLinks
|
|
17
|
+
* 10. 段落和列表,参见BlockLevelPass::execute
|
|
18
|
+
* 11. 转换,参见LanguageConverter::recursiveConvertTopLevel
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
/*
|
|
22
|
+
* \0\d+.\x7F标记Token:
|
|
23
|
+
* e: ExtToken
|
|
24
|
+
* a: AttributeToken
|
|
25
|
+
* c: CommentToken、NoIncludeToken和IncludeToken
|
|
26
|
+
* !: `{{!}}`专用
|
|
27
|
+
* {: `{{(!}}`专用
|
|
28
|
+
* }: `{{!)}}`专用
|
|
29
|
+
* -: `{{!-}}`专用
|
|
30
|
+
* +: `{{!!}}`专用
|
|
31
|
+
* ~: `{{=}}`专用
|
|
32
|
+
* m: `{{fullurl:}}`、`{{canonicalurl:}}`或`{{filepath:}}`
|
|
33
|
+
* t: ArgToken或TranscludeToken
|
|
34
|
+
* h: HeadingToken
|
|
35
|
+
* x: HtmlToken
|
|
36
|
+
* b: TableToken
|
|
37
|
+
* r: HrToken
|
|
38
|
+
* u: DoubleUnderscoreToken
|
|
39
|
+
* l: LinkToken
|
|
40
|
+
* q: QuoteToken
|
|
41
|
+
* w: ExtLinkToken
|
|
42
|
+
* d: ListToken
|
|
43
|
+
* v: ConverterToken
|
|
44
|
+
*/
|
|
45
|
+
|
|
46
|
+
const {text} = require('../util/string'),
|
|
47
|
+
Parser = require('..'),
|
|
48
|
+
AstElement = require('../lib/element'),
|
|
49
|
+
AstText = require('../lib/text');
|
|
50
|
+
const {MAX_STAGE} = Parser;
|
|
51
|
+
|
|
52
|
+
/**
 * Base class of all nodes.
 * @classdesc `{childNodes: ...(AstText|Token)}`
 */
class Token extends AstElement {
	type = 'root';
	#stage = 0; // Parsing stage, see the comment at the top of the file. Only meaningful for plain Tokens.
	#config;
	// This array serves two purposes: 1. its Tokens replace the `/\0\d+.\x7F/` placeholders during build;
	// 2. its Tokens have parseOnce and build executed on them in turn.
	#accum;
	/** @type {boolean} */ #include;

	/**
	 * Replace wiki syntax with placeholders (one parsing stage).
	 * @param {number} n parsing stage
	 * @param {boolean} include whether the text is transcluded
	 */
	#parseOnce = (n = this.#stage, include = false) => {
		if (n < this.#stage || !this.isPlain() || this.length === 0) {
			return this;
		}
		switch (n) {
			case 0:
				if (this.type === 'root') {
					// The root pushed itself into accum in the constructor; remove it again.
					this.#accum.shift();
				}
				this.#include = Boolean(include);
				this.#parseCommentAndExt(include);
				break;
			case 1:
				this.#parseBrackets();
				break;
			case 2:
				this.#parseHtml();
				break;
			case 3:
				this.#parseTable();
				break;
			case 4:
				this.#parseHrAndDoubleUndescore();
				break;
			case 5:
				this.#parseLinks();
				break;
			case 6:
				this.#parseQuotes();
				break;

			case 7:
				this.#parseExternalLinks();
				break;
			case 8:
				this.#parseMagicLinks();
				break;
			case 9:
				this.#parseList();
				break;
			case 10:
				this.#parseConverter();
				// no default
		}
		if (this.type === 'root') {
			// The root drives the same stage on every Token collected so far.
			for (const token of this.#accum) {
				token.getAttribute('parseOnce')(n, include);
			}
		}
		this.#stage++;
		return this;
	};

	/**
	 * Rebuild wikitext from a half-parsed string.
	 * @template {string} T
	 * @param {string} str half-parsed string
	 * @param {T} type return type
	 * @complexity `n`
	 * @returns {T extends 'string'|'text' ? string : (Token|AstText)[]}
	 * @throws `Error` a placeholder whose last character is a digit
	 */
	#buildFromStr = (str, type) => {
		// Splitting on `\0` and `\x7F` leaves plain text at even indices
		// and placeholder bodies (`<index><letter>`) at odd indices.
		const nodes = str.split(/[\0\x7F]/u).map((s, i) => {
			if (i % 2 === 0) {
				return new AstText(s, this.#config);
			} else if (isNaN(s.at(-1))) {
				return this.#accum[Number(s.slice(0, -1))];
			}
			throw new Error(`解析错误!未正确标记的 Token:${s}`);
		});
		if (type === 'string') {
			return nodes.map(String).join('');
		} else if (type === 'text') {
			return text(nodes);
		}
		return nodes;
	};

	/**
	 * Replace placeholders with child Tokens.
	 * @complexity `n`
	 */
	#build = () => {
		this.#stage = MAX_STAGE;
		const {length, firstChild} = this,
			str = String(firstChild);
		if (length === 1 && firstChild.type === 'text' && str.includes('\0')) {
			this.replaceChildren(...this.#buildFromStr(str));
			this.normalize();
			if (this.type === 'root') {
				for (const token of this.#accum) {
					token.getAttribute('build')();
				}
			}
		}
	};

	/**
	 * @param {string} wikitext wikitext
	 * @param config parser configuration
	 * @param {boolean} halfParsed whether `wikitext` may already contain placeholders
	 * @param {accum} accum
	 * @param acceptable not used by this constructor
	 */
	constructor(wikitext, config = Parser.getConfig(), halfParsed = false, accum = [], acceptable = undefined) {
		super();
		if (typeof wikitext === 'string') {
			// Raw input must not contain the characters reserved for placeholders.
			this.insertAt(halfParsed ? wikitext : wikitext.replace(/[\0\x7F]/gu, ''));
		}
		this.#config = config;
		this.#accum = accum;
		accum.push(this);
	}

	/**
	 * @override
	 * @template {string} T
	 * @param {T} key attribute key
	 * @returns {TokenAttribute<T>}
	 */
	getAttribute(key) {
		switch (key) {
			case 'config':
				return structuredClone(this.#config);
			case 'accum':
				return this.#accum;
			case 'parseOnce':
				return this.#parseOnce;
			case 'buildFromStr':
				return this.#buildFromStr;
			case 'build':
				return this.#build;
			case 'include': {
				if (this.#include !== undefined) {
					return this.#include;
				}
				// Not set on this Token: ask the root, if this Token is attached to one.
				const root = this.getRootNode();
				if (root.type === 'root' && root !== this) {
					return root.getAttribute('include');
				}
				return false;
			}
			default:
				return super.getAttribute(key);
		}
	}

	/**
	 * @override
	 * @template {string} T
	 * @param {T} key attribute key
	 * @param {TokenAttribute<T>} value attribute value
	 */
	setAttribute(key, value) {
		switch (key) {
			case 'stage':
				if (this.#stage === 0 && this.type === 'root') {
					this.#accum.shift();
				}
				this.#stage = value;
				return this;
			default:
				return super.setAttribute(key, value);
		}
	}

	/** Whether this is a plain node (an instance of `Token` itself, not of a subclass). */
	isPlain() {
		return this.constructor === Token;
	}

	/**
	 * @override
	 * @template {string|Token} T
	 * @param {T} token child node to insert
	 * @param {number} i position to insert at
	 * @complexity `n`
	 * @returns {T extends Token ? Token : AstText}
	 */
	insertAt(token, i = this.length) {
		if (typeof token === 'string') {
			token = new AstText(token, this.#config);
		}
		super.insertAt(token, i);
		if (token.type === 'root') {
			// A Token that has a parent is no longer a root.
			token.type = 'plain';
		}
		return token;
	}

	/**
	 * Normalize a page title.
	 * @param {string} title title (with or without a namespace prefix)
	 * @param {number} defaultNs namespace
	 * @param {boolean} halfParsed forwarded to `Parser.normalizeTitle`
	 * @param {boolean} decode whether decoding is required
	 * @param {boolean} selfLink whether a selfLink is allowed
	 */
	normalizeTitle(title, defaultNs = 0, halfParsed = false, decode = false, selfLink = false) {
		return Parser.normalizeTitle(title, defaultNs, this.#include, this.#config, halfParsed, decode, selfLink);
	}

	/** Generate the `name` attribute of some Tokens. */
	afterBuild() {
		if (this.type === 'root') {
			for (const token of this.#accum) {
				token.afterBuild();
			}
		}
	}

	/**
	 * Parse, rebuild, and generate the `name` attribute of some Tokens.
	 * @param {number} n maximum parsing stage
	 * @param {boolean} include whether the text is transcluded
	 */
	parse(n = MAX_STAGE, include = false) {
		while (this.#stage < n) {
			this.#parseOnce(this.#stage, include);
		}
		if (n) {
			this.#build();
			this.afterBuild();
		}
		return this;
	}

	/**
	 * Parse HTML comments and extension tags.
	 * @param {boolean} includeOnly whether the text is transcluded
	 */
	#parseCommentAndExt(includeOnly) {
		const parseCommentAndExt = require('../parser/commentAndExt');
		this.setText(parseCommentAndExt(String(this.firstChild), this.#config, this.#accum, includeOnly));
	}

	/** Parse curly braces. */
	#parseBrackets() {
		const parseBrackets = require('../parser/brackets');
		// Non-root text gets a leading `\0` that is stripped again after parsing.
		const str = this.type === 'root' ? String(this.firstChild) : `\0${String(this.firstChild)}`,
			parsed = parseBrackets(str, this.#config, this.#accum);
		this.setText(this.type === 'root' ? parsed : parsed.slice(1));
	}

	/** Parse HTML tags. */
	#parseHtml() {
		if (this.#config.excludes.includes('html')) {
			return;
		}
		const parseHtml = require('../parser/html');
		this.setText(parseHtml(String(this.firstChild), this.#config, this.#accum));
	}

	/** Parse tables. */
	#parseTable() {
		if (this.#config.excludes.includes('table')) {
			return;
		}
		const parseTable = require('../parser/table'),
			TableToken = require('./table');
		this.setText(parseTable(this, this.#config, this.#accum));
		for (const table of this.#accum) {
			if (table instanceof TableToken && table.type !== 'td') {
				table.normalize();
				const {childNodes: [, child]} = table;
				// NOTE(review): `Token#insertAt` wraps strings in `AstText`, so `child` may never
				// be a raw string here and this branch may be dead - confirm against AstElement.
				if (typeof child === 'string' && child.includes('\0')) {
					table.removeAt(1);
					const inner = new Token(child, this.#config, true, this.#accum);
					table.insertAt(inner, 1);
					inner.setAttribute('stage', 4);
				}
			}
		}
	}

	/** Parse \<hr\> and behavior switches (double underscores). */
	#parseHrAndDoubleUndescore() {
		if (this.#config.excludes.includes('hr')) {
			return;
		}
		const parseHrAndDoubleUnderscore = require('../parser/hrAndDoubleUnderscore');
		this.setText(parseHrAndDoubleUnderscore(this, this.#config, this.#accum));
	}

	/** Parse internal links. */
	#parseLinks() {
		const parseLinks = require('../parser/links');
		this.setText(parseLinks(String(this.firstChild), this.#config, this.#accum));
	}

	/** Parse single quotes. */
	#parseQuotes() {
		if (this.#config.excludes.includes('quote')) {
			return;
		}
		const parseQuotes = require('../parser/quotes');
		const lines = String(this.firstChild).split('\n');
		for (let i = 0; i < lines.length; i++) {
			lines[i] = parseQuotes(lines[i], this.#config, this.#accum);
		}
		this.setText(lines.join('\n'));
	}

	/** Parse external links. */
	#parseExternalLinks() {
		if (this.#config.excludes.includes('extLink')) {
			return;
		}
		const parseExternalLinks = require('../parser/externalLinks');
		this.setText(parseExternalLinks(String(this.firstChild), this.#config, this.#accum));
	}

	/** Parse free external links (magic links). */
	#parseMagicLinks() {
		if (this.#config.excludes.includes('magicLink')) {
			return;
		}
		const parseMagicLinks = require('../parser/magicLinks');
		this.setText(parseMagicLinks(String(this.firstChild), this.#config, this.#accum));
	}

	/** Parse lists. */
	#parseList() {
		if (this.#config.excludes.includes('list')) {
			return;
		}
		const parseList = require('../parser/list');
		const lines = String(this.firstChild).split('\n');
		// The first line is only parsed for the root and for the inner content of `<poem>`.
		// The extension is identified by `name`; comparing `type` against both
		// 'ext-inner' and 'poem' (as before) could never be true.
		let i = this.type === 'root' || this.type === 'ext-inner' && this.name === 'poem' ? 0 : 1;
		for (; i < lines.length; i++) {
			lines[i] = parseList(lines[i], this.#config, this.#accum);
		}
		this.setText(lines.join('\n'));
	}

	/** Parse language variant conversion. */
	#parseConverter() {
		const parseConverter = require('../parser/converter');
		this.setText(parseConverter(String(this.firstChild), this.#config, this.#accum));
	}
}
|
|
406
|
+
|
|
407
|
+
module.exports = Token;
|