wikiparser-node 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +229 -0
- package/LICENSE +674 -0
- package/README.md +1896 -0
- package/config/default.json +766 -0
- package/config/llwiki.json +686 -0
- package/config/moegirl.json +721 -0
- package/index.js +159 -0
- package/jsconfig.json +7 -0
- package/lib/element.js +690 -0
- package/lib/node.js +357 -0
- package/lib/ranges.js +122 -0
- package/lib/title.js +57 -0
- package/mixin/attributeParent.js +67 -0
- package/mixin/fixedToken.js +32 -0
- package/mixin/hidden.js +22 -0
- package/package.json +30 -0
- package/parser/brackets.js +107 -0
- package/parser/commentAndExt.js +61 -0
- package/parser/externalLinks.js +30 -0
- package/parser/hrAndDoubleUnderscore.js +26 -0
- package/parser/html.js +41 -0
- package/parser/links.js +92 -0
- package/parser/magicLinks.js +40 -0
- package/parser/quotes.js +63 -0
- package/parser/table.js +97 -0
- package/src/arg.js +150 -0
- package/src/atom/hidden.js +10 -0
- package/src/atom/index.js +33 -0
- package/src/attribute.js +342 -0
- package/src/extLink.js +116 -0
- package/src/heading.js +91 -0
- package/src/html.js +144 -0
- package/src/imageParameter.js +172 -0
- package/src/index.js +602 -0
- package/src/link/category.js +88 -0
- package/src/link/file.js +201 -0
- package/src/link/index.js +214 -0
- package/src/listToken.js +47 -0
- package/src/magicLink.js +66 -0
- package/src/nowiki/comment.js +45 -0
- package/src/nowiki/doubleUnderscore.js +42 -0
- package/src/nowiki/hr.js +41 -0
- package/src/nowiki/index.js +37 -0
- package/src/nowiki/noinclude.js +24 -0
- package/src/nowiki/quote.js +37 -0
- package/src/onlyinclude.js +42 -0
- package/src/parameter.js +165 -0
- package/src/syntax.js +80 -0
- package/src/table/index.js +867 -0
- package/src/table/td.js +259 -0
- package/src/table/tr.js +244 -0
- package/src/tagPair/ext.js +85 -0
- package/src/tagPair/include.js +45 -0
- package/src/tagPair/index.js +91 -0
- package/src/transclude.js +627 -0
- package/tool/index.js +898 -0
- package/typings/element.d.ts +28 -0
- package/typings/index.d.ts +49 -0
- package/typings/node.d.ts +23 -0
- package/typings/parser.d.ts +9 -0
- package/typings/table.d.ts +14 -0
- package/typings/token.d.ts +21 -0
- package/typings/tool.d.ts +10 -0
- package/util/debug.js +70 -0
- package/util/string.js +60 -0
package/src/index.js
ADDED
|
@@ -0,0 +1,602 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/*
|
|
4
|
+
* PHP解析器的步骤:
|
|
5
|
+
* -1. 替换签名和`{{subst:}}`,参见Parser::preSaveTransform;这在revision中不可能保留,可以跳过
|
|
6
|
+
* 0. 移除特定字符`\x00`和`\x7f`,参见Parser::parse
|
|
7
|
+
* 1. 注释/扩展标签('<'相关),参见Preprocessor_Hash::buildDomTreeArrayFromText和Sanitizer::decodeTagAttributes
|
|
8
|
+
* 2. 模板/模板变量/标题,注意rightmost法则,以及`-{`和`[[`可以破坏`{{`或`{{{`语法,
|
|
9
|
+
* 参见Preprocessor_Hash::buildDomTreeArrayFromText
|
|
10
|
+
* 3. HTML标签(允许不匹配),参见Sanitizer::internalRemoveHtmlTags
|
|
11
|
+
* 4. 表格,参见Parser::handleTables
|
|
12
|
+
* 5. 水平线和状态开关,参见Parser::internalParse
|
|
13
|
+
* 6. 内链,含文件和分类,参见Parser::handleInternalLinks2
|
|
14
|
+
* 7. `'`,参见Parser::doQuotes
|
|
15
|
+
* 8. 外链,参见Parser::handleExternalLinks
|
|
16
|
+
* 9. ISBN、RFC(未来将废弃,不予支持)和自由外链,参见Parser::handleMagicLinks
|
|
17
|
+
* 10. 段落和列表,参见BlockLevelPass::execute
|
|
18
|
+
* 11. 转换,参见LanguageConverter::recursiveConvertTopLevel
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
/*
|
|
22
|
+
* \x00\d+.\x7f标记Token:
|
|
23
|
+
* e: ExtToken
|
|
24
|
+
* c: CommentToken、NoIncludeToken和IncludeToken
|
|
25
|
+
* !: `{{!}}`专用
|
|
26
|
+
* {: `{{(!}}`专用
|
|
27
|
+
* }: `{{!)}}`专用
|
|
28
|
+
* -: `{{!-}}`专用
|
|
29
|
+
* +: `{{!!}}`专用
|
|
30
|
+
* ~: `{{=}}`专用
|
|
31
|
+
* t: ArgToken或TranscludeToken
|
|
32
|
+
* h: HeadingToken
|
|
33
|
+
* x: HtmlToken
|
|
34
|
+
* b: TableToken
|
|
35
|
+
* r: HrToken
|
|
36
|
+
* u: DoubleUnderscoreToken
|
|
37
|
+
* l: LinkToken
|
|
38
|
+
* q: QuoteToken
|
|
39
|
+
* w: ExtLinkToken
|
|
40
|
+
*/
|
|
41
|
+
|
|
42
|
+
const {typeError, externalUse} = require('../util/debug'),
|
|
43
|
+
Ranges = require('../lib/ranges'),
|
|
44
|
+
AstElement = require('../lib/element'),
|
|
45
|
+
assert = require('assert/strict'),
|
|
46
|
+
/** @type {Parser} */ Parser = require('..'),
|
|
47
|
+
{MAX_STAGE} = Parser;
|
|
48
|
+
|
|
49
|
+
class Token extends AstElement {
|
|
50
|
+
type = 'root';
|
|
51
|
+
/** 解析阶段,参见顶部注释。只对plain Token有意义。 */ #stage = 0;
|
|
52
|
+
/** @type {ParserConfig} */ #config;
|
|
53
|
+
/**
|
|
54
|
+
* 这个数组起两个作用:1. 数组中的Token会在build时替换`/\x00\d+.\x7f/`标记;2. 数组中的Token会依次执行parseOnce和build方法。
|
|
55
|
+
* @type {accum}
|
|
56
|
+
*/
|
|
57
|
+
#accum;
|
|
58
|
+
/** @type {Record<string, Ranges>} */ #acceptable;
|
|
59
|
+
#protectedChildren = new Ranges();
|
|
60
|
+
/** @type {boolean} */ #include;
|
|
61
|
+
|
|
62
|
+
/**
|
|
63
|
+
* @param {?string} wikitext
|
|
64
|
+
* @param {accum} accum
|
|
65
|
+
* @param {acceptable} acceptable
|
|
66
|
+
*/
|
|
67
|
+
constructor(wikitext, config = Parser.getConfig(), halfParsed = false, accum = [], acceptable = null) {
|
|
68
|
+
super();
|
|
69
|
+
if (typeof wikitext === 'string') {
|
|
70
|
+
this.appendChild(halfParsed ? wikitext : wikitext.replace(/[\x00\x7f]/g, ''));
|
|
71
|
+
}
|
|
72
|
+
this.setAttribute('config', config).setAttribute('accum', accum).setAttribute('acceptable', acceptable);
|
|
73
|
+
accum.push(this);
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
/** @complexity `n` */
|
|
77
|
+
cloneChildren() {
|
|
78
|
+
if (!Parser.debugging && externalUse('cloneChildren')) {
|
|
79
|
+
this.debugOnly('cloneChildren');
|
|
80
|
+
}
|
|
81
|
+
return this.childNodes.map(child => typeof child === 'string' ? child : child.cloneNode());
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
/** @complexity `n` */
|
|
85
|
+
cloneNode() {
|
|
86
|
+
if (!this.isPlain()) {
|
|
87
|
+
throw new Error(`未定义 ${this.constructor.name} 的复制方法!`);
|
|
88
|
+
}
|
|
89
|
+
const cloned = this.cloneChildren();
|
|
90
|
+
return Parser.run(() => {
|
|
91
|
+
const token = new Token(undefined, this.#config, false, [], this.#acceptable);
|
|
92
|
+
token.type = this.type;
|
|
93
|
+
token.append(...cloned);
|
|
94
|
+
token.protectChildren(...this.#protectedChildren);
|
|
95
|
+
return token;
|
|
96
|
+
});
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
/**
|
|
100
|
+
* @template {string} T
|
|
101
|
+
* @param {T} key
|
|
102
|
+
* @returns {TokenAttribute<T>}
|
|
103
|
+
*/
|
|
104
|
+
getAttribute(key) {
|
|
105
|
+
switch (key) {
|
|
106
|
+
case 'stage':
|
|
107
|
+
return this.#stage;
|
|
108
|
+
case 'config':
|
|
109
|
+
return structuredClone(this.#config);
|
|
110
|
+
case 'accum':
|
|
111
|
+
return this.#accum;
|
|
112
|
+
case 'acceptable':
|
|
113
|
+
return this.#acceptable ? {...this.#acceptable} : null;
|
|
114
|
+
case 'protectedChildren':
|
|
115
|
+
return new Ranges(this.#protectedChildren);
|
|
116
|
+
case 'include': {
|
|
117
|
+
if (this.#include !== undefined) {
|
|
118
|
+
return this.#include;
|
|
119
|
+
}
|
|
120
|
+
const includeToken = this.querySelector('include');
|
|
121
|
+
if (includeToken) {
|
|
122
|
+
return includeToken.name === 'noinclude';
|
|
123
|
+
}
|
|
124
|
+
const noincludeToken = this.querySelector('noinclude');
|
|
125
|
+
return Boolean(noincludeToken) && !/^<\/?noinclude(?:\s[^>]*)?\/?>$/i.test(noincludeToken.toString());
|
|
126
|
+
}
|
|
127
|
+
default:
|
|
128
|
+
return super.getAttribute(key);
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
/**
|
|
133
|
+
* @template {string} T
|
|
134
|
+
* @param {T} key
|
|
135
|
+
* @param {TokenAttribute<T>} value
|
|
136
|
+
*/
|
|
137
|
+
setAttribute(key, value) {
|
|
138
|
+
if (key === 'include' || !Parser.running && ['config', 'accum'].includes(key)) {
|
|
139
|
+
throw new RangeError(`禁止手动指定私有的 #${key} 属性!`);
|
|
140
|
+
} else if (!Parser.debugging && ['stage', 'acceptable', 'protectedChildren'].includes(key)
|
|
141
|
+
&& externalUse('setAttribute')
|
|
142
|
+
) {
|
|
143
|
+
throw new RangeError(`使用 ${this.constructor.name}.setAttribute 方法设置私有属性 #${key} 仅用于代码调试!`);
|
|
144
|
+
}
|
|
145
|
+
switch (key) {
|
|
146
|
+
case 'stage':
|
|
147
|
+
if (this.#stage === 0 && this.type === 'root') {
|
|
148
|
+
this.#accum.shift();
|
|
149
|
+
}
|
|
150
|
+
this.#stage = value;
|
|
151
|
+
return this;
|
|
152
|
+
case 'config':
|
|
153
|
+
this.#config = value;
|
|
154
|
+
return this;
|
|
155
|
+
case 'accum':
|
|
156
|
+
this.#accum = value;
|
|
157
|
+
return this;
|
|
158
|
+
case 'protectedChildren':
|
|
159
|
+
this.#protectedChildren = value;
|
|
160
|
+
return this;
|
|
161
|
+
case 'acceptable': {
|
|
162
|
+
const /** @type {acceptable} */ acceptable = {};
|
|
163
|
+
if (value) {
|
|
164
|
+
for (const [k, v] of Object.entries(value)) {
|
|
165
|
+
if (k.startsWith('Stage-')) {
|
|
166
|
+
for (let i = 0; i <= Number(k.slice(6)); i++) {
|
|
167
|
+
for (const type of Parser.aliases[i]) {
|
|
168
|
+
acceptable[type] = new Ranges(v);
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
} else if (k.startsWith('!')) { // `!`项必须放在最后
|
|
172
|
+
delete acceptable[k.slice(1)];
|
|
173
|
+
} else {
|
|
174
|
+
acceptable[k] = new Ranges(v);
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
this.#acceptable = value && acceptable;
|
|
179
|
+
return this;
|
|
180
|
+
}
|
|
181
|
+
default:
|
|
182
|
+
return super.setAttribute(key, value);
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
isPlain() {
|
|
187
|
+
return this.constructor === Token;
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
/** @param {...string|number|Range} args */
|
|
191
|
+
protectChildren(...args) {
|
|
192
|
+
if (!Parser.debugging && externalUse('protectChildren')) {
|
|
193
|
+
this.debugOnly('protectChildren');
|
|
194
|
+
}
|
|
195
|
+
this.#protectedChildren.push(...new Ranges(args));
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
/**
|
|
199
|
+
* @param {number} i
|
|
200
|
+
* @returns {string|Token}
|
|
201
|
+
* @complexity `n`
|
|
202
|
+
*/
|
|
203
|
+
removeAt(i) {
|
|
204
|
+
if (!Parser.running) {
|
|
205
|
+
const protectedIndices = this.#protectedChildren.applyTo(this.childNodes);
|
|
206
|
+
if (protectedIndices.includes(i)) {
|
|
207
|
+
throw new Error(`${this.constructor.name} 的第 ${i} 个子节点不可移除!`);
|
|
208
|
+
} else if (this.#acceptable) {
|
|
209
|
+
const acceptableIndices = Object.fromEntries(
|
|
210
|
+
Object.entries(this.#acceptable)
|
|
211
|
+
.map(([str, ranges]) => [str, ranges.applyTo(this.childNodes.length - 1)]),
|
|
212
|
+
),
|
|
213
|
+
nodesAfter = i === -1 ? [] : this.childNodes.slice(i + 1);
|
|
214
|
+
if (nodesAfter.some(({constructor: {name}}, j) => !acceptableIndices[name].includes(i + j))) {
|
|
215
|
+
throw new Error(`移除 ${this.constructor.name} 的第 ${i} 个子节点会破坏规定的顺序!`);
|
|
216
|
+
}
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
return super.removeAt(i);
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
/**
|
|
223
|
+
* @template {string|Token} T
|
|
224
|
+
* @param {T} token
|
|
225
|
+
* @complexity `n`
|
|
226
|
+
*/
|
|
227
|
+
insertAt(token, i = this.childNodes.length) {
|
|
228
|
+
if (!Parser.running && this.#acceptable) {
|
|
229
|
+
const acceptableIndices = Object.fromEntries(
|
|
230
|
+
Object.entries(this.#acceptable)
|
|
231
|
+
.map(([str, ranges]) => [str, ranges.applyTo(this.childNodes.length + 1)]),
|
|
232
|
+
),
|
|
233
|
+
nodesAfter = this.childNodes.slice(i),
|
|
234
|
+
insertedName = token.constructor.name,
|
|
235
|
+
k = i < 0 ? i + this.childNodes.length : i;
|
|
236
|
+
if (!acceptableIndices[insertedName].includes(k)) {
|
|
237
|
+
throw new RangeError(`${this.constructor.name} 的第 ${k} 个子节点不能为 ${insertedName}!`);
|
|
238
|
+
} else if (nodesAfter.some(({constructor: {name}}, j) => !acceptableIndices[name].includes(k + j + 1))) {
|
|
239
|
+
throw new Error(`${this.constructor.name} 插入新的第 ${k} 个子节点会破坏规定的顺序!`);
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
super.insertAt(token, i);
|
|
243
|
+
if (token instanceof Token && token.type === 'root') {
|
|
244
|
+
token.type = 'plain';
|
|
245
|
+
}
|
|
246
|
+
return token;
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
/**
|
|
250
|
+
* @param {Token} token
|
|
251
|
+
* @complexity `n`
|
|
252
|
+
*/
|
|
253
|
+
safeReplaceWith(token) {
|
|
254
|
+
const {parentNode} = this;
|
|
255
|
+
if (!parentNode) {
|
|
256
|
+
throw new Error('不存在父节点!');
|
|
257
|
+
} else if (token.constructor !== this.constructor) {
|
|
258
|
+
typeError(this, 'safeReplaceWith', this.constructor.name);
|
|
259
|
+
}
|
|
260
|
+
try {
|
|
261
|
+
assert.deepEqual(token.getAttribute('acceptable'), this.#acceptable);
|
|
262
|
+
} catch (e) {
|
|
263
|
+
if (e instanceof assert.AssertionError) {
|
|
264
|
+
throw new Error(`待替换的 ${this.constructor.name} 带有不同的 #acceptable 属性!`);
|
|
265
|
+
}
|
|
266
|
+
throw e;
|
|
267
|
+
}
|
|
268
|
+
const i = parentNode.childNodes.indexOf(this);
|
|
269
|
+
super.removeAt.call(parentNode, i);
|
|
270
|
+
super.insertAt.call(parentNode, token, i);
|
|
271
|
+
if (token.type === 'root') {
|
|
272
|
+
token.type = 'plain';
|
|
273
|
+
}
|
|
274
|
+
const e = new Event('replace', {bubbles: true});
|
|
275
|
+
token.dispatchEvent(e, {position: i, oldToken: this, newToken: token});
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
/** @param {string} title */
|
|
279
|
+
isInterwiki(title) {
|
|
280
|
+
return Parser.isInterwiki(title, this.#config);
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
/** @param {string} title */
|
|
284
|
+
normalizeTitle(title, defaultNs = 0, halfParsed = false) {
|
|
285
|
+
return Parser.normalizeTitle(title, defaultNs, this.#include, this.#config, halfParsed);
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
/** @complexity `n` */
|
|
289
|
+
sections() {
|
|
290
|
+
if (this.type !== 'root') {
|
|
291
|
+
return;
|
|
292
|
+
}
|
|
293
|
+
const {childNodes} = this,
|
|
294
|
+
headings = [...childNodes.entries()]
|
|
295
|
+
.filter(([, child]) => child instanceof Token && child.type === 'heading')
|
|
296
|
+
.map(/** @param {[number, Token]} */ ([i, {name}]) => [i, Number(name)]),
|
|
297
|
+
lastHeading = [-1, -1, -1, -1, -1, -1];
|
|
298
|
+
const /** @type {(string|Token)[][]} */ sections = new Array(headings.length);
|
|
299
|
+
for (const [i, [index, level]] of headings.entries()) {
|
|
300
|
+
for (let j = level; j < 6; j++) {
|
|
301
|
+
const last = lastHeading[j];
|
|
302
|
+
if (last >= 0) {
|
|
303
|
+
sections[last] = childNodes.slice(headings[last][0], index);
|
|
304
|
+
}
|
|
305
|
+
lastHeading[j] = j === level ? i : -1;
|
|
306
|
+
}
|
|
307
|
+
}
|
|
308
|
+
for (const last of lastHeading) {
|
|
309
|
+
if (last >= 0) {
|
|
310
|
+
sections[last] = childNodes.slice(headings[last][0]);
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
sections.unshift(childNodes.slice(0, headings[0]?.[0]));
|
|
314
|
+
return sections;
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
/**
|
|
318
|
+
* @param {number} n
|
|
319
|
+
* @complexity `n`
|
|
320
|
+
*/
|
|
321
|
+
section(n) {
|
|
322
|
+
if (typeof n !== 'number') {
|
|
323
|
+
typeError(this, 'section', 'Number');
|
|
324
|
+
}
|
|
325
|
+
return this.sections()[n];
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
/**
|
|
329
|
+
* @param {string|undefined} tag
|
|
330
|
+
* @returns {[Token, Token]}
|
|
331
|
+
* @complexity `n`
|
|
332
|
+
*/
|
|
333
|
+
findEnclosingHtml(tag) {
|
|
334
|
+
if (tag !== undefined && typeof tag !== 'string') {
|
|
335
|
+
typeError(this, 'findEnclosingHtml', 'String');
|
|
336
|
+
}
|
|
337
|
+
tag = tag?.toLowerCase();
|
|
338
|
+
if (tag !== undefined && !this.#config.html.slice(0, 2).flat().includes(tag)) {
|
|
339
|
+
throw new RangeError(`非法的标签或空标签:${tag}`);
|
|
340
|
+
}
|
|
341
|
+
const {parentElement} = this;
|
|
342
|
+
if (!parentElement) {
|
|
343
|
+
return;
|
|
344
|
+
}
|
|
345
|
+
const {children} = parentElement,
|
|
346
|
+
index = children.indexOf(this);
|
|
347
|
+
let i;
|
|
348
|
+
for (i = index - 1; i >= 0; i--) {
|
|
349
|
+
if (children[i].matches(`html${tag && '#'}${tag ?? ''}[selfClosing=false][closing=false]`)) {
|
|
350
|
+
break;
|
|
351
|
+
}
|
|
352
|
+
}
|
|
353
|
+
if (i === -1) {
|
|
354
|
+
return parentElement.findEnclosingHtml(tag);
|
|
355
|
+
}
|
|
356
|
+
const opening = children[i],
|
|
357
|
+
{name} = opening;
|
|
358
|
+
for (i = index + 1; i < children.length; i++) {
|
|
359
|
+
if (children[i].matches(`html#${name}[selfClosing=false][closing=true]`)) {
|
|
360
|
+
break;
|
|
361
|
+
}
|
|
362
|
+
}
|
|
363
|
+
return i === children.length
|
|
364
|
+
? parentElement.findEnclosingHtml(tag)
|
|
365
|
+
: [opening, children[i]];
|
|
366
|
+
}
|
|
367
|
+
|
|
368
|
+
/** @complexity `n` */
|
|
369
|
+
getCategories() {
|
|
370
|
+
return this.querySelectorAll('category').map(({name, sortkey}) => [name, sortkey]);
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
redoQuotes() {
|
|
374
|
+
const acceptable = this.getAttribute('acceptable');
|
|
375
|
+
if (acceptable && !acceptable.QuoteToken?.some(
|
|
376
|
+
range => typeof range !== 'number' && range.start === 0 && range.end === Infinity && range.step === 1,
|
|
377
|
+
)) {
|
|
378
|
+
throw new Error(`${this.constructor.name} 不接受 QuoteToken 作为子节点!`);
|
|
379
|
+
}
|
|
380
|
+
for (const quote of this.childNodes) {
|
|
381
|
+
if (quote instanceof Token && quote.type === 'quote') {
|
|
382
|
+
quote.replaceWith(quote.firstChild);
|
|
383
|
+
}
|
|
384
|
+
}
|
|
385
|
+
this.normalize();
|
|
386
|
+
/** @type {[number, string][]} */
|
|
387
|
+
const textNodes = [...this.childNodes.entries()].filter(([, child]) => typeof child === 'string'),
|
|
388
|
+
indices = textNodes.map(([i]) => this.getRelativeIndex(i)),
|
|
389
|
+
token = Parser.run(() => {
|
|
390
|
+
const root = new Token(textNodes.map(([, str]) => str).join(''), this.getAttribute('config'));
|
|
391
|
+
return root.setAttribute('stage', 6).parse(7);
|
|
392
|
+
});
|
|
393
|
+
for (const quote of token.children.reverse()) {
|
|
394
|
+
if (quote.type === 'quote') {
|
|
395
|
+
const index = quote.getRelativeIndex(),
|
|
396
|
+
n = indices.findLastIndex(textIndex => textIndex <= index);
|
|
397
|
+
this.splitText(n, index - indices[n]);
|
|
398
|
+
this.splitText(n + 1, Number(quote.name));
|
|
399
|
+
this.removeAt(n + 1);
|
|
400
|
+
this.insertAt(quote, n + 1);
|
|
401
|
+
}
|
|
402
|
+
}
|
|
403
|
+
this.normalize();
|
|
404
|
+
}
|
|
405
|
+
|
|
406
|
+
/**
|
|
407
|
+
* 将维基语法替换为占位符
|
|
408
|
+
* @this {Token & {firstChild: string}}
|
|
409
|
+
*/
|
|
410
|
+
parseOnce(n = this.#stage, include = false) {
|
|
411
|
+
if (!Parser.debugging && externalUse('parseOnce')) {
|
|
412
|
+
this.debugOnly('parseOnce');
|
|
413
|
+
} else if (n < this.#stage || !this.isPlain() || this.childNodes.length === 0) {
|
|
414
|
+
return this;
|
|
415
|
+
}
|
|
416
|
+
switch (n) {
|
|
417
|
+
case 0:
|
|
418
|
+
if (this.type === 'root') {
|
|
419
|
+
this.#accum.shift();
|
|
420
|
+
}
|
|
421
|
+
this.#parseCommentAndExt(include);
|
|
422
|
+
break;
|
|
423
|
+
case 1:
|
|
424
|
+
this.#parseBrackets();
|
|
425
|
+
break;
|
|
426
|
+
case 2:
|
|
427
|
+
this.#parseHtml();
|
|
428
|
+
break;
|
|
429
|
+
case 3:
|
|
430
|
+
this.#parseTable();
|
|
431
|
+
break;
|
|
432
|
+
case 4:
|
|
433
|
+
this.#parseHrAndDoubleUndescore();
|
|
434
|
+
break;
|
|
435
|
+
case 5:
|
|
436
|
+
this.#parseLinks();
|
|
437
|
+
break;
|
|
438
|
+
case 6: {
|
|
439
|
+
const lines = this.firstChild.split('\n');
|
|
440
|
+
for (let i = 0; i < lines.length; i++) {
|
|
441
|
+
lines[i] = this.#parseQuotes(lines[i]);
|
|
442
|
+
}
|
|
443
|
+
this.setText(lines.join('\n'));
|
|
444
|
+
break;
|
|
445
|
+
}
|
|
446
|
+
case 7:
|
|
447
|
+
this.#parseExternalLinks();
|
|
448
|
+
break;
|
|
449
|
+
case 8:
|
|
450
|
+
this.#parseMagicLinks();
|
|
451
|
+
break;
|
|
452
|
+
case 9:
|
|
453
|
+
break;
|
|
454
|
+
case 10:
|
|
455
|
+
// no default
|
|
456
|
+
}
|
|
457
|
+
if (this.type === 'root') {
|
|
458
|
+
for (const token of this.#accum) {
|
|
459
|
+
token.parseOnce(n, include);
|
|
460
|
+
}
|
|
461
|
+
}
|
|
462
|
+
this.#stage++;
|
|
463
|
+
return this;
|
|
464
|
+
}
|
|
465
|
+
|
|
466
|
+
/**
|
|
467
|
+
* @param {string} str
|
|
468
|
+
* @complexity `n`
|
|
469
|
+
*/
|
|
470
|
+
buildFromStr(str) {
|
|
471
|
+
if (!Parser.debugging && externalUse('buildFromStr')) {
|
|
472
|
+
this.debugOnly('buildFromStr');
|
|
473
|
+
}
|
|
474
|
+
return str.split(/[\x00\x7f]/).map((s, i) => {
|
|
475
|
+
if (i % 2 === 0) {
|
|
476
|
+
return s;
|
|
477
|
+
} else if (!isNaN(s.at(-1))) {
|
|
478
|
+
throw new Error(`解析错误!未正确标记的 Token:${s}`);
|
|
479
|
+
}
|
|
480
|
+
return this.#accum[Number(s.slice(0, -1))];
|
|
481
|
+
});
|
|
482
|
+
}
|
|
483
|
+
|
|
484
|
+
/**
|
|
485
|
+
* 将占位符替换为子Token
|
|
486
|
+
* @complexity `n`
|
|
487
|
+
*/
|
|
488
|
+
build() {
|
|
489
|
+
if (!Parser.debugging && externalUse('build')) {
|
|
490
|
+
this.debugOnly('build');
|
|
491
|
+
}
|
|
492
|
+
this.#stage = MAX_STAGE;
|
|
493
|
+
const {childNodes: {length}, firstChild} = this;
|
|
494
|
+
if (length !== 1 || typeof firstChild !== 'string' || !firstChild.includes('\x00')) {
|
|
495
|
+
return this;
|
|
496
|
+
}
|
|
497
|
+
this.replaceChildren(...this.buildFromStr(firstChild));
|
|
498
|
+
this.normalize();
|
|
499
|
+
if (this.type === 'root') {
|
|
500
|
+
for (const token of this.#accum) {
|
|
501
|
+
token.build();
|
|
502
|
+
}
|
|
503
|
+
}
|
|
504
|
+
return this;
|
|
505
|
+
}
|
|
506
|
+
|
|
507
|
+
/** 生成部分Token的`name`属性 */
|
|
508
|
+
afterBuild() {
|
|
509
|
+
if (!Parser.debugging && externalUse('afterBuild')) {
|
|
510
|
+
this.debugOnly('afterBuild');
|
|
511
|
+
} else if (this.type === 'root') {
|
|
512
|
+
for (const token of this.#accum) {
|
|
513
|
+
token.afterBuild();
|
|
514
|
+
}
|
|
515
|
+
}
|
|
516
|
+
return this;
|
|
517
|
+
}
|
|
518
|
+
|
|
519
|
+
/** 解析、重构、生成部分Token的`name`属性 */
|
|
520
|
+
parse(n = MAX_STAGE, include = false) {
|
|
521
|
+
if (typeof n !== 'number') {
|
|
522
|
+
typeError(this, 'parse', 'Number');
|
|
523
|
+
} else if (n < MAX_STAGE && !Parser.debugging && externalUse('parse')) {
|
|
524
|
+
Parser.warn('指定解析层级的方法仅供熟练用户使用!');
|
|
525
|
+
}
|
|
526
|
+
this.#include = Boolean(include);
|
|
527
|
+
while (this.#stage < n) {
|
|
528
|
+
this.parseOnce(this.#stage, include);
|
|
529
|
+
}
|
|
530
|
+
return n ? this.build().afterBuild() : this;
|
|
531
|
+
}
|
|
532
|
+
|
|
533
|
+
/** @this {Token & {firstChild: string}} */
|
|
534
|
+
#parseCommentAndExt(includeOnly = false) {
|
|
535
|
+
const parseCommentAndExt = require('../parser/commentAndExt');
|
|
536
|
+
this.setText(parseCommentAndExt(this.firstChild, this.#config, this.#accum, includeOnly));
|
|
537
|
+
}
|
|
538
|
+
|
|
539
|
+
/** @this {Token & {firstChild: string}} */
|
|
540
|
+
#parseBrackets() {
|
|
541
|
+
const parseBrackets = require('../parser/brackets');
|
|
542
|
+
this.setText(parseBrackets(this.firstChild, this.#config, this.#accum));
|
|
543
|
+
}
|
|
544
|
+
|
|
545
|
+
/** @this {Token & {firstChild: string}} */
|
|
546
|
+
#parseHtml() {
|
|
547
|
+
const parseHtml = require('../parser/html');
|
|
548
|
+
this.setText(parseHtml(this.firstChild, this.#config, this.#accum));
|
|
549
|
+
}
|
|
550
|
+
|
|
551
|
+
/** @this {Token & {firstChild: string}} */
|
|
552
|
+
#parseTable() {
|
|
553
|
+
const parseTable = require('../parser/table'),
|
|
554
|
+
TableToken = require('./table');
|
|
555
|
+
this.setText(parseTable(this, this.#config, this.#accum));
|
|
556
|
+
for (const table of this.#accum) {
|
|
557
|
+
if (table instanceof TableToken && table.type !== 'td') {
|
|
558
|
+
table.normalize();
|
|
559
|
+
const [, child] = table.childNodes;
|
|
560
|
+
if (typeof child === 'string' && child.includes('\x00')) {
|
|
561
|
+
table.removeAt(1);
|
|
562
|
+
const inner = new Token(child, this.#config, true, this.#accum);
|
|
563
|
+
table.insertAt(inner, 1);
|
|
564
|
+
inner.setAttribute('stage', 4);
|
|
565
|
+
}
|
|
566
|
+
}
|
|
567
|
+
}
|
|
568
|
+
}
|
|
569
|
+
|
|
570
|
+
/** @this {Token & {firstChild: string}} */
|
|
571
|
+
#parseHrAndDoubleUndescore() {
|
|
572
|
+
const parseHrAndDoubleUnderscore = require('../parser/hrAndDoubleUnderscore');
|
|
573
|
+
this.setText(parseHrAndDoubleUnderscore(this.firstChild, this.#config, this.#accum));
|
|
574
|
+
}
|
|
575
|
+
|
|
576
|
+
/** @this {Token & {firstChild: string}} */
|
|
577
|
+
#parseLinks() {
|
|
578
|
+
const parseLinks = require('../parser/links');
|
|
579
|
+
this.setText(parseLinks(this.firstChild, this.#config, this.#accum));
|
|
580
|
+
}
|
|
581
|
+
|
|
582
|
+
/** @param {string} text */
|
|
583
|
+
#parseQuotes(text) {
|
|
584
|
+
const parseQuotes = require('../parser/quotes');
|
|
585
|
+
return parseQuotes(text, this.#config, this.#accum);
|
|
586
|
+
}
|
|
587
|
+
|
|
588
|
+
/** @this {Token & {firstChild: string}} */
|
|
589
|
+
#parseExternalLinks() {
|
|
590
|
+
const parseExternalLinks = require('../parser/externalLinks');
|
|
591
|
+
this.setText(parseExternalLinks(this.firstChild, this.#config, this.#accum));
|
|
592
|
+
}
|
|
593
|
+
|
|
594
|
+
/** @this {Token & {firstChild: string}} */
|
|
595
|
+
#parseMagicLinks() {
|
|
596
|
+
const parseMagicLinks = require('../parser/magicLinks');
|
|
597
|
+
this.setText(parseMagicLinks(this.firstChild, this.#config, this.#accum));
|
|
598
|
+
}
|
|
599
|
+
}
|
|
600
|
+
|
|
601
|
+
Parser.classes.Token = __filename;
|
|
602
|
+
module.exports = Token;
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const /** @type {Parser} */ Parser = require('../..'),
|
|
4
|
+
LinkToken = require('.'),
|
|
5
|
+
Token = require('..'); // eslint-disable-line no-unused-vars
|
|
6
|
+
|
|
7
|
+
/**
 * Category link. `sortkey` mirrors the text of the second child with numeric character
 * references decoded and newlines removed.
 * @classdesc `{childNodes: [AtomToken, ?Token]}`
 */
class CategoryToken extends LinkToken {
	type = 'category';
	sortkey = '';

	// these members are overridden with `undefined` and sealed in the constructor
	setFragment = undefined;
	asSelfLink = undefined;
	pipeTrick = undefined;

	/**
	 * @param {string} link
	 * @param {string|undefined} text
	 * @param {Title} title
	 * @param {ParserConfig} config parser configuration (defaults to `Parser.getConfig()`)
	 * @param {accum} accum
	 */
	constructor(link, text, title, config = Parser.getConfig(), accum = []) {
		super(link, text, title, config, accum);
		this.seal(['sortkey', 'setFragment', 'asSelfLink', 'pipeTrick']);
	}

	/**
	 * Compute the sort key and keep it current whenever an event arrives from the
	 * `link-text` child. Returns `this`, as `Token.afterBuild` does.
	 */
	afterBuild() {
		super.afterBuild();
		this.#updateSortkey();
		const /** @type {AstListener} */ categoryListener = ({prevTarget}) => {
			if (prevTarget?.type === 'link-text') {
				this.#updateSortkey();
			}
		};
		this.addEventListener(['remove', 'insert', 'replace', 'text'], categoryListener);
		return this;
	}

	/** Recompute `sortkey` from the second element child (empty when there is none). */
	#updateSortkey() {
		// `String.fromCharCode` truncates to 16 bits and mangles characters above U+FFFF.
		// `String.fromCodePoint` throws beyond U+10FFFF, so such references are left as written.
		const decode = (/** @type {string} */ entity, /** @type {number} */ code) =>
			code <= 0x10FFFF ? String.fromCodePoint(code) : entity;
		this.setAttribute('sortkey', this.children[1]?.text()
			?.replace(/&#(\d+);/g, /** @param {string} p1 */ (m, p1) => decode(m, Number(p1)))
			?.replace(/&#x([\da-f]+);/gi, /** @param {string} p1 */ (m, p1) => decode(m, parseInt(p1, 16)))
			?.replaceAll('\n', '') ?? '',
		);
	}

	/**
	 * Remove a child; removing the second child (the sort key) resets `sortkey`.
	 * @param {number} i position; negative values count from the end
	 */
	removeAt(i) {
		const k = i < 0 ? i + this.childNodes.length : i,
			// remove first: if the parent refuses and throws, `sortkey` must stay untouched
			removed = super.removeAt(i);
		if (k === 1) {
			this.setAttribute('sortkey', '');
		}
		return removed;
	}

	/**
	 * Insert a child; outside parsing, `sortkey` is refreshed when the new node ends up as
	 * the second child, including when it is appended or addressed with a negative position.
	 * @template {string|Token} T
	 * @param {T} token
	 * @param {number} i
	 */
	insertAt(token, i) {
		super.insertAt(token, i);
		if (!Parser.running && this.childNodes[1] === token) {
			this.#updateSortkey();
		}
		return token;
	}

	/** @returns {string} */
	text() {
		return `[[${this.firstElementChild.text()}]]`;
	}

	/** @returns {[number, string][]} */
	plain() {
		return [];
	}

	/**
	 * Set the sort key by replacing the link text.
	 * @param {string} text
	 */
	setSortkey(text) {
		this.setLinkText(text);
	}
}
|
|
86
|
+
|
|
87
|
+
Parser.classes.CategoryToken = __filename;
|
|
88
|
+
module.exports = CategoryToken;
|