wikiparser-node 0.4.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/default.json +129 -66
- package/config/zhwiki.json +4 -4
- package/index.js +97 -65
- package/lib/element.js +159 -302
- package/lib/node.js +384 -198
- package/lib/ranges.js +3 -4
- package/lib/text.js +65 -36
- package/lib/title.js +9 -8
- package/mixin/fixedToken.js +4 -4
- package/mixin/hidden.js +2 -0
- package/mixin/sol.js +16 -7
- package/package.json +14 -3
- package/parser/brackets.js +8 -2
- package/parser/commentAndExt.js +1 -1
- package/parser/converter.js +1 -1
- package/parser/externalLinks.js +2 -2
- package/parser/hrAndDoubleUnderscore.js +8 -7
- package/parser/links.js +8 -9
- package/parser/magicLinks.js +1 -1
- package/parser/selector.js +5 -5
- package/parser/table.js +18 -16
- package/src/arg.js +71 -42
- package/src/atom/index.js +7 -5
- package/src/attribute.js +102 -64
- package/src/charinsert.js +91 -0
- package/src/converter.js +34 -15
- package/src/converterFlags.js +87 -40
- package/src/converterRule.js +59 -53
- package/src/extLink.js +45 -37
- package/src/gallery.js +71 -16
- package/src/hasNowiki/index.js +42 -0
- package/src/hasNowiki/pre.js +40 -0
- package/src/heading.js +41 -18
- package/src/html.js +76 -48
- package/src/imageParameter.js +73 -51
- package/src/imagemap.js +205 -0
- package/src/imagemapLink.js +43 -0
- package/src/index.js +243 -138
- package/src/link/category.js +10 -14
- package/src/link/file.js +112 -56
- package/src/link/galleryImage.js +74 -10
- package/src/link/index.js +86 -61
- package/src/magicLink.js +48 -21
- package/src/nested/choose.js +24 -0
- package/src/nested/combobox.js +23 -0
- package/src/nested/index.js +88 -0
- package/src/nested/references.js +23 -0
- package/src/nowiki/comment.js +18 -4
- package/src/nowiki/dd.js +2 -2
- package/src/nowiki/doubleUnderscore.js +16 -11
- package/src/nowiki/index.js +12 -0
- package/src/nowiki/quote.js +28 -1
- package/src/onlyinclude.js +15 -8
- package/src/paramTag/index.js +83 -0
- package/src/paramTag/inputbox.js +42 -0
- package/src/parameter.js +73 -46
- package/src/syntax.js +9 -1
- package/src/table/index.js +58 -44
- package/src/table/td.js +63 -63
- package/src/table/tr.js +52 -35
- package/src/tagPair/ext.js +60 -43
- package/src/tagPair/include.js +11 -1
- package/src/tagPair/index.js +29 -20
- package/src/transclude.js +214 -166
- package/tool/index.js +720 -439
- package/util/base.js +17 -0
- package/util/debug.js +1 -1
- package/{test/util.js → util/diff.js} +15 -19
- package/util/lint.js +40 -0
- package/util/string.js +37 -20
- package/.eslintrc.json +0 -714
- package/errors/README +0 -1
- package/jsconfig.json +0 -7
- package/printed/README +0 -1
- package/printed/example.json +0 -120
- package/test/api.js +0 -83
- package/test/real.js +0 -133
- package/test/test.js +0 -28
- package/typings/api.d.ts +0 -13
- package/typings/array.d.ts +0 -28
- package/typings/event.d.ts +0 -24
- package/typings/index.d.ts +0 -94
- package/typings/node.d.ts +0 -29
- package/typings/parser.d.ts +0 -16
- package/typings/table.d.ts +0 -14
- package/typings/token.d.ts +0 -22
- package/typings/tool.d.ts +0 -11
package/lib/ranges.js
CHANGED
|
@@ -62,12 +62,11 @@ class Range {
|
|
|
62
62
|
|
|
63
63
|
/**
|
|
64
64
|
* 将Range转换为针对特定数组的下标集
|
|
65
|
-
* @param {number
|
|
65
|
+
* @param {number|*[]} arr 参考数组
|
|
66
66
|
* @complexity `n`
|
|
67
67
|
*/
|
|
68
68
|
applyTo(arr) {
|
|
69
|
-
return new Array(typeof arr === 'number' ? arr : arr.length).fill().map((_, i) => i)
|
|
70
|
-
.slice(this.start, this.end)
|
|
69
|
+
return new Array(typeof arr === 'number' ? arr : arr.length).fill().map((_, i) => i).slice(this.start, this.end)
|
|
71
70
|
.filter((_, j) => j % this.step === 0);
|
|
72
71
|
}
|
|
73
72
|
}
|
|
@@ -100,7 +99,7 @@ class Ranges extends Array {
|
|
|
100
99
|
|
|
101
100
|
/**
|
|
102
101
|
* 将Ranges转换为针对特定Array的下标集
|
|
103
|
-
* @param {number
|
|
102
|
+
* @param {number|*[]} arr 参考数组
|
|
104
103
|
* @complexity `n`
|
|
105
104
|
*/
|
|
106
105
|
applyTo(arr) {
|
package/lib/text.js
CHANGED
|
@@ -23,6 +23,68 @@ class AstText extends AstNode {
|
|
|
23
23
|
});
|
|
24
24
|
}
|
|
25
25
|
|
|
26
|
+
/** 输出字符串 */
|
|
27
|
+
toString() {
|
|
28
|
+
return this.data;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
/**
|
|
32
|
+
* 修改内容
|
|
33
|
+
* @param {string} text 新内容
|
|
34
|
+
*/
|
|
35
|
+
#setData(text) {
|
|
36
|
+
text = String(text);
|
|
37
|
+
const {data} = this,
|
|
38
|
+
e = new Event('text', {bubbles: true});
|
|
39
|
+
this.setAttribute('data', text);
|
|
40
|
+
if (data !== text) {
|
|
41
|
+
this.dispatchEvent(e, {oldText: data, newText: text});
|
|
42
|
+
}
|
|
43
|
+
return this;
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
/**
|
|
47
|
+
* 替换字符串
|
|
48
|
+
* @param {string} text 替换的字符串
|
|
49
|
+
*/
|
|
50
|
+
replaceData(text = '') {
|
|
51
|
+
this.#setData(text);
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
static errorSyntax = /[{}]+|\[{2,}|\[(?!(?:(?!https?\b)[^[])*\])|(?<=^|\])([^[]*?)\]+|<(?=\s*\/?\w+[\s/>])/giu;
|
|
55
|
+
|
|
56
|
+
/**
|
|
57
|
+
* Linter
|
|
58
|
+
* @param {number} start 起始位置
|
|
59
|
+
* @returns {LintError[]}
|
|
60
|
+
*/
|
|
61
|
+
lint(start = 0) {
|
|
62
|
+
const {data} = this,
|
|
63
|
+
errors = [...data.matchAll(AstText.errorSyntax)];
|
|
64
|
+
if (errors.length > 0) {
|
|
65
|
+
const {top, left} = this.getRootNode().posFromIndex(start);
|
|
66
|
+
return errors.map(({0: error, 1: prefix, index}) => {
|
|
67
|
+
if (prefix) {
|
|
68
|
+
index += prefix.length;
|
|
69
|
+
error = error.slice(prefix.length);
|
|
70
|
+
}
|
|
71
|
+
const lines = data.slice(0, index).split('\n'),
|
|
72
|
+
startLine = lines.length + top - 1,
|
|
73
|
+
{length} = lines.at(-1),
|
|
74
|
+
startCol = lines.length > 1 ? length : left + length;
|
|
75
|
+
return {
|
|
76
|
+
message: `孤立的"${error[0]}"`,
|
|
77
|
+
severity: error[0] === '{' || error[0] === '}' ? 'error' : 'warning',
|
|
78
|
+
startLine,
|
|
79
|
+
endLine: startLine,
|
|
80
|
+
startCol,
|
|
81
|
+
endCol: startCol + error.length,
|
|
82
|
+
};
|
|
83
|
+
});
|
|
84
|
+
}
|
|
85
|
+
return [];
|
|
86
|
+
}
|
|
87
|
+
|
|
26
88
|
/** 复制 */
|
|
27
89
|
cloneNode() {
|
|
28
90
|
return new AstText(this.data);
|
|
@@ -33,6 +95,7 @@ class AstText extends AstNode {
|
|
|
33
95
|
* @template {string} T
|
|
34
96
|
* @param {T} key 属性键
|
|
35
97
|
* @returns {TokenAttribute<T>}
|
|
98
|
+
* @throws `Error` 文本节点没有子节点
|
|
36
99
|
*/
|
|
37
100
|
getAttribute(key) {
|
|
38
101
|
return key === 'verifyChild'
|
|
@@ -42,35 +105,14 @@ class AstText extends AstNode {
|
|
|
42
105
|
: super.getAttribute(key);
|
|
43
106
|
}
|
|
44
107
|
|
|
45
|
-
/** 输出字符串 */
|
|
46
|
-
toString() {
|
|
47
|
-
return this.data;
|
|
48
|
-
}
|
|
49
|
-
|
|
50
108
|
/** @override */
|
|
51
109
|
text() {
|
|
52
110
|
return this.data;
|
|
53
111
|
}
|
|
54
112
|
|
|
55
|
-
/**
|
|
56
|
-
* 修改内容
|
|
57
|
-
* @param {string} text 新内容
|
|
58
|
-
*/
|
|
59
|
-
#setData(text) {
|
|
60
|
-
text = String(text);
|
|
61
|
-
const {data} = this,
|
|
62
|
-
e = new Event('text', {bubbles: true});
|
|
63
|
-
this.setAttribute('data', text);
|
|
64
|
-
if (data !== text) {
|
|
65
|
-
this.dispatchEvent(e, {oldText: data, newText: text});
|
|
66
|
-
}
|
|
67
|
-
return this;
|
|
68
|
-
}
|
|
69
|
-
|
|
70
113
|
/**
|
|
71
114
|
* 在后方添加字符串
|
|
72
115
|
* @param {string} text 添加的字符串
|
|
73
|
-
* @throws `Error` 禁止外部调用
|
|
74
116
|
*/
|
|
75
117
|
appendData(text) {
|
|
76
118
|
this.#setData(this.data + text);
|
|
@@ -80,8 +122,6 @@ class AstText extends AstNode {
|
|
|
80
122
|
* 删减字符串
|
|
81
123
|
* @param {number} offset 起始位置
|
|
82
124
|
* @param {number} count 删减字符数
|
|
83
|
-
* @throws `RangeError` 错误的删减位置
|
|
84
|
-
* @throws `Error` 禁止外部调用
|
|
85
125
|
*/
|
|
86
126
|
deleteData(offset, count) {
|
|
87
127
|
this.#setData(this.data.slice(0, offset) + this.data.slice(offset + count));
|
|
@@ -91,22 +131,11 @@ class AstText extends AstNode {
|
|
|
91
131
|
* 插入字符串
|
|
92
132
|
* @param {number} offset 插入位置
|
|
93
133
|
* @param {string} text 待插入的字符串
|
|
94
|
-
* @throws `RangeError` 错误的插入位置
|
|
95
|
-
* @throws `Error` 禁止外部调用
|
|
96
134
|
*/
|
|
97
135
|
insertData(offset, text) {
|
|
98
136
|
this.#setData(this.data.slice(0, offset) + text + this.data.slice(offset));
|
|
99
137
|
}
|
|
100
138
|
|
|
101
|
-
/**
|
|
102
|
-
* 替换字符串
|
|
103
|
-
* @param {string} text 替换的字符串
|
|
104
|
-
* @throws `Error` 禁止外部调用
|
|
105
|
-
*/
|
|
106
|
-
replaceData(text = '') {
|
|
107
|
-
this.#setData(text);
|
|
108
|
-
}
|
|
109
|
-
|
|
110
139
|
/**
|
|
111
140
|
* 提取子串
|
|
112
141
|
* @param {number} offset 起始位置
|
|
@@ -123,9 +152,9 @@ class AstText extends AstNode {
|
|
|
123
152
|
* @throws `Error` 没有父节点
|
|
124
153
|
*/
|
|
125
154
|
splitText(offset) {
|
|
126
|
-
if (
|
|
155
|
+
if (!Number.isInteger(offset)) {
|
|
127
156
|
this.typeError('splitText', 'Number');
|
|
128
|
-
} else if (offset > this.length || offset < -this.length
|
|
157
|
+
} else if (offset > this.length || offset < -this.length) {
|
|
129
158
|
throw new RangeError(`错误的断开位置!${offset}`);
|
|
130
159
|
}
|
|
131
160
|
const {parentNode, data} = this;
|
package/lib/title.js
CHANGED
|
@@ -4,13 +4,13 @@ const Parser = require('..');
|
|
|
4
4
|
|
|
5
5
|
/** MediaWiki页面标题对象 */
|
|
6
6
|
class Title {
|
|
7
|
+
valid = true;
|
|
8
|
+
ns = 0;
|
|
7
9
|
title = '';
|
|
8
10
|
main = '';
|
|
9
11
|
prefix = '';
|
|
10
|
-
ns = 0;
|
|
11
12
|
interwiki = '';
|
|
12
13
|
fragment = '';
|
|
13
|
-
valid = true;
|
|
14
14
|
|
|
15
15
|
/**
|
|
16
16
|
* @param {string} title 标题(含或不含命名空间前缀)
|
|
@@ -39,24 +39,25 @@ class Title {
|
|
|
39
39
|
}
|
|
40
40
|
this.ns = nsid[namespace.toLowerCase()];
|
|
41
41
|
const i = title.indexOf('#');
|
|
42
|
+
let fragment = '';
|
|
42
43
|
if (i !== -1) {
|
|
43
|
-
|
|
44
|
+
fragment = title.slice(i + 1).trimEnd();
|
|
44
45
|
if (fragment.includes('%')) {
|
|
45
46
|
try {
|
|
46
|
-
|
|
47
|
+
fragment = decodeURIComponent(fragment);
|
|
47
48
|
} catch {}
|
|
48
49
|
} else if (fragment.includes('.')) {
|
|
49
50
|
try {
|
|
50
|
-
|
|
51
|
+
fragment = decodeURIComponent(fragment.replaceAll('.', '%'));
|
|
51
52
|
} catch {}
|
|
52
53
|
}
|
|
53
|
-
this.fragment ||= fragment;
|
|
54
54
|
title = title.slice(0, i).trim();
|
|
55
55
|
}
|
|
56
|
+
this.valid = Boolean(title || fragment) && !/\0\d+[eh!+-]\x7F|[<>[\]{}|]/u.test(title);
|
|
56
57
|
this.main = title && `${title[0].toUpperCase()}${title.slice(1)}`;
|
|
57
58
|
this.prefix = `${namespace}${namespace && ':'}`;
|
|
58
|
-
this.title = `${iw ? `${this.interwiki}:` : ''}${this.prefix}${this.main}`;
|
|
59
|
-
this.
|
|
59
|
+
this.title = `${iw ? `${this.interwiki}:` : ''}${this.prefix}${this.main.replaceAll(' ', '_')}`;
|
|
60
|
+
this.fragment = fragment;
|
|
60
61
|
}
|
|
61
62
|
|
|
62
63
|
/** @override */
|
package/mixin/fixedToken.js
CHANGED
|
@@ -28,11 +28,11 @@ const fixedToken = Constructor => class extends Constructor {
|
|
|
28
28
|
* @throws `Error`
|
|
29
29
|
*/
|
|
30
30
|
insertAt(token, i = this.childNodes.length) {
|
|
31
|
-
if (
|
|
32
|
-
|
|
31
|
+
if (Parser.running) {
|
|
32
|
+
super.insertAt(token, i);
|
|
33
|
+
return token;
|
|
33
34
|
}
|
|
34
|
-
|
|
35
|
-
return token;
|
|
35
|
+
throw new Error(`${this.constructor.name} 不可插入元素!`);
|
|
36
36
|
}
|
|
37
37
|
};
|
|
38
38
|
|
package/mixin/hidden.js
CHANGED
package/mixin/sol.js
CHANGED
|
@@ -9,25 +9,34 @@ const Parser = require('..'),
|
|
|
9
9
|
* @param {T} Constructor 基类
|
|
10
10
|
* @returns {T}
|
|
11
11
|
*/
|
|
12
|
-
const sol = Constructor => class extends Constructor {
|
|
12
|
+
const sol = Constructor => class SolToken extends Constructor {
|
|
13
13
|
/**
|
|
14
|
-
*
|
|
14
|
+
* 是否可以视为root节点
|
|
15
15
|
* @this {Token}
|
|
16
|
+
* @param {boolean} includeHeading 是否包括HeadingToken
|
|
17
|
+
*/
|
|
18
|
+
#isRoot(includeHeading) {
|
|
19
|
+
const {parentNode, type} = this;
|
|
20
|
+
return parentNode?.type === 'root'
|
|
21
|
+
|| parentNode?.type === 'ext-inner' && (includeHeading || type !== 'heading' && parentNode.name === 'poem');
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* 在前方插入newline
|
|
26
|
+
* @this {SolToken & Token}
|
|
16
27
|
*/
|
|
17
28
|
prependNewLine() {
|
|
18
|
-
|
|
19
|
-
return (previousVisibleSibling || parentNode?.type !== 'root') && String(previousVisibleSibling).at(-1) !== '\n'
|
|
29
|
+
return (this.previousVisibleSibling || !this.#isRoot()) && String(this.previousVisibleSibling).at(-1) !== '\n'
|
|
20
30
|
? '\n'
|
|
21
31
|
: '';
|
|
22
32
|
}
|
|
23
33
|
|
|
24
34
|
/**
|
|
25
35
|
* 在后方插入newline
|
|
26
|
-
* @this {Token}
|
|
36
|
+
* @this {SolToken & Token}
|
|
27
37
|
*/
|
|
28
38
|
appendNewLine() {
|
|
29
|
-
|
|
30
|
-
return (nextVisibleSibling || parentNode?.type !== 'root') && String(nextVisibleSibling ?? '')[0] !== '\n'
|
|
39
|
+
return (this.nextVisibleSibling || !this.#isRoot(true)) && String(this.nextVisibleSibling ?? '')[0] !== '\n'
|
|
31
40
|
? '\n'
|
|
32
41
|
: '';
|
|
33
42
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "wikiparser-node",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.6.1",
|
|
4
4
|
"description": "A Node.js parser for MediaWiki markup with AST",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"mediawiki",
|
|
@@ -13,13 +13,24 @@
|
|
|
13
13
|
},
|
|
14
14
|
"license": "GPL-3.0",
|
|
15
15
|
"author": "Bhsd",
|
|
16
|
+
"files": [
|
|
17
|
+
"/index.js",
|
|
18
|
+
"/parser/",
|
|
19
|
+
"/util/",
|
|
20
|
+
"/lib/",
|
|
21
|
+
"/src/",
|
|
22
|
+
"/config/",
|
|
23
|
+
"/mixin/",
|
|
24
|
+
"/tool/"
|
|
25
|
+
],
|
|
16
26
|
"repository": {
|
|
17
27
|
"type": "git",
|
|
18
28
|
"url": "git+https://github.com/bhsd-harry/wikiparser-node.git"
|
|
19
29
|
},
|
|
20
30
|
"scripts": {
|
|
21
|
-
"test": "node test/test.js",
|
|
22
|
-
"real": "node test/real.js"
|
|
31
|
+
"test": "eslint . && node test/test.js",
|
|
32
|
+
"real": "node test/real.js",
|
|
33
|
+
"single": "node --prof test/single.js && node --prof-process isolate-0x*-v8.log > test/processed.txt && rm isolate-0x*-v8.log"
|
|
23
34
|
},
|
|
24
35
|
"devDependencies": {
|
|
25
36
|
"@types/node": "^17.0.23",
|
package/parser/brackets.js
CHANGED
|
@@ -57,12 +57,18 @@ const parseBrackets = (text, config = Parser.getConfig(), accum = []) => {
|
|
|
57
57
|
{length} = accum;
|
|
58
58
|
lastIndex = curIndex + close.length; // 这不是最终的lastIndex
|
|
59
59
|
parts.at(-1).push(text.slice(topPos, curIndex));
|
|
60
|
-
|
|
61
|
-
|
|
60
|
+
let skip = false,
|
|
61
|
+
ch = 't';
|
|
62
62
|
if (close.length === 3) {
|
|
63
63
|
const ArgToken = require('../src/arg');
|
|
64
64
|
new ArgToken(parts.map(part => part.join('=')), config, accum);
|
|
65
65
|
} else {
|
|
66
|
+
const name = removeComment(parts[0][0]);
|
|
67
|
+
if (name in marks) {
|
|
68
|
+
ch = marks[name]; // 标记{{!}}等
|
|
69
|
+
} else if (/^(?:fullurl|canonicalurl|filepath):./iu.test(name)) {
|
|
70
|
+
ch = 'm';
|
|
71
|
+
}
|
|
66
72
|
try {
|
|
67
73
|
const TranscludeToken = require('../src/transclude');
|
|
68
74
|
new TranscludeToken(parts[0][0], parts.slice(1), config, accum);
|
package/parser/commentAndExt.js
CHANGED
|
@@ -35,7 +35,7 @@ const parseCommentAndExt = (text, config = Parser.getConfig(), accum = [], inclu
|
|
|
35
35
|
+ `<(${noincludeRegex})(\\s[^>]*?)?(?:/>|>(.*?)(?:</(\\5\\s*)>|$))`, // <noinclude>
|
|
36
36
|
'gisu',
|
|
37
37
|
);
|
|
38
|
-
return text.
|
|
38
|
+
return text.replaceAll(
|
|
39
39
|
regex,
|
|
40
40
|
/** @type {function(...string): string} */
|
|
41
41
|
(substr, name, attr, inner, closing, include, includeAttr, includeInner, includeClosing) => {
|
package/parser/converter.js
CHANGED
|
@@ -22,7 +22,7 @@ const parseConverter = (wikitext, config = Parser.getConfig(), accum = []) => {
|
|
|
22
22
|
str = wikitext.slice(top.index + 2, index),
|
|
23
23
|
i = str.indexOf('|'),
|
|
24
24
|
[flags, text] = i === -1 ? [[], str] : [str.slice(0, i).split(';'), str.slice(i + 1)],
|
|
25
|
-
temp = text.
|
|
25
|
+
temp = text.replaceAll(/(&[#a-z\d]+);/giu, '$1\x01'),
|
|
26
26
|
variants = `(?:${config.variants.join('|')})`,
|
|
27
27
|
rules = temp.split(new RegExp(`;(?=\\s*(?:${variants}|[^;]*?=>\\s*${variants})\\s*:)`, 'u'))
|
|
28
28
|
.map(rule => rule.replaceAll('\x01', ';'));
|
package/parser/externalLinks.js
CHANGED
|
@@ -11,10 +11,10 @@ const {extUrlChar} = require('../util/string'),
|
|
|
11
11
|
const parseExternalLinks = (wikitext, config = Parser.getConfig(), accum = []) => {
|
|
12
12
|
const ExtLinkToken = require('../src/extLink');
|
|
13
13
|
const regex = new RegExp(
|
|
14
|
-
`\\[((?:${config.protocol}|//)${extUrlChar})(\\p{Zs}*)([^\\]\x01-\x08\x0A-\x1F\uFFFD]*)\\]`,
|
|
14
|
+
`\\[((?:${config.protocol}|//)${extUrlChar}|\0\\d+m\x7F)(\\p{Zs}*)([^\\]\x01-\x08\x0A-\x1F\uFFFD]*)\\]`,
|
|
15
15
|
'giu',
|
|
16
16
|
);
|
|
17
|
-
return wikitext.
|
|
17
|
+
return wikitext.replaceAll(regex, /** @type {function(...string): string} */ (_, url, space, text) => {
|
|
18
18
|
const {length} = accum,
|
|
19
19
|
mt = /&[lg]t;/u.exec(url);
|
|
20
20
|
if (mt) {
|
|
@@ -1,18 +1,19 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
const Parser = require('..')
|
|
4
|
-
AstText = require('../lib/text');
|
|
3
|
+
const Parser = require('..');
|
|
5
4
|
|
|
6
5
|
/**
|
|
7
6
|
* 解析\<hr\>和状态开关
|
|
8
|
-
* @param {{firstChild: AstText
|
|
7
|
+
* @param {Token & {firstChild: AstText}} root 根节点
|
|
9
8
|
* @param {accum} accum
|
|
10
9
|
*/
|
|
11
|
-
const parseHrAndDoubleUnderscore = ({firstChild: {data}, type}, config = Parser.getConfig(), accum = []) => {
|
|
12
|
-
const
|
|
10
|
+
const parseHrAndDoubleUnderscore = ({firstChild: {data}, type, name}, config = Parser.getConfig(), accum = []) => {
|
|
11
|
+
const AstText = require('../lib/text'),
|
|
12
|
+
Token = require('../src'),
|
|
13
|
+
HrToken = require('../src/nowiki/hr'),
|
|
13
14
|
DoubleUnderscoreToken = require('../src/nowiki/doubleUnderscore');
|
|
14
15
|
const {doubleUnderscore} = config;
|
|
15
|
-
if (type !== 'root') {
|
|
16
|
+
if (type !== 'root' && (type !== 'ext-inner' || name !== 'poem')) {
|
|
16
17
|
data = `\0${data}`;
|
|
17
18
|
}
|
|
18
19
|
data = data.replaceAll(/^((?:\0\d+c\x7F)*)(-{4,})/gmu, (_, lead, m) => {
|
|
@@ -28,7 +29,7 @@ const parseHrAndDoubleUnderscore = ({firstChild: {data}, type}, config = Parser.
|
|
|
28
29
|
return m;
|
|
29
30
|
},
|
|
30
31
|
);
|
|
31
|
-
return type === 'root' ? data : data.slice(1);
|
|
32
|
+
return type === 'root' || type === 'ext-inner' && name === 'poem' ? data : data.slice(1);
|
|
32
33
|
};
|
|
33
34
|
|
|
34
35
|
Parser.parsers.parseHrAndDoubleUnderscore = __filename;
|
package/parser/links.js
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
const Parser = require('..')
|
|
4
|
-
Token = require('../src');
|
|
3
|
+
const Parser = require('..');
|
|
5
4
|
|
|
6
5
|
/**
|
|
7
6
|
* 解析内部链接
|
|
@@ -10,17 +9,17 @@ const Parser = require('..'),
|
|
|
10
9
|
*/
|
|
11
10
|
const parseLinks = (wikitext, config = Parser.getConfig(), accum = []) => {
|
|
12
11
|
const parseQuotes = require('./quotes.js');
|
|
13
|
-
const regex = /^([^\n<>[\]{}|]+)(
|
|
14
|
-
regexImg = /^([^\n<>[\]{}|]+)
|
|
12
|
+
const regex = /^((?:(?!\0\d+!\x7F)[^\n<>[\]{}|])+)(?:(\||\0\d+!\x7F)(.*?[^\]]))?\]\](.*)$/su,
|
|
13
|
+
regexImg = /^((?:(?!\0\d+!\x7F)[^\n<>[\]{}|])+)(\||\0\d+!\x7F)(.*)$/su,
|
|
15
14
|
regexExt = new RegExp(`^\\s*(?:${config.protocol})`, 'iu'),
|
|
16
15
|
bits = wikitext.split('[[');
|
|
17
16
|
let s = bits.shift();
|
|
18
17
|
for (let i = 0; i < bits.length; i++) {
|
|
19
|
-
let mightBeImg, link, text, after;
|
|
18
|
+
let mightBeImg, link, delimiter, text, after;
|
|
20
19
|
const x = bits[i],
|
|
21
20
|
m = regex.exec(x);
|
|
22
21
|
if (m) {
|
|
23
|
-
[, link, text, after] = m;
|
|
22
|
+
[, link, delimiter, text, after] = m;
|
|
24
23
|
if (after[0] === ']' && text?.includes('[')) {
|
|
25
24
|
text += ']';
|
|
26
25
|
after = after.slice(1);
|
|
@@ -29,7 +28,7 @@ const parseLinks = (wikitext, config = Parser.getConfig(), accum = []) => {
|
|
|
29
28
|
const m2 = regexImg.exec(x);
|
|
30
29
|
if (m2) {
|
|
31
30
|
mightBeImg = true;
|
|
32
|
-
[, link, text] = m2;
|
|
31
|
+
[, link, delimiter, text] = m2;
|
|
33
32
|
}
|
|
34
33
|
}
|
|
35
34
|
if (link === undefined || regexExt.test(link) || /\0\d+[exhbru]\x7F/u.test(link)) {
|
|
@@ -75,7 +74,7 @@ const parseLinks = (wikitext, config = Parser.getConfig(), accum = []) => {
|
|
|
75
74
|
}
|
|
76
75
|
text = parseLinks(text, config, accum);
|
|
77
76
|
if (!found) {
|
|
78
|
-
s += `[[${link}
|
|
77
|
+
s += `[[${link}${delimiter}${text}`;
|
|
79
78
|
continue;
|
|
80
79
|
}
|
|
81
80
|
}
|
|
@@ -89,7 +88,7 @@ const parseLinks = (wikitext, config = Parser.getConfig(), accum = []) => {
|
|
|
89
88
|
LinkToken = require('../src/link/category');
|
|
90
89
|
}
|
|
91
90
|
}
|
|
92
|
-
new LinkToken(link, text, title, config, accum);
|
|
91
|
+
new LinkToken(link, text, title, config, accum, delimiter);
|
|
93
92
|
}
|
|
94
93
|
return s;
|
|
95
94
|
};
|
package/parser/magicLinks.js
CHANGED
|
@@ -11,7 +11,7 @@ const {extUrlChar} = require('../util/string'),
|
|
|
11
11
|
const parseMagicLinks = (wikitext, config = Parser.getConfig(), accum = []) => {
|
|
12
12
|
const MagicLinkToken = require('../src/magicLink');
|
|
13
13
|
const regex = new RegExp(`\\b(?:${config.protocol})(${extUrlChar})`, 'giu');
|
|
14
|
-
return wikitext.
|
|
14
|
+
return wikitext.replaceAll(regex, /** @param {string} p1 */ (m, p1) => {
|
|
15
15
|
let trail = '',
|
|
16
16
|
url = m;
|
|
17
17
|
const m2 = /&(?:lt|gt|nbsp|#x0*(?:3[ce]|a0)|#0*(?:6[02]|160));/iu.exec(url);
|
package/parser/selector.js
CHANGED
|
@@ -46,7 +46,7 @@ const /** @type {pseudo[]} */ simplePseudos = [
|
|
|
46
46
|
['&', '&'],
|
|
47
47
|
],
|
|
48
48
|
pseudoRegex = new RegExp(`:(${complexPseudos.join('|')})$`, 'u'),
|
|
49
|
-
regularRegex = /[[(,>+~]|\s+/u,
|
|
49
|
+
regularRegex = /[[(,>+~]|\s+/u,
|
|
50
50
|
attributeRegex = /^\s*(\w+)\s*(?:([~|^$*!]?=)\s*("[^"]*"|'[^']*'|[^\s[\]]+)(?:\s+(i))?\s*)?\]/u,
|
|
51
51
|
functionRegex = /^(\s*"[^"]*"\s*|\s*'[^']*'\s*|[^()]*)\)/u;
|
|
52
52
|
|
|
@@ -164,11 +164,11 @@ const parseSelector = selector => {
|
|
|
164
164
|
}
|
|
165
165
|
mt = regex.exec(sanitized);
|
|
166
166
|
}
|
|
167
|
-
if (regex
|
|
168
|
-
|
|
167
|
+
if (regex === regularRegex) {
|
|
168
|
+
pushSimple(step, sanitized);
|
|
169
|
+
return stack;
|
|
169
170
|
}
|
|
170
|
-
|
|
171
|
-
return stack;
|
|
171
|
+
throw new SyntaxError(`非法的选择器!\n${selector}\n检测到未闭合的'${regex === attributeRegex ? '[' : '('}'`);
|
|
172
172
|
};
|
|
173
173
|
|
|
174
174
|
Parser.parsers.parseSelector = __filename;
|
package/parser/table.js
CHANGED
|
@@ -1,22 +1,23 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
3
|
const Parser = require('..'),
|
|
4
|
-
AstText = require('../lib/text')
|
|
4
|
+
AstText = require('../lib/text'),
|
|
5
|
+
Token = require('../src'),
|
|
6
|
+
TableToken = require('../src/table'),
|
|
7
|
+
TrToken = require('../src/table/tr'),
|
|
8
|
+
TdToken = require('../src/table/td');
|
|
5
9
|
|
|
6
10
|
/**
|
|
7
11
|
* 解析表格,注意`tr`和`td`包含开头的换行
|
|
8
|
-
* @param {{firstChild: AstText
|
|
12
|
+
* @param {Token & {firstChild: AstText}} root 根节点
|
|
9
13
|
* @param {accum} accum
|
|
10
14
|
*/
|
|
11
|
-
const parseTable = ({firstChild: {data}, type}, config = Parser.getConfig(), accum = []) => {
|
|
12
|
-
const Token = require('../src'),
|
|
13
|
-
TableToken = require('../src/table'),
|
|
14
|
-
TrToken = require('../src/table/tr'),
|
|
15
|
-
TdToken = require('../src/table/td'),
|
|
16
|
-
DdToken = require('../src/nowiki/dd');
|
|
15
|
+
const parseTable = ({firstChild: {data}, type, name}, config = Parser.getConfig(), accum = []) => {
|
|
17
16
|
const /** @type {TrToken[]} */ stack = [],
|
|
18
17
|
lines = data.split('\n');
|
|
19
|
-
let out = type === 'root'
|
|
18
|
+
let out = type === 'root' || type === 'parameter-value' || type === 'ext-inner' && name === 'poem'
|
|
19
|
+
? ''
|
|
20
|
+
: `\n${lines.shift()}`;
|
|
20
21
|
|
|
21
22
|
/**
|
|
22
23
|
* 向表格中插入纯文本
|
|
@@ -28,13 +29,13 @@ const parseTable = ({firstChild: {data}, type}, config = Parser.getConfig(), acc
|
|
|
28
29
|
out += str;
|
|
29
30
|
return;
|
|
30
31
|
}
|
|
31
|
-
const {
|
|
32
|
-
if (
|
|
33
|
-
|
|
32
|
+
const /** @type {Token}} */ {lastChild} = top;
|
|
33
|
+
if (lastChild.constructor === Token) {
|
|
34
|
+
lastChild.setText(String(lastChild) + str);
|
|
34
35
|
} else {
|
|
35
36
|
const token = new Token(str, config, true, accum);
|
|
36
37
|
token.type = 'table-inter';
|
|
37
|
-
top.
|
|
38
|
+
top.insertAt(token.setAttribute('stage', 3));
|
|
38
39
|
}
|
|
39
40
|
};
|
|
40
41
|
for (const outLine of lines) {
|
|
@@ -48,6 +49,7 @@ const parseTable = ({firstChild: {data}, type}, config = Parser.getConfig(), acc
|
|
|
48
49
|
}
|
|
49
50
|
const [, indent, moreSpaces, tableSyntax, attr] = matchesStart;
|
|
50
51
|
if (indent) {
|
|
52
|
+
const DdToken = require('../src/nowiki/dd');
|
|
51
53
|
new DdToken(indent, config, accum);
|
|
52
54
|
}
|
|
53
55
|
push(`\n${spaces}${indent && `\0${accum.length - 1}d\x7F`}${moreSpaces}\0${accum.length}b\x7F`, top);
|
|
@@ -82,7 +84,7 @@ const parseTable = ({firstChild: {data}, type}, config = Parser.getConfig(), acc
|
|
|
82
84
|
}
|
|
83
85
|
const tr = new TrToken(`\n${spaces}${row}`, attr, config, accum);
|
|
84
86
|
stack.push(top, tr);
|
|
85
|
-
top.
|
|
87
|
+
top.insertAt(tr);
|
|
86
88
|
} else {
|
|
87
89
|
if (top.type === 'td') {
|
|
88
90
|
top = stack.pop();
|
|
@@ -95,14 +97,14 @@ const parseTable = ({firstChild: {data}, type}, config = Parser.getConfig(), acc
|
|
|
95
97
|
lastSyntax = `\n${spaces}${cell}`;
|
|
96
98
|
while (mt) {
|
|
97
99
|
const td = new TdToken(lastSyntax, attr.slice(lastIndex, mt.index), config, accum);
|
|
98
|
-
top.
|
|
100
|
+
top.insertAt(td);
|
|
99
101
|
({lastIndex} = regex);
|
|
100
102
|
[lastSyntax] = mt;
|
|
101
103
|
mt = regex.exec(attr);
|
|
102
104
|
}
|
|
103
105
|
const td = new TdToken(lastSyntax, attr.slice(lastIndex), config, accum);
|
|
104
106
|
stack.push(top, td);
|
|
105
|
-
top.
|
|
107
|
+
top.insertAt(td);
|
|
106
108
|
}
|
|
107
109
|
}
|
|
108
110
|
return out.slice(1);
|