wikiparser-node 0.8.1 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/moegirl.json +1 -0
- package/i18n/zh-hans.json +44 -0
- package/i18n/zh-hant.json +44 -0
- package/index.js +15 -3
- package/lib/element.js +27 -27
- package/lib/node.js +3 -5
- package/lib/text.js +11 -33
- package/lib/title.js +8 -6
- package/mixin/sol.js +3 -14
- package/package.json +7 -6
- package/parser/brackets.js +8 -2
- package/parser/commentAndExt.js +1 -4
- package/parser/links.js +1 -1
- package/parser/quotes.js +1 -1
- package/parser/selector.js +5 -5
- package/src/arg.js +7 -3
- package/src/attribute.js +14 -8
- package/src/attributes.js +6 -7
- package/src/converterFlags.js +3 -3
- package/src/converterRule.js +4 -6
- package/src/extLink.js +4 -3
- package/src/gallery.js +6 -9
- package/src/heading.js +8 -8
- package/src/html.js +15 -9
- package/src/imageParameter.js +78 -55
- package/src/imagemap.js +5 -5
- package/src/imagemapLink.js +1 -1
- package/src/index.js +9 -6
- package/src/link/category.js +4 -9
- package/src/link/file.js +16 -11
- package/src/link/galleryImage.js +7 -7
- package/src/link/index.js +19 -18
- package/src/magicLink.js +2 -2
- package/src/nested/index.js +3 -6
- package/src/nowiki/comment.js +2 -2
- package/src/nowiki/index.js +2 -2
- package/src/nowiki/quote.js +3 -3
- package/src/paramTag/index.js +3 -3
- package/src/parameter.js +2 -2
- package/src/table/index.js +5 -6
- package/src/table/td.js +5 -6
- package/src/table/tr.js +3 -3
- package/src/tagPair/ext.js +14 -8
- package/src/transclude.js +43 -26
- package/util/lint.js +8 -7
- package/util/string.js +20 -1
package/config/moegirl.json
CHANGED
package/i18n/zh-hans.json
CHANGED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
{
|
|
2
|
+
"<imagemap> without an image": "缺少图片的<imagemap>",
|
|
3
|
+
"$1 in URL": "URL中的$1",
|
|
4
|
+
"additional \"|\" in a table cell": "表格单元格中多余的\"|\"",
|
|
5
|
+
"additional \"|\" in the link text": "链接文本中多余的\"|\"",
|
|
6
|
+
"attributes of a closing tag": "位于闭合标签的属性",
|
|
7
|
+
"conflicting image $1 parameter": "冲突的图片$1参数",
|
|
8
|
+
"containing invalid attribute": "包含无效属性",
|
|
9
|
+
"content to be moved out from the table": "将被移出表格的内容",
|
|
10
|
+
"duplicated $1 attribute": "重复的$1属性",
|
|
11
|
+
"duplicated image $1 parameter": "重复的图片$1参数",
|
|
12
|
+
"duplicated parameter": "重复参数",
|
|
13
|
+
"extension tag in HTML tag attributes": "HTML标签属性中的扩展标签",
|
|
14
|
+
"frame": "框架",
|
|
15
|
+
"full-width punctuation": "全角标点",
|
|
16
|
+
"horizontal-alignment": "水平对齐",
|
|
17
|
+
"HTML tag in table attributes": "表格属性中的HTML标签",
|
|
18
|
+
"illegal attribute name": "非法的属性名",
|
|
19
|
+
"illegal module name": "非法的模块名称",
|
|
20
|
+
"insecure style": "不安全的样式",
|
|
21
|
+
"invalid content in <$1>": "<$1>内的无效内容",
|
|
22
|
+
"invalid conversion flag": "无效的转换标记",
|
|
23
|
+
"invalid gallery image": "无效的图库图片",
|
|
24
|
+
"invalid gallery image parameter": "无效的图库图片参数",
|
|
25
|
+
"invalid link in <imagemap>": "无效的<imagemap>链接",
|
|
26
|
+
"invalid parameter of $1": "$1的无效参数",
|
|
27
|
+
"invalid self-closing tag": "无效自封闭标签",
|
|
28
|
+
"invisible content inside triple brackets": "三重括号内的不可见部分",
|
|
29
|
+
"lonely \"$1\"": "孤立的\"$1\"",
|
|
30
|
+
"nothing should be in <$1>": "<$1>标签内不应有任何内容",
|
|
31
|
+
"section header in a HTML tag": "HTML标签属性中的段落标题",
|
|
32
|
+
"tag that is both closing and self-closing": "同时闭合和自封闭的标签",
|
|
33
|
+
"unbalanced \"=\" in a section header": "段落标题中不平衡的\"=\"",
|
|
34
|
+
"unclosed HTML comment": "未闭合的HTML注释",
|
|
35
|
+
"unclosed quotes": "未闭合的引号",
|
|
36
|
+
"unclosed table": "未闭合的表格",
|
|
37
|
+
"unclosed tag": "未闭合的标签",
|
|
38
|
+
"unescaped query string in an anonymous parameter": "匿名参数中未转义的查询参数",
|
|
39
|
+
"unexpected template argument": "未预期的模板参数",
|
|
40
|
+
"unmatched closing tag": "未匹配的闭合标签",
|
|
41
|
+
"unnecessary URL encoding in an internal link": "内链中不必要的URL编码",
|
|
42
|
+
"useless fragment": "多余的fragment",
|
|
43
|
+
"vertical-alignment": "垂直对齐"
|
|
44
|
+
}
|
|
package/i18n/zh-hant.json
CHANGED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
{
|
|
2
|
+
"<imagemap> without an image": "缺少圖片的<imagemap>",
|
|
3
|
+
"$1 in URL": "URL中的$1",
|
|
4
|
+
"additional \"|\" in a table cell": "表格單元格中多餘的\"|\"",
|
|
5
|
+
"additional \"|\" in the link text": "連結文本中多餘的\"|\"",
|
|
6
|
+
"attributes of a closing tag": "位於閉合標籤的屬性",
|
|
7
|
+
"conflicting image $1 parameter": "衝突的圖片$1參數",
|
|
8
|
+
"containing invalid attribute": "包含無效屬性",
|
|
9
|
+
"content to be moved out from the table": "將被移出表格的內容",
|
|
10
|
+
"duplicated $1 attribute": "重複的$1屬性",
|
|
11
|
+
"duplicated image $1 parameter": "重複的圖片$1參數",
|
|
12
|
+
"duplicated parameter": "重複參數",
|
|
13
|
+
"extension tag in HTML tag attributes": "HTML標籤屬性中的擴展標籤",
|
|
14
|
+
"frame": "框架",
|
|
15
|
+
"full-width punctuation": "全形標點",
|
|
16
|
+
"horizontal-alignment": "水平對齊",
|
|
17
|
+
"HTML tag in table attributes": "表格屬性中的HTML標籤",
|
|
18
|
+
"illegal attribute name": "非法的屬性名",
|
|
19
|
+
"illegal module name": "非法的模組名稱",
|
|
20
|
+
"insecure style": "不安全的樣式",
|
|
21
|
+
"invalid content in <$1>": "<$1>內的無效內容",
|
|
22
|
+
"invalid conversion flag": "無效的轉換標記",
|
|
23
|
+
"invalid gallery image": "無效的圖庫圖片",
|
|
24
|
+
"invalid gallery image parameter": "無效的圖庫圖片參數",
|
|
25
|
+
"invalid link in <imagemap>": "無效的<imagemap>連結",
|
|
26
|
+
"invalid parameter of $1": "$1的無效參數",
|
|
27
|
+
"invalid self-closing tag": "無效自封閉標籤",
|
|
28
|
+
"invisible content inside triple brackets": "三重括號內的不可見部分",
|
|
29
|
+
"lonely \"$1\"": "孤立的\"$1\"",
|
|
30
|
+
"nothing should be in <$1>": "<$1>標籤內不應有任何內容",
|
|
31
|
+
"section header in a HTML tag": "HTML標籤屬性中的段落標題",
|
|
32
|
+
"tag that is both closing and self-closing": "同時閉合和自封閉的標籤",
|
|
33
|
+
"unbalanced \"=\" in a section header": "段落標題中不平衡的\"=\"",
|
|
34
|
+
"unclosed HTML comment": "未閉合的HTML註釋",
|
|
35
|
+
"unclosed quotes": "未閉合的引號",
|
|
36
|
+
"unclosed table": "未閉合的表格",
|
|
37
|
+
"unclosed tag": "未閉合的標籤",
|
|
38
|
+
"unescaped query string in an anonymous parameter": "匿名參數中未轉義的查詢參數",
|
|
39
|
+
"unexpected template argument": "未預期的模板參數",
|
|
40
|
+
"unmatched closing tag": "未匹配的閉合標籤",
|
|
41
|
+
"unnecessary URL encoding in an internal link": "內部連結中不必要的URL編碼",
|
|
42
|
+
"useless fragment": "多餘的fragment",
|
|
43
|
+
"vertical-alignment": "垂直對齊"
|
|
44
|
+
}
|
package/index.js
CHANGED
|
@@ -5,6 +5,7 @@ const fs = require('fs'),
|
|
|
5
5
|
|
|
6
6
|
const /** @type {Parser} */ Parser = {
|
|
7
7
|
config: './config/default',
|
|
8
|
+
i18n: undefined,
|
|
8
9
|
|
|
9
10
|
MAX_STAGE: 11,
|
|
10
11
|
|
|
@@ -122,7 +123,18 @@ const /** @type {Parser} */ Parser = {
|
|
|
122
123
|
promises: [Promise.resolve()],
|
|
123
124
|
|
|
124
125
|
getConfig() {
|
|
125
|
-
|
|
126
|
+
if (typeof this.config === 'string') {
|
|
127
|
+
this.config = require(this.config);
|
|
128
|
+
}
|
|
129
|
+
return {...this.config, excludes: []};
|
|
130
|
+
},
|
|
131
|
+
|
|
132
|
+
msg(msg, arg) {
|
|
133
|
+
if (typeof this.i18n === 'string') {
|
|
134
|
+
this.i18n = require(this.i18n);
|
|
135
|
+
}
|
|
136
|
+
msg = this.i18n?.[msg] ?? msg;
|
|
137
|
+
return msg.replace('$1', arg);
|
|
126
138
|
},
|
|
127
139
|
|
|
128
140
|
normalizeTitle(
|
|
@@ -154,7 +166,7 @@ const /** @type {Parser} */ Parser = {
|
|
|
154
166
|
*/
|
|
155
167
|
const build = keys => {
|
|
156
168
|
for (const key of keys) {
|
|
157
|
-
if (titleObj[key]
|
|
169
|
+
if (titleObj[key]?.includes('\0')) {
|
|
158
170
|
titleObj[key] = token.getAttribute('buildFromStr')(titleObj[key], 'text');
|
|
159
171
|
}
|
|
160
172
|
}
|
|
@@ -276,7 +288,7 @@ const /** @type {Parser} */ Parser = {
|
|
|
276
288
|
|
|
277
289
|
reparse(date) {
|
|
278
290
|
const main = fs.readdirSync(path.join(__dirname, 'errors'))
|
|
279
|
-
.find(name => name.startsWith(date) && name.
|
|
291
|
+
.find(name => name.startsWith(date) && name.endsWith('Z'));
|
|
280
292
|
if (!main) {
|
|
281
293
|
throw new RangeError(`找不到对应时间戳的错误记录:${date}`);
|
|
282
294
|
}
|
package/lib/element.js
CHANGED
|
@@ -9,6 +9,24 @@ const fs = require('fs'),
|
|
|
9
9
|
AstNode = require('./node'),
|
|
10
10
|
AstText = require('./text');
|
|
11
11
|
|
|
12
|
+
const lintIgnoredExt = new Set([
|
|
13
|
+
'nowiki',
|
|
14
|
+
'pre',
|
|
15
|
+
'charinsert',
|
|
16
|
+
'score',
|
|
17
|
+
'syntaxhighlight',
|
|
18
|
+
'source',
|
|
19
|
+
'math',
|
|
20
|
+
'chem',
|
|
21
|
+
'ce',
|
|
22
|
+
'graph',
|
|
23
|
+
'mapframe',
|
|
24
|
+
'maplink',
|
|
25
|
+
'quiz',
|
|
26
|
+
'templatedata',
|
|
27
|
+
'timeline',
|
|
28
|
+
]);
|
|
29
|
+
|
|
12
30
|
/**
|
|
13
31
|
* 检测:lang()伪选择器
|
|
14
32
|
* @param {AstElement & {attributes: Records<string, string|true>}} node 节点
|
|
@@ -204,11 +222,12 @@ class AstElement extends AstNode {
|
|
|
204
222
|
|
|
205
223
|
/**
|
|
206
224
|
* 检查是否符合解析后的选择器,不含节点关系
|
|
207
|
-
* @this {AstElement & {link: string, constructor: {fixed: boolean}}}
|
|
225
|
+
* @this {AstElement & {link: string|Title, constructor: {fixed: boolean}}}
|
|
208
226
|
* @param {SelectorArray} step 解析后的选择器
|
|
209
227
|
* @throws `SyntaxError` 未定义的伪选择器
|
|
210
228
|
*/
|
|
211
229
|
#matches(step) {
|
|
230
|
+
const Title = require('./title');
|
|
212
231
|
const {parentNode, type, name, childNodes, link, constructor: {fixed, name: tokenName}} = this,
|
|
213
232
|
children = parentNode?.children,
|
|
214
233
|
childrenOfType = children?.filter(({type: t}) => t === type),
|
|
@@ -254,7 +273,7 @@ class AstElement extends AstNode {
|
|
|
254
273
|
|| (type === 'file' || type === 'gallery-image' && link);
|
|
255
274
|
case ':local-link':
|
|
256
275
|
return (type === 'link' || type === 'file' || type === 'gallery-image')
|
|
257
|
-
&& link
|
|
276
|
+
&& link instanceof Title && link.title === '';
|
|
258
277
|
case ':read-only':
|
|
259
278
|
return fixed;
|
|
260
279
|
case ':read-write':
|
|
@@ -492,32 +511,14 @@ class AstElement extends AstNode {
|
|
|
492
511
|
: this.childNodes.map(child => child.toString(selector)).join(separator);
|
|
493
512
|
}
|
|
494
513
|
|
|
495
|
-
static lintIgnoredExt = new Set([
|
|
496
|
-
'nowiki',
|
|
497
|
-
'pre',
|
|
498
|
-
'charinsert',
|
|
499
|
-
'score',
|
|
500
|
-
'syntaxhighlight',
|
|
501
|
-
'source',
|
|
502
|
-
'math',
|
|
503
|
-
'chem',
|
|
504
|
-
'ce',
|
|
505
|
-
'graph',
|
|
506
|
-
'mapframe',
|
|
507
|
-
'maplink',
|
|
508
|
-
'quiz',
|
|
509
|
-
'templatedata',
|
|
510
|
-
'timeline',
|
|
511
|
-
]);
|
|
512
|
-
|
|
513
514
|
/**
|
|
514
515
|
* Linter
|
|
515
516
|
* @param {number} start 起始位置
|
|
516
517
|
*/
|
|
517
|
-
lint(start =
|
|
518
|
+
lint(start = this.getAbsoluteIndex()) {
|
|
518
519
|
const SyntaxToken = require('../src/syntax');
|
|
519
520
|
if (this instanceof SyntaxToken || this.constructor.hidden
|
|
520
|
-
|| this.type === 'ext-inner' &&
|
|
521
|
+
|| this.type === 'ext-inner' && lintIgnoredExt.has(this.name)
|
|
521
522
|
) {
|
|
522
523
|
return [];
|
|
523
524
|
}
|
|
@@ -547,11 +548,10 @@ class AstElement extends AstNode {
|
|
|
547
548
|
* @returns {Record<string, *>}
|
|
548
549
|
*/
|
|
549
550
|
json(file) {
|
|
550
|
-
const
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
};
|
|
551
|
+
const json = {
|
|
552
|
+
...this,
|
|
553
|
+
childNodes: this.childNodes.map(child => child.type === 'text' ? String(child) : child.json()),
|
|
554
|
+
};
|
|
555
555
|
if (typeof file === 'string') {
|
|
556
556
|
fs.writeFileSync(
|
|
557
557
|
path.join(__dirname.slice(0, -4), 'printed', `${file}${file.endsWith('.json') ? '' : '.json'}`),
|
package/lib/node.js
CHANGED
|
@@ -102,15 +102,14 @@ class AstNode {
|
|
|
102
102
|
* @complexity `n`
|
|
103
103
|
*/
|
|
104
104
|
get eof() {
|
|
105
|
-
|
|
106
|
-
if (type === 'root') {
|
|
105
|
+
if (this.type === 'root') {
|
|
107
106
|
return true;
|
|
108
107
|
}
|
|
109
108
|
let {nextSibling} = this;
|
|
110
109
|
while (nextSibling?.type === 'text' && String(nextSibling).trim() === '') {
|
|
111
110
|
({nextSibling} = nextSibling);
|
|
112
111
|
}
|
|
113
|
-
return nextSibling === undefined && parentNode?.eof;
|
|
112
|
+
return nextSibling === undefined && this.parentNode?.eof;
|
|
114
113
|
}
|
|
115
114
|
|
|
116
115
|
constructor() {
|
|
@@ -331,8 +330,7 @@ class AstNode {
|
|
|
331
330
|
* @throws `RangeError` 找不到子节点
|
|
332
331
|
*/
|
|
333
332
|
#getChildIndex(node) {
|
|
334
|
-
const
|
|
335
|
-
i = childNodes.indexOf(node);
|
|
333
|
+
const i = this.childNodes.indexOf(node);
|
|
336
334
|
if (i === -1) {
|
|
337
335
|
Parser.error('找不到子节点!', node);
|
|
338
336
|
throw new RangeError('找不到子节点!');
|
package/lib/text.js
CHANGED
|
@@ -4,7 +4,7 @@ const Parser = require('..'),
|
|
|
4
4
|
AstNode = require('./node'),
|
|
5
5
|
AstElement = require('./element');
|
|
6
6
|
|
|
7
|
-
const errorSyntax = /https
|
|
7
|
+
const errorSyntax = /https?:\/\/|\{+|\}+|\[{2,}|\[(?![^[]*\])|(?<=^|\])([^[]*?)\]+|<\s*\/?([a-z]\w*)(?=[\s/>])/giu,
|
|
8
8
|
errorSyntaxUrl = /\{+|\}+|\[{2,}|\[(?![^[]*\])|(?<=^|\])([^[]*?)\]+|<\s*\/?([a-z]\w*)(?=[\s/>])/giu,
|
|
9
9
|
disallowedTags = [
|
|
10
10
|
'html',
|
|
@@ -61,7 +61,6 @@ const errorSyntax = /https?:|\{+|\}+|\[{2,}|\[(?![^[]*\])|(?<=^|\])([^[]*?)\]+|<
|
|
|
61
61
|
|
|
62
62
|
/** 文本节点 */
|
|
63
63
|
class AstText extends AstNode {
|
|
64
|
-
#config;
|
|
65
64
|
type = 'text';
|
|
66
65
|
/** @type {string} */ data;
|
|
67
66
|
|
|
@@ -71,9 +70,8 @@ class AstText extends AstNode {
|
|
|
71
70
|
}
|
|
72
71
|
|
|
73
72
|
/** @param {string} text 包含文本 */
|
|
74
|
-
constructor(text = ''
|
|
73
|
+
constructor(text = '') {
|
|
75
74
|
super();
|
|
76
|
-
this.#config = config;
|
|
77
75
|
Object.defineProperties(this, {
|
|
78
76
|
data: {value: text, writable: false},
|
|
79
77
|
childNodes: {enumerable: false, configurable: false},
|
|
@@ -97,20 +95,20 @@ class AstText extends AstNode {
|
|
|
97
95
|
* @param {number} start 起始位置
|
|
98
96
|
* @returns {LintError[]}
|
|
99
97
|
*/
|
|
100
|
-
lint(start =
|
|
98
|
+
lint(start = this.getAbsoluteIndex()) {
|
|
101
99
|
const {data, parentNode} = this,
|
|
102
100
|
type = parentNode?.type,
|
|
103
101
|
name = parentNode?.name,
|
|
104
|
-
urlAttr = ['itemtype', 'src', 'srcset'].includes(parentNode?.parentNode?.name),
|
|
105
102
|
errorRegex
|
|
106
103
|
= type === 'free-ext-link' || type === 'ext-link-url' || type === 'image-parameter' && name === 'link'
|
|
107
104
|
? errorSyntaxUrl
|
|
108
105
|
: errorSyntax,
|
|
109
|
-
errors = [...data.matchAll(errorRegex)]
|
|
106
|
+
errors = [...data.matchAll(errorRegex)],
|
|
107
|
+
{ext, html} = this.getRootNode().getAttribute('config');
|
|
110
108
|
if (errors.length > 0) {
|
|
111
109
|
const root = this.getRootNode(),
|
|
112
110
|
{top, left} = root.posFromIndex(start),
|
|
113
|
-
tags = new Set([
|
|
111
|
+
tags = new Set([ext, html, disallowedTags].flat(2));
|
|
114
112
|
return errors.map(/** @returns {LintError} */ ({0: error, 1: prefix, 2: tag, index}) => {
|
|
115
113
|
if (prefix) {
|
|
116
114
|
index += prefix.length;
|
|
@@ -124,29 +122,9 @@ class AstText extends AstNode {
|
|
|
124
122
|
{0: char, length} = error,
|
|
125
123
|
endIndex = startIndex + length,
|
|
126
124
|
end = char === '}' || char === ']' ? endIndex : startIndex + (char === 'h' ? 49 : 50);
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
case 'ext-link-text':
|
|
131
|
-
severity = 'warning';
|
|
132
|
-
break;
|
|
133
|
-
case 'attr-value':
|
|
134
|
-
if (urlAttr) {
|
|
135
|
-
return false;
|
|
136
|
-
}
|
|
137
|
-
break;
|
|
138
|
-
case 'ext-inner':
|
|
139
|
-
if (name === 'sm2' || name === 'flashmp3') {
|
|
140
|
-
return false;
|
|
141
|
-
}
|
|
142
|
-
// no default
|
|
143
|
-
}
|
|
144
|
-
} else if (char === '<' && !tags.has(tag.toLowerCase())) {
|
|
145
|
-
return false;
|
|
146
|
-
}
|
|
147
|
-
return {
|
|
148
|
-
message: `孤立的"${char === 'h' ? error : char}"`,
|
|
149
|
-
severity,
|
|
125
|
+
return (char !== 'h' || index > 0) && (char !== '<' || tags.has(tag.toLowerCase())) && {
|
|
126
|
+
message: Parser.msg('lonely "$1"', char === 'h' ? error : char),
|
|
127
|
+
severity: length > 1 ? 'error' : 'warning',
|
|
150
128
|
startIndex,
|
|
151
129
|
endIndex,
|
|
152
130
|
startLine,
|
|
@@ -184,7 +162,7 @@ class AstText extends AstNode {
|
|
|
184
162
|
|
|
185
163
|
/** 复制 */
|
|
186
164
|
cloneNode() {
|
|
187
|
-
return new AstText(this.data
|
|
165
|
+
return new AstText(this.data);
|
|
188
166
|
}
|
|
189
167
|
|
|
190
168
|
/**
|
|
@@ -253,7 +231,7 @@ class AstText extends AstNode {
|
|
|
253
231
|
if (!parentNode) {
|
|
254
232
|
throw new Error('待分裂的文本节点没有父节点!');
|
|
255
233
|
}
|
|
256
|
-
const newText = new AstText(data.slice(offset)
|
|
234
|
+
const newText = new AstText(data.slice(offset)),
|
|
257
235
|
childNodes = [...parentNode.childNodes];
|
|
258
236
|
this.setAttribute('data', data.slice(0, offset));
|
|
259
237
|
childNodes.splice(childNodes.indexOf(this) + 1, 0, newText);
|
package/lib/title.js
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
const
|
|
3
|
+
const {decodeHtml} = require('../util/string'),
|
|
4
|
+
Parser = require('..');
|
|
4
5
|
|
|
5
6
|
/** MediaWiki页面标题对象 */
|
|
6
7
|
class Title {
|
|
7
8
|
valid = true;
|
|
8
9
|
ns = 0;
|
|
9
|
-
fragment
|
|
10
|
+
fragment;
|
|
10
11
|
encoded = false;
|
|
11
12
|
title = '';
|
|
12
13
|
main = '';
|
|
@@ -22,9 +23,10 @@ class Title {
|
|
|
22
23
|
constructor(title, defaultNs = 0, config = Parser.getConfig(), decode = false, selfLink = false) {
|
|
23
24
|
const {namespaces, nsid} = config;
|
|
24
25
|
let namespace = namespaces[defaultNs];
|
|
26
|
+
title = decodeHtml(title);
|
|
25
27
|
if (decode && title.includes('%')) {
|
|
26
28
|
try {
|
|
27
|
-
const encoded = /%(?!5[bd]|7[b-d])[\da-f]{2}/iu.test(title);
|
|
29
|
+
const encoded = /%(?!21|3[ce]|5[bd]|7[b-d])[\da-f]{2}/iu.test(title);
|
|
28
30
|
title = decodeURIComponent(title);
|
|
29
31
|
this.encoded = encoded;
|
|
30
32
|
} catch {}
|
|
@@ -49,7 +51,7 @@ class Title {
|
|
|
49
51
|
}
|
|
50
52
|
this.ns = nsid[namespace.toLowerCase()];
|
|
51
53
|
const i = title.indexOf('#');
|
|
52
|
-
let fragment
|
|
54
|
+
let fragment;
|
|
53
55
|
if (i !== -1) {
|
|
54
56
|
fragment = title.slice(i + 1).trimEnd();
|
|
55
57
|
if (fragment.includes('%')) {
|
|
@@ -63,7 +65,7 @@ class Title {
|
|
|
63
65
|
}
|
|
64
66
|
title = title.slice(0, i).trim();
|
|
65
67
|
}
|
|
66
|
-
this.valid = Boolean(title || selfLink && fragment || this.interwiki)
|
|
68
|
+
this.valid = Boolean(title || selfLink && fragment !== undefined || this.interwiki)
|
|
67
69
|
&& !/\0\d+[eh!+-]\x7F|[<>[\]{}|]|%[\da-f]{2}/iu.test(title);
|
|
68
70
|
this.fragment = fragment;
|
|
69
71
|
this.main = title && `${title[0].toUpperCase()}${title.slice(1)}`;
|
|
@@ -73,7 +75,7 @@ class Title {
|
|
|
73
75
|
|
|
74
76
|
/** @override */
|
|
75
77
|
toString() {
|
|
76
|
-
return `${this.title}${this.fragment
|
|
78
|
+
return `${this.title}${this.fragment === undefined ? '' : `#${this.fragment}`}`;
|
|
77
79
|
}
|
|
78
80
|
}
|
|
79
81
|
|
package/mixin/sol.js
CHANGED
|
@@ -13,12 +13,11 @@ const sol = Constructor => class SolToken extends Constructor {
|
|
|
13
13
|
/**
|
|
14
14
|
* 是否可以视为root节点
|
|
15
15
|
* @this {Token}
|
|
16
|
-
* @param {boolean} includeHeading 是否包括HeadingToken
|
|
17
16
|
*/
|
|
18
|
-
#isRoot(
|
|
17
|
+
#isRoot() {
|
|
19
18
|
const {parentNode, type} = this;
|
|
20
19
|
return parentNode?.type === 'root'
|
|
21
|
-
||
|
|
20
|
+
|| type !== 'heading' && parentNode?.type === 'ext-inner' && parentNode.name === 'poem';
|
|
22
21
|
}
|
|
23
22
|
|
|
24
23
|
/**
|
|
@@ -26,17 +25,7 @@ const sol = Constructor => class SolToken extends Constructor {
|
|
|
26
25
|
* @this {SolToken & Token}
|
|
27
26
|
*/
|
|
28
27
|
prependNewLine() {
|
|
29
|
-
return (this.previousVisibleSibling || !this.#isRoot()) && String(this.previousVisibleSibling).
|
|
30
|
-
? '\n'
|
|
31
|
-
: '';
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
/**
|
|
35
|
-
* 在后方插入newline
|
|
36
|
-
* @this {SolToken & Token}
|
|
37
|
-
*/
|
|
38
|
-
appendNewLine() {
|
|
39
|
-
return (this.nextVisibleSibling || !this.#isRoot(true)) && String(this.nextVisibleSibling ?? '')[0] !== '\n'
|
|
28
|
+
return (this.previousVisibleSibling || !this.#isRoot()) && !String(this.previousVisibleSibling).endsWith('\n')
|
|
40
29
|
? '\n'
|
|
41
30
|
: '';
|
|
42
31
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "wikiparser-node",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.9.0",
|
|
4
4
|
"description": "A Node.js parser for MediaWiki markup with AST",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"mediawiki",
|
|
@@ -15,13 +15,14 @@
|
|
|
15
15
|
"author": "Bhsd",
|
|
16
16
|
"files": [
|
|
17
17
|
"/index.js",
|
|
18
|
-
"/parser/",
|
|
19
|
-
"/util/",
|
|
20
|
-
"/lib/",
|
|
21
|
-
"/src/",
|
|
22
18
|
"/config/",
|
|
19
|
+
"/i18n/",
|
|
20
|
+
"/lib/",
|
|
23
21
|
"/mixin/",
|
|
24
|
-
"/
|
|
22
|
+
"/parser/",
|
|
23
|
+
"/src/",
|
|
24
|
+
"/tool/",
|
|
25
|
+
"/util/"
|
|
25
26
|
],
|
|
26
27
|
"repository": {
|
|
27
28
|
"type": "git",
|
package/parser/brackets.js
CHANGED
|
@@ -14,6 +14,7 @@ const {removeComment} = require('../util/string'),
|
|
|
14
14
|
*/
|
|
15
15
|
const parseBrackets = (text, config = Parser.getConfig(), accum = []) => {
|
|
16
16
|
const source = `${config.excludes.includes('heading') ? '' : '^(\0\\d+c\x7F)*={1,6}|'}\\[\\[|\\{{2,}|-\\{(?!\\{)`,
|
|
17
|
+
{parserFunction: [,,, subst]} = config,
|
|
17
18
|
/** @type {BracketExecArray[]} */ stack = [],
|
|
18
19
|
closes = {'=': '\n', '{': '\\}{2,}|\\|', '-': '\\}-', '[': '\\]\\]'},
|
|
19
20
|
/** @type {Record<string, string>} */ marks = {'!': '!', '!!': '+', '(!': '{', '!)': '}', '!-': '-', '=': '~'};
|
|
@@ -62,12 +63,17 @@ const parseBrackets = (text, config = Parser.getConfig(), accum = []) => {
|
|
|
62
63
|
let skip = false,
|
|
63
64
|
ch = 't';
|
|
64
65
|
if (close.length === 3) {
|
|
65
|
-
|
|
66
|
+
const argParts = parts.map(part => part.join('=')),
|
|
67
|
+
str = argParts.length > 1 && removeComment(argParts[1]).trim();
|
|
68
|
+
new ArgToken(argParts, config, accum);
|
|
69
|
+
if (str && str.endsWith(':') && subst.includes(str.slice(0, -1).toLowerCase())) {
|
|
70
|
+
ch = 's';
|
|
71
|
+
}
|
|
66
72
|
} else {
|
|
67
73
|
const name = removeComment(parts[0][0]).trim();
|
|
68
74
|
if (name in marks) {
|
|
69
75
|
ch = marks[name]; // 标记{{!}}等
|
|
70
|
-
} else if (/^(?:
|
|
76
|
+
} else if (/^(?:filepath|(?:full|canonical)urle?):.|^server$/iu.test(name)) {
|
|
71
77
|
ch = 'm';
|
|
72
78
|
} else if (/^#vardefine:./iu.test(name)) {
|
|
73
79
|
ch = 'c';
|
package/parser/commentAndExt.js
CHANGED
|
@@ -20,10 +20,7 @@ const parseCommentAndExt = (text, config = Parser.getConfig(), accum = [], inclu
|
|
|
20
20
|
const str = `\0${accum.length}e\x7F`;
|
|
21
21
|
new OnlyincludeToken(inner, config, accum);
|
|
22
22
|
return str;
|
|
23
|
-
}).replace(/(?<=^|\0\d+e\x7F)
|
|
24
|
-
if (substr === '') {
|
|
25
|
-
return '';
|
|
26
|
-
}
|
|
23
|
+
}).replace(/(?<=^|\0\d+e\x7F)[^\0]+(?=$|\0\d+e\x7F)/gu, substr => {
|
|
27
24
|
new NoincludeToken(substr, config, accum);
|
|
28
25
|
return `\0${accum.length - 1}c\x7F`;
|
|
29
26
|
});
|
package/parser/links.js
CHANGED
|
@@ -85,7 +85,7 @@ const parseLinks = (wikitext, config = Parser.getConfig(), accum = []) => {
|
|
|
85
85
|
SomeLinkToken = CategoryToken;
|
|
86
86
|
}
|
|
87
87
|
}
|
|
88
|
-
new SomeLinkToken(link, text,
|
|
88
|
+
new SomeLinkToken(link, text, config, accum, delimiter);
|
|
89
89
|
}
|
|
90
90
|
return s;
|
|
91
91
|
};
|
package/parser/quotes.js
CHANGED
|
@@ -31,7 +31,7 @@ const parseQuotes = (text, config = Parser.getConfig(), accum = []) => {
|
|
|
31
31
|
nBold++;
|
|
32
32
|
if (firstSingle) {
|
|
33
33
|
break;
|
|
34
|
-
} else if (arr[i - 1].
|
|
34
|
+
} else if (arr[i - 1].endsWith(' ')) {
|
|
35
35
|
if (!firstMulti && !firstSpace) {
|
|
36
36
|
firstSpace = i;
|
|
37
37
|
}
|
package/parser/selector.js
CHANGED
|
@@ -48,7 +48,9 @@ const /** @type {Set<pseudo>} */ simplePseudos = new Set([
|
|
|
48
48
|
pseudoRegex = new RegExp(`:(${complexPseudos.join('|')})$`, 'u'),
|
|
49
49
|
regularRegex = /[[(,>+~]|\s+/u,
|
|
50
50
|
attributeRegex = /^\s*(\w+)\s*(?:([~|^$*!]?=)\s*("[^"]*"|'[^']*'|[^\s[\]]+)(?:\s+(i))?\s*)?\]/u,
|
|
51
|
-
functionRegex = /^(\s*"[^"]*"\s*|\s*'[^']*'\s*|[^()]*)\)/u
|
|
51
|
+
functionRegex = /^(\s*"[^"]*"\s*|\s*'[^']*'\s*|[^()]*)\)/u,
|
|
52
|
+
grouping = new Set([',', '>', '+', '~']),
|
|
53
|
+
combinator = new Set(['>', '+', '~', '']);
|
|
52
54
|
|
|
53
55
|
/**
|
|
54
56
|
* 清理转义符号
|
|
@@ -110,9 +112,7 @@ const pushSimple = (step, str) => {
|
|
|
110
112
|
*/
|
|
111
113
|
const parseSelector = selector => {
|
|
112
114
|
selector = selector.trim();
|
|
113
|
-
const /** @type {SelectorArray[][]} */ stack = [[[]]]
|
|
114
|
-
grouping = new Set([',', '>', '+', '~']),
|
|
115
|
-
combinator = new Set(['>', '+', '~', '']);
|
|
115
|
+
const /** @type {SelectorArray[][]} */ stack = [[[]]];
|
|
116
116
|
let sanitized = sanitize(selector),
|
|
117
117
|
regex = regularRegex,
|
|
118
118
|
mt = regex.exec(sanitized),
|
|
@@ -141,7 +141,7 @@ const parseSelector = selector => {
|
|
|
141
141
|
} else if (syntax === '[') { // 情形3:属性开启
|
|
142
142
|
pushSimple(step, sanitized.slice(0, index));
|
|
143
143
|
regex = attributeRegex;
|
|
144
|
-
} else if (syntax.
|
|
144
|
+
} else if (syntax.endsWith(']')) { // 情形4:属性闭合
|
|
145
145
|
mt[3] = desanitize(deQuote(mt[3]));
|
|
146
146
|
step.push(mt.slice(1));
|
|
147
147
|
regex = regularRegex;
|
package/src/arg.js
CHANGED
|
@@ -78,9 +78,9 @@ class ArgToken extends Token {
|
|
|
78
78
|
* @param {number} start 起始位置
|
|
79
79
|
* @returns {LintError[]}
|
|
80
80
|
*/
|
|
81
|
-
lint(start =
|
|
81
|
+
lint(start = this.getAbsoluteIndex()) {
|
|
82
82
|
if (!this.getAttribute('include')) {
|
|
83
|
-
return [generateForSelf(this, {start}, '
|
|
83
|
+
return [generateForSelf(this, {start}, 'unexpected template argument')];
|
|
84
84
|
}
|
|
85
85
|
const {childNodes: [argName, argDefault, ...rest]} = this,
|
|
86
86
|
errors = argName.lint(start + 3);
|
|
@@ -89,7 +89,11 @@ class ArgToken extends Token {
|
|
|
89
89
|
}
|
|
90
90
|
if (rest.length > 0) {
|
|
91
91
|
const rect = {start, ...this.getRootNode().posFromIndex(start)};
|
|
92
|
-
errors.push(...rest.map(child =>
|
|
92
|
+
errors.push(...rest.map(child => {
|
|
93
|
+
const error = generateForChild(child, rect, 'invisible content inside triple brackets'),
|
|
94
|
+
{startIndex, startCol, excerpt} = error;
|
|
95
|
+
return {...error, startIndex: startIndex - 1, startCol: startCol - 1, excerpt: `|${excerpt}`};
|
|
96
|
+
}));
|
|
93
97
|
}
|
|
94
98
|
return errors;
|
|
95
99
|
}
|