wikiparser-node 0.3.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/config/default.json +13 -17
- package/config/llwiki.json +11 -79
- package/config/moegirl.json +7 -1
- package/config/zhwiki.json +1269 -0
- package/index.js +130 -97
- package/lib/element.js +410 -518
- package/lib/node.js +493 -115
- package/lib/ranges.js +27 -19
- package/lib/text.js +175 -0
- package/lib/title.js +14 -6
- package/mixin/attributeParent.js +70 -24
- package/mixin/fixedToken.js +18 -10
- package/mixin/hidden.js +6 -4
- package/mixin/sol.js +39 -12
- package/package.json +17 -4
- package/parser/brackets.js +18 -18
- package/parser/commentAndExt.js +16 -14
- package/parser/converter.js +14 -13
- package/parser/externalLinks.js +12 -11
- package/parser/hrAndDoubleUnderscore.js +24 -14
- package/parser/html.js +8 -7
- package/parser/links.js +13 -13
- package/parser/list.js +12 -11
- package/parser/magicLinks.js +11 -10
- package/parser/quotes.js +6 -5
- package/parser/selector.js +175 -0
- package/parser/table.js +31 -24
- package/src/arg.js +91 -43
- package/src/atom/hidden.js +5 -2
- package/src/atom/index.js +17 -9
- package/src/attribute.js +210 -101
- package/src/converter.js +78 -43
- package/src/converterFlags.js +104 -45
- package/src/converterRule.js +136 -78
- package/src/extLink.js +81 -27
- package/src/gallery.js +63 -20
- package/src/heading.js +58 -20
- package/src/html.js +138 -48
- package/src/imageParameter.js +93 -58
- package/src/index.js +314 -186
- package/src/link/category.js +22 -54
- package/src/link/file.js +83 -32
- package/src/link/galleryImage.js +21 -7
- package/src/link/index.js +170 -81
- package/src/magicLink.js +64 -14
- package/src/nowiki/comment.js +36 -10
- package/src/nowiki/dd.js +37 -22
- package/src/nowiki/doubleUnderscore.js +21 -7
- package/src/nowiki/hr.js +11 -7
- package/src/nowiki/index.js +16 -9
- package/src/nowiki/list.js +2 -2
- package/src/nowiki/noinclude.js +8 -4
- package/src/nowiki/quote.js +38 -7
- package/src/onlyinclude.js +24 -7
- package/src/parameter.js +102 -62
- package/src/syntax.js +23 -20
- package/src/table/index.js +282 -174
- package/src/table/td.js +112 -61
- package/src/table/tr.js +135 -74
- package/src/tagPair/ext.js +30 -23
- package/src/tagPair/include.js +26 -11
- package/src/tagPair/index.js +72 -29
- package/src/transclude.js +235 -127
- package/tool/index.js +42 -32
- package/util/debug.js +21 -18
- package/util/diff.js +76 -0
- package/util/lint.js +40 -0
- package/util/string.js +56 -26
- package/.eslintrc.json +0 -319
- package/errors/README +0 -1
- package/jsconfig.json +0 -7
- package/printed/README +0 -1
- package/typings/element.d.ts +0 -28
- package/typings/index.d.ts +0 -52
- package/typings/node.d.ts +0 -23
- package/typings/parser.d.ts +0 -9
- package/typings/table.d.ts +0 -14
- package/typings/token.d.ts +0 -22
- package/typings/tool.d.ts +0 -10
package/index.js
CHANGED
|
@@ -7,25 +7,102 @@ const fs = require('fs'),
|
|
|
7
7
|
const /** @type {Parser} */ Parser = {
|
|
8
8
|
warning: true,
|
|
9
9
|
debugging: false,
|
|
10
|
+
running: false,
|
|
11
|
+
|
|
12
|
+
config: './config/default',
|
|
13
|
+
|
|
14
|
+
MAX_STAGE: 11,
|
|
15
|
+
|
|
16
|
+
classes: {},
|
|
17
|
+
mixins: {},
|
|
18
|
+
parsers: {},
|
|
19
|
+
|
|
20
|
+
aliases: [
|
|
21
|
+
['AstText'],
|
|
22
|
+
['CommentToken', 'ExtToken', 'IncludeToken', 'NoincludeToken'],
|
|
23
|
+
['ArgToken', 'TranscludeToken', 'HeadingToken'],
|
|
24
|
+
['HtmlToken'],
|
|
25
|
+
['TableToken'],
|
|
26
|
+
['HrToken', 'DoubleUnderscoreToken'],
|
|
27
|
+
['LinkToken', 'FileToken', 'CategoryToken'],
|
|
28
|
+
['QuoteToken'],
|
|
29
|
+
['ExtLinkToken'],
|
|
30
|
+
['MagicLinkToken'],
|
|
31
|
+
['ListToken', 'DdToken'],
|
|
32
|
+
['ConverterToken'],
|
|
33
|
+
],
|
|
34
|
+
typeAliases: {
|
|
35
|
+
include: ['includeonly'],
|
|
36
|
+
ext: ['extension'],
|
|
37
|
+
'ext-attr': ['extension-attr'],
|
|
38
|
+
'ext-inner': ['extension-inner'],
|
|
39
|
+
arg: ['argument'],
|
|
40
|
+
'arg-name': ['argument-name'],
|
|
41
|
+
'arg-default': ['argument-default'],
|
|
42
|
+
'magic-word': ['parser-function', 'parser-func'],
|
|
43
|
+
'invoke-function': ['invoke-func', 'lua-function', 'lua-func', 'module-function', 'module-func'],
|
|
44
|
+
'invoke-module': ['lua-module'],
|
|
45
|
+
parameter: ['param'],
|
|
46
|
+
'parameter-key': ['param-key'],
|
|
47
|
+
'parameter-value': ['parameter-val', 'param-value', 'param-val'],
|
|
48
|
+
heading: ['header'],
|
|
49
|
+
'heading-title': ['header-title'],
|
|
50
|
+
'heading-trail': ['header-trail'],
|
|
51
|
+
'table-attr': ['tr-attr', 'table-row-attr', 'td-attr', 'table-cell-attr', 'table-data-attr'],
|
|
52
|
+
tr: ['table-row'],
|
|
53
|
+
td: ['table-cell', 'table-data'],
|
|
54
|
+
'td-inner': ['table-cell-inner', 'table-data-inner'],
|
|
55
|
+
'double-underscore': ['underscore', 'behavior-switch', 'behaviour-switch'],
|
|
56
|
+
hr: ['horizontal'],
|
|
57
|
+
category: ['category-link', 'cat', 'cat-link'],
|
|
58
|
+
file: ['file-link', 'image', 'image-link', 'img', 'img-link'],
|
|
59
|
+
'gallery-image': ['gallery-file', 'gallery-img'],
|
|
60
|
+
'image-parameter': ['img-parameter', 'image-param', 'img-param'],
|
|
61
|
+
quote: ['quotes', 'quot', 'apostrophe', 'apostrophes', 'apos'],
|
|
62
|
+
'ext-link': ['external-link'],
|
|
63
|
+
'ext-link-text': ['external-link-text'],
|
|
64
|
+
'ext-link-url': ['external-link-url'],
|
|
65
|
+
'free-ext-link': ['free-external-link', 'magic-link'],
|
|
66
|
+
list: ['ol', 'ordered-list', 'ul', 'unordered-list', 'dl', 'description-list'],
|
|
67
|
+
dd: ['indent', 'indentation'],
|
|
68
|
+
converter: ['convert', 'conversion'],
|
|
69
|
+
'converter-flags': ['convert-flags', 'conversion-flags'],
|
|
70
|
+
'converter-flag': ['convert-flag', 'conversion-flag'],
|
|
71
|
+
'converter-rule': ['convert-rule', 'conversion-rule'],
|
|
72
|
+
'converter-rule-noconvert': ['convert-rule-noconvert', 'conversion-rule-noconvert'],
|
|
73
|
+
'converter-rule-variant': ['convert-rule-variant', 'conversion-rule-variant'],
|
|
74
|
+
'converter-rule-to': ['convert-rule-to', 'conversion-rule-to'],
|
|
75
|
+
'converter-rule-from': ['convert-rule-from', 'conversion-rule-from'],
|
|
76
|
+
},
|
|
77
|
+
|
|
78
|
+
promises: [Promise.resolve()],
|
|
10
79
|
|
|
11
80
|
warn(msg, ...args) {
|
|
12
81
|
if (this.warning) {
|
|
13
|
-
console.warn('\
|
|
82
|
+
console.warn('\x1B[33m%s\x1B[0m', msg, ...args);
|
|
14
83
|
}
|
|
15
84
|
},
|
|
16
85
|
debug(msg, ...args) {
|
|
17
86
|
if (this.debugging) {
|
|
18
|
-
console.debug('\
|
|
87
|
+
console.debug('\x1B[34m%s\x1B[0m', msg, ...args);
|
|
19
88
|
}
|
|
20
89
|
},
|
|
21
90
|
error(msg, ...args) {
|
|
22
|
-
console.error('\
|
|
91
|
+
console.error('\x1B[31m%s\x1B[0m', msg, ...args);
|
|
23
92
|
},
|
|
24
93
|
info(msg, ...args) {
|
|
25
|
-
console.info('\
|
|
94
|
+
console.info('\x1B[32m%s\x1B[0m', msg, ...args);
|
|
26
95
|
},
|
|
27
96
|
|
|
28
|
-
|
|
97
|
+
log(f) {
|
|
98
|
+
if (typeof f === 'function') {
|
|
99
|
+
console.log(String(f));
|
|
100
|
+
}
|
|
101
|
+
},
|
|
102
|
+
|
|
103
|
+
getConfig() {
|
|
104
|
+
return require(this.config);
|
|
105
|
+
},
|
|
29
106
|
|
|
30
107
|
run(callback) {
|
|
31
108
|
const {running} = this;
|
|
@@ -40,10 +117,6 @@ const /** @type {Parser} */ Parser = {
|
|
|
40
117
|
}
|
|
41
118
|
},
|
|
42
119
|
|
|
43
|
-
classes: {},
|
|
44
|
-
mixins: {},
|
|
45
|
-
parsers: {},
|
|
46
|
-
|
|
47
120
|
clearCache() {
|
|
48
121
|
const entries = [
|
|
49
122
|
...Object.entries(this.classes),
|
|
@@ -60,53 +133,35 @@ const /** @type {Parser} */ Parser = {
|
|
|
60
133
|
}
|
|
61
134
|
},
|
|
62
135
|
|
|
63
|
-
log(f) {
|
|
64
|
-
if (typeof f === 'function') {
|
|
65
|
-
console.log(f.toString());
|
|
66
|
-
}
|
|
67
|
-
},
|
|
68
|
-
|
|
69
|
-
aliases: [
|
|
70
|
-
['String'],
|
|
71
|
-
['CommentToken', 'ExtToken', 'IncludeToken', 'NoincludeToken'],
|
|
72
|
-
['ArgToken', 'TranscludeToken', 'HeadingToken'],
|
|
73
|
-
['HtmlToken'],
|
|
74
|
-
['TableToken'],
|
|
75
|
-
['HrToken', 'DoubleUnderscoreToken'],
|
|
76
|
-
['LinkToken', 'FileToken', 'CategoryToken'],
|
|
77
|
-
['QuoteToken'],
|
|
78
|
-
['ExtLinkToken'],
|
|
79
|
-
['MagicLinkToken'],
|
|
80
|
-
['ListToken', 'DdToken'],
|
|
81
|
-
['ConverterToken'],
|
|
82
|
-
],
|
|
83
|
-
|
|
84
|
-
config: './config/default',
|
|
85
|
-
|
|
86
|
-
getConfig() {
|
|
87
|
-
return require(this.config);
|
|
88
|
-
},
|
|
89
|
-
|
|
90
136
|
isInterwiki(title, {interwiki} = Parser.getConfig()) {
|
|
91
137
|
title = String(title);
|
|
92
|
-
return RegExp(`^(${interwiki.join('|')})\\s*:`, '
|
|
138
|
+
return new RegExp(`^(${interwiki.join('|')})\\s*:`, 'iu')
|
|
139
|
+
.exec(title.replaceAll('_', ' ').replace(/^\s*:?\s*/u, ''));
|
|
93
140
|
},
|
|
94
141
|
|
|
95
142
|
normalizeTitle(title, defaultNs = 0, include = false, config = Parser.getConfig(), halfParsed = false) {
|
|
96
|
-
title = String(title);
|
|
97
143
|
let /** @type {Token} */ token;
|
|
98
144
|
if (!halfParsed) {
|
|
99
145
|
const Token = require('./src');
|
|
100
|
-
token = this.run(() =>
|
|
146
|
+
token = this.run(() => {
|
|
147
|
+
const newToken = new Token(String(title), config),
|
|
148
|
+
parseOnce = newToken.getAttribute('parseOnce');
|
|
149
|
+
parseOnce(0, include);
|
|
150
|
+
return parseOnce();
|
|
151
|
+
});
|
|
101
152
|
title = token.firstChild;
|
|
102
153
|
}
|
|
103
|
-
const Title = require('./lib/title')
|
|
104
|
-
|
|
154
|
+
const Title = require('./lib/title');
|
|
155
|
+
const titleObj = new Title(String(title), defaultNs, config);
|
|
105
156
|
if (token) {
|
|
106
|
-
|
|
157
|
+
/**
|
|
158
|
+
* 重建部分属性值
|
|
159
|
+
* @param {string[]} keys 属性键
|
|
160
|
+
*/
|
|
161
|
+
const build = keys => {
|
|
107
162
|
for (const key of keys) {
|
|
108
163
|
if (titleObj[key].includes('\0')) {
|
|
109
|
-
titleObj[key] = text(token.buildFromStr(titleObj[key]));
|
|
164
|
+
titleObj[key] = text(token.getAttribute('buildFromStr')(titleObj[key]));
|
|
110
165
|
}
|
|
111
166
|
}
|
|
112
167
|
};
|
|
@@ -117,17 +172,15 @@ const /** @type {Parser} */ Parser = {
|
|
|
117
172
|
return titleObj;
|
|
118
173
|
},
|
|
119
174
|
|
|
120
|
-
MAX_STAGE
|
|
121
|
-
|
|
122
|
-
parse(wikitext, include = false, maxStage = Parser.MAX_STAGE, config = Parser.getConfig()) {
|
|
175
|
+
parse(wikitext, include, maxStage = Parser.MAX_STAGE, config = Parser.getConfig()) {
|
|
123
176
|
const Token = require('./src');
|
|
124
|
-
let token;
|
|
177
|
+
let /** @type {Token} */ token;
|
|
125
178
|
this.run(() => {
|
|
126
179
|
if (typeof wikitext === 'string') {
|
|
127
180
|
token = new Token(wikitext, config);
|
|
128
181
|
} else if (wikitext instanceof Token) {
|
|
129
182
|
token = wikitext;
|
|
130
|
-
wikitext = token
|
|
183
|
+
wikitext = String(token);
|
|
131
184
|
} else {
|
|
132
185
|
throw new TypeError('待解析的内容应为 String 或 Token!');
|
|
133
186
|
}
|
|
@@ -137,7 +190,7 @@ const /** @type {Parser} */ Parser = {
|
|
|
137
190
|
if (e instanceof Error) {
|
|
138
191
|
const file = path.join(__dirname, 'errors', new Date().toISOString()),
|
|
139
192
|
stage = token.getAttribute('stage');
|
|
140
|
-
fs.writeFileSync(file, stage === this.MAX_STAGE ? wikitext : token
|
|
193
|
+
fs.writeFileSync(file, stage === this.MAX_STAGE ? wikitext : String(token));
|
|
141
194
|
fs.writeFileSync(`${file}.err`, e.stack);
|
|
142
195
|
fs.writeFileSync(`${file}.json`, JSON.stringify({
|
|
143
196
|
stage, include: token.getAttribute('include'), config: this.config,
|
|
@@ -146,25 +199,46 @@ const /** @type {Parser} */ Parser = {
|
|
|
146
199
|
throw e;
|
|
147
200
|
}
|
|
148
201
|
});
|
|
202
|
+
if (this.debugging) {
|
|
203
|
+
let restored = String(token),
|
|
204
|
+
process = '解析';
|
|
205
|
+
if (restored === wikitext) {
|
|
206
|
+
const entities = {lt: '<', gt: '>', amp: '&'};
|
|
207
|
+
restored = token.print().replaceAll(
|
|
208
|
+
/<[^<]+?>|&([lg]t|amp);/gu,
|
|
209
|
+
/** @param {string} s */ (_, s) => s ? entities[s] : '',
|
|
210
|
+
);
|
|
211
|
+
process = '渲染HTML';
|
|
212
|
+
}
|
|
213
|
+
if (restored !== wikitext) {
|
|
214
|
+
const diff = require('./util/diff');
|
|
215
|
+
const {promises: {0: cur, length}} = this;
|
|
216
|
+
this.promises.unshift((async () => {
|
|
217
|
+
await cur;
|
|
218
|
+
this.error(`${process}过程中不可逆地修改了原始文本!`);
|
|
219
|
+
return diff(wikitext, restored, length);
|
|
220
|
+
})());
|
|
221
|
+
}
|
|
222
|
+
}
|
|
149
223
|
return token;
|
|
150
224
|
},
|
|
151
225
|
|
|
152
226
|
reparse(date) {
|
|
153
227
|
const main = fs.readdirSync(path.join(__dirname, 'errors'))
|
|
154
|
-
.find(name => name.startsWith(date) && name.
|
|
228
|
+
.find(name => name.startsWith(date) && name.at(-1) === 'Z');
|
|
155
229
|
if (!main) {
|
|
156
230
|
throw new RangeError(`找不到对应时间戳的错误记录:${date}`);
|
|
157
231
|
}
|
|
158
|
-
const
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
232
|
+
const file = path.join(__dirname, 'errors', main),
|
|
233
|
+
wikitext = fs.readFileSync(file, 'utf8');
|
|
234
|
+
const {stage, include, config} = require(`${file}.json`),
|
|
235
|
+
Token = require('./src');
|
|
162
236
|
this.config = config;
|
|
163
237
|
return this.run(() => {
|
|
164
238
|
const halfParsed = stage < this.MAX_STAGE,
|
|
165
239
|
token = new Token(wikitext, this.getConfig(), halfParsed);
|
|
166
240
|
if (halfParsed) {
|
|
167
|
-
token.setAttribute('stage', stage).parseOnce(stage, include);
|
|
241
|
+
token.setAttribute('stage', stage).getAttribute('parseOnce')(stage, include);
|
|
168
242
|
} else {
|
|
169
243
|
token.parse(undefined, include);
|
|
170
244
|
}
|
|
@@ -179,52 +253,11 @@ const /** @type {Parser} */ Parser = {
|
|
|
179
253
|
delete require.cache[require.resolve('./tool')];
|
|
180
254
|
return require('./tool');
|
|
181
255
|
},
|
|
182
|
-
|
|
183
|
-
typeAliases: {
|
|
184
|
-
ext: ['extension'],
|
|
185
|
-
'ext-inner': ['extension-inner'],
|
|
186
|
-
arg: ['argument'],
|
|
187
|
-
'arg-name': ['argument-name'],
|
|
188
|
-
'arg-default': ['argument-default'],
|
|
189
|
-
'arg-redundant': ['argument-redundant'],
|
|
190
|
-
template: ['tpl'],
|
|
191
|
-
'template-name': ['tpl-name'],
|
|
192
|
-
'magic-word': ['parser-function', 'parser-func'],
|
|
193
|
-
'invoke-function': ['invoke-func'],
|
|
194
|
-
'invoke-module': ['invoke-mod'],
|
|
195
|
-
parameter: ['param'],
|
|
196
|
-
'parameter-key': ['param-key'],
|
|
197
|
-
'parameter-value': ['parameter-val', 'param-value', 'param-val'],
|
|
198
|
-
heading: ['header'],
|
|
199
|
-
'heading-title': ['header-title'],
|
|
200
|
-
table: ['tbl'],
|
|
201
|
-
'table-inter': ['tbl-inter'],
|
|
202
|
-
tr: ['table-row', 'tbl-row'],
|
|
203
|
-
td: ['table-cell', 'tbl-cell', 'table-data', 'tbl-data'],
|
|
204
|
-
'double-underscore': ['underscore', 'behavior-switch', 'behaviour-switch'],
|
|
205
|
-
hr: ['horizontal'],
|
|
206
|
-
category: ['category-link', 'cat', 'cat-link'],
|
|
207
|
-
file: ['file-link', 'image', 'image-link', 'img', 'img-link'],
|
|
208
|
-
'image-parameter': ['img-parameter', 'image-param', 'img-param'],
|
|
209
|
-
quote: ['quotes', 'quot', 'apostrophe', 'apostrophes', 'apos'],
|
|
210
|
-
'ext-link': ['external-link'],
|
|
211
|
-
'ext-link-text': ['external-link-text'],
|
|
212
|
-
'ext-link-url': ['external-link-url'],
|
|
213
|
-
'free-ext-link': ['free-external-link', 'magic-link'],
|
|
214
|
-
dd: ['indent', 'indentation'],
|
|
215
|
-
converter: ['convert', 'conversion'],
|
|
216
|
-
'converter-flags': ['convert-flags', 'conversion-flags', 'converter-flag', 'convert-flag', 'conversion-flag'],
|
|
217
|
-
'converter-rule': ['convert-rule', 'conversion-rule'],
|
|
218
|
-
'converter-rule-noconvert': ['convert-rule-noconvert', 'conversion-rule-noconvert'],
|
|
219
|
-
'converter-rule-variant': ['convert-rule-variant', 'conversion-rule-variant'],
|
|
220
|
-
'converter-rule-to': ['convert-rule-to', 'conversion-rule-to'],
|
|
221
|
-
'converter-rule-from': ['convert-rule-from', 'conversion-rule-from'],
|
|
222
|
-
},
|
|
223
256
|
};
|
|
224
257
|
|
|
225
258
|
const /** @type {PropertyDescriptorMap} */ def = {};
|
|
226
259
|
for (const key in Parser) {
|
|
227
|
-
if (['
|
|
260
|
+
if (['aliases', 'MAX_STAGE', 'typeAliases', 'promises'].includes(key)) {
|
|
228
261
|
def[key] = {enumerable: false, writable: false};
|
|
229
262
|
} else if (!['config', 'isInterwiki', 'normalizeTitle', 'parse', 'getTool'].includes(key)) {
|
|
230
263
|
def[key] = {enumerable: false};
|