wikiparser-node 0.8.0-m → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. package/README.md +39 -0
  2. package/index.js +253 -11
  3. package/lib/element.js +481 -7
  4. package/lib/node.js +552 -6
  5. package/lib/ranges.js +130 -0
  6. package/lib/text.js +108 -16
  7. package/lib/title.js +21 -0
  8. package/mixin/attributeParent.js +117 -0
  9. package/mixin/fixedToken.js +40 -0
  10. package/mixin/hidden.js +3 -0
  11. package/mixin/singleLine.js +31 -0
  12. package/mixin/sol.js +65 -0
  13. package/package.json +5 -4
  14. package/parser/brackets.js +1 -0
  15. package/parser/commentAndExt.js +4 -3
  16. package/parser/converter.js +1 -0
  17. package/parser/externalLinks.js +1 -0
  18. package/parser/hrAndDoubleUnderscore.js +1 -0
  19. package/parser/html.js +1 -0
  20. package/parser/links.js +5 -4
  21. package/parser/list.js +1 -0
  22. package/parser/magicLinks.js +5 -4
  23. package/parser/quotes.js +2 -1
  24. package/parser/selector.js +177 -0
  25. package/parser/table.js +1 -0
  26. package/src/arg.js +116 -2
  27. package/src/atom/hidden.js +2 -0
  28. package/src/atom/index.js +17 -0
  29. package/src/attribute.js +181 -4
  30. package/src/attributes.js +308 -4
  31. package/src/charinsert.js +97 -0
  32. package/src/converter.js +108 -2
  33. package/src/converterFlags.js +187 -0
  34. package/src/converterRule.js +184 -1
  35. package/src/extLink.js +120 -1
  36. package/src/gallery.js +57 -6
  37. package/src/hasNowiki/index.js +12 -0
  38. package/src/hasNowiki/pre.js +12 -0
  39. package/src/heading.js +55 -4
  40. package/src/html.js +118 -3
  41. package/src/imageParameter.js +176 -5
  42. package/src/imagemap.js +60 -1
  43. package/src/imagemapLink.js +13 -1
  44. package/src/index.js +529 -3
  45. package/src/link/category.js +37 -1
  46. package/src/link/file.js +159 -2
  47. package/src/link/galleryImage.js +59 -1
  48. package/src/link/index.js +259 -1
  49. package/src/magicLink.js +90 -9
  50. package/src/nested/choose.js +1 -0
  51. package/src/nested/combobox.js +1 -0
  52. package/src/nested/index.js +30 -3
  53. package/src/nested/references.js +1 -0
  54. package/src/nowiki/comment.js +25 -1
  55. package/src/nowiki/dd.js +47 -1
  56. package/src/nowiki/doubleUnderscore.js +31 -1
  57. package/src/nowiki/hr.js +20 -1
  58. package/src/nowiki/index.js +23 -1
  59. package/src/nowiki/list.js +5 -2
  60. package/src/nowiki/noinclude.js +14 -0
  61. package/src/nowiki/quote.js +16 -2
  62. package/src/onlyinclude.js +26 -1
  63. package/src/paramTag/index.js +24 -1
  64. package/src/paramTag/inputbox.js +4 -1
  65. package/src/parameter.js +148 -6
  66. package/src/syntax.js +68 -0
  67. package/src/table/index.js +940 -2
  68. package/src/table/td.js +225 -5
  69. package/src/table/tr.js +247 -2
  70. package/src/tagPair/ext.js +24 -3
  71. package/src/tagPair/include.js +24 -0
  72. package/src/tagPair/index.js +51 -2
  73. package/src/transclude.js +512 -11
  74. package/tool/index.js +1202 -0
  75. package/util/debug.js +73 -0
  76. package/util/string.js +48 -1
  77. package/config/minimum.json +0 -142
package/README.md ADDED
@@ -0,0 +1,39 @@
1
+ [![npm version](https://badge.fury.io/js/wikiparser-node.svg)](https://www.npmjs.com/package/wikiparser-node)
2
+
3
+ # 简介
4
+ wikiparser-node 是一款由 Bhsd 开发的基于 [Node.js](https://nodejs.org/en/) 环境的离线[维基文本](https://www.mediawiki.org/wiki/Wikitext)语法解析器,可以解析绝大部分的维基语法并生成[语法树](https://en.wikipedia.org/wiki/Abstract_syntax_tree),还可以很方便地对语法树进行查询和修改,最后返回修改后的维基文本。语法树的每个节点对应一个仿照 [HTMLElement](https://developer.mozilla.org/en-US/docs/Web/API/HTMLElement) 类设计的类 [Token](https://github.com/bhsd-harry/wikiparser-node/wiki/01.-Token)。
5
+
6
+ # 使用方法
7
+
8
+ ```js
9
+ var Parser = require('wikiparser-node');
10
+ ```
11
+
12
+ 更多文档请查阅 [Wiki](https://github.com/bhsd-harry/wikiparser-node/wiki)。
13
+
14
+ # 目录
15
+
16
+ 1. [Parser](https://github.com/bhsd-harry/wikiparser-node/wiki/Home#parser)
17
+ 2. [AstElement](https://github.com/bhsd-harry/wikiparser-node/wiki/01.-Token#astelement)
18
+ 3. [Token](https://github.com/bhsd-harry/wikiparser-node/wiki/01.-Token#token)
19
+ 4. [CommentToken](https://github.com/bhsd-harry/wikiparser-node/wiki/02.-CommentToken等#commenttoken)
20
+ 5. [ExtToken](https://github.com/bhsd-harry/wikiparser-node/wiki/03.-ExtToken)
21
+ 6. [AttributeToken](https://github.com/bhsd-harry/wikiparser-node/wiki/04.-AttributeToken)
22
+ 7. [HeadingToken](https://github.com/bhsd-harry/wikiparser-node/wiki/05.-HeadingToken)
23
+ 8. [ArgToken](https://github.com/bhsd-harry/wikiparser-node/wiki/06.-ArgToken)
24
+ 9. [TranscludeToken](https://github.com/bhsd-harry/wikiparser-node/wiki/07.-TranscludeToken)
25
+ 10. [ParameterToken](https://github.com/bhsd-harry/wikiparser-node/wiki/08.-ParameterToken)
26
+ 11. [HtmlToken](https://github.com/bhsd-harry/wikiparser-node/wiki/09.-HtmlToken)
27
+ 12. [TableToken](https://github.com/bhsd-harry/wikiparser-node/wiki/10.-TableToken)
28
+ 13. [TdToken](https://github.com/bhsd-harry/wikiparser-node/wiki/11.-TdToken)
29
+ 14. [DoubleUnderscoreToken](https://github.com/bhsd-harry/wikiparser-node/wiki/12.-DoubleUnderscoreToken)
30
+ 15. [LinkToken](https://github.com/bhsd-harry/wikiparser-node/wiki/13.-LinkToken)
31
+ 16. [CategoryToken](https://github.com/bhsd-harry/wikiparser-node/wiki/14.-CategoryToken)
32
+ 17. [FileToken](https://github.com/bhsd-harry/wikiparser-node/wiki/15.-FileToken和GalleryImageToken#filetoken)
33
+ 18. [ImageParameterToken](https://github.com/bhsd-harry/wikiparser-node/wiki/16.-ImageParameterToken)
34
+ 19. [ExtLinkToken](https://github.com/bhsd-harry/wikiparser-node/wiki/17.-ExtLinkToken和MagicLinkToken#extlinktoken)
35
+ 20. [MagicLinkToken](https://github.com/bhsd-harry/wikiparser-node/wiki/17.-ExtLinkToken和MagicLinkToken#magiclinktoken)
36
+ 21. [ConverterToken](https://github.com/bhsd-harry/wikiparser-node/wiki/18.-ConverterToken)
37
+ 22. [ConverterRuleToken](https://github.com/bhsd-harry/wikiparser-node/wiki/19.-ConverterRuleToken)
38
+ 23. [选择器](https://github.com/bhsd-harry/wikiparser-node/wiki/20.-选择器)
39
+ 24. [$ (TokenCollection)](https://github.com/bhsd-harry/wikiparser-node/wiki/21.-$-(TokenCollection))
package/index.js CHANGED
@@ -1,16 +1,128 @@
1
1
  'use strict';
2
2
 
3
+ const fs = require('fs'),
4
+ path = require('path');
5
+
3
6
  const /** @type {Parser} */ Parser = {
4
- config: undefined,
5
- minConfig: require('./config/minimum'),
7
+ config: './config/default',
6
8
 
7
9
  MAX_STAGE: 11,
8
10
 
9
- getConfig(path) {
10
- if (path) {
11
- this.config = require(path);
12
- }
13
- return {...this.minConfig, ...this.config, excludes: []};
11
+ warning: true,
12
+ debugging: false,
13
+ running: false,
14
+
15
+ classes: {},
16
+ mixins: {},
17
+ parsers: {},
18
+ tool: {},
19
+
20
+ aliases: [
21
+ ['AstText'],
22
+ ['CommentToken', 'ExtToken', 'IncludeToken', 'NoincludeToken'],
23
+ ['ArgToken', 'TranscludeToken', 'HeadingToken'],
24
+ ['HtmlToken'],
25
+ ['TableToken'],
26
+ ['HrToken', 'DoubleUnderscoreToken'],
27
+ ['LinkToken', 'FileToken', 'CategoryToken'],
28
+ ['QuoteToken'],
29
+ ['ExtLinkToken'],
30
+ ['MagicLinkToken'],
31
+ ['ListToken', 'DdToken'],
32
+ ['ConverterToken'],
33
+ ],
34
+ typeAliases: {
35
+ text: ['string', 'str'],
36
+ plain: ['regular', 'normal'],
37
+ // comment and extension
38
+ onlyinclude: ['only-include'],
39
+ noinclude: ['no-include'],
40
+ include: ['includeonly', 'include-only'],
41
+ comment: undefined,
42
+ ext: ['extension'],
43
+ 'ext-attrs': ['extension-attrs', 'ext-attributes', 'extension-attributes'],
44
+ 'ext-attr-dirty': ['extension-attr-dirty', 'ext-attribute-dirty', 'extension-attribute-dirty'],
45
+ 'ext-attr': ['extension-attr', 'ext-attribute', 'extension-attribute'],
46
+ 'attr-key': ['attribute-key'],
47
+ 'attr-value': ['attribute-value', 'attr-val', 'attribute-val'],
48
+ 'ext-inner': ['extension-inner'],
49
+ // triple brackets
50
+ arg: ['argument'],
51
+ 'arg-name': ['argument-name'],
52
+ 'arg-default': ['argument-default'],
53
+ hidden: ['arg-redundant'],
54
+ // double brackets
55
+ 'magic-word': ['parser-function', 'parser-func'],
56
+ 'magic-word-name': ['parser-function-name', 'parser-func-name'],
57
+ 'invoke-function': ['invoke-func', 'lua-function', 'lua-func', 'module-function', 'module-func'],
58
+ 'invoke-module': ['lua-module'],
59
+ template: undefined,
60
+ 'template-name': undefined,
61
+ parameter: ['param'],
62
+ 'parameter-key': ['param-key'],
63
+ 'parameter-value': ['parameter-val', 'param-value', 'param-val'],
64
+ // heading
65
+ heading: ['header'],
66
+ 'heading-title': ['header-title'],
67
+ 'heading-trail': ['header-trail'],
68
+ // html
69
+ html: undefined,
70
+ 'html-attrs': ['html-attributes'],
71
+ 'html-attr-dirty': ['html-attribute-dirty'],
72
+ 'html-attr': ['html-attribute'],
73
+ // table
74
+ table: undefined,
75
+ tr: ['table-row'],
76
+ td: ['table-cell', 'table-data'],
77
+ 'table-syntax': undefined,
78
+ 'table-attrs': ['tr-attrs', 'td-attrs', 'table-attributes', 'tr-attributes', 'td-attributes'],
79
+ 'table-attr-dirty':
80
+ ['tr-attr-dirty', 'td-attr-dirty', 'table-attribute-dirty', 'tr-attribute-dirty', 'td-attribute-dirty'],
81
+ 'table-attr': ['tr-attr', 'td-attr', 'table-attribute', 'tr-attribute', 'td-attribute'],
82
+ 'table-inter': undefined,
83
+ 'td-inner': ['table-cell-inner', 'table-data-inner'],
84
+ // hr and double-underscore
85
+ hr: ['horizontal'],
86
+ 'double-underscore': ['underscore', 'behavior-switch', 'behaviour-switch'],
87
+ // link
88
+ link: ['wikilink'],
89
+ 'link-target': ['wikilink-target'],
90
+ 'link-text': ['wikilink-text'],
91
+ category: ['category-link', 'cat', 'cat-link'],
92
+ file: ['file-link', 'image', 'image-link', 'img', 'img-link'],
93
+ 'gallery-image': ['gallery-file', 'gallery-img'],
94
+ 'imagemap-image': ['imagemap-file', 'imagemap-img', 'image-map-image', 'image-map-file', 'image-map-img'],
95
+ 'image-parameter': ['img-parameter', 'image-param', 'img-param'],
96
+ // quotes
97
+ quote: ['quotes', 'quot', 'apostrophe', 'apostrophes', 'apos'],
98
+ // external link
99
+ 'ext-link': ['external-link'],
100
+ 'ext-link-text': ['external-link-text'],
101
+ 'ext-link-url': ['external-link-url'],
102
+ // magic link
103
+ 'free-ext-link': ['free-external-link', 'magic-link'],
104
+ // list
105
+ list: ['ol', 'ordered-list', 'ul', 'unordered-list', 'dl', 'description-list'],
106
+ dd: ['indent', 'indentation'],
107
+ // converter
108
+ converter: ['convert', 'conversion'],
109
+ 'converter-flags': ['convert-flags', 'conversion-flags'],
110
+ 'converter-flag': ['convert-flag', 'conversion-flag'],
111
+ 'converter-rule': ['convert-rule', 'conversion-rule'],
112
+ 'converter-rule-noconvert': ['convert-rule-noconvert', 'conversion-rule-noconvert'],
113
+ 'converter-rule-variant': ['convert-rule-variant', 'conversion-rule-variant'],
114
+ 'converter-rule-to': ['convert-rule-to', 'conversion-rule-to'],
115
+ 'converter-rule-from': ['convert-rule-from', 'conversion-rule-from'],
116
+ // specific extensions
117
+ 'param-line': ['parameter-line'],
118
+ 'charinsert-line': undefined,
119
+ 'imagemap-link': ['image-map-link'],
120
+ },
121
+
122
+ promises: [Promise.resolve()],
123
+
124
+ getConfig() {
125
+ return {...require(this.config), excludes: []};
14
126
  },
15
127
 
16
128
  normalizeTitle(
@@ -35,6 +147,22 @@ const /** @type {Parser} */ Parser = {
35
147
  }
36
148
  const Title = require('./lib/title');
37
149
  const titleObj = new Title(String(title), defaultNs, config, decode, selfLink);
150
+ if (token) {
151
+ /**
152
+ * 重建部分属性值
153
+ * @param {string[]} keys 属性键
154
+ */
155
+ const build = keys => {
156
+ for (const key of keys) {
157
+ if (titleObj[key].includes('\0')) {
158
+ titleObj[key] = token.getAttribute('buildFromStr')(titleObj[key], 'text');
159
+ }
160
+ }
161
+ };
162
+ this.run(() => {
163
+ build(['title', 'main', 'fragment']);
164
+ });
165
+ }
38
166
  return titleObj;
39
167
  },
40
168
 
@@ -48,13 +176,96 @@ const /** @type {Parser} */ Parser = {
48
176
  token = new Token(wikitext, config);
49
177
  try {
50
178
  token.parse(maxStage, include);
51
- } catch {}
179
+ } catch (e) {
180
+ if (e instanceof Error) {
181
+ const file = path.join(__dirname, 'errors', new Date().toISOString()),
182
+ stage = token.getAttribute('stage');
183
+ fs.writeFileSync(file, stage === this.MAX_STAGE ? wikitext : String(token));
184
+ fs.writeFileSync(`${file}.err`, e.stack);
185
+ fs.writeFileSync(`${file}.json`, JSON.stringify({
186
+ stage, include: token.getAttribute('include'), config: this.config,
187
+ }, null, '\t'));
188
+ }
189
+ throw e;
190
+ }
52
191
  });
192
+ if (this.debugging) {
193
+ let restored = String(token),
194
+ process = '解析';
195
+ if (restored === wikitext) {
196
+ const entities = {lt: '<', gt: '>', amp: '&'};
197
+ restored = token.print().replace(
198
+ /<[^<]+?>|&([lg]t|amp);/gu,
199
+ /** @param {string} s */ (_, s) => s ? entities[s] : '',
200
+ );
201
+ process = '渲染HTML';
202
+ }
203
+ if (restored !== wikitext) {
204
+ const diff = require('./util/diff');
205
+ const {promises: {0: cur, length}} = this;
206
+ this.promises.unshift((async () => {
207
+ await cur;
208
+ this.error(`${process}过程中不可逆地修改了原始文本!`);
209
+ return diff(wikitext, restored, length);
210
+ })());
211
+ }
212
+ }
53
213
  return token;
54
214
  },
55
215
 
56
216
  run(callback) {
57
- return callback();
217
+ const {running} = this;
218
+ this.running = true;
219
+ try {
220
+ const result = callback();
221
+ this.running = running;
222
+ return result;
223
+ } catch (e) {
224
+ this.running = running;
225
+ throw e;
226
+ }
227
+ },
228
+
229
+ warn(msg, ...args) {
230
+ if (this.warning) {
231
+ console.warn('\x1B[33m%s\x1B[0m', msg, ...args);
232
+ }
233
+ },
234
+ debug(msg, ...args) {
235
+ if (this.debugging) {
236
+ console.debug('\x1B[34m%s\x1B[0m', msg, ...args);
237
+ }
238
+ },
239
+ error(msg, ...args) {
240
+ console.error('\x1B[31m%s\x1B[0m', msg, ...args);
241
+ },
242
+ info(msg, ...args) {
243
+ console.info('\x1B[32m%s\x1B[0m', msg, ...args);
244
+ },
245
+
246
+ log(f) {
247
+ if (typeof f === 'function') {
248
+ console.log(String(f));
249
+ }
250
+ },
251
+
252
+ clearCache() {
253
+ const entries = [
254
+ ...Object.entries(this.classes),
255
+ ...Object.entries(this.mixins),
256
+ ...Object.entries(this.parsers),
257
+ ...Object.entries(this.tool),
258
+ ];
259
+ for (const [, filePath] of entries) {
260
+ try {
261
+ delete require.cache[require.resolve(filePath)];
262
+ } catch {}
263
+ }
264
+ for (const [name, filePath] of entries) {
265
+ if (name in global) {
266
+ global[name] = require(filePath);
267
+ }
268
+ }
58
269
  },
59
270
 
60
271
  isInterwiki(title, {interwiki} = Parser.getConfig()) {
@@ -62,11 +273,42 @@ const /** @type {Parser} */ Parser = {
62
273
  return new RegExp(`^(${interwiki.join('|')})\\s*:`, 'iu')
63
274
  .exec(title.replaceAll('_', ' ').replace(/^\s*:?\s*/u, ''));
64
275
  },
276
+
277
+ reparse(date) {
278
+ const main = fs.readdirSync(path.join(__dirname, 'errors'))
279
+ .find(name => name.startsWith(date) && name.at(-1) === 'Z');
280
+ if (!main) {
281
+ throw new RangeError(`找不到对应时间戳的错误记录:${date}`);
282
+ }
283
+ const file = path.join(__dirname, 'errors', main),
284
+ wikitext = fs.readFileSync(file, 'utf8');
285
+ const {stage, include, config} = require(`${file}.json`),
286
+ Token = require('./src');
287
+ this.config = config;
288
+ return this.run(() => {
289
+ const halfParsed = stage < this.MAX_STAGE,
290
+ token = new Token(wikitext, this.getConfig(), halfParsed);
291
+ if (halfParsed) {
292
+ token.setAttribute('stage', stage).getAttribute('parseOnce')(stage, include);
293
+ } else {
294
+ token.parse(undefined, include);
295
+ }
296
+ fs.unlinkSync(file);
297
+ fs.unlinkSync(`${file}.err`);
298
+ fs.unlinkSync(`${file}.json`);
299
+ return token;
300
+ });
301
+ },
302
+
303
+ getTool() {
304
+ delete require.cache[require.resolve('./tool')];
305
+ return require('./tool');
306
+ },
65
307
  };
66
308
 
67
309
  const /** @type {PropertyDescriptorMap} */ def = {},
68
- immutable = new Set(['MAX_STAGE', 'minConfig']),
69
- enumerable = new Set(['config', 'normalizeTitle', 'parse', 'isInterwiki']);
310
+ immutable = new Set(['MAX_STAGE', 'aliases', 'typeAliases', 'promises']),
311
+ enumerable = new Set(['config', 'normalizeTitle', 'parse', 'isInterwiki', 'getTool']);
70
312
  for (const key in Parser) {
71
313
  if (immutable.has(key)) {
72
314
  def[key] = {enumerable: false, writable: false};