wikiparser-node 0.3.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. package/README.md +1 -1
  2. package/config/default.json +13 -17
  3. package/config/llwiki.json +11 -79
  4. package/config/moegirl.json +7 -1
  5. package/config/zhwiki.json +1269 -0
  6. package/index.js +130 -97
  7. package/lib/element.js +410 -518
  8. package/lib/node.js +493 -115
  9. package/lib/ranges.js +27 -19
  10. package/lib/text.js +175 -0
  11. package/lib/title.js +14 -6
  12. package/mixin/attributeParent.js +70 -24
  13. package/mixin/fixedToken.js +18 -10
  14. package/mixin/hidden.js +6 -4
  15. package/mixin/sol.js +39 -12
  16. package/package.json +17 -4
  17. package/parser/brackets.js +18 -18
  18. package/parser/commentAndExt.js +16 -14
  19. package/parser/converter.js +14 -13
  20. package/parser/externalLinks.js +12 -11
  21. package/parser/hrAndDoubleUnderscore.js +24 -14
  22. package/parser/html.js +8 -7
  23. package/parser/links.js +13 -13
  24. package/parser/list.js +12 -11
  25. package/parser/magicLinks.js +11 -10
  26. package/parser/quotes.js +6 -5
  27. package/parser/selector.js +175 -0
  28. package/parser/table.js +31 -24
  29. package/src/arg.js +91 -43
  30. package/src/atom/hidden.js +5 -2
  31. package/src/atom/index.js +17 -9
  32. package/src/attribute.js +210 -101
  33. package/src/converter.js +78 -43
  34. package/src/converterFlags.js +104 -45
  35. package/src/converterRule.js +136 -78
  36. package/src/extLink.js +81 -27
  37. package/src/gallery.js +63 -20
  38. package/src/heading.js +58 -20
  39. package/src/html.js +138 -48
  40. package/src/imageParameter.js +93 -58
  41. package/src/index.js +314 -186
  42. package/src/link/category.js +22 -54
  43. package/src/link/file.js +83 -32
  44. package/src/link/galleryImage.js +21 -7
  45. package/src/link/index.js +170 -81
  46. package/src/magicLink.js +64 -14
  47. package/src/nowiki/comment.js +36 -10
  48. package/src/nowiki/dd.js +37 -22
  49. package/src/nowiki/doubleUnderscore.js +21 -7
  50. package/src/nowiki/hr.js +11 -7
  51. package/src/nowiki/index.js +16 -9
  52. package/src/nowiki/list.js +2 -2
  53. package/src/nowiki/noinclude.js +8 -4
  54. package/src/nowiki/quote.js +38 -7
  55. package/src/onlyinclude.js +24 -7
  56. package/src/parameter.js +102 -62
  57. package/src/syntax.js +23 -20
  58. package/src/table/index.js +282 -174
  59. package/src/table/td.js +112 -61
  60. package/src/table/tr.js +135 -74
  61. package/src/tagPair/ext.js +30 -23
  62. package/src/tagPair/include.js +26 -11
  63. package/src/tagPair/index.js +72 -29
  64. package/src/transclude.js +235 -127
  65. package/tool/index.js +42 -32
  66. package/util/debug.js +21 -18
  67. package/util/diff.js +76 -0
  68. package/util/lint.js +40 -0
  69. package/util/string.js +56 -26
  70. package/.eslintrc.json +0 -319
  71. package/errors/README +0 -1
  72. package/jsconfig.json +0 -7
  73. package/printed/README +0 -1
  74. package/typings/element.d.ts +0 -28
  75. package/typings/index.d.ts +0 -52
  76. package/typings/node.d.ts +0 -23
  77. package/typings/parser.d.ts +0 -9
  78. package/typings/table.d.ts +0 -14
  79. package/typings/token.d.ts +0 -22
  80. package/typings/tool.d.ts +0 -10
package/index.js CHANGED
@@ -7,25 +7,102 @@ const fs = require('fs'),
7
7
  const /** @type {Parser} */ Parser = {
8
8
  warning: true,
9
9
  debugging: false,
10
+ running: false,
11
+
12
+ config: './config/default',
13
+
14
+ MAX_STAGE: 11,
15
+
16
+ classes: {},
17
+ mixins: {},
18
+ parsers: {},
19
+
20
+ aliases: [
21
+ ['AstText'],
22
+ ['CommentToken', 'ExtToken', 'IncludeToken', 'NoincludeToken'],
23
+ ['ArgToken', 'TranscludeToken', 'HeadingToken'],
24
+ ['HtmlToken'],
25
+ ['TableToken'],
26
+ ['HrToken', 'DoubleUnderscoreToken'],
27
+ ['LinkToken', 'FileToken', 'CategoryToken'],
28
+ ['QuoteToken'],
29
+ ['ExtLinkToken'],
30
+ ['MagicLinkToken'],
31
+ ['ListToken', 'DdToken'],
32
+ ['ConverterToken'],
33
+ ],
34
+ typeAliases: {
35
+ include: ['includeonly'],
36
+ ext: ['extension'],
37
+ 'ext-attr': ['extension-attr'],
38
+ 'ext-inner': ['extension-inner'],
39
+ arg: ['argument'],
40
+ 'arg-name': ['argument-name'],
41
+ 'arg-default': ['argument-default'],
42
+ 'magic-word': ['parser-function', 'parser-func'],
43
+ 'invoke-function': ['invoke-func', 'lua-function', 'lua-func', 'module-function', 'module-func'],
44
+ 'invoke-module': ['lua-module'],
45
+ parameter: ['param'],
46
+ 'parameter-key': ['param-key'],
47
+ 'parameter-value': ['parameter-val', 'param-value', 'param-val'],
48
+ heading: ['header'],
49
+ 'heading-title': ['header-title'],
50
+ 'heading-trail': ['header-trail'],
51
+ 'table-attr': ['tr-attr', 'table-row-attr', 'td-attr', 'table-cell-attr', 'table-data-attr'],
52
+ tr: ['table-row'],
53
+ td: ['table-cell', 'table-data'],
54
+ 'td-inner': ['table-cell-inner', 'table-data-inner'],
55
+ 'double-underscore': ['underscore', 'behavior-switch', 'behaviour-switch'],
56
+ hr: ['horizontal'],
57
+ category: ['category-link', 'cat', 'cat-link'],
58
+ file: ['file-link', 'image', 'image-link', 'img', 'img-link'],
59
+ 'gallery-image': ['gallery-file', 'gallery-img'],
60
+ 'image-parameter': ['img-parameter', 'image-param', 'img-param'],
61
+ quote: ['quotes', 'quot', 'apostrophe', 'apostrophes', 'apos'],
62
+ 'ext-link': ['external-link'],
63
+ 'ext-link-text': ['external-link-text'],
64
+ 'ext-link-url': ['external-link-url'],
65
+ 'free-ext-link': ['free-external-link', 'magic-link'],
66
+ list: ['ol', 'ordered-list', 'ul', 'unordered-list', 'dl', 'description-list'],
67
+ dd: ['indent', 'indentation'],
68
+ converter: ['convert', 'conversion'],
69
+ 'converter-flags': ['convert-flags', 'conversion-flags'],
70
+ 'converter-flag': ['convert-flag', 'conversion-flag'],
71
+ 'converter-rule': ['convert-rule', 'conversion-rule'],
72
+ 'converter-rule-noconvert': ['convert-rule-noconvert', 'conversion-rule-noconvert'],
73
+ 'converter-rule-variant': ['convert-rule-variant', 'conversion-rule-variant'],
74
+ 'converter-rule-to': ['convert-rule-to', 'conversion-rule-to'],
75
+ 'converter-rule-from': ['convert-rule-from', 'conversion-rule-from'],
76
+ },
77
+
78
+ promises: [Promise.resolve()],
10
79
 
11
80
  warn(msg, ...args) {
12
81
  if (this.warning) {
13
- console.warn('\x1b[33m%s\x1b[0m', msg, ...args);
82
+ console.warn('\x1B[33m%s\x1B[0m', msg, ...args);
14
83
  }
15
84
  },
16
85
  debug(msg, ...args) {
17
86
  if (this.debugging) {
18
- console.debug('\x1b[34m%s\x1b[0m', msg, ...args);
87
+ console.debug('\x1B[34m%s\x1B[0m', msg, ...args);
19
88
  }
20
89
  },
21
90
  error(msg, ...args) {
22
- console.error('\x1b[31m%s\x1b[0m', msg, ...args);
91
+ console.error('\x1B[31m%s\x1B[0m', msg, ...args);
23
92
  },
24
93
  info(msg, ...args) {
25
- console.info('\x1b[32m%s\x1b[0m', msg, ...args);
94
+ console.info('\x1B[32m%s\x1B[0m', msg, ...args);
26
95
  },
27
96
 
28
- running: false,
97
+ log(f) {
98
+ if (typeof f === 'function') {
99
+ console.log(String(f));
100
+ }
101
+ },
102
+
103
+ getConfig() {
104
+ return require(this.config);
105
+ },
29
106
 
30
107
  run(callback) {
31
108
  const {running} = this;
@@ -40,10 +117,6 @@ const /** @type {Parser} */ Parser = {
40
117
  }
41
118
  },
42
119
 
43
- classes: {},
44
- mixins: {},
45
- parsers: {},
46
-
47
120
  clearCache() {
48
121
  const entries = [
49
122
  ...Object.entries(this.classes),
@@ -60,53 +133,35 @@ const /** @type {Parser} */ Parser = {
60
133
  }
61
134
  },
62
135
 
63
- log(f) {
64
- if (typeof f === 'function') {
65
- console.log(f.toString());
66
- }
67
- },
68
-
69
- aliases: [
70
- ['String'],
71
- ['CommentToken', 'ExtToken', 'IncludeToken', 'NoincludeToken'],
72
- ['ArgToken', 'TranscludeToken', 'HeadingToken'],
73
- ['HtmlToken'],
74
- ['TableToken'],
75
- ['HrToken', 'DoubleUnderscoreToken'],
76
- ['LinkToken', 'FileToken', 'CategoryToken'],
77
- ['QuoteToken'],
78
- ['ExtLinkToken'],
79
- ['MagicLinkToken'],
80
- ['ListToken', 'DdToken'],
81
- ['ConverterToken'],
82
- ],
83
-
84
- config: './config/default',
85
-
86
- getConfig() {
87
- return require(this.config);
88
- },
89
-
90
136
  isInterwiki(title, {interwiki} = Parser.getConfig()) {
91
137
  title = String(title);
92
- return RegExp(`^(${interwiki.join('|')})\\s*:`, 'i').exec(title.replaceAll('_', ' ').replace(/^\s*:?\s*/, ''));
138
+ return new RegExp(`^(${interwiki.join('|')})\\s*:`, 'iu')
139
+ .exec(title.replaceAll('_', ' ').replace(/^\s*:?\s*/u, ''));
93
140
  },
94
141
 
95
142
  normalizeTitle(title, defaultNs = 0, include = false, config = Parser.getConfig(), halfParsed = false) {
96
- title = String(title);
97
143
  let /** @type {Token} */ token;
98
144
  if (!halfParsed) {
99
145
  const Token = require('./src');
100
- token = this.run(() => new Token(title, config).parseOnce(0, include).parseOnce());
146
+ token = this.run(() => {
147
+ const newToken = new Token(String(title), config),
148
+ parseOnce = newToken.getAttribute('parseOnce');
149
+ parseOnce(0, include);
150
+ return parseOnce();
151
+ });
101
152
  title = token.firstChild;
102
153
  }
103
- const Title = require('./lib/title'),
104
- titleObj = new Title(title, defaultNs, config);
154
+ const Title = require('./lib/title');
155
+ const titleObj = new Title(String(title), defaultNs, config);
105
156
  if (token) {
106
- const build = /** @param {string[]} keys */ keys => {
157
+ /**
158
+ * 重建部分属性值
159
+ * @param {string[]} keys 属性键
160
+ */
161
+ const build = keys => {
107
162
  for (const key of keys) {
108
163
  if (titleObj[key].includes('\0')) {
109
- titleObj[key] = text(token.buildFromStr(titleObj[key]));
164
+ titleObj[key] = text(token.getAttribute('buildFromStr')(titleObj[key]));
110
165
  }
111
166
  }
112
167
  };
@@ -117,17 +172,15 @@ const /** @type {Parser} */ Parser = {
117
172
  return titleObj;
118
173
  },
119
174
 
120
- MAX_STAGE: 11,
121
-
122
- parse(wikitext, include = false, maxStage = Parser.MAX_STAGE, config = Parser.getConfig()) {
175
+ parse(wikitext, include, maxStage = Parser.MAX_STAGE, config = Parser.getConfig()) {
123
176
  const Token = require('./src');
124
- let token;
177
+ let /** @type {Token} */ token;
125
178
  this.run(() => {
126
179
  if (typeof wikitext === 'string') {
127
180
  token = new Token(wikitext, config);
128
181
  } else if (wikitext instanceof Token) {
129
182
  token = wikitext;
130
- wikitext = token.toString();
183
+ wikitext = String(token);
131
184
  } else {
132
185
  throw new TypeError('待解析的内容应为 String 或 Token!');
133
186
  }
@@ -137,7 +190,7 @@ const /** @type {Parser} */ Parser = {
137
190
  if (e instanceof Error) {
138
191
  const file = path.join(__dirname, 'errors', new Date().toISOString()),
139
192
  stage = token.getAttribute('stage');
140
- fs.writeFileSync(file, stage === this.MAX_STAGE ? wikitext : token.toString());
193
+ fs.writeFileSync(file, stage === this.MAX_STAGE ? wikitext : String(token));
141
194
  fs.writeFileSync(`${file}.err`, e.stack);
142
195
  fs.writeFileSync(`${file}.json`, JSON.stringify({
143
196
  stage, include: token.getAttribute('include'), config: this.config,
@@ -146,25 +199,46 @@ const /** @type {Parser} */ Parser = {
146
199
  throw e;
147
200
  }
148
201
  });
202
+ if (this.debugging) {
203
+ let restored = String(token),
204
+ process = '解析';
205
+ if (restored === wikitext) {
206
+ const entities = {lt: '<', gt: '>', amp: '&'};
207
+ restored = token.print().replaceAll(
208
+ /<[^<]+?>|&([lg]t|amp);/gu,
209
+ /** @param {string} s */ (_, s) => s ? entities[s] : '',
210
+ );
211
+ process = '渲染HTML';
212
+ }
213
+ if (restored !== wikitext) {
214
+ const diff = require('./util/diff');
215
+ const {promises: {0: cur, length}} = this;
216
+ this.promises.unshift((async () => {
217
+ await cur;
218
+ this.error(`${process}过程中不可逆地修改了原始文本!`);
219
+ return diff(wikitext, restored, length);
220
+ })());
221
+ }
222
+ }
149
223
  return token;
150
224
  },
151
225
 
152
226
  reparse(date) {
153
227
  const main = fs.readdirSync(path.join(__dirname, 'errors'))
154
- .find(name => name.startsWith(date) && name.endsWith('Z'));
228
+ .find(name => name.startsWith(date) && name.at(-1) === 'Z');
155
229
  if (!main) {
156
230
  throw new RangeError(`找不到对应时间戳的错误记录:${date}`);
157
231
  }
158
- const Token = require('./src'),
159
- file = path.join(__dirname, 'errors', main),
160
- wikitext = fs.readFileSync(file, 'utf8'),
161
- {stage, include, config} = require(`${file}.json`);
232
+ const file = path.join(__dirname, 'errors', main),
233
+ wikitext = fs.readFileSync(file, 'utf8');
234
+ const {stage, include, config} = require(`${file}.json`),
235
+ Token = require('./src');
162
236
  this.config = config;
163
237
  return this.run(() => {
164
238
  const halfParsed = stage < this.MAX_STAGE,
165
239
  token = new Token(wikitext, this.getConfig(), halfParsed);
166
240
  if (halfParsed) {
167
- token.setAttribute('stage', stage).parseOnce(stage, include);
241
+ token.setAttribute('stage', stage).getAttribute('parseOnce')(stage, include);
168
242
  } else {
169
243
  token.parse(undefined, include);
170
244
  }
@@ -179,52 +253,11 @@ const /** @type {Parser} */ Parser = {
179
253
  delete require.cache[require.resolve('./tool')];
180
254
  return require('./tool');
181
255
  },
182
-
183
- typeAliases: {
184
- ext: ['extension'],
185
- 'ext-inner': ['extension-inner'],
186
- arg: ['argument'],
187
- 'arg-name': ['argument-name'],
188
- 'arg-default': ['argument-default'],
189
- 'arg-redundant': ['argument-redundant'],
190
- template: ['tpl'],
191
- 'template-name': ['tpl-name'],
192
- 'magic-word': ['parser-function', 'parser-func'],
193
- 'invoke-function': ['invoke-func'],
194
- 'invoke-module': ['invoke-mod'],
195
- parameter: ['param'],
196
- 'parameter-key': ['param-key'],
197
- 'parameter-value': ['parameter-val', 'param-value', 'param-val'],
198
- heading: ['header'],
199
- 'heading-title': ['header-title'],
200
- table: ['tbl'],
201
- 'table-inter': ['tbl-inter'],
202
- tr: ['table-row', 'tbl-row'],
203
- td: ['table-cell', 'tbl-cell', 'table-data', 'tbl-data'],
204
- 'double-underscore': ['underscore', 'behavior-switch', 'behaviour-switch'],
205
- hr: ['horizontal'],
206
- category: ['category-link', 'cat', 'cat-link'],
207
- file: ['file-link', 'image', 'image-link', 'img', 'img-link'],
208
- 'image-parameter': ['img-parameter', 'image-param', 'img-param'],
209
- quote: ['quotes', 'quot', 'apostrophe', 'apostrophes', 'apos'],
210
- 'ext-link': ['external-link'],
211
- 'ext-link-text': ['external-link-text'],
212
- 'ext-link-url': ['external-link-url'],
213
- 'free-ext-link': ['free-external-link', 'magic-link'],
214
- dd: ['indent', 'indentation'],
215
- converter: ['convert', 'conversion'],
216
- 'converter-flags': ['convert-flags', 'conversion-flags', 'converter-flag', 'convert-flag', 'conversion-flag'],
217
- 'converter-rule': ['convert-rule', 'conversion-rule'],
218
- 'converter-rule-noconvert': ['convert-rule-noconvert', 'conversion-rule-noconvert'],
219
- 'converter-rule-variant': ['convert-rule-variant', 'conversion-rule-variant'],
220
- 'converter-rule-to': ['convert-rule-to', 'conversion-rule-to'],
221
- 'converter-rule-from': ['convert-rule-from', 'conversion-rule-from'],
222
- },
223
256
  };
224
257
 
225
258
  const /** @type {PropertyDescriptorMap} */ def = {};
226
259
  for (const key in Parser) {
227
- if (['alises', 'MAX_STAGE'].includes(key)) {
260
+ if (['aliases', 'MAX_STAGE', 'typeAliases', 'promises'].includes(key)) {
228
261
  def[key] = {enumerable: false, writable: false};
229
262
  } else if (!['config', 'isInterwiki', 'normalizeTitle', 'parse', 'getTool'].includes(key)) {
230
263
  def[key] = {enumerable: false};