wikiparser-node 0.3.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/README.md +1 -1
  2. package/config/default.json +13 -17
  3. package/config/llwiki.json +11 -79
  4. package/config/moegirl.json +7 -1
  5. package/config/zhwiki.json +1269 -0
  6. package/index.js +130 -97
  7. package/lib/element.js +410 -518
  8. package/lib/node.js +493 -115
  9. package/lib/ranges.js +27 -19
  10. package/lib/text.js +175 -0
  11. package/lib/title.js +14 -6
  12. package/mixin/attributeParent.js +70 -24
  13. package/mixin/fixedToken.js +18 -10
  14. package/mixin/hidden.js +6 -4
  15. package/mixin/sol.js +39 -12
  16. package/package.json +17 -4
  17. package/parser/brackets.js +18 -18
  18. package/parser/commentAndExt.js +16 -14
  19. package/parser/converter.js +14 -13
  20. package/parser/externalLinks.js +12 -11
  21. package/parser/hrAndDoubleUnderscore.js +24 -14
  22. package/parser/html.js +8 -7
  23. package/parser/links.js +13 -13
  24. package/parser/list.js +12 -11
  25. package/parser/magicLinks.js +11 -10
  26. package/parser/quotes.js +6 -5
  27. package/parser/selector.js +175 -0
  28. package/parser/table.js +31 -24
  29. package/src/arg.js +91 -43
  30. package/src/atom/hidden.js +5 -2
  31. package/src/atom/index.js +17 -9
  32. package/src/attribute.js +210 -101
  33. package/src/converter.js +78 -43
  34. package/src/converterFlags.js +104 -45
  35. package/src/converterRule.js +136 -78
  36. package/src/extLink.js +81 -27
  37. package/src/gallery.js +63 -20
  38. package/src/heading.js +58 -20
  39. package/src/html.js +138 -48
  40. package/src/imageParameter.js +93 -58
  41. package/src/index.js +314 -186
  42. package/src/link/category.js +22 -54
  43. package/src/link/file.js +83 -32
  44. package/src/link/galleryImage.js +21 -7
  45. package/src/link/index.js +170 -81
  46. package/src/magicLink.js +64 -14
  47. package/src/nowiki/comment.js +36 -10
  48. package/src/nowiki/dd.js +37 -22
  49. package/src/nowiki/doubleUnderscore.js +21 -7
  50. package/src/nowiki/hr.js +11 -7
  51. package/src/nowiki/index.js +16 -9
  52. package/src/nowiki/list.js +2 -2
  53. package/src/nowiki/noinclude.js +8 -4
  54. package/src/nowiki/quote.js +38 -7
  55. package/src/onlyinclude.js +24 -7
  56. package/src/parameter.js +102 -62
  57. package/src/syntax.js +23 -20
  58. package/src/table/index.js +282 -174
  59. package/src/table/td.js +112 -61
  60. package/src/table/tr.js +135 -74
  61. package/src/tagPair/ext.js +30 -23
  62. package/src/tagPair/include.js +26 -11
  63. package/src/tagPair/index.js +72 -29
  64. package/src/transclude.js +235 -127
  65. package/tool/index.js +42 -32
  66. package/util/debug.js +21 -18
  67. package/util/diff.js +76 -0
  68. package/util/lint.js +40 -0
  69. package/util/string.js +56 -26
  70. package/.eslintrc.json +0 -319
  71. package/errors/README +0 -1
  72. package/jsconfig.json +0 -7
  73. package/printed/README +0 -1
  74. package/typings/element.d.ts +0 -28
  75. package/typings/index.d.ts +0 -52
  76. package/typings/node.d.ts +0 -23
  77. package/typings/parser.d.ts +0 -9
  78. package/typings/table.d.ts +0 -14
  79. package/typings/token.d.ts +0 -22
  80. package/typings/tool.d.ts +0 -10
package/src/attribute.js CHANGED
@@ -1,19 +1,88 @@
1
1
  'use strict';
2
2
 
3
3
  const {externalUse} = require('../util/debug'),
4
+ {generateForSelf} = require('../util/lint'),
4
5
  {toCase, removeComment, normalizeSpace} = require('../util/string'),
5
- /** @type {Parser} */ Parser = require('..'),
6
+ Parser = require('..'),
6
7
  Token = require('.');
7
8
 
8
9
  const stages = {'ext-attr': 0, 'html-attr': 2, 'table-attr': 3};
9
10
 
10
11
  /**
11
12
  * 扩展和HTML标签属性
12
- * @classdesc `{childNodes: [string]|(string|ArgToken|TranscludeToken)[]}`
13
+ * @classdesc `{childNodes: [AstText]|(AstText|ArgToken|TranscludeToken)[]}`
13
14
  */
14
15
  class AttributeToken extends Token {
15
16
  /** @type {Map<string, string|true>} */ #attr = new Map();
16
17
  #sanitized = true;
18
+ #quoteBalance = true;
19
+
20
+ /**
21
+ * @override
22
+ * @param {string} key 属性键
23
+ * @param {string|undefined} equal 属性规则运算符,`equal`存在时`val`和`i`也一定存在
24
+ * @param {string|undefined} val 属性值
25
+ * @param {string|undefined} i 是否对大小写不敏感
26
+ */
27
+ #matchesAttr = (key, equal, val, i) => {
28
+ if (!equal) {
29
+ return this.hasAttr(key);
30
+ } else if (!this.hasAttr(key)) {
31
+ return equal === '!=';
32
+ }
33
+ val = toCase(val, i);
34
+ const attr = this.getAttr(key),
35
+ thisVal = toCase(attr === true ? '' : attr, i);
36
+ switch (equal) {
37
+ case '~=':
38
+ return attr !== true && thisVal.split(/\s/u).includes(val);
39
+ case '|=': // 允许`val === ''`
40
+ return thisVal === val || thisVal.startsWith(`${val}-`);
41
+ case '^=':
42
+ return attr !== true && thisVal.startsWith(val);
43
+ case '$=':
44
+ return attr !== true && thisVal.endsWith(val);
45
+ case '*=':
46
+ return attr !== true && thisVal.includes(val);
47
+ case '!=':
48
+ return thisVal !== val;
49
+ default: // `=`
50
+ return thisVal === val;
51
+ }
52
+ };
53
+
54
+ /**
55
+ * getAttr()方法的getter写法
56
+ * @returns {Record<string, string|true>}
57
+ */
58
+ get attributes() {
59
+ return this.getAttr();
60
+ }
61
+
62
+ /** 以字符串表示的class属性 */
63
+ get className() {
64
+ const attr = this.getAttr('class');
65
+ return typeof attr === 'string' ? attr : '';
66
+ }
67
+
68
+ set className(className) {
69
+ this.setAttr('class', className);
70
+ }
71
+
72
+ /** 以Set表示的class属性 */
73
+ get classList() {
74
+ return new Set(this.className.split(/\s/u));
75
+ }
76
+
77
+ /** id属性 */
78
+ get id() {
79
+ const attr = this.getAttr('id');
80
+ return typeof attr === 'string' ? attr : '';
81
+ }
82
+
83
+ set id(id) {
84
+ this.setAttr('id', id);
85
+ }
17
86
 
18
87
  /**
19
88
  * 从`this.#attr`更新`childNodes`
@@ -22,10 +91,9 @@ class AttributeToken extends Token {
22
91
  #updateFromAttr() {
23
92
  let equal = '=';
24
93
  const ParameterToken = require('./parameter'),
25
- parent = this.closest('ext, parameter');
26
- if (parent instanceof ParameterToken && parent.anon
27
- && parent.parentNode?.matches('template, magic-word#invoke')
28
- ) {
94
+ TranscludeToken = require('./transclude');
95
+ const /** @type {ParameterToken & {parentNode: TranscludeToken}} */ parent = this.closest('ext, parameter');
96
+ if (parent instanceof ParameterToken && parent.anon && parent.parentNode?.isTemplate()) {
29
97
  equal = '{{=}}';
30
98
  }
31
99
  return [...this.#attr].map(([k, v]) => {
@@ -37,7 +105,10 @@ class AttributeToken extends Token {
37
105
  }).join(' ');
38
106
  }
39
107
 
40
- /** @complexity `n` */
108
+ /**
109
+ * 清理标签属性
110
+ * @complexity `n`
111
+ */
41
112
  sanitize() {
42
113
  if (!Parser.running && !this.#sanitized) {
43
114
  Parser.warn(`${this.constructor.name}.sanitize 方法将清理无效属性!`);
@@ -47,6 +118,7 @@ class AttributeToken extends Token {
47
118
  this.replaceChildren(...token.childNodes, true);
48
119
  });
49
120
  this.#sanitized = true;
121
+ this.#quoteBalance = true;
50
122
  }
51
123
 
52
124
  /**
@@ -55,30 +127,42 @@ class AttributeToken extends Token {
55
127
  */
56
128
  #parseAttr() {
57
129
  this.#attr.clear();
58
- let string = this.toString(),
59
- /** @type {Token & {firstChild: string}} */ token;
130
+ let string = this.toString('comment, include, noinclude, heading, html'),
131
+ token;
60
132
  if (this.type !== 'ext-attr' && !Parser.running) {
61
133
  const config = this.getAttribute('config'),
62
134
  include = this.getAttribute('include');
63
- token = Parser.run(() => new Token(string, config).parseOnce(0, include).parseOnce());
64
- string = token.firstChild;
135
+ token = Parser.run(() => {
136
+ const newToken = new Token(string, config),
137
+ parseOnce = newToken.getAttribute('parseOnce');
138
+ parseOnce(0, include);
139
+ return parseOnce();
140
+ });
141
+ string = String(token);
65
142
  }
66
- string = removeComment(string).replace(/\0\d+~\x7f/g, '=');
67
- const build = /** @param {string|boolean} str */ str =>
68
- typeof str === 'boolean' || !token ? str : token.buildFromStr(str).map(String).join('');
69
- for (const [, key,, quoted, unquoted] of string
70
- .matchAll(/([^\s/][^\s/=]*)(?:\s*=\s*(?:(["'])(.*?)(?:\2|$)|(\S*)))?/gs)
143
+ string = removeComment(string).replaceAll(/\0\d+~\x7F/gu, '=');
144
+
145
+ /**
146
+ * 解析并重建标签属性
147
+ * @param {string|boolean} str 半解析的标签属性文本
148
+ */
149
+ const build = str =>
150
+ typeof str === 'boolean' || !token ? str : token.getAttribute('buildFromStr')(str).map(String).join('');
151
+ for (const [, key, quoteStart, quoted, quoteEnd, unquoted] of string
152
+ .matchAll(/([^\s/][^\s/=]*)(?:\s*=\s*(?:(["'])(.*?)(\2|$)|(\S*)))?/gsu)
71
153
  ) {
72
154
  if (!this.setAttr(build(key), build(quoted ?? unquoted ?? true), true)) {
73
155
  this.#sanitized = false;
156
+ } else if (quoteStart !== quoteEnd) {
157
+ this.#quoteBalance = false;
74
158
  }
75
159
  }
76
160
  }
77
161
 
78
162
  /**
79
- * @param {string} attr
80
- * @param {'ext-attr'|'html-attr'|'table-attr'} type
81
- * @param {string} name
163
+ * @param {string} attr 标签属性
164
+ * @param {'ext-attr'|'html-attr'|'table-attr'} type 标签类型
165
+ * @param {string} name 标签名
82
166
  * @param {accum} accum
83
167
  */
84
168
  constructor(attr, type, name, config = Parser.getConfig(), accum = []) {
@@ -87,8 +171,9 @@ class AttributeToken extends Token {
87
171
  this.setAttribute('name', name).#parseAttr();
88
172
  }
89
173
 
174
+ /** @override */
90
175
  cloneNode() {
91
- const cloned = this.cloneChildren();
176
+ const cloned = this.cloneChildNodes();
92
177
  return Parser.run(() => {
93
178
  const token = new AttributeToken(undefined, this.type, this.name, this.getAttribute('config'));
94
179
  token.append(...cloned);
@@ -97,28 +182,31 @@ class AttributeToken extends Token {
97
182
  }
98
183
 
99
184
  /**
185
+ * @override
100
186
  * @template {string} T
101
- * @param {T} key
187
+ * @param {T} key 属性键
102
188
  * @returns {TokenAttribute<T>}
103
189
  */
104
190
  getAttribute(key) {
105
- if (key === 'attr') {
106
- return new Map(this.#attr);
191
+ if (key === 'matchesAttr') {
192
+ return this.#matchesAttr;
107
193
  }
108
- return super.getAttribute(key);
194
+ return key === 'attr' ? new Map(this.#attr) : super.getAttribute(key);
109
195
  }
110
196
 
197
+ /** @override */
111
198
  afterBuild() {
112
199
  if (this.type !== 'ext-attr') {
200
+ const buildFromStr = this.getAttribute('buildFromStr');
113
201
  for (let [key, text] of this.#attr) {
114
202
  let built = false;
115
203
  if (key.includes('\0')) {
116
204
  this.#attr.delete(key);
117
- key = this.buildFromStr(key).map(String).join('');
205
+ key = buildFromStr(key).map(String).join('');
118
206
  built = true;
119
207
  }
120
208
  if (typeof text === 'string' && text.includes('\0')) {
121
- text = this.buildFromStr(text).map(String).join('');
209
+ text = buildFromStr(text).map(String).join('');
122
210
  built = true;
123
211
  }
124
212
  if (built) {
@@ -126,65 +214,74 @@ class AttributeToken extends Token {
126
214
  }
127
215
  }
128
216
  }
129
- const that = this,
130
- /** @type {AstListener} */ attributeListener = ({type, target}) => {
131
- if (type === 'text' || target !== that) {
132
- that.#parseAttr();
133
- }
134
- };
217
+ const /** @type {AstListener} */ attributeListener = ({type, target}) => {
218
+ if (type === 'text' || target !== this) {
219
+ this.#parseAttr();
220
+ }
221
+ };
135
222
  this.addEventListener(['remove', 'insert', 'replace', 'text'], attributeListener);
136
223
  return this;
137
224
  }
138
225
 
139
- /** @param {string} key */
226
+ /**
227
+ * 标签是否具有某属性
228
+ * @param {string} key 属性键
229
+ */
140
230
  hasAttr(key) {
141
- if (typeof key !== 'string') {
142
- this.typeError('hasAttr', 'String');
143
- }
144
- return this.#attr.has(key.toLowerCase().trim());
231
+ return typeof key === 'string' ? this.#attr.has(key.toLowerCase().trim()) : this.typeError('hasAttr', 'String');
145
232
  }
146
233
 
147
234
  /**
235
+ * 获取标签属性
148
236
  * @template {string|undefined} T
149
- * @param {T} key
237
+ * @param {T} key 属性键
150
238
  * @returns {T extends string ? string|true : Record<string, string|true>}
151
239
  */
152
240
  getAttr(key) {
153
241
  if (key === undefined) {
154
242
  return Object.fromEntries(this.#attr);
155
- } else if (typeof key !== 'string') {
156
- this.typeError('getAttr', 'String');
157
243
  }
158
- return this.#attr.get(key.toLowerCase().trim());
244
+ return typeof key === 'string' ? this.#attr.get(key.toLowerCase().trim()) : this.typeError('getAttr', 'String');
159
245
  }
160
246
 
247
+ /** 获取全部的标签属性名 */
161
248
  getAttrNames() {
162
249
  return [...this.#attr.keys()];
163
250
  }
164
251
 
252
+ /** 标签是否具有任意属性 */
165
253
  hasAttrs() {
166
254
  return this.getAttrNames().length > 0;
167
255
  }
168
256
 
169
257
  /**
170
- * @param {string} key
171
- * @param {string|boolean} value
258
+ * 设置标签属性
259
+ * @param {string} key 属性键
260
+ * @param {string|boolean} value 属性值
261
+ * @param {boolean} init 是否是初次解析
172
262
  * @complexity `n`
263
+ * @throws `RangeError` 扩展标签属性不能包含">"
264
+ * @throws `RangeError` 无效的属性名
173
265
  */
174
- setAttr(key, value, init = false) {
266
+ setAttr(key, value, init) {
175
267
  init &&= !externalUse('setAttr');
176
- if (typeof key !== 'string' || !['string', 'boolean'].includes(typeof value)) {
177
- this.typeError('setValue', 'String', 'Boolean');
268
+ if (typeof key !== 'string' || typeof value !== 'string' && typeof value !== 'boolean') {
269
+ this.typeError('setAttr', 'String', 'Boolean');
178
270
  } else if (!init && this.type === 'ext-attr' && typeof value === 'string' && value.includes('>')) {
179
271
  throw new RangeError('扩展标签属性不能包含 ">"!');
180
272
  }
181
273
  key = key.toLowerCase().trim();
182
274
  const config = this.getAttribute('config'),
183
275
  include = this.getAttribute('include'),
184
- parsedKey = this.type !== 'ext-attr' && !init
185
- ? Parser.run(() => new Token(key, config).parseOnce(0, include).parseOnce().firstChild)
186
- : key;
187
- if (!/^(?:[\w:]|\0\d+[t!~{}+-]\x7f)(?:[\w:.-]|\0\d+[t!~{}+-]\x7f)*$/.test(parsedKey)) {
276
+ parsedKey = this.type === 'ext-attr' || init
277
+ ? key
278
+ : Parser.run(() => {
279
+ const token = new Token(key, config),
280
+ parseOnce = token.getAttribute('parseOnce');
281
+ parseOnce(0, include);
282
+ return String(parseOnce());
283
+ });
284
+ if (!/^(?:[\w:]|\0\d+[t!~{}+-]\x7F)(?:[\w:.-]|\0\d+[t!~{}+-]\x7F)*$/u.test(parsedKey)) {
188
285
  if (init) {
189
286
  return false;
190
287
  }
@@ -192,7 +289,7 @@ class AttributeToken extends Token {
192
289
  } else if (value === false) {
193
290
  this.#attr.delete(key);
194
291
  } else {
195
- this.#attr.set(key, value === true ? true : value.replace(/\s/g, ' ').trim());
292
+ this.#attr.set(key, value === true ? true : value.replaceAll(/\s/gu, ' ').trim());
196
293
  }
197
294
  if (!init) {
198
295
  this.sanitize();
@@ -201,7 +298,8 @@ class AttributeToken extends Token {
201
298
  }
202
299
 
203
300
  /**
204
- * @param {string} key
301
+ * 移除标签属性
302
+ * @param {string} key 属性键
205
303
  * @complexity `n`
206
304
  */
207
305
  removeAttr(key) {
@@ -215,9 +313,11 @@ class AttributeToken extends Token {
215
313
  }
216
314
 
217
315
  /**
218
- * @param {string} key
219
- * @param {boolean|undefined} force
316
+ * 开关标签属性
317
+ * @param {string} key 属性键
318
+ * @param {boolean|undefined} force 强制开启或关闭
220
319
  * @complexity `n`
320
+ * @throws `RangeError` 不为Boolean类型的属性值
221
321
  */
222
322
  toggleAttr(key, force) {
223
323
  if (typeof key !== 'string') {
@@ -233,31 +333,71 @@ class AttributeToken extends Token {
233
333
  this.setAttr(key, force === true || force === undefined && value === false);
234
334
  }
235
335
 
336
+ /**
337
+ * 生成引导空格
338
+ * @param {string} str 属性字符串
339
+ */
236
340
  #leadingSpace(str = super.toString()) {
237
- return this.type !== 'table-attr' && str && !/^\s/.test(str) ? ' ' : '';
341
+ return this.type !== 'table-attr' && str && str.trimStart() === str ? ' ' : '';
238
342
  }
239
343
 
240
- /** @this {AttributeToken & Token} */
241
- toString() {
242
- const str = this.type === 'table-attr' ? normalizeSpace(this) : super.toString();
344
+ /**
345
+ * @override
346
+ * @this {AttributeToken & Token}
347
+ * @param {string} selector
348
+ */
349
+ toString(selector) {
350
+ if (this.type === 'table-attr') {
351
+ normalizeSpace(this);
352
+ }
353
+ const str = super.toString(selector);
243
354
  return `${this.#leadingSpace(str)}${str}`;
244
355
  }
245
356
 
357
+ /** @override */
246
358
  getPadding() {
247
359
  return this.#leadingSpace().length;
248
360
  }
249
361
 
362
+ /**
363
+ * @override
364
+ * @this {AttributeToken & {parentNode: HtmlToken}}
365
+ * @param {number} start 起始位置
366
+ */
367
+ lint(start = 0) {
368
+ const HtmlToken = require('./html');
369
+ const errors = super.lint(start);
370
+ let /** @type {{top: number, left: number}} */ rect;
371
+ if (this.type === 'html-attr' && this.parentNode.closing && this.text().trim()) {
372
+ rect = this.getRootNode().posFromIndex(start);
373
+ errors.push(generateForSelf(this, rect, '位于闭合标签的属性'));
374
+ }
375
+ if (!this.#sanitized) {
376
+ rect ||= this.getRootNode().posFromIndex(start);
377
+ errors.push(generateForSelf(this, rect, '包含无效属性'));
378
+ } else if (!this.#quoteBalance) {
379
+ rect ||= this.getRootNode().posFromIndex(start);
380
+ errors.push(generateForSelf(this, rect, '未闭合的引号', 'warning'));
381
+ }
382
+ return errors;
383
+ }
384
+
385
+ /** @override */
250
386
  text() {
251
- let str = this.#updateFromAttr();
252
- str = `${this.#leadingSpace(str)}${str}`;
253
- return this.type === 'table-attr' ? normalizeSpace(str) : str;
387
+ if (this.type === 'table-attr') {
388
+ normalizeSpace(this);
389
+ }
390
+ const str = this.#updateFromAttr();
391
+ return `${this.#leadingSpace(str)}${str}`;
254
392
  }
255
393
 
256
394
  /**
257
- * @param {number} i
395
+ * @override
396
+ * @param {number} i 移除位置
397
+ * @param {boolean} done 是否已解析过改变后的标签属性
258
398
  * @complexity `n`
259
399
  */
260
- removeAt(i, done = false) {
400
+ removeAt(i, done) {
261
401
  done &&= !externalUse('removeAt');
262
402
  done ||= Parser.running;
263
403
  const token = super.removeAt(i);
@@ -268,8 +408,11 @@ class AttributeToken extends Token {
268
408
  }
269
409
 
270
410
  /**
271
- * @template {string|Token} T
272
- * @param {T} token
411
+ * @override
412
+ * @template {Token} T
413
+ * @param {T} token 待插入的节点
414
+ * @param {number} i 插入位置
415
+ * @param {boolean} done 是否已解析过改变后的标签属性
273
416
  * @complexity `n`
274
417
  */
275
418
  insertAt(token, i = this.childNodes.length, done = false) {
@@ -283,7 +426,8 @@ class AttributeToken extends Token {
283
426
  }
284
427
 
285
428
  /**
286
- * @param {...string|Token} elements
429
+ * @override
430
+ * @param {...Token} elements 待替换的子节点
287
431
  * @complexity `n²`
288
432
  */
289
433
  replaceChildren(...elements) {
@@ -299,41 +443,6 @@ class AttributeToken extends Token {
299
443
  this.insertAt(element, undefined, done);
300
444
  }
301
445
  }
302
-
303
- /**
304
- * @param {string} key
305
- * @param {string|undefined} equal - `equal`存在时`val`和`i`也一定存在
306
- * @param {string|undefined} val
307
- * @param {string|undefined} i
308
- */
309
- matchesAttr(key, equal, val, i) {
310
- if (externalUse('matchesAttr')) {
311
- throw new Error(`禁止外部调用 ${this.constructor.name}.matchesAttr 方法!`);
312
- } else if (!equal) {
313
- return this.hasAttr(key);
314
- } else if (!this.hasAttr(key)) {
315
- return equal === '!=';
316
- }
317
- val = toCase(val, i);
318
- const attr = this.getAttr(key),
319
- thisVal = toCase(attr === true ? '' : attr, i);
320
- switch (equal) {
321
- case '~=':
322
- return attr !== true && thisVal.split(/\s/).some(v => v === val);
323
- case '|=': // 允许`val === ''`
324
- return thisVal === val || thisVal.startsWith(`${val}-`);
325
- case '^=':
326
- return attr !== true && thisVal.startsWith(val);
327
- case '$=':
328
- return attr !== true && thisVal.endsWith(val);
329
- case '*=':
330
- return attr !== true && thisVal.includes(val);
331
- case '!=':
332
- return thisVal !== val;
333
- default: // `=`
334
- return thisVal === val;
335
- }
336
- }
337
446
  }
338
447
 
339
448
  Parser.classes.AttributeToken = __filename;