wikiparser-node 0.3.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/README.md +1 -1
  2. package/config/default.json +13 -17
  3. package/config/llwiki.json +11 -79
  4. package/config/moegirl.json +7 -1
  5. package/config/zhwiki.json +1269 -0
  6. package/index.js +130 -97
  7. package/lib/element.js +410 -518
  8. package/lib/node.js +493 -115
  9. package/lib/ranges.js +27 -19
  10. package/lib/text.js +175 -0
  11. package/lib/title.js +14 -6
  12. package/mixin/attributeParent.js +70 -24
  13. package/mixin/fixedToken.js +18 -10
  14. package/mixin/hidden.js +6 -4
  15. package/mixin/sol.js +39 -12
  16. package/package.json +17 -4
  17. package/parser/brackets.js +18 -18
  18. package/parser/commentAndExt.js +16 -14
  19. package/parser/converter.js +14 -13
  20. package/parser/externalLinks.js +12 -11
  21. package/parser/hrAndDoubleUnderscore.js +24 -14
  22. package/parser/html.js +8 -7
  23. package/parser/links.js +13 -13
  24. package/parser/list.js +12 -11
  25. package/parser/magicLinks.js +11 -10
  26. package/parser/quotes.js +6 -5
  27. package/parser/selector.js +175 -0
  28. package/parser/table.js +31 -24
  29. package/src/arg.js +91 -43
  30. package/src/atom/hidden.js +5 -2
  31. package/src/atom/index.js +17 -9
  32. package/src/attribute.js +210 -101
  33. package/src/converter.js +78 -43
  34. package/src/converterFlags.js +104 -45
  35. package/src/converterRule.js +136 -78
  36. package/src/extLink.js +81 -27
  37. package/src/gallery.js +63 -20
  38. package/src/heading.js +58 -20
  39. package/src/html.js +138 -48
  40. package/src/imageParameter.js +93 -58
  41. package/src/index.js +314 -186
  42. package/src/link/category.js +22 -54
  43. package/src/link/file.js +83 -32
  44. package/src/link/galleryImage.js +21 -7
  45. package/src/link/index.js +170 -81
  46. package/src/magicLink.js +64 -14
  47. package/src/nowiki/comment.js +36 -10
  48. package/src/nowiki/dd.js +37 -22
  49. package/src/nowiki/doubleUnderscore.js +21 -7
  50. package/src/nowiki/hr.js +11 -7
  51. package/src/nowiki/index.js +16 -9
  52. package/src/nowiki/list.js +2 -2
  53. package/src/nowiki/noinclude.js +8 -4
  54. package/src/nowiki/quote.js +38 -7
  55. package/src/onlyinclude.js +24 -7
  56. package/src/parameter.js +102 -62
  57. package/src/syntax.js +23 -20
  58. package/src/table/index.js +282 -174
  59. package/src/table/td.js +112 -61
  60. package/src/table/tr.js +135 -74
  61. package/src/tagPair/ext.js +30 -23
  62. package/src/tagPair/include.js +26 -11
  63. package/src/tagPair/index.js +72 -29
  64. package/src/transclude.js +235 -127
  65. package/tool/index.js +42 -32
  66. package/util/debug.js +21 -18
  67. package/util/diff.js +76 -0
  68. package/util/lint.js +40 -0
  69. package/util/string.js +56 -26
  70. package/.eslintrc.json +0 -319
  71. package/errors/README +0 -1
  72. package/jsconfig.json +0 -7
  73. package/printed/README +0 -1
  74. package/typings/element.d.ts +0 -28
  75. package/typings/index.d.ts +0 -52
  76. package/typings/node.d.ts +0 -23
  77. package/typings/parser.d.ts +0 -9
  78. package/typings/table.d.ts +0 -14
  79. package/typings/token.d.ts +0 -22
  80. package/typings/tool.d.ts +0 -10
package/lib/element.js CHANGED
@@ -1,172 +1,194 @@
1
1
  'use strict';
2
2
 
3
3
  const fs = require('fs'),
4
+ path = require('path'),
4
5
  {externalUse} = require('../util/debug'),
5
- {toCase, noWrap} = require('../util/string'),
6
+ {toCase, noWrap, print} = require('../util/string'),
6
7
  {nth} = require('./ranges'),
7
- EventEmitter = require('events'),
8
+ parseSelector = require('../parser/selector'),
9
+ Parser = require('..'),
8
10
  AstNode = require('./node'),
9
- /** @type {Parser} */ Parser = require('..');
10
-
11
+ AstText = require('./text');
12
+
13
+ /**
14
+ * 检测:lang()伪选择器
15
+ * @param {AstElement & {attributes: Records<string, string|true>}} node 节点
16
+ * @param {RegExp} regex 语言正则
17
+ */
18
+ const matchesLang = ({attributes}, regex) => {
19
+ const /** @type {string} */ lang = attributes?.lang;
20
+ return typeof lang === 'string' && regex.test(lang);
21
+ };
22
+
23
+ /** 类似HTMLElement */
11
24
  class AstElement extends AstNode {
12
- /** @type {string} */ type;
13
25
  /** @type {string} */ name;
14
- #events = new EventEmitter();
15
26
 
16
- /** @complexity `n` */
27
+ /**
28
+ * 检查是否符合某条属性规则
29
+ * @param {string} key 属性键
30
+ * @param {string|undefined} equal 属性规则运算符,`equal`存在时`val`和`i`也一定存在
31
+ * @param {string|undefined} val 属性值
32
+ * @param {string|undefined} i 是否对大小写不敏感
33
+ */
34
+ #matchesAttr = (key, equal, val, i) => {
35
+ if (!equal) {
36
+ return this.hasAttribute(key);
37
+ } else if (!this.hasAttribute(key)) {
38
+ return equal === '!=';
39
+ }
40
+ val = toCase(val, i);
41
+ if (equal === '~=') {
42
+ let /** @type {Iterable<string>} */ thisVals = this[key];
43
+ if (typeof thisVals === 'string') {
44
+ thisVals = thisVals.split(/\s/u);
45
+ }
46
+ return Boolean(thisVals?.[Symbol.iterator]) && [...thisVals].some(v => toCase(v, i) === val);
47
+ }
48
+ const thisVal = toCase(this.getAttribute(key), i);
49
+ switch (equal) {
50
+ case '|=':
51
+ return thisVal === val || thisVal.startsWith(`${val}-`);
52
+ case '^=':
53
+ return thisVal.startsWith(val);
54
+ case '$=':
55
+ return thisVal.endsWith(val);
56
+ case '*=':
57
+ return thisVal.includes(val);
58
+ case '!=':
59
+ return thisVal !== val;
60
+ default: // `=`
61
+ return thisVal === val;
62
+ }
63
+ };
64
+
65
+ /**
66
+ * 全部非文本子节点
67
+ * @complexity `n`
68
+ */
17
69
  get children() {
18
- const /** @type {this[]} */ children = this.childNodes.filter(ele => ele instanceof AstElement);
70
+ const /** @type {this[]} */ children = this.childNodes.filter(({type}) => type !== 'text');
19
71
  return children;
20
72
  }
21
- /** @complexity `n` */
73
+
74
+ /**
75
+ * 非文本子节点总数
76
+ * @complexity `n`
77
+ */
22
78
  get childElementCount() {
23
79
  return this.children.length;
24
80
  }
25
- /** @returns {this} */
81
+
82
+ /**
83
+ * 首位非文本子节点
84
+ * @returns {this}
85
+ */
26
86
  get firstElementChild() {
27
- return this.childNodes.find(ele => ele instanceof AstElement);
87
+ return this.childNodes.find(({type}) => type !== 'text');
28
88
  }
29
- /** @complexity `n` */
89
+
90
+ /**
91
+ * 末位非文本子节点
92
+ * @returns {this}
93
+ */
30
94
  get lastElementChild() {
31
- return this.children.at(-1);
95
+ return this.childNodes.findLast(({type}) => type !== 'text');
32
96
  }
97
+
98
+ /** 父节点 */
33
99
  get parentElement() {
34
100
  return this.parentNode;
35
101
  }
36
- get isConnected() {
37
- return this.getRootNode().type === 'root';
38
- }
39
- /** @complexity `n` */
40
- get nextElementSibling() {
41
- const children = this.parentElement?.children;
42
- return children && children[children.indexOf(this) + 1];
43
- }
44
- /** @complexity `n` */
45
- get previousElementSibling() {
46
- const children = this.parentElement?.children;
47
- return children && children[children.indexOf(this) - 1];
102
+
103
+ /**
104
+ * AstElement.prototype.text()的getter写法
105
+ * @complexity `n`
106
+ */
107
+ get outerText() {
108
+ return this.text();
48
109
  }
49
- /** @complexity `n` */
110
+
111
+ /**
112
+ * 不可见
113
+ */
50
114
  get hidden() {
51
115
  return this.text() === '';
52
116
  }
53
- /** @complexity `n` */
117
+
118
+ /**
119
+ * 后一个可见的兄弟节点
120
+ * @complexity `n`
121
+ */
54
122
  get nextVisibleSibling() {
55
123
  let {nextSibling} = this;
56
- while (nextSibling === '' || nextSibling instanceof AstElement && nextSibling.hidden) {
124
+ while (nextSibling?.text() === '') {
57
125
  ({nextSibling} = nextSibling);
58
126
  }
59
127
  return nextSibling;
60
128
  }
61
- /** @complexity `n` */
129
+
130
+ /**
131
+ * 前一个可见的兄弟节点
132
+ * @complexity `n`
133
+ */
62
134
  get previousVisibleSibling() {
63
135
  let {previousSibling} = this;
64
- while (previousSibling === '' || previousSibling instanceof AstElement && previousSibling.hidden) {
136
+ while (previousSibling?.text() === '') {
65
137
  ({previousSibling} = previousSibling);
66
138
  }
67
139
  return previousSibling;
68
140
  }
69
141
 
70
- constructor() {
71
- super();
72
- this.seal('name');
142
+ /** 内部高度 */
143
+ get clientHeight() {
144
+ const {innerText} = this;
145
+ return typeof innerText === 'string' ? innerText.split('\n').length : undefined;
73
146
  }
74
147
 
75
- /** @complexity `n` */
76
- destroy() {
77
- if (this.parentNode) {
78
- throw new Error('不能销毁子节点!');
79
- }
80
- for (const element of this.children) {
81
- element.setAttribute('parentNode');
82
- }
83
- Object.setPrototypeOf(this, null);
148
+ /** 内部宽度 */
149
+ get clientWidth() {
150
+ const {innerText} = this;
151
+ return typeof innerText === 'string' ? innerText.split('\n').at(-1).length : undefined;
84
152
  }
85
153
 
86
- /**
87
- * @param {string|string[]} types
88
- * @param {AstListener} listener
89
- * @param {{once: boolean}} options
90
- */
91
- addEventListener(types, listener, options) {
92
- if (Array.isArray(types)) {
93
- for (const type of types) {
94
- this.addEventListener(type, listener, options);
95
- }
96
- } else if (typeof types !== 'string' || typeof listener !== 'function') {
97
- this.typeError('addEventListener', 'String', 'Function');
98
- } else {
99
- this.#events[options?.once ? 'once' : 'on'](types, listener);
100
- }
154
+ constructor() {
155
+ super();
156
+ this.seal('name');
101
157
  }
102
158
 
103
159
  /**
104
- * @param {string|string[]} types
105
- * @param {AstListener} listener
160
+ * 销毁
161
+ * @complexity `n`
162
+ * @param {boolean} deep 是否从根节点开始销毁
163
+ * @throws `Error` 不能销毁子节点
106
164
  */
107
- removeEventListener(types, listener) {
108
- if (Array.isArray(types)) {
109
- for (const type of types) {
110
- this.removeEventListener(type, listener);
111
- }
112
- } else if (typeof types !== 'string' || typeof listener !== 'function') {
113
- this.typeError('removeEventListener', 'String', 'Function');
114
- } else {
115
- this.#events.off(types, listener);
165
+ destroy(deep) {
166
+ if (this.parentNode && !deep) {
167
+ throw new Error('不能销毁子节点!');
116
168
  }
117
- }
118
-
119
- /** @param {string|string[]} types */
120
- removeAllEventListeners(types) {
121
- if (Array.isArray(types)) {
122
- for (const type of types) {
123
- this.removeAllEventListeners(type);
124
- }
125
- } else if (types !== undefined && typeof types !== 'string') {
126
- this.typeError('removeAllEventListeners', 'String');
127
- } else {
128
- this.#events.removeAllListeners(types);
169
+ this.parentNode?.destroy(deep);
170
+ for (const child of this.childNodes) {
171
+ child.setAttribute('parentNode');
129
172
  }
173
+ Object.setPrototypeOf(this, null);
130
174
  }
131
175
 
132
176
  /**
133
- * @param {string} type
134
- * @returns {AstListener[]}
135
- */
136
- listEventListeners(type) {
137
- if (typeof type !== 'string') {
138
- this.typeError('listEventListeners', 'String');
139
- }
140
- return this.#events.listeners(type);
141
- }
142
-
143
- /**
144
- * @param {AstEvent} e
145
- * @param {any} data
177
+ * @override
178
+ * @template {string} T
179
+ * @param {T} key 属性键
180
+ * @returns {TokenAttribute<T>}
146
181
  */
147
- dispatchEvent(e, data) {
148
- if (!(e instanceof Event)) {
149
- this.typeError('dispatchEvent', 'Event');
150
- } else if (!e.target) { // 初始化
151
- Object.defineProperty(e, 'target', {value: this, enumerable: true});
152
- e.stopPropagation = function() {
153
- Object.defineProperty(this, 'bubbles', {value: false});
154
- };
155
- }
156
- Object.defineProperties(e, { // 每次bubble更新
157
- prevTarget: {value: e.currentTarget, enumerable: true, configurable: true},
158
- currentTarget: {value: this, enumerable: true, configurable: true},
159
- });
160
- this.#events.emit(e.type, e, data);
161
- if (e.bubbles && this.parentElement) {
162
- this.parentElement.dispatchEvent(e, data);
163
- }
182
+ getAttribute(key) {
183
+ return key === 'matchesAttr' ? this.#matchesAttr : super.getAttribute(key);
164
184
  }
165
185
 
166
186
  /**
187
+ * @override
167
188
  * @template {string} T
168
- * @param {T} key
169
- * @param {TokenAttribute<T>} value
189
+ * @param {T} key 属性键
190
+ * @param {TokenAttribute<T>} value 属性值
191
+ * @throws `RangeError` 禁止手动指定的属性
170
192
  */
171
193
  setAttribute(key, value) {
172
194
  if (key === 'name' && externalUse('setAttribute')) {
@@ -175,36 +197,9 @@ class AstElement extends AstNode {
175
197
  return super.setAttribute(key, value);
176
198
  }
177
199
 
178
- /** @param {number} i */
179
- removeAt(i) {
180
- const element = super.removeAt(i),
181
- e = new Event('remove', {bubbles: true});
182
- this.dispatchEvent(e, {position: i, removed: element});
183
- return element;
184
- }
185
-
186
200
  /**
187
- * @template {string|this} T
188
- * @param {T} element
189
- * @complexity `n`
190
- */
191
- insertAt(element, i = this.childNodes.length) {
192
- super.insertAt(element, i);
193
- const e = new Event('insert', {bubbles: true});
194
- this.dispatchEvent(e, {position: i < 0 ? i + this.childNodes.length - 1 : i, inserted: element});
195
- return element;
196
- }
197
-
198
- /** @param {string} str */
199
- setText(str, i = 0) {
200
- const oldText = super.setText(str, i),
201
- e = new Event('text', {bubbles: true});
202
- this.dispatchEvent(e, {position: i, oldText, newText: str});
203
- return oldText;
204
- }
205
-
206
- /**
207
- * @param {...string|this} elements
201
+ * 在末尾批量插入子节点
202
+ * @param {...this} elements 插入节点
208
203
  * @complexity `n`
209
204
  */
210
205
  append(...elements) {
@@ -214,17 +209,19 @@ class AstElement extends AstNode {
214
209
  }
215
210
 
216
211
  /**
217
- * @param {...string|this} elements
212
+ * 在开头批量插入子节点
213
+ * @param {...this} elements 插入节点
218
214
  * @complexity `n`
219
215
  */
220
216
  prepend(...elements) {
221
- for (const [i, element] of elements.entries()) {
222
- this.insertAt(element, i);
217
+ for (let i = 0; i < elements.length; i++) {
218
+ this.insertAt(elements[i], i);
223
219
  }
224
220
  }
225
221
 
226
222
  /**
227
- * @param {...string|this} elements
223
+ * 批量替换子节点
224
+ * @param {...this} elements 新的子节点
228
225
  * @complexity `n`
229
226
  */
230
227
  replaceChildren(...elements) {
@@ -235,253 +232,211 @@ class AstElement extends AstNode {
235
232
  }
236
233
 
237
234
  /**
238
- * @param {(string|this)[]} elements
239
- * @param {number} offset
240
- * @complexity `n`
235
+ * 修改文本子节点
236
+ * @param {string} str 新文本
237
+ * @param {number} i 子节点位置
238
+ * @throws `RangeError` 对应位置的子节点不是文本节点
241
239
  */
242
- #insertAdjacent(elements, offset) {
243
- const {parentNode} = this;
244
- if (!parentNode) {
245
- throw new Error('不存在父节点!');
246
- }
247
- const i = parentNode.childNodes.indexOf(this) + offset;
248
- for (const [j, element] of elements.entries()) {
249
- parentNode.insertAt(element, i + j);
240
+ setText(str, i = 0) {
241
+ this.getAttribute('verifyChild')(i);
242
+ const /** @type {AstText} */ oldText = this.childNodes.at(i),
243
+ {type, data, constructor: {name}} = oldText;
244
+ if (type === 'text') {
245
+ oldText.replaceData(str);
246
+ return data;
250
247
  }
248
+ throw new RangeError(`第 ${i} 个子节点是 ${name}!`);
251
249
  }
252
250
 
253
- /**
254
- * @param {...string|this} elements
255
- * @complexity `n`
256
- */
257
- after(...elements) {
258
- this.#insertAdjacent(elements, 1);
259
- }
260
-
261
- /**
262
- * @param {...string|this} elements
263
- * @complexity `n`
264
- */
265
- before(...elements) {
266
- this.#insertAdjacent(elements, 0);
267
- }
268
-
269
- /** @complexity `n` */
270
- remove() {
271
- const {parentNode} = this;
251
+ /** 是否受保护。保护条件来自Token,这里仅提前用于:required和:optional伪选择器。 */
252
+ #isProtected() {
253
+ const /** @type {{parentNode: AstElement & {constructor: {fixed: boolean}}}} */ {parentNode} = this;
272
254
  if (!parentNode) {
273
- throw new Error('不存在父节点!');
255
+ return undefined;
274
256
  }
275
- parentNode.removeChild(this);
257
+ const {childNodes, constructor: {fixed}} = parentNode,
258
+ protectedIndices = parentNode.getAttribute('protectedChildren')?.applyTo(childNodes);
259
+ return fixed || protectedIndices?.includes(childNodes.indexOf(this));
276
260
  }
277
261
 
278
262
  /**
279
- * @param {...string|this} elements
280
- * @complexity `n`
263
+ * 检查是否符合解析后的选择器,不含节点关系
264
+ * @this {AstElement & {link: string, constructor: {fixed: boolean}}}
265
+ * @param {SelectorArray} step 解析后的选择器
266
+ * @throws `SyntaxError` 未定义的伪选择器
281
267
  */
282
- replaceWith(...elements) {
283
- this.after(...elements);
284
- this.remove();
285
- }
286
-
287
- /**
288
- * @param {string} key
289
- * @param {string|undefined} equal - `equal`存在时`val`和`i`也一定存在
290
- * @param {string|undefined} val
291
- * @param {string|undefined} i
292
- */
293
- matchesAttr(key, equal, val, i) {
294
- if (externalUse('matchesAttr')) {
295
- throw new Error(`禁止外部调用 ${this.constructor.name}.matchesAttr 方法!`);
296
- } else if (!equal) {
297
- return this.hasAttribute(key);
298
- } else if (!this.hasAttribute(key)) {
299
- return equal === '!=';
300
- }
301
- val = toCase(val, i);
302
- if (equal === '~=') {
303
- let /** @type {Iterable<string>} */ thisVals = this[key];
304
- if (typeof thisVals === 'string') {
305
- thisVals = thisVals.split(/\s/);
268
+ #matches(step) {
269
+ const {parentNode, type, name, childNodes, link, constructor: {fixed, name: tokenName}} = this,
270
+ children = parentNode?.children,
271
+ childrenOfType = children?.filter(({type: t}) => t === type),
272
+ siblingsCount = children?.length ?? 1,
273
+ siblingsCountOfType = childrenOfType?.length ?? 1,
274
+ index = (children?.indexOf(this) ?? 0) + 1,
275
+ indexOfType = (childrenOfType?.indexOf(this) ?? 0) + 1,
276
+ lastIndex = siblingsCount - index + 1,
277
+ lastIndexOfType = siblingsCountOfType - indexOfType + 1;
278
+ return step.every(selector => {
279
+ if (typeof selector === 'string') {
280
+ switch (selector) { // 情形1:简单伪选择器、type和name
281
+ case '*':
282
+ return true;
283
+ case ':root':
284
+ return !parentNode;
285
+ case ':first-child':
286
+ return index === 1;
287
+ case ':first-of-type':
288
+ return indexOfType === 1;
289
+ case ':last-child':
290
+ return lastIndex === 1;
291
+ case ':last-of-type':
292
+ return lastIndexOfType === 1;
293
+ case ':only-child':
294
+ return siblingsCount === 1;
295
+ case ':only-of-type':
296
+ return siblingsCountOfType === 1;
297
+ case ':empty':
298
+ return !childNodes.some(child => child instanceof AstElement || String(child));
299
+ case ':parent':
300
+ return childNodes.some(child => child instanceof AstElement || String(child));
301
+ case ':header':
302
+ return type === 'heading';
303
+ case ':hidden':
304
+ return this.text() === '';
305
+ case ':visible':
306
+ return this.text() !== '';
307
+ case ':only-whitespace':
308
+ return this.text().trim() === '';
309
+ case ':any-link':
310
+ return type === 'link' || type === 'free-ext-link' || type === 'ext-link'
311
+ || (type === 'file' || type === 'gallery-image' && link);
312
+ case ':local-link':
313
+ return (type === 'link' || type === 'file' || type === 'gallery-image')
314
+ && link?.[0] === '#';
315
+ case ':read-only':
316
+ return fixed;
317
+ case ':read-write':
318
+ return !fixed;
319
+ case ':invalid':
320
+ return type === 'table-inter' || tokenName === 'HiddenToken';
321
+ case ':required':
322
+ return this.#isProtected() === true;
323
+ case ':optional':
324
+ return this.#isProtected() === false;
325
+ default: {
326
+ const [t, n] = selector.split('#');
327
+ return (!t || t === type || Boolean(Parser.typeAliases[type]?.includes(t)))
328
+ && (!n || n === name);
329
+ }
330
+ }
331
+ } else if (selector.length === 4) { // 情形2:属性选择器
332
+ return this.getAttribute('matchesAttr')(...selector);
306
333
  }
307
- return Boolean(thisVals?.[Symbol.iterator]) && [...thisVals].some(v => toCase(v, i) === val);
308
- }
309
- const thisVal = toCase(this.getAttribute(key), i);
310
- switch (equal) {
311
- case '|=':
312
- return thisVal === val || thisVal.startsWith(`${val}-`);
313
- case '^=':
314
- return thisVal.startsWith(val);
315
- case '$=':
316
- return thisVal.endsWith(val);
317
- case '*=':
318
- return thisVal.includes(val);
319
- case '!=':
320
- return thisVal !== val;
321
- default: // `=`
322
- return thisVal === val;
323
- }
334
+ const [s, pseudo] = selector; // 情形3:复杂伪选择器
335
+ switch (pseudo) {
336
+ case 'is':
337
+ return this.matches(s);
338
+ case 'not':
339
+ return !this.matches(s);
340
+ case 'nth-child':
341
+ return nth(s, index);
342
+ case 'nth-of-type':
343
+ return nth(s, indexOfType);
344
+ case 'nth-last-child':
345
+ return nth(s, lastIndex);
346
+ case 'nth-last-of-type':
347
+ return nth(s, lastIndexOfType);
348
+ case 'contains':
349
+ return this.text().includes(s);
350
+ case 'has':
351
+ return Boolean(this.querySelector(s));
352
+ case 'lang': {
353
+ const regex = new RegExp(`^${s}(?:-|$)`, 'u');
354
+ return matchesLang(this, regex)
355
+ || this.getAncestors().some(ancestor => matchesLang(ancestor, regex));
356
+ }
357
+ default:
358
+ throw new SyntaxError(`未定义的伪选择器!${pseudo}`);
359
+ }
360
+ });
324
361
  }
325
362
 
326
- /** @type {Record<pseudo, boolean>} */ static #pseudo = {
327
- root: false,
328
- is: true,
329
- not: true,
330
- 'nth-child': true,
331
- 'nth-of-type': true,
332
- 'nth-last-child': true,
333
- 'nth-last-of-type': true,
334
- 'first-child': false,
335
- 'first-of-type': false,
336
- 'last-child': false,
337
- 'last-of-type': false,
338
- 'only-child': false,
339
- 'only-of-type': false,
340
- empty: false,
341
- contains: true,
342
- has: true,
343
- header: false,
344
- parent: false,
345
- hidden: false,
346
- visible: false,
347
- };
348
- /** @type {pseudo[]} */ static #pseudoKeys = Object.keys(AstElement.#pseudo);
349
- static #pseudoRegex = RegExp(
350
- `:(${this.#pseudoKeys.join('|')})(?:\\(\\s*("[^"]*"|'[^']*'|[^()]*?)\\s*\\))?(?=:|\\s*(?:,|$))`,
351
- 'g',
352
- );
353
- static #simplePseudoRegex = RegExp(`:(?:${this.#pseudoKeys.join('|')})(?:\\(.*?\\))?(?=:|\\s*(?:,|$))`, 'g');
354
-
355
363
  /**
364
+ * 检查是否符合选择器
365
+ * @param {string|SelectorArray[]} selector
356
366
  * @returns {boolean}
357
367
  * @complexity `n`
358
368
  */
359
- matches(selector = '', simple = false) {
360
- if (typeof selector !== 'string') {
369
+ matches(selector = '') {
370
+ if (typeof selector === 'string') {
371
+ return Parser.run(() => {
372
+ const stack = parseSelector(selector),
373
+ /** @type {Set<string>} */
374
+ pseudos = new Set(stack.flat(2).filter(step => typeof step === 'string' && step[0] === ':'));
375
+ if (pseudos.size > 0) {
376
+ Parser.warn('检测到伪选择器,请确认是否需要将":"转义成"\\:"。', pseudos);
377
+ }
378
+ return stack.some(condition => this.matches(condition));
379
+ });
380
+ } else if (!Parser.running) {
361
381
  this.typeError('matches', 'String');
362
- } else if (!selector.trim()) {
363
- return true;
364
- }
365
- simple &&= Parser.running;
366
- const /** @type {Record<string, string>} */ escapedQuotes = {'"': '&quot;', "'": '&apos;'},
367
- escapedSelector = selector.replace(/\\["']/g, m => escapedQuotes[m[1]]);
368
- if (simple || escapedSelector.search(AstElement.#pseudoRegex) === -1) {
369
- if (!simple && selector.includes(',')) {
370
- return Parser.run(() => selector.split(',').some(str => this.matches(str, true)));
371
- }
372
- const mt = escapedSelector.match(AstElement.#simplePseudoRegex);
373
- if (mt) {
374
- Parser.error(
375
- '检测到不规范的伪选择器!嵌套伪选择器时请使用引号包裹内层,多层嵌套时请使用"\\"转义引号。',
376
- mt.map(s => s.replace(
377
- /&(quot|apos);/g,
378
- /** @param {string} p1 */ (_, p1) => `\\${p1 === 'quot' ? '"' : "'"}`,
379
- )),
380
- );
381
- }
382
- const /** @type {Record<string, string>} */ entities = {comma: ',', ratio: ':'},
383
- /** @type {string[][]} */ attributes = [],
384
- plainSelector = selector.replace(
385
- /&(comma|ratio);/g, /** @param {string} name */ (_, name) => entities[name],
386
- ).replace(
387
- /\[\s*(\w+)\s*(?:([~|^$*!]?=)\s*("[^"]*"|'[^']*'|[^\s[\]]+)(?:\s+(i))?\s*)?\]/g,
388
- /** @type {function(...string): ''} */ (_, key, equal, val, i) => {
389
- if (equal) {
390
- const quotes = /^(["']).*\1$/.exec(val)?.[1];
391
- attributes.push([
392
- key,
393
- equal,
394
- quotes ? val.slice(1, -1).replaceAll(escapedQuotes[quotes], quotes) : val,
395
- i,
396
- ]);
397
- } else {
398
- attributes.push([key]);
399
- }
400
- return '';
401
- },
402
- ),
403
- [type, ...parts] = plainSelector.trim().split('#'),
404
- name = parts.join('#');
405
- return (!type || this.type === type || Boolean(Parser.typeAliases[this.type]?.includes(type)))
406
- && (!name || this.name === name)
407
- && attributes.every(args => this.matchesAttr(...args));
408
382
  }
409
- /*
410
- * 先将`\\'`转义成`&apos;`,将`\\"`转义成`&quot;`,即escapedSelector
411
- * 在去掉一重`:pseudo()`时,如果使用了`'`,则将内部的`&apos;`解码成`'`;如果使用了`"`,则将内部的`&quot;`解码成`"`
412
- */
413
- const /** @type {pseudoCall} */ calls = Object.fromEntries(AstElement.#pseudoKeys.map(f => [f, []])),
414
- selectors = escapedSelector.replace(
415
- AstElement.#pseudoRegex,
416
- /** @type {function(...string): string} */ (m, f, arg) => {
417
- if (!arg) {
418
- calls[f].push('');
419
- return m;
383
+ selector = structuredClone(selector);
384
+ const step = selector.pop();
385
+ if (this.#matches(step)) {
386
+ const {parentNode, previousElementSibling} = this;
387
+ switch (selector.at(-1)?.relation) {
388
+ case undefined:
389
+ return true;
390
+ case '>':
391
+ return parentNode?.matches(selector);
392
+ case '+':
393
+ return previousElementSibling?.matches(selector);
394
+ case '~': {
395
+ if (!parentNode) {
396
+ return false;
420
397
  }
421
- const quotes = /^(["']).*\1$/.exec(arg)?.[1];
422
- calls[f].push(quotes ? arg.slice(1, -1).replaceAll(escapedQuotes[quotes], quotes) : arg);
423
- return `:${f}(${calls[f].length - 1})`;
424
- },
425
- ).split(','),
426
- {parentElement, hidden} = this,
427
- childNodes = parentElement?.childNodes,
428
- childrenOfType = parentElement?.children?.filter(child => child.type === this.type),
429
- index = (childNodes?.indexOf(this) ?? 0) + 1,
430
- indexOfType = (childrenOfType?.indexOf(this) ?? 0) + 1,
431
- lastIndex = (childNodes?.length ?? 1) - index + 1,
432
- lastIndexOfType = (childrenOfType?.length ?? 1) - indexOfType + 1,
433
- content = this.toString(),
434
- plainPseudo = AstElement.#pseudoKeys.filter(f => !AstElement.#pseudo[f] && calls[f].length);
435
- if (plainPseudo.length) {
436
- Parser.warn('检测到伪选择器,请确认是否需要将":"转义成"&ratio;"。', plainPseudo);
398
+ const {children} = parentNode,
399
+ i = children.indexOf(this);
400
+ return children.slice(0, i).some(child => child.matches(selector));
401
+ }
402
+ default: // ' '
403
+ return this.getAncestors().some(ancestor => ancestor.matches(selector));
404
+ }
437
405
  }
438
- return selectors.some(str => {
439
- const /** @type {pseudoCall} */ curCalls = Object.fromEntries(AstElement.#pseudoKeys.map(f => [f, []]));
440
- str = str.replace(AstElement.#pseudoRegex, /** @type {function(...string): ''} */ (_, f, i) => {
441
- curCalls[f].push(i ? calls[f][i] : '');
442
- return '';
443
- });
444
- return Parser.run(() => this.matches(str, true))
445
- && (curCalls.root.length === 0 || !parentElement)
446
- && curCalls.is.every(s => this.matches(s))
447
- && !curCalls.not.some(s => this.matches(s))
448
- && curCalls['nth-child'].every(s => nth(s, index))
449
- && curCalls['nth-of-type'].every(s => nth(s, indexOfType))
450
- && curCalls['nth-last-child'].every(s => nth(s, lastIndex))
451
- && curCalls['nth-last-of-type'].every(s => nth(s, lastIndexOfType))
452
- && (curCalls['first-child'].length === 0 || nth('1', index))
453
- && (curCalls['first-of-type'].length === 0 || nth('1', indexOfType))
454
- && (curCalls['last-child'].length === 0 || nth('1', lastIndex))
455
- && (curCalls['last-of-type'].length === 0 || nth('1', lastIndexOfType))
456
- && (curCalls['only-child'].length === 0 || !parentElement || childNodes.length === 1)
457
- && (curCalls['only-of-type'].length === 0 || !parentElement || childrenOfType.length === 1)
458
- && (curCalls.empty.length === 0 || this.childElementCount === 0)
459
- && curCalls.contains.every(s => content.includes(s))
460
- && curCalls.has.every(s => Boolean(this.querySelector(s)))
461
- && (curCalls.header.length === 0 || this.type === 'heading')
462
- && (curCalls.parent.length === 0 || this.childElementCount > 0)
463
- && (curCalls.hidden.length === 0 || hidden)
464
- && (curCalls.visible.length === 0 || !hidden);
465
- });
406
+ return false;
466
407
  }
467
408
 
409
+ /**
410
+ * 还原为wikitext
411
+ * @param {string} selector
412
+ * @param {string} separator 子节点间的连接符
413
+ */
414
+ toString(selector, separator = '') {
415
+ return selector && this.matches(selector)
416
+ ? ''
417
+ : this.childNodes.map(child => child.toString(selector)).join(separator);
418
+ }
419
+
420
+ /** 获取所有祖先节点 */
468
421
  getAncestors() {
469
422
  const /** @type {this[]} */ ancestors = [];
470
- let {parentElement} = this;
471
- while (parentElement) {
472
- ancestors.push(parentElement);
473
- ({parentElement} = parentElement);
423
+ let {parentNode} = this;
424
+ while (parentNode) {
425
+ ancestors.push(parentNode);
426
+ ({parentNode} = parentNode);
474
427
  }
475
428
  return ancestors;
476
429
  }
477
430
 
478
431
  /**
479
- * @param {this} other
432
+ * 比较和另一个节点的相对位置
433
+ * @param {this} other 待比较的节点
480
434
  * @complexity `n`
435
+ * @throws `Error` 不在同一个语法树
481
436
  */
482
- comparePosition(other) {
437
+ compareDocumentPosition(other) {
483
438
  if (!(other instanceof AstElement)) {
484
- this.typeError('comparePosition', 'AstElement');
439
+ this.typeError('compareDocumentPosition', 'AstElement');
485
440
  } else if (this === other) {
486
441
  return 0;
487
442
  } else if (this.contains(other)) {
@@ -499,17 +454,24 @@ class AstElement extends AstNode {
499
454
  return childNodes.indexOf(aAncestors[depth]) - childNodes.indexOf(bAncestors[depth]);
500
455
  }
501
456
 
457
+ /**
458
+ * 最近的祖先节点
459
+ * @param {string} selector
460
+ */
502
461
  closest(selector = '') {
503
- let {parentElement} = this;
504
- while (parentElement) {
505
- if (parentElement.matches(selector)) {
506
- return parentElement;
462
+ let {parentNode} = this;
463
+ while (parentNode) {
464
+ if (parentNode.matches(selector)) {
465
+ return parentNode;
507
466
  }
508
- ({parentElement} = parentElement);
467
+ ({parentNode} = parentNode);
509
468
  }
469
+ return undefined;
510
470
  }
511
471
 
512
472
  /**
473
+ * 符合选择器的第一个后代节点
474
+ * @param {string} selector
513
475
  * @returns {this|undefined}
514
476
  * @complexity `n`
515
477
  */
@@ -523,9 +485,14 @@ class AstElement extends AstNode {
523
485
  return descendant;
524
486
  }
525
487
  }
488
+ return undefined;
526
489
  }
527
490
 
528
- /** @complexity `n` */
491
+ /**
492
+ * 符合选择器的所有后代节点
493
+ * @param {string} selector
494
+ * @complexity `n`
495
+ */
529
496
  querySelectorAll(selector = '') {
530
497
  const /** @type {this[]} */ descendants = [];
531
498
  for (const child of this.children) {
@@ -538,208 +505,133 @@ class AstElement extends AstNode {
538
505
  }
539
506
 
540
507
  /**
541
- * @param {number} index
542
- * @complexity `n`
508
+ * 类选择器
509
+ * @param {string} className 类名之一
543
510
  */
544
- posFromIndex(index) {
545
- if (typeof index !== 'number') {
546
- this.typeError('posFromIndex', 'Number');
547
- }
548
- const text = this.toString();
549
- if (index < -text.length || index >= text.length || !Number.isInteger(index)) {
550
- return;
551
- }
552
- const lines = text.slice(0, index).split('\n');
553
- return {top: lines.length - 1, left: lines.at(-1).length};
511
+ getElementsByClassName(className) {
512
+ return typeof className === 'string'
513
+ ? this.querySelectorAll(`[className~="${className.replaceAll(/(?<!\\)"/gu, '\\"')}"]`)
514
+ : this.typeError('getElementsByClassName', 'String');
554
515
  }
555
516
 
556
517
  /**
557
- * @param {number} top
558
- * @param {number} left
559
- * @complexity `n`
518
+ * 标签名选择器
519
+ * @param {string} name 标签名
560
520
  */
561
- indexFromPos(top, left) {
562
- if (typeof top !== 'number' || typeof left !== 'number') {
563
- this.typeError('indexFromPos', 'Number');
564
- } else if (top < 0 || left < 0 || !Number.isInteger(top) || !Number.isInteger(left)) {
565
- return;
566
- }
567
- const lines = this.toString().split('\n');
568
- if (lines.length < top + 1 || lines[top].length < left) {
569
- return;
521
+ getElementsByTagName(name) {
522
+ if (typeof name === 'string') {
523
+ name = name.replaceAll(/(?<!\\)"/gu, '\\"');
524
+ return this.querySelectorAll(`ext[name="${name}"], html[name="${name}"]`);
570
525
  }
571
- return lines.slice(0, top).reduce((acc, curLine) => acc + curLine.length + 1, 0) + left;
526
+ return this.typeError('getElementsByTagName', 'String');
572
527
  }
573
528
 
574
- /** @complexity `n` */
575
- #getDimension() {
576
- const lines = this.toString().split('\n');
577
- return {height: lines.length, width: lines.at(-1).length};
529
+ /**
530
+ * 获取某一行的wikitext
531
+ * @param {number} n 行号
532
+ */
533
+ getLine(n) {
534
+ return String(this).split('\n', n + 1).at(-1);
578
535
  }
579
536
 
537
+ static lintIgnoredHidden = new Set(['noinclude', 'double-underscore', 'hidden']);
538
+ static lintIgnoredSyntax = new Set(['magic-word-name', 'heading-trail', 'table-syntax']);
539
+ static lintIgnoredExt = new Set(['nowiki', 'pre', 'syntaxhighlight', 'source', 'math', 'timeline']);
540
+
580
541
  /**
581
- * 获取当前节点的相对位置,或其第`j`个子节点的相对位置
582
- * @param {number|undefined} j
583
- * @complexity `n`
542
+ * Linter
543
+ * @param {number} start 起始位置
584
544
  */
585
- getRelativeIndex(j) {
586
- if (j !== undefined && typeof j !== 'number') {
587
- this.typeError('getRelativeIndex', 'Number');
545
+ lint(start = 0) {
546
+ if (AstElement.lintIgnoredHidden.has(this.type) || AstElement.lintIgnoredSyntax.has(this.type)
547
+ || this.type === 'ext-inner' && AstElement.lintIgnoredExt.has(this.name)
548
+ ) {
549
+ return [];
588
550
  }
589
- let /** @type {(string|this)[]} */ childNodes;
590
- /**
591
- * 使用前需要先给`childNodes`赋值
592
- * @param {number} end
593
- * @param {this} parent
594
- * @returns {number}
595
- */
596
- const getIndex = (end, parent) => childNodes.slice(0, end).reduce(
597
- (acc, cur, i) => acc + String(cur).length + parent.getGaps(i),
598
- 0,
599
- ) + parent.getPadding();
600
- if (j === undefined) {
601
- const {parentElement} = this;
602
- if (!parentElement) {
603
- return 0;
604
- }
605
- ({childNodes} = parentElement);
606
- return getIndex(childNodes.indexOf(this), parentElement);
551
+ const /** @type {LintError[]} */ errors = [];
552
+ for (let i = 0, cur = start + this.getPadding(); i < this.childNodes.length; i++) {
553
+ const child = this.childNodes[i];
554
+ errors.push(...child.lint(cur));
555
+ cur += String(child).length + this.getGaps(i);
607
556
  }
608
- this.verifyChild(j, 1);
609
- ({childNodes} = this);
610
- return getIndex(j, this);
557
+ return errors;
611
558
  }
612
559
 
613
560
  /**
614
- * 获取当前节点的绝对位置
615
- * @returns {number}
616
- * @complexity `n`
561
+ * 以HTML格式打印
562
+ * @param {printOpt} opt 选项
563
+ * @returns {string}
617
564
  */
618
- getAbsoluteIndex() {
619
- const {parentElement} = this;
620
- return parentElement ? parentElement.getAbsoluteIndex() + this.getRelativeIndex() : 0;
565
+ print(opt = {}) {
566
+ return this.childNodes.length === 0
567
+ ? ''
568
+ : `<span class="wpb-${opt.class ?? this.type}">${print(this.childNodes, opt)}</span>`;
621
569
  }
622
570
 
623
571
  /**
624
- * 获取当前节点的相对位置,或其第`j`个子节点的相对位置
625
- * @param {number|undefined} j
626
- * @complexity `n`
572
+ * 保存为JSON
573
+ * @param {string} file 文件名
574
+ * @returns {Record<string, any>}
627
575
  */
628
- #getPosition(j) {
629
- if (j === undefined) {
630
- const {parentElement} = this;
631
- if (!parentElement) {
632
- return {top: 0, left: 0};
633
- }
634
- return parentElement.posFromIndex(this.getRelativeIndex());
576
+ json(file) {
577
+ const {childNodes, ...prop} = this,
578
+ json = {
579
+ ...prop,
580
+ childNodes: childNodes.map(child => child.type === 'text' ? String(child) : child.json()),
581
+ };
582
+ if (typeof file === 'string') {
583
+ fs.writeFileSync(
584
+ path.join(__dirname.slice(0, -4), 'printed', `${file}${file.endsWith('.json') ? '' : '.json'}`),
585
+ JSON.stringify(json, null, 2),
586
+ );
635
587
  }
636
- return this.posFromIndex(this.getRelativeIndex(j));
637
- }
638
-
639
- /** @complexity `n` */
640
- getBoundingClientRect() {
641
- const root = this.getRootNode();
642
- return {...this.#getDimension(), ...root.posFromIndex(this.getAbsoluteIndex())};
643
- }
644
-
645
- /** @complexity `n` */
646
- get offsetHeight() {
647
- return this.#getDimension().height;
648
- }
649
- /** @complexity `n` */
650
- get offsetWidth() {
651
- return this.#getDimension().width;
652
- }
653
- /** @complexity `n` */
654
- get offsetTop() {
655
- return this.#getPosition().top;
656
- }
657
- /** @complexity `n` */
658
- get offsetLeft() {
659
- return this.#getPosition().left;
660
- }
661
-
662
- getPadding() {
663
- return 0;
664
- }
665
-
666
- getGaps() {
667
- return 0;
668
- }
669
-
670
- /** @complexity `n` */
671
- get style() {
672
- return {...this.#getPosition(), ...this.#getDimension(), padding: this.getPadding()};
588
+ return json;
673
589
  }
674
590
 
675
591
  /**
676
- * @returns {boolean}
677
- * @complexity `n`
592
+ * 输出AST
593
+ * @param {number} depth 当前深度
594
+ * @returns {void}
678
595
  */
679
- get eof() {
680
- return this.type === 'root' || !this.nextSibling && this.parentElement.eof;
681
- }
682
-
683
- /**
684
- * @template {'markup'|'json'} T
685
- * @param {T} format
686
- * @param {T extends 'markup' ? number : string} depth
687
- * @returns {T extends 'markup' ? void : Record<string, any>}
688
- */
689
- print(format = 'markup', depth = 0) {
690
- if (format === 'json') {
691
- const {childNodes, ...prop} = this,
692
- json = {
693
- ...prop,
694
- childNodes: childNodes.map(child => typeof child === 'string' ? child : child.print('json')),
695
- };
696
- if (typeof depth === 'string') {
697
- fs.writeFileSync(
698
- `${__dirname.slice(0, -3)}printed/${depth}${depth.endsWith('.json') ? '' : '.json'}`,
699
- JSON.stringify(json, null, 2),
700
- );
701
- }
702
- return json;
703
- } else if (typeof depth !== 'number') {
596
+ echo(depth = 0) {
597
+ if (typeof depth !== 'number') {
704
598
  this.typeError('print', 'Number');
705
599
  }
706
600
  const indent = ' '.repeat(depth),
707
- str = this.toString(),
708
- {childNodes, type, firstChild} = this,
601
+ str = String(this),
602
+ {childNodes, type} = this,
709
603
  {length} = childNodes;
710
- if (!str || length === 0 || typeof firstChild === 'string' && firstChild === str) {
711
- console.log(`${indent}\x1b[32m<%s>\x1b[0m${noWrap(str)}\x1b[32m</%s>\x1b[0m`, type, type);
712
- return;
604
+ if (childNodes.every(child => child.type === 'text' || !String(child))) {
605
+ console.log(`${indent}\x1B[32m<%s>\x1B[0m${noWrap(str)}\x1B[32m</%s>\x1B[0m`, type, type);
606
+ return undefined;
713
607
  }
714
608
  Parser.info(`${indent}<${type}>`);
715
609
  let i = this.getPadding();
716
610
  if (i) {
717
611
  console.log(`${indent} ${noWrap(str.slice(0, i))}`);
718
612
  }
719
- for (const [j, child] of childNodes.entries()) {
720
- const childStr = String(child),
613
+ for (let j = 0; j < length; j++) {
614
+ const child = childNodes[j],
615
+ childStr = String(child),
721
616
  gap = j === length - 1 ? 0 : this.getGaps(j);
722
617
  if (!childStr) {
723
618
  // pass
724
- } else if (typeof child === 'string') {
725
- console.log(`${indent} ${noWrap(child)}`);
619
+ } else if (child.type === 'text') {
620
+ console.log(`${indent} ${noWrap(String(child))}`);
726
621
  } else {
727
- child.print('markup', depth + 1);
622
+ child.echo(depth + 1);
728
623
  }
729
- i += childStr.length + gap;
624
+ i += childStr.length;
730
625
  if (gap) {
731
- console.log(`${indent} ${noWrap(str.slice(i - gap, i))}`);
626
+ console.log(`${indent} ${noWrap(str.slice(i, i + gap))}`);
627
+ i += gap;
732
628
  }
733
629
  }
734
630
  if (i < str.length) {
735
631
  console.log(`${indent} ${noWrap(str.slice(i))}`);
736
632
  }
737
633
  Parser.info(`${indent}</${type}>`);
738
- }
739
-
740
- /** @param {number} n */
741
- getLine(n) {
742
- return this.toString().split('\n', n + 1).at(-1);
634
+ return undefined;
743
635
  }
744
636
  }
745
637