wikiparser-node 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. package/.eslintrc.json +472 -34
  2. package/README.md +1 -1
  3. package/config/default.json +58 -30
  4. package/config/llwiki.json +22 -90
  5. package/config/moegirl.json +51 -13
  6. package/config/zhwiki.json +1269 -0
  7. package/index.js +114 -104
  8. package/lib/element.js +448 -440
  9. package/lib/node.js +335 -115
  10. package/lib/ranges.js +27 -18
  11. package/lib/text.js +146 -0
  12. package/lib/title.js +13 -5
  13. package/mixin/attributeParent.js +70 -24
  14. package/mixin/fixedToken.js +14 -6
  15. package/mixin/hidden.js +6 -4
  16. package/mixin/sol.js +27 -10
  17. package/package.json +9 -3
  18. package/parser/brackets.js +22 -17
  19. package/parser/commentAndExt.js +18 -16
  20. package/parser/converter.js +14 -13
  21. package/parser/externalLinks.js +12 -11
  22. package/parser/hrAndDoubleUnderscore.js +23 -14
  23. package/parser/html.js +10 -9
  24. package/parser/links.js +15 -14
  25. package/parser/list.js +12 -11
  26. package/parser/magicLinks.js +12 -11
  27. package/parser/quotes.js +6 -5
  28. package/parser/selector.js +175 -0
  29. package/parser/table.js +25 -18
  30. package/printed/example.json +120 -0
  31. package/src/arg.js +56 -32
  32. package/src/atom/hidden.js +5 -2
  33. package/src/atom/index.js +17 -9
  34. package/src/attribute.js +182 -100
  35. package/src/converter.js +68 -41
  36. package/src/converterFlags.js +67 -45
  37. package/src/converterRule.js +117 -65
  38. package/src/extLink.js +66 -18
  39. package/src/gallery.js +42 -15
  40. package/src/heading.js +34 -15
  41. package/src/html.js +97 -35
  42. package/src/imageParameter.js +83 -54
  43. package/src/index.js +299 -178
  44. package/src/link/category.js +20 -52
  45. package/src/link/file.js +59 -28
  46. package/src/link/galleryImage.js +21 -7
  47. package/src/link/index.js +146 -60
  48. package/src/magicLink.js +34 -12
  49. package/src/nowiki/comment.js +22 -10
  50. package/src/nowiki/dd.js +37 -22
  51. package/src/nowiki/doubleUnderscore.js +16 -7
  52. package/src/nowiki/hr.js +11 -7
  53. package/src/nowiki/index.js +16 -9
  54. package/src/nowiki/list.js +2 -2
  55. package/src/nowiki/noinclude.js +8 -4
  56. package/src/nowiki/quote.js +11 -7
  57. package/src/onlyinclude.js +19 -7
  58. package/src/parameter.js +65 -38
  59. package/src/syntax.js +26 -20
  60. package/src/table/index.js +260 -165
  61. package/src/table/td.js +98 -52
  62. package/src/table/tr.js +102 -58
  63. package/src/tagPair/ext.js +27 -19
  64. package/src/tagPair/include.js +16 -11
  65. package/src/tagPair/index.js +64 -29
  66. package/src/transclude.js +170 -93
  67. package/test/api.js +83 -0
  68. package/test/real.js +133 -0
  69. package/test/test.js +28 -0
  70. package/test/util.js +80 -0
  71. package/tool/index.js +41 -31
  72. package/typings/api.d.ts +13 -0
  73. package/typings/array.d.ts +28 -0
  74. package/typings/event.d.ts +24 -0
  75. package/typings/index.d.ts +46 -4
  76. package/typings/node.d.ts +15 -9
  77. package/typings/parser.d.ts +7 -0
  78. package/typings/tool.d.ts +3 -2
  79. package/util/debug.js +21 -18
  80. package/util/string.js +40 -27
  81. package/typings/element.d.ts +0 -28
package/lib/element.js CHANGED
@@ -4,64 +4,135 @@ const fs = require('fs'),
4
4
  {externalUse} = require('../util/debug'),
5
5
  {toCase, noWrap} = require('../util/string'),
6
6
  {nth} = require('./ranges'),
7
- EventEmitter = require('events'),
7
+ parseSelector = require('../parser/selector'),
8
+ Parser = require('..'),
8
9
  AstNode = require('./node'),
9
- /** @type {Parser} */ Parser = require('..');
10
-
10
+ AstText = require('./text');
11
+
12
+ /**
13
+ * 检测:lang()伪选择器
14
+ * @param {AstElement & {attributes: Records<string, string|true>}} node 节点
15
+ * @param {RegExp} regex 语言正则
16
+ */
17
+ const matchesLang = ({attributes}, regex) => {
18
+ const /** @type {string} */ lang = attributes?.lang;
19
+ return typeof lang === 'string' && regex.test(lang);
20
+ };
21
+
22
+ /** 类似HTMLElement */
11
23
  class AstElement extends AstNode {
12
- /** @type {string} */ type;
13
24
  /** @type {string} */ name;
14
- #events = new EventEmitter();
15
25
 
16
- /** @complexity `n` */
26
+ /**
27
+ * 检查是否符合某条属性规则
28
+ * @param {string} key 属性键
29
+ * @param {string|undefined} equal 属性规则运算符,`equal`存在时`val`和`i`也一定存在
30
+ * @param {string|undefined} val 属性值
31
+ * @param {string|undefined} i 是否对大小写不敏感
32
+ */
33
+ #matchesAttr = (key, equal, val, i) => {
34
+ if (!equal) {
35
+ return this.hasAttribute(key);
36
+ } else if (!this.hasAttribute(key)) {
37
+ return equal === '!=';
38
+ }
39
+ val = toCase(val, i);
40
+ if (equal === '~=') {
41
+ let /** @type {Iterable<string>} */ thisVals = this[key];
42
+ if (typeof thisVals === 'string') {
43
+ thisVals = thisVals.split(/\s/u);
44
+ }
45
+ return Boolean(thisVals?.[Symbol.iterator]) && [...thisVals].some(v => toCase(v, i) === val);
46
+ }
47
+ const thisVal = toCase(this.getAttribute(key), i);
48
+ switch (equal) {
49
+ case '|=':
50
+ return thisVal === val || thisVal.startsWith(`${val}-`);
51
+ case '^=':
52
+ return thisVal.startsWith(val);
53
+ case '$=':
54
+ return thisVal.endsWith(val);
55
+ case '*=':
56
+ return thisVal.includes(val);
57
+ case '!=':
58
+ return thisVal !== val;
59
+ default: // `=`
60
+ return thisVal === val;
61
+ }
62
+ };
63
+
64
+ /**
65
+ * 全部非文本子节点
66
+ * @complexity `n`
67
+ */
17
68
  get children() {
18
- const /** @type {this[]} */ children = this.childNodes.filter(ele => ele instanceof AstElement);
69
+ const /** @type {this[]} */ children = this.childNodes.filter(({type}) => type !== 'text');
19
70
  return children;
20
71
  }
21
- /** @complexity `n` */
72
+
73
+ /**
74
+ * 非文本子节点总数
75
+ * @complexity `n`
76
+ */
22
77
  get childElementCount() {
23
78
  return this.children.length;
24
79
  }
25
- /** @returns {this} */
80
+
81
+ /**
82
+ * 首位非文本子节点
83
+ * @returns {this}
84
+ */
26
85
  get firstElementChild() {
27
- return this.childNodes.find(ele => ele instanceof AstElement);
86
+ return this.childNodes.find(({type}) => type !== 'text');
28
87
  }
29
- /** @complexity `n` */
88
+
89
+ /**
90
+ * 末位非文本子节点
91
+ * @returns {this}
92
+ */
30
93
  get lastElementChild() {
31
- return this.children.at(-1);
94
+ return this.childNodes.findLast(({type}) => type !== 'text');
32
95
  }
96
+
97
+ /** 父节点 */
33
98
  get parentElement() {
34
99
  return this.parentNode;
35
100
  }
36
- get isConnected() {
37
- return this.getRootNode().type === 'root';
38
- }
39
- /** @complexity `n` */
40
- get nextElementSibling() {
41
- const children = this.parentElement?.children;
42
- return children && children[children.indexOf(this) + 1];
43
- }
44
- /** @complexity `n` */
45
- get previousElementSibling() {
46
- const children = this.parentElement?.children;
47
- return children && children[children.indexOf(this) - 1];
101
+
102
+ /**
103
+ * AstElement.prototype.text()的getter写法
104
+ * @complexity `n`
105
+ */
106
+ get outerText() {
107
+ return this.text();
48
108
  }
49
- /** @complexity `n` */
109
+
110
+ /**
111
+ * 不可见
112
+ */
50
113
  get hidden() {
51
114
  return this.text() === '';
52
115
  }
53
- /** @complexity `n` */
116
+
117
+ /**
118
+ * 后一个可见的兄弟节点
119
+ * @complexity `n`
120
+ */
54
121
  get nextVisibleSibling() {
55
122
  let {nextSibling} = this;
56
- while (nextSibling === '' || nextSibling instanceof AstElement && nextSibling.hidden) {
123
+ while (nextSibling?.text() === '') {
57
124
  ({nextSibling} = nextSibling);
58
125
  }
59
126
  return nextSibling;
60
127
  }
61
- /** @complexity `n` */
128
+
129
+ /**
130
+ * 前一个可见的兄弟节点
131
+ * @complexity `n`
132
+ */
62
133
  get previousVisibleSibling() {
63
134
  let {previousSibling} = this;
64
- while (previousSibling === '' || previousSibling instanceof AstElement && previousSibling.hidden) {
135
+ while (previousSibling?.text() === '') {
65
136
  ({previousSibling} = previousSibling);
66
137
  }
67
138
  return previousSibling;
@@ -72,101 +143,39 @@ class AstElement extends AstNode {
72
143
  this.seal('name');
73
144
  }
74
145
 
75
- /** @complexity `n` */
76
- destroy() {
77
- if (this.parentNode) {
78
- throw new Error('不能销毁子节点!');
79
- }
80
- for (const element of this.children) {
81
- element.setAttribute('parentNode');
82
- }
83
- Object.setPrototypeOf(this, null);
84
- }
85
-
86
146
  /**
87
- * @param {string|string[]} types
88
- * @param {AstListener} listener
89
- * @param {{once: boolean}} options
90
- */
91
- addEventListener(types, listener, options) {
92
- if (Array.isArray(types)) {
93
- for (const type of types) {
94
- this.addEventListener(type, listener, options);
95
- }
96
- } else if (typeof types !== 'string' || typeof listener !== 'function') {
97
- this.typeError('addEventListener', 'String', 'Function');
98
- } else {
99
- this.#events[options?.once ? 'once' : 'on'](types, listener);
100
- }
101
- }
102
-
103
- /**
104
- * @param {string|string[]} types
105
- * @param {AstListener} listener
147
+ * 销毁
148
+ * @complexity `n`
149
+ * @param {boolean} deep 是否从根节点开始销毁
150
+ * @throws `Error` 不能销毁子节点
106
151
  */
107
- removeEventListener(types, listener) {
108
- if (Array.isArray(types)) {
109
- for (const type of types) {
110
- this.removeEventListener(type, listener);
111
- }
112
- } else if (typeof types !== 'string' || typeof listener !== 'function') {
113
- this.typeError('removeEventListener', 'String', 'Function');
114
- } else {
115
- this.#events.off(types, listener);
152
+ destroy(deep) {
153
+ if (this.parentNode && !deep) {
154
+ throw new Error('不能销毁子节点!');
116
155
  }
117
- }
118
-
119
- /** @param {string|string[]} types */
120
- removeAllEventListeners(types) {
121
- if (Array.isArray(types)) {
122
- for (const type of types) {
123
- this.removeAllEventListeners(type);
124
- }
125
- } else if (types !== undefined && typeof types !== 'string') {
126
- this.typeError('removeAllEventListeners', 'String');
127
- } else {
128
- this.#events.removeAllListeners(types);
156
+ this.parentNode?.destroy(deep);
157
+ for (const child of this.childNodes) {
158
+ child.setAttribute('parentNode');
129
159
  }
160
+ Object.setPrototypeOf(this, null);
130
161
  }
131
162
 
132
163
  /**
133
- * @param {string} type
134
- * @returns {AstListener[]}
135
- */
136
- listEventListeners(type) {
137
- if (typeof type !== 'string') {
138
- this.typeError('listEventListeners', 'String');
139
- }
140
- return this.#events.listeners(type);
141
- }
142
-
143
- /**
144
- * @param {AstEvent} e
145
- * @param {any} data
164
+ * @override
165
+ * @template {string} T
166
+ * @param {T} key 属性键
167
+ * @returns {TokenAttribute<T>}
146
168
  */
147
- dispatchEvent(e, data) {
148
- if (!(e instanceof Event)) {
149
- this.typeError('dispatchEvent', 'Event');
150
- } else if (!e.target) { // 初始化
151
- Object.defineProperty(e, 'target', {value: this, enumerable: true});
152
- e.stopPropagation = function() {
153
- Object.defineProperty(this, 'bubbles', {value: false});
154
- };
155
- }
156
- Object.defineProperties(e, { // 每次bubble更新
157
- prevTarget: {value: e.currentTarget, enumerable: true, configurable: true},
158
- currentTarget: {value: this, enumerable: true, configurable: true},
159
- });
160
- this.#events.emit(e.type, e, data);
161
- if (e.bubbles && this.parentElement) {
162
- this.parentElement.dispatchEvent(e, data);
163
- }
169
+ getAttribute(key) {
170
+ return key === 'matchesAttr' ? this.#matchesAttr : super.getAttribute(key);
164
171
  }
165
172
 
166
173
  /**
174
+ * @override
167
175
  * @template {string} T
168
- * @param {T} key
169
- * @param {TokenAttribute<T>} value
176
+ * @param {T} key 属性键
177
+ * @param {TokenAttribute<T>} value 属性值
178
+ * @throws `RangeError` 禁止手动指定的属性
170
179
  */
171
180
  setAttribute(key, value) {
172
181
  if (key === 'name' && externalUse('setAttribute')) {
@@ -175,36 +184,9 @@ class AstElement extends AstNode {
175
184
  return super.setAttribute(key, value);
176
185
  }
177
186
 
178
- /** @param {number} i */
179
- removeAt(i) {
180
- const element = super.removeAt(i),
181
- e = new Event('remove', {bubbles: true});
182
- this.dispatchEvent(e, {position: i, removed: element});
183
- return element;
184
- }
185
-
186
187
  /**
187
- * @template {string|this} T
188
- * @param {T} element
189
- * @complexity `n`
190
- */
191
- insertAt(element, i = this.childNodes.length) {
192
- super.insertAt(element, i);
193
- const e = new Event('insert', {bubbles: true});
194
- this.dispatchEvent(e, {position: i < 0 ? i + this.childNodes.length - 1 : i, inserted: element});
195
- return element;
196
- }
197
-
198
- /** @param {string} str */
199
- setText(str, i = 0) {
200
- const oldText = super.setText(str, i),
201
- e = new Event('text', {bubbles: true});
202
- this.dispatchEvent(e, {position: i, oldText, newText: str});
203
- return oldText;
204
- }
205
-
206
- /**
207
- * @param {...string|this} elements
188
+ * 在末尾批量插入子节点
189
+ * @param {...this} elements 插入节点
208
190
  * @complexity `n`
209
191
  */
210
192
  append(...elements) {
@@ -214,17 +196,19 @@ class AstElement extends AstNode {
214
196
  }
215
197
 
216
198
  /**
217
- * @param {...string|this} elements
199
+ * 在开头批量插入子节点
200
+ * @param {...this} elements 插入节点
218
201
  * @complexity `n`
219
202
  */
220
203
  prepend(...elements) {
221
- for (const [i, element] of elements.entries()) {
222
- this.insertAt(element, i);
204
+ for (let i = 0; i < elements.length; i++) {
205
+ this.insertAt(elements[i], i);
223
206
  }
224
207
  }
225
208
 
226
209
  /**
227
- * @param {...string|this} elements
210
+ * 批量替换子节点
211
+ * @param {...this} elements 新的子节点
228
212
  * @complexity `n`
229
213
  */
230
214
  replaceChildren(...elements) {
@@ -235,253 +219,211 @@ class AstElement extends AstNode {
235
219
  }
236
220
 
237
221
  /**
238
- * @param {(string|this)[]} elements
239
- * @param {number} offset
240
- * @complexity `n`
222
+ * 修改文本子节点
223
+ * @param {string} str 新文本
224
+ * @param {number} i 子节点位置
225
+ * @throws `RangeError` 对应位置的子节点不是文本节点
241
226
  */
242
- #insertAdjacent(elements, offset) {
243
- const {parentNode} = this;
244
- if (!parentNode) {
245
- throw new Error('不存在父节点!');
246
- }
247
- const i = parentNode.childNodes.indexOf(this) + offset;
248
- for (const [j, element] of elements.entries()) {
249
- parentNode.insertAt(element, i + j);
227
+ setText(str, i = 0) {
228
+ this.getAttribute('verifyChild')(i);
229
+ const /** @type {AstText} */ oldText = this.childNodes.at(i),
230
+ {type, data, constructor: {name}} = oldText;
231
+ if (type !== 'text') {
232
+ throw new RangeError(`第 ${i} 个子节点是 ${name}!`);
250
233
  }
234
+ oldText.replaceData(str);
235
+ return data;
251
236
  }
252
237
 
253
- /**
254
- * @param {...string|this} elements
255
- * @complexity `n`
256
- */
257
- after(...elements) {
258
- this.#insertAdjacent(elements, 1);
259
- }
260
-
261
- /**
262
- * @param {...string|this} elements
263
- * @complexity `n`
264
- */
265
- before(...elements) {
266
- this.#insertAdjacent(elements, 0);
267
- }
268
-
269
- /** @complexity `n` */
270
- remove() {
271
- const {parentNode} = this;
238
+ /** 是否受保护。保护条件来自Token,这里仅提前用于:required和:optional伪选择器。 */
239
+ #isProtected() {
240
+ const /** @type {{parentNode: AstElement & {constructor: {fixed: boolean}}}} */ {parentNode} = this;
272
241
  if (!parentNode) {
273
- throw new Error('不存在父节点!');
242
+ return undefined;
274
243
  }
275
- parentNode.removeChild(this);
276
- }
277
-
278
- /**
279
- * @param {...string|this} elements
280
- * @complexity `n`
281
- */
282
- replaceWith(...elements) {
283
- this.after(...elements);
284
- this.remove();
244
+ const {childNodes, constructor: {fixed}} = parentNode,
245
+ protectedIndices = parentNode.getAttribute('protectedChildren')?.applyTo(childNodes);
246
+ return fixed || protectedIndices?.includes(childNodes.indexOf(this));
285
247
  }
286
248
 
287
249
  /**
288
- * @param {string} key
289
- * @param {string|undefined} equal - `equal`存在时`val`和`i`也一定存在
290
- * @param {string|undefined} val
291
- * @param {string|undefined} i
250
+ * 检查是否符合解析后的选择器,不含节点关系
251
+ * @this {AstElement & {link: string, constructor: {fixed: boolean}}}
252
+ * @param {SelectorArray} step 解析后的选择器
253
+ * @throws `SyntaxError` 未定义的伪选择器
292
254
  */
293
- matchesAttr(key, equal, val, i) {
294
- if (externalUse('matchesAttr')) {
295
- throw new Error(`禁止外部调用 ${this.constructor.name}.matchesAttr 方法!`);
296
- } else if (!equal) {
297
- return this.hasAttribute(key);
298
- } else if (!this.hasAttribute(key)) {
299
- return equal === '!=';
300
- }
301
- val = toCase(val, i);
302
- if (equal === '~=') {
303
- let /** @type {Iterable<string>} */ thisVals = this[key];
304
- if (typeof thisVals === 'string') {
305
- thisVals = thisVals.split(/\s/);
255
+ #matches(step) {
256
+ const {parentNode, type, name, childNodes, link, constructor: {fixed, name: tokenName}} = this,
257
+ children = parentNode?.children,
258
+ childrenOfType = children?.filter(({type: t}) => t === type),
259
+ siblingsCount = children?.length ?? 1,
260
+ siblingsCountOfType = childrenOfType?.length ?? 1,
261
+ index = (children?.indexOf(this) ?? 0) + 1,
262
+ indexOfType = (childrenOfType?.indexOf(this) ?? 0) + 1,
263
+ lastIndex = siblingsCount - index + 1,
264
+ lastIndexOfType = siblingsCountOfType - indexOfType + 1;
265
+ return step.every(selector => {
266
+ if (typeof selector === 'string') {
267
+ switch (selector) { // 情形1:简单伪选择器、type和name
268
+ case '*':
269
+ return true;
270
+ case ':root':
271
+ return !parentNode;
272
+ case ':first-child':
273
+ return index === 1;
274
+ case ':first-of-type':
275
+ return indexOfType === 1;
276
+ case ':last-child':
277
+ return lastIndex === 1;
278
+ case ':last-of-type':
279
+ return lastIndexOfType === 1;
280
+ case ':only-child':
281
+ return siblingsCount === 1;
282
+ case ':only-of-type':
283
+ return siblingsCountOfType === 1;
284
+ case ':empty':
285
+ return !childNodes.some(child => child instanceof AstElement || String(child));
286
+ case ':parent':
287
+ return childNodes.some(child => child instanceof AstElement || String(child));
288
+ case ':header':
289
+ return type === 'heading';
290
+ case ':hidden':
291
+ return this.text() === '';
292
+ case ':visible':
293
+ return this.text() !== '';
294
+ case ':only-whitespace':
295
+ return this.text().trim() === '';
296
+ case ':any-link':
297
+ return type === 'link' || type === 'free-ext-link' || type === 'ext-link'
298
+ || (type === 'file' || type === 'gallery-image' && link);
299
+ case ':local-link':
300
+ return (type === 'link' || type === 'file' || type === 'gallery-image')
301
+ && link?.startsWith('#');
302
+ case ':read-only':
303
+ return fixed;
304
+ case ':read-write':
305
+ return !fixed;
306
+ case ':invalid':
307
+ return type === 'table-inter' || tokenName === 'HiddenToken';
308
+ case ':required':
309
+ return this.#isProtected() === true;
310
+ case ':optional':
311
+ return this.#isProtected() === false;
312
+ default: {
313
+ const [t, n] = selector.split('#');
314
+ return (!t || t === type || Boolean(Parser.typeAliases[type]?.includes(t)))
315
+ && (!n || n === name);
316
+ }
317
+ }
318
+ } else if (selector.length === 4) { // 情形2:属性选择器
319
+ return this.getAttribute('matchesAttr')(...selector);
306
320
  }
307
- return Boolean(thisVals?.[Symbol.iterator]) && [...thisVals].some(v => toCase(v, i) === val);
308
- }
309
- const thisVal = toCase(this.getAttribute(key), i);
310
- switch (equal) {
311
- case '|=':
312
- return thisVal === val || thisVal.startsWith(`${val}-`);
313
- case '^=':
314
- return thisVal.startsWith(val);
315
- case '$=':
316
- return thisVal.endsWith(val);
317
- case '*=':
318
- return thisVal.includes(val);
319
- case '!=':
320
- return thisVal !== val;
321
- default: // `=`
322
- return thisVal === val;
323
- }
321
+ const [s, pseudo] = selector; // 情形3:复杂伪选择器
322
+ switch (pseudo) {
323
+ case 'is':
324
+ return this.matches(s);
325
+ case 'not':
326
+ return !this.matches(s);
327
+ case 'nth-child':
328
+ return nth(s, index);
329
+ case 'nth-of-type':
330
+ return nth(s, indexOfType);
331
+ case 'nth-last-child':
332
+ return nth(s, lastIndex);
333
+ case 'nth-last-of-type':
334
+ return nth(s, lastIndexOfType);
335
+ case 'contains':
336
+ return this.text().includes(s);
337
+ case 'has':
338
+ return Boolean(this.querySelector(s));
339
+ case 'lang': {
340
+ const regex = new RegExp(`^${s}(?:-|$)`, 'u');
341
+ return matchesLang(this, regex)
342
+ || this.getAncestors().some(ancestor => matchesLang(ancestor, regex));
343
+ }
344
+ default:
345
+ throw new SyntaxError(`未定义的伪选择器!${pseudo}`);
346
+ }
347
+ });
324
348
  }
325
349
 
326
- /** @type {Record<pseudo, boolean>} */ static #pseudo = {
327
- root: false,
328
- is: true,
329
- not: true,
330
- 'nth-child': true,
331
- 'nth-of-type': true,
332
- 'nth-last-child': true,
333
- 'nth-last-of-type': true,
334
- 'first-child': false,
335
- 'first-of-type': false,
336
- 'last-child': false,
337
- 'last-of-type': false,
338
- 'only-child': false,
339
- 'only-of-type': false,
340
- empty: false,
341
- contains: true,
342
- has: true,
343
- header: false,
344
- parent: false,
345
- hidden: false,
346
- visible: false,
347
- };
348
- /** @type {pseudo[]} */ static #pseudoKeys = Object.keys(AstElement.#pseudo);
349
- static #pseudoRegex = new RegExp(
350
- `:(${this.#pseudoKeys.join('|')})(?:\\(\\s*("[^"]*"|'[^']*'|[^()]*?)\\s*\\))?(?=:|\\s*(?:,|$))`,
351
- 'g',
352
- );
353
- static #simplePseudoRegex = new RegExp(`:(?:${this.#pseudoKeys.join('|')})(?:\\(.*?\\))?(?=:|\\s*(?:,|$))`, 'g');
354
-
355
350
  /**
351
+ * 检查是否符合选择器
352
+ * @param {string|SelectorArray[]} selector
356
353
  * @returns {boolean}
357
354
  * @complexity `n`
358
355
  */
359
- matches(selector = '', simple = false) {
360
- if (typeof selector !== 'string') {
356
+ matches(selector = '') {
357
+ if (typeof selector === 'string') {
358
+ return Parser.run(() => {
359
+ const stack = parseSelector(selector),
360
+ /** @type {Set<string>} */
361
+ pseudos = new Set(stack.flat(2).filter(step => typeof step === 'string' && step.startsWith(':')));
362
+ if (pseudos.size > 0) {
363
+ Parser.warn('检测到伪选择器,请确认是否需要将":"转义成"\\:"。', pseudos);
364
+ }
365
+ return stack.some(condition => this.matches(condition));
366
+ });
367
+ } else if (!Parser.running) {
361
368
  this.typeError('matches', 'String');
362
- } else if (!selector.trim()) {
363
- return true;
364
- }
365
- simple &&= Parser.running;
366
- const /** @type {Record<string, string>} */ escapedQuotes = {'"': '&quot;', "'": '&apos;'},
367
- escapedSelector = selector.replace(/\\["']/g, m => escapedQuotes[m[1]]);
368
- if (simple || !AstElement.#pseudoRegex.test(escapedSelector)) {
369
- if (!simple && selector.includes(',')) {
370
- return Parser.run(() => selector.split(',').some(str => this.matches(str, true)));
371
- }
372
- const mt = escapedSelector.match(AstElement.#simplePseudoRegex);
373
- if (mt) {
374
- Parser.error(
375
- '检测到不规范的伪选择器!嵌套伪选择器时请使用引号包裹内层,多层嵌套时请使用"\\"转义引号。',
376
- mt.map(s => s.replace(
377
- /&(quot|apos);/g,
378
- /** @param {string} p1 */ (_, p1) => `\\${p1 === 'quot' ? '"' : "'"}`,
379
- )),
380
- );
381
- }
382
- const /** @type {Record<string, string>} */ entities = {comma: ',', ratio: ':'},
383
- /** @type {string[][]} */ attributes = [],
384
- plainSelector = selector.replace(
385
- /&(comma|ratio);/g, /** @param {string} name */ (_, name) => entities[name],
386
- ).replace(
387
- /\[\s*(\w+)\s*(?:([~|^$*!]?=)\s*("[^"]*"|'[^']*'|[^[\]]*?)\s*(\si)?\s*)?]/g,
388
- /** @type {function(...string): ''} */ (_, key, equal, val, i) => {
389
- if (equal) {
390
- const quotes = val.match(/^(["']).*\1$/)?.[1];
391
- attributes.push([
392
- key,
393
- equal,
394
- quotes ? val.slice(1, -1).replaceAll(escapedQuotes[quotes], quotes) : val,
395
- i,
396
- ]);
397
- } else {
398
- attributes.push([key]);
399
- }
400
- return '';
401
- },
402
- ),
403
- [type, ...parts] = plainSelector.trim().split('#'),
404
- name = parts.join('#');
405
- return (!type || this.type === type || Boolean(Parser.typeAliases[this.type]?.includes(type)))
406
- && (!name || this.name === name)
407
- && attributes.every(args => this.matchesAttr(...args));
408
369
  }
409
- /*
410
- * 先将`\\'`转义成`&apos;`,将`\\"`转义成`&quot;`,即escapedSelector
411
- * 在去掉一重`:pseudo()`时,如果使用了`'`,则将内部的`&apos;`解码成`'`;如果使用了`"`,则将内部的`&quot;`解码成`"`
412
- */
413
- const /** @type {pseudoCall} */ calls = Object.fromEntries(AstElement.#pseudoKeys.map(f => [f, []])),
414
- selectors = escapedSelector.replace(
415
- AstElement.#pseudoRegex,
416
- /** @type {function(...string): string} */ (m, f, arg) => {
417
- if (!arg) {
418
- calls[f].push('');
419
- return m;
370
+ selector = structuredClone(selector);
371
+ const step = selector.pop();
372
+ if (this.#matches(step)) {
373
+ const {parentNode, previousElementSibling} = this;
374
+ switch (selector.at(-1)?.relation) {
375
+ case undefined:
376
+ return true;
377
+ case '>':
378
+ return parentNode?.matches(selector);
379
+ case '+':
380
+ return previousElementSibling?.matches(selector);
381
+ case '~': {
382
+ if (!parentNode) {
383
+ return false;
420
384
  }
421
- const quotes = arg.match(/^(["']).*\1$/)?.[1];
422
- calls[f].push(quotes ? arg.slice(1, -1).replaceAll(escapedQuotes[quotes], quotes) : arg);
423
- return `:${f}(${calls[f].length - 1})`;
424
- },
425
- ).split(','),
426
- {parentElement, hidden} = this,
427
- childNodes = parentElement?.childNodes,
428
- childrenOfType = parentElement?.children?.filter(child => child.type === this.type),
429
- index = (childNodes?.indexOf(this) ?? 0) + 1,
430
- indexOfType = (childrenOfType?.indexOf(this) ?? 0) + 1,
431
- lastIndex = (childNodes?.length ?? 1) - index + 1,
432
- lastIndexOfType = (childrenOfType?.length ?? 1) - indexOfType + 1,
433
- content = this.toString(),
434
- plainPseudo = AstElement.#pseudoKeys.filter(f => !AstElement.#pseudo[f] && calls[f].length);
435
- if (plainPseudo.length) {
436
- Parser.warn('检测到伪选择器,请确认是否需要将":"转义成"&ratio;"。', plainPseudo);
385
+ const {children} = parentNode,
386
+ i = children.indexOf(this);
387
+ return children.slice(0, i).some(child => child.matches(selector));
388
+ }
389
+ default: // ' '
390
+ return this.getAncestors().some(ancestor => ancestor.matches(selector));
391
+ }
437
392
  }
438
- return selectors.some(str => {
439
- const /** @type {pseudoCall} */ curCalls = Object.fromEntries(AstElement.#pseudoKeys.map(f => [f, []]));
440
- str = str.replace(AstElement.#pseudoRegex, /** @type {function(...string): ''} */ (_, f, i) => {
441
- curCalls[f].push(i ? calls[f][i] : '');
442
- return '';
443
- });
444
- return Parser.run(() => this.matches(str, true))
445
- && (curCalls.root.length === 0 || !parentElement)
446
- && curCalls.is.every(s => this.matches(s))
447
- && !curCalls.not.some(s => this.matches(s))
448
- && curCalls['nth-child'].every(s => nth(s, index))
449
- && curCalls['nth-of-type'].every(s => nth(s, indexOfType))
450
- && curCalls['nth-last-child'].every(s => nth(s, lastIndex))
451
- && curCalls['nth-last-of-type'].every(s => nth(s, lastIndexOfType))
452
- && (curCalls['first-child'].length === 0 || nth('1', index))
453
- && (curCalls['first-of-type'].length === 0 || nth('1', indexOfType))
454
- && (curCalls['last-child'].length === 0 || nth('1', lastIndex))
455
- && (curCalls['last-of-type'].length === 0 || nth('1', lastIndexOfType))
456
- && (curCalls['only-child'].length === 0 || !parentElement || childNodes.length === 1)
457
- && (curCalls['only-of-type'].length === 0 || !parentElement || childrenOfType.length === 1)
458
- && (curCalls.empty.length === 0 || this.childElementCount === 0)
459
- && curCalls.contains.every(s => content.includes(s))
460
- && curCalls.has.every(s => Boolean(this.querySelector(s)))
461
- && (curCalls.header.length === 0 || this.type === 'heading')
462
- && (curCalls.parent.length === 0 || this.childElementCount > 0)
463
- && (curCalls.hidden.length === 0 || hidden)
464
- && (curCalls.visible.length === 0 || !hidden);
465
- });
393
+ return false;
394
+ }
395
+
396
+ /**
397
+ * 还原为wikitext
398
+ * @param {string} selector
399
+ * @param {string} separator 子节点间的连接符
400
+ */
401
+ toString(selector, separator = '') {
402
+ return selector && this.matches(selector)
403
+ ? ''
404
+ : this.childNodes.map(child => child.toString(selector)).join(separator);
466
405
  }
467
406
 
407
+ /** 获取所有祖先节点 */
468
408
  getAncestors() {
469
409
  const /** @type {this[]} */ ancestors = [];
470
- let {parentElement} = this;
471
- while (parentElement) {
472
- ancestors.push(parentElement);
473
- ({parentElement} = parentElement);
410
+ let {parentNode} = this;
411
+ while (parentNode) {
412
+ ancestors.push(parentNode);
413
+ ({parentNode} = parentNode);
474
414
  }
475
415
  return ancestors;
476
416
  }
477
417
 
478
418
  /**
479
- * @param {this} other
419
+ * 比较和另一个节点的相对位置
420
+ * @param {this} other 待比较的节点
480
421
  * @complexity `n`
422
+ * @throws `Error` 不在同一个语法树
481
423
  */
482
- comparePosition(other) {
424
+ compareDocumentPosition(other) {
483
425
  if (!(other instanceof AstElement)) {
484
- this.typeError('comparePosition', 'AstElement');
426
+ this.typeError('compareDocumentPosition', 'AstElement');
485
427
  } else if (this === other) {
486
428
  return 0;
487
429
  } else if (this.contains(other)) {
@@ -499,17 +441,24 @@ class AstElement extends AstNode {
499
441
  return childNodes.indexOf(aAncestors[depth]) - childNodes.indexOf(bAncestors[depth]);
500
442
  }
501
443
 
444
+ /**
445
+ * 最近的祖先节点
446
+ * @param {string} selector
447
+ */
502
448
  closest(selector = '') {
503
- let {parentElement} = this;
504
- while (parentElement) {
505
- if (parentElement.matches(selector)) {
506
- return parentElement;
449
+ let {parentNode} = this;
450
+ while (parentNode) {
451
+ if (parentNode.matches(selector)) {
452
+ return parentNode;
507
453
  }
508
- ({parentElement} = parentElement);
454
+ ({parentNode} = parentNode);
509
455
  }
456
+ return undefined;
510
457
  }
511
458
 
512
459
  /**
460
+ * 符合选择器的第一个后代节点
461
+ * @param {string} selector
513
462
  * @returns {this|undefined}
514
463
  * @complexity `n`
515
464
  */
@@ -523,9 +472,14 @@ class AstElement extends AstNode {
523
472
  return descendant;
524
473
  }
525
474
  }
475
+ return undefined;
526
476
  }
527
477
 
528
- /** @complexity `n` */
478
+ /**
479
+ * 符合选择器的所有后代节点
480
+ * @param {string} selector
481
+ * @complexity `n`
482
+ */
529
483
  querySelectorAll(selector = '') {
530
484
  const /** @type {this[]} */ descendants = [];
531
485
  for (const child of this.children) {
@@ -538,59 +492,93 @@ class AstElement extends AstNode {
538
492
  }
539
493
 
540
494
  /**
541
- * @param {number} index
495
+ * 类选择器
496
+ * @param {string} className 类名之一
497
+ */
498
+ getElementsByClassName(className) {
499
+ return typeof className === 'string'
500
+ ? this.querySelectorAll(`[className~="${className.replaceAll(/(?<!\\)"/gu, '\\"')}"]`)
501
+ : this.typeError('getElementsByClassName', 'String');
502
+ }
503
+
504
+ /**
505
+ * 标签名选择器
506
+ * @param {string} name 标签名
507
+ */
508
+ getElementsByTagName(name) {
509
+ if (typeof name === 'string') {
510
+ name = name.replaceAll(/(?<!\\)"/gu, '\\"');
511
+ return this.querySelectorAll(`ext[name="${name}"], html[name="${name}"]`);
512
+ }
513
+ return this.typeError('getElementsByTagName', 'String');
514
+ }
515
+
516
+ /**
517
+ * 获取某一行的wikitext
518
+ * @param {number} n 行号
519
+ */
520
+ getLine(n) {
521
+ return String(this).split('\n', n + 1).at(-1);
522
+ }
523
+
524
+ /**
525
+ * 将字符位置转换为行列号
526
+ * @param {number} index 字符位置
542
527
  * @complexity `n`
543
528
  */
544
529
  posFromIndex(index) {
545
530
  if (typeof index !== 'number') {
546
531
  this.typeError('posFromIndex', 'Number');
547
532
  }
548
- const text = this.toString();
549
- if (index < -text.length || index >= text.length || !Number.isInteger(index)) {
550
- return;
533
+ const text = String(this);
534
+ if (index >= -text.length && index < text.length && Number.isInteger(index)) {
535
+ const lines = text.slice(0, index).split('\n');
536
+ return {top: lines.length - 1, left: lines.at(-1).length};
551
537
  }
552
- const lines = text.slice(0, index).split('\n');
553
- return {top: lines.length - 1, left: lines.at(-1).length};
538
+ return undefined;
554
539
  }
555
540
 
556
541
  /**
557
- * @param {number} top
558
- * @param {number} left
542
+ * 将行列号转换为字符位置
543
+ * @param {number} top 行号
544
+ * @param {number} left 列号
559
545
  * @complexity `n`
560
546
  */
561
547
  indexFromPos(top, left) {
562
548
  if (typeof top !== 'number' || typeof left !== 'number') {
563
549
  this.typeError('indexFromPos', 'Number');
564
- } else if (top < 0 || left < 0 || !Number.isInteger(top) || !Number.isInteger(left)) {
565
- return;
566
550
  }
567
- const lines = this.toString().split('\n');
568
- if (lines.length < top + 1 || lines[top].length < left) {
569
- return;
570
- }
571
- return lines.slice(0, top).reduce((acc, curLine) => acc + curLine.length + 1, 0) + left;
551
+ const lines = String(this).split('\n');
552
+ return top >= 0 && left >= 0 && Number.isInteger(top) && Number.isInteger(left)
553
+ && lines.length >= top + 1 && lines[top].length >= left
554
+ ? lines.slice(0, top).reduce((acc, curLine) => acc + curLine.length + 1, 0) + left
555
+ : undefined;
572
556
  }
573
557
 
574
- /** @complexity `n` */
558
+ /**
559
+ * 获取行数和最后一行的列数
560
+ * @complexity `n`
561
+ */
575
562
  #getDimension() {
576
- const lines = this.toString().split('\n');
563
+ const lines = String(this).split('\n');
577
564
  return {height: lines.length, width: lines.at(-1).length};
578
565
  }
579
566
 
580
567
  /**
581
- * 获取当前节点的相对位置,或其第`j`个子节点的相对位置
582
- * @param {number|undefined} j
568
+ * 获取当前节点的相对字符位置,或其第`j`个子节点的相对字符位置
569
+ * @param {number|undefined} j 子节点序号
583
570
  * @complexity `n`
584
571
  */
585
572
  getRelativeIndex(j) {
586
573
  if (j !== undefined && typeof j !== 'number') {
587
574
  this.typeError('getRelativeIndex', 'Number');
588
575
  }
589
- let /** @type {(string|this)[]} */ childNodes;
576
+ let /** @type {this[]} */ childNodes;
577
+
590
578
  /**
591
- * 使用前需要先给`childNodes`赋值
592
- * @param {number} end
593
- * @param {this} parent
579
+ * 获取子节点相对于父节点的字符位置,使用前需要先给`childNodes`赋值
580
+ * @param {number} end 子节点序号
581
+ * @param {this} parent 父节点
594
582
  * @returns {number}
595
583
  */
596
584
  const getIndex = (end, parent) => childNodes.slice(0, end).reduce(
@@ -598,14 +586,14 @@ class AstElement extends AstNode {
598
586
  0,
599
587
  ) + parent.getPadding();
600
588
  if (j === undefined) {
601
- const {parentElement} = this;
602
- if (!parentElement) {
589
+ const {parentNode} = this;
590
+ if (!parentNode) {
603
591
  return 0;
604
592
  }
605
- ({childNodes} = parentElement);
606
- return getIndex(childNodes.indexOf(this), parentElement);
593
+ ({childNodes} = parentNode);
594
+ return getIndex(childNodes.indexOf(this), parentNode);
607
595
  }
608
- this.verifyChild(j, 1);
596
+ this.getAttribute('verifyChild')(j, 1);
609
597
  ({childNodes} = this);
610
598
  return getIndex(j, this);
611
599
  }
@@ -616,74 +604,97 @@ class AstElement extends AstNode {
616
604
  * @complexity `n`
617
605
  */
618
606
  getAbsoluteIndex() {
619
- const {parentElement} = this;
620
- return parentElement ? parentElement.getAbsoluteIndex() + this.getRelativeIndex() : 0;
607
+ const {parentNode} = this;
608
+ return parentNode ? parentNode.getAbsoluteIndex() + this.getRelativeIndex() : 0;
621
609
  }
622
610
 
623
611
  /**
624
612
  * 获取当前节点的相对位置,或其第`j`个子节点的相对位置
625
- * @param {number|undefined} j
613
+ * @param {number|undefined} j 子节点序号
626
614
  * @complexity `n`
627
615
  */
628
616
  #getPosition(j) {
629
- if (j === undefined) {
630
- const {parentElement} = this;
631
- if (!parentElement) {
632
- return {top: 0, left: 0};
633
- }
634
- return parentElement.posFromIndex(this.getRelativeIndex());
635
- }
636
- return this.posFromIndex(this.getRelativeIndex(j));
617
+ return j === undefined
618
+ ? this.parentNode?.posFromIndex(this.getRelativeIndex()) ?? {top: 0, left: 0}
619
+ : this.posFromIndex(this.getRelativeIndex(j));
637
620
  }
638
621
 
639
- /** @complexity `n` */
622
+ /**
623
+ * 获取当前节点的行列位置和大小
624
+ * @complexity `n`
625
+ */
640
626
  getBoundingClientRect() {
641
627
  const root = this.getRootNode();
642
628
  return {...this.#getDimension(), ...root.posFromIndex(this.getAbsoluteIndex())};
643
629
  }
644
630
 
645
- /** @complexity `n` */
631
+ /** 第一个子节点前的间距 */
632
+ getPadding() {
633
+ return 0;
634
+ }
635
+
636
+ /** 子节点间距 */
637
+ getGaps() {
638
+ return 0;
639
+ }
640
+
641
+ /**
642
+ * 行数
643
+ * @complexity `n`
644
+ */
646
645
  get offsetHeight() {
647
646
  return this.#getDimension().height;
648
647
  }
649
- /** @complexity `n` */
648
+
649
+ /**
650
+ * 最后一行的列数
651
+ * @complexity `n`
652
+ */
650
653
  get offsetWidth() {
651
654
  return this.#getDimension().width;
652
655
  }
653
- /** @complexity `n` */
656
+
657
+ /**
658
+ * 行号
659
+ * @complexity `n`
660
+ */
654
661
  get offsetTop() {
655
662
  return this.#getPosition().top;
656
663
  }
657
- /** @complexity `n` */
664
+
665
+ /**
666
+ * 列号
667
+ * @complexity `n`
668
+ */
658
669
  get offsetLeft() {
659
670
  return this.#getPosition().left;
660
671
  }
661
672
 
662
- getPadding() {
663
- return 0;
664
- }
665
-
666
- getGaps() {
667
- return 0;
668
- }
669
-
670
- /** @complexity `n` */
673
+ /**
674
+ * 位置、大小和padding
675
+ * @complexity `n`
676
+ */
671
677
  get style() {
672
678
  return {...this.#getPosition(), ...this.#getDimension(), padding: this.getPadding()};
673
679
  }
674
680
 
675
- /**
676
- * @returns {boolean}
677
- * @complexity `n`
678
- */
679
- get eof() {
680
- return this.type === 'root' || !this.nextSibling && this.parentElement.eof;
681
+ /** 内部高度 */
682
+ get clientHeight() {
683
+ const {innerText} = this;
684
+ return typeof innerText === 'string' ? innerText.split('\n').length : undefined;
685
+ }
686
+
687
+ /** 内部宽度 */
688
+ get clientWidth() {
689
+ const {innerText} = this;
690
+ return typeof innerText === 'string' ? innerText.split('\n').at(-1).length : undefined;
681
691
  }
682
692
 
683
693
  /**
694
+ * 输出AST
684
695
  * @template {'markup'|'json'} T
685
- * @param {T} format
686
- * @param {T extends 'markup' ? number : string} depth
696
+ * @param {T} format 输出格式
697
+ * @param {T extends 'markup' ? number : string} depth 输出深度
687
698
  * @returns {T extends 'markup' ? void : Record<string, any>}
688
699
  */
689
700
  print(format = 'markup', depth = 0) {
@@ -691,7 +702,7 @@ class AstElement extends AstNode {
691
702
  const {childNodes, ...prop} = this,
692
703
  json = {
693
704
  ...prop,
694
- childNodes: childNodes.map(child => typeof child === 'string' ? child : child.print('json')),
705
+ childNodes: childNodes.map(child => child.type === 'text' ? String(child) : child.print('json')),
695
706
  };
696
707
  if (typeof depth === 'string') {
697
708
  fs.writeFileSync(
@@ -704,25 +715,26 @@ class AstElement extends AstNode {
704
715
  this.typeError('print', 'Number');
705
716
  }
706
717
  const indent = ' '.repeat(depth),
707
- str = this.toString(),
718
+ str = String(this),
708
719
  {childNodes, type, firstChild} = this,
709
720
  {length} = childNodes;
710
- if (!str || length === 0 || typeof firstChild === 'string' && firstChild === str) {
711
- console.log(`${indent}\x1b[32m<%s>\x1b[0m${noWrap(str)}\x1b[32m</%s>\x1b[0m`, type, type);
712
- return;
721
+ if (!str || length === 0 || firstChild.type === 'text' && String(firstChild) === str) {
722
+ console.log(`${indent}\x1B[32m<%s>\x1B[0m${noWrap(str)}\x1B[32m</%s>\x1B[0m`, type, type);
723
+ return undefined;
713
724
  }
714
725
  Parser.info(`${indent}<${type}>`);
715
726
  let i = this.getPadding();
716
727
  if (i) {
717
728
  console.log(`${indent} ${noWrap(str.slice(0, i))}`);
718
729
  }
719
- for (const [j, child] of childNodes.entries()) {
720
- const childStr = String(child),
730
+ for (let j = 0; j < length; j++) {
731
+ const child = childNodes[j],
732
+ childStr = String(child),
721
733
  gap = j === length - 1 ? 0 : this.getGaps(j);
722
734
  if (!childStr) {
723
735
  // pass
724
- } else if (typeof child === 'string') {
725
- console.log(`${indent} ${noWrap(child)}`);
736
+ } else if (child.type === 'text') {
737
+ console.log(`${indent} ${noWrap(String(child))}`);
726
738
  } else {
727
739
  child.print('markup', depth + 1);
728
740
  }
@@ -735,11 +747,7 @@ class AstElement extends AstNode {
735
747
  console.log(`${indent} ${noWrap(str.slice(i))}`);
736
748
  }
737
749
  Parser.info(`${indent}</${type}>`);
738
- }
739
-
740
- /** @param {number} n */
741
- getLine(n) {
742
- return this.toString().split('\n', n + 1).at(-1);
750
+ return undefined;
743
751
  }
744
752
  }
745
753