wikiparser-node 0.0.3 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -76,6 +76,7 @@ const /** @type {Parser} */ Parser = {
76
76
  ['QuoteToken'],
77
77
  ['ExtLinkToken'],
78
78
  ['MagicLinkToken'],
79
+ ['ListToken', 'DdToken'],
79
80
  ],
80
81
 
81
82
  config: './config/default',
package/mixin/sol.js ADDED
@@ -0,0 +1,38 @@
1
+ 'use strict';
2
+
3
+ const /** @type {Parser} */ Parser = require('..'),
4
+ Token = require('../src'); // eslint-disable-line no-unused-vars
5
+
6
/**
 * Mixin for tokens whose serialized form has to begin on a fresh line
 * (the name presumably stands for "start of line").
 *
 * The returned class prefixes the base class output with a line break whenever
 * the previous visible sibling does not already end with one, and can optionally
 * append one before the next visible sibling.
 * @template T
 * @param {T} constructor base class to extend
 * @returns {T}
 */
const sol = constructor => class extends constructor {
	/**
	 * Line break required in front of this token.
	 * @this {Token}
	 * @returns {''|'\n'} `'\n'` when a previous visible sibling exists and its string form
	 * does not end with a line break, otherwise `''`
	 */
	prependNewLine() {
		const {previousVisibleSibling} = this;
		return previousVisibleSibling && !String(previousVisibleSibling).endsWith('\n') ? '\n' : '';
	}

	/**
	 * Line break required behind this token.
	 * @this {Token}
	 * @returns {''|'\n'} `'\n'` when a next visible sibling exists and its string form
	 * does not start with a line break, otherwise `''`
	 */
	appendNewLine() {
		const {nextVisibleSibling} = this;
		return nextVisibleSibling && !String(nextVisibleSibling).startsWith('\n') ? '\n' : '';
	}

	/**
	 * Serialize the token, starting on a new line.
	 * @param {boolean} ownLine whether the token must also be followed by a line break
	 * @returns {string}
	 */
	toString(ownLine = false) {
		return `${this.prependNewLine()}${super.toString()}${ownLine ? this.appendNewLine() : ''}`;
	}

	/**
	 * Number of characters emitted before the base class content.
	 * @returns {number}
	 */
	getPadding() {
		// `toString()` is "line break + base output", so any padding the base class reports
		// for its own output has to be added on top of the injected line break.
		// The optional call keeps the mixin usable on bases without `getPadding`.
		return this.prependNewLine().length + (super.getPadding?.() ?? 0);
	}

	/**
	 * Visible text of the token, starting on a new line.
	 * @param {boolean} ownLine whether the token must also be followed by a line break
	 * @returns {string}
	 */
	text(ownLine = false) {
		return `${this.prependNewLine()}${super.text()}${ownLine ? this.appendNewLine() : ''}`;
	}
};
36
+
37
+ Parser.mixins.sol = __filename;
38
+ module.exports = sol;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "wikiparser-node",
3
- "version": "0.0.3",
3
+ "version": "0.1.0",
4
4
  "description": "A Node.js parser for MediaWiki markup with AST",
5
5
  "keywords": [
6
6
  "mediawiki",
package/parser/list.js ADDED
@@ -0,0 +1,58 @@
1
+ 'use strict';
2
+
3
+ const /** @type {Parser} */ Parser = require('..');
4
+
5
+ /**
6
+ * @param {string} text
7
+ * @param {accum} accum
8
+ */
9
+ const parseList = (text, config = Parser.getConfig(), accum = []) => {
10
+ const mt = text.match(/^(?:[;:*#]|\x00\d+c\x7f)*[;:*#]/);
11
+ if (!mt) {
12
+ return text;
13
+ }
14
+ const ListToken = require('../src/nowiki/list'),
15
+ [prefix] = mt;
16
+ text = `\x00${accum.length}d\x7f${text.slice(prefix.length)}`;
17
+ new ListToken(prefix, config, accum);
18
+ let dt = prefix.split(';').length - 1;
19
+ if (!dt) {
20
+ return text;
21
+ }
22
+ const DdToken = require('../src/nowiki/dd');
23
+ let regex = /:+|-{/g,
24
+ ex = regex.exec(text),
25
+ lc = 0;
26
+ while (ex && dt) {
27
+ const {0: syntax, index} = ex;
28
+ if (syntax[0] === ':') {
29
+ if (syntax.length >= dt) {
30
+ new DdToken(':'.repeat(dt), config, accum);
31
+ return `${text.slice(0, index)}\x00${accum.length - 1}d\x7f${text.slice(index + dt)}`;
32
+ }
33
+ text = `${text.slice(0, index)}\x00${accum.length}d\x7f${text.slice(regex.lastIndex)}`;
34
+ dt -= syntax.length;
35
+ regex.lastIndex = index + 4 + String(accum.length).length;
36
+ new DdToken(syntax, config, accum);
37
+ } else if (syntax === '-{') {
38
+ if (!lc) {
39
+ const {lastIndex} = regex;
40
+ regex = /-{|}-/g;
41
+ regex.lastIndex = lastIndex;
42
+ }
43
+ lc++;
44
+ } else {
45
+ lc--;
46
+ if (!lc) {
47
+ const {lastIndex} = regex;
48
+ regex = /:+|-{/g;
49
+ regex.lastIndex = lastIndex;
50
+ }
51
+ }
52
+ ex = regex.exec(text);
53
+ }
54
+ return text;
55
+ };
56
+
57
+ Parser.parsers.parseList = __filename;
58
+ module.exports = parseList;
package/src/attribute.js CHANGED
@@ -28,14 +28,13 @@ class AttributeToken extends Token {
28
28
  ) {
29
29
  equal = '{{=}}';
30
30
  }
31
- const str = [...this.#attr].map(([k, v]) => {
31
+ return [...this.#attr].map(([k, v]) => {
32
32
  if (v === true) {
33
33
  return k;
34
34
  }
35
35
  const quote = v.includes('"') ? "'" : '"';
36
36
  return `${k}${equal}${quote}${v}${quote}`;
37
37
  }).join(' ');
38
- return str && ` ${str}`;
39
38
  }
40
39
 
41
40
  /** @complexity `n` */
@@ -56,17 +55,17 @@ class AttributeToken extends Token {
56
55
  */
57
56
  #parseAttr() {
58
57
  this.#attr.clear();
59
- const config = this.getAttribute('config'),
60
- include = this.getAttribute('include'),
61
- /** @type {Token & {firstChild: string}} */ token = this.type !== 'ext-attr' && !Parser.running
62
- ? Parser.run(() => new Token(string, config).parseOnce(0, include).parseOnce())
63
- : undefined,
64
- string = removeComment(token?.firstChild ?? this.toString()).replace(/\x00\d+~\x7f/g, '=');
65
- const build = /** @param {string|boolean} str */ str => {
66
- return typeof str === 'boolean' || !(token instanceof Token)
67
- ? str
68
- : token.buildFromStr(str).map(String).join('');
69
- };
58
+ let string = this.toString(),
59
+ /** @type {Token & {firstChild: string}} */ token;
60
+ if (this.type !== 'ext-attr' && !Parser.running) {
61
+ const config = this.getAttribute('config'),
62
+ include = this.getAttribute('include');
63
+ token = Parser.run(() => new Token(string, config).parseOnce(0, include).parseOnce());
64
+ string = token.firstChild;
65
+ }
66
+ string = removeComment(string).replace(/\x00\d+~\x7f/g, '=');
67
+ const build = /** @param {string|boolean} str */ str =>
68
+ typeof str === 'boolean' || !token ? str : token.buildFromStr(str).map(String).join('');
70
69
  for (const [, key,, quoted, unquoted] of string
71
70
  .matchAll(/([^\s/][^\s/=]*)(?:\s*=\s*(?:(["'])(.*?)(?:\2|$)|(\S*)))?/sg)
72
71
  ) {
@@ -241,13 +240,24 @@ class AttributeToken extends Token {
241
240
  this.setAttr(key, force === true || force === undefined && value === false);
242
241
  }
243
242
 
243
+ #leadingSpace(str = super.toString()) {
244
+ return str && !/^\s/.test(str) ? ' ' : '';
245
+ }
246
+
244
247
  toString() {
245
- const str = super.toString();
248
+ let str = super.toString();
249
+ str = `${this.#leadingSpace(str)}${str}`;
246
250
  return this.type === 'table-attr' ? str.replaceAll('\n', ' ') : str;
247
251
  }
248
252
 
253
+ getPadding() {
254
+ return this.#leadingSpace().length;
255
+ }
256
+
249
257
  text() {
250
- return this.#updateFromAttr();
258
+ let str = this.#updateFromAttr();
259
+ str = `${this.#leadingSpace(str)}${str}`;
260
+ return this.type === 'table-attr' ? str.replaceAll('\n', ' ') : str;
251
261
  }
252
262
 
253
263
  /** @returns {[number, string][]} */
package/src/extLink.js CHANGED
@@ -49,15 +49,27 @@ class ExtLinkToken extends Token {
49
49
  }
50
50
  }
51
51
 
52
+ #correct() {
53
+ if (!this.#space
54
+ // 都替换成`<`肯定不对,但无妨
55
+ && /^[^[\]<>"\x00-\x20\x7f\p{Zs}\ufffd]/u.test(this.children[1]?.text().replace(/&[lg]t;/, '<'))
56
+ ) {
57
+ this.#space = ' ';
58
+ }
59
+ }
60
+
52
61
  toString() {
62
+ this.#correct();
53
63
  return `[${this.firstElementChild.toString()}${this.#space}${this.children[1]?.toString() ?? ''}]`;
54
64
  }
55
65
 
56
66
  getPadding() {
67
+ this.#correct();
57
68
  return 1;
58
69
  }
59
70
 
60
71
  getGaps() {
72
+ this.#correct();
61
73
  return this.#space.length;
62
74
  }
63
75
 
package/src/heading.js CHANGED
@@ -1,6 +1,7 @@
1
1
  'use strict';
2
2
 
3
3
  const fixedToken = require('../mixin/fixedToken'),
4
+ sol = require('../mixin/sol'),
4
5
  /** @type {Parser} */ Parser = require('..'),
5
6
  Token = require('.');
6
7
 
@@ -8,7 +9,7 @@ const fixedToken = require('../mixin/fixedToken'),
8
9
  * 章节标题
9
10
  * @classdesc `{childNodes: [Token, HiddenToken]}`
10
11
  */
11
- class HeadingToken extends fixedToken(Token) {
12
+ class HeadingToken extends fixedToken(sol(Token)) {
12
13
  type = 'heading';
13
14
 
14
15
  /**
@@ -37,34 +38,29 @@ class HeadingToken extends fixedToken(Token) {
37
38
  return token;
38
39
  }
39
40
 
41
+ /** @this {HeadingToken & {prependNewLine(): ''|'\n', appendNewLine(): ''|'\n'}} */
40
42
  toString() {
41
- const equals = '='.repeat(Number(this.name)),
42
- {previousVisibleSibling, nextVisibleSibling} = this;
43
- return `${
44
- typeof previousVisibleSibling === 'string' && !previousVisibleSibling.endsWith('\n')
45
- || previousVisibleSibling instanceof Token
46
- ? '\n'
47
- : ''
48
- }${equals}${super.toString(equals)}${
49
- typeof nextVisibleSibling === 'string' && !nextVisibleSibling.startsWith('\n')
50
- || nextVisibleSibling instanceof Token
51
- ? '\n'
52
- : ''
53
- }`;
43
+ const equals = '='.repeat(Number(this.name));
44
+ return `${this.prependNewLine()}${equals}${
45
+ this.firstElementChild.toString()
46
+ }${equals}${this.lastElementChild.toString()}${this.appendNewLine()}`;
54
47
  }
55
48
 
56
49
  getPadding() {
57
- return Number(this.name);
50
+ return super.getPadding() + Number(this.name);
58
51
  }
59
52
 
60
53
  getGaps() {
61
54
  return Number(this.name);
62
55
  }
63
56
 
64
- /** @returns {string} */
57
+ /**
58
+ * @this {HeadingToken & {prependNewLine(): ''|'\n', appendNewLine(): ''|'\n'}}
59
+ * @returns {string}
60
+ */
65
61
  text() {
66
62
  const equals = '='.repeat(Number(this.name));
67
- return `${equals}${this.firstElementChild.text()}${equals}`;
63
+ return `${this.prependNewLine()}${equals}${this.firstElementChild.text()}${equals}${this.appendNewLine()}`;
68
64
  }
69
65
 
70
66
  /** @returns {[number, string][]} */
package/src/index.js CHANGED
@@ -37,6 +37,7 @@
37
37
  * l: LinkToken
38
38
  * q: QuoteToken
39
39
  * w: ExtLinkToken
40
+ * d: ListToken
40
41
  */
41
42
 
42
43
  const {externalUse} = require('../util/debug'),
@@ -436,11 +437,7 @@ class Token extends AstElement {
436
437
  this.#parseLinks();
437
438
  break;
438
439
  case 6: {
439
- const lines = this.firstChild.split('\n');
440
- for (let i = 0; i < lines.length; i++) {
441
- lines[i] = this.#parseQuotes(lines[i]);
442
- }
443
- this.setText(lines.join('\n'));
440
+ this.#parseQuotes();
444
441
  break;
445
442
  }
446
443
  case 7:
@@ -450,6 +447,7 @@ class Token extends AstElement {
450
447
  this.#parseMagicLinks();
451
448
  break;
452
449
  case 9:
450
+ this.#parseList();
453
451
  break;
454
452
  case 10:
455
453
  // no default
@@ -579,10 +577,14 @@ class Token extends AstElement {
579
577
  this.setText(parseLinks(this.firstChild, this.#config, this.#accum));
580
578
  }
581
579
 
582
- /** @param {string} text */
583
- #parseQuotes(text) {
584
- const parseQuotes = require('../parser/quotes');
585
- return parseQuotes(text, this.#config, this.#accum);
580
+ /** @this {Token & {firstChild: string}} */
581
+ #parseQuotes() {
582
+ const parseQuotes = require('../parser/quotes'),
583
+ lines = this.firstChild.split('\n');
584
+ for (let i = 0; i < lines.length; i++) {
585
+ lines[i] = parseQuotes(lines[i], this.#config, this.#accum);
586
+ }
587
+ this.setText(lines.join('\n'));
586
588
  }
587
589
 
588
590
  /** @this {Token & {firstChild: string}} */
@@ -596,6 +598,15 @@ class Token extends AstElement {
596
598
  const parseMagicLinks = require('../parser/magicLinks');
597
599
  this.setText(parseMagicLinks(this.firstChild, this.#config, this.#accum));
598
600
  }
601
+
602
+ #parseList() {
603
+ const parseList = require('../parser/list'),
604
+ lines = this.firstChild.split('\n');
605
+ for (let i = 0; i < lines.length; i++) {
606
+ lines[i] = parseList(lines[i], this.#config, this.#accum);
607
+ }
608
+ this.setText(lines.join('\n'));
609
+ }
599
610
  }
600
611
 
601
612
  Parser.classes.Token = __filename;
@@ -0,0 +1,49 @@
1
+ 'use strict';
2
+
3
+ const /** @type {Parser} */ Parser = require('../..'),
4
+ NowikiToken = require('.');
5
+
6
/**
 * Token for `:` syntax
 * @classdesc `{childNodes: [string]}`
 */
class DdToken extends NowikiToken {
	type = 'dd';
	// The four fields below are derived from the syntax string by `#update()`
	dt;
	ul;
	ol;
	indent;

	/**
	 * Recompute the derived attributes from the syntax string:
	 * `ul`/`ol`/`dt` tell whether `*`/`#`/`;` occur, `indent` counts the `:` characters.
	 * @param {string} str
	 */
	#update(str) {
		const hasUl = str.includes('*');
		const hasOl = str.includes('#');
		const hasDt = str.includes(';');
		const colonCount = str.split(':').length - 1;
		this.setAttribute('ul', hasUl).setAttribute('ol', hasOl)
			.setAttribute('dt', hasDt).setAttribute('indent', colonCount);
	}

	/**
	 * @param {string} str syntax string
	 * @param config parser configuration, forwarded to the parent constructor
	 * @param {accum} accum token accumulator, forwarded to the parent constructor
	 */
	constructor(str, config = Parser.getConfig(), accum = []) {
		super(str, config, accum);
		const sealed = this.seal(['dt', 'ul', 'ol', 'indent']);
		sealed.#update(str);
	}

	/**
	 * This token never reports any entries here.
	 * @returns {[number, string][]}
	 */
	plain() {
		return [];
	}

	/**
	 * Replace the syntax string after validating its characters.
	 * Only `:` is allowed when `type` is 'dd'; otherwise any of `;:*#`.
	 * @param {string} str
	 * @throws {RangeError} when `str` contains a character outside the allowed set
	 */
	setText(str) {
		let src = ';:*#';
		if (this.type === 'dd') {
			src = ':';
		}
		const illegal = new RegExp(`[^${src}]`);
		if (illegal.test(str)) {
			const allowed = Array.from(src, c => `"${c}"`).join('、');
			throw new RangeError(`${this.constructor.name} 仅能包含${allowed}!`);
		}
		this.#update(str);
		return super.setText(str);
	}
}
47
+
48
+ Parser.classes.DdToken = __filename;
49
+ module.exports = DdToken;
package/src/nowiki/hr.js CHANGED
@@ -1,13 +1,14 @@
1
1
  'use strict';
2
2
 
3
- const /** @type {Parser} */ Parser = require('../..'),
3
+ const sol = require('../../mixin/sol'),
4
+ /** @type {Parser} */ Parser = require('../..'),
4
5
  NowikiToken = require('.');
5
6
 
6
7
  /**
7
8
  * `<hr>`
8
9
  * @classdesc `{childNodes: [string]}`
9
10
  */
10
- class HrToken extends NowikiToken {
11
+ class HrToken extends sol(NowikiToken) {
11
12
  type = 'hr';
12
13
 
13
14
  /**
@@ -0,0 +1,16 @@
1
+ 'use strict';
2
+
3
+ const sol = require('../../mixin/sol'),
4
+ /** @type {Parser} */ Parser = require('../..'),
5
+ DdToken = require('./dd');
6
+
7
/**
 * Token for list syntax made of `;`, `:`, `*` and `#`
 * @classdesc `{childNodes: [string]}`
 */
const LineStartDdToken = sol(DdToken);

class ListToken extends LineStartDdToken {
	/** Node type of this token */
	type = 'list';
}
14
+
15
+ Parser.classes.ListToken = __filename;
16
+ module.exports = ListToken;
@@ -70,15 +70,15 @@ class TableToken extends TrToken {
70
70
  }
71
71
 
72
72
  /**
73
- * @template {string|Token} T
73
+ * @template {TrToken|SyntaxToken} T
74
74
  * @param {T} token
75
75
  * @returns {T}
76
76
  * @complexity `n`
77
77
  */
78
78
  insertAt(token, i = this.childNodes.length) {
79
- const previous = this.childNodes.at(i - 1),
79
+ const previous = this.children.at(i - 1),
80
80
  {closingPattern} = TableToken;
81
- if (token instanceof TrToken && token.type === 'td' && previous instanceof TrToken && previous.type === 'tr') {
81
+ if (token.type === 'td' && previous.type === 'tr') {
82
82
  Parser.warn('改为将单元格插入当前行。');
83
83
  return previous.appendChild(token);
84
84
  } else if (!Parser.running && i === this.childNodes.length && token instanceof SyntaxToken
@@ -417,14 +417,14 @@ class TableToken extends TrToken {
417
417
  /** @complexity `n` */
418
418
  #prependTableRow() {
419
419
  const row = Parser.run(() => new TrToken('\n|-', undefined, this.getAttribute('config'))),
420
- {childNodes} = this,
421
- [,, plain] = childNodes,
422
- start = typeof plain === 'string' || plain.isPlain() ? 3 : 2,
423
- /** @type {TdToken[]} */ children = childNodes.slice(start),
424
- index = children.findIndex(({type}) => type !== 'td');
420
+ {children} = this,
421
+ [,, plain] = children,
422
+ start = plain?.isPlain() ? 3 : 2,
423
+ /** @type {TdToken[]} */ tdChildren = children.slice(start),
424
+ index = tdChildren.findIndex(({type}) => type !== 'td');
425
425
  this.insertAt(row, index === -1 ? -1 : index + start);
426
426
  Parser.run(() => {
427
- for (const cell of children.slice(0, index === -1 ? undefined : index)) {
427
+ for (const cell of tdChildren.slice(0, index === -1 ? undefined : index)) {
428
428
  if (cell.subtype !== 'caption') {
429
429
  row.appendChild(cell);
430
430
  }
package/src/table/tr.js CHANGED
@@ -48,15 +48,13 @@ class TrToken extends attributeParent(Token, 1) {
48
48
  }
49
49
 
50
50
  #correct() {
51
- const [,, child] = this.childNodes;
52
- if (typeof child === 'string' && !child.startsWith('\n')) {
53
- this.setText(`\n${child}`, 2);
54
- } else if (typeof child !== 'string' && child?.isPlain()) {
51
+ const [,, child] = this.children;
52
+ if (child?.isPlain()) {
55
53
  const {firstChild} = child;
56
54
  if (typeof firstChild !== 'string') {
57
55
  child.prepend('\n');
58
56
  } else if (!firstChild.startsWith('\n')) {
59
- child.setText(`\n${firstChild}`, 0);
57
+ child.setText(`\n${firstChild}`);
60
58
  }
61
59
  }
62
60
  }
package/typings/node.d.ts CHANGED
@@ -7,7 +7,7 @@ declare global {
7
7
  T extends 'childNodes' ? (string|Token)[] :
8
8
  T extends 'parentNode' ? Token|undefined :
9
9
  T extends 'optional'|'tags' ? string[] :
10
- T extends 'stage' ? number :
10
+ T extends 'stage'|'indent' ? number :
11
11
  T extends 'config' ? ParserConfig :
12
12
  T extends 'accum' ? accum :
13
13
  T extends 'acceptable' ? Record<string, Ranges> :
@@ -15,7 +15,7 @@ declare global {
15
15
  T extends 'keys' ? Set<string> :
16
16
  T extends 'args' ? Record<string, Set<ParameterToken>> :
17
17
  T extends 'attr' ? Map<string, string|true> :
18
- T extends 'include'|'selfLink' ? boolean :
18
+ T extends 'include'|'selfLink'|'ul'|'ol'|'dt' ? boolean :
19
19
  T extends 'pattern' ? RegExp :
20
20
  string;
21
21
  }
package/src/listToken.js DELETED
@@ -1,47 +0,0 @@
1
- 'use strict';
2
- const Token = require('.'),
3
- AtomToken = require('./atom'),
4
- {fixToken} = require('./util');
5
-
6
- class ListToken extends fixToken(Token) {
7
- type = 'list';
8
-
9
- /**
10
- * @param {string} syntax
11
- * @param {?string|number|Token|(string|Token)[]} content
12
- * @param {Object<string, any>} config
13
- * @param {Token} parent
14
- * @param {Token[]} accum
15
- */
16
- constructor(syntax, content, config = require(Token.config), parent = null, accum = [], isTable = false) {
17
- if (/[^:;#*]/.test(syntax)) {
18
- throw new RangeError('List语法只接受":"、";"、"#"或"*"!');
19
- }
20
- super(new AtomToken(syntax, 'list-syntax'), config, true, parent, accum, ['AtomToken', 'Token']);
21
- const inner = new Token(content, config, true, this, accum);
22
- inner.type = 'list-inner';
23
- inner.set('stage', isTable ? 4 : 10);
24
- this.lists = new Set(syntax.split(''));
25
- this.seal();
26
- }
27
-
28
- isDt() {
29
- return this.$children[0].contains(';');
30
- }
31
-
32
- idDd() {
33
- return this.$children[0].contains(':');
34
- }
35
-
36
- isOl() {
37
- return this.$children[0].contains('#');
38
- }
39
-
40
- isUl() {
41
- return this.$children[0].contains('*');
42
- }
43
- }
44
-
45
- Token.classes.ListToken = ListToken;
46
-
47
- module.exports = ListToken;