wikiparser-node 0.4.0 → 0.6.2-b
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bundle/bundle.min.js +34 -0
- package/package.json +12 -5
- package/.eslintrc.json +0 -714
- package/README.md +0 -39
- package/config/default.json +0 -769
- package/config/llwiki.json +0 -630
- package/config/moegirl.json +0 -727
- package/config/zhwiki.json +0 -1269
- package/errors/README +0 -1
- package/index.js +0 -245
- package/jsconfig.json +0 -7
- package/lib/element.js +0 -755
- package/lib/node.js +0 -585
- package/lib/ranges.js +0 -131
- package/lib/text.js +0 -146
- package/lib/title.js +0 -69
- package/mixin/attributeParent.js +0 -113
- package/mixin/fixedToken.js +0 -40
- package/mixin/hidden.js +0 -19
- package/mixin/sol.js +0 -59
- package/parser/brackets.js +0 -112
- package/parser/commentAndExt.js +0 -63
- package/parser/converter.js +0 -45
- package/parser/externalLinks.js +0 -31
- package/parser/hrAndDoubleUnderscore.js +0 -35
- package/parser/html.js +0 -42
- package/parser/links.js +0 -98
- package/parser/list.js +0 -59
- package/parser/magicLinks.js +0 -41
- package/parser/quotes.js +0 -64
- package/parser/selector.js +0 -175
- package/parser/table.js +0 -112
- package/printed/README +0 -1
- package/printed/example.json +0 -120
- package/src/arg.js +0 -169
- package/src/atom/hidden.js +0 -13
- package/src/atom/index.js +0 -41
- package/src/attribute.js +0 -422
- package/src/converter.js +0 -157
- package/src/converterFlags.js +0 -232
- package/src/converterRule.js +0 -253
- package/src/extLink.js +0 -167
- package/src/gallery.js +0 -91
- package/src/heading.js +0 -100
- package/src/html.js +0 -202
- package/src/imageParameter.js +0 -254
- package/src/index.js +0 -737
- package/src/link/category.js +0 -53
- package/src/link/file.js +0 -265
- package/src/link/galleryImage.js +0 -61
- package/src/link/index.js +0 -322
- package/src/magicLink.js +0 -108
- package/src/nowiki/comment.js +0 -57
- package/src/nowiki/dd.js +0 -59
- package/src/nowiki/doubleUnderscore.js +0 -51
- package/src/nowiki/hr.js +0 -41
- package/src/nowiki/index.js +0 -44
- package/src/nowiki/list.js +0 -16
- package/src/nowiki/noinclude.js +0 -28
- package/src/nowiki/quote.js +0 -36
- package/src/onlyinclude.js +0 -54
- package/src/parameter.js +0 -187
- package/src/syntax.js +0 -83
- package/src/table/index.js +0 -967
- package/src/table/td.js +0 -308
- package/src/table/tr.js +0 -282
- package/src/tagPair/ext.js +0 -105
- package/src/tagPair/include.js +0 -50
- package/src/tagPair/index.js +0 -117
- package/src/transclude.js +0 -703
- package/test/api.js +0 -83
- package/test/real.js +0 -133
- package/test/test.js +0 -28
- package/test/util.js +0 -80
- package/tool/index.js +0 -918
- package/typings/api.d.ts +0 -13
- package/typings/array.d.ts +0 -28
- package/typings/event.d.ts +0 -24
- package/typings/index.d.ts +0 -94
- package/typings/node.d.ts +0 -29
- package/typings/parser.d.ts +0 -16
- package/typings/table.d.ts +0 -14
- package/typings/token.d.ts +0 -22
- package/typings/tool.d.ts +0 -11
- package/util/debug.js +0 -73
- package/util/string.js +0 -88
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
'use strict';
|
|
2
|
-
|
|
3
|
-
const Parser = require('..'),
|
|
4
|
-
AstText = require('../lib/text');
|
|
5
|
-
|
|
6
|
-
/**
|
|
7
|
-
* 解析\<hr\>和状态开关
|
|
8
|
-
* @param {{firstChild: AstText, type: string}} root 根节点
|
|
9
|
-
* @param {accum} accum
|
|
10
|
-
*/
|
|
11
|
-
const parseHrAndDoubleUnderscore = ({firstChild: {data}, type}, config = Parser.getConfig(), accum = []) => {
|
|
12
|
-
const HrToken = require('../src/nowiki/hr'),
|
|
13
|
-
DoubleUnderscoreToken = require('../src/nowiki/doubleUnderscore');
|
|
14
|
-
const {doubleUnderscore} = config;
|
|
15
|
-
if (type !== 'root') {
|
|
16
|
-
data = `\0${data}`;
|
|
17
|
-
}
|
|
18
|
-
data = data.replaceAll(/^((?:\0\d+c\x7F)*)(-{4,})/gmu, (_, lead, m) => {
|
|
19
|
-
new HrToken(m.length, config, accum);
|
|
20
|
-
return `${lead}\0${accum.length - 1}r\x7F`;
|
|
21
|
-
}).replaceAll(
|
|
22
|
-
new RegExp(`__(${doubleUnderscore.flat().join('|')})__`, 'giu'),
|
|
23
|
-
/** @param {string} p1 */ (m, p1) => {
|
|
24
|
-
if (doubleUnderscore[0].includes(p1.toLowerCase()) || doubleUnderscore[1].includes(p1)) {
|
|
25
|
-
new DoubleUnderscoreToken(p1, config, accum);
|
|
26
|
-
return `\0${accum.length - 1}u\x7F`;
|
|
27
|
-
}
|
|
28
|
-
return m;
|
|
29
|
-
},
|
|
30
|
-
);
|
|
31
|
-
return type === 'root' ? data : data.slice(1);
|
|
32
|
-
};
|
|
33
|
-
|
|
34
|
-
Parser.parsers.parseHrAndDoubleUnderscore = __filename;
|
|
35
|
-
module.exports = parseHrAndDoubleUnderscore;
|
package/parser/html.js
DELETED
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
'use strict';
|
|
2
|
-
|
|
3
|
-
const Parser = require('..');
|
|
4
|
-
|
|
5
|
-
/**
 * Parse HTML tags.
 *
 * The text is split on `<`; each piece that looks like a tag whose lower-cased
 * name is listed in `config.html` is replaced by a `\0<index>x\x7F` placeholder,
 * everything else is copied through unchanged.
 * @param {string} wikitext wikitext
 * @param {object} config parser configuration; `config.html` is flattened into the list of accepted tag names
 * @param {accum} accum
 * @returns {string} the text with placeholders substituted
 */
const parseHtml = (wikitext, config = Parser.getConfig(), accum = []) => {
	// Groups: 1 leading slash, 2 tag name, 3 attributes, 4 `>` or `/>`, 5 text after the tag.
	const tagPattern = /^(\/?)([a-z][^\s/>]*)(\s[^>]*?)?(\/?>)([^<]*)$/iu;
	const allowed = config.html.flat();
	const [head, ...segments] = wikitext.split('<');
	let output = head;
	for (const segment of segments) {
		const match = tagPattern.exec(segment);
		const rawName = match?.[2];
		const name = rawName?.toLowerCase();
		if (match === null || !allowed.includes(name)) {
			output += `<${segment}`;
			continue;
		}
		const AttributeToken = require('../src/attribute');
		const slash = match[1];
		const params = match[3] ?? '';
		const brace = match[4];
		const rest = match[5];
		const attr = new AttributeToken(params, 'html-attr', name, config, accum);
		const itemprop = attr.getAttr('itemprop');

		// <meta> and <link> are only accepted with `itemprop` plus `content`/`href` respectively.
		let requiredAttr;
		if (name === 'meta') {
			requiredAttr = 'content';
		} else if (name === 'link') {
			requiredAttr = 'href';
		}
		if (requiredAttr !== undefined && (itemprop === undefined || attr.getAttr(requiredAttr) === undefined)) {
			output += `<${segment}`;
			// Drop the last `accum` entry - presumably the AttributeToken just created (TODO confirm).
			accum.pop();
			continue;
		}

		// The placeholder index is taken before the HtmlToken is constructed.
		output += `\0${accum.length}x\x7F${rest}`;
		const HtmlToken = require('../src/html');
		new HtmlToken(rawName, attr, slash === '/', brace === '/>', config, accum);
	}
	return output;
};
|
|
40
|
-
|
|
41
|
-
Parser.parsers.parseHtml = __filename;
|
|
42
|
-
module.exports = parseHtml;
|
package/parser/links.js
DELETED
|
@@ -1,98 +0,0 @@
|
|
|
1
|
-
'use strict';
|
|
2
|
-
|
|
3
|
-
const Parser = require('..'),
|
|
4
|
-
Token = require('../src');
|
|
5
|
-
|
|
6
|
-
/**
 * Parse internal links (`[[...]]`), including file and category links.
 *
 * The text is split on `[[`. A piece that forms a valid link is replaced by a
 * `\0<index>l\x7F` placeholder followed by whatever came after the closing `]]`;
 * any other piece is copied through with its `[[` restored.
 * @param {string} wikitext wikitext
 * @param {object} config parser configuration; `config.protocol` is used to reject external-style targets
 * @param {accum} accum
 * @returns {string} the text with placeholders substituted
 */
const parseLinks = (wikitext, config = Parser.getConfig(), accum = []) => {
	const parseQuotes = require('./quotes.js');
	// Groups: 1 target, 2 optional text after `|`, 3 remainder after `]]`.
	const closedPattern = /^([^\n<>[\]{}|]+)(?:\|(.*?[^\]]))?\]\](.*)$/su;
	// A target followed by `|` with no `]]` in this piece: may be a file link whose caption contains links.
	const openPattern = /^([^\n<>[\]{}|]+)\|(.*)$/su;
	const externalPattern = new RegExp(`^\\s*(?:${config.protocol})`, 'iu');
	const pieces = wikitext.split('[[');
	let result = pieces.shift();
	for (let i = 0; i < pieces.length; i++) {
		const piece = pieces[i];
		const unparsed = `[[${piece}`;
		let mightBeImg, link, text, after;
		const closed = closedPattern.exec(piece);
		if (closed) {
			[, link, text, after] = closed;
			// `[[a|[b]]]`: the first `]` belongs to the link text.
			if (after[0] === ']' && text?.includes('[')) {
				text += ']';
				after = after.slice(1);
			}
		} else {
			const open = openPattern.exec(piece);
			if (open) {
				mightBeImg = true;
				[, link, text] = open;
			}
		}
		if (link === undefined || externalPattern.test(link) || /\0\d+[exhbru]\x7F/u.test(link)) {
			result += unparsed;
			continue;
		}
		let page = link;
		if (link.includes('%')) {
			try {
				page = decodeURIComponent(link);
			} catch {
				// Malformed percent-encoding: keep the undecoded target.
			}
		}
		const force = link.trim().startsWith(':');
		if (force && mightBeImg) {
			result += unparsed;
			continue;
		}
		const title = Parser.normalizeTitle(page, 0, false, config, true);
		const {ns, interwiki, valid} = title;
		if (!valid) {
			result += unparsed;
			continue;
		} else if (mightBeImg) {
			// Only a local title in namespace 6 may have `[[...]]` inside its text.
			if (interwiki || ns !== 6) {
				result += unparsed;
				continue;
			}
			let found;
			// Consume following pieces until one carries the extra `]]` that closes the outer link.
			for (i++; i < pieces.length; i++) {
				const next = pieces[i];
				const parts = next.split(']]');
				if (parts.length === 1) {
					text += `[[${next}`;
					break;
				}
				text += `[[${parts[0]}]]${parts[1]}`;
				if (parts.length > 2) {
					found = true;
					after = parts.slice(2).join(']]');
					break;
				}
			}
			text = parseLinks(text, config, accum);
			if (!found) {
				result += `[[${link}|${text}`;
				continue;
			}
		}
		if (text) {
			text = parseQuotes(text, config, accum);
		}
		// The placeholder index is taken before the link token is constructed.
		result += `\0${accum.length}l\x7F${after}`;
		let LinkToken = require('../src/link');
		if (!force && !interwiki) {
			if (ns === 6) {
				LinkToken = require('../src/link/file');
			} else if (ns === 14) {
				LinkToken = require('../src/link/category');
			}
		}
		new LinkToken(link, text, title, config, accum);
	}
	return result;
};
|
|
96
|
-
|
|
97
|
-
Parser.parsers.parseLinks = __filename;
|
|
98
|
-
module.exports = parseLinks;
|
package/parser/list.js
DELETED
|
@@ -1,59 +0,0 @@
|
|
|
1
|
-
'use strict';
|
|
2
|
-
|
|
3
|
-
const Parser = require('..');
|
|
4
|
-
|
|
5
|
-
/**
|
|
6
|
-
* 解析列表
|
|
7
|
-
* @param {string} text wikitext
|
|
8
|
-
* @param {accum} accum
|
|
9
|
-
*/
|
|
10
|
-
const parseList = (text, config = Parser.getConfig(), accum = []) => {
|
|
11
|
-
const mt = /^((?:\0\d+c\x7F)*)([;:*#]+)/u.exec(text);
|
|
12
|
-
if (!mt) {
|
|
13
|
-
return text;
|
|
14
|
-
}
|
|
15
|
-
const ListToken = require('../src/nowiki/list');
|
|
16
|
-
const [total, comment, prefix] = mt;
|
|
17
|
-
text = `${comment}\0${accum.length}d\x7F${text.slice(total.length)}`;
|
|
18
|
-
new ListToken(prefix, config, accum);
|
|
19
|
-
let dt = prefix.split(';').length - 1;
|
|
20
|
-
if (!dt) {
|
|
21
|
-
return text;
|
|
22
|
-
}
|
|
23
|
-
const DdToken = require('../src/nowiki/dd');
|
|
24
|
-
let regex = /:+|-\{/gu,
|
|
25
|
-
ex = regex.exec(text),
|
|
26
|
-
lc = 0;
|
|
27
|
-
while (ex && dt) {
|
|
28
|
-
const {0: syntax, index} = ex;
|
|
29
|
-
if (syntax[0] === ':') {
|
|
30
|
-
if (syntax.length >= dt) {
|
|
31
|
-
new DdToken(':'.repeat(dt), config, accum);
|
|
32
|
-
return `${text.slice(0, index)}\0${accum.length - 1}d\x7F${text.slice(index + dt)}`;
|
|
33
|
-
}
|
|
34
|
-
text = `${text.slice(0, index)}\0${accum.length}d\x7F${text.slice(regex.lastIndex)}`;
|
|
35
|
-
dt -= syntax.length;
|
|
36
|
-
regex.lastIndex = index + 4 + String(accum.length).length;
|
|
37
|
-
new DdToken(syntax, config, accum);
|
|
38
|
-
} else if (syntax === '-{') {
|
|
39
|
-
if (!lc) {
|
|
40
|
-
const {lastIndex} = regex;
|
|
41
|
-
regex = /-\{|\}-/gu;
|
|
42
|
-
regex.lastIndex = lastIndex;
|
|
43
|
-
}
|
|
44
|
-
lc++;
|
|
45
|
-
} else {
|
|
46
|
-
lc--;
|
|
47
|
-
if (!lc) {
|
|
48
|
-
const {lastIndex} = regex;
|
|
49
|
-
regex = /:+|-\{/gu;
|
|
50
|
-
regex.lastIndex = lastIndex;
|
|
51
|
-
}
|
|
52
|
-
}
|
|
53
|
-
ex = regex.exec(text);
|
|
54
|
-
}
|
|
55
|
-
return text;
|
|
56
|
-
};
|
|
57
|
-
|
|
58
|
-
Parser.parsers.parseList = __filename;
|
|
59
|
-
module.exports = parseList;
|
package/parser/magicLinks.js
DELETED
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
'use strict';
|
|
2
|
-
|
|
3
|
-
const {extUrlChar} = require('../util/string'),
|
|
4
|
-
Parser = require('..');
|
|
5
|
-
|
|
6
|
-
/**
 * Parse free external links (bare URLs in running text).
 *
 * Every match of `config.protocol` followed by `extUrlChar` is trimmed of
 * trailing entities and punctuation and, if anything useful remains, replaced
 * by a `\0<index>w\x7F` placeholder followed by the trimmed-off trail.
 * @param {string} wikitext wikitext
 * @param {object} config parser configuration; `config.protocol` is a regex source fragment
 * @param {accum} accum
 * @returns {string} the text with placeholders substituted
 */
const parseMagicLinks = (wikitext, config = Parser.getConfig(), accum = []) => {
	const MagicLinkToken = require('../src/magicLink');
	const urlPattern = new RegExp(`\\b(?:${config.protocol})(${extUrlChar})`, 'giu');

	/**
	 * Replacement callback for one URL candidate
	 * @param {string} matched whole match
	 * @param {string} afterProtocol part of the match after the protocol
	 */
	const convert = (matched, afterProtocol) => {
		let url = matched;
		let trail = '';

		// The URL ends at the first `&lt;`, `&gt;` or `&nbsp;` (named or numeric).
		const entity = /&(?:lt|gt|nbsp|#x0*(?:3[ce]|a0)|#0*(?:6[02]|160));/iu.exec(url);
		if (entity) {
			trail = url.slice(entity.index);
			url = url.slice(0, entity.index);
		}

		// Trailing punctuation is not part of the URL; `)` only counts as punctuation when the URL has no `(`.
		const closingParen = url.includes('(') ? '' : ')';
		const punctuation = new RegExp(`[,;.:!?${closingParen}]+$`, 'u').exec(url);
		if (punctuation) {
			// A leading `;` that terminates an entity such as `&amp;` stays with the URL.
			const endsEntity = punctuation[0][0] === ';'
				&& /&(?:[a-z]+|#x[\da-f]+|#\d+)$/iu.test(url.slice(0, punctuation.index));
			const cut = punctuation.index + (endsEntity ? 1 : 0);
			trail = `${url.slice(cut)}${trail}`;
			url = url.slice(0, cut);
		}

		// Nothing left after the protocol: not a link.
		if (trail.length >= afterProtocol.length) {
			return matched;
		}
		new MagicLinkToken(url, false, config, accum);
		return `\0${accum.length - 1}w\x7F${trail}`;
	};

	return wikitext.replace(urlPattern, convert);
};
|
|
39
|
-
|
|
40
|
-
Parser.parsers.parseMagicLinks = __filename;
|
|
41
|
-
module.exports = parseMagicLinks;
|
package/parser/quotes.js
DELETED
|
@@ -1,64 +0,0 @@
|
|
|
1
|
-
'use strict';
|
|
2
|
-
|
|
3
|
-
const Parser = require('..');
|
|
4
|
-
|
|
5
|
-
/**
|
|
6
|
-
* 解析单引号
|
|
7
|
-
* @param {string} text wikitext
|
|
8
|
-
* @param {accum} accum
|
|
9
|
-
*/
|
|
10
|
-
const parseQuotes = (text, config = Parser.getConfig(), accum = []) => {
|
|
11
|
-
const arr = text.split(/('{2,})/u),
|
|
12
|
-
{length} = arr;
|
|
13
|
-
if (length === 1) {
|
|
14
|
-
return text;
|
|
15
|
-
}
|
|
16
|
-
let nBold = 0,
|
|
17
|
-
nItalic = 0,
|
|
18
|
-
firstSingle, firstMulti, firstSpace;
|
|
19
|
-
for (let i = 1; i < length; i += 2) {
|
|
20
|
-
const {length: len} = arr[i];
|
|
21
|
-
switch (len) {
|
|
22
|
-
case 2:
|
|
23
|
-
nItalic++;
|
|
24
|
-
break;
|
|
25
|
-
case 4:
|
|
26
|
-
arr[i - 1] += "'";
|
|
27
|
-
arr[i] = "'''";
|
|
28
|
-
// fall through
|
|
29
|
-
case 3:
|
|
30
|
-
nBold++;
|
|
31
|
-
if (firstSingle) {
|
|
32
|
-
break;
|
|
33
|
-
} else if (arr[i - 1].at(-1) === ' ') {
|
|
34
|
-
if (!firstMulti && !firstSpace) {
|
|
35
|
-
firstSpace = i;
|
|
36
|
-
}
|
|
37
|
-
} else if (arr[i - 1].at(-2) === ' ') {
|
|
38
|
-
firstSingle = i;
|
|
39
|
-
} else {
|
|
40
|
-
firstMulti ||= i;
|
|
41
|
-
}
|
|
42
|
-
break;
|
|
43
|
-
default:
|
|
44
|
-
arr[i - 1] += "'".repeat(len - 5);
|
|
45
|
-
arr[i] = "'''''";
|
|
46
|
-
nItalic++;
|
|
47
|
-
nBold++;
|
|
48
|
-
}
|
|
49
|
-
}
|
|
50
|
-
if (nItalic % 2 === 1 && nBold % 2 === 1) {
|
|
51
|
-
const i = firstSingle ?? firstMulti ?? firstSpace;
|
|
52
|
-
arr[i] = "''";
|
|
53
|
-
arr[i - 1] += "'";
|
|
54
|
-
}
|
|
55
|
-
const QuoteToken = require('../src/nowiki/quote');
|
|
56
|
-
for (let i = 1; i < length; i += 2) {
|
|
57
|
-
new QuoteToken(arr[i].length, config, accum);
|
|
58
|
-
arr[i] = `\0${accum.length - 1}q\x7F`;
|
|
59
|
-
}
|
|
60
|
-
return arr.join('');
|
|
61
|
-
};
|
|
62
|
-
|
|
63
|
-
Parser.parsers.parseQuotes = __filename;
|
|
64
|
-
module.exports = parseQuotes;
|
package/parser/selector.js
DELETED
|
@@ -1,175 +0,0 @@
|
|
|
1
|
-
'use strict';
|
|
2
|
-
|
|
3
|
-
const Parser = require('..');
|
|
4
|
-
|
|
5
|
-
/**
 * Pseudo-selectors that take no argument.
 * @type {pseudo[]}
 */
const simplePseudos = [
	'root',
	'first-child',
	'first-of-type',
	'last-child',
	'last-of-type',
	'only-child',
	'only-of-type',
	'empty',
	'parent',
	'header',
	'hidden',
	'visible',
	'only-whitespace',
	'local-link',
	'read-only',
	'read-write',
	'invalid',
	'required',
	'optional',
];

/**
 * Pseudo-selectors that take a parenthesised argument.
 * @type {pseudo[]}
 */
const complexPseudos = [
	'is',
	'not',
	'nth-child',
	'nth-of-type',
	'nth-last-child',
	'nth-last-of-type',
	'contains',
	'has',
	'lang',
];

/**
 * Pairs of [character, stand-in]. A backslash-escaped character in a selector
 * is swapped for its stand-in while parsing and swapped back afterwards.
 * The stand-ins must differ from the characters themselves; the named HTML
 * entities are used here.
 * NOTE(review): the entity names were reconstructed - the rendered diff showed
 * them decoded (which is not even valid JavaScript for `'` and `\`). Confirm
 * against the published package.
 * Order matters: `&` comes last so restoring `&amp;` cannot create a new entity.
 */
const specialChars = [
	['[', '&lbrack;'],
	[']', '&rbrack;'],
	['(', '&lpar;'],
	[')', '&rpar;'],
	['"', '&quot;'],
	["'", '&apos;'],
	[':', '&colon;'],
	['\\', '&bsol;'],
	['&', '&amp;'],
];

// A complex pseudo-selector name at the very end of the text preceding `(`.
const pseudoRegex = new RegExp(`:(${complexPseudos.join('|')})$`, 'u');
// Next structural character outside attributes/functions: `[`, `(`, a combinator, or whitespace.
const regularRegex = /[[(,>+~]|\s+/u; // eslint-disable-line regexp/no-super-linear-move
// Attribute selector body up to `]`. Groups: 1 name, 2 operator, 3 value (possibly quoted), 4 `i` flag.
const attributeRegex = /^\s*(\w+)\s*(?:([~|^$*!]?=)\s*("[^"]*"|'[^']*'|[^\s[\]]+)(?:\s+(i))?\s*)?\]/u;
// Pseudo-selector argument up to `)`. Group 1: the argument (possibly quoted).
const functionRegex = /^(\s*"[^"]*"\s*|\s*'[^']*'\s*|[^()]*)\)/u;
|
|
52
|
-
|
|
53
|
-
/**
 * Replace every backslash-escaped special character with its stand-in from
 * `specialChars`, so the parser does not mistake it for selector syntax.
 * @param {string} selector
 * @returns {string} the selector with escapes replaced
 */
const sanitize = selector => specialChars.reduce(
	(current, [c, escaped]) => current.replaceAll(`\\${c}`, escaped),
	selector,
);
|
|
63
|
-
|
|
64
|
-
/**
 * Turn the stand-ins from `specialChars` back into the characters they
 * represent and trim the result.
 * @param {string|undefined} selector
 * @returns {string|undefined} the restored text, or `undefined` when given `undefined`
 */
const desanitize = selector => {
	if (selector === undefined) {
		return undefined;
	}
	const restored = specialChars.reduce(
		(current, [c, escaped]) => current.replaceAll(escaped, c),
		selector,
	);
	return restored.trim();
};
|
|
77
|
-
|
|
78
|
-
/**
 * Strip one pair of matching quotes (`"` or `'`) that encloses the whole value.
 * @param {string|undefined} val attribute value or pseudo-selector argument
 * @returns {string|undefined} the value without its enclosing quotes; unchanged when it is not quoted
 */
const deQuote = val => {
	if (val === undefined) {
		return undefined;
	}
	const isQuoted = /^(["']).*\1$/u.test(val);
	return isQuoted ? val.slice(1, -1) : val;
};
|
|
89
|
-
|
|
90
|
-
/**
 * Split a plain selector fragment into its leading part and trailing simple
 * pseudo-selectors, and append them to the current step.
 *
 * The fragment is split on `:`. Everything before the first known simple
 * pseudo-selector is rejoined and pushed (after `desanitize`); every piece
 * from there on must be a known simple pseudo-selector and is pushed as `:name`.
 * @param {SelectorArray} step current step, appended to in place
 * @param {string} str fragment without attributes or complex pseudo-selectors
 * @throws `SyntaxError` when an unknown pseudo-selector follows a known one
 */
const pushSimple = (step, str) => {
	const parts = str.trim().split(':');
	// The first piece is never a pseudo-selector, so the search starts at index 1.
	const firstPseudo = parts.findIndex((part, idx) => idx > 0 && simplePseudos.includes(part));
	const boundary = firstPseudo === -1 ? parts.length : firstPseudo;
	const head = parts.slice(0, boundary);
	const pseudos = parts.slice(boundary);
	if (!pseudos.every(part => simplePseudos.includes(part))) {
		throw new SyntaxError(`非法的选择器!\n${str}\n可能需要将':'转义为'\\:'。`);
	}
	step.push(desanitize(head.join(':')), ...pseudos.map(part => `:${part}`));
};
|
|
105
|
-
|
|
106
|
-
/**
 * Parse a selector string into nested arrays.
 *
 * The result is a list of comma-separated conditions; each condition is a list
 * of steps joined by a relation (`>`, `+`, `~` or `''`, stored on
 * `step.relation`); each step holds plain strings, `:pseudo` strings,
 * attribute tuples and pseudo-function tuples.
 * @param {string} selector
 * @returns {SelectorArray[][]} the parsed conditions
 * @throws `SyntaxError` on an invalid selector
 */
const parseSelector = selector => {
	const trimmed = selector.trim();
	const combinators = [',', '>', '+', '~'];
	const /** @type {SelectorArray[][]} */ stack = [[[]]];
	let [condition] = stack;
	let [step] = condition;
	let remaining = sanitize(trimmed);
	// The active regex doubles as the parser state: regular text, inside `[...]`, or inside `(...)`.
	let regex = regularRegex;
	let mt = regex.exec(remaining);
	while (mt) {
		let {0: syntax, index} = mt;
		if (syntax.trim() === '') {
			// Whitespace: either padding before a combinator or a relation of its own (`''`).
			index += syntax.length;
			const next = remaining[index];
			syntax = combinators.includes(next) ? next : '';
		}
		if (syntax === ',') { // case 1: a new alternative
			pushSimple(step, remaining.slice(0, index));
			condition = [[]];
			[step] = condition;
			stack.push(condition);
		} else if (['>', '+', '~', ''].includes(syntax)) { // case 2: a relation between steps
			pushSimple(step, remaining.slice(0, index));
			if (!step.some(Boolean)) {
				throw new SyntaxError(`非法的选择器!\n${trimmed}\n可能需要通用选择器'*'。`);
			}
			step.relation = syntax;
			step = [];
			condition.push(step);
		} else if (syntax === '[') { // case 3: attribute opens
			pushSimple(step, remaining.slice(0, index));
			regex = attributeRegex;
		} else if (syntax.at(-1) === ']') { // case 4: attribute closes
			mt[3] = desanitize(deQuote(mt[3]));
			step.push(mt.slice(1));
			regex = regularRegex;
		} else if (syntax === '(') { // case 5: pseudo-selector function opens
			const pseudoExec = pseudoRegex.exec(remaining.slice(0, index));
			if (!pseudoExec) {
				throw new SyntaxError(`非法的选择器!\n${desanitize(remaining)}\n请检查伪选择器是否存在。`);
			}
			pushSimple(step, remaining.slice(0, pseudoExec.index));
			step.push(pseudoExec[1]); // parked here until the closing parenthesis is seen
			regex = functionRegex;
		} else { // case 6: pseudo-selector function closes
			const /** @type {pseudo} */ pseudo = step.pop();
			mt.push(pseudo);
			mt[1] = deQuote(mt[1]);
			step.push(mt.slice(1));
			regex = regularRegex;
		}
		remaining = remaining.slice(index + syntax.length);
		if (combinators.includes(syntax)) {
			remaining = remaining.trim();
		}
		mt = regex.exec(remaining);
	}
	if (regex !== regularRegex) {
		throw new SyntaxError(`非法的选择器!\n${trimmed}\n检测到未闭合的'${regex === attributeRegex ? '[' : '('}'`);
	}
	pushSimple(step, remaining);
	return stack;
};
|
|
173
|
-
|
|
174
|
-
Parser.parsers.parseSelector = __filename;
|
|
175
|
-
module.exports = parseSelector;
|
package/parser/table.js
DELETED
|
@@ -1,112 +0,0 @@
|
|
|
1
|
-
'use strict';
|
|
2
|
-
|
|
3
|
-
const Parser = require('..'),
|
|
4
|
-
AstText = require('../lib/text');
|
|
5
|
-
|
|
6
|
-
/**
 * Parse tables. Note that the syntax of `tr` and `td` tokens includes the
 * newline that precedes them.
 *
 * The text is processed line by line with a stack of open table / row / cell
 * tokens. Text outside any table is collected into the returned string, where
 * each table is represented by a `\0<index>b\x7F` placeholder.
 * @param {{firstChild: AstText, type: string}} root root node
 * @param {object} config parser configuration, passed through to the tokens
 * @param {accum} accum
 * @returns {string} the text outside tables, with placeholders substituted
 */
const parseTable = ({firstChild: {data}, type}, config = Parser.getConfig(), accum = []) => {
	const Token = require('../src');
	const TableToken = require('../src/table');
	const TrToken = require('../src/table/tr');
	const TdToken = require('../src/table/td');
	const DdToken = require('../src/nowiki/dd');
	const /** @type {TrToken[]} */ stack = [];
	const lines = data.split('\n');
	// For non-root input the first line is copied through unparsed.
	// `out` always starts with `\n`-prefixed content; the first character is dropped on return.
	let out = type === 'root' ? '' : `\n${lines.shift()}`;

	/**
	 * Insert plain text into the table being parsed, or into the output when no table is open
	 * @param {string} str text to insert
	 * @param {TrToken} top table, row or cell currently being parsed
	 */
	const push = (str, top) => {
		if (!top) {
			out += str;
			return;
		}
		const {lastElementChild} = top;
		if (lastElementChild.isPlain()) {
			lastElementChild.setText(String(lastElementChild) + str);
			return;
		}
		const inter = new Token(str, config, true, accum);
		inter.type = 'table-inter';
		top.appendChild(inter.setAttribute('stage', 3));
	};

	for (const rawLine of lines) {
		let top = stack.pop();
		const [leading] = /^(?:\s|\0\d+c\x7F)*/u.exec(rawLine);
		const line = rawLine.slice(leading.length);
		// Groups: 1 `:` indentation, 2 more whitespace/comments, 3 table opening syntax, 4 attributes.
		const tableStart = /^(:*)((?:\s|\0\d+c\x7F)*)(\{\||\{\0\d+!\x7F|\0\d+\{\x7F)(.*)$/u.exec(line);
		if (tableStart) {
			// A nested table can only live inside a cell; otherwise it starts at top level.
			while (top && top.type !== 'td') {
				top = stack.pop();
			}
			const [, indent, moreSpaces, tableSyntax, attr] = tableStart;
			if (indent) {
				new DdToken(indent, config, accum);
			}
			const indentPlaceholder = indent && `\0${accum.length - 1}d\x7F`;
			push(`\n${leading}${indentPlaceholder}${moreSpaces}\0${accum.length}b\x7F`, top);
			const table = new TableToken(tableSyntax, attr, config, accum);
			if (top) {
				stack.push(top);
			}
			stack.push(table);
			continue;
		} else if (!top) {
			out += `\n${rawLine}`;
			continue;
		}
		// Groups: 1 table closing, 2 row syntax, 3 cell syntax, 4 rest of the line.
		const tableSyntaxPattern
			= /^(?:(\|\}|\0\d+!\x7F\}|\0\d+\}\x7F)|(\|-+|\0\d+!\x7F-+|\0\d+-\x7F-*)(?!-)|(!|(?:\||\0\d+!\x7F)\+?))(.*)$/u;
		const matches = tableSyntaxPattern.exec(line);
		if (!matches) {
			// Ordinary line inside a table: append it to whatever is open.
			push(`\n${rawLine}`, top);
			stack.push(top);
			continue;
		}
		const [, closing, row, cell, attr] = matches;
		if (closing) {
			while (!(top instanceof TableToken)) {
				top = stack.pop();
			}
			top.close(`\n${leading}${closing}`, true);
			// Text after the closing syntax belongs to the enclosing cell, if any.
			push(attr, stack.at(-1));
		} else if (row) {
			// Close the open cell and row, if any, so that `top` is the table.
			if (top.type === 'td') {
				top = stack.pop();
			}
			if (top.type === 'tr') {
				top = stack.pop();
			}
			const tr = new TrToken(`\n${leading}${row}`, attr, config, accum);
			stack.push(top, tr);
			top.appendChild(tr);
		} else {
			if (top.type === 'td') {
				top = stack.pop();
			}
			// Inline cell separators; `!!` is only a separator on a line that starts with `!`.
			const separator = cell === '!'
				? /!!|(?:\||\0\d+!\x7F){2}|\0\d+\+\x7F/gu
				: /(?:\||\0\d+!\x7F){2}|\0\d+\+\x7F/gu;
			let found = separator.exec(attr);
			let cellStart = 0;
			let cellSyntax = `\n${leading}${cell}`;
			while (found) {
				const inlineTd = new TdToken(cellSyntax, attr.slice(cellStart, found.index), config, accum);
				top.appendChild(inlineTd);
				cellStart = separator.lastIndex;
				[cellSyntax] = found;
				found = separator.exec(attr);
			}
			// The last cell on the line stays open for following lines.
			const td = new TdToken(cellSyntax, attr.slice(cellStart), config, accum);
			stack.push(top, td);
			top.appendChild(td);
		}
	}
	return out.slice(1);
};
|
|
110
|
-
|
|
111
|
-
Parser.parsers.parseTable = __filename;
|
|
112
|
-
module.exports = parseTable;
|
package/printed/README
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
这里存放以 JSON 格式打印的 AST。
|