wikiparser-node 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +229 -0
- package/LICENSE +674 -0
- package/README.md +1896 -0
- package/config/default.json +766 -0
- package/config/llwiki.json +686 -0
- package/config/moegirl.json +721 -0
- package/index.js +159 -0
- package/jsconfig.json +7 -0
- package/lib/element.js +690 -0
- package/lib/node.js +357 -0
- package/lib/ranges.js +122 -0
- package/lib/title.js +57 -0
- package/mixin/attributeParent.js +67 -0
- package/mixin/fixedToken.js +32 -0
- package/mixin/hidden.js +22 -0
- package/package.json +30 -0
- package/parser/brackets.js +107 -0
- package/parser/commentAndExt.js +61 -0
- package/parser/externalLinks.js +30 -0
- package/parser/hrAndDoubleUnderscore.js +26 -0
- package/parser/html.js +41 -0
- package/parser/links.js +92 -0
- package/parser/magicLinks.js +40 -0
- package/parser/quotes.js +63 -0
- package/parser/table.js +97 -0
- package/src/arg.js +150 -0
- package/src/atom/hidden.js +10 -0
- package/src/atom/index.js +33 -0
- package/src/attribute.js +342 -0
- package/src/extLink.js +116 -0
- package/src/heading.js +91 -0
- package/src/html.js +144 -0
- package/src/imageParameter.js +172 -0
- package/src/index.js +602 -0
- package/src/link/category.js +88 -0
- package/src/link/file.js +201 -0
- package/src/link/index.js +214 -0
- package/src/listToken.js +47 -0
- package/src/magicLink.js +66 -0
- package/src/nowiki/comment.js +45 -0
- package/src/nowiki/doubleUnderscore.js +42 -0
- package/src/nowiki/hr.js +41 -0
- package/src/nowiki/index.js +37 -0
- package/src/nowiki/noinclude.js +24 -0
- package/src/nowiki/quote.js +37 -0
- package/src/onlyinclude.js +42 -0
- package/src/parameter.js +165 -0
- package/src/syntax.js +80 -0
- package/src/table/index.js +867 -0
- package/src/table/td.js +259 -0
- package/src/table/tr.js +244 -0
- package/src/tagPair/ext.js +85 -0
- package/src/tagPair/include.js +45 -0
- package/src/tagPair/index.js +91 -0
- package/src/transclude.js +627 -0
- package/tool/index.js +898 -0
- package/typings/element.d.ts +28 -0
- package/typings/index.d.ts +49 -0
- package/typings/node.d.ts +23 -0
- package/typings/parser.d.ts +9 -0
- package/typings/table.d.ts +14 -0
- package/typings/token.d.ts +21 -0
- package/typings/tool.d.ts +10 -0
- package/util/debug.js +70 -0
- package/util/string.js +60 -0
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const {removeComment} = require('../util/string'),
|
|
4
|
+
/** @type {Parser} */ Parser = require('..');
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* @param {string} text
|
|
8
|
+
* @param {accum} accum
|
|
9
|
+
*/
|
|
10
|
+
const parseBrackets = (text, config = Parser.getConfig(), accum = []) => {
|
|
11
|
+
const source = '(?<=^(?:\x00\\d+c\x7f)*)={1,6}|\\[\\[|{{2,}|-{(?!{)',
|
|
12
|
+
/** @type {BracketExecArray[]} */ stack = [],
|
|
13
|
+
closes = {'=': '\n', '{': '}{2,}|\\|', '-': '}-', '[': ']]'},
|
|
14
|
+
/** @type {Record<string, string>} */ marks = {'!': '!', '!!': '+', '(!': '{', '!)': '}', '!-': '-', '=': '~'};
|
|
15
|
+
let regex = new RegExp(source, 'gm'),
|
|
16
|
+
/** @type {BracketExecArray} */ mt = regex.exec(text),
|
|
17
|
+
moreBraces = text.includes('}}'),
|
|
18
|
+
lastIndex;
|
|
19
|
+
while (mt || lastIndex <= text.length && stack.at(-1)?.[0]?.[0] === '=') {
|
|
20
|
+
const {0: syntax, index: curIndex} = mt ?? {0: '\n', index: text.length},
|
|
21
|
+
/** @type {BracketExecArray} */ top = stack.pop() ?? {},
|
|
22
|
+
{0: open, index, parts} = top,
|
|
23
|
+
innerEqual = syntax === '=' && top.findEqual;
|
|
24
|
+
if ([']]', '}-'].includes(syntax)) { // 情形1:闭合内链或转换
|
|
25
|
+
lastIndex = curIndex + 2;
|
|
26
|
+
} else if (syntax === '\n') { // 情形2:闭合标题
|
|
27
|
+
lastIndex = curIndex + 1;
|
|
28
|
+
const {pos, findEqual} = stack.at(-1) ?? {};
|
|
29
|
+
if (!pos || findEqual || removeComment(text.slice(pos, index)) !== '') {
|
|
30
|
+
const rmt = text.slice(index, curIndex).match(/^(={1,6})(.+)\1((?:\s|\x00\d+c\x7f)*)$/);
|
|
31
|
+
if (rmt) {
|
|
32
|
+
text = `${text.slice(0, index)}\x00${accum.length}h\x7f${text.slice(curIndex)}`;
|
|
33
|
+
lastIndex = index + 4 + String(accum.length).length;
|
|
34
|
+
const HeadingToken = require('../src/heading');
|
|
35
|
+
new HeadingToken(rmt[1].length, rmt.slice(2), config, accum);
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
} else if (syntax === '|' || innerEqual) { // 情形3:模板内部,含行首单个'='
|
|
39
|
+
lastIndex = curIndex + 1;
|
|
40
|
+
parts.at(-1).push(text.slice(top.pos, curIndex));
|
|
41
|
+
if (syntax === '|') {
|
|
42
|
+
parts.push([]);
|
|
43
|
+
}
|
|
44
|
+
top.pos = lastIndex;
|
|
45
|
+
top.findEqual = syntax === '|';
|
|
46
|
+
stack.push(top);
|
|
47
|
+
} else if (syntax.startsWith('}}')) { // 情形4:闭合模板
|
|
48
|
+
const close = syntax.slice(0, Math.min(open.length, 3)),
|
|
49
|
+
rest = open.length - close.length,
|
|
50
|
+
{length} = accum;
|
|
51
|
+
lastIndex = curIndex + close.length; // 这不是最终的lastIndex
|
|
52
|
+
parts.at(-1).push(text.slice(top.pos, curIndex));
|
|
53
|
+
/* 标记{{!}}等 */
|
|
54
|
+
const ch = close.length === 2 ? marks[removeComment(parts[0][0])] ?? 't' : 't';
|
|
55
|
+
let skip = false;
|
|
56
|
+
if (close.length === 3) {
|
|
57
|
+
const ArgToken = require('../src/arg');
|
|
58
|
+
new ArgToken(parts.map(part => part.join('=')), config, accum);
|
|
59
|
+
} else {
|
|
60
|
+
try {
|
|
61
|
+
const TranscludeToken = require('../src/transclude');
|
|
62
|
+
new TranscludeToken(parts[0][0], parts.slice(1), config, accum);
|
|
63
|
+
} catch (e) {
|
|
64
|
+
if (e instanceof Error && e.message.startsWith('非法的模板名称:')) {
|
|
65
|
+
lastIndex = index + open.length;
|
|
66
|
+
skip = true;
|
|
67
|
+
} else {
|
|
68
|
+
throw e;
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
if (!skip) {
|
|
73
|
+
/* 标记{{!}}结束 */
|
|
74
|
+
text = `${text.slice(0, index + rest)}\x00${length}${ch}\x7f${text.slice(lastIndex)}`;
|
|
75
|
+
lastIndex = index + rest + 3 + String(length).length;
|
|
76
|
+
if (rest > 1) {
|
|
77
|
+
stack.push({0: open.slice(0, rest), index, pos: index + rest, parts: [[]]});
|
|
78
|
+
} else if (rest === 1 && text[index - 1] === '-') {
|
|
79
|
+
stack.push({0: '-{', index: index - 1, pos: index + 1, parts: [[]]});
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
} else { // 情形5:开启
|
|
83
|
+
lastIndex = curIndex + syntax.length;
|
|
84
|
+
if (syntax[0] === '{') {
|
|
85
|
+
mt.pos = lastIndex;
|
|
86
|
+
mt.parts = [[]];
|
|
87
|
+
}
|
|
88
|
+
stack.push(...'0' in top ? [top] : [], mt);
|
|
89
|
+
}
|
|
90
|
+
moreBraces &&= text.slice(lastIndex).includes('}}');
|
|
91
|
+
let curTop = stack.at(-1);
|
|
92
|
+
if (!moreBraces && curTop?.[0]?.[0] === '{') {
|
|
93
|
+
stack.pop();
|
|
94
|
+
curTop = stack.at(-1);
|
|
95
|
+
}
|
|
96
|
+
regex = new RegExp(source + (curTop
|
|
97
|
+
? `|${closes[curTop[0][0]]}${curTop.findEqual ? '|=' : ''}`
|
|
98
|
+
: ''
|
|
99
|
+
), 'gm');
|
|
100
|
+
regex.lastIndex = lastIndex;
|
|
101
|
+
mt = regex.exec(text);
|
|
102
|
+
}
|
|
103
|
+
return text;
|
|
104
|
+
};
|
|
105
|
+
|
|
106
|
+
Parser.parsers.parseBrackets = __filename;
|
|
107
|
+
module.exports = parseBrackets;
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const /** @type {Parser} */ Parser = require('..');
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* @param {string} text
|
|
7
|
+
* @param {accum} accum
|
|
8
|
+
*/
|
|
9
|
+
const parseCommentAndExt = (text, config = Parser.getConfig(), accum = [], includeOnly = false) => {
|
|
10
|
+
const onlyinclude = /<onlyinclude>(.*?)<\/onlyinclude>/gs;
|
|
11
|
+
if (includeOnly && onlyinclude.test(text)) { // `<onlyinclude>`拥有最高优先级
|
|
12
|
+
return text.replace(onlyinclude, /** @param {string} inner */ (_, inner) => {
|
|
13
|
+
const str = `\x00${accum.length}e\x7f`,
|
|
14
|
+
OnlyincludeToken = require('../src/onlyinclude');
|
|
15
|
+
new OnlyincludeToken(inner, config, accum);
|
|
16
|
+
return str;
|
|
17
|
+
}).replace(/(?<=^|\x00\d+e\x7f).*?(?=$|\x00\d+e\x7f)/gs, substr => {
|
|
18
|
+
if (substr === '') {
|
|
19
|
+
return '';
|
|
20
|
+
}
|
|
21
|
+
const NoincludeToken = require('../src/nowiki/noinclude');
|
|
22
|
+
new NoincludeToken(substr, config, accum);
|
|
23
|
+
return `\x00${accum.length - 1}c\x7f`;
|
|
24
|
+
});
|
|
25
|
+
}
|
|
26
|
+
const ext = config.ext.join('|'),
|
|
27
|
+
includeRegex = includeOnly ? 'includeonly' : '(?:no|only)include',
|
|
28
|
+
noincludeRegex = includeOnly ? 'noinclude' : 'includeonly',
|
|
29
|
+
regex = new RegExp(
|
|
30
|
+
'<!--.*?(?:-->|$)|' // comment
|
|
31
|
+
+ `<${includeRegex}(?:\\s.*?)?>|</${includeRegex}\\s*>|` // <includeonly>
|
|
32
|
+
+ `<(${ext})(\\s.*?)?(?:/>|>(.*?)</(\\1\\s*)>)|` // 扩展标签
|
|
33
|
+
+ `<(${noincludeRegex})(\\s.*?)?(?:/>|>(.*?)(?:</(\\5\\s*)>|$))`, // <noinclude>
|
|
34
|
+
'gis',
|
|
35
|
+
);
|
|
36
|
+
return text.replace(
|
|
37
|
+
regex,
|
|
38
|
+
/** @type {function(...string): string} */
|
|
39
|
+
(substr, name, attr, inner, closing, include, includeAttr, includeInner, includeClosing) => {
|
|
40
|
+
const str = `\x00${accum.length}${name ? 'e' : 'c'}\x7f`;
|
|
41
|
+
if (name) {
|
|
42
|
+
const ExtToken = require('../src/tagPair/ext');
|
|
43
|
+
new ExtToken(name, attr, inner, closing, config, accum);
|
|
44
|
+
} else if (substr.startsWith('<!--')) {
|
|
45
|
+
const CommentToken = require('../src/nowiki/comment'),
|
|
46
|
+
closed = substr.endsWith('-->');
|
|
47
|
+
new CommentToken(substr.slice(4, closed ? -3 : undefined), closed, config, accum);
|
|
48
|
+
} else if (include) {
|
|
49
|
+
const IncludeToken = require('../src/tagPair/include');
|
|
50
|
+
new IncludeToken(include, includeAttr, includeInner, includeClosing, config, accum);
|
|
51
|
+
} else {
|
|
52
|
+
const NoincludeToken = require('../src/nowiki/noinclude');
|
|
53
|
+
new NoincludeToken(substr, config, accum);
|
|
54
|
+
}
|
|
55
|
+
return str;
|
|
56
|
+
},
|
|
57
|
+
);
|
|
58
|
+
};
|
|
59
|
+
|
|
60
|
+
Parser.parsers.parseCommentAndExt = __filename;
|
|
61
|
+
module.exports = parseCommentAndExt;
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const {extUrlChar} = require('../util/string'),
|
|
4
|
+
/** @type {Parser} */ Parser = require('..');
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* @param {string} firstChild
|
|
8
|
+
* @param {accum} accum
|
|
9
|
+
*/
|
|
10
|
+
const parseExternalLinks = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
11
|
+
const ExtLinkToken = require('../src/extLink'),
|
|
12
|
+
regex = new RegExp(
|
|
13
|
+
`\\[((?:${config.protocol}|//)${extUrlChar})(\\p{Zs}*)([^\\]\\x01-\\x08\\x0a-\\x1f\\ufffd]*)\\]`,
|
|
14
|
+
'gui',
|
|
15
|
+
);
|
|
16
|
+
return firstChild.replace(regex, /** @type {function(...string): string} */ (_, url, space, text) => {
|
|
17
|
+
const {length} = accum,
|
|
18
|
+
mt = url.match(/&[lg]t;/);
|
|
19
|
+
if (mt) {
|
|
20
|
+
url = url.slice(0, mt.index);
|
|
21
|
+
space = '';
|
|
22
|
+
text = `${url.slice(mt.index)}${space}${text}`;
|
|
23
|
+
}
|
|
24
|
+
new ExtLinkToken(url, space, text, config, accum);
|
|
25
|
+
return `\x00${length}w\x7f`;
|
|
26
|
+
});
|
|
27
|
+
};
|
|
28
|
+
|
|
29
|
+
Parser.parsers.parseExternalLinks = __filename;
|
|
30
|
+
module.exports = parseExternalLinks;
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const /** @type {Parser} */ Parser = require('..');
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* @param {string} firstChild
|
|
7
|
+
* @param {accum} accum
|
|
8
|
+
*/
|
|
9
|
+
const parseHrAndDoubleUnderscore = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
10
|
+
const HrToken = require('../src/nowiki/hr'),
|
|
11
|
+
DoubleUnderscoreToken = require('../src/nowiki/doubleUnderscore'),
|
|
12
|
+
{doubleUnderscore} = config;
|
|
13
|
+
return firstChild.replace(/^-{4,}/mg, m => {
|
|
14
|
+
new HrToken(m.length, config, accum);
|
|
15
|
+
return `\x00${accum.length - 1}r\x7f`;
|
|
16
|
+
}).replace(new RegExp(`__(${doubleUnderscore.flat().join('|')})__`, 'ig'), /** @param {string} p1 */(m, p1) => {
|
|
17
|
+
if (doubleUnderscore[0].includes(p1.toLowerCase()) || doubleUnderscore[1].includes(p1)) {
|
|
18
|
+
new DoubleUnderscoreToken(p1, config, accum);
|
|
19
|
+
return `\x00${accum.length - 1}u\x7f`;
|
|
20
|
+
}
|
|
21
|
+
return m;
|
|
22
|
+
});
|
|
23
|
+
};
|
|
24
|
+
|
|
25
|
+
Parser.parsers.parseHrAndDoubleUnderscore = __filename;
|
|
26
|
+
module.exports = parseHrAndDoubleUnderscore;
|
package/parser/html.js
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const /** @type {Parser} */ Parser = require('..');
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* @param {string} firstChild
|
|
7
|
+
* @param {accum} accum
|
|
8
|
+
*/
|
|
9
|
+
const parseHtml = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
10
|
+
const regex = /^(\/?)([a-z][^\s/>]*)([^>]*?)(\/?>)([^<]*)$/i,
|
|
11
|
+
elements = config.html.flat(),
|
|
12
|
+
bits = firstChild.split('<');
|
|
13
|
+
let text = bits.shift();
|
|
14
|
+
for (const x of bits) {
|
|
15
|
+
const mt = x.match(regex),
|
|
16
|
+
t = mt?.[2],
|
|
17
|
+
name = t?.toLowerCase();
|
|
18
|
+
if (!mt || !elements.includes(name)) {
|
|
19
|
+
text += `<${x}`;
|
|
20
|
+
continue;
|
|
21
|
+
}
|
|
22
|
+
const [, slash,, params, brace, rest] = mt,
|
|
23
|
+
AttributeToken = require('../src/attribute'),
|
|
24
|
+
attr = new AttributeToken(params, 'html-attr', name, config, accum),
|
|
25
|
+
itemprop = attr.getAttr('itemprop');
|
|
26
|
+
if (name === 'meta' && (itemprop === undefined || attr.getAttr('content') === undefined)
|
|
27
|
+
|| name === 'link' && (itemprop === undefined || attr.getAttr('href') === undefined)
|
|
28
|
+
) {
|
|
29
|
+
text += `<${x}`;
|
|
30
|
+
accum.pop();
|
|
31
|
+
continue;
|
|
32
|
+
}
|
|
33
|
+
text += `\x00${accum.length}x\x7f${rest}`;
|
|
34
|
+
const HtmlToken = require('../src/html');
|
|
35
|
+
new HtmlToken(t, attr, slash === '/', brace === '/>', config, accum);
|
|
36
|
+
}
|
|
37
|
+
return text;
|
|
38
|
+
};
|
|
39
|
+
|
|
40
|
+
Parser.parsers.parseHtml = __filename;
|
|
41
|
+
module.exports = parseHtml;
|
package/parser/links.js
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const /** @type {Parser} */ Parser = require('..'),
|
|
4
|
+
Token = require('../src'); // eslint-disable-line no-unused-vars
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* @param {string} firstChild
|
|
8
|
+
* @param {accum} accum
|
|
9
|
+
*/
|
|
10
|
+
const parseLinks = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
11
|
+
const parseQuotes = require('./quotes.js'),
|
|
12
|
+
regex = /^([^\n<>[\]{}|]+)(?:\|(.+?))?]](.*)/s,
|
|
13
|
+
regexImg = /^([^\n<>[\]{}|]+)\|(.*)/s,
|
|
14
|
+
regexExt = new RegExp(`^\\s*(?:${config.protocol})`, 'i'),
|
|
15
|
+
bits = firstChild.split('[[');
|
|
16
|
+
let s = bits.shift();
|
|
17
|
+
for (let i = 0; i < bits.length; i++) {
|
|
18
|
+
let mightBeImg, link, text, after;
|
|
19
|
+
const x = bits[i],
|
|
20
|
+
m = x.match(regex);
|
|
21
|
+
if (m) {
|
|
22
|
+
[, link, text, after] = m;
|
|
23
|
+
if (after.startsWith(']') && text?.includes('[')) {
|
|
24
|
+
text += ']';
|
|
25
|
+
after = after.slice(1);
|
|
26
|
+
}
|
|
27
|
+
} else {
|
|
28
|
+
const m2 = x.match(regexImg);
|
|
29
|
+
if (m2) {
|
|
30
|
+
mightBeImg = true;
|
|
31
|
+
[, link, text] = m2;
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
if (link === undefined || regexExt.test(link) || /\x00\d+[exhbru]\x7f/.test(link)) {
|
|
35
|
+
s += `[[${x}`;
|
|
36
|
+
continue;
|
|
37
|
+
}
|
|
38
|
+
const page = link.includes('%') ? decodeURIComponent(link) : link,
|
|
39
|
+
force = link.trim().startsWith(':');
|
|
40
|
+
if (force && mightBeImg) {
|
|
41
|
+
s += `[[${x}`;
|
|
42
|
+
continue;
|
|
43
|
+
}
|
|
44
|
+
const title = Parser.normalizeTitle(page, 0, false, config, true),
|
|
45
|
+
{ns, interwiki, valid} = title;
|
|
46
|
+
if (!valid) {
|
|
47
|
+
s += `[[${x}`;
|
|
48
|
+
continue;
|
|
49
|
+
} else if (mightBeImg) {
|
|
50
|
+
if (interwiki || ns !== 6) {
|
|
51
|
+
s += `[[${x}`;
|
|
52
|
+
continue;
|
|
53
|
+
}
|
|
54
|
+
let found;
|
|
55
|
+
for (i++; i < bits.length; i++) {
|
|
56
|
+
const next = bits[i],
|
|
57
|
+
p = next.split(']]');
|
|
58
|
+
if (p.length > 2) {
|
|
59
|
+
found = true;
|
|
60
|
+
text += `[[${p[0]}]]${p[1]}`;
|
|
61
|
+
after = p.slice(2).join(']]');
|
|
62
|
+
break;
|
|
63
|
+
} else if (p.length === 2) {
|
|
64
|
+
text += `[[${p[0]}]]${p[1]}`;
|
|
65
|
+
} else {
|
|
66
|
+
text += `[[${next}`;
|
|
67
|
+
break;
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
text = parseLinks(text, config, accum);
|
|
71
|
+
if (!found) {
|
|
72
|
+
s += `[[${link}|${text}`;
|
|
73
|
+
continue;
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
text = text && parseQuotes(text, config, accum);
|
|
77
|
+
s += `\x00${accum.length}l\x7f${after}`;
|
|
78
|
+
let LinkToken = require('../src/link');
|
|
79
|
+
if (!force) {
|
|
80
|
+
if (!interwiki && ns === 6) {
|
|
81
|
+
LinkToken = require('../src/link/file');
|
|
82
|
+
} else if (!interwiki && ns === 14) {
|
|
83
|
+
LinkToken = require('../src/link/category');
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
new LinkToken(link, text, title, config, accum);
|
|
87
|
+
}
|
|
88
|
+
return s;
|
|
89
|
+
};
|
|
90
|
+
|
|
91
|
+
Parser.parsers.parseLinks = __filename;
|
|
92
|
+
module.exports = parseLinks;
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const {extUrlChar} = require('../util/string'),
|
|
4
|
+
/** @type {Parser} */ Parser = require('..');
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* @param {string} firstChild
|
|
8
|
+
* @param {accum} accum
|
|
9
|
+
*/
|
|
10
|
+
const parseMagicLinks = (firstChild, config = Parser.getConfig(), accum = []) => {
|
|
11
|
+
const MagicLinkToken = require('../src/magicLink'),
|
|
12
|
+
regex = new RegExp(`\\b(?:${config.protocol})(${extUrlChar})`, 'gui');
|
|
13
|
+
return firstChild.replace(regex, /** @param {string} p1 */ (m, p1) => {
|
|
14
|
+
let trail = '',
|
|
15
|
+
url = m;
|
|
16
|
+
const m2 = url.match(/&(?:lt|gt|nbsp|#x0*(?:3[ce]|a0)|#0*(?:6[02]|160));/i);
|
|
17
|
+
if (m2) {
|
|
18
|
+
trail = url.slice(m2.index);
|
|
19
|
+
url = url.slice(0, m2.index);
|
|
20
|
+
}
|
|
21
|
+
const sep = new RegExp(`[,;.:!?${url.includes('(') ? '' : ')'}]+$`),
|
|
22
|
+
sepChars = url.match(sep);
|
|
23
|
+
if (sepChars) {
|
|
24
|
+
let correction = 0;
|
|
25
|
+
if (sepChars[0].startsWith(';') && /&(?:[a-z]+|#x[\da-f]+|#\d+)$/i.test(url.slice(0, sepChars.index))) {
|
|
26
|
+
correction = 1;
|
|
27
|
+
}
|
|
28
|
+
trail = `${url.slice(sepChars.index + correction)}${trail}`;
|
|
29
|
+
url = url.slice(0, sepChars.index + correction);
|
|
30
|
+
}
|
|
31
|
+
if (trail.length >= p1.length) {
|
|
32
|
+
return m;
|
|
33
|
+
}
|
|
34
|
+
new MagicLinkToken(url, false, config, accum);
|
|
35
|
+
return `\x00${accum.length - 1}w\x7f${trail}`;
|
|
36
|
+
});
|
|
37
|
+
};
|
|
38
|
+
|
|
39
|
+
Parser.parsers.parseMagicLinks = __filename;
|
|
40
|
+
module.exports = parseMagicLinks;
|
package/parser/quotes.js
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const /** @type {Parser} */ Parser = require('..');
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* @param {string} text
|
|
7
|
+
* @param {accum} accum
|
|
8
|
+
*/
|
|
9
|
+
const parseQuotes = (text, config = Parser.getConfig(), accum = []) => {
|
|
10
|
+
const arr = text.split(/('{2,})/),
|
|
11
|
+
{length} = arr;
|
|
12
|
+
if (length === 1) {
|
|
13
|
+
return text;
|
|
14
|
+
}
|
|
15
|
+
let nBold = 0,
|
|
16
|
+
nItalic = 0,
|
|
17
|
+
firstSingle, firstMulti, firstSpace;
|
|
18
|
+
for (let i = 1; i < length; i += 2) {
|
|
19
|
+
const len = arr[i].length;
|
|
20
|
+
switch (len) {
|
|
21
|
+
case 2:
|
|
22
|
+
nItalic++;
|
|
23
|
+
break;
|
|
24
|
+
case 4:
|
|
25
|
+
arr[i - 1] += "'";
|
|
26
|
+
arr[i] = "'''";
|
|
27
|
+
// fall through
|
|
28
|
+
case 3:
|
|
29
|
+
nBold++;
|
|
30
|
+
if (firstSingle) {
|
|
31
|
+
break;
|
|
32
|
+
} else if (arr[i - 1].at(-1) === ' ') {
|
|
33
|
+
if (!firstMulti && !firstSpace) {
|
|
34
|
+
firstSpace = i;
|
|
35
|
+
}
|
|
36
|
+
} else if (arr[i - 1].at(-2) === ' ') {
|
|
37
|
+
firstSingle = i;
|
|
38
|
+
} else if (!firstMulti) {
|
|
39
|
+
firstMulti = i;
|
|
40
|
+
}
|
|
41
|
+
break;
|
|
42
|
+
default:
|
|
43
|
+
arr[i - 1] += "'".repeat(len - 5);
|
|
44
|
+
arr[i] = "'''''";
|
|
45
|
+
nItalic++;
|
|
46
|
+
nBold++;
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
if (nItalic % 2 === 1 && nBold % 2 === 1) {
|
|
50
|
+
const i = firstSingle ?? firstMulti ?? firstSpace;
|
|
51
|
+
arr[i] = "''";
|
|
52
|
+
arr[i - 1] += "'";
|
|
53
|
+
}
|
|
54
|
+
const QuoteToken = require('../src/nowiki/quote');
|
|
55
|
+
for (let i = 1; i < length; i += 2) {
|
|
56
|
+
new QuoteToken(arr[i].length, config, accum);
|
|
57
|
+
arr[i] = `\x00${accum.length - 1}q\x7f`;
|
|
58
|
+
}
|
|
59
|
+
return arr.join('');
|
|
60
|
+
};
|
|
61
|
+
|
|
62
|
+
Parser.parsers.parseQuotes = __filename;
|
|
63
|
+
module.exports = parseQuotes;
|
package/parser/table.js
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const /** @type {Parser} */ Parser = require('..');
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* `tr`和`td`包含开头的换行
|
|
7
|
+
* @param {{firstChild: string, type: string}}
|
|
8
|
+
* @param {accum} accum
|
|
9
|
+
*/
|
|
10
|
+
const parseTable = ({firstChild, type}, config = Parser.getConfig(), accum = []) => {
|
|
11
|
+
const TableToken = require('../src/table'),
|
|
12
|
+
TrToken = require('../src/table/tr'),
|
|
13
|
+
TdToken = require('../src/table/td'),
|
|
14
|
+
/** @type {TrToken[]} */ stack = [],
|
|
15
|
+
lines = firstChild.split('\n');
|
|
16
|
+
let out = type === 'root' ? '' : `\n${lines.shift()}`;
|
|
17
|
+
const /** @type {(str: string, top: TrToken & {firstChild: string}) => void} */ push = (str, top) => {
|
|
18
|
+
if (!top) {
|
|
19
|
+
out += str;
|
|
20
|
+
return;
|
|
21
|
+
}
|
|
22
|
+
const {lastElementChild, lastChild} = top;
|
|
23
|
+
if (top instanceof TdToken) {
|
|
24
|
+
lastElementChild.setText(lastElementChild.firstChild + str, 0);
|
|
25
|
+
} else if (typeof lastChild === 'string') {
|
|
26
|
+
top.setText(lastChild + str, 2);
|
|
27
|
+
} else {
|
|
28
|
+
top.appendChild(str);
|
|
29
|
+
}
|
|
30
|
+
};
|
|
31
|
+
for (const outLine of lines) {
|
|
32
|
+
let top = stack.pop();
|
|
33
|
+
const [spaces] = outLine.match(/^(?:\s|\x00\d+c\x7f)*/);
|
|
34
|
+
const line = outLine.slice(spaces.length),
|
|
35
|
+
matchesStart = line.match(/^(:*(?:\s|\x00\d+c\x7f)*)({\||{\x00\d+!\x7f|\x00\d+{\x7f)(.*)/);
|
|
36
|
+
if (matchesStart) {
|
|
37
|
+
const [, indent, tableSyntax, attr] = matchesStart;
|
|
38
|
+
push(`\n${spaces}${indent}\x00${accum.length}b\x7f`, top);
|
|
39
|
+
const table = new TableToken(tableSyntax, attr, config, accum);
|
|
40
|
+
stack.push(...top ? [top] : [], table);
|
|
41
|
+
continue;
|
|
42
|
+
} else if (!top) {
|
|
43
|
+
out += `\n${outLine}`;
|
|
44
|
+
continue;
|
|
45
|
+
}
|
|
46
|
+
const matches = line.match(
|
|
47
|
+
/^(?:(\|}|\x00\d+!\x7f}|\x00\d+}\x7f)|(\|-+|\x00\d+!\x7f-+|\x00\d+-\x7f-*)|(!|(?:\||\x00\d+!\x7f)\+?))(.*)/,
|
|
48
|
+
);
|
|
49
|
+
if (!matches) {
|
|
50
|
+
push(`\n${outLine}`, top);
|
|
51
|
+
stack.push(...top ? [top] : []);
|
|
52
|
+
continue;
|
|
53
|
+
}
|
|
54
|
+
const [, closing, row, cell, attr] = matches;
|
|
55
|
+
if (closing) {
|
|
56
|
+
while (top.type !== 'table') {
|
|
57
|
+
top = stack.pop();
|
|
58
|
+
}
|
|
59
|
+
top.close(`\n${spaces}${closing}`);
|
|
60
|
+
push(attr, stack.at(-1));
|
|
61
|
+
} else if (row) {
|
|
62
|
+
if (top.type === 'td') {
|
|
63
|
+
top = stack.pop();
|
|
64
|
+
}
|
|
65
|
+
if (top.type === 'tr') {
|
|
66
|
+
top = stack.pop();
|
|
67
|
+
}
|
|
68
|
+
const tr = new TrToken(`\n${spaces}${row}`, attr, config, accum);
|
|
69
|
+
stack.push(top, tr);
|
|
70
|
+
top.appendChild(tr);
|
|
71
|
+
} else {
|
|
72
|
+
if (top.type === 'td') {
|
|
73
|
+
top = stack.pop();
|
|
74
|
+
}
|
|
75
|
+
const regex = cell === '!'
|
|
76
|
+
? /!!|(?:\||\x00\d+!\x7f){2}|\x00\d+\+\x7f/g
|
|
77
|
+
: /(?:\||\x00\d+!\x7f){2}|\x00\d+\+\x7f/g;
|
|
78
|
+
let mt = regex.exec(attr),
|
|
79
|
+
lastIndex = 0,
|
|
80
|
+
lastSyntax = `\n${spaces}${cell}`;
|
|
81
|
+
while (mt) {
|
|
82
|
+
const td = new TdToken(lastSyntax, attr.slice(lastIndex, mt.index), config, accum);
|
|
83
|
+
top.appendChild(td);
|
|
84
|
+
({lastIndex} = regex);
|
|
85
|
+
[lastSyntax] = mt;
|
|
86
|
+
mt = regex.exec(attr);
|
|
87
|
+
}
|
|
88
|
+
const td = new TdToken(lastSyntax, attr.slice(lastIndex), config, accum);
|
|
89
|
+
stack.push(top, td);
|
|
90
|
+
top.appendChild(td);
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
return out.slice(1);
|
|
94
|
+
};
|
|
95
|
+
|
|
96
|
+
Parser.parsers.parseTable = __filename;
|
|
97
|
+
module.exports = parseTable;
|