securemark 0.294.5 → 0.294.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +4 -0
- package/dist/index.js +22 -28
- package/markdown.d.ts +3 -1
- package/package.json +1 -1
- package/src/combinator/data/parser.ts +3 -0
- package/src/parser/api/normalize.test.ts +9 -1
- package/src/parser/api/normalize.ts +17 -10
- package/src/parser/inline/autolink/channel.ts +2 -2
- package/src/parser/inline/autolink/hashnum.ts +2 -2
- package/src/parser/inline/autolink/hashtag.test.ts +6 -2
- package/src/parser/inline/autolink/hashtag.ts +4 -4
- package/src/parser/inline/autolink.ts +1 -1
- package/src/parser/inline/htmlentity.ts +13 -16
- package/src/parser/inline/ruby.ts +7 -2
- package/src/parser/source/text.ts +3 -1
package/CHANGELOG.md
CHANGED
package/dist/index.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
/*! securemark v0.294.5 https://github.com/falsandtru/securemark | (c) 2017, falsandtru | UNLICENSED License */
|
|
1
|
+
/*! securemark v0.294.6 https://github.com/falsandtru/securemark | (c) 2017, falsandtru | UNLICENSED License */
|
|
2
2
|
(function webpackUniversalModuleDefinition(root, factory) {
|
|
3
3
|
if(typeof exports === 'object' && typeof module === 'object')
|
|
4
4
|
module.exports = factory(require("Prism"), require("DOMPurify"));
|
|
@@ -4397,14 +4397,14 @@ function format(source) {
|
|
|
4397
4397
|
return source.replace(/\r\n?/g, '\n');
|
|
4398
4398
|
}
|
|
4399
4399
|
function sanitize(source) {
|
|
4400
|
-
return source.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]|[\u2006\u200B-\u200F\u202A-\u202F\u2060\uFEFF]|(?<![\u1820\u1821])\u180E/g, UNICODE_REPLACEMENT_CHARACTER).replace(/[\uD800-\uDBFF][\uDC00-\uDFFF]?|[\uDC00-\uDFFF]/g, char => char.length === 1 ? UNICODE_REPLACEMENT_CHARACTER : char);
|
|
4400
|
+
return source.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]|(?!\u200D)[\u2006\u200B-\u200F\u202A-\u202F\u2060\uFEFF]|(?<![\u1820\u1821])\u180E/g, UNICODE_REPLACEMENT_CHARACTER).replace(/[\uD800-\uDBFF][\uDC00-\uDFFF]?|[\uDC00-\uDFFF]/g, char => char.length === 1 ? UNICODE_REPLACEMENT_CHARACTER : char);
|
|
4401
4401
|
}
|
|
4402
4402
|
// https://dev.w3.org/html5/html-author/charref
|
|
4403
4403
|
// https://en.wikipedia.org/wiki/Whitespace_character
|
|
4404
4404
|
exports.invisibleHTMLEntityNames = ['Tab', 'NewLine', 'NonBreakingSpace', 'nbsp', 'shy', 'ensp', 'emsp', 'emsp13', 'emsp14', 'numsp', 'puncsp', 'ThinSpace', 'thinsp', 'VeryThinSpace', 'hairsp', 'ZeroWidthSpace', 'NegativeVeryThinSpace', 'NegativeThinSpace', 'NegativeMediumSpace', 'NegativeThickSpace', 'zwj', 'zwnj', 'lrm', 'rlm', 'MediumSpace', 'NoBreak', 'ApplyFunction', 'af', 'InvisibleTimes', 'it', 'InvisibleComma', 'ic'];
|
|
4405
|
-
const
|
|
4406
|
-
const
|
|
4407
|
-
const
|
|
4405
|
+
const unreadableEscapeHTMLEntityNames = exports.invisibleHTMLEntityNames.filter(name => !['Tab', 'NewLine', 'NonBreakingSpace', 'nbsp', 'zwj', 'zwnj'].includes(name));
|
|
4406
|
+
const unreadableEscapeCharacters = unreadableEscapeHTMLEntityNames.map(name => (0, htmlentity_1.unsafehtmlentity)((0, parser_1.input)(`&${name};`, {})).head.value);
|
|
4407
|
+
const unreadableEscapeCharacter = new RegExp(`[${unreadableEscapeCharacters.join('')}]`, 'g');
|
|
4408
4408
|
// https://www.pandanoir.info/entry/2018/03/11/193000
|
|
4409
4409
|
// http://anti.rosx.net/etc/memo/002_space.html
|
|
4410
4410
|
// http://nicowiki.com/%E7%A9%BA%E7%99%BD%E3%83%BB%E7%89%B9%E6%AE%8A%E8%A8%98%E5%8F%B7.html
|
|
@@ -4416,7 +4416,7 @@ const unreadableSpecialCharacters = (/* unused pure expression or super */ null
|
|
|
4416
4416
|
// ZERO WIDTH NON-JOINER
|
|
4417
4417
|
'\u200C',
|
|
4418
4418
|
// ZERO WIDTH JOINER
|
|
4419
|
-
'\u200D',
|
|
4419
|
+
//'\u200D',
|
|
4420
4420
|
// LEFT-TO-RIGHT MARK
|
|
4421
4421
|
'\u200E',
|
|
4422
4422
|
// RIGHT-TO-LEFT MARK
|
|
@@ -4439,7 +4439,7 @@ const unreadableSpecialCharacters = (/* unused pure expression or super */ null
|
|
|
4439
4439
|
'\uFEFF']));
|
|
4440
4440
|
// 特殊不可視文字はエディタおよびソースビューアでは等幅および強調表示により可視化する
|
|
4441
4441
|
function escape(source) {
|
|
4442
|
-
return source.replace(
|
|
4442
|
+
return source.replace(unreadableEscapeCharacter, char => `&${unreadableEscapeHTMLEntityNames[unreadableEscapeCharacters.indexOf(char)]};`);
|
|
4443
4443
|
}
|
|
4444
4444
|
exports.escape = escape;
|
|
4445
4445
|
|
|
@@ -6208,7 +6208,7 @@ const account_1 = __webpack_require__(4107);
|
|
|
6208
6208
|
const hashtag_1 = __webpack_require__(5764);
|
|
6209
6209
|
const hashnum_1 = __webpack_require__(8684);
|
|
6210
6210
|
const anchor_1 = __webpack_require__(8535);
|
|
6211
|
-
exports.autolink = (0, combinator_1.lazy)(() => (0, combinator_1.validate)(new RegExp([/(?<![0-9a-z])@/yi.source, /(?<![^\p{C}\p{S}\p{P}\s]|emoji)#/yiu.source, /(?<![0-9a-z])>>/yi.source, /(?<![0-9a-z][.+-]?|[@#])!?[0-9a-z]/yi.source].join('|').replace(/emoji/g, hashtag_1.emoji), 'yiu'), (0, combinator_1.state)(~1 /* State.autolink */, (0, combinator_1.union)([url_1.lineurl, url_1.url, email_1.email,
|
|
6211
|
+
exports.autolink = (0, combinator_1.lazy)(() => (0, combinator_1.validate)(new RegExp([/(?<![0-9a-z])@/yi.source, /(?<![^\p{C}\p{S}\p{P}\s]|emoji)#/yiu.source, /(?<![0-9a-z])>>/yi.source, /(?<![0-9a-z][.+-]?|[@#])!?[0-9a-z]/yi.source].join('|').replace(/emoji/g, hashtag_1.emoji.source), 'yiu'), (0, combinator_1.state)(~1 /* State.autolink */, (0, combinator_1.union)([url_1.lineurl, url_1.url, email_1.email,
|
|
6212
6212
|
// Escape unmatched email-like strings.
|
|
6213
6213
|
//str(/[0-9a-z]+(?:[_.+-][0-9a-z]+[:@]?|:|@(?=@))*/yi),
|
|
6214
6214
|
channel_1.channel, account_1.account,
|
|
@@ -6292,7 +6292,7 @@ const hashtag_1 = __webpack_require__(5764);
|
|
|
6292
6292
|
const source_1 = __webpack_require__(8745);
|
|
6293
6293
|
const dom_1 = __webpack_require__(394);
|
|
6294
6294
|
// https://example/@user?ch=a+b must be a user channel page or a redirect page going there.
|
|
6295
|
-
exports.channel = (0, combinator_1.lazy)(() => (0, combinator_1.rewrite)((0, combinator_1.sequence)([(0, combinator_1.surround)(/(?<![0-9a-z])@/yi, (0, source_1.str)(/[0-9a-z](?:(?:[0-9a-z]|-(?=[0-9a-z])){0,61}[0-9a-z])?(?:\.[0-9a-z](?:(?:[0-9a-z]|-(?=[0-9a-z])){0,61}[0-9a-z])?)*\//yi), (0, source_1.str)(/[a-z][0-9a-z]*(?:[-.][0-9a-z]+)*(?![-.]?[0-9a-z@]|>>|:\S)/yi), true, undefined, undefined, [3 | 0 /* Backtrack.autolink */]), (0, combinator_1.some)((0, combinator_1.verify)((0, combinator_1.surround)('#', (0, source_1.str)(new RegExp([/(?!['_])(?:[^\p{C}\p{S}\p{P}\s]|emoji|'(?=[0-9A-Za-z])|_(?=[^\p{C}\p{S}\p{P}\s]|emoji))+/yu.source].join('').replace(/emoji/g, hashtag_1.emoji), 'yu')), (0, source_1.str)(new RegExp([/(?![0-9a-z@]|>>|:\S|[^\p{C}\p{S}\p{P}\s]|emoji)/yu.source].join('').replace(/emoji/g, hashtag_1.emoji), 'yu')), false, undefined, undefined, [3 | 0 /* Backtrack.autolink */]), ([{
|
|
6295
|
+
exports.channel = (0, combinator_1.lazy)(() => (0, combinator_1.rewrite)((0, combinator_1.sequence)([(0, combinator_1.surround)(/(?<![0-9a-z])@/yi, (0, source_1.str)(/[0-9a-z](?:(?:[0-9a-z]|-(?=[0-9a-z])){0,61}[0-9a-z])?(?:\.[0-9a-z](?:(?:[0-9a-z]|-(?=[0-9a-z])){0,61}[0-9a-z])?)*\//yi), (0, source_1.str)(/[a-z][0-9a-z]*(?:[-.][0-9a-z]+)*(?![-.]?[0-9a-z@]|>>|:\S)/yi), true, undefined, undefined, [3 | 0 /* Backtrack.autolink */]), (0, combinator_1.some)((0, combinator_1.verify)((0, combinator_1.surround)('#', (0, source_1.str)(new RegExp([/(?!['_])(?:[^\p{C}\p{S}\p{P}\s]|emoji|'(?=[0-9A-Za-z])|_(?=[^\p{C}\p{S}\p{P}\s]|emoji))+/yu.source].join('').replace(/emoji/g, hashtag_1.emoji.source), 'yu')), (0, source_1.str)(new RegExp([/(?![0-9a-z@]|>>|:\S|[^\p{C}\p{S}\p{P}\s]|emoji)/yu.source].join('').replace(/emoji/g, hashtag_1.emoji.source), 'yu')), false, undefined, undefined, [3 | 0 /* Backtrack.autolink */]), ([{
|
|
6296
6296
|
value
|
|
6297
6297
|
}]) => !/^[0-9]{1,4}$|^[0-9]{5}/.test(value)))]), (0, combinator_1.constraint)(1 /* State.autolink */, (0, combinator_1.state)(1 /* State.autolink */, (0, combinator_1.fmap)((0, combinator_1.convert)(source => `[${source}]{ ${source.includes('/') ? `https://${source.slice(1, source.indexOf('#')).replace('/', '/@')}` : `/${source.slice(0, source.indexOf('#'))}`} }`, (0, combinator_1.union)([link_1.unsafelink]), false), ([{
|
|
6298
6298
|
value: el
|
|
@@ -6355,7 +6355,7 @@ const link_1 = __webpack_require__(3628);
|
|
|
6355
6355
|
const hashtag_1 = __webpack_require__(5764);
|
|
6356
6356
|
const source_1 = __webpack_require__(8745);
|
|
6357
6357
|
const dom_1 = __webpack_require__(394);
|
|
6358
|
-
exports.hashnum = (0, combinator_1.lazy)(() => (0, combinator_1.rewrite)((0, combinator_1.open)(new RegExp([/(?<![^\p{C}\p{S}\p{P}\s]|emoji)#/yiu.source].join('').replace(/emoji/g, hashtag_1.emoji), 'yu'), (0, source_1.str)(new RegExp([/[0-9]{1,9}(?![0-9a-z@#]|>>|:\S|[^\p{C}\p{S}\p{P}\s]|emoji)/yu.source].join('').replace(/emoji/g, hashtag_1.emoji), 'yu')), false, [1 | 0 /* Backtrack.autolink */]), (0, combinator_1.constraint)(1 /* State.autolink */, (0, combinator_1.state)(1 /* State.autolink */, (0, combinator_1.fmap)((0, combinator_1.convert)(source => `[${source}]{ ${source.slice(1)} }`, (0, combinator_1.union)([link_1.unsafelink]), false), ([{
|
|
6358
|
+
exports.hashnum = (0, combinator_1.lazy)(() => (0, combinator_1.rewrite)((0, combinator_1.open)(new RegExp([/(?<![^\p{C}\p{S}\p{P}\s]|emoji)#/yiu.source].join('').replace(/emoji/g, hashtag_1.emoji.source), 'yu'), (0, source_1.str)(new RegExp([/[0-9]{1,9}(?![0-9a-z@#]|>>|:\S|[^\p{C}\p{S}\p{P}\s]|emoji)/yu.source].join('').replace(/emoji/g, hashtag_1.emoji.source), 'yu')), false, [1 | 0 /* Backtrack.autolink */]), (0, combinator_1.constraint)(1 /* State.autolink */, (0, combinator_1.state)(1 /* State.autolink */, (0, combinator_1.fmap)((0, combinator_1.convert)(source => `[${source}]{ ${source.slice(1)} }`, (0, combinator_1.union)([link_1.unsafelink]), false), ([{
|
|
6359
6359
|
value
|
|
6360
6360
|
}]) => new parser_1.List([new parser_1.Data((0, dom_1.define)(value, {
|
|
6361
6361
|
class: 'hashnum',
|
|
@@ -6381,8 +6381,8 @@ const source_1 = __webpack_require__(8745);
|
|
|
6381
6381
|
const dom_1 = __webpack_require__(394);
|
|
6382
6382
|
// https://example/hashtags/a must be a hashtag page or a redirect page going there.
|
|
6383
6383
|
// https://github.com/tc39/proposal-regexp-unicode-property-escapes#matching-emoji
|
|
6384
|
-
exports.emoji =
|
|
6385
|
-
exports.hashtag = (0, combinator_1.lazy)(() => (0, combinator_1.rewrite)((0, combinator_1.verify)((0, combinator_1.surround)(new RegExp([/(?<![^\p{C}\p{S}\p{P}\s]|emoji)#/yiu.source].join('').replace(/emoji/g, exports.emoji), 'yu'), (0, source_1.str)(new RegExp([/(?!['_])(?:[^\p{C}\p{S}\p{P}\s]|emoji|'(?=[0-9A-Za-z])|_(?=[^\p{C}\p{S}\p{P}\s]|emoji))+/yu.source].join('').replace(/emoji/g, exports.emoji), 'yu')), (0, source_1.str)(new RegExp([/(?![0-9a-z@#]|>>|:\S|[^\p{C}\p{S}\p{P}\s]|emoji)/yu.source].join('').replace(/emoji/g, exports.emoji), 'yu')), false, undefined, undefined, [3 | 0 /* Backtrack.autolink */]), ([{
|
|
6384
|
+
exports.emoji = /\p{Emoji_Modifier_Base}\p{Emoji_Modifier}?|\p{Emoji_Presentation}|\p{Emoji}\uFE0F|\u200D/u;
|
|
6385
|
+
exports.hashtag = (0, combinator_1.lazy)(() => (0, combinator_1.rewrite)((0, combinator_1.verify)((0, combinator_1.surround)(new RegExp([/(?<![^\p{C}\p{S}\p{P}\s]|emoji)#/yiu.source].join('').replace(/emoji/g, exports.emoji.source), 'yu'), (0, source_1.str)(new RegExp([/(?!['_])(?:[^\p{C}\p{S}\p{P}\s]|emoji|'(?=[0-9A-Za-z])|_(?=[^\p{C}\p{S}\p{P}\s]|emoji))+/yu.source].join('').replace(/emoji/g, exports.emoji.source), 'yu')), (0, source_1.str)(new RegExp([/(?![0-9a-z@#]|>>|:\S|[^\p{C}\p{S}\p{P}\s]|emoji)/yu.source].join('').replace(/emoji/g, exports.emoji.source), 'yu')), false, undefined, undefined, [3 | 0 /* Backtrack.autolink */]), ([{
|
|
6386
6386
|
value
|
|
6387
6387
|
}]) => !/^[0-9]{1,4}$|^[0-9]{5}/.test(value)), (0, combinator_1.constraint)(1 /* State.autolink */, (0, combinator_1.state)(1 /* State.autolink */, (0, combinator_1.fmap)((0, combinator_1.convert)(source => `[${source}]{ ${`/hashtags/${source.slice(1)}`} }`, (0, combinator_1.union)([link_1.unsafelink]), false), ([{
|
|
6388
6388
|
value
|
|
@@ -7140,19 +7140,10 @@ Object.defineProperty(exports, "__esModule", ({
|
|
|
7140
7140
|
exports.htmlentity = exports.unsafehtmlentity = void 0;
|
|
7141
7141
|
const parser_1 = __webpack_require__(605);
|
|
7142
7142
|
const combinator_1 = __webpack_require__(3484);
|
|
7143
|
+
const source_1 = __webpack_require__(8745);
|
|
7143
7144
|
const util_1 = __webpack_require__(4992);
|
|
7144
7145
|
const dom_1 = __webpack_require__(394);
|
|
7145
|
-
exports.unsafehtmlentity = (0, combinator_1.
|
|
7146
|
-
//({ source }) => [[parser(source) ?? `${Command.Error}${source}`], '']));
|
|
7147
|
-
({
|
|
7148
|
-
context
|
|
7149
|
-
}) => {
|
|
7150
|
-
const {
|
|
7151
|
-
source
|
|
7152
|
-
} = context;
|
|
7153
|
-
context.position += source.length;
|
|
7154
|
-
return source.length > 1 && source.at(-1) === ';' ? new parser_1.List([new parser_1.Data(parser(source) ?? source)]) : new parser_1.List([new parser_1.Data(source)]);
|
|
7155
|
-
});
|
|
7146
|
+
exports.unsafehtmlentity = (0, combinator_1.surround)((0, source_1.str)('&'), (0, source_1.str)(/[0-9A-Za-z]+/y), (0, source_1.str)(';'), false, ([as, bs, cs]) => new parser_1.List([new parser_1.Data(parser(as.head.value + bs.head.value + cs.head.value))]), ([as, bs]) => new parser_1.List([new parser_1.Data(as.head.value + (bs?.head?.value ?? ''))]), [3 | 64 /* Backtrack.bracket */]);
|
|
7156
7147
|
exports.htmlentity = (0, combinator_1.fmap)((0, combinator_1.union)([exports.unsafehtmlentity]), ([{
|
|
7157
7148
|
value
|
|
7158
7149
|
}]) => new parser_1.List([length === 1 || value.at(-1) !== ';' ? new parser_1.Data(value) : new parser_1.Data((0, dom_1.html)('span', {
|
|
@@ -7162,8 +7153,7 @@ exports.htmlentity = (0, combinator_1.fmap)((0, combinator_1.union)([exports.uns
|
|
|
7162
7153
|
const parser = (el => entity => {
|
|
7163
7154
|
if (entity === '&NewLine;') return ' ';
|
|
7164
7155
|
el.innerHTML = entity;
|
|
7165
|
-
|
|
7166
|
-
return entity === text ? undefined : text;
|
|
7156
|
+
return el.textContent;
|
|
7167
7157
|
})((0, dom_1.html)('span'));
|
|
7168
7158
|
|
|
7169
7159
|
/***/ },
|
|
@@ -7772,6 +7762,7 @@ exports.ruby = (0, combinator_1.lazy)(() => (0, combinator_1.bind)((0, combinato
|
|
|
7772
7762
|
}, acc) => value + ' ' + acc, '').trim())), new parser_1.Data((0, dom_1.html)('rp', ')'))])))))]);
|
|
7773
7763
|
}
|
|
7774
7764
|
}));
|
|
7765
|
+
const delimiter = /[$"`\[\](){}<>（）［］｛｝]|\\?\n/y;
|
|
7775
7766
|
const text = input => {
|
|
7776
7767
|
const {
|
|
7777
7768
|
context
|
|
@@ -7785,11 +7776,12 @@ const text = input => {
|
|
|
7785
7776
|
for (let {
|
|
7786
7777
|
position
|
|
7787
7778
|
} = context; position < source.length; position = context.position) {
|
|
7788
|
-
|
|
7779
|
+
delimiter.lastIndex = position;
|
|
7780
|
+
if (delimiter.test(source)) break;
|
|
7789
7781
|
switch (source[position]) {
|
|
7790
7782
|
case '&':
|
|
7791
7783
|
{
|
|
7792
|
-
const result = (0, htmlentity_1.unsafehtmlentity)(input) ?? (0, source_1.txt)(input);
|
|
7784
|
+
const result = source[position + 1] !== ' ' ? (0, htmlentity_1.unsafehtmlentity)(input) ?? (0, source_1.txt)(input) : (0, source_1.txt)(input);
|
|
7793
7785
|
acc.last.value += result.head.value;
|
|
7794
7786
|
continue;
|
|
7795
7787
|
}
|
|
@@ -8724,7 +8716,6 @@ function seek(source, position) {
|
|
|
8724
8716
|
case '@':
|
|
8725
8717
|
case '#':
|
|
8726
8718
|
case '$':
|
|
8727
|
-
case '&':
|
|
8728
8719
|
case '"':
|
|
8729
8720
|
case '`':
|
|
8730
8721
|
case '[':
|
|
@@ -8760,6 +8751,9 @@ function seek(source, position) {
|
|
|
8760
8751
|
case ':':
|
|
8761
8752
|
if (source[i + 1] === '/' && source[i + 2] === '/') return i;
|
|
8762
8753
|
continue;
|
|
8754
|
+
case '&':
|
|
8755
|
+
if (source[i + 1] !== ' ') return i;
|
|
8756
|
+
continue;
|
|
8763
8757
|
case ' ':
|
|
8764
8758
|
case '\t':
|
|
8765
8759
|
case ' ':
|
package/markdown.d.ts
CHANGED
|
@@ -1057,7 +1057,9 @@ export namespace MarkdownParser {
|
|
|
1057
1057
|
export interface UnsafeHTMLEntityParser extends
|
|
1058
1058
|
// &copy;
|
|
1059
1059
|
Inline<'unsafehtmlentity'>,
|
|
1060
|
-
Parser<string, Context, [
|
|
1060
|
+
Parser<string, Context, [
|
|
1061
|
+
SourceParser.StrParser,
|
|
1062
|
+
]> {
|
|
1061
1063
|
}
|
|
1062
1064
|
export interface ShortMediaParser extends
|
|
1063
1065
|
// !https://host
|
package/package.json
CHANGED
|
@@ -39,8 +39,11 @@ export interface CtxOptions {
|
|
|
39
39
|
// 区間別テーブルは固定サイズであるためプールして再使用できる。
|
|
40
40
|
// 従って分割時のデータ構造は区間ごとに探索木を動的に生成しデータ数に応じてテーブルに移行するのが最も効率的である。
|
|
41
41
|
// これにより最悪時間計算量線形化に要する最悪空間計算量が+1nに局限される。
|
|
42
|
+
// またはテーブルの参照が高速なら変換せず併用してもよい。
|
|
42
43
|
// 木とテーブルいずれにおいてもバックトラックデータとオーバーヘッドを合わせた追加データサイズの最大値は
|
|
43
44
|
// セグメントサイズに制約されるため入力サイズに対する最大追加データサイズの平均比率はかなり小さくなる。
|
|
45
|
+
// 必要なテーブルの最大サイズは最大セグメントサイズであるため最大追加データサイズは入力サイズにかかわらず
|
|
46
|
+
// 10KB*並列数に留まり最大数百文字以下の短文ならば数百byte*並列数となる。
|
|
44
47
|
//
|
|
45
48
|
// 1. データ数が規定数を超えたら区間テーブルを生成しデータを振り分ける。
|
|
46
49
|
// - 子ノードのポインタだけ保持するとしても1ノード複数データ保持で圧縮できるかは微妙。
|
|
@@ -67,12 +67,20 @@ describe('Unit: parser/normalize', () => {
|
|
|
67
67
|
assert(normalize('\x01---\na: b\x01\n---\n\n!> \x01---\na: b\x01\n---') === '\uFFFD---\na: b\uFFFD\n---\n\n!> \uFFFD---\na: b\uFFFD\n---');
|
|
68
68
|
});
|
|
69
69
|
|
|
70
|
+
it('emoji', () => {
|
|
71
|
+
assert(normalize('😀') === '😀');
|
|
72
|
+
assert(normalize('🤚🏽') === '🤚🏽');
|
|
73
|
+
assert(normalize('👨👩👧') === '👨👩👧');
|
|
74
|
+
assert(normalize('🇺🇳') === '🇺🇳');
|
|
75
|
+
assert(normalize('#️⃣*️⃣0️⃣1️⃣2️⃣3️⃣4️⃣5️⃣6️⃣7️⃣8️⃣9️⃣') === '#️⃣*️⃣0️⃣1️⃣2️⃣3️⃣4️⃣5️⃣6️⃣7️⃣8️⃣9️⃣');
|
|
76
|
+
});
|
|
77
|
+
|
|
70
78
|
});
|
|
71
79
|
|
|
72
80
|
describe('escape', () => {
|
|
73
81
|
it('', () => {
|
|
74
82
|
assert(escape('\u200B') === '&ZeroWidthSpace;');
|
|
75
|
-
assert(escape('\
|
|
83
|
+
assert(escape('\u200F') === '&rlm;');
|
|
76
84
|
});
|
|
77
85
|
|
|
78
86
|
});
|
|
@@ -15,7 +15,7 @@ function format(source: string): string {
|
|
|
15
15
|
|
|
16
16
|
function sanitize(source: string): string {
|
|
17
17
|
return source
|
|
18
|
-
.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]|[\u2006\u200B-\u200F\u202A-\u202F\u2060\uFEFF]|(?<![\u1820\u1821])\u180E/g, UNICODE_REPLACEMENT_CHARACTER)
|
|
18
|
+
.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]|(?!\u200D)[\u2006\u200B-\u200F\u202A-\u202F\u2060\uFEFF]|(?<![\u1820\u1821])\u180E/g, UNICODE_REPLACEMENT_CHARACTER)
|
|
19
19
|
.replace(/[\uD800-\uDBFF][\uDC00-\uDFFF]?|[\uDC00-\uDFFF]/g, char =>
|
|
20
20
|
char.length === 1
|
|
21
21
|
? UNICODE_REPLACEMENT_CHARACTER
|
|
@@ -58,13 +58,20 @@ export const invisibleHTMLEntityNames = [
|
|
|
58
58
|
'InvisibleComma',
|
|
59
59
|
'ic',
|
|
60
60
|
] as const;
|
|
61
|
-
const
|
|
62
|
-
|
|
61
|
+
const unreadableEscapeHTMLEntityNames = invisibleHTMLEntityNames.filter(name => ![
|
|
62
|
+
'Tab',
|
|
63
|
+
'NewLine',
|
|
64
|
+
'NonBreakingSpace',
|
|
65
|
+
'nbsp',
|
|
66
|
+
'zwj',
|
|
67
|
+
'zwnj',
|
|
68
|
+
].includes(name));
|
|
69
|
+
const unreadableEscapeCharacters = unreadableEscapeHTMLEntityNames
|
|
63
70
|
.map(name => unsafehtmlentity(input(`&${name};`, {}))!.head!.value);
|
|
64
|
-
assert(
|
|
65
|
-
assert(
|
|
66
|
-
const
|
|
67
|
-
assert(!
|
|
71
|
+
assert(unreadableEscapeCharacters.length === unreadableEscapeHTMLEntityNames.length);
|
|
72
|
+
assert(unreadableEscapeCharacters.every(c => c.length === 1));
|
|
73
|
+
const unreadableEscapeCharacter = new RegExp(`[${unreadableEscapeCharacters.join('')}]`, 'g');
|
|
74
|
+
assert(!unreadableEscapeCharacter.source.includes('&'));
|
|
68
75
|
|
|
69
76
|
// https://www.pandanoir.info/entry/2018/03/11/193000
|
|
70
77
|
// http://anti.rosx.net/etc/memo/002_space.html
|
|
@@ -77,7 +84,7 @@ const unreadableSpecialCharacters = [
|
|
|
77
84
|
// ZERO WIDTH NON-JOINER
|
|
78
85
|
'\u200C',
|
|
79
86
|
// ZERO WIDTH JOINER
|
|
80
|
-
'\u200D',
|
|
87
|
+
//'\u200D',
|
|
81
88
|
// LEFT-TO-RIGHT MARK
|
|
82
89
|
'\u200E',
|
|
83
90
|
// RIGHT-TO-LEFT MARK
|
|
@@ -104,6 +111,6 @@ assert(unreadableSpecialCharacters.every(c => sanitize(c) === UNICODE_REPLACEMEN
|
|
|
104
111
|
// 特殊不可視文字はエディタおよびソースビューアでは等幅および強調表示により可視化する
|
|
105
112
|
export function escape(source: string): string {
|
|
106
113
|
return source
|
|
107
|
-
.replace(
|
|
108
|
-
`&${
|
|
114
|
+
.replace(unreadableEscapeCharacter, char =>
|
|
115
|
+
`&${unreadableEscapeHTMLEntityNames[unreadableEscapeCharacters.indexOf(char)]};`);
|
|
109
116
|
}
|
|
@@ -21,10 +21,10 @@ export const channel: AutolinkParser.ChannelParser = lazy(() => rewrite(
|
|
|
21
21
|
'#',
|
|
22
22
|
str(new RegExp([
|
|
23
23
|
/(?!['_])(?:[^\p{C}\p{S}\p{P}\s]|emoji|'(?=[0-9A-Za-z])|_(?=[^\p{C}\p{S}\p{P}\s]|emoji))+/yu.source,
|
|
24
|
-
].join('').replace(/emoji/g, emoji), 'yu')),
|
|
24
|
+
].join('').replace(/emoji/g, emoji.source), 'yu')),
|
|
25
25
|
str(new RegExp([
|
|
26
26
|
/(?![0-9a-z@]|>>|:\S|[^\p{C}\p{S}\p{P}\s]|emoji)/yu.source,
|
|
27
|
-
].join('').replace(/emoji/g, emoji), 'yu')),
|
|
27
|
+
].join('').replace(/emoji/g, emoji.source), 'yu')),
|
|
28
28
|
false, undefined, undefined,
|
|
29
29
|
[3 | Backtrack.autolink]),
|
|
30
30
|
([{ value }]) => !/^[0-9]{1,4}$|^[0-9]{5}/.test(value as string))),
|
|
@@ -11,10 +11,10 @@ export const hashnum: AutolinkParser.HashnumParser = lazy(() => rewrite(
|
|
|
11
11
|
open(
|
|
12
12
|
new RegExp([
|
|
13
13
|
/(?<![^\p{C}\p{S}\p{P}\s]|emoji)#/yiu.source,
|
|
14
|
-
].join('').replace(/emoji/g, emoji), 'yu'),
|
|
14
|
+
].join('').replace(/emoji/g, emoji.source), 'yu'),
|
|
15
15
|
str(new RegExp([
|
|
16
16
|
/[0-9]{1,9}(?![0-9a-z@#]|>>|:\S|[^\p{C}\p{S}\p{P}\s]|emoji)/yu.source,
|
|
17
|
-
].join('').replace(/emoji/g, emoji), 'yu')),
|
|
17
|
+
].join('').replace(/emoji/g, emoji.source), 'yu')),
|
|
18
18
|
false,
|
|
19
19
|
[1 | Backtrack.autolink]),
|
|
20
20
|
constraint(State.autolink, state(State.autolink, fmap(convert(
|
|
@@ -54,10 +54,14 @@ describe('Unit: parser/inline/autolink/hashtag', () => {
|
|
|
54
54
|
assert.deepStrictEqual(inspect(parser('#a_b'), ctx), [['<a class="hashtag" href="/hashtags/a_b">#a_b</a>'], '']);
|
|
55
55
|
assert.deepStrictEqual(inspect(parser('#a__b'), ctx), [['<a class="hashtag" href="/hashtags/a">#a</a>'], '__b']);
|
|
56
56
|
assert.deepStrictEqual(inspect(parser('#あ'), ctx), [['<a class="hashtag" href="/hashtags/あ">#あ</a>'], '']);
|
|
57
|
-
assert.deepStrictEqual(inspect(parser('
|
|
57
|
+
assert.deepStrictEqual(inspect(parser('#😀'), ctx), [['<a class="hashtag" href="/hashtags/😀">#😀</a>'], '']);
|
|
58
|
+
assert.deepStrictEqual(inspect(parser('#🤚🏽'), ctx), [['<a class="hashtag" href="/hashtags/🤚🏽">#🤚🏽</a>'], '']);
|
|
59
|
+
assert.deepStrictEqual(inspect(parser('#👨👩👧'), ctx), [['<a class="hashtag" href="/hashtags/👨👩👧">#👨👩👧</a>'], '']);
|
|
60
|
+
assert.deepStrictEqual(inspect(parser('#🇺🇳'), ctx), [['<a class="hashtag" href="/hashtags/🇺🇳">#🇺🇳</a>'], '']);
|
|
61
|
+
assert.deepStrictEqual(inspect(parser('##️⃣*️⃣0️⃣1️⃣2️⃣3️⃣4️⃣5️⃣6️⃣7️⃣8️⃣9️⃣'), ctx), [['<a class="hashtag" href="/hashtags/#️⃣*️⃣0️⃣1️⃣2️⃣3️⃣4️⃣5️⃣6️⃣7️⃣8️⃣9️⃣">##️⃣*️⃣0️⃣1️⃣2️⃣3️⃣4️⃣5️⃣6️⃣7️⃣8️⃣9️⃣</a>'], '']);
|
|
58
62
|
assert.deepStrictEqual(inspect(parser('#1a'), ctx), [['<a class="hashtag" href="/hashtags/1a">#1a</a>'], '']);
|
|
59
63
|
assert.deepStrictEqual(inspect(parser('#1あ'), ctx), [['<a class="hashtag" href="/hashtags/1あ">#1あ</a>'], '']);
|
|
60
|
-
assert.deepStrictEqual(inspect(parser('#1
|
|
64
|
+
assert.deepStrictEqual(inspect(parser('#1😀'), ctx), [['<a class="hashtag" href="/hashtags/1😀">#1😀</a>'], '']);
|
|
61
65
|
assert.deepStrictEqual(inspect(parser(`#a'`), ctx), [[`<a class="hashtag" href="/hashtags/a">#a</a>`], `'`]);
|
|
62
66
|
assert.deepStrictEqual(inspect(parser(`#a''`), ctx), [[`<a class="hashtag" href="/hashtags/a">#a</a>`], `''`]);
|
|
63
67
|
assert.deepStrictEqual(inspect(parser(`#a'b`), ctx), [[`<a class="hashtag" href="/hashtags/a'b">#a'b</a>`], '']);
|
|
@@ -9,19 +9,19 @@ import { define } from 'typed-dom/dom';
|
|
|
9
9
|
// https://example/hashtags/a must be a hashtag page or a redirect page going there.
|
|
10
10
|
|
|
11
11
|
// https://github.com/tc39/proposal-regexp-unicode-property-escapes#matching-emoji
|
|
12
|
-
export const emoji =
|
|
12
|
+
export const emoji = /\p{Emoji_Modifier_Base}\p{Emoji_Modifier}?|\p{Emoji_Presentation}|\p{Emoji}\uFE0F|\u200D/u;
|
|
13
13
|
|
|
14
14
|
export const hashtag: AutolinkParser.HashtagParser = lazy(() => rewrite(
|
|
15
15
|
verify(surround(
|
|
16
16
|
new RegExp([
|
|
17
17
|
/(?<![^\p{C}\p{S}\p{P}\s]|emoji)#/yiu.source,
|
|
18
|
-
].join('').replace(/emoji/g, emoji), 'yu'),
|
|
18
|
+
].join('').replace(/emoji/g, emoji.source), 'yu'),
|
|
19
19
|
str(new RegExp([
|
|
20
20
|
/(?!['_])(?:[^\p{C}\p{S}\p{P}\s]|emoji|'(?=[0-9A-Za-z])|_(?=[^\p{C}\p{S}\p{P}\s]|emoji))+/yu.source,
|
|
21
|
-
].join('').replace(/emoji/g, emoji), 'yu')),
|
|
21
|
+
].join('').replace(/emoji/g, emoji.source), 'yu')),
|
|
22
22
|
str(new RegExp([
|
|
23
23
|
/(?![0-9a-z@#]|>>|:\S|[^\p{C}\p{S}\p{P}\s]|emoji)/yu.source,
|
|
24
|
-
].join('').replace(/emoji/g, emoji), 'yu')),
|
|
24
|
+
].join('').replace(/emoji/g, emoji.source), 'yu')),
|
|
25
25
|
false, undefined, undefined,
|
|
26
26
|
[3 | Backtrack.autolink]),
|
|
27
27
|
([{ value }]) => !/^[0-9]{1,4}$|^[0-9]{5}/.test(value)),
|
|
@@ -15,7 +15,7 @@ export const autolink: AutolinkParser = lazy(() =>
|
|
|
15
15
|
/(?<![^\p{C}\p{S}\p{P}\s]|emoji)#/yiu.source,
|
|
16
16
|
/(?<![0-9a-z])>>/yi.source,
|
|
17
17
|
/(?<![0-9a-z][.+-]?|[@#])!?[0-9a-z]/yi.source,
|
|
18
|
-
].join('|').replace(/emoji/g, emoji), 'yiu'),
|
|
18
|
+
].join('|').replace(/emoji/g, emoji.source), 'yiu'),
|
|
19
19
|
state(~State.autolink,
|
|
20
20
|
union([
|
|
21
21
|
lineurl,
|
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
import { HTMLEntityParser, UnsafeHTMLEntityParser } from '../inline';
|
|
2
|
+
import { Backtrack } from '../context';
|
|
2
3
|
import { List, Data } from '../../combinator/data/parser';
|
|
3
|
-
import { union,
|
|
4
|
+
import { union, surround, fmap } from '../../combinator';
|
|
5
|
+
import { str } from '../source';
|
|
4
6
|
import { invalid } from '../util';
|
|
5
7
|
import { html } from 'typed-dom/dom';
|
|
6
8
|
|
|
7
|
-
export const unsafehtmlentity: UnsafeHTMLEntityParser =
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
(
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
: new List([new Data(source)]);
|
|
16
|
-
});
|
|
9
|
+
export const unsafehtmlentity: UnsafeHTMLEntityParser = surround(
|
|
10
|
+
str('&'), str(/[0-9A-Za-z]+/y), str(';'),
|
|
11
|
+
false,
|
|
12
|
+
([as, bs, cs]) =>
|
|
13
|
+
new List([new Data(parser(as.head!.value + bs.head!.value + cs.head!.value))]),
|
|
14
|
+
([as, bs]) =>
|
|
15
|
+
new List([new Data(as.head!.value + (bs?.head?.value ?? ''))]),
|
|
16
|
+
[3 | Backtrack.bracket]);
|
|
17
17
|
|
|
18
18
|
export const htmlentity: HTMLEntityParser = fmap(
|
|
19
19
|
union([unsafehtmlentity]),
|
|
@@ -26,11 +26,8 @@ export const htmlentity: HTMLEntityParser = fmap(
|
|
|
26
26
|
}, value))
|
|
27
27
|
]));
|
|
28
28
|
|
|
29
|
-
const parser = (el => (entity: string): string
|
|
29
|
+
const parser = (el => (entity: string): string => {
|
|
30
30
|
if (entity === '&NewLine;') return ' ';
|
|
31
31
|
el.innerHTML = entity;
|
|
32
|
-
|
|
33
|
-
return entity === text
|
|
34
|
-
? undefined
|
|
35
|
-
: text;
|
|
32
|
+
return el.textContent!;
|
|
36
33
|
})(html('span'));
|
|
@@ -63,6 +63,8 @@ export const ruby: RubyParser = lazy(() => bind(
|
|
|
63
63
|
}
|
|
64
64
|
}));
|
|
65
65
|
|
|
66
|
+
const delimiter = /[$"`\[\](){}<>（）［］｛｝]|\\?\n/y;
|
|
67
|
+
|
|
66
68
|
const text: RubyParser.TextParser = input => {
|
|
67
69
|
const { context } = input;
|
|
68
70
|
const { source } = context;
|
|
@@ -70,11 +72,14 @@ const text: RubyParser.TextParser = input => {
|
|
|
70
72
|
let state = false;
|
|
71
73
|
context.sequential = true;
|
|
72
74
|
for (let { position } = context; position < source.length; position = context.position) {
|
|
73
|
-
|
|
75
|
+
delimiter.lastIndex = position;
|
|
76
|
+
if (delimiter.test(source)) break;
|
|
74
77
|
assert(source[position] !== '\n');
|
|
75
78
|
switch (source[position]) {
|
|
76
79
|
case '&': {
|
|
77
|
-
const result =
|
|
80
|
+
const result = source[position + 1] !== ' '
|
|
81
|
+
? unsafehtmlentity(input) ?? txt(input)!
|
|
82
|
+
: txt(input)!;
|
|
78
83
|
assert(result);
|
|
79
84
|
acc.last!.value += result.head!.value;
|
|
80
85
|
continue;
|
|
@@ -238,7 +238,6 @@ function seek(source: string, position: number): number {
|
|
|
238
238
|
case '@':
|
|
239
239
|
case '#':
|
|
240
240
|
case '$':
|
|
241
|
-
case '&':
|
|
242
241
|
case '"':
|
|
243
242
|
case '`':
|
|
244
243
|
case '[':
|
|
@@ -274,6 +273,9 @@ function seek(source: string, position: number): number {
|
|
|
274
273
|
case ':':
|
|
275
274
|
if (source[i + 1] === '/' && source[i + 2] === '/') return i;
|
|
276
275
|
continue;
|
|
276
|
+
case '&':
|
|
277
|
+
if (source[i + 1] !== ' ') return i;
|
|
278
|
+
continue;
|
|
277
279
|
case ' ':
|
|
278
280
|
case '\t':
|
|
279
281
|
case ' ':
|