securemark 0.294.5 → 0.294.6

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.294.6
4
+
5
+ - Refactoring.
6
+
3
7
  ## 0.294.5
4
8
 
5
9
  - Refactoring.
package/dist/index.js CHANGED
@@ -1,4 +1,4 @@
1
- /*! securemark v0.294.5 https://github.com/falsandtru/securemark | (c) 2017, falsandtru | UNLICENSED License */
1
+ /*! securemark v0.294.6 https://github.com/falsandtru/securemark | (c) 2017, falsandtru | UNLICENSED License */
2
2
  (function webpackUniversalModuleDefinition(root, factory) {
3
3
  if(typeof exports === 'object' && typeof module === 'object')
4
4
  module.exports = factory(require("Prism"), require("DOMPurify"));
@@ -4397,14 +4397,14 @@ function format(source) {
4397
4397
  return source.replace(/\r\n?/g, '\n');
4398
4398
  }
4399
4399
  function sanitize(source) {
4400
- return source.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]|[\u2006\u200B-\u200F\u202A-\u202F\u2060\uFEFF]|(?<![\u1820\u1821])\u180E/g, UNICODE_REPLACEMENT_CHARACTER).replace(/[\uD800-\uDBFF][\uDC00-\uDFFF]?|[\uDC00-\uDFFF]/g, char => char.length === 1 ? UNICODE_REPLACEMENT_CHARACTER : char);
4400
+ return source.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]|(?!\u200D)[\u2006\u200B-\u200F\u202A-\u202F\u2060\uFEFF]|(?<![\u1820\u1821])\u180E/g, UNICODE_REPLACEMENT_CHARACTER).replace(/[\uD800-\uDBFF][\uDC00-\uDFFF]?|[\uDC00-\uDFFF]/g, char => char.length === 1 ? UNICODE_REPLACEMENT_CHARACTER : char);
4401
4401
  }
4402
4402
  // https://dev.w3.org/html5/html-author/charref
4403
4403
  // https://en.wikipedia.org/wiki/Whitespace_character
4404
4404
  exports.invisibleHTMLEntityNames = ['Tab', 'NewLine', 'NonBreakingSpace', 'nbsp', 'shy', 'ensp', 'emsp', 'emsp13', 'emsp14', 'numsp', 'puncsp', 'ThinSpace', 'thinsp', 'VeryThinSpace', 'hairsp', 'ZeroWidthSpace', 'NegativeVeryThinSpace', 'NegativeThinSpace', 'NegativeMediumSpace', 'NegativeThickSpace', 'zwj', 'zwnj', 'lrm', 'rlm', 'MediumSpace', 'NoBreak', 'ApplyFunction', 'af', 'InvisibleTimes', 'it', 'InvisibleComma', 'ic'];
4405
- const unreadableHTMLEntityNames = exports.invisibleHTMLEntityNames.slice(2);
4406
- const unreadableEscapableCharacters = unreadableHTMLEntityNames.map(name => (0, htmlentity_1.unsafehtmlentity)((0, parser_1.input)(`&${name};`, {})).head.value);
4407
- const unreadableEscapableCharacter = new RegExp(`[${unreadableEscapableCharacters.join('')}]`, 'g');
4405
+ const unreadableEscapeHTMLEntityNames = exports.invisibleHTMLEntityNames.filter(name => !['Tab', 'NewLine', 'NonBreakingSpace', 'nbsp', 'zwj', 'zwnj'].includes(name));
4406
+ const unreadableEscapeCharacters = unreadableEscapeHTMLEntityNames.map(name => (0, htmlentity_1.unsafehtmlentity)((0, parser_1.input)(`&${name};`, {})).head.value);
4407
+ const unreadableEscapeCharacter = new RegExp(`[${unreadableEscapeCharacters.join('')}]`, 'g');
4408
4408
  // https://www.pandanoir.info/entry/2018/03/11/193000
4409
4409
  // http://anti.rosx.net/etc/memo/002_space.html
4410
4410
  // http://nicowiki.com/%E7%A9%BA%E7%99%BD%E3%83%BB%E7%89%B9%E6%AE%8A%E8%A8%98%E5%8F%B7.html
@@ -4416,7 +4416,7 @@ const unreadableSpecialCharacters = (/* unused pure expression or super */ null
4416
4416
  // ZERO WIDTH NON-JOINER
4417
4417
  '\u200C',
4418
4418
  // ZERO WIDTH JOINER
4419
- '\u200D',
4419
+ //'\u200D',
4420
4420
  // LEFT-TO-RIGHT MARK
4421
4421
  '\u200E',
4422
4422
  // RIGHT-TO-LEFT MARK
@@ -4439,7 +4439,7 @@ const unreadableSpecialCharacters = (/* unused pure expression or super */ null
4439
4439
  '\uFEFF']));
4440
4440
  // 特殊不可視文字はエディタおよびソースビューアでは等幅および強調表示により可視化する
4441
4441
  function escape(source) {
4442
- return source.replace(unreadableEscapableCharacter, char => `&${unreadableHTMLEntityNames[unreadableEscapableCharacters.indexOf(char)]};`);
4442
+ return source.replace(unreadableEscapeCharacter, char => `&${unreadableEscapeHTMLEntityNames[unreadableEscapeCharacters.indexOf(char)]};`);
4443
4443
  }
4444
4444
  exports.escape = escape;
4445
4445
 
@@ -6208,7 +6208,7 @@ const account_1 = __webpack_require__(4107);
6208
6208
  const hashtag_1 = __webpack_require__(5764);
6209
6209
  const hashnum_1 = __webpack_require__(8684);
6210
6210
  const anchor_1 = __webpack_require__(8535);
6211
- exports.autolink = (0, combinator_1.lazy)(() => (0, combinator_1.validate)(new RegExp([/(?<![0-9a-z])@/yi.source, /(?<![^\p{C}\p{S}\p{P}\s]|emoji)#/yiu.source, /(?<![0-9a-z])>>/yi.source, /(?<![0-9a-z][.+-]?|[@#])!?[0-9a-z]/yi.source].join('|').replace(/emoji/g, hashtag_1.emoji), 'yiu'), (0, combinator_1.state)(~1 /* State.autolink */, (0, combinator_1.union)([url_1.lineurl, url_1.url, email_1.email,
6211
+ exports.autolink = (0, combinator_1.lazy)(() => (0, combinator_1.validate)(new RegExp([/(?<![0-9a-z])@/yi.source, /(?<![^\p{C}\p{S}\p{P}\s]|emoji)#/yiu.source, /(?<![0-9a-z])>>/yi.source, /(?<![0-9a-z][.+-]?|[@#])!?[0-9a-z]/yi.source].join('|').replace(/emoji/g, hashtag_1.emoji.source), 'yiu'), (0, combinator_1.state)(~1 /* State.autolink */, (0, combinator_1.union)([url_1.lineurl, url_1.url, email_1.email,
6212
6212
  // Escape unmatched email-like strings.
6213
6213
  //str(/[0-9a-z]+(?:[_.+-][0-9a-z]+[:@]?|:|@(?=@))*/yi),
6214
6214
  channel_1.channel, account_1.account,
@@ -6292,7 +6292,7 @@ const hashtag_1 = __webpack_require__(5764);
6292
6292
  const source_1 = __webpack_require__(8745);
6293
6293
  const dom_1 = __webpack_require__(394);
6294
6294
  // https://example/@user?ch=a+b must be a user channel page or a redirect page going there.
6295
- exports.channel = (0, combinator_1.lazy)(() => (0, combinator_1.rewrite)((0, combinator_1.sequence)([(0, combinator_1.surround)(/(?<![0-9a-z])@/yi, (0, source_1.str)(/[0-9a-z](?:(?:[0-9a-z]|-(?=[0-9a-z])){0,61}[0-9a-z])?(?:\.[0-9a-z](?:(?:[0-9a-z]|-(?=[0-9a-z])){0,61}[0-9a-z])?)*\//yi), (0, source_1.str)(/[a-z][0-9a-z]*(?:[-.][0-9a-z]+)*(?![-.]?[0-9a-z@]|>>|:\S)/yi), true, undefined, undefined, [3 | 0 /* Backtrack.autolink */]), (0, combinator_1.some)((0, combinator_1.verify)((0, combinator_1.surround)('#', (0, source_1.str)(new RegExp([/(?!['_])(?:[^\p{C}\p{S}\p{P}\s]|emoji|'(?=[0-9A-Za-z])|_(?=[^\p{C}\p{S}\p{P}\s]|emoji))+/yu.source].join('').replace(/emoji/g, hashtag_1.emoji), 'yu')), (0, source_1.str)(new RegExp([/(?![0-9a-z@]|>>|:\S|[^\p{C}\p{S}\p{P}\s]|emoji)/yu.source].join('').replace(/emoji/g, hashtag_1.emoji), 'yu')), false, undefined, undefined, [3 | 0 /* Backtrack.autolink */]), ([{
6295
+ exports.channel = (0, combinator_1.lazy)(() => (0, combinator_1.rewrite)((0, combinator_1.sequence)([(0, combinator_1.surround)(/(?<![0-9a-z])@/yi, (0, source_1.str)(/[0-9a-z](?:(?:[0-9a-z]|-(?=[0-9a-z])){0,61}[0-9a-z])?(?:\.[0-9a-z](?:(?:[0-9a-z]|-(?=[0-9a-z])){0,61}[0-9a-z])?)*\//yi), (0, source_1.str)(/[a-z][0-9a-z]*(?:[-.][0-9a-z]+)*(?![-.]?[0-9a-z@]|>>|:\S)/yi), true, undefined, undefined, [3 | 0 /* Backtrack.autolink */]), (0, combinator_1.some)((0, combinator_1.verify)((0, combinator_1.surround)('#', (0, source_1.str)(new RegExp([/(?!['_])(?:[^\p{C}\p{S}\p{P}\s]|emoji|'(?=[0-9A-Za-z])|_(?=[^\p{C}\p{S}\p{P}\s]|emoji))+/yu.source].join('').replace(/emoji/g, hashtag_1.emoji.source), 'yu')), (0, source_1.str)(new RegExp([/(?![0-9a-z@]|>>|:\S|[^\p{C}\p{S}\p{P}\s]|emoji)/yu.source].join('').replace(/emoji/g, hashtag_1.emoji.source), 'yu')), false, undefined, undefined, [3 | 0 /* Backtrack.autolink */]), ([{
6296
6296
  value
6297
6297
  }]) => !/^[0-9]{1,4}$|^[0-9]{5}/.test(value)))]), (0, combinator_1.constraint)(1 /* State.autolink */, (0, combinator_1.state)(1 /* State.autolink */, (0, combinator_1.fmap)((0, combinator_1.convert)(source => `[${source}]{ ${source.includes('/') ? `https://${source.slice(1, source.indexOf('#')).replace('/', '/@')}` : `/${source.slice(0, source.indexOf('#'))}`} }`, (0, combinator_1.union)([link_1.unsafelink]), false), ([{
6298
6298
  value: el
@@ -6355,7 +6355,7 @@ const link_1 = __webpack_require__(3628);
6355
6355
  const hashtag_1 = __webpack_require__(5764);
6356
6356
  const source_1 = __webpack_require__(8745);
6357
6357
  const dom_1 = __webpack_require__(394);
6358
- exports.hashnum = (0, combinator_1.lazy)(() => (0, combinator_1.rewrite)((0, combinator_1.open)(new RegExp([/(?<![^\p{C}\p{S}\p{P}\s]|emoji)#/yiu.source].join('').replace(/emoji/g, hashtag_1.emoji), 'yu'), (0, source_1.str)(new RegExp([/[0-9]{1,9}(?![0-9a-z@#]|>>|:\S|[^\p{C}\p{S}\p{P}\s]|emoji)/yu.source].join('').replace(/emoji/g, hashtag_1.emoji), 'yu')), false, [1 | 0 /* Backtrack.autolink */]), (0, combinator_1.constraint)(1 /* State.autolink */, (0, combinator_1.state)(1 /* State.autolink */, (0, combinator_1.fmap)((0, combinator_1.convert)(source => `[${source}]{ ${source.slice(1)} }`, (0, combinator_1.union)([link_1.unsafelink]), false), ([{
6358
+ exports.hashnum = (0, combinator_1.lazy)(() => (0, combinator_1.rewrite)((0, combinator_1.open)(new RegExp([/(?<![^\p{C}\p{S}\p{P}\s]|emoji)#/yiu.source].join('').replace(/emoji/g, hashtag_1.emoji.source), 'yu'), (0, source_1.str)(new RegExp([/[0-9]{1,9}(?![0-9a-z@#]|>>|:\S|[^\p{C}\p{S}\p{P}\s]|emoji)/yu.source].join('').replace(/emoji/g, hashtag_1.emoji.source), 'yu')), false, [1 | 0 /* Backtrack.autolink */]), (0, combinator_1.constraint)(1 /* State.autolink */, (0, combinator_1.state)(1 /* State.autolink */, (0, combinator_1.fmap)((0, combinator_1.convert)(source => `[${source}]{ ${source.slice(1)} }`, (0, combinator_1.union)([link_1.unsafelink]), false), ([{
6359
6359
  value
6360
6360
  }]) => new parser_1.List([new parser_1.Data((0, dom_1.define)(value, {
6361
6361
  class: 'hashnum',
@@ -6381,8 +6381,8 @@ const source_1 = __webpack_require__(8745);
6381
6381
  const dom_1 = __webpack_require__(394);
6382
6382
  // https://example/hashtags/a must be a hashtag page or a redirect page going there.
6383
6383
  // https://github.com/tc39/proposal-regexp-unicode-property-escapes#matching-emoji
6384
- exports.emoji = String.raw`\p{Emoji_Modifier_Base}\p{Emoji_Modifier}?|\p{Emoji_Presentation}|\p{Emoji}\uFE0F`;
6385
- exports.hashtag = (0, combinator_1.lazy)(() => (0, combinator_1.rewrite)((0, combinator_1.verify)((0, combinator_1.surround)(new RegExp([/(?<![^\p{C}\p{S}\p{P}\s]|emoji)#/yiu.source].join('').replace(/emoji/g, exports.emoji), 'yu'), (0, source_1.str)(new RegExp([/(?!['_])(?:[^\p{C}\p{S}\p{P}\s]|emoji|'(?=[0-9A-Za-z])|_(?=[^\p{C}\p{S}\p{P}\s]|emoji))+/yu.source].join('').replace(/emoji/g, exports.emoji), 'yu')), (0, source_1.str)(new RegExp([/(?![0-9a-z@#]|>>|:\S|[^\p{C}\p{S}\p{P}\s]|emoji)/yu.source].join('').replace(/emoji/g, exports.emoji), 'yu')), false, undefined, undefined, [3 | 0 /* Backtrack.autolink */]), ([{
6384
+ exports.emoji = /\p{Emoji_Modifier_Base}\p{Emoji_Modifier}?|\p{Emoji_Presentation}|\p{Emoji}\uFE0F|\u200D/u;
6385
+ exports.hashtag = (0, combinator_1.lazy)(() => (0, combinator_1.rewrite)((0, combinator_1.verify)((0, combinator_1.surround)(new RegExp([/(?<![^\p{C}\p{S}\p{P}\s]|emoji)#/yiu.source].join('').replace(/emoji/g, exports.emoji.source), 'yu'), (0, source_1.str)(new RegExp([/(?!['_])(?:[^\p{C}\p{S}\p{P}\s]|emoji|'(?=[0-9A-Za-z])|_(?=[^\p{C}\p{S}\p{P}\s]|emoji))+/yu.source].join('').replace(/emoji/g, exports.emoji.source), 'yu')), (0, source_1.str)(new RegExp([/(?![0-9a-z@#]|>>|:\S|[^\p{C}\p{S}\p{P}\s]|emoji)/yu.source].join('').replace(/emoji/g, exports.emoji.source), 'yu')), false, undefined, undefined, [3 | 0 /* Backtrack.autolink */]), ([{
6386
6386
  value
6387
6387
  }]) => !/^[0-9]{1,4}$|^[0-9]{5}/.test(value)), (0, combinator_1.constraint)(1 /* State.autolink */, (0, combinator_1.state)(1 /* State.autolink */, (0, combinator_1.fmap)((0, combinator_1.convert)(source => `[${source}]{ ${`/hashtags/${source.slice(1)}`} }`, (0, combinator_1.union)([link_1.unsafelink]), false), ([{
6388
6388
  value
@@ -7140,19 +7140,10 @@ Object.defineProperty(exports, "__esModule", ({
7140
7140
  exports.htmlentity = exports.unsafehtmlentity = void 0;
7141
7141
  const parser_1 = __webpack_require__(605);
7142
7142
  const combinator_1 = __webpack_require__(3484);
7143
+ const source_1 = __webpack_require__(8745);
7143
7144
  const util_1 = __webpack_require__(4992);
7144
7145
  const dom_1 = __webpack_require__(394);
7145
- exports.unsafehtmlentity = (0, combinator_1.focus)(/&(?:[0-9A-Za-z]+;?)?/y,
7146
- //({ source }) => [[parser(source) ?? `${Command.Error}${source}`], '']));
7147
- ({
7148
- context
7149
- }) => {
7150
- const {
7151
- source
7152
- } = context;
7153
- context.position += source.length;
7154
- return source.length > 1 && source.at(-1) === ';' ? new parser_1.List([new parser_1.Data(parser(source) ?? source)]) : new parser_1.List([new parser_1.Data(source)]);
7155
- });
7146
+ exports.unsafehtmlentity = (0, combinator_1.surround)((0, source_1.str)('&'), (0, source_1.str)(/[0-9A-Za-z]+/y), (0, source_1.str)(';'), false, ([as, bs, cs]) => new parser_1.List([new parser_1.Data(parser(as.head.value + bs.head.value + cs.head.value))]), ([as, bs]) => new parser_1.List([new parser_1.Data(as.head.value + (bs?.head?.value ?? ''))]), [3 | 64 /* Backtrack.bracket */]);
7156
7147
  exports.htmlentity = (0, combinator_1.fmap)((0, combinator_1.union)([exports.unsafehtmlentity]), ([{
7157
7148
  value
7158
7149
  }]) => new parser_1.List([length === 1 || value.at(-1) !== ';' ? new parser_1.Data(value) : new parser_1.Data((0, dom_1.html)('span', {
@@ -7162,8 +7153,7 @@ exports.htmlentity = (0, combinator_1.fmap)((0, combinator_1.union)([exports.uns
7162
7153
  const parser = (el => entity => {
7163
7154
  if (entity === '&NewLine;') return ' ';
7164
7155
  el.innerHTML = entity;
7165
- const text = el.textContent;
7166
- return entity === text ? undefined : text;
7156
+ return el.textContent;
7167
7157
  })((0, dom_1.html)('span'));
7168
7158
 
7169
7159
  /***/ },
@@ -7772,6 +7762,7 @@ exports.ruby = (0, combinator_1.lazy)(() => (0, combinator_1.bind)((0, combinato
7772
7762
  }, acc) => value + ' ' + acc, '').trim())), new parser_1.Data((0, dom_1.html)('rp', ')'))])))))]);
7773
7763
  }
7774
7764
  }));
7765
+ const delimiter = /[$"`\[\](){}<>()[]{}]|\\?\n/y;
7775
7766
  const text = input => {
7776
7767
  const {
7777
7768
  context
@@ -7785,11 +7776,12 @@ const text = input => {
7785
7776
  for (let {
7786
7777
  position
7787
7778
  } = context; position < source.length; position = context.position) {
7788
- if (/[$"`\[\](){}<>()[]{}]|\\?\n/yi.test(source.slice(position, position + 2))) break;
7779
+ delimiter.lastIndex = position;
7780
+ if (delimiter.test(source)) break;
7789
7781
  switch (source[position]) {
7790
7782
  case '&':
7791
7783
  {
7792
- const result = (0, htmlentity_1.unsafehtmlentity)(input) ?? (0, source_1.txt)(input);
7784
+ const result = source[position + 1] !== ' ' ? (0, htmlentity_1.unsafehtmlentity)(input) ?? (0, source_1.txt)(input) : (0, source_1.txt)(input);
7793
7785
  acc.last.value += result.head.value;
7794
7786
  continue;
7795
7787
  }
@@ -8724,7 +8716,6 @@ function seek(source, position) {
8724
8716
  case '@':
8725
8717
  case '#':
8726
8718
  case '$':
8727
- case '&':
8728
8719
  case '"':
8729
8720
  case '`':
8730
8721
  case '[':
@@ -8760,6 +8751,9 @@ function seek(source, position) {
8760
8751
  case ':':
8761
8752
  if (source[i + 1] === '/' && source[i + 2] === '/') return i;
8762
8753
  continue;
8754
+ case '&':
8755
+ if (source[i + 1] !== ' ') return i;
8756
+ continue;
8763
8757
  case ' ':
8764
8758
  case '\t':
8765
8759
  case ' ':
package/markdown.d.ts CHANGED
@@ -1057,7 +1057,9 @@ export namespace MarkdownParser {
1057
1057
  export interface UnsafeHTMLEntityParser extends
1058
1058
  // &copy;
1059
1059
  Inline<'unsafehtmlentity'>,
1060
- Parser<string, Context, []> {
1060
+ Parser<string, Context, [
1061
+ SourceParser.StrParser,
1062
+ ]> {
1061
1063
  }
1062
1064
  export interface ShortMediaParser extends
1063
1065
  // !https://host
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "securemark",
3
- "version": "0.294.5",
3
+ "version": "0.294.6",
4
4
  "description": "Secure markdown renderer working on browsers for user input data.",
5
5
  "private": false,
6
6
  "homepage": "https://github.com/falsandtru/securemark",
@@ -39,8 +39,11 @@ export interface CtxOptions {
39
39
  // 区間別テーブルは固定サイズであるためプールして再使用できる。
40
40
  // 従って分割時のデータ構造は区間ごとに探索木を動的に生成しデータ数に応じてテーブルに移行するのが最も効率的である。
41
41
  // これにより最悪時間計算量線形化に要する最悪空間計算量が+1nに局限される。
42
+ // またはテーブルの参照が高速なら変換せず併用してもよい。
42
43
  // 木とテーブルいずれにおいてもバックトラックデータとオーバーヘッドを合わせた追加データサイズの最大値は
43
44
  // セグメントサイズに制約されるため入力サイズに対する最大追加データサイズの平均比率はかなり小さくなる。
45
+ // 必要なテーブルの最大サイズは最大セグメントサイズであるため最大追加データサイズは入力サイズにかかわらず
46
+ // 10KB*並列数に留まり最大数百文字以下の短文ならば数百byte*並列数となる。
44
47
  //
45
48
  // 1. データ数が規定数を超えたら区間テーブルを生成しデータを振り分ける。
46
49
  // - 子ノードのポインタだけ保持するとしても1ノード複数データ保持で圧縮できるかは微妙。
@@ -67,12 +67,20 @@ describe('Unit: parser/normalize', () => {
67
67
  assert(normalize('\x01---\na: b\x01\n---\n\n!> \x01---\na: b\x01\n---') === '\uFFFD---\na: b\uFFFD\n---\n\n!> \uFFFD---\na: b\uFFFD\n---');
68
68
  });
69
69
 
70
+ it('emoji', () => {
71
+ assert(normalize('😀') === '😀');
72
+ assert(normalize('🤚🏽') === '🤚🏽');
73
+ assert(normalize('👨‍👩‍👧') === '👨‍👩‍👧');
74
+ assert(normalize('🇺🇳') === '🇺🇳');
75
+ assert(normalize('#️⃣*️⃣0️⃣1️⃣2️⃣3️⃣4️⃣5️⃣6️⃣7️⃣8️⃣9️⃣') === '#️⃣*️⃣0️⃣1️⃣2️⃣3️⃣4️⃣5️⃣6️⃣7️⃣8️⃣9️⃣');
76
+ });
77
+
70
78
  });
71
79
 
72
80
  describe('escape', () => {
73
81
  it('', () => {
74
82
  assert(escape('\u200B') === '&ZeroWidthSpace;');
75
- assert(escape('\u200D') === '&zwj;');
83
+ assert(escape('\u200F') === '&rlm;');
76
84
  });
77
85
 
78
86
  });
@@ -15,7 +15,7 @@ function format(source: string): string {
15
15
 
16
16
  function sanitize(source: string): string {
17
17
  return source
18
- .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]|[\u2006\u200B-\u200F\u202A-\u202F\u2060\uFEFF]|(?<![\u1820\u1821])\u180E/g, UNICODE_REPLACEMENT_CHARACTER)
18
+ .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]|(?!\u200D)[\u2006\u200B-\u200F\u202A-\u202F\u2060\uFEFF]|(?<![\u1820\u1821])\u180E/g, UNICODE_REPLACEMENT_CHARACTER)
19
19
  .replace(/[\uD800-\uDBFF][\uDC00-\uDFFF]?|[\uDC00-\uDFFF]/g, char =>
20
20
  char.length === 1
21
21
  ? UNICODE_REPLACEMENT_CHARACTER
@@ -58,13 +58,20 @@ export const invisibleHTMLEntityNames = [
58
58
  'InvisibleComma',
59
59
  'ic',
60
60
  ] as const;
61
- const unreadableHTMLEntityNames: readonly string[] = invisibleHTMLEntityNames.slice(2);
62
- const unreadableEscapableCharacters = unreadableHTMLEntityNames
61
+ const unreadableEscapeHTMLEntityNames = invisibleHTMLEntityNames.filter(name => ![
62
+ 'Tab',
63
+ 'NewLine',
64
+ 'NonBreakingSpace',
65
+ 'nbsp',
66
+ 'zwj',
67
+ 'zwnj',
68
+ ].includes(name));
69
+ const unreadableEscapeCharacters = unreadableEscapeHTMLEntityNames
63
70
  .map(name => unsafehtmlentity(input(`&${name};`, {}))!.head!.value);
64
- assert(unreadableEscapableCharacters.length === unreadableHTMLEntityNames.length);
65
- assert(unreadableEscapableCharacters.every(c => c.length === 1));
66
- const unreadableEscapableCharacter = new RegExp(`[${unreadableEscapableCharacters.join('')}]`, 'g');
67
- assert(!unreadableEscapableCharacter.source.includes('&'));
71
+ assert(unreadableEscapeCharacters.length === unreadableEscapeHTMLEntityNames.length);
72
+ assert(unreadableEscapeCharacters.every(c => c.length === 1));
73
+ const unreadableEscapeCharacter = new RegExp(`[${unreadableEscapeCharacters.join('')}]`, 'g');
74
+ assert(!unreadableEscapeCharacter.source.includes('&'));
68
75
 
69
76
  // https://www.pandanoir.info/entry/2018/03/11/193000
70
77
  // http://anti.rosx.net/etc/memo/002_space.html
@@ -77,7 +84,7 @@ const unreadableSpecialCharacters = [
77
84
  // ZERO WIDTH NON-JOINER
78
85
  '\u200C',
79
86
  // ZERO WIDTH JOINER
80
- '\u200D',
87
+ //'\u200D',
81
88
  // LEFT-TO-RIGHT MARK
82
89
  '\u200E',
83
90
  // RIGHT-TO-LEFT MARK
@@ -104,6 +111,6 @@ assert(unreadableSpecialCharacters.every(c => sanitize(c) === UNICODE_REPLACEMEN
104
111
  // 特殊不可視文字はエディタおよびソースビューアでは等幅および強調表示により可視化する
105
112
  export function escape(source: string): string {
106
113
  return source
107
- .replace(unreadableEscapableCharacter, char =>
108
- `&${unreadableHTMLEntityNames[unreadableEscapableCharacters.indexOf(char)]};`);
114
+ .replace(unreadableEscapeCharacter, char =>
115
+ `&${unreadableEscapeHTMLEntityNames[unreadableEscapeCharacters.indexOf(char)]};`);
109
116
  }
@@ -21,10 +21,10 @@ export const channel: AutolinkParser.ChannelParser = lazy(() => rewrite(
21
21
  '#',
22
22
  str(new RegExp([
23
23
  /(?!['_])(?:[^\p{C}\p{S}\p{P}\s]|emoji|'(?=[0-9A-Za-z])|_(?=[^\p{C}\p{S}\p{P}\s]|emoji))+/yu.source,
24
- ].join('').replace(/emoji/g, emoji), 'yu')),
24
+ ].join('').replace(/emoji/g, emoji.source), 'yu')),
25
25
  str(new RegExp([
26
26
  /(?![0-9a-z@]|>>|:\S|[^\p{C}\p{S}\p{P}\s]|emoji)/yu.source,
27
- ].join('').replace(/emoji/g, emoji), 'yu')),
27
+ ].join('').replace(/emoji/g, emoji.source), 'yu')),
28
28
  false, undefined, undefined,
29
29
  [3 | Backtrack.autolink]),
30
30
  ([{ value }]) => !/^[0-9]{1,4}$|^[0-9]{5}/.test(value as string))),
@@ -11,10 +11,10 @@ export const hashnum: AutolinkParser.HashnumParser = lazy(() => rewrite(
11
11
  open(
12
12
  new RegExp([
13
13
  /(?<![^\p{C}\p{S}\p{P}\s]|emoji)#/yiu.source,
14
- ].join('').replace(/emoji/g, emoji), 'yu'),
14
+ ].join('').replace(/emoji/g, emoji.source), 'yu'),
15
15
  str(new RegExp([
16
16
  /[0-9]{1,9}(?![0-9a-z@#]|>>|:\S|[^\p{C}\p{S}\p{P}\s]|emoji)/yu.source,
17
- ].join('').replace(/emoji/g, emoji), 'yu')),
17
+ ].join('').replace(/emoji/g, emoji.source), 'yu')),
18
18
  false,
19
19
  [1 | Backtrack.autolink]),
20
20
  constraint(State.autolink, state(State.autolink, fmap(convert(
@@ -54,10 +54,14 @@ describe('Unit: parser/inline/autolink/hashtag', () => {
54
54
  assert.deepStrictEqual(inspect(parser('#a_b'), ctx), [['<a class="hashtag" href="/hashtags/a_b">#a_b</a>'], '']);
55
55
  assert.deepStrictEqual(inspect(parser('#a__b'), ctx), [['<a class="hashtag" href="/hashtags/a">#a</a>'], '__b']);
56
56
  assert.deepStrictEqual(inspect(parser('#あ'), ctx), [['<a class="hashtag" href="/hashtags/あ">#あ</a>'], '']);
57
- assert.deepStrictEqual(inspect(parser('#👩'), ctx), [['<a class="hashtag" href="/hashtags/👩">#👩</a>'], '']);
57
+ assert.deepStrictEqual(inspect(parser('#😀'), ctx), [['<a class="hashtag" href="/hashtags/😀">#😀</a>'], '']);
58
+ assert.deepStrictEqual(inspect(parser('#🤚🏽'), ctx), [['<a class="hashtag" href="/hashtags/🤚🏽">#🤚🏽</a>'], '']);
59
+ assert.deepStrictEqual(inspect(parser('#👨‍👩‍👧'), ctx), [['<a class="hashtag" href="/hashtags/👨‍👩‍👧">#👨‍👩‍👧</a>'], '']);
60
+ assert.deepStrictEqual(inspect(parser('#🇺🇳'), ctx), [['<a class="hashtag" href="/hashtags/🇺🇳">#🇺🇳</a>'], '']);
61
+ assert.deepStrictEqual(inspect(parser('##️⃣*️⃣0️⃣1️⃣2️⃣3️⃣4️⃣5️⃣6️⃣7️⃣8️⃣9️⃣'), ctx), [['<a class="hashtag" href="/hashtags/#️⃣*️⃣0️⃣1️⃣2️⃣3️⃣4️⃣5️⃣6️⃣7️⃣8️⃣9️⃣">##️⃣*️⃣0️⃣1️⃣2️⃣3️⃣4️⃣5️⃣6️⃣7️⃣8️⃣9️⃣</a>'], '']);
58
62
  assert.deepStrictEqual(inspect(parser('#1a'), ctx), [['<a class="hashtag" href="/hashtags/1a">#1a</a>'], '']);
59
63
  assert.deepStrictEqual(inspect(parser('#1あ'), ctx), [['<a class="hashtag" href="/hashtags/1あ">#1あ</a>'], '']);
60
- assert.deepStrictEqual(inspect(parser('#1👩'), ctx), [['<a class="hashtag" href="/hashtags/1👩">#1👩</a>'], '']);
64
+ assert.deepStrictEqual(inspect(parser('#1😀'), ctx), [['<a class="hashtag" href="/hashtags/1😀">#1😀</a>'], '']);
61
65
  assert.deepStrictEqual(inspect(parser(`#a'`), ctx), [[`<a class="hashtag" href="/hashtags/a">#a</a>`], `'`]);
62
66
  assert.deepStrictEqual(inspect(parser(`#a''`), ctx), [[`<a class="hashtag" href="/hashtags/a">#a</a>`], `''`]);
63
67
  assert.deepStrictEqual(inspect(parser(`#a'b`), ctx), [[`<a class="hashtag" href="/hashtags/a'b">#a'b</a>`], '']);
@@ -9,19 +9,19 @@ import { define } from 'typed-dom/dom';
9
9
  // https://example/hashtags/a must be a hashtag page or a redirect page going there.
10
10
 
11
11
  // https://github.com/tc39/proposal-regexp-unicode-property-escapes#matching-emoji
12
- export const emoji = String.raw`\p{Emoji_Modifier_Base}\p{Emoji_Modifier}?|\p{Emoji_Presentation}|\p{Emoji}\uFE0F`;
12
+ export const emoji = /\p{Emoji_Modifier_Base}\p{Emoji_Modifier}?|\p{Emoji_Presentation}|\p{Emoji}\uFE0F|\u200D/u;
13
13
 
14
14
  export const hashtag: AutolinkParser.HashtagParser = lazy(() => rewrite(
15
15
  verify(surround(
16
16
  new RegExp([
17
17
  /(?<![^\p{C}\p{S}\p{P}\s]|emoji)#/yiu.source,
18
- ].join('').replace(/emoji/g, emoji), 'yu'),
18
+ ].join('').replace(/emoji/g, emoji.source), 'yu'),
19
19
  str(new RegExp([
20
20
  /(?!['_])(?:[^\p{C}\p{S}\p{P}\s]|emoji|'(?=[0-9A-Za-z])|_(?=[^\p{C}\p{S}\p{P}\s]|emoji))+/yu.source,
21
- ].join('').replace(/emoji/g, emoji), 'yu')),
21
+ ].join('').replace(/emoji/g, emoji.source), 'yu')),
22
22
  str(new RegExp([
23
23
  /(?![0-9a-z@#]|>>|:\S|[^\p{C}\p{S}\p{P}\s]|emoji)/yu.source,
24
- ].join('').replace(/emoji/g, emoji), 'yu')),
24
+ ].join('').replace(/emoji/g, emoji.source), 'yu')),
25
25
  false, undefined, undefined,
26
26
  [3 | Backtrack.autolink]),
27
27
  ([{ value }]) => !/^[0-9]{1,4}$|^[0-9]{5}/.test(value)),
@@ -15,7 +15,7 @@ export const autolink: AutolinkParser = lazy(() =>
15
15
  /(?<![^\p{C}\p{S}\p{P}\s]|emoji)#/yiu.source,
16
16
  /(?<![0-9a-z])>>/yi.source,
17
17
  /(?<![0-9a-z][.+-]?|[@#])!?[0-9a-z]/yi.source,
18
- ].join('|').replace(/emoji/g, emoji), 'yiu'),
18
+ ].join('|').replace(/emoji/g, emoji.source), 'yiu'),
19
19
  state(~State.autolink,
20
20
  union([
21
21
  lineurl,
@@ -1,19 +1,19 @@
1
1
  import { HTMLEntityParser, UnsafeHTMLEntityParser } from '../inline';
2
+ import { Backtrack } from '../context';
2
3
  import { List, Data } from '../../combinator/data/parser';
3
- import { union, focus, fmap } from '../../combinator';
4
+ import { union, surround, fmap } from '../../combinator';
5
+ import { str } from '../source';
4
6
  import { invalid } from '../util';
5
7
  import { html } from 'typed-dom/dom';
6
8
 
7
- export const unsafehtmlentity: UnsafeHTMLEntityParser = focus(
8
- /&(?:[0-9A-Za-z]+;?)?/y,
9
- //({ source }) => [[parser(source) ?? `${Command.Error}${source}`], '']));
10
- ({ context }) => {
11
- const { source } = context;
12
- context.position += source.length;
13
- return source.length > 1 && source.at(-1) === ';'
14
- ? new List([new Data(parser(source) ?? source)])
15
- : new List([new Data(source)]);
16
- });
9
+ export const unsafehtmlentity: UnsafeHTMLEntityParser = surround(
10
+ str('&'), str(/[0-9A-Za-z]+/y), str(';'),
11
+ false,
12
+ ([as, bs, cs]) =>
13
+ new List([new Data(parser(as.head!.value + bs.head!.value + cs.head!.value))]),
14
+ ([as, bs]) =>
15
+ new List([new Data(as.head!.value + (bs?.head?.value ?? ''))]),
16
+ [3 | Backtrack.bracket]);
17
17
 
18
18
  export const htmlentity: HTMLEntityParser = fmap(
19
19
  union([unsafehtmlentity]),
@@ -26,11 +26,8 @@ export const htmlentity: HTMLEntityParser = fmap(
26
26
  }, value))
27
27
  ]));
28
28
 
29
- const parser = (el => (entity: string): string | undefined => {
29
+ const parser = (el => (entity: string): string => {
30
30
  if (entity === '&NewLine;') return ' ';
31
31
  el.innerHTML = entity;
32
- const text = el.textContent!;
33
- return entity === text
34
- ? undefined
35
- : text;
32
+ return el.textContent!;
36
33
  })(html('span'));
@@ -63,6 +63,8 @@ export const ruby: RubyParser = lazy(() => bind(
63
63
  }
64
64
  }));
65
65
 
66
+ const delimiter = /[$"`\[\](){}<>()[]{}]|\\?\n/y;
67
+
66
68
  const text: RubyParser.TextParser = input => {
67
69
  const { context } = input;
68
70
  const { source } = context;
@@ -70,11 +72,14 @@ const text: RubyParser.TextParser = input => {
70
72
  let state = false;
71
73
  context.sequential = true;
72
74
  for (let { position } = context; position < source.length; position = context.position) {
73
- if (/[$"`\[\](){}<>()[]{}]|\\?\n/yi.test(source.slice(position, position + 2))) break;
75
+ delimiter.lastIndex = position;
76
+ if (delimiter.test(source)) break;
74
77
  assert(source[position] !== '\n');
75
78
  switch (source[position]) {
76
79
  case '&': {
77
- const result = unsafehtmlentity(input) ?? txt(input)!;
80
+ const result = source[position + 1] !== ' '
81
+ ? unsafehtmlentity(input) ?? txt(input)!
82
+ : txt(input)!;
78
83
  assert(result);
79
84
  acc.last!.value += result.head!.value;
80
85
  continue;
@@ -238,7 +238,6 @@ function seek(source: string, position: number): number {
238
238
  case '@':
239
239
  case '#':
240
240
  case '$':
241
- case '&':
242
241
  case '"':
243
242
  case '`':
244
243
  case '[':
@@ -274,6 +273,9 @@ function seek(source: string, position: number): number {
274
273
  case ':':
275
274
  if (source[i + 1] === '/' && source[i + 2] === '/') return i;
276
275
  continue;
276
+ case '&':
277
+ if (source[i + 1] !== ' ') return i;
278
+ continue;
277
279
  case ' ':
278
280
  case '\t':
279
281
  case ' ':