npm - securemark - Versions diffs - 0.294.5 → 0.294.6 - Mend

securemark 0.294.5 → 0.294.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

package/CHANGELOG.md +4 -0
package/dist/index.js +22 -28
package/markdown.d.ts +3 -1
package/package.json +1 -1
package/src/combinator/data/parser.ts +3 -0
package/src/parser/api/normalize.test.ts +9 -1
package/src/parser/api/normalize.ts +17 -10
package/src/parser/inline/autolink/channel.ts +2 -2
package/src/parser/inline/autolink/hashnum.ts +2 -2
package/src/parser/inline/autolink/hashtag.test.ts +6 -2
package/src/parser/inline/autolink/hashtag.ts +4 -4
package/src/parser/inline/autolink.ts +1 -1
package/src/parser/inline/htmlentity.ts +13 -16
package/src/parser/inline/ruby.ts +7 -2
package/src/parser/source/text.ts +3 -1

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,9 @@
 # Changelog
+## 0.294.6
+- Refactoring.
 ## 0.294.5
 - Refactoring.

package/dist/index.js CHANGED Viewed

@@ -1,4 +1,4 @@
-/*! securemark v0.294.5 https://github.com/falsandtru/securemark | (c) 2017, falsandtru | UNLICENSED License */
+/*! securemark v0.294.6 https://github.com/falsandtru/securemark | (c) 2017, falsandtru | UNLICENSED License */
 (function webpackUniversalModuleDefinition(root, factory) {
 	if(typeof exports === 'object' && typeof module === 'object')
 		module.exports = factory(require("Prism"), require("DOMPurify"));
@@ -4397,14 +4397,14 @@ function format(source) {
   return source.replace(/\r\n?/g, '\n');
 }
 function sanitize(source) {
-  return source.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]|[\u2006\u200B-\u200F\u202A-\u202F\u2060\uFEFF]|(?<![\u1820\u1821])\u180E/g, UNICODE_REPLACEMENT_CHARACTER).replace(/[\uD800-\uDBFF][\uDC00-\uDFFF]?|[\uDC00-\uDFFF]/g, char => char.length === 1 ? UNICODE_REPLACEMENT_CHARACTER : char);
+  return source.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]|(?!\u200D)[\u2006\u200B-\u200F\u202A-\u202F\u2060\uFEFF]|(?<![\u1820\u1821])\u180E/g, UNICODE_REPLACEMENT_CHARACTER).replace(/[\uD800-\uDBFF][\uDC00-\uDFFF]?|[\uDC00-\uDFFF]/g, char => char.length === 1 ? UNICODE_REPLACEMENT_CHARACTER : char);
 }
 // https://dev.w3.org/html5/html-author/charref
 // https://en.wikipedia.org/wiki/Whitespace_character
 exports.invisibleHTMLEntityNames = ['Tab', 'NewLine', 'NonBreakingSpace', 'nbsp', 'shy', 'ensp', 'emsp', 'emsp13', 'emsp14', 'numsp', 'puncsp', 'ThinSpace', 'thinsp', 'VeryThinSpace', 'hairsp', 'ZeroWidthSpace', 'NegativeVeryThinSpace', 'NegativeThinSpace', 'NegativeMediumSpace', 'NegativeThickSpace', 'zwj', 'zwnj', 'lrm', 'rlm', 'MediumSpace', 'NoBreak', 'ApplyFunction', 'af', 'InvisibleTimes', 'it', 'InvisibleComma', 'ic'];
-const unreadableHTMLEntityNames = exports.invisibleHTMLEntityNames.slice(2);
-const unreadableEscapableCharacters = unreadableHTMLEntityNames.map(name => (0, htmlentity_1.unsafehtmlentity)((0, parser_1.input)(`&${name};`, {})).head.value);
-const unreadableEscapableCharacter = new RegExp(`[${unreadableEscapableCharacters.join('')}]`, 'g');
+const unreadableEscapeHTMLEntityNames = exports.invisibleHTMLEntityNames.filter(name => !['Tab', 'NewLine', 'NonBreakingSpace', 'nbsp', 'zwj', 'zwnj'].includes(name));
+const unreadableEscapeCharacters = unreadableEscapeHTMLEntityNames.map(name => (0, htmlentity_1.unsafehtmlentity)((0, parser_1.input)(`&${name};`, {})).head.value);
+const unreadableEscapeCharacter = new RegExp(`[${unreadableEscapeCharacters.join('')}]`, 'g');
 // https://www.pandanoir.info/entry/2018/03/11/193000
 // http://anti.rosx.net/etc/memo/002_space.html
 // http://nicowiki.com/%E7%A9%BA%E7%99%BD%E3%83%BB%E7%89%B9%E6%AE%8A%E8%A8%98%E5%8F%B7.html
@@ -4416,7 +4416,7 @@ const unreadableSpecialCharacters = (/* unused pure expression or super */ null
 // ZERO WIDTH NON-JOINER
 '\u200C',
 // ZERO WIDTH JOINER
-'\u200D',
+//'\u200D',
 // LEFT-TO-RIGHT MARK
 '\u200E',
 // RIGHT-TO-LEFT MARK
@@ -4439,7 +4439,7 @@ const unreadableSpecialCharacters = (/* unused pure expression or super */ null
 '\uFEFF']));
 // 特殊不可視文字はエディタおよびソースビューアでは等幅および強調表示により可視化する
 function escape(source) {
-  return source.replace(unreadableEscapableCharacter, char => `&${unreadableHTMLEntityNames[unreadableEscapableCharacters.indexOf(char)]};`);
+  return source.replace(unreadableEscapeCharacter, char => `&${unreadableEscapeHTMLEntityNames[unreadableEscapeCharacters.indexOf(char)]};`);
 }
 exports.escape = escape;
@@ -6208,7 +6208,7 @@ const account_1 = __webpack_require__(4107);
 const hashtag_1 = __webpack_require__(5764);
 const hashnum_1 = __webpack_require__(8684);
 const anchor_1 = __webpack_require__(8535);
-exports.autolink = (0, combinator_1.lazy)(() => (0, combinator_1.validate)(new RegExp([/(?<![0-9a-z])@/yi.source, /(?<![^\p{C}\p{S}\p{P}\s]|emoji)#/yiu.source, /(?<![0-9a-z])>>/yi.source, /(?<![0-9a-z][.+-]?|[@#])!?[0-9a-z]/yi.source].join('|').replace(/emoji/g, hashtag_1.emoji), 'yiu'), (0, combinator_1.state)(~1 /* State.autolink */, (0, combinator_1.union)([url_1.lineurl, url_1.url, email_1.email,
+exports.autolink = (0, combinator_1.lazy)(() => (0, combinator_1.validate)(new RegExp([/(?<![0-9a-z])@/yi.source, /(?<![^\p{C}\p{S}\p{P}\s]|emoji)#/yiu.source, /(?<![0-9a-z])>>/yi.source, /(?<![0-9a-z][.+-]?|[@#])!?[0-9a-z]/yi.source].join('|').replace(/emoji/g, hashtag_1.emoji.source), 'yiu'), (0, combinator_1.state)(~1 /* State.autolink */, (0, combinator_1.union)([url_1.lineurl, url_1.url, email_1.email,
 // Escape unmatched email-like strings.
 //str(/[0-9a-z]+(?:[_.+-][0-9a-z]+[:@]?|:|@(?=@))*/yi),
 channel_1.channel, account_1.account,
@@ -6292,7 +6292,7 @@ const hashtag_1 = __webpack_require__(5764);
 const source_1 = __webpack_require__(8745);
 const dom_1 = __webpack_require__(394);
 // https://example/@user?ch=a+b must be a user channel page or a redirect page going there.
-exports.channel = (0, combinator_1.lazy)(() => (0, combinator_1.rewrite)((0, combinator_1.sequence)([(0, combinator_1.surround)(/(?<![0-9a-z])@/yi, (0, source_1.str)(/[0-9a-z](?:(?:[0-9a-z]|-(?=[0-9a-z])){0,61}[0-9a-z])?(?:\.[0-9a-z](?:(?:[0-9a-z]|-(?=[0-9a-z])){0,61}[0-9a-z])?)*\//yi), (0, source_1.str)(/[a-z][0-9a-z]*(?:[-.][0-9a-z]+)*(?![-.]?[0-9a-z@]|>>|:\S)/yi), true, undefined, undefined, [3 | 0 /* Backtrack.autolink */]), (0, combinator_1.some)((0, combinator_1.verify)((0, combinator_1.surround)('#', (0, source_1.str)(new RegExp([/(?!['_])(?:[^\p{C}\p{S}\p{P}\s]|emoji|'(?=[0-9A-Za-z])|_(?=[^\p{C}\p{S}\p{P}\s]|emoji))+/yu.source].join('').replace(/emoji/g, hashtag_1.emoji), 'yu')), (0, source_1.str)(new RegExp([/(?![0-9a-z@]|>>|:\S|[^\p{C}\p{S}\p{P}\s]|emoji)/yu.source].join('').replace(/emoji/g, hashtag_1.emoji), 'yu')), false, undefined, undefined, [3 | 0 /* Backtrack.autolink */]), ([{
+exports.channel = (0, combinator_1.lazy)(() => (0, combinator_1.rewrite)((0, combinator_1.sequence)([(0, combinator_1.surround)(/(?<![0-9a-z])@/yi, (0, source_1.str)(/[0-9a-z](?:(?:[0-9a-z]|-(?=[0-9a-z])){0,61}[0-9a-z])?(?:\.[0-9a-z](?:(?:[0-9a-z]|-(?=[0-9a-z])){0,61}[0-9a-z])?)*\//yi), (0, source_1.str)(/[a-z][0-9a-z]*(?:[-.][0-9a-z]+)*(?![-.]?[0-9a-z@]|>>|:\S)/yi), true, undefined, undefined, [3 | 0 /* Backtrack.autolink */]), (0, combinator_1.some)((0, combinator_1.verify)((0, combinator_1.surround)('#', (0, source_1.str)(new RegExp([/(?!['_])(?:[^\p{C}\p{S}\p{P}\s]|emoji|'(?=[0-9A-Za-z])|_(?=[^\p{C}\p{S}\p{P}\s]|emoji))+/yu.source].join('').replace(/emoji/g, hashtag_1.emoji.source), 'yu')), (0, source_1.str)(new RegExp([/(?![0-9a-z@]|>>|:\S|[^\p{C}\p{S}\p{P}\s]|emoji)/yu.source].join('').replace(/emoji/g, hashtag_1.emoji.source), 'yu')), false, undefined, undefined, [3 | 0 /* Backtrack.autolink */]), ([{
   value
 }]) => !/^[0-9]{1,4}$|^[0-9]{5}/.test(value)))]), (0, combinator_1.constraint)(1 /* State.autolink */, (0, combinator_1.state)(1 /* State.autolink */, (0, combinator_1.fmap)((0, combinator_1.convert)(source => `[${source}]{ ${source.includes('/') ? `https://${source.slice(1, source.indexOf('#')).replace('/', '/@')}` : `/${source.slice(0, source.indexOf('#'))}`} }`, (0, combinator_1.union)([link_1.unsafelink]), false), ([{
   value: el
@@ -6355,7 +6355,7 @@ const link_1 = __webpack_require__(3628);
 const hashtag_1 = __webpack_require__(5764);
 const source_1 = __webpack_require__(8745);
 const dom_1 = __webpack_require__(394);
-exports.hashnum = (0, combinator_1.lazy)(() => (0, combinator_1.rewrite)((0, combinator_1.open)(new RegExp([/(?<![^\p{C}\p{S}\p{P}\s]|emoji)#/yiu.source].join('').replace(/emoji/g, hashtag_1.emoji), 'yu'), (0, source_1.str)(new RegExp([/[0-9]{1,9}(?![0-9a-z@#]|>>|:\S|[^\p{C}\p{S}\p{P}\s]|emoji)/yu.source].join('').replace(/emoji/g, hashtag_1.emoji), 'yu')), false, [1 | 0 /* Backtrack.autolink */]), (0, combinator_1.constraint)(1 /* State.autolink */, (0, combinator_1.state)(1 /* State.autolink */, (0, combinator_1.fmap)((0, combinator_1.convert)(source => `[${source}]{ ${source.slice(1)} }`, (0, combinator_1.union)([link_1.unsafelink]), false), ([{
+exports.hashnum = (0, combinator_1.lazy)(() => (0, combinator_1.rewrite)((0, combinator_1.open)(new RegExp([/(?<![^\p{C}\p{S}\p{P}\s]|emoji)#/yiu.source].join('').replace(/emoji/g, hashtag_1.emoji.source), 'yu'), (0, source_1.str)(new RegExp([/[0-9]{1,9}(?![0-9a-z@#]|>>|:\S|[^\p{C}\p{S}\p{P}\s]|emoji)/yu.source].join('').replace(/emoji/g, hashtag_1.emoji.source), 'yu')), false, [1 | 0 /* Backtrack.autolink */]), (0, combinator_1.constraint)(1 /* State.autolink */, (0, combinator_1.state)(1 /* State.autolink */, (0, combinator_1.fmap)((0, combinator_1.convert)(source => `[${source}]{ ${source.slice(1)} }`, (0, combinator_1.union)([link_1.unsafelink]), false), ([{
   value
 }]) => new parser_1.List([new parser_1.Data((0, dom_1.define)(value, {
   class: 'hashnum',
@@ -6381,8 +6381,8 @@ const source_1 = __webpack_require__(8745);
 const dom_1 = __webpack_require__(394);
 // https://example/hashtags/a must be a hashtag page or a redirect page going there.
 // https://github.com/tc39/proposal-regexp-unicode-property-escapes#matching-emoji
-exports.emoji = String.raw`\p{Emoji_Modifier_Base}\p{Emoji_Modifier}?|\p{Emoji_Presentation}|\p{Emoji}\uFE0F`;
-exports.hashtag = (0, combinator_1.lazy)(() => (0, combinator_1.rewrite)((0, combinator_1.verify)((0, combinator_1.surround)(new RegExp([/(?<![^\p{C}\p{S}\p{P}\s]|emoji)#/yiu.source].join('').replace(/emoji/g, exports.emoji), 'yu'), (0, source_1.str)(new RegExp([/(?!['_])(?:[^\p{C}\p{S}\p{P}\s]|emoji|'(?=[0-9A-Za-z])|_(?=[^\p{C}\p{S}\p{P}\s]|emoji))+/yu.source].join('').replace(/emoji/g, exports.emoji), 'yu')), (0, source_1.str)(new RegExp([/(?![0-9a-z@#]|>>|:\S|[^\p{C}\p{S}\p{P}\s]|emoji)/yu.source].join('').replace(/emoji/g, exports.emoji), 'yu')), false, undefined, undefined, [3 | 0 /* Backtrack.autolink */]), ([{
+exports.emoji = /\p{Emoji_Modifier_Base}\p{Emoji_Modifier}?|\p{Emoji_Presentation}|\p{Emoji}\uFE0F|\u200D/u;
+exports.hashtag = (0, combinator_1.lazy)(() => (0, combinator_1.rewrite)((0, combinator_1.verify)((0, combinator_1.surround)(new RegExp([/(?<![^\p{C}\p{S}\p{P}\s]|emoji)#/yiu.source].join('').replace(/emoji/g, exports.emoji.source), 'yu'), (0, source_1.str)(new RegExp([/(?!['_])(?:[^\p{C}\p{S}\p{P}\s]|emoji|'(?=[0-9A-Za-z])|_(?=[^\p{C}\p{S}\p{P}\s]|emoji))+/yu.source].join('').replace(/emoji/g, exports.emoji.source), 'yu')), (0, source_1.str)(new RegExp([/(?![0-9a-z@#]|>>|:\S|[^\p{C}\p{S}\p{P}\s]|emoji)/yu.source].join('').replace(/emoji/g, exports.emoji.source), 'yu')), false, undefined, undefined, [3 | 0 /* Backtrack.autolink */]), ([{
   value
 }]) => !/^[0-9]{1,4}$|^[0-9]{5}/.test(value)), (0, combinator_1.constraint)(1 /* State.autolink */, (0, combinator_1.state)(1 /* State.autolink */, (0, combinator_1.fmap)((0, combinator_1.convert)(source => `[${source}]{ ${`/hashtags/${source.slice(1)}`} }`, (0, combinator_1.union)([link_1.unsafelink]), false), ([{
   value
@@ -7140,19 +7140,10 @@ Object.defineProperty(exports, "__esModule", ({
 exports.htmlentity = exports.unsafehtmlentity = void 0;
 const parser_1 = __webpack_require__(605);
 const combinator_1 = __webpack_require__(3484);
+const source_1 = __webpack_require__(8745);
 const util_1 = __webpack_require__(4992);
 const dom_1 = __webpack_require__(394);
-exports.unsafehtmlentity = (0, combinator_1.focus)(/&(?:[0-9A-Za-z]+;?)?/y,
-//({ source }) => [[parser(source) ?? `${Command.Error}${source}`], '']));
-({
-  context
-}) => {
-  const {
-    source
-  } = context;
-  context.position += source.length;
-  return source.length > 1 && source.at(-1) === ';' ? new parser_1.List([new parser_1.Data(parser(source) ?? source)]) : new parser_1.List([new parser_1.Data(source)]);
-});
+exports.unsafehtmlentity = (0, combinator_1.surround)((0, source_1.str)('&'), (0, source_1.str)(/[0-9A-Za-z]+/y), (0, source_1.str)(';'), false, ([as, bs, cs]) => new parser_1.List([new parser_1.Data(parser(as.head.value + bs.head.value + cs.head.value))]), ([as, bs]) => new parser_1.List([new parser_1.Data(as.head.value + (bs?.head?.value ?? ''))]), [3 | 64 /* Backtrack.bracket */]);
 exports.htmlentity = (0, combinator_1.fmap)((0, combinator_1.union)([exports.unsafehtmlentity]), ([{
   value
 }]) => new parser_1.List([length === 1 || value.at(-1) !== ';' ? new parser_1.Data(value) : new parser_1.Data((0, dom_1.html)('span', {
@@ -7162,8 +7153,7 @@ exports.htmlentity = (0, combinator_1.fmap)((0, combinator_1.union)([exports.uns
 const parser = (el => entity => {
   if (entity === '&NewLine;') return ' ';
   el.innerHTML = entity;
-  const text = el.textContent;
-  return entity === text ? undefined : text;
+  return el.textContent;
 })((0, dom_1.html)('span'));
 /***/ },
@@ -7772,6 +7762,7 @@ exports.ruby = (0, combinator_1.lazy)(() => (0, combinator_1.bind)((0, combinato
       }, acc) => value + ' ' + acc, '').trim())), new parser_1.Data((0, dom_1.html)('rp', ')'))])))))]);
   }
 }));
+const delimiter = /[$"`\[\](){}<>（）［］｛｝]|\\?\n/y;
 const text = input => {
   const {
     context
@@ -7785,11 +7776,12 @@ const text = input => {
   for (let {
     position
   } = context; position < source.length; position = context.position) {
-    if (/[$"`\[\](){}<>（）［］｛｝]|\\?\n/yi.test(source.slice(position, position + 2))) break;
+    delimiter.lastIndex = position;
+    if (delimiter.test(source)) break;
     switch (source[position]) {
       case '&':
         {
-          const result = (0, htmlentity_1.unsafehtmlentity)(input) ?? (0, source_1.txt)(input);
+          const result = source[position + 1] !== ' ' ? (0, htmlentity_1.unsafehtmlentity)(input) ?? (0, source_1.txt)(input) : (0, source_1.txt)(input);
           acc.last.value += result.head.value;
           continue;
         }
@@ -8724,7 +8716,6 @@ function seek(source, position) {
       case '@':
       case '#':
       case '$':
-      case '&':
       case '"':
       case '`':
       case '[':
@@ -8760,6 +8751,9 @@ function seek(source, position) {
       case ':':
         if (source[i + 1] === '/' && source[i + 2] === '/') return i;
         continue;
+      case '&':
+        if (source[i + 1] !== ' ') return i;
+        continue;
       case ' ':
       case '\t':
       case '　':

package/markdown.d.ts CHANGED Viewed

@@ -1057,7 +1057,9 @@ export namespace MarkdownParser {
     export interface UnsafeHTMLEntityParser extends
       // &copy;
       Inline<'unsafehtmlentity'>,
-      Parser<string, Context, []> {
+      Parser<string, Context, [
+        SourceParser.StrParser,
+      ]> {
     }
     export interface ShortMediaParser extends
       // !https://host

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "securemark",
-  "version": "0.294.5",
+  "version": "0.294.6",
   "description": "Secure markdown renderer working on browsers for user input data.",
   "private": false,
   "homepage": "https://github.com/falsandtru/securemark",

package/src/combinator/data/parser.ts CHANGED Viewed

@@ -39,8 +39,11 @@ export interface CtxOptions {
   // 区間別テーブルは固定サイズであるためプールして再使用できる。
   // 従って分割時のデータ構造は区間ごとに探索木を動的に生成しデータ数に応じてテーブルに移行するのが最も効率的である。
   // これにより最悪時間計算量線形化に要する最悪空間計算量が+1nに局限される。
+  // またはテーブルの参照が高速なら変換せず併用してもよい。
   // 木とテーブルいずれにおいてもバックトラックデータとオーバーヘッドを合わせた追加データサイズの最大値は
   // セグメントサイズに制約されるため入力サイズに対する最大追加データサイズの平均比率はかなり小さくなる。
+  // 必要なテーブルの最大サイズは最大セグメントサイズであるため最大追加データサイズは入力サイズにかかわらず
+  // 10KB*並列数に留まり最大数百文字以下の短文ならば数百byte*並列数となる。
   //
   // 1. データ数が規定数を超えたら区間テーブルを生成しデータを振り分ける。
   //   - 子ノードのポインタだけ保持するとしても1ノード複数データ保持で圧縮できるかは微妙。

package/src/parser/api/normalize.test.ts CHANGED Viewed

@@ -67,12 +67,20 @@ describe('Unit: parser/normalize', () => {
       assert(normalize('\x01---\na: b\x01\n---\n\n!> \x01---\na: b\x01\n---') === '\uFFFD---\na: b\uFFFD\n---\n\n!> \uFFFD---\na: b\uFFFD\n---');
     });
+    it('emoji', () => {
+      assert(normalize('😀') === '😀');
+      assert(normalize('🤚🏽') === '🤚🏽');
+      assert(normalize('👨‍👩‍👧') === '👨‍👩‍👧');
+      assert(normalize('🇺🇳') === '🇺🇳');
+      assert(normalize('#️⃣*️⃣0️⃣1️⃣2️⃣3️⃣4️⃣5️⃣6️⃣7️⃣8️⃣9️⃣') === '#️⃣*️⃣0️⃣1️⃣2️⃣3️⃣4️⃣5️⃣6️⃣7️⃣8️⃣9️⃣');
+    });
   });
   describe('escape', () => {
     it('', () => {
       assert(escape('\u200B') === '&ZeroWidthSpace;');
-      assert(escape('\u200D') === '&zwj;');
+      assert(escape('\u200F') === '&rlm;');
     });
   });

package/src/parser/api/normalize.ts CHANGED Viewed

@@ -15,7 +15,7 @@ function format(source: string): string {
 function sanitize(source: string): string {
   return source
-    .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]|[\u2006\u200B-\u200F\u202A-\u202F\u2060\uFEFF]|(?<![\u1820\u1821])\u180E/g, UNICODE_REPLACEMENT_CHARACTER)
+    .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]|(?!\u200D)[\u2006\u200B-\u200F\u202A-\u202F\u2060\uFEFF]|(?<![\u1820\u1821])\u180E/g, UNICODE_REPLACEMENT_CHARACTER)
     .replace(/[\uD800-\uDBFF][\uDC00-\uDFFF]?|[\uDC00-\uDFFF]/g, char =>
       char.length === 1
         ? UNICODE_REPLACEMENT_CHARACTER
@@ -58,13 +58,20 @@ export const invisibleHTMLEntityNames = [
   'InvisibleComma',
   'ic',
 ] as const;
-const unreadableHTMLEntityNames: readonly string[] = invisibleHTMLEntityNames.slice(2);
-const unreadableEscapableCharacters = unreadableHTMLEntityNames
+const unreadableEscapeHTMLEntityNames = invisibleHTMLEntityNames.filter(name => ![
+  'Tab',
+  'NewLine',
+  'NonBreakingSpace',
+  'nbsp',
+  'zwj',
+  'zwnj',
+].includes(name));
+const unreadableEscapeCharacters = unreadableEscapeHTMLEntityNames
   .map(name => unsafehtmlentity(input(`&${name};`, {}))!.head!.value);
-assert(unreadableEscapableCharacters.length === unreadableHTMLEntityNames.length);
-assert(unreadableEscapableCharacters.every(c => c.length === 1));
-const unreadableEscapableCharacter = new RegExp(`[${unreadableEscapableCharacters.join('')}]`, 'g');
-assert(!unreadableEscapableCharacter.source.includes('&'));
+assert(unreadableEscapeCharacters.length === unreadableEscapeHTMLEntityNames.length);
+assert(unreadableEscapeCharacters.every(c => c.length === 1));
+const unreadableEscapeCharacter = new RegExp(`[${unreadableEscapeCharacters.join('')}]`, 'g');
+assert(!unreadableEscapeCharacter.source.includes('&'));
 // https://www.pandanoir.info/entry/2018/03/11/193000
 // http://anti.rosx.net/etc/memo/002_space.html
@@ -77,7 +84,7 @@ const unreadableSpecialCharacters = [
   // ZERO WIDTH NON-JOINER
   '\u200C',
   // ZERO WIDTH JOINER
-  '\u200D',
+  //'\u200D',
   // LEFT-TO-RIGHT MARK
   '\u200E',
   // RIGHT-TO-LEFT MARK
@@ -104,6 +111,6 @@ assert(unreadableSpecialCharacters.every(c => sanitize(c) === UNICODE_REPLACEMEN
 // 特殊不可視文字はエディタおよびソースビューアでは等幅および強調表示により可視化する
 export function escape(source: string): string {
   return source
-    .replace(unreadableEscapableCharacter, char =>
-      `&${unreadableHTMLEntityNames[unreadableEscapableCharacters.indexOf(char)]};`);
+    .replace(unreadableEscapeCharacter, char =>
+      `&${unreadableEscapeHTMLEntityNames[unreadableEscapeCharacters.indexOf(char)]};`);
 }

package/src/parser/inline/autolink/channel.ts CHANGED Viewed

@@ -21,10 +21,10 @@ export const channel: AutolinkParser.ChannelParser = lazy(() => rewrite(
       '#',
       str(new RegExp([
         /(?!['_])(?:[^\p{C}\p{S}\p{P}\s]|emoji|'(?=[0-9A-Za-z])|_(?=[^\p{C}\p{S}\p{P}\s]|emoji))+/yu.source,
-      ].join('').replace(/emoji/g, emoji), 'yu')),
+      ].join('').replace(/emoji/g, emoji.source), 'yu')),
       str(new RegExp([
         /(?![0-9a-z@]|>>|:\S|[^\p{C}\p{S}\p{P}\s]|emoji)/yu.source,
-      ].join('').replace(/emoji/g, emoji), 'yu')),
+      ].join('').replace(/emoji/g, emoji.source), 'yu')),
       false, undefined, undefined,
       [3 | Backtrack.autolink]),
       ([{ value }]) => !/^[0-9]{1,4}$|^[0-9]{5}/.test(value as string))),

package/src/parser/inline/autolink/hashnum.ts CHANGED Viewed

@@ -11,10 +11,10 @@ export const hashnum: AutolinkParser.HashnumParser = lazy(() => rewrite(
   open(
     new RegExp([
       /(?<![^\p{C}\p{S}\p{P}\s]|emoji)#/yiu.source,
-    ].join('').replace(/emoji/g, emoji), 'yu'),
+    ].join('').replace(/emoji/g, emoji.source), 'yu'),
     str(new RegExp([
       /[0-9]{1,9}(?![0-9a-z@#]|>>|:\S|[^\p{C}\p{S}\p{P}\s]|emoji)/yu.source,
-    ].join('').replace(/emoji/g, emoji), 'yu')),
+    ].join('').replace(/emoji/g, emoji.source), 'yu')),
     false,
     [1 | Backtrack.autolink]),
   constraint(State.autolink, state(State.autolink, fmap(convert(

package/src/parser/inline/autolink/hashtag.test.ts CHANGED Viewed

@@ -54,10 +54,14 @@ describe('Unit: parser/inline/autolink/hashtag', () => {
       assert.deepStrictEqual(inspect(parser('#a_b'), ctx), [['<a class="hashtag" href="/hashtags/a_b">#a_b</a>'], '']);
       assert.deepStrictEqual(inspect(parser('#a__b'), ctx), [['<a class="hashtag" href="/hashtags/a">#a</a>'], '__b']);
       assert.deepStrictEqual(inspect(parser('#あ'), ctx), [['<a class="hashtag" href="/hashtags/あ">#あ</a>'], '']);
-      assert.deepStrictEqual(inspect(parser('#👩'), ctx), [['<a class="hashtag" href="/hashtags/👩">#👩</a>'], '']);
+      assert.deepStrictEqual(inspect(parser('#😀'), ctx), [['<a class="hashtag" href="/hashtags/😀">#😀</a>'], '']);
+      assert.deepStrictEqual(inspect(parser('#🤚🏽'), ctx), [['<a class="hashtag" href="/hashtags/🤚🏽">#🤚🏽</a>'], '']);
+      assert.deepStrictEqual(inspect(parser('#👨‍👩‍👧'), ctx), [['<a class="hashtag" href="/hashtags/👨‍👩‍👧">#👨‍👩‍👧</a>'], '']);
+      assert.deepStrictEqual(inspect(parser('#🇺🇳'), ctx), [['<a class="hashtag" href="/hashtags/🇺🇳">#🇺🇳</a>'], '']);
+      assert.deepStrictEqual(inspect(parser('##️⃣*️⃣0️⃣1️⃣2️⃣3️⃣4️⃣5️⃣6️⃣7️⃣8️⃣9️⃣'), ctx), [['<a class="hashtag" href="/hashtags/#️⃣*️⃣0️⃣1️⃣2️⃣3️⃣4️⃣5️⃣6️⃣7️⃣8️⃣9️⃣">##️⃣*️⃣0️⃣1️⃣2️⃣3️⃣4️⃣5️⃣6️⃣7️⃣8️⃣9️⃣</a>'], '']);
       assert.deepStrictEqual(inspect(parser('#1a'), ctx), [['<a class="hashtag" href="/hashtags/1a">#1a</a>'], '']);
       assert.deepStrictEqual(inspect(parser('#1あ'), ctx), [['<a class="hashtag" href="/hashtags/1あ">#1あ</a>'], '']);
-      assert.deepStrictEqual(inspect(parser('#1👩'), ctx), [['<a class="hashtag" href="/hashtags/1👩">#1👩</a>'], '']);
+      assert.deepStrictEqual(inspect(parser('#1😀'), ctx), [['<a class="hashtag" href="/hashtags/1😀">#1😀</a>'], '']);
       assert.deepStrictEqual(inspect(parser(`#a'`), ctx), [[`<a class="hashtag" href="/hashtags/a">#a</a>`], `'`]);
       assert.deepStrictEqual(inspect(parser(`#a''`), ctx), [[`<a class="hashtag" href="/hashtags/a">#a</a>`], `''`]);
       assert.deepStrictEqual(inspect(parser(`#a'b`), ctx), [[`<a class="hashtag" href="/hashtags/a'b">#a'b</a>`], '']);

package/src/parser/inline/autolink/hashtag.ts CHANGED Viewed

@@ -9,19 +9,19 @@ import { define } from 'typed-dom/dom';
 // https://example/hashtags/a must be a hashtag page or a redirect page going there.
 // https://github.com/tc39/proposal-regexp-unicode-property-escapes#matching-emoji
-export const emoji = String.raw`\p{Emoji_Modifier_Base}\p{Emoji_Modifier}?|\p{Emoji_Presentation}|\p{Emoji}\uFE0F`;
+export const emoji = /\p{Emoji_Modifier_Base}\p{Emoji_Modifier}?|\p{Emoji_Presentation}|\p{Emoji}\uFE0F|\u200D/u;
 export const hashtag: AutolinkParser.HashtagParser = lazy(() => rewrite(
   verify(surround(
     new RegExp([
       /(?<![^\p{C}\p{S}\p{P}\s]|emoji)#/yiu.source,
-    ].join('').replace(/emoji/g, emoji), 'yu'),
+    ].join('').replace(/emoji/g, emoji.source), 'yu'),
     str(new RegExp([
       /(?!['_])(?:[^\p{C}\p{S}\p{P}\s]|emoji|'(?=[0-9A-Za-z])|_(?=[^\p{C}\p{S}\p{P}\s]|emoji))+/yu.source,
-    ].join('').replace(/emoji/g, emoji), 'yu')),
+    ].join('').replace(/emoji/g, emoji.source), 'yu')),
     str(new RegExp([
       /(?![0-9a-z@#]|>>|:\S|[^\p{C}\p{S}\p{P}\s]|emoji)/yu.source,
-    ].join('').replace(/emoji/g, emoji), 'yu')),
+    ].join('').replace(/emoji/g, emoji.source), 'yu')),
     false, undefined, undefined,
     [3 | Backtrack.autolink]),
     ([{ value }]) => !/^[0-9]{1,4}$|^[0-9]{5}/.test(value)),

package/src/parser/inline/autolink.ts CHANGED Viewed

@@ -15,7 +15,7 @@ export const autolink: AutolinkParser = lazy(() =>
     /(?<![^\p{C}\p{S}\p{P}\s]|emoji)#/yiu.source,
     /(?<![0-9a-z])>>/yi.source,
     /(?<![0-9a-z][.+-]?|[@#])!?[0-9a-z]/yi.source,
-  ].join('|').replace(/emoji/g, emoji), 'yiu'),
+  ].join('|').replace(/emoji/g, emoji.source), 'yiu'),
   state(~State.autolink,
   union([
     lineurl,

package/src/parser/inline/htmlentity.ts CHANGED Viewed

@@ -1,19 +1,19 @@
 import { HTMLEntityParser, UnsafeHTMLEntityParser } from '../inline';
+import { Backtrack } from '../context';
 import { List, Data } from '../../combinator/data/parser';
-import { union, focus, fmap } from '../../combinator';
+import { union, surround, fmap } from '../../combinator';
+import { str } from '../source';
 import { invalid } from '../util';
 import { html } from 'typed-dom/dom';
-export const unsafehtmlentity: UnsafeHTMLEntityParser = focus(
-  /&(?:[0-9A-Za-z]+;?)?/y,
-  //({ source }) => [[parser(source) ?? `${Command.Error}${source}`], '']));
-  ({ context }) => {
-    const { source } = context;
-    context.position += source.length;
-    return source.length > 1 && source.at(-1) === ';'
-      ? new List([new Data(parser(source) ?? source)])
-      : new List([new Data(source)]);
-  });
+export const unsafehtmlentity: UnsafeHTMLEntityParser = surround(
+  str('&'), str(/[0-9A-Za-z]+/y), str(';'),
+  false,
+  ([as, bs, cs]) =>
+    new List([new Data(parser(as.head!.value + bs.head!.value + cs.head!.value))]),
+  ([as, bs]) =>
+    new List([new Data(as.head!.value + (bs?.head?.value ?? ''))]),
+  [3 | Backtrack.bracket]);
 export const htmlentity: HTMLEntityParser = fmap(
   union([unsafehtmlentity]),
@@ -26,11 +26,8 @@ export const htmlentity: HTMLEntityParser = fmap(
       }, value))
   ]));
-const parser = (el => (entity: string): string | undefined => {
+const parser = (el => (entity: string): string => {
   if (entity === '&NewLine;') return ' ';
   el.innerHTML = entity;
-  const text = el.textContent!;
-  return entity === text
-    ? undefined
-    : text;
+  return el.textContent!;
 })(html('span'));

package/src/parser/inline/ruby.ts CHANGED Viewed

@@ -63,6 +63,8 @@ export const ruby: RubyParser = lazy(() => bind(
     }
   }));
+const delimiter = /[$"`\[\](){}<>（）［］｛｝]|\\?\n/y;
 const text: RubyParser.TextParser = input => {
   const { context } = input;
   const { source } = context;
@@ -70,11 +72,14 @@ const text: RubyParser.TextParser = input => {
   let state = false;
   context.sequential = true;
   for (let { position } = context; position < source.length; position = context.position) {
-    if (/[$"`\[\](){}<>（）［］｛｝]|\\?\n/yi.test(source.slice(position, position + 2))) break;
+    delimiter.lastIndex = position;
+    if (delimiter.test(source)) break;
     assert(source[position] !== '\n');
     switch (source[position]) {
       case '&': {
-        const result = unsafehtmlentity(input) ?? txt(input)!;
+        const result = source[position + 1] !== ' '
+          ? unsafehtmlentity(input) ?? txt(input)!
+          : txt(input)!;
         assert(result);
         acc.last!.value += result.head!.value;
         continue;

package/src/parser/source/text.ts CHANGED Viewed

@@ -238,7 +238,6 @@ function seek(source: string, position: number): number {
       case '@':
       case '#':
       case '$':
-      case '&':
       case '"':
       case '`':
       case '[':
@@ -274,6 +273,9 @@ function seek(source: string, position: number): number {
       case ':':
         if (source[i + 1] === '/' && source[i + 2] === '/') return i;
         continue;
+      case '&':
+        if (source[i + 1] !== ' ') return i;
+        continue;
       case ' ':
       case '\t':
       case '　':