npm - @peaceroad/markdown-it-cjk-breaks-mod - Versions diffs - 0.1.2 → 0.1.4 - Mend

@peaceroad/markdown-it-cjk-breaks-mod 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2)

package/index.js +93 -70
package/package.json +1 -1

package/index.js (changed)

@@ -6,6 +6,13 @@ const ASCII_PRINTABLE_MAX = 0x7E;
 const IDEOGRAPHIC_SPACE = '\u3000';
 const DEFAULT_PUNCTUATION_TARGETS = ['！', '？', '⁉', '！？', '？！', '!?', '?!', '.', ':'];
 const DEFAULT_PUNCTUATION_CONFIG = create_punctuation_config(DEFAULT_PUNCTUATION_TARGETS);
+/* eslint-disable max-len */
+// require('unicode-10.0.0/Script/Hangul/regex')
+const HANGUL_RE = /[\u1100-\u11FF\u302E\u302F\u3131-\u318E\u3200-\u321E\u3260-\u327E\uA960-\uA97C\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uFFA0-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC]/;
+/* eslint-enable max-len */
+const WHITESPACE_RE = /\s/;
+const WHITESPACE_LEAD_RE = /^\s/;
+const WHITESPACE_TRAIL_RE = /\s$/;
 function is_surrogate(c1, c2) {
@@ -14,10 +21,7 @@ function is_surrogate(c1, c2) {
 function is_hangul(c) {
-  // require('unicode-10.0.0/Script/Hangul/regex')
-  /* eslint-disable max-len */
-  return /[\u1100-\u11FF\u302E\u302F\u3131-\u318E\u3200-\u321E\u3260-\u327E\uA960-\uA97C\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uFFA0-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC]/.test(c);
-  /* eslint-enable max-len */
+  return HANGUL_RE.test(c);
 }
@@ -51,10 +55,15 @@ function resolve_punctuation_targets(opts) {
   if (!opts) return DEFAULT_PUNCTUATION_CONFIG;
   var hasCustomTargets = Object.prototype.hasOwnProperty.call(opts, 'spaceAfterPunctuationTargets');
+  var addTargets = opts.spaceAfterPunctuationTargetsAdd;
+  var removeTargets = opts.spaceAfterPunctuationTargetsRemove;
+  if (!hasCustomTargets && addTargets === undefined && removeTargets === undefined) {
+    return DEFAULT_PUNCTUATION_CONFIG;
+  }
   var baseTargets;
   if (!hasCustomTargets) {
-    baseTargets = DEFAULT_PUNCTUATION_TARGETS.slice();
+    baseTargets = DEFAULT_PUNCTUATION_TARGETS;
   } else {
     var customTargets = opts.spaceAfterPunctuationTargets;
     if (customTargets === null || customTargets === false) return null;
@@ -63,13 +72,12 @@ function resolve_punctuation_targets(opts) {
       baseTargets = [ customTargets ];
     } else if (Array.isArray(customTargets)) {
       if (customTargets.length === 0) return null;
-      baseTargets = customTargets.slice();
+      baseTargets = customTargets;
     } else {
-      baseTargets = DEFAULT_PUNCTUATION_TARGETS.slice();
+      baseTargets = DEFAULT_PUNCTUATION_TARGETS;
     }
   }
-  var addTargets = opts.spaceAfterPunctuationTargetsAdd;
   if (addTargets !== undefined) {
     var addList = [];
     if (typeof addTargets === 'string') {
@@ -82,7 +90,6 @@ function resolve_punctuation_targets(opts) {
     }
   }
-  var removeTargets = opts.spaceAfterPunctuationTargetsRemove;
   if (removeTargets !== undefined) {
     var removeList = [];
     if (typeof removeTargets === 'string') {
@@ -140,15 +147,15 @@ function get_cjk_width_class(ch) {
   return width === 'F' || width === 'W' || width === 'H' ? width : '';
 }
-function build_next_text_info(tokens) {
+function build_next_text_info(tokens, trackSkippedEmpty) {
   var nextTextIndex = new Array(tokens.length);
-  var nextSkippedEmpty = new Array(tokens.length);
+  var nextSkippedEmpty = trackSkippedEmpty ? new Array(tokens.length) : null;
   var nextNonEmpty = -1;
   var sawEmpty = false;
   for (var idx = tokens.length - 1; idx >= 0; idx--) {
     nextTextIndex[idx] = nextNonEmpty;
-    nextSkippedEmpty[idx] = sawEmpty;
+    if (trackSkippedEmpty) nextSkippedEmpty[idx] = sawEmpty;
     var token = tokens[idx];
     if (!token || token.type !== 'text') continue;
@@ -170,7 +177,7 @@ function build_next_text_info(tokens) {
 function process_inlines(tokens, state, ctx, inlineToken) {
-  var i, last, trailing, next, c1, c2, remove_break;
+  var i, last, next, c1, c2, remove_break;
   var either = ctx.either;
   var normalizeSoftBreaks = ctx.normalizeSoftBreaks;
   var punctuationSpace = ctx.punctuationSpace;
@@ -179,15 +186,27 @@ function process_inlines(tokens, state, ctx, inlineToken) {
   var considerInlineBoundaries = ctx.considerInlineBoundaries;
   var needsPunctuation = punctuationSpace && punctuationConfig && maxPunctuationLength > 0;
+  if (!tokens || tokens.length === 0) return;
   if (normalizeSoftBreaks) normalize_text_tokens(tokens);
-  var nextInfo = build_next_text_info(tokens);
-  var nextTextIndex = nextInfo.nextTextIndex;
-  var nextSkippedEmpty = nextInfo.nextSkippedEmpty;
-  var widthCache = Object.create(null);
+  var nextTextIndex = null;
+  var nextSkippedEmpty = null;
+  var widthCache = null;
   function get_cached_width_class(ch) {
     if (!ch) return '';
+    if (!widthCache) {
+      var firstWidth = get_cjk_width_class(ch);
+      if (firstWidth === '') {
+        var codePoint = ch.codePointAt(0);
+        if (codePoint !== undefined && codePoint <= ASCII_PRINTABLE_MAX) return '';
+        widthCache = Object.create(null);
+        widthCache[ch] = '';
+        return '';
+      }
+      widthCache = Object.create(null);
+      widthCache[ch] = firstWidth;
+      return firstWidth;
+    }
     var cached = widthCache[ch];
     if (cached !== undefined) return cached;
     var width = get_cjk_width_class(ch);
@@ -204,22 +223,24 @@ function process_inlines(tokens, state, ctx, inlineToken) {
     var isSoftbreakToken = token.type === 'softbreak';
     var isTextBreakToken = token.type === 'text' && token.content === '\n';
     if (isSoftbreakToken || isTextBreakToken) {
+      if (!nextTextIndex) {
+        var nextInfo = build_next_text_info(tokens, considerInlineBoundaries);
+        nextTextIndex = nextInfo.nextTextIndex;
+        nextSkippedEmpty = nextInfo.nextSkippedEmpty;
+      }
       // default last/next character to space
       last = next = ' ';
-      trailing = '';
-      var trailingMatchesPunctuation = false;
-      var skippedEmptyBefore = sawEmptySinceLast;
-      var skippedEmptyAfter = nextSkippedEmpty[i];
+      var skippedEmptyBefore = false;
+      var skippedEmptyAfter = false;
+      if (considerInlineBoundaries) {
+        skippedEmptyBefore = sawEmptySinceLast;
+        skippedEmptyAfter = nextSkippedEmpty ? nextSkippedEmpty[i] : false;
+      }
       if (hasLastText) {
         c1 = lastTextContent.charCodeAt(lastTextContent.length - 2);
         c2 = lastTextContent.charCodeAt(lastTextContent.length - 1);
         last = lastTextContent.slice(is_surrogate(c1, c2) ? -2 : -1);
-        if (needsPunctuation) {
-          trailing = lastTextContent.slice(-maxPunctuationLength);
-          trailingMatchesPunctuation = matches_punctuation_sequence(trailing, punctuationConfig);
-        }
       }
       var nextIdx = nextTextIndex[i];
@@ -233,46 +254,44 @@ function process_inlines(tokens, state, ctx, inlineToken) {
       }
       remove_break = false;
-      // remove newline if it's adjacent to ZWSP
-      if (last === '\u200b' || next === '\u200b') remove_break = true;
-      var lastWidthClass = '';
       var nextWidthClass = '';
+      var nextWidthComputed = false;
-      // remove newline if both characters AND/OR fullwidth (F), wide (W) or
-      // halfwidth (H), but not Hangul
-      var tLast = false;
-      var tNext = false;
-      var needsWidthForRemoval = !remove_break;
-      var needsWidthForPunctuation = punctuationSpace && trailingMatchesPunctuation && last && next && next !== '\u200b';
-      if (needsWidthForRemoval || needsWidthForPunctuation) {
-        if (needsWidthForRemoval) {
-          lastWidthClass = get_cached_width_class(last);
+      // remove newline if it's adjacent to ZWSP
+      if (last === '\u200b' || next === '\u200b') {
+        remove_break = true;
+      } else {
+        // remove newline if both characters AND/OR fullwidth (F), wide (W) or
+        // halfwidth (H), but not Hangul
+        var lastWidthClass = get_cached_width_class(last);
+        if (either || lastWidthClass) {
+          nextWidthClass = get_cached_width_class(next);
+          nextWidthComputed = true;
         }
-        nextWidthClass = get_cached_width_class(next);
-        if (needsWidthForRemoval) {
-          tLast = lastWidthClass !== '';
-          tNext = nextWidthClass !== '';
+        var tLast = lastWidthClass !== '';
+        var tNext = nextWidthComputed ? nextWidthClass !== '' : false;
-          if (considerInlineBoundaries && (skippedEmptyBefore || skippedEmptyAfter) && tLast && tNext) {
-            tLast = false;
-            tNext = false;
-          }
-          if (either ? tLast || tNext : tLast && tNext) {
-            if (!is_hangul(last) && !is_hangul(next)) remove_break = true;
-          }
+        if (considerInlineBoundaries && (skippedEmptyBefore || skippedEmptyAfter) && tLast && tNext) {
+          tLast = false;
+          tNext = false;
+        }
+        if (either ? tLast || tNext : tLast && tNext) {
+          if (!is_hangul(last) && !is_hangul(next)) remove_break = true;
         }
       }
       if (remove_break) {
         var insertPunctuationSpace = false;
-        if (punctuationSpace && punctuationConfig && trailingMatchesPunctuation && last && next && next !== '\u200b') {
-          var nextIsFullwidthOrWide = nextWidthClass === 'F' || nextWidthClass === 'W';
-          if (is_printable_ascii(next) || nextIsFullwidthOrWide) insertPunctuationSpace = true;
+        if (needsPunctuation && last && next && next !== '\u200b') {
+          var trailing = hasLastText ? lastTextContent.slice(-maxPunctuationLength) : '';
+          if (matches_punctuation_sequence(trailing, punctuationConfig)) {
+            if (!nextWidthComputed) {
+              nextWidthClass = get_cached_width_class(next);
+            }
+            var nextIsFullwidthOrWide = nextWidthClass === 'F' || nextWidthClass === 'W';
+            if (nextIsFullwidthOrWide || is_printable_ascii(next)) insertPunctuationSpace = true;
+          }
         }
         token.type    = 'text';
         token.content = insertPunctuationSpace ? punctuationSpace : '';
@@ -281,16 +300,16 @@ function process_inlines(tokens, state, ctx, inlineToken) {
     if (token.type === 'text') {
       if (!token.content) {
-        sawEmptySinceLast = true;
+        if (considerInlineBoundaries) sawEmptySinceLast = true;
       } else {
         lastTextContent = token.content;
         hasLastText = true;
-        sawEmptySinceLast = false;
+        if (considerInlineBoundaries) sawEmptySinceLast = false;
       }
     }
   }
-  if (punctuationSpace && punctuationConfig) {
+  if (needsPunctuation) {
     apply_missing_punctuation_spacing(tokens, inlineToken, punctuationSpace, punctuationConfig);
   }
 }
@@ -386,6 +405,8 @@ function apply_missing_punctuation_spacing(tokens, inlineToken, punctuationSpace
   if (!inlineToken || !inlineToken.content) return;
   if (inlineToken.content.indexOf('\n') === -1) return;
   if (!tokens || tokens.length === 0) return;
+  var maxPunctuationLength = punctuationConfig.maxLength;
+  if (maxPunctuationLength <= 0) return;
   var rawSearchState = { pos: 0 };
@@ -393,15 +414,13 @@ function apply_missing_punctuation_spacing(tokens, inlineToken, punctuationSpace
     var current = tokens[idx];
     if (!current || current.type !== 'text' || !current.content) continue;
-    var trailing = punctuationConfig.maxLength > 0 ?
-      current.content.slice(-punctuationConfig.maxLength) :
-      current.content.slice(-1);
+    var trailing = current.content.slice(-maxPunctuationLength);
     if (!matches_punctuation_sequence(trailing, punctuationConfig)) continue;
-    if (/\s$/.test(current.content)) continue;
+    if (WHITESPACE_TRAIL_RE.test(current.content)) continue;
     var nextInfo = find_next_visible_token(tokens, idx + 1);
     if (!nextInfo) continue;
-    if (nextInfo.token.type === 'text' && /^\s/.test(nextInfo.token.content || '')) continue;
+    if (nextInfo.token.type === 'text' && WHITESPACE_LEAD_RE.test(nextInfo.token.content || '')) continue;
     if (!raw_boundary_includes_newline(inlineToken.content, tokens, idx, nextInfo.index, nextInfo.fragment, rawSearchState)) {
       continue;
@@ -411,7 +430,9 @@ function apply_missing_punctuation_spacing(tokens, inlineToken, punctuationSpace
     idx = nextInfo.index;
   }
-  apply_single_text_token_spacing(tokens, inlineToken, punctuationSpace, punctuationConfig);
+  if (tokens.length === 1) {
+    apply_single_text_token_spacing(tokens, inlineToken, punctuationSpace, punctuationConfig);
+  }
 }
@@ -481,6 +502,8 @@ function apply_single_text_token_spacing(tokens, inlineToken, punctuationSpace,
   if (inlineToken.content.indexOf('\n') === -1) return;
   var token = tokens[0];
   if (!token || token.type !== 'text' || !token.content) return;
+  var maxPunctuationLength = punctuationConfig.maxLength;
+  if (maxPunctuationLength <= 0) return;
   var segments = inlineToken.content.split('\n');
   if (segments.length < 2) return;
@@ -490,7 +513,7 @@ function apply_single_text_token_spacing(tokens, inlineToken, punctuationSpace,
   for (var segIdx = 0; segIdx < segments.length - 1; segIdx++) {
     var leftRaw = segments[segIdx];
     var rightRaw = segments[segIdx + 1];
-    var tail = extract_visible_tail(leftRaw, punctuationConfig.maxLength);
+    var tail = extract_visible_tail(leftRaw, maxPunctuationLength);
     var nextChar = extract_visible_head(rightRaw);
     var shouldInsert = tail &&
       matches_punctuation_sequence(tail, punctuationConfig) &&
@@ -501,7 +524,7 @@ function apply_single_text_token_spacing(tokens, inlineToken, punctuationSpace,
       var splitIndex = cumulativeLength + leftRaw.length + offsetDelta;
       if (splitIndex >= 0 && splitIndex <= updatedContent.length) {
         var existingChar = updatedContent.charAt(splitIndex);
-        if (existingChar && /\s/.test(existingChar)) {
+        if (existingChar && WHITESPACE_RE.test(existingChar)) {
           // already has whitespace at this boundary
           cumulativeLength += leftRaw.length;
           continue;
@@ -528,7 +551,7 @@ function extract_visible_tail(raw, maxLength) {
     var charLen = code > 0xFFFF ? 2 : 1;
     var ch = raw.slice(pos - charLen, pos);
     pos -= charLen;
-    if (/\s/.test(ch)) continue;
+    if (WHITESPACE_RE.test(ch)) continue;
     if (is_markup_closer_char(ch)) continue;
     result = ch + result;
   }
@@ -543,7 +566,7 @@ function extract_visible_head(raw) {
     var charLen = code > 0xFFFF ? 2 : 1;
     var ch = raw.slice(pos, pos + charLen);
     pos += charLen;
-    if (/\s/.test(ch)) continue;
+    if (WHITESPACE_RE.test(ch)) continue;
     return ch;
   }
   return '';

package/package.json (changed)

@@ -1,6 +1,6 @@
 {
   "name": "@peaceroad/markdown-it-cjk-breaks-mod",
-  "version": "0.1.2",
+  "version": "0.1.4",
   "type": "module",
   "description": "Suppress linebreaks between east asian (Especially Japanese) characters",
   "repository": {