npm - @peaceroad/markdown-it-cjk-breaks-mod - Versions diffs - 0.1.7 → 0.1.9 - Mend

@peaceroad/markdown-it-cjk-breaks-mod 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/README.md (changed)

@@ -1,133 +1,125 @@
-# markdown-it-cjk-breaks
+# @peaceroad/markdown-it-cjk-breaks-mod
-## k_taka's additional features
+`@peaceroad/markdown-it-cjk-breaks-mod` is a markdown-it plugin that suppresses line breaks between CJK text and optionally injects spacing after configured punctuation when a break is removed. It is designed for mixed Japanese/CJK + ASCII documents where default newline handling often produces unwanted spaces or breaks.
-### Punctuation spacing options
+This package is a fork lineage of [`markdown-it-cjk-breaks`](https://github.com/markdown-it/markdown-it-cjk-breaks) and [`@sup39/markdown-it-cjk-breaks`](https://www.npmjs.com/package/@sup39/markdown-it-cjk-breaks). It keeps the original CJK break suppression behavior, adds the `either` mode introduced by `@sup39`, and extends it with punctuation-spacing controls and softbreak normalization for plugin-heavy markdown-it pipelines.
-`spaceAfterPunctuation` inserts spacing only when this plugin suppresses a line break after punctuation. The second-pass matcher also covers inline markup starts (inline code, links/autolinks, images, inline HTML) as long as the raw source contains a visible newline boundary; if a `softbreak` remains, no spacing is injected.
+## Install
-`spaceAfterPunctuationTargets` lets you replace the default trigger list with a custom string or array. Defaults are `['！', '？', '⁉', '！？', '？！', '!?', '?!', '.', ':']`. To disable punctuation spacing while still setting `spaceAfterPunctuation`, pass `spaceAfterPunctuationTargets: []` (or `null`/`false`). Use `spaceAfterPunctuationTargetsAdd` to append triggers and `spaceAfterPunctuationTargetsRemove` to drop items from the resolved list.
+```
+npm i @peaceroad/markdown-it-cjk-breaks-mod
+```
-`spaceAfterPunctuation` accepts `'half'` for ASCII space, `'full'` for an ideographic space, or any custom string via a literal value. Raw matching is strict, so escapes or entities (e.g. `&amp;`) right before the newline can prevent a match and skip spacing (safe-fail behavior).
+## Quick Start
 ```js
 import MarkdownIt from 'markdown-it';
 import cjkBreaks from '@peaceroad/markdown-it-cjk-breaks-mod';
-// Full-width spacing after default punctuation
-const mdFull = MarkdownIt({ html: true }).use(cjkBreaks, {
-  spaceAfterPunctuation: 'full',
-  either: true
-});
-mdFull.render('こんにちは！\nWorld');
-// <p>こんにちは！　World</p>
+const md = MarkdownIt({ html: true }).use(cjkBreaks);
+md.render('あおえ\nうい');
+// <p>あおえうい</p>
+```
+## Options
+- `either`
+  Type: `boolean`
+  Default: `false`
+  Remove a break when either side (instead of both sides) is CJK-width (`F/W/H`), still excluding Hangul.
+  Origin: inherited from `@sup39/markdown-it-cjk-breaks`.
+The options below are extensions added by this project:
+- `normalizeSoftBreaks`
+  Type: `boolean`
+  Default: `false`
+  Split newline-containing `text` tokens into explicit `softbreak` tokens before processing. Useful with plugins that rewrite inline tokens.
+- `spaceAfterPunctuation`
+  Type: `'half' | 'full' | string`
+  Default: disabled
+  Insert spacing only when this plugin removes a break after a target sequence. `'half'` => `' '`, `'full'` => `\u3000`.
+- `spaceAfterPunctuationTargets`
+  Type: `string | string[] | [] | null | false`
+  Default: `['！', '？', '⁉', '！？', '？！', '!?', '?!', '.', ':']`
+  Replace the target sequence set. `[]`, `null`, or `false` explicitly disable target matching.
+- `spaceAfterPunctuationTargetsAdd`
+  Type: `string | string[]`
+  Default: unset
+  Append target sequences after base resolution.
+- `spaceAfterPunctuationTargetsRemove`
+  Type: `string | string[]`
+  Default: unset
+  Remove sequences from the resolved target list.
+## Punctuation Spacing Examples
+```js
+import MarkdownIt from 'markdown-it';
+import cjkBreaks from '@peaceroad/markdown-it-cjk-breaks-mod';
-// Half-width spacing for ASCII-friendly mixes
 const mdHalf = MarkdownIt({ html: true }).use(cjkBreaks, {
-  spaceAfterPunctuation: 'half',
-  either: true
+  either: true,
+  spaceAfterPunctuation: 'half'
 });
 mdHalf.render('こんにちは！\nWorld');
 // <p>こんにちは！ World</p>
-// Inline code and links are supported when a raw newline is present
-mdHalf.render('漢！\n`code`');
-// <p>漢！ <code>code</code></p>
-mdHalf.render('漢！\n[link](url)');
-// <p>漢！ <a href="url">link</a></p>
+const mdFull = MarkdownIt({ html: true }).use(cjkBreaks, {
+  either: true,
+  spaceAfterPunctuation: 'full'
+});
+mdFull.render('こんにちは！\nWorld');
+// <p>こんにちは！　World</p>
-// Custom punctuation triggers (replaces defaults)
 const mdCustom = MarkdownIt({ html: true }).use(cjkBreaks, {
+  either: true,
   spaceAfterPunctuation: 'half',
   spaceAfterPunctuationTargets: ['??']
 });
 mdCustom.render('Hello??\nWorld');
 // <p>Hello?? World</p>
 ```
-### Softbreak normalization for other plugins
-Even with stock markdown-it, emphasis markers can leave inline `text` tokens that still embed `\n`. When `normalizeSoftBreaks: true`, those tokens are split back into proper `softbreak` entries before CJK suppression runs, so a trailing `***漢***\n字` behaves the same way regardless of how markdown-it represented it internally.
+## Softbreak Normalization Example
 ```js
-// Normalize softbreaks emitted by other plugins first
-const mdStrongJaFriendly = MarkdownIt({ html: true }).use(cjkBreaks, {
-  normalizeSoftBreaks: true,
-  either: true
-});
-mdStrongJaFriendly.render('**漢**\nb');
-// <p><strong>漢</strong>b</p>
-```
-`@peaceroad/markdown-it-strong-ja` also emit newline-containing `text` nodes after their own rewrites. The same option keeps behavior consistent no matter which order you register plugins.
-## sup39's additional features
-- [@sup39/markdown-it-cjk-breaks](https://npmjs.com/package/@sup39/markdown-it-cjk-breaks)
+import MarkdownIt from 'markdown-it';
+import cjkBreaks from '@peaceroad/markdown-it-cjk-breaks-mod';
-Provide an optional option `either`(default: false, which works as original version) to determine whether allowing removing linebreak when either the character before **OR** after the linebreak is east asian character.
+const md = MarkdownIt({ html: true }).use(cjkBreaks, {
+  either: true,
+  normalizeSoftBreaks: true
+});
-```js
-var md = require('markdown-it')();
-var cjk_breaks = require('markdown-it-cjk-breaks');
-md.use(cjk_breaks, {either: true}); // << set either to true
-md.render(`
-あおえ
-うい
-aoe
-ui
-`);
-// returns:
-//
-//<p>あおえういaoe <!-- linebreak between `い` and `a` is removed -->
-//ui</p>
+md.render('**漢**\nb');
+// <p><strong>漢</strong>b</p>
 ```
-## Original
+## Behavior Notes
-- [markdown-it-cjk-breaks](https://github.com/markdown-it/markdown-it-cjk-breaks)
+- Break suppression follows CSS Text Level 3 style rules used by upstream: ZWSP-adjacent breaks are removed first; otherwise width-class checks are applied with Hangul exclusion.
+- Punctuation spacing is never global formatting. It only runs when this plugin actually removes the break.
+- The second punctuation pass handles inline markup boundaries (inline code, links/autolinks, images, inline HTML) when a raw newline boundary is verifiably present.
+- Matching is fail-closed: if raw boundary reconstruction cannot be proven, no space is inserted.
+- If a `softbreak` is still active between candidate tokens, spacing insertion is skipped.
-> Plugin for [markdown-it](https://github.com/markdown-it/markdown-it) that suppresses linebreaks between east asian characters.
+## Compatibility
-Normally newlines in a markdown text get rendered as newlines in output html text. Then browsers will usually render those newlines as whitespace (more smart behavior is included in w3c drafts, but not actually implemented by vendors).
+- Module format: ESM (`"type": "module"`).
+- Runtime: works in Node.js ESM environments and browser/VSCode bundling setups that support ESM dependencies.
+- Runtime plugin code uses no Node-only APIs (`fs`, `path`, etc.); those are confined to tests.
+- For plugin chains that rewrite inline text (for example `@peaceroad/markdown-it-strong-ja`), prefer `normalizeSoftBreaks: true` for stable behavior.
-This plugin finds and removes newlines that cannot be converted to space, algorithm matches [CSS Text Module Level 3](https://www.w3.org/TR/css-text-3/#line-break-transform):
-- If the character immediately before or immediately after the segment break is the zero-width space character (U+200B), then the break is removed, leaving behind the zero-width space.
-- Otherwise, if the East Asian Width property [UAX11] of both the character before and after the segment break is F, W, or H (not A), and neither side is Hangul, then the segment break is removed.
-- Otherwise, the segment break is converted to a space (U+0020).
-## Install
-```bash
-yarn add markdown-it-cjk-breaks
-```
-## Usage
-```js
-var md = require('markdown-it')();
-var cjk_breaks = require('markdown-it-cjk-breaks');
-md.use(cjk_breaks);
-md.render(`
-あおえ
-うい
-aoe
-ui
-`);
-// returns:
-//
-//<p>あおえうい
-//aoe
-//ui</p>
-```
+## Upstream And Credits
+- Original: [markdown-it/markdown-it-cjk-breaks](https://github.com/markdown-it/markdown-it-cjk-breaks)
+- Fork enhancement (`either`): [@sup39/markdown-it-cjk-breaks](https://www.npmjs.com/package/@sup39/markdown-it-cjk-breaks)
+- Current package: [@peaceroad/markdown-it-cjk-breaks-mod](https://github.com/peaceroad/p7d-markdown-it-cjk-breaks-mod)
 ## License

package/index.js (changed)

@@ -11,7 +11,6 @@ const DEFAULT_PUNCTUATION_CONFIG = create_punctuation_config(DEFAULT_PUNCTUATION
 const HANGUL_RE = /[\u1100-\u11FF\u302E\u302F\u3131-\u318E\u3200-\u321E\u3260-\u327E\uA960-\uA97C\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uFFA0-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC]/;
 /* eslint-enable max-len */
 const WHITESPACE_RE = /\s/;
-const WHITESPACE_LEAD_RE = /^\s/;
 function is_surrogate(c1, c2) {
@@ -34,6 +33,7 @@ function create_punctuation_config(targets) {
   for (var i = 0; i < targets.length; i++) {
     var value = targets[i];
     if (typeof value !== 'string' || value.length === 0) continue;
+    if (sequences.has(value)) continue;
     sequences.add(value);
     var valueLength = value.length;
     if (valueLength > maxLength) maxLength = valueLength;
@@ -45,7 +45,9 @@ function create_punctuation_config(targets) {
     if (endChar) endCharMap[endChar] = true;
   }
-  lengths.sort(function (a, b) { return b - a; });
+  if (lengths.length > 1) {
+    lengths.sort(function (a, b) { return b - a; });
+  }
   return { sequences: sequences, maxLength: maxLength, endCharMap: endCharMap, lengths: lengths };
 }
@@ -126,9 +128,9 @@ function matches_punctuation_sequence(trailing, punctuationConfig, skipEndCharCh
   if (!trailing || !punctuationConfig || punctuationConfig.maxLength === 0) return false;
   var sequences = punctuationConfig.sequences;
-  var endCharMap = punctuationConfig.endCharMap;
   var lengths = punctuationConfig.lengths;
   if (!skipEndCharCheck) {
+    var endCharMap = punctuationConfig.endCharMap;
     var endChar = get_last_char(trailing);
     if (!endCharMap[endChar]) return false;
   }
@@ -136,7 +138,7 @@ function matches_punctuation_sequence(trailing, punctuationConfig, skipEndCharCh
   for (var i = 0; i < lengths.length; i++) {
     var len = lengths[i];
     if (len > trailingLength) continue;
-    var fragment = trailing.slice(-len);
+    var fragment = len === trailingLength ? trailing : trailing.slice(-len);
     if (sequences.has(fragment)) return true;
   }
   return false;
@@ -160,6 +162,12 @@ function is_printable_ascii(ch) {
 }
+function has_leading_whitespace(text) {
+  if (!text) return false;
+  return WHITESPACE_RE.test(text.charAt(0));
+}
 function is_fullwidth_or_wide(ch) {
   var width = get_cjk_width_class(ch);
   return width === 'F' || width === 'W';
@@ -208,9 +216,8 @@ function process_inlines(tokens, ctx, inlineToken) {
   var normalizeSoftBreaks = ctx.normalizeSoftBreaks;
   var punctuationSpace = ctx.punctuationSpace;
   var punctuationConfig = ctx.punctuationConfig;
-  var maxPunctuationLength = ctx.maxPunctuationLength;
   var considerInlineBoundaries = ctx.considerInlineBoundaries;
-  var needsPunctuation = punctuationSpace && punctuationConfig && maxPunctuationLength > 0;
+  var needsPunctuation = punctuationSpace && punctuationConfig && ctx.maxPunctuationLength > 0;
   var punctuationEndCharMap = punctuationConfig ? punctuationConfig.endCharMap : null;
   if (!tokens || tokens.length === 0) return;
@@ -225,13 +232,13 @@ function process_inlines(tokens, ctx, inlineToken) {
     if (!widthCache) widthCache = Object.create(null);
     var cached = widthCache[ch];
     if (cached !== undefined) return cached;
-    var width = get_cjk_width_class(ch);
+    var width = eastAsianWidth(ch);
+    width = width === 'F' || width === 'W' || width === 'H' ? width : '';
     widthCache[ch] = width;
     return width;
   }
   var lastTextContent = '';
-  var hasLastText = false;
   var sawEmptySinceLast = false;
   for (i = 0; i < tokens.length; i++) {
@@ -253,7 +260,7 @@ function process_inlines(tokens, ctx, inlineToken) {
         skippedEmptyAfter = nextSkippedEmpty ? nextSkippedEmpty[i] : false;
       }
-      if (hasLastText) {
+      if (lastTextContent) {
         c1 = lastTextContent.charCodeAt(lastTextContent.length - 2);
         c2 = lastTextContent.charCodeAt(lastTextContent.length - 1);
         last = lastTextContent.slice(is_surrogate(c1, c2) ? -2 : -1);
@@ -261,12 +268,10 @@ function process_inlines(tokens, ctx, inlineToken) {
       var nextIdx = nextTextIndex[i];
       if (nextIdx !== -1) {
-        var nextContent = tokens[nextIdx].content || '';
-        if (nextContent) {
-          c1 = nextContent.charCodeAt(0);
-          c2 = nextContent.charCodeAt(1);
-          next = nextContent.slice(0, is_surrogate(c1, c2) ? 2 : 1);
-        }
+        var nextContent = tokens[nextIdx].content;
+        c1 = nextContent.charCodeAt(0);
+        c2 = nextContent.charCodeAt(1);
+        next = nextContent.slice(0, is_surrogate(c1, c2) ? 2 : 1);
       }
       remove_break = false;
@@ -299,10 +304,9 @@ function process_inlines(tokens, ctx, inlineToken) {
       if (remove_break) {
         var insertPunctuationSpace = false;
-        if (needsPunctuation && hasLastText && last && next && next !== '\u200b') {
+        if (needsPunctuation && lastTextContent && nextIdx !== -1 && next !== '\u200b') {
           if (punctuationEndCharMap[last]) {
-            var trailing = lastTextContent.slice(-maxPunctuationLength);
-            if (matches_punctuation_sequence(trailing, punctuationConfig, true)) {
+            if (matches_punctuation_sequence(lastTextContent, punctuationConfig, true)) {
               if (!nextWidthComputed) {
                 nextWidthClass = get_cached_width_class(next);
               }
@@ -321,7 +325,6 @@ function process_inlines(tokens, ctx, inlineToken) {
         if (considerInlineBoundaries) sawEmptySinceLast = true;
       } else {
         lastTextContent = token.content;
-        hasLastText = true;
         if (considerInlineBoundaries) sawEmptySinceLast = false;
       }
     }
@@ -367,21 +370,29 @@ function split_text_token(token) {
   var parts = [];
   var content = token.content;
   var start = 0;
+  var reusedToken = false;
+  function push_text_part(text) {
+    if (!text) return;
+    if (!reusedToken) {
+      token.content = text;
+      parts.push(token);
+      reusedToken = true;
+      return;
+    }
+    parts.push(clone_text_token(TokenConstructor, token, text));
+  }
   for (var pos = 0; pos < content.length; pos++) {
     if (content.charCodeAt(pos) !== 0x0A) continue;
-    if (pos > start) {
-      parts.push(clone_text_token(TokenConstructor, token, content.slice(start, pos)));
-    }
+    if (pos > start) push_text_part(content.slice(start, pos));
     parts.push(create_softbreak_token(TokenConstructor, token));
     start = pos + 1;
   }
-  if (start < content.length) {
-    parts.push(clone_text_token(TokenConstructor, token, content.slice(start)));
-  }
+  if (start < content.length) push_text_part(content.slice(start));
   return parts;
 }
@@ -422,10 +433,14 @@ function apply_missing_punctuation_spacing(tokens, inlineToken, punctuationSpace
   if (!inlineToken || !inlineToken.content) return;
   if (inlineToken.content.indexOf('\n') === -1) return;
   if (!tokens || tokens.length === 0) return;
-  var maxPunctuationLength = punctuationConfig.maxLength;
-  if (maxPunctuationLength <= 0) return;
+  if (punctuationConfig.maxLength <= 0) return;
   var endCharMap = punctuationConfig.endCharMap;
+  if (tokens.length === 1) {
+    apply_single_text_token_spacing(tokens, inlineToken, punctuationSpace, punctuationConfig);
+    return;
+  }
   var rawSearchState = { pos: 0 };
   for (var idx = 0; idx < tokens.length; idx++) {
@@ -434,15 +449,20 @@ function apply_missing_punctuation_spacing(tokens, inlineToken, punctuationSpace
     var endChar = get_last_char(current.content);
     if (!endCharMap[endChar]) continue;
-    var trailing = current.content.slice(-maxPunctuationLength);
-    if (!matches_punctuation_sequence(trailing, punctuationConfig, true)) continue;
+    if (!matches_punctuation_sequence(current.content, punctuationConfig, true)) continue;
     var nextInfo = find_next_visible_token(tokens, idx + 1);
     if (!nextInfo) continue;
-    if (nextInfo.token.type === 'text' && WHITESPACE_LEAD_RE.test(nextInfo.token.content || '')) continue;
-    if (has_active_break(tokens, idx, nextInfo.index)) continue;
-    if (!raw_boundary_includes_newline(inlineToken.content, tokens, idx, nextInfo.index, nextInfo.fragment, rawSearchState)) {
+    if (nextInfo.token.type === 'text' && has_leading_whitespace(nextInfo.token.content)) continue;
+    if (nextInfo.hasActiveBreak) continue;
+    if (!raw_boundary_includes_newline(
+      inlineToken.content,
+      current.content,
+      nextInfo.betweenMarkup,
+      nextInfo.fragment,
+      rawSearchState
+    )) {
       continue;
     }
@@ -450,50 +470,48 @@ function apply_missing_punctuation_spacing(tokens, inlineToken, punctuationSpace
     idx = nextInfo.index;
   }
-  if (tokens.length === 1) {
-    apply_single_text_token_spacing(tokens, inlineToken, punctuationSpace, punctuationConfig);
-  }
-}
-function has_active_break(tokens, fromIdx, nextIdx) {
-  for (var idx = fromIdx + 1; idx < nextIdx; idx++) {
-    var token = tokens[idx];
-    if (!token) continue;
-    if (token.type === 'softbreak') return true;
-    if (token.type === 'text' && token.content === '\n') return true;
-  }
-  return false;
 }
-function raw_boundary_includes_newline(source, tokens, fromIdx, nextIdx, afterFragment, state) {
+function raw_boundary_includes_newline(source, beforeFragment, betweenFragment, afterFragment, state) {
   if (!source || !afterFragment) return false;
-  var fragments = Array.isArray(afterFragment) ? afterFragment : [afterFragment];
-  var beforeFragment = tokens[fromIdx].content || '';
-  var betweenFragment = '';
-  for (var k = fromIdx + 1; k < nextIdx; k++) {
-    if (tokens[k].markup) betweenFragment += tokens[k].markup;
-  }
-  for (var i = 0; i < fragments.length; i++) {
-    var fragment = fragments[i];
-    if (!fragment) continue;
-    var candidate = beforeFragment + betweenFragment + '\n' + fragment;
-    var startPos = source.indexOf(candidate, state.pos);
-    if (startPos === -1) continue;
-    state.pos = startPos + candidate.length - fragment.length;
-    return true;
+  if (!beforeFragment) return false;
+  betweenFragment = betweenFragment || '';
+  if (Array.isArray(afterFragment)) {
+    for (var i = 0; i < afterFragment.length; i++) {
+      var fragment = afterFragment[i];
+      if (!fragment) continue;
+      var candidate = beforeFragment + betweenFragment + '\n' + fragment;
+      var startPos = source.indexOf(candidate, state.pos);
+      if (startPos === -1) continue;
+      state.pos = startPos + candidate.length - fragment.length;
+      return true;
+    }
+    return false;
   }
-  return false;
+  var fragment = afterFragment;
+  var candidate = beforeFragment + betweenFragment + '\n' + fragment;
+  var startPos = source.indexOf(candidate, state.pos);
+  if (startPos === -1) return false;
+  state.pos = startPos + candidate.length - fragment.length;
+  return true;
 }
 function find_next_visible_token(tokens, startIdx) {
+  var hasActiveBreak = false;
+  var betweenMarkup = '';
   for (var idx = startIdx; idx < tokens.length; idx++) {
     var token = tokens[idx];
     if (!token) continue;
+    if (!hasActiveBreak && (token.type === 'softbreak' || (token.type === 'text' && token.content === '\n'))) {
+      hasActiveBreak = true;
+    }
     var fragment = derive_after_fragment(token);
-    if (!fragment) continue;
-    return { index: idx, token: token, fragment: fragment };
+    if (!fragment) {
+      if (token.markup) betweenMarkup += token.markup;
+      continue;
+    }
+    return { index: idx, token: token, fragment: fragment, hasActiveBreak: hasActiveBreak, betweenMarkup: betweenMarkup };
   }
   return null;
 }
@@ -509,7 +527,7 @@ function derive_after_fragment(token) {
     if (markup && content) fragments.push(markup + content);
     if (markup) fragments.push(markup);
     if (content) fragments.push(content);
-    return fragments;
+    return fragments.length > 0 ? fragments : '';
   }
   if (token.type === 'image') return '![';
   if (token.type === 'link_open') return token.markup || '[';
@@ -549,7 +567,6 @@ function apply_single_text_token_spacing(tokens, inlineToken, punctuationSpace,
   if (maxPunctuationLength <= 0) return;
   var segments = inlineToken.content.split('\n');
-  if (segments.length < 2) return;
   var cumulativeLength = 0;
   var offsetDelta = 0;
   var updatedContent = token.content;

package/package.json (changed)

@@ -1,6 +1,6 @@
 {
   "name": "@peaceroad/markdown-it-cjk-breaks-mod",
-  "version": "0.1.7",
+  "version": "0.1.9",
   "type": "module",
   "description": "Suppress linebreaks between east asian (Especially Japanese) characters",
   "repository": {
@@ -22,7 +22,7 @@
     "eastasianwidth": "^0.3.0"
   },
   "devDependencies": {
-    "@peaceroad/markdown-it-strong-ja": "^0.7.2",
+    "@peaceroad/markdown-it-strong-ja": "^0.8.1",
     "markdown-it": "^14.1.0"
   }
 }