@peaceroad/markdown-it-cjk-breaks-mod 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +11 -3
  2. package/index.js +68 -21
  3. package/package.json +2 -2
package/README.md CHANGED
@@ -4,9 +4,11 @@
4
4
 
5
5
  ### Punctuation spacing options
6
6
 
7
- Fine-tune the trigger list with `spaceAfterPunctuationTargets`. Provide either a single string or an array and every exact match becomes eligible for automatic spacing; this option replaces the defaults. Defaults remain `['!', '?', '⁉', '!?', '?!', '!?', '?!', '.', ':']`. To disable punctuation spacing while still setting `spaceAfterPunctuation`, pass `spaceAfterPunctuationTargets: []` (or `null`/`false`). Use `spaceAfterPunctuationTargetsAdd` to append triggers and `spaceAfterPunctuationTargetsRemove` to drop items from the resolved list.
7
+ `spaceAfterPunctuation` inserts spacing only when this plugin suppresses a line break after punctuation. The second-pass matcher also covers inline markup starts (inline code, links/autolinks, images, inline HTML) as long as the raw source contains a visible newline boundary; if a `softbreak` remains, no spacing is injected.
8
8
 
9
- Use `spaceAfterPunctuation` to inject a space every time this plugin suppresses a line break after punctuation. Accepts `'half'` for ASCII space, `'full'` for an ideographic space, or any custom string via a literal value.
9
+ `spaceAfterPunctuationTargets` lets you replace the default trigger list with a custom string or array. Defaults are `['!', '?', '⁉', '!?', '?!', '!?', '?!', '.', ':']`. To disable punctuation spacing while still setting `spaceAfterPunctuation`, pass `spaceAfterPunctuationTargets: []` (or `null`/`false`). Use `spaceAfterPunctuationTargetsAdd` to append triggers and `spaceAfterPunctuationTargetsRemove` to drop items from the resolved list.
10
+
11
+ `spaceAfterPunctuation` accepts `'half'` for ASCII space, `'full'` for an ideographic space, or any custom string via a literal value. Raw matching is strict, so escapes or entities (e.g. `&amp;`) right before the newline can prevent a match and skip spacing (safe-fail behavior).
10
12
 
11
13
  ```js
12
14
  import MarkdownIt from 'markdown-it';
@@ -28,7 +30,13 @@ const mdHalf = MarkdownIt({ html: true }).use(cjkBreaks, {
28
30
  mdHalf.render('こんにちは!\nWorld');
29
31
  // <p>こんにちは! World</p>
30
32
 
31
- // Custom punctuation triggers
33
+ // Inline code and links are supported when a raw newline is present
34
+ mdHalf.render('漢!\n`code`');
35
+ // <p>漢! <code>code</code></p>
36
+ mdHalf.render('漢!\n[link](url)');
37
+ // <p>漢! <a href="url">link</a></p>
38
+
39
+ // Custom punctuation triggers (replaces defaults)
32
40
  const mdCustom = MarkdownIt({ html: true }).use(cjkBreaks, {
33
41
  spaceAfterPunctuation: 'half',
34
42
  spaceAfterPunctuationTargets: ['??']
package/index.js CHANGED
@@ -12,7 +12,6 @@ const HANGUL_RE = /[\u1100-\u11FF\u302E\u302F\u3131-\u318E\u3200-\u321E\u3260-\u
12
12
  /* eslint-enable max-len */
13
13
  const WHITESPACE_RE = /\s/;
14
14
  const WHITESPACE_LEAD_RE = /^\s/;
15
- const WHITESPACE_TRAIL_RE = /\s$/;
16
15
 
17
16
 
18
17
  function is_surrogate(c1, c2) {
@@ -28,15 +27,18 @@ function is_hangul(c) {
28
27
  function create_punctuation_config(targets) {
29
28
  var sequences = new Set();
30
29
  var maxLength = 0;
30
+ var endCharMap = Object.create(null);
31
31
 
32
32
  for (var i = 0; i < targets.length; i++) {
33
33
  var value = targets[i];
34
34
  if (typeof value !== 'string' || value.length === 0) continue;
35
35
  sequences.add(value);
36
36
  if (value.length > maxLength) maxLength = value.length;
37
+ var endChar = get_last_char(value);
38
+ if (endChar) endCharMap[endChar] = true;
37
39
  }
38
40
 
39
- return { sequences: sequences, maxLength: maxLength };
41
+ return { sequences: sequences, maxLength: maxLength, endCharMap: endCharMap };
40
42
  }
41
43
 
42
44
 
@@ -116,6 +118,11 @@ function matches_punctuation_sequence(trailing, punctuationConfig) {
116
118
  if (!trailing || !punctuationConfig || punctuationConfig.maxLength === 0) return false;
117
119
 
118
120
  var sequences = punctuationConfig.sequences;
121
+ var endCharMap = punctuationConfig.endCharMap;
122
+ if (endCharMap) {
123
+ var endChar = get_last_char(trailing);
124
+ if (!endChar || !endCharMap[endChar]) return false;
125
+ }
119
126
  var maxLength = Math.min(trailing.length, punctuationConfig.maxLength);
120
127
 
121
128
  for (var len = maxLength; len > 0; len--) {
@@ -126,6 +133,16 @@ function matches_punctuation_sequence(trailing, punctuationConfig) {
126
133
  }
127
134
 
128
135
 
136
+ function get_last_char(text) {
137
+ if (!text) return '';
138
+ var len = text.length;
139
+ if (len === 1) return text;
140
+ var c1 = text.charCodeAt(len - 2);
141
+ var c2 = text.charCodeAt(len - 1);
142
+ return is_surrogate(c1, c2) ? text.slice(-2) : text.slice(-1);
143
+ }
144
+
145
+
129
146
  function is_printable_ascii(ch) {
130
147
  if (!ch) return false;
131
148
  var code = ch.charCodeAt(0);
@@ -176,7 +193,7 @@ function build_next_text_info(tokens, trackSkippedEmpty) {
176
193
  }
177
194
 
178
195
 
179
- function process_inlines(tokens, state, ctx, inlineToken) {
196
+ function process_inlines(tokens, ctx, inlineToken) {
180
197
  var i, last, next, c1, c2, remove_break;
181
198
  var either = ctx.either;
182
199
  var normalizeSoftBreaks = ctx.normalizeSoftBreaks;
@@ -185,6 +202,7 @@ function process_inlines(tokens, state, ctx, inlineToken) {
185
202
  var maxPunctuationLength = ctx.maxPunctuationLength;
186
203
  var considerInlineBoundaries = ctx.considerInlineBoundaries;
187
204
  var needsPunctuation = punctuationSpace && punctuationConfig && maxPunctuationLength > 0;
205
+ var punctuationEndCharMap = punctuationConfig ? punctuationConfig.endCharMap : null;
188
206
 
189
207
  if (!tokens || tokens.length === 0) return;
190
208
  if (normalizeSoftBreaks) normalize_text_tokens(tokens);
@@ -283,14 +301,16 @@ function process_inlines(tokens, state, ctx, inlineToken) {
283
301
 
284
302
  if (remove_break) {
285
303
  var insertPunctuationSpace = false;
286
- if (needsPunctuation && last && next && next !== '\u200b') {
287
- var trailing = hasLastText ? lastTextContent.slice(-maxPunctuationLength) : '';
288
- if (matches_punctuation_sequence(trailing, punctuationConfig)) {
289
- if (!nextWidthComputed) {
290
- nextWidthClass = get_cached_width_class(next);
304
+ if (needsPunctuation && hasLastText && last && next && next !== '\u200b') {
305
+ if (!punctuationEndCharMap || punctuationEndCharMap[last]) {
306
+ var trailing = lastTextContent.slice(-maxPunctuationLength);
307
+ if (matches_punctuation_sequence(trailing, punctuationConfig)) {
308
+ if (!nextWidthComputed) {
309
+ nextWidthClass = get_cached_width_class(next);
310
+ }
311
+ var nextIsFullwidthOrWide = nextWidthClass === 'F' || nextWidthClass === 'W';
312
+ if (nextIsFullwidthOrWide || is_printable_ascii(next)) insertPunctuationSpace = true;
291
313
  }
292
- var nextIsFullwidthOrWide = nextWidthClass === 'F' || nextWidthClass === 'W';
293
- if (nextIsFullwidthOrWide || is_printable_ascii(next)) insertPunctuationSpace = true;
294
314
  }
295
315
  }
296
316
  token.type = 'text';
@@ -365,7 +385,6 @@ function split_text_token(token) {
365
385
  parts.push(clone_text_token(TokenConstructor, token, content.slice(start)));
366
386
  }
367
387
 
368
- if (parts.length === 0) parts.push(token);
369
388
  return parts;
370
389
  }
371
390
 
@@ -407,6 +426,7 @@ function apply_missing_punctuation_spacing(tokens, inlineToken, punctuationSpace
407
426
  if (!tokens || tokens.length === 0) return;
408
427
  var maxPunctuationLength = punctuationConfig.maxLength;
409
428
  if (maxPunctuationLength <= 0) return;
429
+ var endCharMap = punctuationConfig.endCharMap;
410
430
 
411
431
  var rawSearchState = { pos: 0 };
412
432
 
@@ -414,13 +434,17 @@ function apply_missing_punctuation_spacing(tokens, inlineToken, punctuationSpace
414
434
  var current = tokens[idx];
415
435
  if (!current || current.type !== 'text' || !current.content) continue;
416
436
 
437
+ if (endCharMap) {
438
+ var endChar = get_last_char(current.content);
439
+ if (!endChar || !endCharMap[endChar]) continue;
440
+ }
417
441
  var trailing = current.content.slice(-maxPunctuationLength);
418
442
  if (!matches_punctuation_sequence(trailing, punctuationConfig)) continue;
419
- if (WHITESPACE_TRAIL_RE.test(current.content)) continue;
420
443
 
421
444
  var nextInfo = find_next_visible_token(tokens, idx + 1);
422
445
  if (!nextInfo) continue;
423
446
  if (nextInfo.token.type === 'text' && WHITESPACE_LEAD_RE.test(nextInfo.token.content || '')) continue;
447
+ if (has_active_break(tokens, idx, nextInfo.index)) continue;
424
448
 
425
449
  if (!raw_boundary_includes_newline(inlineToken.content, tokens, idx, nextInfo.index, nextInfo.fragment, rawSearchState)) {
426
450
  continue;
@@ -435,20 +459,35 @@ function apply_missing_punctuation_spacing(tokens, inlineToken, punctuationSpace
435
459
  }
436
460
  }
437
461
 
462
+ function has_active_break(tokens, fromIdx, nextIdx) {
463
+ for (var idx = fromIdx + 1; idx < nextIdx; idx++) {
464
+ var token = tokens[idx];
465
+ if (!token) continue;
466
+ if (token.type === 'softbreak') return true;
467
+ if (token.type === 'text' && token.content === '\n') return true;
468
+ }
469
+ return false;
470
+ }
471
+
438
472
 
439
473
  function raw_boundary_includes_newline(source, tokens, fromIdx, nextIdx, afterFragment, state) {
440
474
  if (!source || !afterFragment) return false;
475
+ var fragments = Array.isArray(afterFragment) ? afterFragment : [afterFragment];
441
476
  var beforeFragment = tokens[fromIdx].content || '';
442
477
  var betweenFragment = '';
443
478
  for (var k = fromIdx + 1; k < nextIdx; k++) {
444
479
  if (tokens[k].markup) betweenFragment += tokens[k].markup;
445
480
  }
446
- var candidate = beforeFragment + betweenFragment + '\n' + afterFragment;
447
- if (!candidate) return false;
448
- var startPos = source.indexOf(candidate, state.pos);
449
- if (startPos === -1) return false;
450
- state.pos = startPos + candidate.length - afterFragment.length;
451
- return true;
481
+ for (var i = 0; i < fragments.length; i++) {
482
+ var fragment = fragments[i];
483
+ if (!fragment) continue;
484
+ var candidate = beforeFragment + betweenFragment + '\n' + fragment;
485
+ var startPos = source.indexOf(candidate, state.pos);
486
+ if (startPos === -1) continue;
487
+ state.pos = startPos + candidate.length - fragment.length;
488
+ return true;
489
+ }
490
+ return false;
452
491
  }
453
492
 
454
493
 
@@ -467,10 +506,18 @@ function find_next_visible_token(tokens, startIdx) {
467
506
  function derive_after_fragment(token) {
468
507
  if (!token) return '';
469
508
  if (token.type === 'text' || token.type === 'html_inline' || token.type === 'code_inline') {
470
- return token.content || '';
509
+ if (token.type !== 'code_inline') return token.content || '';
510
+ var fragments = [];
511
+ var markup = token.markup || '';
512
+ var content = token.content || '';
513
+ if (markup && content) fragments.push(markup + content);
514
+ if (markup) fragments.push(markup);
515
+ if (content) fragments.push(content);
516
+ return fragments;
471
517
  }
472
518
  if (token.type === 'image') return '![';
473
- if (token.type === 'link_open') return '[';
519
+ if (token.type === 'link_open') return token.markup || '[';
520
+ if (token.nesting === 1 && token.markup) return token.markup;
474
521
  if (token.type === 'inline') return token.content || '';
475
522
  return '';
476
523
  }
@@ -594,7 +641,7 @@ export default function cjk_breaks_plugin(md, opts) {
594
641
  function cjk_breaks(state) {
595
642
  for (var blkIdx = state.tokens.length - 1; blkIdx >= 0; blkIdx--) {
596
643
  if (state.tokens[blkIdx].type !== 'inline') continue;
597
- process_inlines(state.tokens[blkIdx].children, state, ctx, state.tokens[blkIdx]);
644
+ process_inlines(state.tokens[blkIdx].children, ctx, state.tokens[blkIdx]);
598
645
  }
599
646
  }
600
647
  if (!md || !md.core || !md.core.ruler) return;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@peaceroad/markdown-it-cjk-breaks-mod",
3
- "version": "0.1.4",
3
+ "version": "0.1.6",
4
4
  "type": "module",
5
5
  "description": "Suppress linebreaks between east asian (Especially Japanese) characters",
6
6
  "repository": {
@@ -22,7 +22,7 @@
22
22
  "eastasianwidth": "^0.3.0"
23
23
  },
24
24
  "devDependencies": {
25
- "@peaceroad/markdown-it-strong-ja": "^0.5.5",
25
+ "@peaceroad/markdown-it-strong-ja": "^0.7.2",
26
26
  "markdown-it": "^14.1.0"
27
27
  }
28
28
  }