@peaceroad/markdown-it-cjk-breaks-mod 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -3
- package/index.js +68 -21
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -4,9 +4,11 @@
|
|
|
4
4
|
|
|
5
5
|
### Punctuation spacing options
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
`spaceAfterPunctuation` inserts spacing only when this plugin suppresses a line break after punctuation. The second-pass matcher also covers inline markup starts (inline code, links/autolinks, images, inline HTML) as long as the raw source contains a visible newline boundary; if a `softbreak` remains, no spacing is injected.
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
`spaceAfterPunctuationTargets` lets you replace the default trigger list with a custom string or array. Defaults are `['！', '？', '⁉', '！？', '？！', '!?', '?!', '.', ':']`. To disable punctuation spacing while still setting `spaceAfterPunctuation`, pass `spaceAfterPunctuationTargets: []` (or `null`/`false`). Use `spaceAfterPunctuationTargetsAdd` to append triggers and `spaceAfterPunctuationTargetsRemove` to drop items from the resolved list.
|
|
10
|
+
|
|
11
|
+
`spaceAfterPunctuation` accepts `'half'` for ASCII space, `'full'` for an ideographic space, or any custom string via a literal value. Raw matching is strict, so escapes or entities (e.g. `&amp;`) right before the newline can prevent a match and skip spacing (safe-fail behavior).
|
|
10
12
|
|
|
11
13
|
```js
|
|
12
14
|
import MarkdownIt from 'markdown-it';
|
|
@@ -28,7 +30,13 @@ const mdHalf = MarkdownIt({ html: true }).use(cjkBreaks, {
|
|
|
28
30
|
mdHalf.render('こんにちは!\nWorld');
|
|
29
31
|
// <p>こんにちは! World</p>
|
|
30
32
|
|
|
31
|
-
//
|
|
33
|
+
// Inline code and links are supported when a raw newline is present
|
|
34
|
+
mdHalf.render('漢!\n`code`');
|
|
35
|
+
// <p>漢! <code>code</code></p>
|
|
36
|
+
mdHalf.render('漢!\n[link](url)');
|
|
37
|
+
// <p>漢! <a href="url">link</a></p>
|
|
38
|
+
|
|
39
|
+
// Custom punctuation triggers (replaces defaults)
|
|
32
40
|
const mdCustom = MarkdownIt({ html: true }).use(cjkBreaks, {
|
|
33
41
|
spaceAfterPunctuation: 'half',
|
|
34
42
|
spaceAfterPunctuationTargets: ['??']
|
package/index.js
CHANGED
|
@@ -12,7 +12,6 @@ const HANGUL_RE = /[\u1100-\u11FF\u302E\u302F\u3131-\u318E\u3200-\u321E\u3260-\u
|
|
|
12
12
|
/* eslint-enable max-len */
|
|
13
13
|
const WHITESPACE_RE = /\s/;
|
|
14
14
|
const WHITESPACE_LEAD_RE = /^\s/;
|
|
15
|
-
const WHITESPACE_TRAIL_RE = /\s$/;
|
|
16
15
|
|
|
17
16
|
|
|
18
17
|
function is_surrogate(c1, c2) {
|
|
@@ -28,15 +27,18 @@ function is_hangul(c) {
|
|
|
28
27
|
function create_punctuation_config(targets) {
|
|
29
28
|
var sequences = new Set();
|
|
30
29
|
var maxLength = 0;
|
|
30
|
+
var endCharMap = Object.create(null);
|
|
31
31
|
|
|
32
32
|
for (var i = 0; i < targets.length; i++) {
|
|
33
33
|
var value = targets[i];
|
|
34
34
|
if (typeof value !== 'string' || value.length === 0) continue;
|
|
35
35
|
sequences.add(value);
|
|
36
36
|
if (value.length > maxLength) maxLength = value.length;
|
|
37
|
+
var endChar = get_last_char(value);
|
|
38
|
+
if (endChar) endCharMap[endChar] = true;
|
|
37
39
|
}
|
|
38
40
|
|
|
39
|
-
return { sequences: sequences, maxLength: maxLength };
|
|
41
|
+
return { sequences: sequences, maxLength: maxLength, endCharMap: endCharMap };
|
|
40
42
|
}
|
|
41
43
|
|
|
42
44
|
|
|
@@ -116,6 +118,11 @@ function matches_punctuation_sequence(trailing, punctuationConfig) {
|
|
|
116
118
|
if (!trailing || !punctuationConfig || punctuationConfig.maxLength === 0) return false;
|
|
117
119
|
|
|
118
120
|
var sequences = punctuationConfig.sequences;
|
|
121
|
+
var endCharMap = punctuationConfig.endCharMap;
|
|
122
|
+
if (endCharMap) {
|
|
123
|
+
var endChar = get_last_char(trailing);
|
|
124
|
+
if (!endChar || !endCharMap[endChar]) return false;
|
|
125
|
+
}
|
|
119
126
|
var maxLength = Math.min(trailing.length, punctuationConfig.maxLength);
|
|
120
127
|
|
|
121
128
|
for (var len = maxLength; len > 0; len--) {
|
|
@@ -126,6 +133,16 @@ function matches_punctuation_sequence(trailing, punctuationConfig) {
|
|
|
126
133
|
}
|
|
127
134
|
|
|
128
135
|
|
|
136
|
+
function get_last_char(text) {
|
|
137
|
+
if (!text) return '';
|
|
138
|
+
var len = text.length;
|
|
139
|
+
if (len === 1) return text;
|
|
140
|
+
var c1 = text.charCodeAt(len - 2);
|
|
141
|
+
var c2 = text.charCodeAt(len - 1);
|
|
142
|
+
return is_surrogate(c1, c2) ? text.slice(-2) : text.slice(-1);
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
|
|
129
146
|
function is_printable_ascii(ch) {
|
|
130
147
|
if (!ch) return false;
|
|
131
148
|
var code = ch.charCodeAt(0);
|
|
@@ -176,7 +193,7 @@ function build_next_text_info(tokens, trackSkippedEmpty) {
|
|
|
176
193
|
}
|
|
177
194
|
|
|
178
195
|
|
|
179
|
-
function process_inlines(tokens, state, ctx, inlineToken) {
|
|
196
|
+
function process_inlines(tokens, ctx, inlineToken) {
|
|
180
197
|
var i, last, next, c1, c2, remove_break;
|
|
181
198
|
var either = ctx.either;
|
|
182
199
|
var normalizeSoftBreaks = ctx.normalizeSoftBreaks;
|
|
@@ -185,6 +202,7 @@ function process_inlines(tokens, state, ctx, inlineToken) {
|
|
|
185
202
|
var maxPunctuationLength = ctx.maxPunctuationLength;
|
|
186
203
|
var considerInlineBoundaries = ctx.considerInlineBoundaries;
|
|
187
204
|
var needsPunctuation = punctuationSpace && punctuationConfig && maxPunctuationLength > 0;
|
|
205
|
+
var punctuationEndCharMap = punctuationConfig ? punctuationConfig.endCharMap : null;
|
|
188
206
|
|
|
189
207
|
if (!tokens || tokens.length === 0) return;
|
|
190
208
|
if (normalizeSoftBreaks) normalize_text_tokens(tokens);
|
|
@@ -283,14 +301,16 @@ function process_inlines(tokens, state, ctx, inlineToken) {
|
|
|
283
301
|
|
|
284
302
|
if (remove_break) {
|
|
285
303
|
var insertPunctuationSpace = false;
|
|
286
|
-
if (needsPunctuation && last && next && next !== '\u200b') {
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
if (
|
|
290
|
-
|
|
304
|
+
if (needsPunctuation && hasLastText && last && next && next !== '\u200b') {
|
|
305
|
+
if (!punctuationEndCharMap || punctuationEndCharMap[last]) {
|
|
306
|
+
var trailing = lastTextContent.slice(-maxPunctuationLength);
|
|
307
|
+
if (matches_punctuation_sequence(trailing, punctuationConfig)) {
|
|
308
|
+
if (!nextWidthComputed) {
|
|
309
|
+
nextWidthClass = get_cached_width_class(next);
|
|
310
|
+
}
|
|
311
|
+
var nextIsFullwidthOrWide = nextWidthClass === 'F' || nextWidthClass === 'W';
|
|
312
|
+
if (nextIsFullwidthOrWide || is_printable_ascii(next)) insertPunctuationSpace = true;
|
|
291
313
|
}
|
|
292
|
-
var nextIsFullwidthOrWide = nextWidthClass === 'F' || nextWidthClass === 'W';
|
|
293
|
-
if (nextIsFullwidthOrWide || is_printable_ascii(next)) insertPunctuationSpace = true;
|
|
294
314
|
}
|
|
295
315
|
}
|
|
296
316
|
token.type = 'text';
|
|
@@ -365,7 +385,6 @@ function split_text_token(token) {
|
|
|
365
385
|
parts.push(clone_text_token(TokenConstructor, token, content.slice(start)));
|
|
366
386
|
}
|
|
367
387
|
|
|
368
|
-
if (parts.length === 0) parts.push(token);
|
|
369
388
|
return parts;
|
|
370
389
|
}
|
|
371
390
|
|
|
@@ -407,6 +426,7 @@ function apply_missing_punctuation_spacing(tokens, inlineToken, punctuationSpace
|
|
|
407
426
|
if (!tokens || tokens.length === 0) return;
|
|
408
427
|
var maxPunctuationLength = punctuationConfig.maxLength;
|
|
409
428
|
if (maxPunctuationLength <= 0) return;
|
|
429
|
+
var endCharMap = punctuationConfig.endCharMap;
|
|
410
430
|
|
|
411
431
|
var rawSearchState = { pos: 0 };
|
|
412
432
|
|
|
@@ -414,13 +434,17 @@ function apply_missing_punctuation_spacing(tokens, inlineToken, punctuationSpace
|
|
|
414
434
|
var current = tokens[idx];
|
|
415
435
|
if (!current || current.type !== 'text' || !current.content) continue;
|
|
416
436
|
|
|
437
|
+
if (endCharMap) {
|
|
438
|
+
var endChar = get_last_char(current.content);
|
|
439
|
+
if (!endChar || !endCharMap[endChar]) continue;
|
|
440
|
+
}
|
|
417
441
|
var trailing = current.content.slice(-maxPunctuationLength);
|
|
418
442
|
if (!matches_punctuation_sequence(trailing, punctuationConfig)) continue;
|
|
419
|
-
if (WHITESPACE_TRAIL_RE.test(current.content)) continue;
|
|
420
443
|
|
|
421
444
|
var nextInfo = find_next_visible_token(tokens, idx + 1);
|
|
422
445
|
if (!nextInfo) continue;
|
|
423
446
|
if (nextInfo.token.type === 'text' && WHITESPACE_LEAD_RE.test(nextInfo.token.content || '')) continue;
|
|
447
|
+
if (has_active_break(tokens, idx, nextInfo.index)) continue;
|
|
424
448
|
|
|
425
449
|
if (!raw_boundary_includes_newline(inlineToken.content, tokens, idx, nextInfo.index, nextInfo.fragment, rawSearchState)) {
|
|
426
450
|
continue;
|
|
@@ -435,20 +459,35 @@ function apply_missing_punctuation_spacing(tokens, inlineToken, punctuationSpace
|
|
|
435
459
|
}
|
|
436
460
|
}
|
|
437
461
|
|
|
462
|
+
function has_active_break(tokens, fromIdx, nextIdx) {
|
|
463
|
+
for (var idx = fromIdx + 1; idx < nextIdx; idx++) {
|
|
464
|
+
var token = tokens[idx];
|
|
465
|
+
if (!token) continue;
|
|
466
|
+
if (token.type === 'softbreak') return true;
|
|
467
|
+
if (token.type === 'text' && token.content === '\n') return true;
|
|
468
|
+
}
|
|
469
|
+
return false;
|
|
470
|
+
}
|
|
471
|
+
|
|
438
472
|
|
|
439
473
|
function raw_boundary_includes_newline(source, tokens, fromIdx, nextIdx, afterFragment, state) {
|
|
440
474
|
if (!source || !afterFragment) return false;
|
|
475
|
+
var fragments = Array.isArray(afterFragment) ? afterFragment : [afterFragment];
|
|
441
476
|
var beforeFragment = tokens[fromIdx].content || '';
|
|
442
477
|
var betweenFragment = '';
|
|
443
478
|
for (var k = fromIdx + 1; k < nextIdx; k++) {
|
|
444
479
|
if (tokens[k].markup) betweenFragment += tokens[k].markup;
|
|
445
480
|
}
|
|
446
|
-
var
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
481
|
+
for (var i = 0; i < fragments.length; i++) {
|
|
482
|
+
var fragment = fragments[i];
|
|
483
|
+
if (!fragment) continue;
|
|
484
|
+
var candidate = beforeFragment + betweenFragment + '\n' + fragment;
|
|
485
|
+
var startPos = source.indexOf(candidate, state.pos);
|
|
486
|
+
if (startPos === -1) continue;
|
|
487
|
+
state.pos = startPos + candidate.length - fragment.length;
|
|
488
|
+
return true;
|
|
489
|
+
}
|
|
490
|
+
return false;
|
|
452
491
|
}
|
|
453
492
|
|
|
454
493
|
|
|
@@ -467,10 +506,18 @@ function find_next_visible_token(tokens, startIdx) {
|
|
|
467
506
|
function derive_after_fragment(token) {
|
|
468
507
|
if (!token) return '';
|
|
469
508
|
if (token.type === 'text' || token.type === 'html_inline' || token.type === 'code_inline') {
|
|
470
|
-
return token.content || '';
|
|
509
|
+
if (token.type !== 'code_inline') return token.content || '';
|
|
510
|
+
var fragments = [];
|
|
511
|
+
var markup = token.markup || '';
|
|
512
|
+
var content = token.content || '';
|
|
513
|
+
if (markup && content) fragments.push(markup + content);
|
|
514
|
+
if (markup) fragments.push(markup);
|
|
515
|
+
if (content) fragments.push(content);
|
|
516
|
+
return fragments;
|
|
471
517
|
}
|
|
472
518
|
if (token.type === 'image') return '![';
|
|
473
|
-
if (token.type === 'link_open') return '[';
|
|
519
|
+
if (token.type === 'link_open') return token.markup || '[';
|
|
520
|
+
if (token.nesting === 1 && token.markup) return token.markup;
|
|
474
521
|
if (token.type === 'inline') return token.content || '';
|
|
475
522
|
return '';
|
|
476
523
|
}
|
|
@@ -594,7 +641,7 @@ export default function cjk_breaks_plugin(md, opts) {
|
|
|
594
641
|
function cjk_breaks(state) {
|
|
595
642
|
for (var blkIdx = state.tokens.length - 1; blkIdx >= 0; blkIdx--) {
|
|
596
643
|
if (state.tokens[blkIdx].type !== 'inline') continue;
|
|
597
|
-
process_inlines(state.tokens[blkIdx].children, state, ctx, state.tokens[blkIdx]);
|
|
644
|
+
process_inlines(state.tokens[blkIdx].children, ctx, state.tokens[blkIdx]);
|
|
598
645
|
}
|
|
599
646
|
}
|
|
600
647
|
if (!md || !md.core || !md.core.ruler) return;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@peaceroad/markdown-it-cjk-breaks-mod",
|
|
3
|
-
"version": "0.1.4",
|
|
3
|
+
"version": "0.1.6",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Suppress linebreaks between east asian (Especially Japanese) characters",
|
|
6
6
|
"repository": {
|
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
"eastasianwidth": "^0.3.0"
|
|
23
23
|
},
|
|
24
24
|
"devDependencies": {
|
|
25
|
-
"@peaceroad/markdown-it-strong-ja": "^0.
|
|
25
|
+
"@peaceroad/markdown-it-strong-ja": "^0.7.2",
|
|
26
26
|
"markdown-it": "^14.1.0"
|
|
27
27
|
}
|
|
28
28
|
}
|