@peaceroad/markdown-it-cjk-breaks-mod 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. package/LICENSE +24 -0
  2. package/README.md +128 -0
  3. package/index.js +463 -0
  4. package/package.json +23 -0
package/LICENSE ADDED
@@ -0,0 +1,24 @@
1
+ Copyright (c) 2018 Authors.
2
+ Copyright (c) 2021 sup39[サポミク].
3
+ Copyright (c) 2025 k_taka.
4
+
5
+ Permission is hereby granted, free of charge, to any person
6
+ obtaining a copy of this software and associated documentation
7
+ files (the "Software"), to deal in the Software without
8
+ restriction, including without limitation the rights to use,
9
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the
11
+ Software is furnished to do so, subject to the following
12
+ conditions:
13
+
14
+ The above copyright notice and this permission notice shall be
15
+ included in all copies or substantial portions of the Software.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24
+ OTHER DEALINGS IN THE SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,128 @@
1
+ # markdown-it-cjk-breaks
2
+
3
+ ## k_taka's additional features
4
+
5
+ ### Punctuation spacing options
6
+
7
+ Fine-tune the trigger list with `spaceAfterPunctuationTargets`. Provide either a single string or an array and every exact match becomes eligible for automatic spacing. Defaults remain `['!', '?', '⁉', '!?', '?!', '!?', '?!']`.
8
+
9
+ Use `spaceAfterPunctuation` to inject a space every time this plugin suppresses a line break after punctuation. Accepts `'half'` for ASCII space, `'full'` for an ideographic space, or any custom string via a literal value.
10
+
11
+ ```js
12
+ import MarkdownIt from 'markdown-it';
13
+ import cjkBreaks from '@peaceroad/markdown-it-cjk-breaks-mod';
14
+
15
+ // Full-width spacing after default punctuation
16
+ const mdFull = MarkdownIt({ html: true }).use(cjkBreaks, {
17
+ spaceAfterPunctuation: 'full',
18
+ either: true
19
+ });
20
+ mdFull.render('こんにちは!\nWorld');
21
+ // <p>こんにちは! World</p>
22
+
23
+ // Half-width spacing for ASCII-friendly mixes
24
+ const mdHalf = MarkdownIt({ html: true }).use(cjkBreaks, {
25
+ spaceAfterPunctuation: 'half',
26
+ either: true
27
+ });
28
+ mdHalf.render('こんにちは!\nWorld');
29
+ // <p>こんにちは! World</p>
30
+
31
+ // Custom punctuation triggers
32
+ const mdCustom = MarkdownIt({ html: true }).use(cjkBreaks, {
33
+ spaceAfterPunctuation: 'half',
34
+ spaceAfterPunctuationTargets: ['??']
35
+ });
36
+ mdCustom.render('Hello??\nWorld');
37
+ // <p>Hello?? World</p>
38
+ ```
39
+
40
+ ### Softbreak normalization for other plugins
41
+ Even with stock markdown-it, emphasis markers can leave inline `text` tokens that still embed `\n`. When `normalizeSoftBreaks: true`, those tokens are split back into proper `softbreak` entries before CJK suppression runs, so a trailing `***漢***\n字` behaves the same way regardless of how markdown-it represented it internally.
42
+
43
+ ```js
44
+ // Normalize softbreaks emitted by other plugins first
45
+ const mdStrongJaFriendly = MarkdownIt({ html: true }).use(cjkBreaks, {
46
+ normalizeSoftBreaks: true,
47
+ either: true
48
+ });
49
+ mdStrongJaFriendly.render('**漢**\nb');
50
+ // <p><strong>漢</strong>b</p>
51
+ ```
52
+
53
+ `@peaceroad/markdown-it-strong-ja` also emits newline-containing `text` nodes after its own rewrites. The same option keeps behavior consistent no matter which order you register plugins.
54
+
55
+ ## sup39's additional features
56
+
57
+ - [@sup39/markdown-it-cjk-breaks](https://npmjs.com/package/@sup39/markdown-it-cjk-breaks)
58
+
59
+ Provides an optional `either` option (default: `false`, which behaves like the original version) that controls whether a linebreak is removed when either the character before **OR** the character after the linebreak is an East Asian character.
60
+
61
+ ```js
62
+ var md = require('markdown-it')();
63
+ var cjk_breaks = require('markdown-it-cjk-breaks');
64
+
65
+ md.use(cjk_breaks, {either: true}); // << set either to true
66
+
67
+ md.render(`
68
+ あおえ
69
+ うい
70
+ aoe
71
+ ui
72
+ `);
73
+
74
+ // returns:
75
+ //
76
+ //<p>あおえういaoe <!-- linebreak between `い` and `a` is removed -->
77
+ //ui</p>
78
+ ```
79
+
80
+ ## Original
81
+
82
+ - [markdown-it-cjk-breaks](https://github.com/markdown-it/markdown-it-cjk-breaks)
83
+
84
+ > Plugin for [markdown-it](https://github.com/markdown-it/markdown-it) that suppresses linebreaks between east asian characters.
85
+
86
+ Normally newlines in a markdown text get rendered as newlines in output html text. Then browsers will usually render those newlines as whitespace (smarter behavior is included in w3c drafts, but not actually implemented by vendors).
87
+
88
+ This plugin finds and removes newlines that cannot be converted to a space. The algorithm matches [CSS Text Module Level 3](https://www.w3.org/TR/css-text-3/#line-break-transform):
89
+
90
+ - If the character immediately before or immediately after the segment break is the zero-width space character (U+200B), then the break is removed, leaving behind the zero-width space.
91
+ - Otherwise, if the East Asian Width property [UAX11] of both the character before and after the segment break is F, W, or H (not A), and neither side is Hangul, then the segment break is removed.
92
+ - Otherwise, the segment break is converted to a space (U+0020).
93
+
94
+ ## Install
95
+
96
+ ```bash
97
+ yarn add markdown-it-cjk-breaks
98
+ ```
99
+
100
+
101
+ ## Usage
102
+
103
+ ```js
104
+ var md = require('markdown-it')();
105
+ var cjk_breaks = require('markdown-it-cjk-breaks');
106
+
107
+ md.use(cjk_breaks);
108
+
109
+ md.render(`
110
+ あおえ
111
+ うい
112
+ aoe
113
+ ui
114
+ `);
115
+
116
+ // returns:
117
+ //
118
+ //<p>あおえうい
119
+ //aoe
120
+ //ui</p>
121
+ ```
122
+
123
+
124
+ ## License
125
+
126
+ - markdown-it/markdown-it-cjk-breaks: [MIT](https://github.com/markdown-it/markdown-it-cjk-breaks/blob/master/LICENSE)
127
+ - @sup39/markdown-it-cjk-breaks: [MIT](https://www.npmjs.com/package/@sup39/markdown-it-cjk-breaks?activeTab=code)
128
+ - @peaceroad/markdown-it-cjk-breaks-mod: [MIT](https://github.com/peaceroad/p7d-markdown-it-cjk-breaks-mod/blob/main/LICENSE)
package/index.js ADDED
@@ -0,0 +1,463 @@
1
+ import eastAsianWidthModule from 'eastasianwidth';
2
+
3
// Width classifier exported by the `eastasianwidth` package.
const eastAsianWidth = eastAsianWidthModule.eastAsianWidth;

// Inclusive code bounds of the printable ASCII range ('!' .. '~').
const ASCII_PRINTABLE_MIN = 0x21;
const ASCII_PRINTABLE_MAX = 0x7E;

// U+3000, returned when `spaceAfterPunctuation` is 'full'.
const IDEOGRAPHIC_SPACE = '\u3000';

// Trailing sequences that trigger spacing when no custom targets are given.
// Entries end up in a Set, so repeated values are harmless.
const DEFAULT_PUNCTUATION_TARGETS = ['!', '?', '⁉', '!?', '?!', '!?', '?!'];

// Pre-built lookup for the defaults (relies on function hoisting).
const DEFAULT_PUNCTUATION_CONFIG = create_punctuation_config(DEFAULT_PUNCTUATION_TARGETS);
9
+
10
+
11
/**
 * Tell whether two UTF-16 code units form a surrogate pair.
 *
 * @param {number} c1 - candidate lead (high) surrogate code unit
 * @param {number} c2 - candidate trail (low) surrogate code unit
 * @returns {boolean} true only when c1 is in D800..DBFF and c2 in DC00..DFFF
 */
function is_surrogate(c1, c2) {
  const isLead = c1 >= 0xD800 && c1 <= 0xDBFF;
  const isTrail = c2 >= 0xDC00 && c2 <= 0xDFFF;
  return isLead && isTrail;
}
14
+
15
+
16
/**
 * Report whether the given string contains a Hangul character.
 *
 * @param {string} c - character (or short string) to inspect
 * @returns {boolean} true when any code unit falls in the Hangul ranges
 */
function is_hangul(c) {
  // Character class taken from require('unicode-10.0.0/Script/Hangul/regex')
  /* eslint-disable max-len */
  const hangulPattern = /[\u1100-\u11FF\u302E\u302F\u3131-\u318E\u3200-\u321E\u3260-\u327E\uA960-\uA97C\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uFFA0-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC]/;
  /* eslint-enable max-len */
  return hangulPattern.test(c);
}
22
+
23
+
24
/**
 * Build the lookup structure used to detect trailing punctuation.
 *
 * Entries that are not non-empty strings are ignored.
 *
 * @param {Array} targets - candidate punctuation sequences
 * @returns {{sequences: Set<string>, maxLength: number}} the accepted
 *   sequences and the length (in UTF-16 code units) of the longest one
 */
function create_punctuation_config(targets) {
  const sequences = new Set();
  let maxLength = 0;

  for (let idx = 0; idx < targets.length; idx += 1) {
    const candidate = targets[idx];
    const usable = typeof candidate === 'string' && candidate.length > 0;
    if (usable) {
      sequences.add(candidate);
      maxLength = Math.max(maxLength, candidate.length);
    }
  }

  return { sequences, maxLength };
}
37
+
38
+
39
/**
 * Translate the `spaceAfterPunctuation` option into the string to insert.
 *
 * @param {object} [opts] - plugin options
 * @returns {string} ' ' for 'half', IDEOGRAPHIC_SPACE for 'full', the option
 *   itself for any other non-empty string, and '' when the option is missing,
 *   falsy or not a string (the empty string disables the feature)
 */
function resolve_punctuation_space_option(opts) {
  const option = opts ? opts.spaceAfterPunctuation : undefined;
  if (!option) return '';

  switch (option) {
    case 'half':
      return ' ';
    case 'full':
      return IDEOGRAPHIC_SPACE;
    default:
      // A truthy string is necessarily non-empty; anything else is rejected.
      return typeof option === 'string' ? option : '';
  }
}
48
+
49
+
50
/**
 * Pick the punctuation lookup to use for this plugin instance.
 *
 * @param {object} [opts] - plugin options; `spaceAfterPunctuationTargets`
 *   may be a single string or an array of strings
 * @returns {{sequences: Set<string>, maxLength: number}} a config built from
 *   the custom targets, or DEFAULT_PUNCTUATION_CONFIG when the option is
 *   absent, empty, of an unsupported type, or yields no usable sequence
 */
function resolve_punctuation_targets(opts) {
  const requested = opts && opts.spaceAfterPunctuationTargets;
  if (!requested) return DEFAULT_PUNCTUATION_CONFIG;

  // A lone string is shorthand for a one-element list.
  const targetList = typeof requested === 'string' ? [ requested ] : requested;
  if (!Array.isArray(targetList) || targetList.length === 0) {
    return DEFAULT_PUNCTUATION_CONFIG;
  }

  const config = create_punctuation_config(targetList);
  if (config.sequences.size === 0) return DEFAULT_PUNCTUATION_CONFIG;
  return config;
}
60
+
61
+
62
/**
 * Check whether `trailing` ends with one of the configured sequences.
 *
 * Suffixes are tried from the longest allowed length down to one code unit.
 *
 * @param {string} trailing - tail of the text preceding a break
 * @param {{sequences: Set<string>, maxLength: number}} punctuationConfig
 * @returns {boolean} true when some suffix is a configured sequence
 */
function matches_punctuation_sequence(trailing, punctuationConfig) {
  if (!trailing) return false;
  if (!punctuationConfig || punctuationConfig.maxLength === 0) return false;

  const known = punctuationConfig.sequences;
  let size = Math.min(trailing.length, punctuationConfig.maxLength);

  while (size > 0) {
    if (known.has(trailing.slice(-size))) return true;
    size -= 1;
  }
  return false;
}
74
+
75
+
76
/**
 * Tell whether the first code unit of `ch` is printable, non-space ASCII.
 *
 * @param {string} ch - character to inspect
 * @returns {boolean} true when the code lies within
 *   ASCII_PRINTABLE_MIN..ASCII_PRINTABLE_MAX; false for empty/missing input
 */
function is_printable_ascii(ch) {
  if (!ch) return false;
  const unit = ch.charCodeAt(0);
  return !(unit < ASCII_PRINTABLE_MIN || unit > ASCII_PRINTABLE_MAX);
}
81
+
82
+
83
/**
 * Tell whether `ch` has East Asian Width class F (fullwidth) or W (wide).
 *
 * @param {string} ch - character to classify
 * @returns {boolean}
 */
function is_fullwidth_or_wide(ch) {
  switch (get_cjk_width_class(ch)) {
    case 'F':
    case 'W':
      return true;
    default:
      return false;
  }
}
87
+
88
+
89
/**
 * Classify a character by East Asian Width, keeping only the CJK classes.
 *
 * @param {string} ch - character to classify
 * @returns {string} 'F', 'W' or 'H' when eastAsianWidth() reports one of
 *   those classes; '' for empty input, for code points up to
 *   ASCII_PRINTABLE_MAX, and for every other width class
 */
function get_cjk_width_class(ch) {
  if (!ch) return '';

  // Low code points are answered without calling the width lookup.
  const first = ch.codePointAt(0);
  const isAsciiRange = first !== undefined && first <= ASCII_PRINTABLE_MAX;
  if (isAsciiRange) return '';

  const width = eastAsianWidth(ch);
  if (width === 'F' || width === 'W' || width === 'H') return width;
  return '';
}
96
+
97
+
98
/**
 * Walk the children of one inline token and suppress line breaks that sit
 * next to CJK characters, optionally inserting a spacer after punctuation.
 *
 * A break is either a `softbreak` token or a `text` token whose content is
 * exactly "\n". A suppressed break is turned into a `text` token whose
 * content is '' or the configured punctuation space.
 *
 * @param {Array} tokens - children of the inline token (mutated in place)
 * @param {object} state - core state (not read by this function)
 * @param {object} ctx - resolved options: either, normalizeSoftBreaks,
 *   punctuationSpace, punctuationConfig, maxPunctuationLength,
 *   considerInlineBoundaries
 * @param {object} inlineToken - the parent inline token
 * @returns {void}
 */
function process_inlines(tokens, state, ctx, inlineToken) {
  const either = ctx.either;
  const punctuationSpace = ctx.punctuationSpace;
  const punctuationConfig = ctx.punctuationConfig;
  const maxPunctuationLength = ctx.maxPunctuationLength;
  const considerInlineBoundaries = ctx.considerInlineBoundaries;
  const punctuationEnabled = Boolean(punctuationSpace && punctuationConfig);

  if (ctx.normalizeSoftBreaks) normalize_text_tokens(tokens);

  for (let i = 0; i < tokens.length; i++) {
    const breakToken = tokens[i];
    const isBreak = breakToken.type === 'softbreak' ||
      (breakToken.type === 'text' && breakToken.content === '\n');
    if (!isBreak) continue;

    // Neighbouring characters default to a space when no text is found.
    let last = ' ';
    let next = ' ';
    let trailingMatchesPunctuation = false;
    let skippedEmptyBefore = false;
    let skippedEmptyAfter = false;

    // Nearest non-empty text token before the break.
    for (let j = i - 1; j >= 0; j--) {
      if (tokens[j].type !== 'text') continue;

      const before = tokens[j].content;
      if (!before) {
        skippedEmptyBefore = true;
        continue;
      }

      const endsWithPair = is_surrogate(
        before.charCodeAt(before.length - 2),
        before.charCodeAt(before.length - 1)
      );
      last = before.slice(endsWithPair ? -2 : -1);

      if (punctuationEnabled && maxPunctuationLength > 0) {
        trailingMatchesPunctuation = matches_punctuation_sequence(
          before.slice(-maxPunctuationLength),
          punctuationConfig
        );
      }
      break;
    }

    // Nearest non-empty text token after the break.
    for (let j = i + 1; j < tokens.length; j++) {
      if (tokens[j].type !== 'text') continue;

      const after = tokens[j].content;
      if (!after) {
        skippedEmptyAfter = true;
        continue;
      }

      const startsWithPair = is_surrogate(after.charCodeAt(0), after.charCodeAt(1));
      next = after.slice(0, startsWithPair ? 2 : 1);
      break;
    }

    const lastWidthClass = get_cjk_width_class(last);
    const nextWidthClass = get_cjk_width_class(next);
    let lastIsCjk = lastWidthClass !== '';
    let nextIsCjk = nextWidthClass !== '';

    // When an empty text token was skipped on either side and both
    // neighbours are CJK, the width rule is switched off for this break.
    if (considerInlineBoundaries && (skippedEmptyBefore || skippedEmptyAfter) && lastIsCjk && nextIsCjk) {
      lastIsCjk = false;
      nextIsCjk = false;
    }

    // A break adjacent to ZWSP is always removed.
    let remove_break = last === '\u200b' || next === '\u200b';

    // Otherwise remove it when both sides (or, with `either`, at least one
    // side) are fullwidth (F), wide (W) or halfwidth (H), but not Hangul.
    const widthRuleApplies = either ? (lastIsCjk || nextIsCjk) : (lastIsCjk && nextIsCjk);
    if (widthRuleApplies && !is_hangul(last) && !is_hangul(next)) remove_break = true;

    if (!remove_break) continue;

    let insertPunctuationSpace = false;
    const nextIsFullwidthOrWide = nextWidthClass === 'F' || nextWidthClass === 'W';
    if (punctuationEnabled && trailingMatchesPunctuation && last && next && next !== '\u200b') {
      if (is_printable_ascii(next) || nextIsFullwidthOrWide) insertPunctuationSpace = true;
    }

    tokens[i].type = 'text';
    tokens[i].content = insertPunctuationSpace ? punctuationSpace : '';
  }

  if (punctuationEnabled) {
    apply_missing_punctuation_spacing(tokens, inlineToken, punctuationSpace, punctuationConfig);
  }
}
194
+
195
+
196
/**
 * Replace every `text` token that embeds "\n" with the pieces produced by
 * split_text_token(), editing the array in place.
 *
 * @param {Array} tokens - inline children (mutated)
 * @returns {void}
 */
function normalize_text_tokens(tokens) {
  let cursor = 0;

  while (cursor < tokens.length) {
    const candidate = tokens[cursor];
    const needsSplit = candidate.type === 'text' &&
      Boolean(candidate.content) &&
      candidate.content.indexOf('\n') !== -1;

    if (!needsSplit) {
      cursor += 1;
      continue;
    }

    const pieces = split_text_token(candidate);
    tokens.splice(cursor, 1, ...pieces);
    // Resume after the freshly inserted pieces.
    cursor += pieces.length;
  }
}
210
+
211
+
212
/**
 * Split a text token at every "\n" into text tokens and softbreak tokens.
 *
 * Empty text runs between newlines are dropped. The new tokens are created
 * with the constructor of the source token.
 *
 * @param {object} token - text token whose `content` is a string
 * @returns {Array} replacement tokens; `[token]` when nothing was produced
 */
function split_text_token(token) {
  const TokenConstructor = token.constructor;
  const content = token.content;
  const parts = [];
  let start = 0;

  let newlineAt = content.indexOf('\n', start);
  while (newlineAt !== -1) {
    if (newlineAt > start) {
      parts.push(clone_text_token(TokenConstructor, token, content.slice(start, newlineAt)));
    }
    parts.push(create_softbreak_token(TokenConstructor, token));
    start = newlineAt + 1;
    newlineAt = content.indexOf('\n', start);
  }

  // Text after the last newline (or the whole content when none was found).
  if (start < content.length) {
    parts.push(clone_text_token(TokenConstructor, token, content.slice(start)));
  }

  return parts.length === 0 ? [ token ] : parts;
}
236
+
237
+
238
/**
 * Create a `text` token that mirrors `source` but carries different content.
 *
 * @param {Function} TokenConstructor - constructor called as (type, tag, nesting)
 * @param {object} source - token whose base fields are copied
 * @param {string} text - content for the new token
 * @returns {object} the new token
 */
function clone_text_token(TokenConstructor, source, text) {
  const copy = new TokenConstructor('text', source.tag, 0);
  copy_token_base(copy, source);
  return Object.assign(copy, { content: text });
}
244
+
245
+
246
/**
 * Create a `softbreak` token that inherits the base fields of `source`.
 *
 * @param {Function} TokenConstructor - constructor called as (type, tag, nesting)
 * @param {object} source - token whose base fields are copied
 * @returns {object} softbreak token with empty content, markup and info
 */
function create_softbreak_token(TokenConstructor, source) {
  const softbreak = new TokenConstructor('softbreak', '', 0);
  copy_token_base(softbreak, source);
  // Blank the textual fields that copy_token_base carried over or left set.
  return Object.assign(softbreak, { content: '', markup: '', info: '' });
}
254
+
255
+
256
/**
 * Copy the shared token fields from `source` onto `target`.
 *
 * `meta`, `attrs` and `map` are shallow-copied when present so the two tokens
 * do not share those containers; `children` is copied by reference.
 *
 * @param {object} target - token to fill in (mutated)
 * @param {object} source - token to read from
 * @returns {void}
 */
function copy_token_base(target, source) {
  const { meta, attrs, map } = source;

  target.level = source.level;
  target.meta = meta ? Object.assign({}, meta) : meta;
  target.block = source.block;
  target.hidden = source.hidden;
  target.markup = source.markup;
  target.info = source.info;
  target.children = source.children;
  target.attrs = attrs ? attrs.slice() : attrs;
  target.map = map ? map.slice() : map;
}
267
+
268
+
269
/**
 * Second pass: insert a spacer token after punctuation-ending text tokens
 * whose raw source had a newline before the next visible token but which
 * have no whitespace at that boundary.
 *
 * @param {Array} tokens - inline children (mutated in place)
 * @param {object} inlineToken - parent inline token; its `content` is the
 *   raw text searched for newline boundaries
 * @param {string} punctuationSpace - spacer string to insert
 * @param {{sequences: Set<string>, maxLength: number}} punctuationConfig
 * @returns {void}
 */
function apply_missing_punctuation_spacing(tokens, inlineToken, punctuationSpace, punctuationConfig) {
  const raw = inlineToken ? inlineToken.content : '';
  if (!raw || raw.indexOf('\n') === -1) return;
  if (!tokens || tokens.length === 0) return;

  // Search cursor shared across boundaries so matches only move forward.
  const rawSearchState = { pos: 0 };
  const tailLength = punctuationConfig.maxLength > 0 ? punctuationConfig.maxLength : 1;

  for (let idx = 0; idx < tokens.length; idx++) {
    const current = tokens[idx];
    if (!current || current.type !== 'text' || !current.content) continue;

    const text = current.content;
    if (!matches_punctuation_sequence(text.slice(-tailLength), punctuationConfig)) continue;
    if (/\s$/.test(text)) continue;

    const nextInfo = find_next_visible_token(tokens, idx + 1);
    if (!nextInfo) continue;

    const nextStartsWithSpace = nextInfo.token.type === 'text' &&
      /^\s/.test(nextInfo.token.content || '');
    if (nextStartsWithSpace) continue;

    const hadNewline = raw_boundary_includes_newline(
      raw, tokens, idx, nextInfo.index, nextInfo.fragment, rawSearchState
    );
    if (!hadNewline) continue;

    insert_space_token(tokens, nextInfo.index, nextInfo.token, punctuationSpace);
    // Jump to the inserted spacer; the loop increment then lands on the
    // token that used to be "next".
    idx = nextInfo.index;
  }

  apply_single_text_token_spacing(tokens, inlineToken, punctuationSpace, punctuationConfig);
}
300
+
301
+
302
/**
 * Check whether the raw source has a newline directly between the token at
 * `fromIdx` (plus the markup of the tokens in between) and `afterFragment`.
 *
 * On success `state.pos` is advanced to the start of `afterFragment` in the
 * source, so later searches continue from there.
 *
 * @param {string} source - raw inline content
 * @param {Array} tokens - inline children
 * @param {number} fromIdx - index of the token before the boundary
 * @param {number} nextIdx - index of the next visible token
 * @param {string} afterFragment - raw text expected right after the newline
 * @param {{pos: number}} state - search cursor (mutated on success)
 * @returns {boolean} true when the reconstructed boundary is found
 */
function raw_boundary_includes_newline(source, tokens, fromIdx, nextIdx, afterFragment, state) {
  if (!source || !afterFragment) return false;

  const beforeFragment = tokens[fromIdx].content || '';

  // Markup of the skipped tokens (e.g. closing emphasis markers).
  let betweenFragment = '';
  for (let k = fromIdx + 1; k < nextIdx; k += 1) {
    betweenFragment += tokens[k].markup || '';
  }

  const candidate = `${beforeFragment}${betweenFragment}\n${afterFragment}`;
  const startPos = source.indexOf(candidate, state.pos);
  if (startPos === -1) return false;

  state.pos = startPos + candidate.length - afterFragment.length;
  return true;
}
316
+
317
+
318
/**
 * Find the first token at or after `startIdx` that yields a non-empty raw
 * fragment according to derive_after_fragment().
 *
 * @param {Array} tokens - inline children
 * @param {number} startIdx - index to start scanning from
 * @returns {{index: number, token: object, fragment: string}|null} the match,
 *   or null when no such token exists
 */
function find_next_visible_token(tokens, startIdx) {
  let idx = startIdx;

  while (idx < tokens.length) {
    const token = tokens[idx];
    const fragment = token ? derive_after_fragment(token) : '';
    if (fragment) return { index: idx, token, fragment };
    idx += 1;
  }

  return null;
}
328
+
329
+
330
/**
 * Return the raw text a token is expected to start with in the source.
 *
 * @param {object} token - inline token
 * @returns {string} the token content for text/html_inline/code_inline/inline
 *   tokens, '![' for images, '[' for link openers, '' for anything else
 */
function derive_after_fragment(token) {
  if (!token) return '';

  switch (token.type) {
    case 'text':
    case 'html_inline':
    case 'code_inline':
    case 'inline':
      return token.content || '';
    case 'image':
      return '![';
    case 'link_open':
      return '[';
    default:
      return '';
  }
}
340
+
341
+
342
/**
 * Insert a `text` token holding the punctuation spacer at `insertIndex`.
 *
 * The token is created with the constructor of `referenceToken`, falling
 * back to that of `tokens[0]`; nothing happens when neither is available or
 * when the spacer is empty. `level` and a shallow copy of `meta` are taken
 * from the reference token.
 *
 * @param {Array} tokens - inline children (mutated)
 * @param {number} insertIndex - position of the new token
 * @param {object} referenceToken - token the spacer is placed in front of
 * @param {string} punctuationSpace - spacer content
 * @returns {void}
 */
function insert_space_token(tokens, insertIndex, referenceToken, punctuationSpace) {
  if (!punctuationSpace) return;

  const TokenConstructor = (referenceToken && referenceToken.constructor) ||
    (tokens[0] && tokens[0].constructor);
  if (!TokenConstructor) return;

  let level = 0;
  let meta = null;
  if (referenceToken) {
    level = referenceToken.level;
    meta = referenceToken.meta ? Object.assign({}, referenceToken.meta) : referenceToken.meta;
  }

  const spaceToken = new TokenConstructor('text', '', 0);
  Object.assign(spaceToken, {
    content: punctuationSpace,
    markup: '',
    info: '',
    tag: '',
    block: false,
    hidden: false,
    level,
    meta,
    children: null,
    attrs: null,
    map: null,
  });

  tokens.splice(insertIndex, 0, spaceToken);
}
360
+
361
+
362
/**
 * Handle the case where the inline token has exactly one `text` child even
 * though its raw content spans several lines: insert the spacer directly
 * into that child's content at each qualifying line boundary.
 *
 * A boundary qualifies when the visible tail of the left raw line matches a
 * punctuation sequence and the first visible character of the right raw line
 * is printable ASCII or fullwidth/wide.
 *
 * NOTE(review): insertion offsets are the summed lengths of the raw lines
 * without their "\n" separators, which presumably assumes the child content
 * is the raw content with newlines already removed. Confirm against the
 * plugins that produce such tokens.
 *
 * @param {Array} tokens - inline children; only acted on when length is 1
 * @param {object} inlineToken - parent inline token providing raw content
 * @param {string} punctuationSpace - spacer string to insert
 * @param {{sequences: Set<string>, maxLength: number}} punctuationConfig
 * @returns {void}
 */
function apply_single_text_token_spacing(tokens, inlineToken, punctuationSpace, punctuationConfig) {
  const raw = inlineToken ? inlineToken.content : '';
  if (!raw) return;
  if (!tokens || tokens.length !== 1) return;
  if (raw.indexOf('\n') === -1) return;

  const token = tokens[0];
  if (!token || token.type !== 'text' || !token.content) return;

  const segments = raw.split('\n');
  if (segments.length < 2) return;

  let consumed = 0;       // raw characters of the lines already processed
  let inserted = 0;       // spacer characters added so far
  let updatedContent = token.content;

  for (let segIdx = 0; segIdx + 1 < segments.length; segIdx++) {
    const leftRaw = segments[segIdx];
    const tail = extract_visible_tail(leftRaw, punctuationConfig.maxLength);
    const nextChar = extract_visible_head(segments[segIdx + 1]);

    const shouldInsert = tail &&
      matches_punctuation_sequence(tail, punctuationConfig) &&
      nextChar &&
      (is_printable_ascii(nextChar) || is_fullwidth_or_wide(nextChar));

    if (shouldInsert) {
      const splitIndex = consumed + leftRaw.length + inserted;
      const inRange = splitIndex >= 0 && splitIndex <= updatedContent.length;
      // Skip boundaries that already carry whitespace.
      if (inRange && !/\s/.test(updatedContent.charAt(splitIndex))) {
        updatedContent = updatedContent.slice(0, splitIndex) +
          punctuationSpace +
          updatedContent.slice(splitIndex);
        inserted += punctuationSpace.length;
      }
    }

    consumed += leftRaw.length;
  }

  if (inserted > 0) {
    token.content = updatedContent;
  }
}
405
+
406
+
407
/**
 * Collect the last visible characters of a raw line, skipping whitespace and
 * trailing markup characters (see is_markup_closer_char).
 *
 * The line is scanned backwards one code point at a time. A surrogate pair is
 * always kept whole, so the result never starts with a lone low surrogate; as
 * a consequence it may be one code unit longer than `maxLength` when the last
 * character added is a pair.
 *
 * @param {string} raw - raw source line
 * @param {number} maxLength - stop once this many UTF-16 code units are held
 * @returns {string} the visible tail, or '' for empty input / zero length
 */
function extract_visible_tail(raw, maxLength) {
  if (!raw || !maxLength) return '';

  let result = '';
  let pos = raw.length;

  while (pos > 0 && result.length < maxLength) {
    // Walking backwards, the unit at pos - 1 is the LOW half of any pair, so
    // the pair has to be detected from both halves explicitly.
    let charLen = 1;
    const unit = raw.charCodeAt(pos - 1);
    if (pos > 1 && unit >= 0xDC00 && unit <= 0xDFFF) {
      const lead = raw.charCodeAt(pos - 2);
      if (lead >= 0xD800 && lead <= 0xDBFF) charLen = 2;
    }

    const ch = raw.slice(pos - charLen, pos);
    pos -= charLen;

    if (/\s/.test(ch)) continue;
    if (is_markup_closer_char(ch)) continue;
    result = ch + result;
  }

  return result;
}


/**
 * Return the first non-whitespace character (whole code point) of a raw line.
 *
 * @param {string} raw - raw source line
 * @returns {string} the character, or '' when the line is empty or blank
 */
function extract_visible_head(raw) {
  if (!raw) return '';

  for (let pos = 0; pos < raw.length;) {
    // Scanning forwards, codePointAt() sees the lead surrogate first and
    // therefore reports astral code points correctly.
    const code = raw.codePointAt(pos);
    const charLen = code > 0xFFFF ? 2 : 1;
    const ch = raw.slice(pos, pos + charLen);
    pos += charLen;
    if (/\s/.test(ch)) continue;
    return ch;
  }

  return '';
}


/**
 * Tell whether `ch` is one of the markup characters ignored at line ends.
 *
 * @param {string} ch - single character
 * @returns {boolean} true for '*', '_', '~' and '`'
 */
function is_markup_closer_char(ch) {
  return ch === '*' || ch === '_' || ch === '~' || ch === '`';
}
440
+
441
+
442
/**
 * markdown-it plugin entry point: registers the `cjk_breaks` core rule.
 *
 * @param {object} md - markdown-it instance; nothing is registered when it
 *   lacks `core.ruler`
 * @param {object} [opts] - options: `either`, `normalizeSoftBreaks`,
 *   `spaceAfterPunctuation`, `spaceAfterPunctuationTargets`
 * @returns {void}
 */
export default function cjk_breaks_plugin(md, opts) {
  const options = opts || {};
  const punctuationSpace = resolve_punctuation_space_option(options);
  // Targets only matter when a spacer is configured.
  const punctuationConfig = punctuationSpace ? resolve_punctuation_targets(options) : null;

  const ctx = {
    either: !!options.either,
    normalizeSoftBreaks: !!options.normalizeSoftBreaks,
    considerInlineBoundaries: !options.normalizeSoftBreaks,
    punctuationSpace,
    punctuationConfig,
    maxPunctuationLength: punctuationConfig ? punctuationConfig.maxLength : 0,
  };

  // Core rule: visit every inline block token, last to first.
  const cjk_breaks = (state) => {
    let blkIdx = state.tokens.length;
    while (blkIdx-- > 0) {
      const block = state.tokens[blkIdx];
      if (block.type === 'inline') {
        process_inlines(block.children, state, ctx, block);
      }
    }
  };

  if (!md || !md.core || !md.core.ruler) return;
  md.core.ruler.push('cjk_breaks', cjk_breaks);
}
package/package.json ADDED
@@ -0,0 +1,23 @@
1
+ {
2
+ "name": "@peaceroad/markdown-it-cjk-breaks-mod",
3
+ "version": "0.1.0",
4
+ "type": "module",
5
+ "description": "Suppress linebreaks between east asian (Especially Japanese) characters",
6
+ "repository": "https://github.com/peaceroad/markdown-it-cjk-breaks-mod.git",
7
+ "license": "MIT",
8
+ "scripts": {
9
+ "test": "node test/test.js"
10
+ },
11
+ "files": [
12
+ "index.js",
13
+ "README.md",
14
+ "LICENSE"
15
+ ],
16
+ "dependencies": {
17
+ "eastasianwidth": "^0.3.0"
18
+ },
19
+ "devDependencies": {
20
+ "@peaceroad/markdown-it-strong-ja": "^0.5.3",
21
+ "markdown-it": "^14.1.0"
22
+ }
23
+ }