@peaceroad/markdown-it-cjk-breaks-mod 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +93 -70
- package/package.json +1 -1
package/index.js
CHANGED
|
@@ -6,6 +6,13 @@ const ASCII_PRINTABLE_MAX = 0x7E;
|
|
|
6
6
|
const IDEOGRAPHIC_SPACE = '\u3000';
|
|
7
7
|
const DEFAULT_PUNCTUATION_TARGETS = ['!', '?', '⁉', '!?', '?!', '!?', '?!', '.', ':'];
|
|
8
8
|
const DEFAULT_PUNCTUATION_CONFIG = create_punctuation_config(DEFAULT_PUNCTUATION_TARGETS);
|
|
9
|
+
/* eslint-disable max-len */
|
|
10
|
+
// require('unicode-10.0.0/Script/Hangul/regex')
|
|
11
|
+
const HANGUL_RE = /[\u1100-\u11FF\u302E\u302F\u3131-\u318E\u3200-\u321E\u3260-\u327E\uA960-\uA97C\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uFFA0-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC]/;
|
|
12
|
+
/* eslint-enable max-len */
|
|
13
|
+
const WHITESPACE_RE = /\s/;
|
|
14
|
+
const WHITESPACE_LEAD_RE = /^\s/;
|
|
15
|
+
const WHITESPACE_TRAIL_RE = /\s$/;
|
|
9
16
|
|
|
10
17
|
|
|
11
18
|
function is_surrogate(c1, c2) {
|
|
@@ -14,10 +21,7 @@ function is_surrogate(c1, c2) {
|
|
|
14
21
|
|
|
15
22
|
|
|
16
23
|
function is_hangul(c) {
|
|
17
|
-
|
|
18
|
-
/* eslint-disable max-len */
|
|
19
|
-
return /[\u1100-\u11FF\u302E\u302F\u3131-\u318E\u3200-\u321E\u3260-\u327E\uA960-\uA97C\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uFFA0-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC]/.test(c);
|
|
20
|
-
/* eslint-enable max-len */
|
|
24
|
+
return HANGUL_RE.test(c);
|
|
21
25
|
}
|
|
22
26
|
|
|
23
27
|
|
|
@@ -51,10 +55,15 @@ function resolve_punctuation_targets(opts) {
|
|
|
51
55
|
if (!opts) return DEFAULT_PUNCTUATION_CONFIG;
|
|
52
56
|
|
|
53
57
|
var hasCustomTargets = Object.prototype.hasOwnProperty.call(opts, 'spaceAfterPunctuationTargets');
|
|
58
|
+
var addTargets = opts.spaceAfterPunctuationTargetsAdd;
|
|
59
|
+
var removeTargets = opts.spaceAfterPunctuationTargetsRemove;
|
|
60
|
+
if (!hasCustomTargets && addTargets === undefined && removeTargets === undefined) {
|
|
61
|
+
return DEFAULT_PUNCTUATION_CONFIG;
|
|
62
|
+
}
|
|
54
63
|
var baseTargets;
|
|
55
64
|
|
|
56
65
|
if (!hasCustomTargets) {
|
|
57
|
-
baseTargets = DEFAULT_PUNCTUATION_TARGETS
|
|
66
|
+
baseTargets = DEFAULT_PUNCTUATION_TARGETS;
|
|
58
67
|
} else {
|
|
59
68
|
var customTargets = opts.spaceAfterPunctuationTargets;
|
|
60
69
|
if (customTargets === null || customTargets === false) return null;
|
|
@@ -63,13 +72,12 @@ function resolve_punctuation_targets(opts) {
|
|
|
63
72
|
baseTargets = [ customTargets ];
|
|
64
73
|
} else if (Array.isArray(customTargets)) {
|
|
65
74
|
if (customTargets.length === 0) return null;
|
|
66
|
-
baseTargets = customTargets
|
|
75
|
+
baseTargets = customTargets;
|
|
67
76
|
} else {
|
|
68
|
-
baseTargets = DEFAULT_PUNCTUATION_TARGETS
|
|
77
|
+
baseTargets = DEFAULT_PUNCTUATION_TARGETS;
|
|
69
78
|
}
|
|
70
79
|
}
|
|
71
80
|
|
|
72
|
-
var addTargets = opts.spaceAfterPunctuationTargetsAdd;
|
|
73
81
|
if (addTargets !== undefined) {
|
|
74
82
|
var addList = [];
|
|
75
83
|
if (typeof addTargets === 'string') {
|
|
@@ -82,7 +90,6 @@ function resolve_punctuation_targets(opts) {
|
|
|
82
90
|
}
|
|
83
91
|
}
|
|
84
92
|
|
|
85
|
-
var removeTargets = opts.spaceAfterPunctuationTargetsRemove;
|
|
86
93
|
if (removeTargets !== undefined) {
|
|
87
94
|
var removeList = [];
|
|
88
95
|
if (typeof removeTargets === 'string') {
|
|
@@ -140,15 +147,15 @@ function get_cjk_width_class(ch) {
|
|
|
140
147
|
return width === 'F' || width === 'W' || width === 'H' ? width : '';
|
|
141
148
|
}
|
|
142
149
|
|
|
143
|
-
function build_next_text_info(tokens) {
|
|
150
|
+
function build_next_text_info(tokens, trackSkippedEmpty) {
|
|
144
151
|
var nextTextIndex = new Array(tokens.length);
|
|
145
|
-
var nextSkippedEmpty = new Array(tokens.length);
|
|
152
|
+
var nextSkippedEmpty = trackSkippedEmpty ? new Array(tokens.length) : null;
|
|
146
153
|
var nextNonEmpty = -1;
|
|
147
154
|
var sawEmpty = false;
|
|
148
155
|
|
|
149
156
|
for (var idx = tokens.length - 1; idx >= 0; idx--) {
|
|
150
157
|
nextTextIndex[idx] = nextNonEmpty;
|
|
151
|
-
nextSkippedEmpty[idx] = sawEmpty;
|
|
158
|
+
if (trackSkippedEmpty) nextSkippedEmpty[idx] = sawEmpty;
|
|
152
159
|
|
|
153
160
|
var token = tokens[idx];
|
|
154
161
|
if (!token || token.type !== 'text') continue;
|
|
@@ -170,7 +177,7 @@ function build_next_text_info(tokens) {
|
|
|
170
177
|
|
|
171
178
|
|
|
172
179
|
function process_inlines(tokens, state, ctx, inlineToken) {
|
|
173
|
-
var i, last,
|
|
180
|
+
var i, last, next, c1, c2, remove_break;
|
|
174
181
|
var either = ctx.either;
|
|
175
182
|
var normalizeSoftBreaks = ctx.normalizeSoftBreaks;
|
|
176
183
|
var punctuationSpace = ctx.punctuationSpace;
|
|
@@ -179,15 +186,27 @@ function process_inlines(tokens, state, ctx, inlineToken) {
|
|
|
179
186
|
var considerInlineBoundaries = ctx.considerInlineBoundaries;
|
|
180
187
|
var needsPunctuation = punctuationSpace && punctuationConfig && maxPunctuationLength > 0;
|
|
181
188
|
|
|
189
|
+
if (!tokens || tokens.length === 0) return;
|
|
182
190
|
if (normalizeSoftBreaks) normalize_text_tokens(tokens);
|
|
183
191
|
|
|
184
|
-
var
|
|
185
|
-
var
|
|
186
|
-
var
|
|
187
|
-
|
|
188
|
-
var widthCache = Object.create(null);
|
|
192
|
+
var nextTextIndex = null;
|
|
193
|
+
var nextSkippedEmpty = null;
|
|
194
|
+
var widthCache = null;
|
|
189
195
|
function get_cached_width_class(ch) {
|
|
190
196
|
if (!ch) return '';
|
|
197
|
+
if (!widthCache) {
|
|
198
|
+
var firstWidth = get_cjk_width_class(ch);
|
|
199
|
+
if (firstWidth === '') {
|
|
200
|
+
var codePoint = ch.codePointAt(0);
|
|
201
|
+
if (codePoint !== undefined && codePoint <= ASCII_PRINTABLE_MAX) return '';
|
|
202
|
+
widthCache = Object.create(null);
|
|
203
|
+
widthCache[ch] = '';
|
|
204
|
+
return '';
|
|
205
|
+
}
|
|
206
|
+
widthCache = Object.create(null);
|
|
207
|
+
widthCache[ch] = firstWidth;
|
|
208
|
+
return firstWidth;
|
|
209
|
+
}
|
|
191
210
|
var cached = widthCache[ch];
|
|
192
211
|
if (cached !== undefined) return cached;
|
|
193
212
|
var width = get_cjk_width_class(ch);
|
|
@@ -204,22 +223,24 @@ function process_inlines(tokens, state, ctx, inlineToken) {
|
|
|
204
223
|
var isSoftbreakToken = token.type === 'softbreak';
|
|
205
224
|
var isTextBreakToken = token.type === 'text' && token.content === '\n';
|
|
206
225
|
if (isSoftbreakToken || isTextBreakToken) {
|
|
226
|
+
if (!nextTextIndex) {
|
|
227
|
+
var nextInfo = build_next_text_info(tokens, considerInlineBoundaries);
|
|
228
|
+
nextTextIndex = nextInfo.nextTextIndex;
|
|
229
|
+
nextSkippedEmpty = nextInfo.nextSkippedEmpty;
|
|
230
|
+
}
|
|
207
231
|
// default last/next character to space
|
|
208
232
|
last = next = ' ';
|
|
209
|
-
|
|
210
|
-
var
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
233
|
+
var skippedEmptyBefore = false;
|
|
234
|
+
var skippedEmptyAfter = false;
|
|
235
|
+
if (considerInlineBoundaries) {
|
|
236
|
+
skippedEmptyBefore = sawEmptySinceLast;
|
|
237
|
+
skippedEmptyAfter = nextSkippedEmpty ? nextSkippedEmpty[i] : false;
|
|
238
|
+
}
|
|
214
239
|
|
|
215
240
|
if (hasLastText) {
|
|
216
241
|
c1 = lastTextContent.charCodeAt(lastTextContent.length - 2);
|
|
217
242
|
c2 = lastTextContent.charCodeAt(lastTextContent.length - 1);
|
|
218
243
|
last = lastTextContent.slice(is_surrogate(c1, c2) ? -2 : -1);
|
|
219
|
-
if (needsPunctuation) {
|
|
220
|
-
trailing = lastTextContent.slice(-maxPunctuationLength);
|
|
221
|
-
trailingMatchesPunctuation = matches_punctuation_sequence(trailing, punctuationConfig);
|
|
222
|
-
}
|
|
223
244
|
}
|
|
224
245
|
|
|
225
246
|
var nextIdx = nextTextIndex[i];
|
|
@@ -233,46 +254,44 @@ function process_inlines(tokens, state, ctx, inlineToken) {
|
|
|
233
254
|
}
|
|
234
255
|
|
|
235
256
|
remove_break = false;
|
|
236
|
-
|
|
237
|
-
// remove newline if it's adjacent to ZWSP
|
|
238
|
-
if (last === '\u200b' || next === '\u200b') remove_break = true;
|
|
239
|
-
|
|
240
|
-
var lastWidthClass = '';
|
|
241
257
|
var nextWidthClass = '';
|
|
258
|
+
var nextWidthComputed = false;
|
|
242
259
|
|
|
243
|
-
// remove newline if
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
lastWidthClass = get_cached_width_class(last);
|
|
260
|
+
// remove newline if it's adjacent to ZWSP
|
|
261
|
+
if (last === '\u200b' || next === '\u200b') {
|
|
262
|
+
remove_break = true;
|
|
263
|
+
} else {
|
|
264
|
+
// remove newline if both characters AND/OR fullwidth (F), wide (W) or
|
|
265
|
+
// halfwidth (H), but not Hangul
|
|
266
|
+
var lastWidthClass = get_cached_width_class(last);
|
|
267
|
+
if (either || lastWidthClass) {
|
|
268
|
+
nextWidthClass = get_cached_width_class(next);
|
|
269
|
+
nextWidthComputed = true;
|
|
254
270
|
}
|
|
255
|
-
nextWidthClass = get_cached_width_class(next);
|
|
256
271
|
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
tNext = nextWidthClass !== '';
|
|
272
|
+
var tLast = lastWidthClass !== '';
|
|
273
|
+
var tNext = nextWidthComputed ? nextWidthClass !== '' : false;
|
|
260
274
|
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
}
|
|
275
|
+
if (considerInlineBoundaries && (skippedEmptyBefore || skippedEmptyAfter) && tLast && tNext) {
|
|
276
|
+
tLast = false;
|
|
277
|
+
tNext = false;
|
|
278
|
+
}
|
|
279
|
+
if (either ? tLast || tNext : tLast && tNext) {
|
|
280
|
+
if (!is_hangul(last) && !is_hangul(next)) remove_break = true;
|
|
268
281
|
}
|
|
269
282
|
}
|
|
270
283
|
|
|
271
284
|
if (remove_break) {
|
|
272
285
|
var insertPunctuationSpace = false;
|
|
273
|
-
if (
|
|
274
|
-
var
|
|
275
|
-
if (
|
|
286
|
+
if (needsPunctuation && last && next && next !== '\u200b') {
|
|
287
|
+
var trailing = hasLastText ? lastTextContent.slice(-maxPunctuationLength) : '';
|
|
288
|
+
if (matches_punctuation_sequence(trailing, punctuationConfig)) {
|
|
289
|
+
if (!nextWidthComputed) {
|
|
290
|
+
nextWidthClass = get_cached_width_class(next);
|
|
291
|
+
}
|
|
292
|
+
var nextIsFullwidthOrWide = nextWidthClass === 'F' || nextWidthClass === 'W';
|
|
293
|
+
if (nextIsFullwidthOrWide || is_printable_ascii(next)) insertPunctuationSpace = true;
|
|
294
|
+
}
|
|
276
295
|
}
|
|
277
296
|
token.type = 'text';
|
|
278
297
|
token.content = insertPunctuationSpace ? punctuationSpace : '';
|
|
@@ -281,16 +300,16 @@ function process_inlines(tokens, state, ctx, inlineToken) {
|
|
|
281
300
|
|
|
282
301
|
if (token.type === 'text') {
|
|
283
302
|
if (!token.content) {
|
|
284
|
-
sawEmptySinceLast = true;
|
|
303
|
+
if (considerInlineBoundaries) sawEmptySinceLast = true;
|
|
285
304
|
} else {
|
|
286
305
|
lastTextContent = token.content;
|
|
287
306
|
hasLastText = true;
|
|
288
|
-
sawEmptySinceLast = false;
|
|
307
|
+
if (considerInlineBoundaries) sawEmptySinceLast = false;
|
|
289
308
|
}
|
|
290
309
|
}
|
|
291
310
|
}
|
|
292
311
|
|
|
293
|
-
if (
|
|
312
|
+
if (needsPunctuation) {
|
|
294
313
|
apply_missing_punctuation_spacing(tokens, inlineToken, punctuationSpace, punctuationConfig);
|
|
295
314
|
}
|
|
296
315
|
}
|
|
@@ -386,6 +405,8 @@ function apply_missing_punctuation_spacing(tokens, inlineToken, punctuationSpace
|
|
|
386
405
|
if (!inlineToken || !inlineToken.content) return;
|
|
387
406
|
if (inlineToken.content.indexOf('\n') === -1) return;
|
|
388
407
|
if (!tokens || tokens.length === 0) return;
|
|
408
|
+
var maxPunctuationLength = punctuationConfig.maxLength;
|
|
409
|
+
if (maxPunctuationLength <= 0) return;
|
|
389
410
|
|
|
390
411
|
var rawSearchState = { pos: 0 };
|
|
391
412
|
|
|
@@ -393,15 +414,13 @@ function apply_missing_punctuation_spacing(tokens, inlineToken, punctuationSpace
|
|
|
393
414
|
var current = tokens[idx];
|
|
394
415
|
if (!current || current.type !== 'text' || !current.content) continue;
|
|
395
416
|
|
|
396
|
-
var trailing =
|
|
397
|
-
current.content.slice(-punctuationConfig.maxLength) :
|
|
398
|
-
current.content.slice(-1);
|
|
417
|
+
var trailing = current.content.slice(-maxPunctuationLength);
|
|
399
418
|
if (!matches_punctuation_sequence(trailing, punctuationConfig)) continue;
|
|
400
|
-
if (
|
|
419
|
+
if (WHITESPACE_TRAIL_RE.test(current.content)) continue;
|
|
401
420
|
|
|
402
421
|
var nextInfo = find_next_visible_token(tokens, idx + 1);
|
|
403
422
|
if (!nextInfo) continue;
|
|
404
|
-
if (nextInfo.token.type === 'text' &&
|
|
423
|
+
if (nextInfo.token.type === 'text' && WHITESPACE_LEAD_RE.test(nextInfo.token.content || '')) continue;
|
|
405
424
|
|
|
406
425
|
if (!raw_boundary_includes_newline(inlineToken.content, tokens, idx, nextInfo.index, nextInfo.fragment, rawSearchState)) {
|
|
407
426
|
continue;
|
|
@@ -411,7 +430,9 @@ function apply_missing_punctuation_spacing(tokens, inlineToken, punctuationSpace
|
|
|
411
430
|
idx = nextInfo.index;
|
|
412
431
|
}
|
|
413
432
|
|
|
414
|
-
|
|
433
|
+
if (tokens.length === 1) {
|
|
434
|
+
apply_single_text_token_spacing(tokens, inlineToken, punctuationSpace, punctuationConfig);
|
|
435
|
+
}
|
|
415
436
|
}
|
|
416
437
|
|
|
417
438
|
|
|
@@ -481,6 +502,8 @@ function apply_single_text_token_spacing(tokens, inlineToken, punctuationSpace,
|
|
|
481
502
|
if (inlineToken.content.indexOf('\n') === -1) return;
|
|
482
503
|
var token = tokens[0];
|
|
483
504
|
if (!token || token.type !== 'text' || !token.content) return;
|
|
505
|
+
var maxPunctuationLength = punctuationConfig.maxLength;
|
|
506
|
+
if (maxPunctuationLength <= 0) return;
|
|
484
507
|
|
|
485
508
|
var segments = inlineToken.content.split('\n');
|
|
486
509
|
if (segments.length < 2) return;
|
|
@@ -490,7 +513,7 @@ function apply_single_text_token_spacing(tokens, inlineToken, punctuationSpace,
|
|
|
490
513
|
for (var segIdx = 0; segIdx < segments.length - 1; segIdx++) {
|
|
491
514
|
var leftRaw = segments[segIdx];
|
|
492
515
|
var rightRaw = segments[segIdx + 1];
|
|
493
|
-
var tail = extract_visible_tail(leftRaw,
|
|
516
|
+
var tail = extract_visible_tail(leftRaw, maxPunctuationLength);
|
|
494
517
|
var nextChar = extract_visible_head(rightRaw);
|
|
495
518
|
var shouldInsert = tail &&
|
|
496
519
|
matches_punctuation_sequence(tail, punctuationConfig) &&
|
|
@@ -501,7 +524,7 @@ function apply_single_text_token_spacing(tokens, inlineToken, punctuationSpace,
|
|
|
501
524
|
var splitIndex = cumulativeLength + leftRaw.length + offsetDelta;
|
|
502
525
|
if (splitIndex >= 0 && splitIndex <= updatedContent.length) {
|
|
503
526
|
var existingChar = updatedContent.charAt(splitIndex);
|
|
504
|
-
if (existingChar &&
|
|
527
|
+
if (existingChar && WHITESPACE_RE.test(existingChar)) {
|
|
505
528
|
// already has whitespace at this boundary
|
|
506
529
|
cumulativeLength += leftRaw.length;
|
|
507
530
|
continue;
|
|
@@ -528,7 +551,7 @@ function extract_visible_tail(raw, maxLength) {
|
|
|
528
551
|
var charLen = code > 0xFFFF ? 2 : 1;
|
|
529
552
|
var ch = raw.slice(pos - charLen, pos);
|
|
530
553
|
pos -= charLen;
|
|
531
|
-
if (
|
|
554
|
+
if (WHITESPACE_RE.test(ch)) continue;
|
|
532
555
|
if (is_markup_closer_char(ch)) continue;
|
|
533
556
|
result = ch + result;
|
|
534
557
|
}
|
|
@@ -543,7 +566,7 @@ function extract_visible_head(raw) {
|
|
|
543
566
|
var charLen = code > 0xFFFF ? 2 : 1;
|
|
544
567
|
var ch = raw.slice(pos, pos + charLen);
|
|
545
568
|
pos += charLen;
|
|
546
|
-
if (
|
|
569
|
+
if (WHITESPACE_RE.test(ch)) continue;
|
|
547
570
|
return ch;
|
|
548
571
|
}
|
|
549
572
|
return '';
|