@peaceroad/markdown-it-cjk-breaks-mod 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +47 -44
- package/package.json +1 -1
package/index.js
CHANGED
|
@@ -6,6 +6,13 @@ const ASCII_PRINTABLE_MAX = 0x7E;
|
|
|
6
6
|
const IDEOGRAPHIC_SPACE = '\u3000';
|
|
7
7
|
const DEFAULT_PUNCTUATION_TARGETS = ['!', '?', '⁉', '!?', '?!', '!?', '?!', '.', ':'];
|
|
8
8
|
const DEFAULT_PUNCTUATION_CONFIG = create_punctuation_config(DEFAULT_PUNCTUATION_TARGETS);
|
|
9
|
+
/* eslint-disable max-len */
|
|
10
|
+
// require('unicode-10.0.0/Script/Hangul/regex')
|
|
11
|
+
const HANGUL_RE = /[\u1100-\u11FF\u302E\u302F\u3131-\u318E\u3200-\u321E\u3260-\u327E\uA960-\uA97C\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uFFA0-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC]/;
|
|
12
|
+
/* eslint-enable max-len */
|
|
13
|
+
const WHITESPACE_RE = /\s/;
|
|
14
|
+
const WHITESPACE_LEAD_RE = /^\s/;
|
|
15
|
+
const WHITESPACE_TRAIL_RE = /\s$/;
|
|
9
16
|
|
|
10
17
|
|
|
11
18
|
function is_surrogate(c1, c2) {
|
|
@@ -14,10 +21,7 @@ function is_surrogate(c1, c2) {
|
|
|
14
21
|
|
|
15
22
|
|
|
16
23
|
function is_hangul(c) {
|
|
17
|
-
|
|
18
|
-
/* eslint-disable max-len */
|
|
19
|
-
return /[\u1100-\u11FF\u302E\u302F\u3131-\u318E\u3200-\u321E\u3260-\u327E\uA960-\uA97C\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uFFA0-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC]/.test(c);
|
|
20
|
-
/* eslint-enable max-len */
|
|
24
|
+
return HANGUL_RE.test(c);
|
|
21
25
|
}
|
|
22
26
|
|
|
23
27
|
|
|
@@ -140,15 +144,15 @@ function get_cjk_width_class(ch) {
|
|
|
140
144
|
return width === 'F' || width === 'W' || width === 'H' ? width : '';
|
|
141
145
|
}
|
|
142
146
|
|
|
143
|
-
function build_next_text_info(tokens) {
|
|
147
|
+
function build_next_text_info(tokens, trackSkippedEmpty) {
|
|
144
148
|
var nextTextIndex = new Array(tokens.length);
|
|
145
|
-
var nextSkippedEmpty = new Array(tokens.length);
|
|
149
|
+
var nextSkippedEmpty = trackSkippedEmpty ? new Array(tokens.length) : null;
|
|
146
150
|
var nextNonEmpty = -1;
|
|
147
151
|
var sawEmpty = false;
|
|
148
152
|
|
|
149
153
|
for (var idx = tokens.length - 1; idx >= 0; idx--) {
|
|
150
154
|
nextTextIndex[idx] = nextNonEmpty;
|
|
151
|
-
nextSkippedEmpty[idx] = sawEmpty;
|
|
155
|
+
if (trackSkippedEmpty) nextSkippedEmpty[idx] = sawEmpty;
|
|
152
156
|
|
|
153
157
|
var token = tokens[idx];
|
|
154
158
|
if (!token || token.type !== 'text') continue;
|
|
@@ -181,7 +185,7 @@ function process_inlines(tokens, state, ctx, inlineToken) {
|
|
|
181
185
|
|
|
182
186
|
if (normalizeSoftBreaks) normalize_text_tokens(tokens);
|
|
183
187
|
|
|
184
|
-
var nextInfo = build_next_text_info(tokens);
|
|
188
|
+
var nextInfo = build_next_text_info(tokens, considerInlineBoundaries);
|
|
185
189
|
var nextTextIndex = nextInfo.nextTextIndex;
|
|
186
190
|
var nextSkippedEmpty = nextInfo.nextSkippedEmpty;
|
|
187
191
|
|
|
@@ -209,8 +213,12 @@ function process_inlines(tokens, state, ctx, inlineToken) {
|
|
|
209
213
|
trailing = '';
|
|
210
214
|
var trailingMatchesPunctuation = false;
|
|
211
215
|
|
|
212
|
-
var skippedEmptyBefore =
|
|
213
|
-
var skippedEmptyAfter =
|
|
216
|
+
var skippedEmptyBefore = false;
|
|
217
|
+
var skippedEmptyAfter = false;
|
|
218
|
+
if (considerInlineBoundaries) {
|
|
219
|
+
skippedEmptyBefore = sawEmptySinceLast;
|
|
220
|
+
skippedEmptyAfter = nextSkippedEmpty ? nextSkippedEmpty[i] : false;
|
|
221
|
+
}
|
|
214
222
|
|
|
215
223
|
if (hasLastText) {
|
|
216
224
|
c1 = lastTextContent.charCodeAt(lastTextContent.length - 2);
|
|
@@ -233,44 +241,39 @@ function process_inlines(tokens, state, ctx, inlineToken) {
|
|
|
233
241
|
}
|
|
234
242
|
|
|
235
243
|
remove_break = false;
|
|
236
|
-
|
|
237
|
-
// remove newline if it's adjacent to ZWSP
|
|
238
|
-
if (last === '\u200b' || next === '\u200b') remove_break = true;
|
|
239
|
-
|
|
240
|
-
var lastWidthClass = '';
|
|
241
244
|
var nextWidthClass = '';
|
|
245
|
+
var nextWidthComputed = false;
|
|
242
246
|
|
|
243
|
-
// remove newline if
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
lastWidthClass = get_cached_width_class(last);
|
|
247
|
+
// remove newline if it's adjacent to ZWSP
|
|
248
|
+
if (last === '\u200b' || next === '\u200b') {
|
|
249
|
+
remove_break = true;
|
|
250
|
+
} else {
|
|
251
|
+
// remove newline if both characters (or, when `either` is set, at least one) are fullwidth (F), wide (W) or
|
|
252
|
+
// halfwidth (H), but not Hangul
|
|
253
|
+
var lastWidthClass = get_cached_width_class(last);
|
|
254
|
+
if (either || lastWidthClass) {
|
|
255
|
+
nextWidthClass = get_cached_width_class(next);
|
|
256
|
+
nextWidthComputed = true;
|
|
254
257
|
}
|
|
255
|
-
nextWidthClass = get_cached_width_class(next);
|
|
256
258
|
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
tNext = nextWidthClass !== '';
|
|
259
|
+
var tLast = lastWidthClass !== '';
|
|
260
|
+
var tNext = nextWidthComputed ? nextWidthClass !== '' : false;
|
|
260
261
|
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
}
|
|
262
|
+
if (considerInlineBoundaries && (skippedEmptyBefore || skippedEmptyAfter) && tLast && tNext) {
|
|
263
|
+
tLast = false;
|
|
264
|
+
tNext = false;
|
|
265
|
+
}
|
|
266
|
+
if (either ? tLast || tNext : tLast && tNext) {
|
|
267
|
+
if (!is_hangul(last) && !is_hangul(next)) remove_break = true;
|
|
268
268
|
}
|
|
269
269
|
}
|
|
270
270
|
|
|
271
271
|
if (remove_break) {
|
|
272
272
|
var insertPunctuationSpace = false;
|
|
273
|
-
if (
|
|
273
|
+
if (needsPunctuation && trailingMatchesPunctuation && last && next && next !== '\u200b') {
|
|
274
|
+
if (!nextWidthComputed) {
|
|
275
|
+
nextWidthClass = get_cached_width_class(next);
|
|
276
|
+
}
|
|
274
277
|
var nextIsFullwidthOrWide = nextWidthClass === 'F' || nextWidthClass === 'W';
|
|
275
278
|
if (is_printable_ascii(next) || nextIsFullwidthOrWide) insertPunctuationSpace = true;
|
|
276
279
|
}
|
|
@@ -290,7 +293,7 @@ function process_inlines(tokens, state, ctx, inlineToken) {
|
|
|
290
293
|
}
|
|
291
294
|
}
|
|
292
295
|
|
|
293
|
-
if (
|
|
296
|
+
if (needsPunctuation) {
|
|
294
297
|
apply_missing_punctuation_spacing(tokens, inlineToken, punctuationSpace, punctuationConfig);
|
|
295
298
|
}
|
|
296
299
|
}
|
|
@@ -397,11 +400,11 @@ function apply_missing_punctuation_spacing(tokens, inlineToken, punctuationSpace
|
|
|
397
400
|
current.content.slice(-punctuationConfig.maxLength) :
|
|
398
401
|
current.content.slice(-1);
|
|
399
402
|
if (!matches_punctuation_sequence(trailing, punctuationConfig)) continue;
|
|
400
|
-
if (
|
|
403
|
+
if (WHITESPACE_TRAIL_RE.test(current.content)) continue;
|
|
401
404
|
|
|
402
405
|
var nextInfo = find_next_visible_token(tokens, idx + 1);
|
|
403
406
|
if (!nextInfo) continue;
|
|
404
|
-
if (nextInfo.token.type === 'text' &&
|
|
407
|
+
if (nextInfo.token.type === 'text' && WHITESPACE_LEAD_RE.test(nextInfo.token.content || '')) continue;
|
|
405
408
|
|
|
406
409
|
if (!raw_boundary_includes_newline(inlineToken.content, tokens, idx, nextInfo.index, nextInfo.fragment, rawSearchState)) {
|
|
407
410
|
continue;
|
|
@@ -501,7 +504,7 @@ function apply_single_text_token_spacing(tokens, inlineToken, punctuationSpace,
|
|
|
501
504
|
var splitIndex = cumulativeLength + leftRaw.length + offsetDelta;
|
|
502
505
|
if (splitIndex >= 0 && splitIndex <= updatedContent.length) {
|
|
503
506
|
var existingChar = updatedContent.charAt(splitIndex);
|
|
504
|
-
if (existingChar &&
|
|
507
|
+
if (existingChar && WHITESPACE_RE.test(existingChar)) {
|
|
505
508
|
// already has whitespace at this boundary
|
|
506
509
|
cumulativeLength += leftRaw.length;
|
|
507
510
|
continue;
|
|
@@ -528,7 +531,7 @@ function extract_visible_tail(raw, maxLength) {
|
|
|
528
531
|
var charLen = code > 0xFFFF ? 2 : 1;
|
|
529
532
|
var ch = raw.slice(pos - charLen, pos);
|
|
530
533
|
pos -= charLen;
|
|
531
|
-
if (
|
|
534
|
+
if (WHITESPACE_RE.test(ch)) continue;
|
|
532
535
|
if (is_markup_closer_char(ch)) continue;
|
|
533
536
|
result = ch + result;
|
|
534
537
|
}
|
|
@@ -543,7 +546,7 @@ function extract_visible_head(raw) {
|
|
|
543
546
|
var charLen = code > 0xFFFF ? 2 : 1;
|
|
544
547
|
var ch = raw.slice(pos, pos + charLen);
|
|
545
548
|
pos += charLen;
|
|
546
|
-
if (
|
|
549
|
+
if (WHITESPACE_RE.test(ch)) continue;
|
|
547
550
|
return ch;
|
|
548
551
|
}
|
|
549
552
|
return '';
|