@peaceroad/markdown-it-cjk-breaks-mod 0.1.2 → 0.1.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/index.js +47 -44
  2. package/package.json +1 -1
package/index.js CHANGED
@@ -6,6 +6,13 @@ const ASCII_PRINTABLE_MAX = 0x7E;
6
6
  const IDEOGRAPHIC_SPACE = '\u3000';
7
7
  const DEFAULT_PUNCTUATION_TARGETS = ['!', '?', '⁉', '!?', '?!', '!?', '?!', '.', ':'];
8
8
  const DEFAULT_PUNCTUATION_CONFIG = create_punctuation_config(DEFAULT_PUNCTUATION_TARGETS);
9
+ /* eslint-disable max-len */
10
+ // require('unicode-10.0.0/Script/Hangul/regex')
11
+ const HANGUL_RE = /[\u1100-\u11FF\u302E\u302F\u3131-\u318E\u3200-\u321E\u3260-\u327E\uA960-\uA97C\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uFFA0-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC]/;
12
+ /* eslint-enable max-len */
13
+ const WHITESPACE_RE = /\s/;
14
+ const WHITESPACE_LEAD_RE = /^\s/;
15
+ const WHITESPACE_TRAIL_RE = /\s$/;
9
16
 
10
17
 
11
18
  function is_surrogate(c1, c2) {
@@ -14,10 +21,7 @@ function is_surrogate(c1, c2) {
14
21
 
15
22
 
16
23
  function is_hangul(c) {
17
- // require('unicode-10.0.0/Script/Hangul/regex')
18
- /* eslint-disable max-len */
19
- return /[\u1100-\u11FF\u302E\u302F\u3131-\u318E\u3200-\u321E\u3260-\u327E\uA960-\uA97C\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uFFA0-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC]/.test(c);
20
- /* eslint-enable max-len */
24
+ return HANGUL_RE.test(c);
21
25
  }
22
26
 
23
27
 
@@ -140,15 +144,15 @@ function get_cjk_width_class(ch) {
140
144
  return width === 'F' || width === 'W' || width === 'H' ? width : '';
141
145
  }
142
146
 
143
- function build_next_text_info(tokens) {
147
+ function build_next_text_info(tokens, trackSkippedEmpty) {
144
148
  var nextTextIndex = new Array(tokens.length);
145
- var nextSkippedEmpty = new Array(tokens.length);
149
+ var nextSkippedEmpty = trackSkippedEmpty ? new Array(tokens.length) : null;
146
150
  var nextNonEmpty = -1;
147
151
  var sawEmpty = false;
148
152
 
149
153
  for (var idx = tokens.length - 1; idx >= 0; idx--) {
150
154
  nextTextIndex[idx] = nextNonEmpty;
151
- nextSkippedEmpty[idx] = sawEmpty;
155
+ if (trackSkippedEmpty) nextSkippedEmpty[idx] = sawEmpty;
152
156
 
153
157
  var token = tokens[idx];
154
158
  if (!token || token.type !== 'text') continue;
@@ -181,7 +185,7 @@ function process_inlines(tokens, state, ctx, inlineToken) {
181
185
 
182
186
  if (normalizeSoftBreaks) normalize_text_tokens(tokens);
183
187
 
184
- var nextInfo = build_next_text_info(tokens);
188
+ var nextInfo = build_next_text_info(tokens, considerInlineBoundaries);
185
189
  var nextTextIndex = nextInfo.nextTextIndex;
186
190
  var nextSkippedEmpty = nextInfo.nextSkippedEmpty;
187
191
 
@@ -209,8 +213,12 @@ function process_inlines(tokens, state, ctx, inlineToken) {
209
213
  trailing = '';
210
214
  var trailingMatchesPunctuation = false;
211
215
 
212
- var skippedEmptyBefore = sawEmptySinceLast;
213
- var skippedEmptyAfter = nextSkippedEmpty[i];
216
+ var skippedEmptyBefore = false;
217
+ var skippedEmptyAfter = false;
218
+ if (considerInlineBoundaries) {
219
+ skippedEmptyBefore = sawEmptySinceLast;
220
+ skippedEmptyAfter = nextSkippedEmpty ? nextSkippedEmpty[i] : false;
221
+ }
214
222
 
215
223
  if (hasLastText) {
216
224
  c1 = lastTextContent.charCodeAt(lastTextContent.length - 2);
@@ -233,44 +241,39 @@ function process_inlines(tokens, state, ctx, inlineToken) {
233
241
  }
234
242
 
235
243
  remove_break = false;
236
-
237
- // remove newline if it's adjacent to ZWSP
238
- if (last === '\u200b' || next === '\u200b') remove_break = true;
239
-
240
- var lastWidthClass = '';
241
244
  var nextWidthClass = '';
245
+ var nextWidthComputed = false;
242
246
 
243
- // remove newline if both characters AND/OR fullwidth (F), wide (W) or
244
- // halfwidth (H), but not Hangul
245
- var tLast = false;
246
- var tNext = false;
247
-
248
- var needsWidthForRemoval = !remove_break;
249
- var needsWidthForPunctuation = punctuationSpace && trailingMatchesPunctuation && last && next && next !== '\u200b';
250
-
251
- if (needsWidthForRemoval || needsWidthForPunctuation) {
252
- if (needsWidthForRemoval) {
253
- lastWidthClass = get_cached_width_class(last);
247
+ // remove newline if it's adjacent to ZWSP
248
+ if (last === '\u200b' || next === '\u200b') {
249
+ remove_break = true;
250
+ } else {
251
+ // remove newline if both characters AND/OR fullwidth (F), wide (W) or
252
+ // halfwidth (H), but not Hangul
253
+ var lastWidthClass = get_cached_width_class(last);
254
+ if (either || lastWidthClass) {
255
+ nextWidthClass = get_cached_width_class(next);
256
+ nextWidthComputed = true;
254
257
  }
255
- nextWidthClass = get_cached_width_class(next);
256
258
 
257
- if (needsWidthForRemoval) {
258
- tLast = lastWidthClass !== '';
259
- tNext = nextWidthClass !== '';
259
+ var tLast = lastWidthClass !== '';
260
+ var tNext = nextWidthComputed ? nextWidthClass !== '' : false;
260
261
 
261
- if (considerInlineBoundaries && (skippedEmptyBefore || skippedEmptyAfter) && tLast && tNext) {
262
- tLast = false;
263
- tNext = false;
264
- }
265
- if (either ? tLast || tNext : tLast && tNext) {
266
- if (!is_hangul(last) && !is_hangul(next)) remove_break = true;
267
- }
262
+ if (considerInlineBoundaries && (skippedEmptyBefore || skippedEmptyAfter) && tLast && tNext) {
263
+ tLast = false;
264
+ tNext = false;
265
+ }
266
+ if (either ? tLast || tNext : tLast && tNext) {
267
+ if (!is_hangul(last) && !is_hangul(next)) remove_break = true;
268
268
  }
269
269
  }
270
270
 
271
271
  if (remove_break) {
272
272
  var insertPunctuationSpace = false;
273
- if (punctuationSpace && punctuationConfig && trailingMatchesPunctuation && last && next && next !== '\u200b') {
273
+ if (needsPunctuation && trailingMatchesPunctuation && last && next && next !== '\u200b') {
274
+ if (!nextWidthComputed) {
275
+ nextWidthClass = get_cached_width_class(next);
276
+ }
274
277
  var nextIsFullwidthOrWide = nextWidthClass === 'F' || nextWidthClass === 'W';
275
278
  if (is_printable_ascii(next) || nextIsFullwidthOrWide) insertPunctuationSpace = true;
276
279
  }
@@ -290,7 +293,7 @@ function process_inlines(tokens, state, ctx, inlineToken) {
290
293
  }
291
294
  }
292
295
 
293
- if (punctuationSpace && punctuationConfig) {
296
+ if (needsPunctuation) {
294
297
  apply_missing_punctuation_spacing(tokens, inlineToken, punctuationSpace, punctuationConfig);
295
298
  }
296
299
  }
@@ -397,11 +400,11 @@ function apply_missing_punctuation_spacing(tokens, inlineToken, punctuationSpace
397
400
  current.content.slice(-punctuationConfig.maxLength) :
398
401
  current.content.slice(-1);
399
402
  if (!matches_punctuation_sequence(trailing, punctuationConfig)) continue;
400
- if (/\s$/.test(current.content)) continue;
403
+ if (WHITESPACE_TRAIL_RE.test(current.content)) continue;
401
404
 
402
405
  var nextInfo = find_next_visible_token(tokens, idx + 1);
403
406
  if (!nextInfo) continue;
404
- if (nextInfo.token.type === 'text' && /^\s/.test(nextInfo.token.content || '')) continue;
407
+ if (nextInfo.token.type === 'text' && WHITESPACE_LEAD_RE.test(nextInfo.token.content || '')) continue;
405
408
 
406
409
  if (!raw_boundary_includes_newline(inlineToken.content, tokens, idx, nextInfo.index, nextInfo.fragment, rawSearchState)) {
407
410
  continue;
@@ -501,7 +504,7 @@ function apply_single_text_token_spacing(tokens, inlineToken, punctuationSpace,
501
504
  var splitIndex = cumulativeLength + leftRaw.length + offsetDelta;
502
505
  if (splitIndex >= 0 && splitIndex <= updatedContent.length) {
503
506
  var existingChar = updatedContent.charAt(splitIndex);
504
- if (existingChar && /\s/.test(existingChar)) {
507
+ if (existingChar && WHITESPACE_RE.test(existingChar)) {
505
508
  // already has whitespace at this boundary
506
509
  cumulativeLength += leftRaw.length;
507
510
  continue;
@@ -528,7 +531,7 @@ function extract_visible_tail(raw, maxLength) {
528
531
  var charLen = code > 0xFFFF ? 2 : 1;
529
532
  var ch = raw.slice(pos - charLen, pos);
530
533
  pos -= charLen;
531
- if (/\s/.test(ch)) continue;
534
+ if (WHITESPACE_RE.test(ch)) continue;
532
535
  if (is_markup_closer_char(ch)) continue;
533
536
  result = ch + result;
534
537
  }
@@ -543,7 +546,7 @@ function extract_visible_head(raw) {
543
546
  var charLen = code > 0xFFFF ? 2 : 1;
544
547
  var ch = raw.slice(pos, pos + charLen);
545
548
  pos += charLen;
546
- if (/\s/.test(ch)) continue;
549
+ if (WHITESPACE_RE.test(ch)) continue;
547
550
  return ch;
548
551
  }
549
552
  return '';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@peaceroad/markdown-it-cjk-breaks-mod",
3
- "version": "0.1.2",
3
+ "version": "0.1.3",
4
4
  "type": "module",
5
5
  "description": "Suppress linebreaks between east asian (Especially Japanese) characters",
6
6
  "repository": {