@peaceroad/markdown-it-cjk-breaks-mod 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2):
  1. package/index.js +93 -70
  2. package/package.json +1 -1
package/index.js CHANGED
@@ -6,6 +6,13 @@ const ASCII_PRINTABLE_MAX = 0x7E;
6
6
  const IDEOGRAPHIC_SPACE = '\u3000';
7
7
  const DEFAULT_PUNCTUATION_TARGETS = ['!', '?', '⁉', '!?', '?!', '!?', '?!', '.', ':'];
8
8
  const DEFAULT_PUNCTUATION_CONFIG = create_punctuation_config(DEFAULT_PUNCTUATION_TARGETS);
9
+ /* eslint-disable max-len */
10
+ // require('unicode-10.0.0/Script/Hangul/regex')
11
+ const HANGUL_RE = /[\u1100-\u11FF\u302E\u302F\u3131-\u318E\u3200-\u321E\u3260-\u327E\uA960-\uA97C\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uFFA0-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC]/;
12
+ /* eslint-enable max-len */
13
+ const WHITESPACE_RE = /\s/;
14
+ const WHITESPACE_LEAD_RE = /^\s/;
15
+ const WHITESPACE_TRAIL_RE = /\s$/;
9
16
 
10
17
 
11
18
  function is_surrogate(c1, c2) {
@@ -14,10 +21,7 @@ function is_surrogate(c1, c2) {
14
21
 
15
22
 
16
23
  function is_hangul(c) {
17
- // require('unicode-10.0.0/Script/Hangul/regex')
18
- /* eslint-disable max-len */
19
- return /[\u1100-\u11FF\u302E\u302F\u3131-\u318E\u3200-\u321E\u3260-\u327E\uA960-\uA97C\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uFFA0-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC]/.test(c);
20
- /* eslint-enable max-len */
24
+ return HANGUL_RE.test(c);
21
25
  }
22
26
 
23
27
 
@@ -51,10 +55,15 @@ function resolve_punctuation_targets(opts) {
51
55
  if (!opts) return DEFAULT_PUNCTUATION_CONFIG;
52
56
 
53
57
  var hasCustomTargets = Object.prototype.hasOwnProperty.call(opts, 'spaceAfterPunctuationTargets');
58
+ var addTargets = opts.spaceAfterPunctuationTargetsAdd;
59
+ var removeTargets = opts.spaceAfterPunctuationTargetsRemove;
60
+ if (!hasCustomTargets && addTargets === undefined && removeTargets === undefined) {
61
+ return DEFAULT_PUNCTUATION_CONFIG;
62
+ }
54
63
  var baseTargets;
55
64
 
56
65
  if (!hasCustomTargets) {
57
- baseTargets = DEFAULT_PUNCTUATION_TARGETS.slice();
66
+ baseTargets = DEFAULT_PUNCTUATION_TARGETS;
58
67
  } else {
59
68
  var customTargets = opts.spaceAfterPunctuationTargets;
60
69
  if (customTargets === null || customTargets === false) return null;
@@ -63,13 +72,12 @@ function resolve_punctuation_targets(opts) {
63
72
  baseTargets = [ customTargets ];
64
73
  } else if (Array.isArray(customTargets)) {
65
74
  if (customTargets.length === 0) return null;
66
- baseTargets = customTargets.slice();
75
+ baseTargets = customTargets;
67
76
  } else {
68
- baseTargets = DEFAULT_PUNCTUATION_TARGETS.slice();
77
+ baseTargets = DEFAULT_PUNCTUATION_TARGETS;
69
78
  }
70
79
  }
71
80
 
72
- var addTargets = opts.spaceAfterPunctuationTargetsAdd;
73
81
  if (addTargets !== undefined) {
74
82
  var addList = [];
75
83
  if (typeof addTargets === 'string') {
@@ -82,7 +90,6 @@ function resolve_punctuation_targets(opts) {
82
90
  }
83
91
  }
84
92
 
85
- var removeTargets = opts.spaceAfterPunctuationTargetsRemove;
86
93
  if (removeTargets !== undefined) {
87
94
  var removeList = [];
88
95
  if (typeof removeTargets === 'string') {
@@ -140,15 +147,15 @@ function get_cjk_width_class(ch) {
140
147
  return width === 'F' || width === 'W' || width === 'H' ? width : '';
141
148
  }
142
149
 
143
- function build_next_text_info(tokens) {
150
+ function build_next_text_info(tokens, trackSkippedEmpty) {
144
151
  var nextTextIndex = new Array(tokens.length);
145
- var nextSkippedEmpty = new Array(tokens.length);
152
+ var nextSkippedEmpty = trackSkippedEmpty ? new Array(tokens.length) : null;
146
153
  var nextNonEmpty = -1;
147
154
  var sawEmpty = false;
148
155
 
149
156
  for (var idx = tokens.length - 1; idx >= 0; idx--) {
150
157
  nextTextIndex[idx] = nextNonEmpty;
151
- nextSkippedEmpty[idx] = sawEmpty;
158
+ if (trackSkippedEmpty) nextSkippedEmpty[idx] = sawEmpty;
152
159
 
153
160
  var token = tokens[idx];
154
161
  if (!token || token.type !== 'text') continue;
@@ -170,7 +177,7 @@ function build_next_text_info(tokens) {
170
177
 
171
178
 
172
179
  function process_inlines(tokens, state, ctx, inlineToken) {
173
- var i, last, trailing, next, c1, c2, remove_break;
180
+ var i, last, next, c1, c2, remove_break;
174
181
  var either = ctx.either;
175
182
  var normalizeSoftBreaks = ctx.normalizeSoftBreaks;
176
183
  var punctuationSpace = ctx.punctuationSpace;
@@ -179,15 +186,27 @@ function process_inlines(tokens, state, ctx, inlineToken) {
179
186
  var considerInlineBoundaries = ctx.considerInlineBoundaries;
180
187
  var needsPunctuation = punctuationSpace && punctuationConfig && maxPunctuationLength > 0;
181
188
 
189
+ if (!tokens || tokens.length === 0) return;
182
190
  if (normalizeSoftBreaks) normalize_text_tokens(tokens);
183
191
 
184
- var nextInfo = build_next_text_info(tokens);
185
- var nextTextIndex = nextInfo.nextTextIndex;
186
- var nextSkippedEmpty = nextInfo.nextSkippedEmpty;
187
-
188
- var widthCache = Object.create(null);
192
+ var nextTextIndex = null;
193
+ var nextSkippedEmpty = null;
194
+ var widthCache = null;
189
195
  function get_cached_width_class(ch) {
190
196
  if (!ch) return '';
197
+ if (!widthCache) {
198
+ var firstWidth = get_cjk_width_class(ch);
199
+ if (firstWidth === '') {
200
+ var codePoint = ch.codePointAt(0);
201
+ if (codePoint !== undefined && codePoint <= ASCII_PRINTABLE_MAX) return '';
202
+ widthCache = Object.create(null);
203
+ widthCache[ch] = '';
204
+ return '';
205
+ }
206
+ widthCache = Object.create(null);
207
+ widthCache[ch] = firstWidth;
208
+ return firstWidth;
209
+ }
191
210
  var cached = widthCache[ch];
192
211
  if (cached !== undefined) return cached;
193
212
  var width = get_cjk_width_class(ch);
@@ -204,22 +223,24 @@ function process_inlines(tokens, state, ctx, inlineToken) {
204
223
  var isSoftbreakToken = token.type === 'softbreak';
205
224
  var isTextBreakToken = token.type === 'text' && token.content === '\n';
206
225
  if (isSoftbreakToken || isTextBreakToken) {
226
+ if (!nextTextIndex) {
227
+ var nextInfo = build_next_text_info(tokens, considerInlineBoundaries);
228
+ nextTextIndex = nextInfo.nextTextIndex;
229
+ nextSkippedEmpty = nextInfo.nextSkippedEmpty;
230
+ }
207
231
  // default last/next character to space
208
232
  last = next = ' ';
209
- trailing = '';
210
- var trailingMatchesPunctuation = false;
211
-
212
- var skippedEmptyBefore = sawEmptySinceLast;
213
- var skippedEmptyAfter = nextSkippedEmpty[i];
233
+ var skippedEmptyBefore = false;
234
+ var skippedEmptyAfter = false;
235
+ if (considerInlineBoundaries) {
236
+ skippedEmptyBefore = sawEmptySinceLast;
237
+ skippedEmptyAfter = nextSkippedEmpty ? nextSkippedEmpty[i] : false;
238
+ }
214
239
 
215
240
  if (hasLastText) {
216
241
  c1 = lastTextContent.charCodeAt(lastTextContent.length - 2);
217
242
  c2 = lastTextContent.charCodeAt(lastTextContent.length - 1);
218
243
  last = lastTextContent.slice(is_surrogate(c1, c2) ? -2 : -1);
219
- if (needsPunctuation) {
220
- trailing = lastTextContent.slice(-maxPunctuationLength);
221
- trailingMatchesPunctuation = matches_punctuation_sequence(trailing, punctuationConfig);
222
- }
223
244
  }
224
245
 
225
246
  var nextIdx = nextTextIndex[i];
@@ -233,46 +254,44 @@ function process_inlines(tokens, state, ctx, inlineToken) {
233
254
  }
234
255
 
235
256
  remove_break = false;
236
-
237
- // remove newline if it's adjacent to ZWSP
238
- if (last === '\u200b' || next === '\u200b') remove_break = true;
239
-
240
- var lastWidthClass = '';
241
257
  var nextWidthClass = '';
258
+ var nextWidthComputed = false;
242
259
 
243
- // remove newline if both characters AND/OR fullwidth (F), wide (W) or
244
- // halfwidth (H), but not Hangul
245
- var tLast = false;
246
- var tNext = false;
247
-
248
- var needsWidthForRemoval = !remove_break;
249
- var needsWidthForPunctuation = punctuationSpace && trailingMatchesPunctuation && last && next && next !== '\u200b';
250
-
251
- if (needsWidthForRemoval || needsWidthForPunctuation) {
252
- if (needsWidthForRemoval) {
253
- lastWidthClass = get_cached_width_class(last);
260
+ // remove newline if it's adjacent to ZWSP
261
+ if (last === '\u200b' || next === '\u200b') {
262
+ remove_break = true;
263
+ } else {
264
+ // remove newline if both characters AND/OR fullwidth (F), wide (W) or
265
+ // halfwidth (H), but not Hangul
266
+ var lastWidthClass = get_cached_width_class(last);
267
+ if (either || lastWidthClass) {
268
+ nextWidthClass = get_cached_width_class(next);
269
+ nextWidthComputed = true;
254
270
  }
255
- nextWidthClass = get_cached_width_class(next);
256
271
 
257
- if (needsWidthForRemoval) {
258
- tLast = lastWidthClass !== '';
259
- tNext = nextWidthClass !== '';
272
+ var tLast = lastWidthClass !== '';
273
+ var tNext = nextWidthComputed ? nextWidthClass !== '' : false;
260
274
 
261
- if (considerInlineBoundaries && (skippedEmptyBefore || skippedEmptyAfter) && tLast && tNext) {
262
- tLast = false;
263
- tNext = false;
264
- }
265
- if (either ? tLast || tNext : tLast && tNext) {
266
- if (!is_hangul(last) && !is_hangul(next)) remove_break = true;
267
- }
275
+ if (considerInlineBoundaries && (skippedEmptyBefore || skippedEmptyAfter) && tLast && tNext) {
276
+ tLast = false;
277
+ tNext = false;
278
+ }
279
+ if (either ? tLast || tNext : tLast && tNext) {
280
+ if (!is_hangul(last) && !is_hangul(next)) remove_break = true;
268
281
  }
269
282
  }
270
283
 
271
284
  if (remove_break) {
272
285
  var insertPunctuationSpace = false;
273
- if (punctuationSpace && punctuationConfig && trailingMatchesPunctuation && last && next && next !== '\u200b') {
274
- var nextIsFullwidthOrWide = nextWidthClass === 'F' || nextWidthClass === 'W';
275
- if (is_printable_ascii(next) || nextIsFullwidthOrWide) insertPunctuationSpace = true;
286
+ if (needsPunctuation && last && next && next !== '\u200b') {
287
+ var trailing = hasLastText ? lastTextContent.slice(-maxPunctuationLength) : '';
288
+ if (matches_punctuation_sequence(trailing, punctuationConfig)) {
289
+ if (!nextWidthComputed) {
290
+ nextWidthClass = get_cached_width_class(next);
291
+ }
292
+ var nextIsFullwidthOrWide = nextWidthClass === 'F' || nextWidthClass === 'W';
293
+ if (nextIsFullwidthOrWide || is_printable_ascii(next)) insertPunctuationSpace = true;
294
+ }
276
295
  }
277
296
  token.type = 'text';
278
297
  token.content = insertPunctuationSpace ? punctuationSpace : '';
@@ -281,16 +300,16 @@ function process_inlines(tokens, state, ctx, inlineToken) {
281
300
 
282
301
  if (token.type === 'text') {
283
302
  if (!token.content) {
284
- sawEmptySinceLast = true;
303
+ if (considerInlineBoundaries) sawEmptySinceLast = true;
285
304
  } else {
286
305
  lastTextContent = token.content;
287
306
  hasLastText = true;
288
- sawEmptySinceLast = false;
307
+ if (considerInlineBoundaries) sawEmptySinceLast = false;
289
308
  }
290
309
  }
291
310
  }
292
311
 
293
- if (punctuationSpace && punctuationConfig) {
312
+ if (needsPunctuation) {
294
313
  apply_missing_punctuation_spacing(tokens, inlineToken, punctuationSpace, punctuationConfig);
295
314
  }
296
315
  }
@@ -386,6 +405,8 @@ function apply_missing_punctuation_spacing(tokens, inlineToken, punctuationSpace
386
405
  if (!inlineToken || !inlineToken.content) return;
387
406
  if (inlineToken.content.indexOf('\n') === -1) return;
388
407
  if (!tokens || tokens.length === 0) return;
408
+ var maxPunctuationLength = punctuationConfig.maxLength;
409
+ if (maxPunctuationLength <= 0) return;
389
410
 
390
411
  var rawSearchState = { pos: 0 };
391
412
 
@@ -393,15 +414,13 @@ function apply_missing_punctuation_spacing(tokens, inlineToken, punctuationSpace
393
414
  var current = tokens[idx];
394
415
  if (!current || current.type !== 'text' || !current.content) continue;
395
416
 
396
- var trailing = punctuationConfig.maxLength > 0 ?
397
- current.content.slice(-punctuationConfig.maxLength) :
398
- current.content.slice(-1);
417
+ var trailing = current.content.slice(-maxPunctuationLength);
399
418
  if (!matches_punctuation_sequence(trailing, punctuationConfig)) continue;
400
- if (/\s$/.test(current.content)) continue;
419
+ if (WHITESPACE_TRAIL_RE.test(current.content)) continue;
401
420
 
402
421
  var nextInfo = find_next_visible_token(tokens, idx + 1);
403
422
  if (!nextInfo) continue;
404
- if (nextInfo.token.type === 'text' && /^\s/.test(nextInfo.token.content || '')) continue;
423
+ if (nextInfo.token.type === 'text' && WHITESPACE_LEAD_RE.test(nextInfo.token.content || '')) continue;
405
424
 
406
425
  if (!raw_boundary_includes_newline(inlineToken.content, tokens, idx, nextInfo.index, nextInfo.fragment, rawSearchState)) {
407
426
  continue;
@@ -411,7 +430,9 @@ function apply_missing_punctuation_spacing(tokens, inlineToken, punctuationSpace
411
430
  idx = nextInfo.index;
412
431
  }
413
432
 
414
- apply_single_text_token_spacing(tokens, inlineToken, punctuationSpace, punctuationConfig);
433
+ if (tokens.length === 1) {
434
+ apply_single_text_token_spacing(tokens, inlineToken, punctuationSpace, punctuationConfig);
435
+ }
415
436
  }
416
437
 
417
438
 
@@ -481,6 +502,8 @@ function apply_single_text_token_spacing(tokens, inlineToken, punctuationSpace,
481
502
  if (inlineToken.content.indexOf('\n') === -1) return;
482
503
  var token = tokens[0];
483
504
  if (!token || token.type !== 'text' || !token.content) return;
505
+ var maxPunctuationLength = punctuationConfig.maxLength;
506
+ if (maxPunctuationLength <= 0) return;
484
507
 
485
508
  var segments = inlineToken.content.split('\n');
486
509
  if (segments.length < 2) return;
@@ -490,7 +513,7 @@ function apply_single_text_token_spacing(tokens, inlineToken, punctuationSpace,
490
513
  for (var segIdx = 0; segIdx < segments.length - 1; segIdx++) {
491
514
  var leftRaw = segments[segIdx];
492
515
  var rightRaw = segments[segIdx + 1];
493
- var tail = extract_visible_tail(leftRaw, punctuationConfig.maxLength);
516
+ var tail = extract_visible_tail(leftRaw, maxPunctuationLength);
494
517
  var nextChar = extract_visible_head(rightRaw);
495
518
  var shouldInsert = tail &&
496
519
  matches_punctuation_sequence(tail, punctuationConfig) &&
@@ -501,7 +524,7 @@ function apply_single_text_token_spacing(tokens, inlineToken, punctuationSpace,
501
524
  var splitIndex = cumulativeLength + leftRaw.length + offsetDelta;
502
525
  if (splitIndex >= 0 && splitIndex <= updatedContent.length) {
503
526
  var existingChar = updatedContent.charAt(splitIndex);
504
- if (existingChar && /\s/.test(existingChar)) {
527
+ if (existingChar && WHITESPACE_RE.test(existingChar)) {
505
528
  // already has whitespace at this boundary
506
529
  cumulativeLength += leftRaw.length;
507
530
  continue;
@@ -528,7 +551,7 @@ function extract_visible_tail(raw, maxLength) {
528
551
  var charLen = code > 0xFFFF ? 2 : 1;
529
552
  var ch = raw.slice(pos - charLen, pos);
530
553
  pos -= charLen;
531
- if (/\s/.test(ch)) continue;
554
+ if (WHITESPACE_RE.test(ch)) continue;
532
555
  if (is_markup_closer_char(ch)) continue;
533
556
  result = ch + result;
534
557
  }
@@ -543,7 +566,7 @@ function extract_visible_head(raw) {
543
566
  var charLen = code > 0xFFFF ? 2 : 1;
544
567
  var ch = raw.slice(pos, pos + charLen);
545
568
  pos += charLen;
546
- if (/\s/.test(ch)) continue;
569
+ if (WHITESPACE_RE.test(ch)) continue;
547
570
  return ch;
548
571
  }
549
572
  return '';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@peaceroad/markdown-it-cjk-breaks-mod",
3
- "version": "0.1.2",
3
+ "version": "0.1.4",
4
4
  "type": "module",
5
5
  "description": "Suppress linebreaks between east asian (Especially Japanese) characters",
6
6
  "repository": {