@peaceroad/markdown-it-strong-ja 0.9.1 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -357,6 +357,83 @@ Supporting visuals:
357
357
  - `aggressive`:
358
358
  `<p>broken **tail <a href="https://x.test">aa<strong>aa</strong><em>Text</em><strong>and<em>More</em>bb</strong>bb</a> after</p>`
359
359
 
360
+ ## Compatibility Notes
361
+
362
+ ### `markdown-it-attrs` 5.x parity
363
+
364
+ When `markdown-it-attrs` is installed, strong-ja follows the token stream produced by that plugin and does not reinterpret where `{...}` attributes should be attached. This is intentional: strong-ja should not make attribute syntax mean something different from `markdown-it-attrs` alone.
365
+
366
+ One edge case to be aware of is a tight list item followed by an emphasized line:
367
+
368
+ ```markdown
369
+ - e {.li-style}
370
+ *{.ul-style}*
371
+ ```
372
+
373
+ With `markdown-it-attrs` 5.x, the first attribute block is consumed as a block-level attribute on the hidden `paragraph_open` inside the tight list. Because that paragraph token is hidden by markdown-it's tight-list rendering, the class is not visible in the final HTML. The second `{.ul-style}` is inside emphasis text, not a suffix after a closed inline token, so it remains literal text:
374
+
375
+ ```html
376
+ <ul>
377
+ <li>e
378
+ <em>{.ul-style}</em></li>
379
+ </ul>
380
+ ```
381
+
382
+ This output matches `markdown-it-attrs` alone. To attach attributes intentionally, use the syntax owned by `markdown-it-attrs`, for example:
383
+
384
+ ```markdown
385
+ - e
386
+ {.ul-style}
387
+ ```
388
+
389
+ ```html
390
+ <ul class="ul-style">
391
+ <li>e</li>
392
+ </ul>
393
+ ```
394
+
395
+ or attach inline attributes after the closing inline token:
396
+
397
+ ```markdown
398
+ - e
399
+ *x*{.ul-style}
400
+ ```
401
+
402
+ ```html
403
+ <ul>
404
+ <li>e
405
+ <em class="ul-style">x</em></li>
406
+ </ul>
407
+ ```
408
+
409
+ strong-ja deliberately keeps this behavior for parity with `markdown-it-attrs` rather than adding a local workaround.
410
+
411
+ ### `markdown-it` 14.2 astral delimiter policy
412
+
413
+ `markdown-it` 14.2 recognizes astral characters (surrogate pairs) as full Unicode code points when scanning emphasis delimiters. strong-ja keeps `compatible` mode aligned with that upstream behavior.
414
+
415
+ In Japanese modes, strong-ja still adds its own delimiter relaxation only when Japanese/CJK context is present. Astral Han characters, such as those in CJK Unified Ideographs Extension B, are treated as CJK context:
416
+
417
+ ```markdown
418
+ *𠀋?*abc*
419
+ ```
420
+
421
+ ```html
422
+ <p><em>𠀋?</em>abc*</p>
423
+ ```
424
+
425
+ Emoji or symbol-only English contexts remain aligned with `markdown-it` and are not promoted just because they are astral characters:
426
+
427
+ ```markdown
428
+ *😀?*abc*
429
+ ```
430
+
431
+ ```html
432
+ <p>*😀?<em>abc</em></p>
433
+ ```
434
+
435
+ Symbols inside Japanese prose may still be emphasized by the existing Japanese-context rule; for example, `**😀**です` can render as `<p><strong>😀</strong>です</p>`. Use `mode: 'compatible'` when exact `markdown-it` 14.2 delimiter behavior is required.
436
+
360
437
  ## Options
361
438
 
362
439
  ### `mode`
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@peaceroad/markdown-it-strong-ja",
3
3
  "description": "Extends asterisk emphasis handling for Japanese text while keeping markdown-it behavior as close as practical.",
4
- "version": "0.9.1",
4
+ "version": "0.9.2",
5
5
  "main": "index.js",
6
6
  "type": "module",
7
7
  "files": [
@@ -34,16 +34,16 @@
34
34
  "author": "peaceroad <peaceroad@gmail.com>",
35
35
  "license": "MIT",
36
36
  "dependencies": {
37
- "markdown-it": "^14.1.0"
37
+ "markdown-it": "^14.2.0"
38
38
  },
39
39
  "devDependencies": {
40
- "@peaceroad/markdown-it-cjk-breaks-mod": "^0.1.10",
41
- "@peaceroad/markdown-it-hr-sandwiched-semantic-container": "^0.11.0",
42
- "@peaceroad/markdown-it-renderer-image": "^0.12.0",
43
- "@peaceroad/markdown-it-renderer-inline-text": "^0.8.0",
44
- "markdown-it-attrs": "^4.3.1",
40
+ "@peaceroad/markdown-it-cjk-breaks-mod": "^0.1.11",
41
+ "@peaceroad/markdown-it-hr-sandwiched-semantic-container": "^0.12.0",
42
+ "@peaceroad/markdown-it-renderer-image": "^0.16.0",
43
+ "@peaceroad/markdown-it-renderer-inline-text": "^0.8.1",
44
+ "markdown-it-attrs": "^5.0.0",
45
45
  "markdown-it-sub": "^2.0.0",
46
46
  "markdown-it-sup": "^2.0.0",
47
- "p7d-markdown-it-p-captions": "^0.21.0"
47
+ "p7d-markdown-it-p-captions": "^0.23.0"
48
48
  }
49
49
  }
@@ -3,6 +3,8 @@ import {
3
3
  REG_ATTRS,
4
4
  isJapaneseChar,
5
5
  isAsciiWordCode,
6
+ codePointAtSafe,
7
+ codePointBeforeSafe,
6
8
  hasCjkBreaksRule,
7
9
  isCjkBreaksRuleName,
8
10
  getRuntimeOpt,
@@ -119,8 +121,8 @@ const registerTokenCompat = (md, baseOpt) => {
119
121
  if (!prevToken || !nextToken) continue
120
122
  if (prevToken.type !== 'text' || !prevToken.content) continue
121
123
  if (nextToken.type !== 'text' || !nextToken.content) continue
122
- const prevCharCode = prevToken.content.charCodeAt(prevToken.content.length - 1)
123
- const nextCharCode = nextToken.content.charCodeAt(0)
124
+ const prevCharCode = codePointBeforeSafe(prevToken.content, prevToken.content.length, 0)
125
+ const nextCharCode = codePointAtSafe(nextToken.content, 0, 0)
124
126
  const isAsciiWord = isAsciiWordCode(nextCharCode)
125
127
  const shouldReplace = isAsciiWord &&
126
128
  isJapaneseChar(prevCharCode) && !isJapaneseChar(nextCharCode)
@@ -138,8 +140,8 @@ const registerTokenCompat = (md, baseOpt) => {
138
140
  for (let idx = 0; idx < child.content.length; idx++) {
139
141
  const ch = child.content[idx]
140
142
  if (ch === '\n') {
141
- const prevCharCode = idx > 0 ? child.content.charCodeAt(idx - 1) : 0
142
- const nextCharCode = idx + 1 < child.content.length ? child.content.charCodeAt(idx + 1) : 0
143
+ const prevCharCode = codePointBeforeSafe(child.content, idx, 0)
144
+ const nextCharCode = codePointAtSafe(child.content, idx + 1, 0)
143
145
  const isAsciiWord = isAsciiWordCode(nextCharCode)
144
146
  const shouldReplace = isAsciiWord &&
145
147
  isJapaneseChar(prevCharCode) && !isJapaneseChar(nextCharCode)
@@ -187,7 +189,7 @@ const registerTokenCompat = (md, baseOpt) => {
187
189
  if (!prevTextCharCode || !isJapaneseChar(prevTextCharCode)) continue
188
190
  const next = children[j + 1]
189
191
  if (!next || next.type !== 'text' || !next.content) continue
190
- const nextCharCode = next.content.charCodeAt(0)
192
+ const nextCharCode = codePointAtSafe(next.content, 0, 0)
191
193
  if (nextCharCode !== 0x7B) continue
192
194
  child.type = 'softbreak'
193
195
  child.tag = ''
@@ -196,7 +198,7 @@ const registerTokenCompat = (md, baseOpt) => {
196
198
  child.info = ''
197
199
  continue
198
200
  }
199
- prevTextCharCode = child.content.charCodeAt(child.content.length - 1)
201
+ prevTextCharCode = codePointBeforeSafe(child.content, child.content.length, 0)
200
202
  }
201
203
  }
202
204
  }
package/src/token-core.js CHANGED
@@ -3,6 +3,10 @@ import Token from 'markdown-it/lib/token.mjs'
3
3
  import {
4
4
  CHAR_ASTERISK,
5
5
  CHAR_NEWLINE,
6
+ codePointAtSafe,
7
+ codePointBeforeSafe,
8
+ codePointStartBefore,
9
+ codePointSize,
6
10
  isJapaneseChar,
7
11
  isAsciiWordCode,
8
12
  isSoftSpaceCode,
@@ -280,24 +284,34 @@ const buildScanDelimsLookupCache = (src) => {
280
284
 
281
285
  let prev = -1
282
286
  for (let i = 0; i < len; i++) {
283
- const code = src.charCodeAt(i)
287
+ const code = codePointAtSafe(src, i)
284
288
  if (code === CHAR_NEWLINE) {
285
289
  prev = -1
286
290
  continue
287
291
  }
292
+ const size = codePointSize(code)
288
293
  if (!isSoftSpaceCode(code)) prev = i
289
294
  prevNonSpaceSameLine[i] = prev
295
+ if (size === 2 && i + 1 < len) {
296
+ prevNonSpaceSameLine[i + 1] = prev
297
+ i++
298
+ }
290
299
  }
291
300
 
292
301
  let next = -1
293
302
  for (let i = len - 1; i >= 0; i--) {
294
- const code = src.charCodeAt(i)
303
+ const cpStart = codePointStartBefore(src, i + 1)
304
+ const code = cpStart === -1 ? 0 : codePointAtSafe(src, cpStart)
295
305
  if (code === CHAR_NEWLINE) {
296
306
  next = -1
297
307
  continue
298
308
  }
299
- if (!isSoftSpaceCode(code)) next = i
309
+ if (!isSoftSpaceCode(code)) next = cpStart
300
310
  nextNonSpaceSameLine[i] = next
311
+ if (cpStart !== i) {
312
+ nextNonSpaceSameLine[cpStart] = next
313
+ i = cpStart
314
+ }
301
315
  }
302
316
 
303
317
  return {
@@ -323,11 +337,13 @@ const findPrevNonSpaceIndex = (src, start, lookupCache = null) => {
323
337
  start < lookupCache.prevNonSpaceSameLine.length) {
324
338
  return lookupCache.prevNonSpaceSameLine[start]
325
339
  }
326
- for (let i = start; i >= 0; i--) {
327
- const code = src.charCodeAt(i)
340
+ for (let i = start; i >= 0;) {
341
+ const cpStart = codePointStartBefore(src, i + 1)
342
+ if (cpStart === -1) return -1
343
+ const code = codePointAtSafe(src, cpStart)
328
344
  if (code === CHAR_NEWLINE) return -1
329
- if (isSoftSpaceCode(code)) continue
330
- return i
345
+ if (!isSoftSpaceCode(code)) return cpStart
346
+ i = cpStart - 1
331
347
  }
332
348
  return -1
333
349
  }
@@ -340,11 +356,11 @@ const findNextNonSpaceIndex = (src, start, max, lookupCache = null) => {
340
356
  const next = lookupCache.nextNonSpaceSameLine[start]
341
357
  return next !== -1 && next < max ? next : -1
342
358
  }
343
- for (let i = start; i < max; i++) {
344
- const code = src.charCodeAt(i)
359
+ for (let i = start; i < max;) {
360
+ const code = codePointAtSafe(src, i)
345
361
  if (code === CHAR_NEWLINE) return -1
346
- if (isSoftSpaceCode(code)) continue
347
- return i
362
+ if (!isSoftSpaceCode(code)) return i
363
+ i += codePointSize(code)
348
364
  }
349
365
  return -1
350
366
  }
@@ -353,26 +369,26 @@ const hasAsciiStartAfterOptionalOpenWrappers = (src, index, max, lookupCache = n
353
369
  let i = index
354
370
  // Two wrappers are enough for common shapes: * [ "word" ]*
355
371
  for (let wrappers = 0; wrappers < 2 && i >= 0 && i < max; wrappers++) {
356
- const code = src.charCodeAt(i)
372
+ const code = codePointAtSafe(src, i)
357
373
  if (!isAsciiGuardOpenWrapper(code)) break
358
374
  i = findNextNonSpaceIndex(src, i + 1, max, lookupCache)
359
375
  if (i === -1) return false
360
376
  }
361
377
  if (i < 0 || i >= max) return false
362
- return isAsciiWordCode(src.charCodeAt(i))
378
+ return isAsciiWordCode(codePointAtSafe(src, i))
363
379
  }
364
380
 
365
381
  const hasAsciiEndBeforeOptionalCloseWrappers = (src, index, lookupCache = null) => {
366
382
  let i = index
367
383
  // Two wrappers are enough for common shapes: *["word"] *
368
384
  for (let wrappers = 0; wrappers < 2 && i >= 0; wrappers++) {
369
- const code = src.charCodeAt(i)
385
+ const code = codePointAtSafe(src, i)
370
386
  if (!isAsciiGuardCloseWrapper(code)) break
371
387
  i = findPrevNonSpaceIndex(src, i - 1, lookupCache)
372
388
  if (i === -1) return false
373
389
  }
374
390
  if (i < 0) return false
375
- return isAsciiWordCode(src.charCodeAt(i))
391
+ return isAsciiWordCode(codePointAtSafe(src, i))
376
392
  }
377
393
 
378
394
  const isMarkdownStructuralOpenWrapper = (code) => {
@@ -409,18 +425,20 @@ const findPrevNonSpaceLimited = (src, start, maxLook, lookupCache = null) => {
409
425
  start < lookupCache.prevNonSpaceSameLine.length) {
410
426
  const prev = lookupCache.prevNonSpaceSameLine[start]
411
427
  if (prev !== -1 && (start - prev) < maxLook) {
412
- return src.charCodeAt(prev)
428
+ return codePointAtSafe(src, prev)
413
429
  }
414
430
  return 0
415
431
  }
416
432
  let looked = 0
417
- for (let i = start; i >= 0; i--) {
433
+ for (let i = start; i >= 0;) {
418
434
  if (looked >= maxLook) break
419
- const code = src.charCodeAt(i)
420
- looked++
435
+ const cpStart = codePointStartBefore(src, i + 1)
436
+ if (cpStart === -1) break
437
+ const code = codePointAtSafe(src, cpStart)
438
+ looked += i - cpStart + 1
421
439
  if (code === CHAR_NEWLINE) return 0
422
- if (isSoftSpaceCode(code)) continue
423
- return code
440
+ if (!isSoftSpaceCode(code)) return code
441
+ i = cpStart - 1
424
442
  }
425
443
  return 0
426
444
  }
@@ -432,18 +450,19 @@ const findNextNonSpaceLimited = (src, start, max, maxLook, lookupCache = null) =
432
450
  start < lookupCache.nextNonSpaceSameLine.length) {
433
451
  const next = lookupCache.nextNonSpaceSameLine[start]
434
452
  if (next !== -1 && next < max && (next - start) < maxLook) {
435
- return src.charCodeAt(next)
453
+ return codePointAtSafe(src, next)
436
454
  }
437
455
  return 0
438
456
  }
439
457
  let looked = 0
440
- for (let i = start; i < max; i++) {
458
+ for (let i = start; i < max;) {
441
459
  if (looked >= maxLook) break
442
- const code = src.charCodeAt(i)
443
- looked++
460
+ const code = codePointAtSafe(src, i)
461
+ const size = codePointSize(code)
462
+ looked += size
444
463
  if (code === CHAR_NEWLINE) return 0
445
- if (isSoftSpaceCode(code)) continue
446
- return code
464
+ if (!isSoftSpaceCode(code)) return code
465
+ i += size
447
466
  }
448
467
  return 0
449
468
  }
@@ -462,8 +481,8 @@ const hasJapaneseContextForBracketWrapper = (src, start, pos, max, lastChar, nex
462
481
 
463
482
  const scanPrevSingleStarContextFlags = (src, start) => {
464
483
  let hasJapaneseBetween = false
465
- for (let i = start - 1; i >= 0; i--) {
466
- const code = src.charCodeAt(i)
484
+ for (let i = codePointStartBefore(src, start); i >= 0; i = codePointStartBefore(src, i)) {
485
+ const code = codePointAtSafe(src, i)
467
486
  if (code === CHAR_NEWLINE) break
468
487
  if (isSentenceBoundaryStop(code) && i < start - 1) break
469
488
  if (code !== CHAR_ASTERISK) {
@@ -475,8 +494,8 @@ const scanPrevSingleStarContextFlags = (src, start) => {
475
494
  backslashCount++
476
495
  }
477
496
  if ((backslashCount % 2) === 1) continue
478
- const prevCode = i > 0 ? src.charCodeAt(i - 1) : 0
479
- const nextCode = i + 1 < src.length ? src.charCodeAt(i + 1) : 0
497
+ const prevCode = codePointBeforeSafe(src, i, 0)
498
+ const nextCode = codePointAtSafe(src, i + 1, 0)
480
499
  if (prevCode === CHAR_ASTERISK || nextCode === CHAR_ASTERISK) continue
481
500
  return hasJapaneseBetween ? PREV_STAR_HAS_OPENER | PREV_STAR_HAS_JP_BETWEEN : PREV_STAR_HAS_OPENER
482
501
  }
@@ -778,12 +797,12 @@ const patchScanDelims = (md) => {
778
797
  const aggressiveMode = (modeFlags & MODE_FLAG_AGGRESSIVE) !== 0
779
798
  const max = this.posMax
780
799
  let lookupCache = null
781
- const lastChar = start > 0 ? src.charCodeAt(start - 1) : 0x20
800
+ const lastChar = codePointBeforeSafe(src, start, 0x20)
782
801
 
783
802
  const count = base && base.length ? base.length : 1
784
803
  const pos = start + count
785
804
 
786
- const nextChar = pos < max ? src.charCodeAt(pos) : 0x20
805
+ const nextChar = codePointAtSafe(src, pos, 0x20)
787
806
  let prevStarFlags = -1
788
807
 
789
808
  const leftJapanese = isJapaneseChar(lastChar)
@@ -819,7 +838,7 @@ const patchScanDelims = (md) => {
819
838
  lookupCache || (lookupCache = getScanDelimsLookupCache(this))
820
839
  )
821
840
  if (prevNonSpaceIdx !== -1) {
822
- const prevNonSpaceLocal = src.charCodeAt(prevNonSpaceIdx)
841
+ const prevNonSpaceLocal = codePointAtSafe(src, prevNonSpaceIdx)
823
842
  const plusStrictAsciiBoundary = plusMode &&
824
843
  hasAsciiEndBeforeOptionalCloseWrappers(src, prevNonSpaceIdx, lookupCache)
825
844
  if (prevNonSpaceLocal !== CHAR_ASTERISK && !plusStrictAsciiBoundary) {
@@ -835,7 +854,7 @@ const patchScanDelims = (md) => {
835
854
  lookupCache || (lookupCache = getScanDelimsLookupCache(this))
836
855
  )
837
856
  if (nextNonSpaceIdx !== -1) {
838
- const nextNonSpace = src.charCodeAt(nextNonSpaceIdx)
857
+ const nextNonSpace = codePointAtSafe(src, nextNonSpaceIdx)
839
858
  const plusStrictAsciiBoundary = plusMode &&
840
859
  hasAsciiStartAfterOptionalOpenWrappers(src, nextNonSpaceIdx, max, lookupCache)
841
860
  if (nextNonSpace !== CHAR_ASTERISK && !plusStrictAsciiBoundary) {
@@ -1,4 +1,4 @@
1
- import { isJapaneseChar } from '../token-utils.js'
1
+ import { codePointAtSafe, codePointBeforeSafe, codePointSize, isJapaneseChar } from '../token-utils.js'
2
2
 
3
3
  const CHAR_ASTERISK = 0x2A // *
4
4
  const INLINE_REPAIR_EM_OUTER_STRONG_SEQUENCE = 1 << 0
@@ -46,11 +46,13 @@ const tokenHasJapaneseChars = (token) => {
46
46
  return token.__strongJaHasJapaneseChar
47
47
  }
48
48
  let hasJapanese = false
49
- for (let i = 0; i < content.length; i++) {
50
- if (isJapaneseChar(content.charCodeAt(i))) {
49
+ for (let i = 0; i < content.length;) {
50
+ const code = codePointAtSafe(content, i)
51
+ if (isJapaneseChar(code)) {
51
52
  hasJapanese = true
52
53
  break
53
54
  }
55
+ i += codePointSize(code)
54
56
  }
55
57
  token.__strongJaJapaneseSource = content
56
58
  token.__strongJaHasJapaneseChar = hasJapanese
@@ -103,9 +105,9 @@ const countDelimiterLikeStrongRuns = (content, from = 0, limit = 0) => {
103
105
  continue
104
106
  }
105
107
  const pos = at
106
- const prevCode = pos > 0 ? content.charCodeAt(pos - 1) : 0
108
+ const prevCode = codePointBeforeSafe(content, pos, 0)
107
109
  const nextPos = pos + 2
108
- const nextCode = nextPos < len ? content.charCodeAt(nextPos) : 0
110
+ const nextCode = codePointAtSafe(content, nextPos, 0)
109
111
  const prevSameMarker = prevCode === CHAR_ASTERISK
110
112
  const nextSameMarker = nextCode === CHAR_ASTERISK
111
113
  if (prevSameMarker || nextSameMarker) {
@@ -18,10 +18,69 @@ const VALID_CANONICAL_MODES = new Set([
18
18
  ])
19
19
  const REG_JAPANESE = /[\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Han}\u3000-\u303F\uFF00-\uFFEF]/u
20
20
  const REG_ATTRS = /{[^{}\n!@#%^&*()]+?}$/
21
+ const CHAR_REPLACEMENT = 0xFFFD
22
+
23
+ const isHighSurrogate = (code) => code >= 0xD800 && code <= 0xDBFF
24
+ const isLowSurrogate = (code) => code >= 0xDC00 && code <= 0xDFFF
25
+
26
+ const combineSurrogates = (high, low) => {
27
+ return 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00)
28
+ }
29
+
30
+ const codePointAtSafe = (src, index, fallback = 0) => {
31
+ if (typeof src !== 'string' || index < 0 || index >= src.length) return fallback
32
+ const first = src.charCodeAt(index)
33
+ if (first < 0xD800 || first > 0xDFFF) return first
34
+ if (first <= 0xDBFF) {
35
+ const second = index + 1 < src.length ? src.charCodeAt(index + 1) : 0
36
+ return isLowSurrogate(second) ? combineSurrogates(first, second) : CHAR_REPLACEMENT
37
+ }
38
+ return CHAR_REPLACEMENT
39
+ }
40
+
41
+ const codePointBeforeSafe = (src, index, fallback = 0) => {
42
+ if (typeof src !== 'string' || index <= 0 || index > src.length) return fallback
43
+ const last = src.charCodeAt(index - 1)
44
+ if (last < 0xD800 || last > 0xDFFF) return last
45
+ if (last >= 0xDC00) {
46
+ const first = index - 2 >= 0 ? src.charCodeAt(index - 2) : 0
47
+ return isHighSurrogate(first) ? combineSurrogates(first, last) : CHAR_REPLACEMENT
48
+ }
49
+ return CHAR_REPLACEMENT
50
+ }
51
+
52
+ const codePointStartBefore = (src, index) => {
53
+ if (typeof src !== 'string' || index <= 0 || index > src.length) return -1
54
+ const lastIdx = index - 1
55
+ const last = src.charCodeAt(lastIdx)
56
+ if (isLowSurrogate(last) && lastIdx - 1 >= 0 && isHighSurrogate(src.charCodeAt(lastIdx - 1))) {
57
+ return lastIdx - 1
58
+ }
59
+ return lastIdx
60
+ }
61
+
62
+ const codePointSize = (code) => code > 0xFFFF ? 2 : 1
63
+
64
+ const isAstralJapaneseCode = (code) => {
65
+ return (code >= 0x1AFF0 && code <= 0x1AFFF) || // Kana Extended-B
66
+ (code >= 0x1B000 && code <= 0x1B0FF) || // Kana Supplement
67
+ (code >= 0x1B100 && code <= 0x1B12F) || // Kana Extended-A
68
+ (code >= 0x1B130 && code <= 0x1B16F) || // Small Kana Extension
69
+ (code >= 0x20000 && code <= 0x2A6DF) || // CJK Unified Ideographs Extension B
70
+ (code >= 0x2A700 && code <= 0x2B73F) || // Extension C
71
+ (code >= 0x2B740 && code <= 0x2B81F) || // Extension D
72
+ (code >= 0x2B820 && code <= 0x2CEAF) || // Extension E
73
+ (code >= 0x2CEB0 && code <= 0x2EBEF) || // Extension F
74
+ (code >= 0x2EBF0 && code <= 0x2EE5F) || // Extension I
75
+ (code >= 0x2F800 && code <= 0x2FA1F) || // CJK Compatibility Ideographs Supplement
76
+ (code >= 0x30000 && code <= 0x3134F) || // Extension G
77
+ (code >= 0x31350 && code <= 0x323AF) // Extension H
78
+ }
21
79
 
22
80
  const isJapaneseChar = (ch) => {
23
81
  if (!ch) return false
24
- const code = typeof ch === 'string' ? ch.charCodeAt(0) : ch
82
+ const code = typeof ch === 'string' ? ch.codePointAt(0) : ch
83
+ if (!Number.isFinite(code)) return false
25
84
  if (code < 128) return false
26
85
  if (code >= 0x3040 && code <= 0x309F) return true
27
86
  if (code >= 0x30A0 && code <= 0x30FF) return true
@@ -32,7 +91,10 @@ const isJapaneseChar = (ch) => {
32
91
  if (code >= 0xF900 && code <= 0xFAFF) return true
33
92
  if (code >= 0x3000 && code <= 0x303F) return true
34
93
  if (code >= 0xFF00 && code <= 0xFFEF) return true
35
- return REG_JAPANESE.test(String.fromCharCode(code))
94
+ if (code > 0x10FFFF) return false
95
+ if (code >= 0x10000 && isAstralJapaneseCode(code)) return true
96
+ if (code >= 0x10000 && code < 0x20000) return false
97
+ return REG_JAPANESE.test(String.fromCodePoint(code))
36
98
  }
37
99
 
38
100
  const isAsciiWordCode = (code) => {
@@ -257,6 +319,10 @@ export {
257
319
  CHAR_NEWLINE,
258
320
  CHAR_IDEOGRAPHIC_SPACE,
259
321
  REG_ATTRS,
322
+ codePointAtSafe,
323
+ codePointBeforeSafe,
324
+ codePointStartBefore,
325
+ codePointSize,
260
326
  isJapaneseChar,
261
327
  isAsciiWordCode,
262
328
  isSoftSpaceCode,