@peaceroad/markdown-it-strong-ja 0.9.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@peaceroad/markdown-it-strong-ja",
3
3
  "description": "Extends asterisk emphasis handling for Japanese text while keeping markdown-it behavior as close as practical.",
4
- "version": "0.9.0",
4
+ "version": "0.9.1",
5
5
  "main": "index.js",
6
6
  "type": "module",
7
7
  "files": [
@@ -26,6 +26,7 @@
26
26
  "test:all": "node test/test-all.js",
27
27
  "bench:scan": "node test/material/perf-scan-delims.mjs",
28
28
  "bench:postprocess": "node test/material/perf-postprocess.mjs",
29
+ "bench:isolated": "node test/material/bench-isolated.mjs",
29
30
  "analyze:postprocess-calls": "node test/material/analyze-postprocess-calls.mjs",
30
31
  "analyze:fastpath": "node test/material/analyze-fastpath-hits.mjs"
31
32
  },
@@ -46,4 +47,3 @@
46
47
  "p7d-markdown-it-p-captions": "^0.21.0"
47
48
  }
48
49
  }
49
-
@@ -2,6 +2,7 @@ import Token from 'markdown-it/lib/token.mjs'
2
2
  import {
3
3
  REG_ATTRS,
4
4
  isJapaneseChar,
5
+ isAsciiWordCode,
5
6
  hasCjkBreaksRule,
6
7
  isCjkBreaksRuleName,
7
8
  getRuntimeOpt,
@@ -9,12 +10,6 @@ import {
9
10
  moveRuleAfter
10
11
  } from './token-utils.js'
11
12
 
12
- const isAsciiWordCode = (code) => {
13
- return (code >= 0x30 && code <= 0x39) ||
14
- (code >= 0x41 && code <= 0x5A) ||
15
- (code >= 0x61 && code <= 0x7A)
16
- }
17
-
18
13
  const trimTrailingSpaceTab = (text) => {
19
14
  if (!text) return text
20
15
  let end = text.length
package/src/token-core.js CHANGED
@@ -2,16 +2,15 @@ import { isWhiteSpace } from 'markdown-it/lib/common/utils.mjs'
2
2
  import Token from 'markdown-it/lib/token.mjs'
3
3
  import {
4
4
  CHAR_ASTERISK,
5
- CHAR_SPACE,
6
- CHAR_TAB,
7
5
  CHAR_NEWLINE,
8
- CHAR_IDEOGRAPHIC_SPACE,
9
6
  isJapaneseChar,
7
+ isAsciiWordCode,
8
+ isSoftSpaceCode,
10
9
  MODE_FLAG_COMPATIBLE,
11
10
  MODE_FLAG_AGGRESSIVE,
12
11
  MODE_FLAG_JAPANESE_PLUS,
13
- getRuntimeOpt,
14
- hasRuntimeOverride
12
+ hasRuntimeOverride,
13
+ getRuntimeOpt
15
14
  } from './token-utils.js'
16
15
 
17
16
  const SCAN_DELIMS_PATCHED = Symbol.for('strongJaTokenScanDelimsPatched')
@@ -20,10 +19,6 @@ const PREV_STAR_HAS_OPENER = 1
20
19
  const PREV_STAR_HAS_JP_BETWEEN = 2
21
20
  const SCAN_DELIMS_LOOKUP_KEY = Symbol.for('strongJaTokenScanDelimsLookup')
22
21
 
23
- const isSoftSpaceCode = (code) => {
24
- return code === CHAR_SPACE || code === CHAR_TAB || code === CHAR_IDEOGRAPHIC_SPACE
25
- }
26
-
27
22
  const isPlusQuoteWrapperOpen = (code) => {
28
23
  return code === 0x2018 || // ‘
29
24
  code === 0x201C || // “
@@ -256,12 +251,6 @@ const isSingleStarClosingBoundary = (code) => {
256
251
  isClosingBracketLike(code)
257
252
  }
258
253
 
259
- const isAsciiAlphaNum = (code) => {
260
- return (code >= 0x30 && code <= 0x39) ||
261
- (code >= 0x41 && code <= 0x5A) ||
262
- (code >= 0x61 && code <= 0x7A)
263
- }
264
-
265
254
  const isAsciiGuardOpenWrapper = (code) => {
266
255
  return code === 0x22 || // "
267
256
  code === 0x27 || // '
@@ -370,7 +359,7 @@ const hasAsciiStartAfterOptionalOpenWrappers = (src, index, max, lookupCache = n
370
359
  if (i === -1) return false
371
360
  }
372
361
  if (i < 0 || i >= max) return false
373
- return isAsciiAlphaNum(src.charCodeAt(i))
362
+ return isAsciiWordCode(src.charCodeAt(i))
374
363
  }
375
364
 
376
365
  const hasAsciiEndBeforeOptionalCloseWrappers = (src, index, lookupCache = null) => {
@@ -383,7 +372,7 @@ const hasAsciiEndBeforeOptionalCloseWrappers = (src, index, lookupCache = null)
383
372
  if (i === -1) return false
384
373
  }
385
374
  if (i < 0) return false
386
- return isAsciiAlphaNum(src.charCodeAt(i))
375
+ return isAsciiWordCode(src.charCodeAt(i))
387
376
  }
388
377
 
389
378
  const isMarkdownStructuralOpenWrapper = (code) => {
@@ -870,8 +859,10 @@ const patchScanDelims = (md) => {
870
859
  if (!aggressiveMode && count === 1) {
871
860
  // Keep local directionality to avoid degrading markdown-it-valid runs,
872
861
  // e.g. `[。*a**](u)` where the first `*` should remain opener-only.
873
- const rightIsBoundary = isSingleStarClosingBoundary(nextChar) || isWrapperOpenLike(nextChar)
874
- const leftIsBoundary = isSingleStarBoundary(lastChar) || isWrapperCloseLike(lastChar)
862
+ const rightIsOpenWrapper = isWrapperOpenLike(nextChar)
863
+ const leftIsCloseWrapper = isWrapperCloseLike(lastChar)
864
+ const rightIsBoundary = isSingleStarClosingBoundary(nextChar) || rightIsOpenWrapper
865
+ const leftIsBoundary = isSingleStarBoundary(lastChar) || leftIsCloseWrapper
875
866
  if (leftJapanese && !rightJapanese && !rightIsBoundary) {
876
867
  prevStarFlags = ensurePrevStarFlags(src, start, prevStarFlags)
877
868
  if ((prevStarFlags & PREV_STAR_HAS_OPENER) === 0) {
@@ -880,28 +871,30 @@ const patchScanDelims = (md) => {
880
871
  } else if (!leftJapanese && rightJapanese && !leftIsBoundary) {
881
872
  relaxedOpen = false
882
873
  }
883
- const rightIsOpenWrapper = isWrapperOpenLike(nextChar)
884
- const leftIsCloseWrapper = isWrapperCloseLike(lastChar)
885
- prevStarFlags = ensurePrevStarFlags(src, start, prevStarFlags)
886
- const hasPrevJapaneseOpener = (prevStarFlags & PREV_STAR_HAS_OPENER) !== 0
887
- const hasJapaneseSincePrevStar = (prevStarFlags & PREV_STAR_HAS_JP_BETWEEN) !== 0
888
874
  const leftIsExtraClosePunct = isExtraSingleStarClosePunct(lastChar)
889
- const canForceCloseByPunct = leftIsExtraClosePunct && hasJapaneseSincePrevStar
890
- if (leftJapanese &&
891
- rightIsOpenWrapper &&
892
- !hasPrevJapaneseOpener &&
893
- !isMarkdownStructuralOpenWrapper(nextChar)) {
894
- forceOpen = true
895
- forceClose = false
896
- } else if (leftIsCloseWrapper && rightJapanese && hasPrevJapaneseOpener) {
897
- forceOpen = false
898
- forceClose = true
899
- } else if ((leftIsCloseWrapper || canForceCloseByPunct) &&
900
- !rightJapanese &&
901
- !rightIsBoundary &&
902
- hasPrevJapaneseOpener) {
903
- forceOpen = false
904
- forceClose = true
875
+ const canCheckForceOpen =
876
+ leftJapanese && rightIsOpenWrapper && !isMarkdownStructuralOpenWrapper(nextChar)
877
+ const canCheckForceClose =
878
+ (leftIsCloseWrapper && rightJapanese) ||
879
+ ((leftIsCloseWrapper || leftIsExtraClosePunct) && !rightJapanese && !rightIsBoundary)
880
+ if (canCheckForceOpen || canCheckForceClose) {
881
+ prevStarFlags = ensurePrevStarFlags(src, start, prevStarFlags)
882
+ const hasPrevJapaneseOpener = (prevStarFlags & PREV_STAR_HAS_OPENER) !== 0
883
+ const hasJapaneseSincePrevStar = (prevStarFlags & PREV_STAR_HAS_JP_BETWEEN) !== 0
884
+ const canForceCloseByPunct = leftIsExtraClosePunct && hasJapaneseSincePrevStar
885
+ if (canCheckForceOpen && !hasPrevJapaneseOpener) {
886
+ forceOpen = true
887
+ forceClose = false
888
+ } else if (leftIsCloseWrapper && rightJapanese && hasPrevJapaneseOpener) {
889
+ forceOpen = false
890
+ forceClose = true
891
+ } else if ((leftIsCloseWrapper || canForceCloseByPunct) &&
892
+ !rightJapanese &&
893
+ !rightIsBoundary &&
894
+ hasPrevJapaneseOpener) {
895
+ forceOpen = false
896
+ forceClose = true
897
+ }
905
898
  }
906
899
  }
907
900
  const finalOpen = forceOpen === null ? ((base && base.can_open) || relaxedOpen) : forceOpen
@@ -1,6 +1,6 @@
1
1
  import Token from 'markdown-it/lib/token.mjs'
2
2
  import { isWhiteSpace } from 'markdown-it/lib/common/utils.mjs'
3
- import { getReferenceCount } from './token-utils.js'
3
+ import { cloneMap, getReferenceCount } from './token-utils.js'
4
4
 
5
5
  const CHAR_OPEN_BRACKET = 0x5B // [
6
6
  const CHAR_CLOSE_BRACKET = 0x5D // ]
@@ -80,11 +80,6 @@ const getNormalizeRef = (state) => {
80
80
  }
81
81
 
82
82
 
83
- const cloneMap = (map) => {
84
- if (!map || !Array.isArray(map)) return null
85
- return [map[0], map[1]]
86
- }
87
-
88
83
  const getMapFromTokenRange = (tokens, startIdx, endIdx) => {
89
84
  if (!tokens || startIdx > endIdx) return null
90
85
  let startLine = null
@@ -115,14 +115,14 @@ const expandSegmentEndForWrapperBalance = (tokens, startIdx, endIdx) => {
115
115
  return balance.total > 0 ? -1 : expandedEnd
116
116
  }
117
117
 
118
- const bumpBrokenRefMetric = (metrics, bucket, key) => {
119
- if (!metrics || !bucket || !key) return
118
+ const bumpBrokenRefMetric = (metrics, bucket, key, delta = 1) => {
119
+ if (!metrics || !bucket || !key || delta <= 0) return
120
120
  let table = metrics[bucket]
121
121
  if (!table || typeof table !== 'object') {
122
122
  table = Object.create(null)
123
123
  metrics[bucket] = table
124
124
  }
125
- table[key] = (table[key] || 0) + 1
125
+ table[key] = (table[key] || 0) + delta
126
126
  }
127
127
 
128
128
  const ensureBrokenRefLinkCloseMap = (tokens, facts = null, hooks = null, fallbackCache = null) => {
@@ -190,6 +190,7 @@ const resolveBrokenRefCandidateGuardFlow = (
190
190
  children,
191
191
  brokenRefCandidate,
192
192
  segmentEnd,
193
+ metrics = null,
193
194
  facts = null,
194
195
  hooks = null,
195
196
  fallbackCache = null
@@ -203,6 +204,10 @@ const resolveBrokenRefCandidateGuardFlow = (
203
204
  if (!wrapperSignals.hasTextMarker) {
204
205
  return BROKEN_REF_FLOW_SKIP_NO_TEXT_MARKER
205
206
  }
207
+ if (!hasBrokenRefActiveFastPathTokenSignal(wrapperSignals)) {
208
+ bumpBrokenRefMetric(metrics, 'brokenRefCandidateFlow', 'no-active-signature')
209
+ return BROKEN_REF_FLOW_SKIP_NO_ACTIVE_SIGNATURE
210
+ }
206
211
  const wrapperPrefixStats = ensureBrokenRefWrapperPrefixStats(children, facts, hooks, fallbackCache)
207
212
  if (!shouldAttemptBrokenRefRewrite(
208
213
  children,
@@ -214,6 +219,7 @@ const resolveBrokenRefCandidateGuardFlow = (
214
219
  )) {
215
220
  return BROKEN_REF_FLOW_SKIP_GUARD
216
221
  }
222
+ bumpBrokenRefMetric(metrics, 'brokenRefCandidateFlow', 'guard-passed')
217
223
  return null
218
224
  }
219
225
 
@@ -232,12 +238,16 @@ const resolveBrokenRefFastPathFlow = (
232
238
  metrics,
233
239
  bumpBrokenRefMetric
234
240
  )
241
+ bumpBrokenRefMetric(metrics, 'brokenRefCandidateFlow', 'fastpath-dispatch')
235
242
  if (fastPathResult === BROKEN_REF_FAST_PATH_RESULT_NO_ACTIVE_SIGNATURE) {
243
+ bumpBrokenRefMetric(metrics, 'brokenRefCandidateFlow', 'no-active-signature')
236
244
  return BROKEN_REF_FLOW_SKIP_NO_ACTIVE_SIGNATURE
237
245
  }
238
246
  if (fastPathResult === BROKEN_REF_FAST_PATH_RESULT_NO_MATCH) {
247
+ bumpBrokenRefMetric(metrics, 'brokenRefCandidateFlow', 'no-fastpath-match')
239
248
  return BROKEN_REF_FLOW_SKIP_NO_FASTPATH_MATCH
240
249
  }
250
+ bumpBrokenRefMetric(metrics, 'brokenRefCandidateFlow', 'repaired')
241
251
  return BROKEN_REF_FLOW_REPAIRED
242
252
  }
243
253
 
@@ -256,6 +266,7 @@ const runBrokenRefCandidateRewrite = (
256
266
  children,
257
267
  brokenRefCandidate,
258
268
  segmentEnd,
269
+ metrics,
259
270
  facts,
260
271
  hooks,
261
272
  fallbackCache
@@ -308,7 +319,7 @@ const createBrokenRefPassSignals = (seedSignals = null) => {
308
319
  const observeBrokenRefTextToken = (passSignals, candidateState, text, tokenIdx, scanState) => {
309
320
  const hasOpenBracket = text.indexOf('[') !== -1
310
321
  const hasCloseBracket = text.indexOf(']') !== -1
311
- if (!passSignals.hasBracketText && (hasOpenBracket || hasCloseBracket)) {
322
+ if (passSignals && !passSignals.hasBracketText && (hasOpenBracket || hasCloseBracket)) {
312
323
  passSignals.hasBracketText = true
313
324
  }
314
325
  if (candidateState.start === -1) {
@@ -383,6 +394,7 @@ const tryRepairBrokenRefCandidateAtLinkOpen = (
383
394
  const closeIdx = linkCloseMap.get(childIdx) ?? -1
384
395
  if (closeIdx === -1) return null
385
396
  bumpBrokenRefMetric(metrics, 'brokenRefFlow', 'candidate')
397
+ bumpBrokenRefMetric(metrics, 'brokenRefCandidateFlow', 'candidate')
386
398
  const flowResult = runBrokenRefCandidateRewrite(
387
399
  children,
388
400
  brokenRefCandidate,
@@ -437,46 +449,123 @@ const runBrokenRefRepairPass = (children, scanState, metrics = null, facts = nul
437
449
  return buildBrokenRefRepairPassResult(false, passSignals)
438
450
  }
439
451
 
440
- const computeMaxBrokenRefRepairPass = (children, scanState) => {
452
+ const hasPotentialBrokenRefRepairPass = (children, scanState) => {
441
453
  resetBrokenRefScanState(scanState)
442
- let maxRepairPass = 0
443
454
  for (let j = 0; j < children.length; j++) {
444
455
  const child = children[j]
445
456
  if (!child || child.type !== 'text' || !child.content) continue
446
457
  if (child.content.indexOf('[') === -1) continue
447
458
  if (scanBrokenRefState(child.content, scanState).brokenEnd) {
448
- maxRepairPass++
459
+ return true
449
460
  }
450
461
  }
451
- return maxRepairPass
462
+ return false
463
+ }
464
+
465
+ const hasBrokenRefActiveFastPathTokenSignal = (wrapperSignals) => {
466
+ if (!wrapperSignals) return false
467
+ // Current broken-ref fast paths are all strong-token driven.
468
+ return wrapperSignals.strongOpenInRange > 0 || wrapperSignals.strongCloseInRange > 0
452
469
  }
453
470
 
454
- const runBrokenRefRepairs = (children, maxRepairPass, scanState, metrics = null, facts = null, hooks = null) => {
471
+ const countGuardedBrokenRefRepairPasses = (children, scanState, facts = null, hooks = null) => {
472
+ resetBrokenRefScanState(scanState)
473
+ const brokenRefCandidate = resetBrokenRefCandidateState({ start: -1, depth: 0, startTextOffset: 0 })
474
+ const fallbackCache = {
475
+ linkCloseMap: undefined,
476
+ wrapperPrefixStats: undefined
477
+ }
455
478
  let repairPassCount = 0
479
+ for (let j = 0; j < children.length; j++) {
480
+ const child = children[j]
481
+ if (!child) continue
482
+ if (child.type === 'text' && child.content) {
483
+ observeBrokenRefTextToken(null, brokenRefCandidate, child.content, j, scanState)
484
+ }
485
+ if (child.type !== 'link_open' || brokenRefCandidate.start === -1) continue
486
+ if (brokenRefCandidate.depth <= 0) {
487
+ resetBrokenRefCandidateState(brokenRefCandidate)
488
+ continue
489
+ }
490
+ const linkCloseMap = ensureBrokenRefLinkCloseMap(children, facts, hooks, fallbackCache)
491
+ const closeIdx = linkCloseMap.get(j) ?? -1
492
+ if (closeIdx === -1) continue
493
+ const segmentEnd = resolveBrokenRefSegmentEnd(children, brokenRefCandidate, closeIdx)
494
+ const wrapperSignals = buildBrokenRefWrapperRangeSignals(
495
+ children,
496
+ brokenRefCandidate.start,
497
+ segmentEnd,
498
+ brokenRefCandidate.startTextOffset
499
+ )
500
+ if (!wrapperSignals.hasTextMarker || !hasBrokenRefActiveFastPathTokenSignal(wrapperSignals)) {
501
+ resetBrokenRefCandidateState(brokenRefCandidate)
502
+ continue
503
+ }
504
+ const wrapperPrefixStats = ensureBrokenRefWrapperPrefixStats(children, facts, hooks, fallbackCache)
505
+ if (shouldAttemptBrokenRefRewrite(
506
+ children,
507
+ brokenRefCandidate.start,
508
+ segmentEnd,
509
+ brokenRefCandidate.startTextOffset,
510
+ wrapperPrefixStats,
511
+ wrapperSignals
512
+ )) {
513
+ repairPassCount++
514
+ }
515
+ resetBrokenRefCandidateState(brokenRefCandidate)
516
+ }
517
+ return repairPassCount
518
+ }
519
+
520
+ const buildBrokenRefRepairsResult = (changed, passSignals) => {
521
+ return {
522
+ changed,
523
+ hasBracketText: passSignals.hasBracketText,
524
+ hasEmphasis: passSignals.hasEmphasis,
525
+ hasLinkClose: passSignals.hasLinkClose
526
+ }
527
+ }
528
+
529
+ const runBrokenRefRepairs = (children, scanState, metrics = null, facts = null, hooks = null) => {
530
+ const seedSignals = createBrokenRefPassSignals(createBrokenRefSignalSeed(facts))
531
+ if (!hasPotentialBrokenRefRepairPass(children, scanState)) {
532
+ return buildBrokenRefRepairsResult(false, seedSignals)
533
+ }
534
+
456
535
  let changed = false
457
- while (repairPassCount < maxRepairPass) {
458
- const pass = runBrokenRefRepairPass(children, scanState, metrics, facts, hooks)
536
+ bumpBrokenRefMetric(metrics, 'brokenRefPasses', 'budgeted')
537
+ bumpBrokenRefMetric(metrics, 'brokenRefPasses', 'executed')
538
+
539
+ let pass = runBrokenRefRepairPass(children, scanState, metrics, facts, hooks)
540
+ if (!pass.didRepair) {
541
+ bumpBrokenRefMetric(metrics, 'brokenRefPasses', 'stopped-no-repair')
542
+ return buildBrokenRefRepairsResult(changed, pass)
543
+ }
544
+
545
+ changed = true
546
+ bumpBrokenRefMetric(metrics, 'brokenRefPasses', 'repaired')
547
+
548
+ const remainingBudget = countGuardedBrokenRefRepairPasses(children, scanState, facts, hooks)
549
+ if (remainingBudget > 0) {
550
+ bumpBrokenRefMetric(metrics, 'brokenRefPasses', 'budgeted', remainingBudget)
551
+ }
552
+
553
+ let repairPassCount = 0
554
+ while (repairPassCount < remainingBudget) {
555
+ bumpBrokenRefMetric(metrics, 'brokenRefPasses', 'executed')
556
+ pass = runBrokenRefRepairPass(children, scanState, metrics, facts, hooks)
459
557
  if (!pass.didRepair) {
460
- return {
461
- changed,
462
- hasBracketText: pass.hasBracketText,
463
- hasEmphasis: pass.hasEmphasis,
464
- hasLinkClose: pass.hasLinkClose
465
- }
558
+ bumpBrokenRefMetric(metrics, 'brokenRefPasses', 'stopped-no-repair')
559
+ return buildBrokenRefRepairsResult(changed, pass)
466
560
  }
467
561
  changed = true
468
562
  repairPassCount++
563
+ bumpBrokenRefMetric(metrics, 'brokenRefPasses', 'repaired')
469
564
  }
470
565
  const finalSignals = collectBrokenRefPassSignals(children, createBrokenRefSignalSeed(facts))
471
- return {
472
- changed,
473
- hasBracketText: finalSignals.hasBracketText,
474
- hasEmphasis: finalSignals.hasEmphasis,
475
- hasLinkClose: finalSignals.hasLinkClose
476
- }
566
+ return buildBrokenRefRepairsResult(changed, finalSignals)
477
567
  }
478
568
 
479
569
  export {
480
- computeMaxBrokenRefRepairPass,
481
570
  runBrokenRefRepairs
482
571
  }
@@ -0,0 +1,50 @@
1
+ const fallbackMarkupByType = (type) => {
2
+ if (type === 'strong_open' || type === 'strong_close') return '**'
3
+ if (type === 'em_open' || type === 'em_close') return '*'
4
+ return ''
5
+ }
6
+
7
+ const makeTokenLiteralText = (token) => {
8
+ if (!token) return
9
+ const literal = token.markup || fallbackMarkupByType(token.type)
10
+ token.type = 'text'
11
+ token.tag = ''
12
+ token.nesting = 0
13
+ token.content = literal
14
+ token.markup = ''
15
+ token.info = ''
16
+ }
17
+
18
+ const sanitizeEmStrongBalance = (tokens, onChangeStart = null) => {
19
+ if (!tokens || tokens.length === 0) return false
20
+ const stack = []
21
+ let changed = false
22
+ for (let i = 0; i < tokens.length; i++) {
23
+ const token = tokens[i]
24
+ if (!token || !token.type) continue
25
+ if (token.type === 'strong_open' || token.type === 'em_open') {
26
+ stack.push({ type: token.type, idx: i })
27
+ continue
28
+ }
29
+ if (token.type !== 'strong_close' && token.type !== 'em_close') continue
30
+ const expected = token.type === 'strong_close' ? 'strong_open' : 'em_open'
31
+ if (stack.length > 0 && stack[stack.length - 1].type === expected) {
32
+ stack.pop()
33
+ continue
34
+ }
35
+ if (onChangeStart) onChangeStart(i)
36
+ makeTokenLiteralText(token)
37
+ changed = true
38
+ }
39
+ for (let i = stack.length - 1; i >= 0; i--) {
40
+ const entry = stack[i]
41
+ const token = tokens[entry.idx]
42
+ if (!token) continue
43
+ if (onChangeStart) onChangeStart(entry.idx)
44
+ makeTokenLiteralText(token)
45
+ changed = true
46
+ }
47
+ return changed
48
+ }
49
+
50
+ export { sanitizeEmStrongBalance }
@@ -1,9 +1,5 @@
1
1
  import Token from 'markdown-it/lib/token.mjs'
2
-
3
- const cloneMap = (map) => {
4
- if (!map || !Array.isArray(map)) return null
5
- return [map[0], map[1]]
6
- }
2
+ import { cloneMap } from '../token-utils.js'
7
3
 
8
4
  const cloneTextLike = (source, content) => {
9
5
  const token = new Token('text', '', 0)
@@ -1,6 +1,11 @@
1
1
  import { isJapaneseChar } from '../token-utils.js'
2
2
 
3
3
  const CHAR_ASTERISK = 0x2A // *
4
+ const INLINE_REPAIR_EM_OUTER_STRONG_SEQUENCE = 1 << 0
5
+ const INLINE_REPAIR_TAIL_AFTER_LINK = 1 << 1
6
+ const INLINE_REPAIR_LEADING_ASTERISK_EM = 1 << 2
7
+ const INLINE_REPAIR_TRAILING_STRONG = 1 << 3
8
+ const INLINE_REPAIR_BALANCE_SANITIZE = 1 << 4
4
9
 
5
10
  const hasMarkerChars = (text) => {
6
11
  return !!text && text.indexOf('*') !== -1
@@ -389,13 +394,9 @@ const hasBrokenRefImmediateRewriteSignal = (wrapperSignals) => {
389
394
  return wrapperSignals.hasImbalance && hasBrokenRefExplicitAsteriskSignal(wrapperSignals)
390
395
  }
391
396
 
392
- const shouldRejectBalancedBrokenRefRewrite = (wrapperSignals) => {
393
- return !wrapperSignals.hasImbalance && hasBrokenRefExplicitAsteriskSignal(wrapperSignals)
394
- }
395
-
396
397
  const shouldAttemptBrokenRefRewriteFromSignals = (wrapperSignals) => {
397
398
  if (hasBrokenRefImmediateRewriteSignal(wrapperSignals)) return true
398
- if (shouldRejectBalancedBrokenRefRewrite(wrapperSignals)) return false
399
+ if (!wrapperSignals.hasImbalance && hasBrokenRefExplicitAsteriskSignal(wrapperSignals)) return false
399
400
  return hasBrokenRefStrongRunEvidence(wrapperSignals)
400
401
  }
401
402
 
@@ -413,16 +414,47 @@ const shouldAttemptBrokenRefRewrite = (
413
414
  return shouldAttemptBrokenRefRewriteFromSignals(signals)
414
415
  }
415
416
 
416
- const scanInlinePostprocessSignals = (children) => {
417
+ const scanInlinePostprocessSignals = (children, collectJapaneseContext = false) => {
417
418
  let hasEmphasis = false
418
419
  let hasLinkOpen = false
419
420
  let hasLinkClose = false
420
421
  let hasCodeInline = false
422
+ let hasJapaneseContext = false
423
+ let hasTextStrongMarker = false
424
+ let strongOpenCount = 0
425
+ let strongCloseCount = 0
426
+ let emOpenCount = 0
427
+ let emCloseCount = 0
428
+ let hasAsteriskWrapperImbalance = false
429
+ const emphasisStack = []
421
430
  for (let j = 0; j < children.length; j++) {
422
431
  const child = children[j]
423
432
  if (!child) continue
424
- if (!hasEmphasis && isAsteriskEmphasisToken(child)) {
433
+ if (collectJapaneseContext && !hasJapaneseContext && tokenHasJapaneseChars(child)) {
434
+ hasJapaneseContext = true
435
+ }
436
+ if (!hasTextStrongMarker && child.type === 'text' && child.content && child.content.indexOf('**') !== -1) {
437
+ hasTextStrongMarker = true
438
+ }
439
+ const isAsteriskEmphasis = isAsteriskEmphasisToken(child)
440
+ if (isAsteriskEmphasis) {
425
441
  hasEmphasis = true
442
+ if (child.type === 'strong_open') strongOpenCount++
443
+ else if (child.type === 'strong_close') strongCloseCount++
444
+ else if (child.type === 'em_open') emOpenCount++
445
+ else if (child.type === 'em_close') emCloseCount++
446
+ if (!hasAsteriskWrapperImbalance) {
447
+ if (child.type === 'strong_open' || child.type === 'em_open') {
448
+ emphasisStack.push(child.type)
449
+ } else {
450
+ const expected = child.type === 'strong_close' ? 'strong_open' : 'em_open'
451
+ if (emphasisStack.length > 0 && emphasisStack[emphasisStack.length - 1] === expected) {
452
+ emphasisStack.pop()
453
+ } else {
454
+ hasAsteriskWrapperImbalance = true
455
+ }
456
+ }
457
+ }
426
458
  }
427
459
  if (!hasLinkOpen && child.type === 'link_open') {
428
460
  hasLinkOpen = true
@@ -433,13 +465,34 @@ const scanInlinePostprocessSignals = (children) => {
433
465
  if (!hasCodeInline && child.type === 'code_inline') {
434
466
  hasCodeInline = true
435
467
  }
436
- if (hasEmphasis && hasLinkOpen && hasLinkClose) break
468
+ }
469
+ if (!hasAsteriskWrapperImbalance && emphasisStack.length > 0) {
470
+ hasAsteriskWrapperImbalance = true
471
+ }
472
+ let repairMask = 0
473
+ if (emOpenCount >= 2 && emCloseCount >= 2 && strongOpenCount > 0) {
474
+ repairMask |= INLINE_REPAIR_EM_OUTER_STRONG_SEQUENCE
475
+ }
476
+ if (hasLinkClose && strongCloseCount > 0) {
477
+ repairMask |= INLINE_REPAIR_TAIL_AFTER_LINK
478
+ }
479
+ if (hasLinkClose && emCloseCount > 0) {
480
+ repairMask |= INLINE_REPAIR_LEADING_ASTERISK_EM
481
+ }
482
+ if (emOpenCount > 0 && emCloseCount > 0 && hasTextStrongMarker) {
483
+ repairMask |= INLINE_REPAIR_TRAILING_STRONG
484
+ }
485
+ if (hasAsteriskWrapperImbalance) {
486
+ repairMask |= INLINE_REPAIR_BALANCE_SANITIZE
437
487
  }
438
488
  return {
439
489
  hasEmphasis,
440
490
  hasLinkOpen,
441
491
  hasLinkClose,
442
- hasCodeInline
492
+ hasCodeInline,
493
+ hasJapaneseContext,
494
+ repairMask,
495
+ hasAsteriskWrapperImbalance
443
496
  }
444
497
  }
445
498
 
@@ -451,5 +504,10 @@ export {
451
504
  buildAsteriskWrapperPrefixStats,
452
505
  buildBrokenRefWrapperRangeSignals,
453
506
  shouldAttemptBrokenRefRewrite,
454
- scanInlinePostprocessSignals
507
+ scanInlinePostprocessSignals,
508
+ INLINE_REPAIR_EM_OUTER_STRONG_SEQUENCE,
509
+ INLINE_REPAIR_TAIL_AFTER_LINK,
510
+ INLINE_REPAIR_LEADING_ASTERISK_EM,
511
+ INLINE_REPAIR_TRAILING_STRONG,
512
+ INLINE_REPAIR_BALANCE_SANITIZE
455
513
  }
@@ -1,6 +1,6 @@
1
1
  import Token from 'markdown-it/lib/token.mjs'
2
2
  import { buildLinkCloseMap, convertCollapsedReferenceLinks, mergeBrokenMarksAroundLinks } from '../token-link-utils.js'
3
- import { computeMaxBrokenRefRepairPass, runBrokenRefRepairs } from './broken-ref.js'
3
+ import { runBrokenRefRepairs } from './broken-ref.js'
4
4
  import {
5
5
  rebuildInlineLevels,
6
6
  rebuildInlineLevelsFrom,
@@ -11,68 +11,35 @@ import {
11
11
  import {
12
12
  getRuntimeOpt,
13
13
  hasRuntimeOverride,
14
- getReferenceCount
14
+ getReferenceCount,
15
+ isAsciiWordCode,
16
+ isSoftSpaceCode,
17
+ cloneMap
15
18
  } from '../token-utils.js'
16
19
  import {
17
20
  hasMarkerChars,
18
21
  hasJapaneseContextInRange,
19
22
  hasEmphasisSignalInRange,
20
23
  buildAsteriskWrapperPrefixStats,
21
- scanInlinePostprocessSignals
24
+ scanInlinePostprocessSignals,
25
+ INLINE_REPAIR_EM_OUTER_STRONG_SEQUENCE,
26
+ INLINE_REPAIR_TAIL_AFTER_LINK,
27
+ INLINE_REPAIR_LEADING_ASTERISK_EM,
28
+ INLINE_REPAIR_TRAILING_STRONG,
29
+ INLINE_REPAIR_BALANCE_SANITIZE
22
30
  } from './guards.js'
23
31
  import {
24
32
  tryFixTailPatternTokenOnly,
25
33
  tryFixTailDanglingStrongCloseTokenOnly
26
34
  } from './fastpaths.js'
35
+ import { sanitizeEmStrongBalance } from './emphasis-balance.js'
27
36
 
28
- const fallbackMarkupByType = (type) => {
29
- if (type === 'strong_open' || type === 'strong_close') return '**'
30
- if (type === 'em_open' || type === 'em_close') return '*'
31
- return ''
32
- }
33
-
34
- const makeTokenLiteralText = (token) => {
35
- if (!token) return
36
- const literal = token.markup || fallbackMarkupByType(token.type)
37
- token.type = 'text'
38
- token.tag = ''
39
- token.nesting = 0
40
- token.content = literal
41
- token.markup = ''
42
- token.info = ''
43
- }
44
-
45
- const sanitizeEmStrongBalance = (tokens, onChangeStart = null) => {
46
- if (!tokens || tokens.length === 0) return false
47
- const stack = []
48
- let changed = false
49
- for (let i = 0; i < tokens.length; i++) {
50
- const token = tokens[i]
51
- if (!token || !token.type) continue
52
- if (token.type === 'strong_open' || token.type === 'em_open') {
53
- stack.push({ type: token.type, idx: i })
54
- continue
55
- }
56
- if (token.type !== 'strong_close' && token.type !== 'em_close') continue
57
- const expected = token.type === 'strong_close' ? 'strong_open' : 'em_open'
58
- if (stack.length > 0 && stack[stack.length - 1].type === expected) {
59
- stack.pop()
60
- continue
61
- }
62
- if (onChangeStart) onChangeStart(i)
63
- makeTokenLiteralText(token)
64
- changed = true
65
- }
66
- for (let i = stack.length - 1; i >= 0; i--) {
67
- const entry = stack[i]
68
- const token = tokens[entry.idx]
69
- if (!token) continue
70
- if (onChangeStart) onChangeStart(entry.idx)
71
- makeTokenLiteralText(token)
72
- changed = true
73
- }
74
- return changed
75
- }
37
+ const INLINE_REPAIR_ALL_EMPHASIS_FIXERS =
38
+ INLINE_REPAIR_EM_OUTER_STRONG_SEQUENCE |
39
+ INLINE_REPAIR_TAIL_AFTER_LINK |
40
+ INLINE_REPAIR_LEADING_ASTERISK_EM |
41
+ INLINE_REPAIR_TRAILING_STRONG |
42
+ INLINE_REPAIR_BALANCE_SANITIZE
76
43
 
77
44
  const getPostprocessMetrics = (state) => {
78
45
  if (!state || !state.env) return null
@@ -81,14 +48,17 @@ const getPostprocessMetrics = (state) => {
81
48
  return metrics
82
49
  }
83
50
 
84
- const buildInlinePostprocessFacts = (children, inlineContent) => {
85
- const preScan = scanInlinePostprocessSignals(children)
51
+ const buildInlinePostprocessFacts = (children, inlineContent, collectJapaneseContext) => {
52
+ const preScan = scanInlinePostprocessSignals(children, collectJapaneseContext)
86
53
  return {
87
54
  hasBracketText: inlineContent.indexOf('[') !== -1 || inlineContent.indexOf(']') !== -1,
88
55
  hasEmphasis: preScan.hasEmphasis,
56
+ hasAsteriskWrapperImbalance: preScan.hasAsteriskWrapperImbalance,
89
57
  hasLinkOpen: preScan.hasLinkOpen,
90
58
  hasLinkClose: preScan.hasLinkClose,
91
59
  hasCodeInline: preScan.hasCodeInline,
60
+ hasJapaneseContext: preScan.hasJapaneseContext,
61
+ repairMask: preScan.repairMask,
92
62
  linkCloseMap: undefined,
93
63
  wrapperPrefixStats: undefined,
94
64
  rebuildLevelStart: undefined
@@ -159,14 +129,14 @@ const BROKEN_REF_REPAIR_HOOKS = {
159
129
  markLevelRebuildFrom: markInlineLevelRebuildFrom
160
130
  }
161
131
 
162
- const bumpPostprocessMetric = (metrics, bucket, key) => {
163
- if (!metrics || !bucket || !key) return
132
+ const bumpPostprocessMetric = (metrics, bucket, key, delta = 1) => {
133
+ if (!metrics || !bucket || !key || delta <= 0) return
164
134
  let table = metrics[bucket]
165
135
  if (!table || typeof table !== 'object') {
166
136
  table = Object.create(null)
167
137
  metrics[bucket] = table
168
138
  }
169
- table[key] = (table[key] || 0) + 1
139
+ table[key] = (table[key] || 0) + delta
170
140
  }
171
141
 
172
142
  const scanTailRepairCandidateAfterLinkClose = (tokens, linkCloseIdx) => {
@@ -232,11 +202,6 @@ const fixTailAfterLinkStrongClose = (tokens, isJapaneseMode, metrics = null, onC
232
202
  return false
233
203
  }
234
204
 
235
- const cloneMap = (map) => {
236
- if (!map || !Array.isArray(map)) return null
237
- return [map[0], map[1]]
238
- }
239
-
240
205
  const cloneTextToken = (source, content) => {
241
206
  const token = new Token('text', '', 0)
242
207
  Object.assign(token, source)
@@ -245,19 +210,9 @@ const cloneTextToken = (source, content) => {
245
210
  return token
246
211
  }
247
212
 
248
- const isSoftSpaceCode = (code) => {
249
- return code === 0x20 || code === 0x09 || code === 0x3000
250
- }
251
-
252
213
  const CHAR_ASTERISK = 0x2A // *
253
214
  const CHAR_BACKSLASH = 0x5C // \
254
215
 
255
- const isAsciiWordCode = (code) => {
256
- return (code >= 0x30 && code <= 0x39) ||
257
- (code >= 0x41 && code <= 0x5A) ||
258
- (code >= 0x61 && code <= 0x7A)
259
- }
260
-
261
216
  const textEndsAsciiWord = (text) => {
262
217
  if (!text || text.length === 0) return false
263
218
  return isAsciiWordCode(text.charCodeAt(text.length - 1))
@@ -459,46 +414,78 @@ const shouldRunInlineBrokenRefRepair = (facts, inlineContent, state) => {
459
414
  return getReferenceCount(state) > 0
460
415
  }
461
416
 
462
- const applyBrokenRefRepairFacts = (facts, repairs) => {
463
- if (!facts || !repairs) return
464
- facts.hasBracketText = repairs.hasBracketText
465
- facts.hasEmphasis = repairs.hasEmphasis
466
- facts.hasLinkClose = repairs.hasLinkClose
467
- }
468
-
469
- const createBrokenRefScanState = () => {
470
- return { depth: 0, brokenEnd: false, tailOpen: -1 }
471
- }
472
-
473
417
  const runInlineBrokenRefRepairStage = (children, facts, inlineContent, state) => {
474
418
  if (!shouldRunInlineBrokenRefRepair(facts, inlineContent, state)) return false
475
- const scanState = createBrokenRefScanState()
476
- const maxRepairPass = computeMaxBrokenRefRepairPass(children, scanState)
477
- if (maxRepairPass <= 0) return false
419
+ const scanState = { depth: 0, brokenEnd: false, tailOpen: -1 }
478
420
  const repairs = runBrokenRefRepairs(
479
421
  children,
480
- maxRepairPass,
481
422
  scanState,
482
423
  getPostprocessMetrics(state),
483
424
  facts,
484
425
  BROKEN_REF_REPAIR_HOOKS
485
426
  )
486
- applyBrokenRefRepairFacts(facts, repairs)
427
+ facts.hasBracketText = repairs.hasBracketText
428
+ facts.hasEmphasis = repairs.hasEmphasis
429
+ facts.hasLinkClose = repairs.hasLinkClose
487
430
  return repairs.changed
488
431
  }
489
432
 
490
- const runInlineEmphasisRepairStage = (children, facts, state, isJapaneseMode) => {
433
+ const runInlineEmphasisRepairStage = (
434
+ children,
435
+ facts,
436
+ state,
437
+ isJapaneseMode,
438
+ forceBalanceSanitize = false
439
+ ) => {
491
440
  if (!facts.hasEmphasis) return false
492
441
  let changed = false
493
442
  const markChangedFrom = createInlineChangeMarker(facts)
494
- if (fixEmOuterStrongSequence(children, markChangedFrom)) changed = true
443
+ const metrics = getPostprocessMetrics(state)
444
+ const repairMask = forceBalanceSanitize
445
+ ? INLINE_REPAIR_ALL_EMPHASIS_FIXERS
446
+ : (facts.repairMask || 0)
447
+ if ((repairMask & INLINE_REPAIR_EM_OUTER_STRONG_SEQUENCE) &&
448
+ fixEmOuterStrongSequence(children, markChangedFrom)) {
449
+ changed = true
450
+ bumpPostprocessMetric(metrics, 'emphasisFixers', 'em-outer-strong-sequence')
451
+ }
495
452
  if (facts.hasLinkClose) {
496
- const metrics = getPostprocessMetrics(state)
497
- if (fixTailAfterLinkStrongClose(children, isJapaneseMode, metrics, markChangedFrom)) changed = true
498
- if (fixLeadingAsteriskEm(children, markChangedFrom)) changed = true
453
+ if ((repairMask & INLINE_REPAIR_TAIL_AFTER_LINK) &&
454
+ fixTailAfterLinkStrongClose(children, isJapaneseMode, metrics, markChangedFrom)) {
455
+ changed = true
456
+ }
457
+ if ((repairMask & INLINE_REPAIR_LEADING_ASTERISK_EM) &&
458
+ fixLeadingAsteriskEm(children, markChangedFrom)) {
459
+ changed = true
460
+ bumpPostprocessMetric(metrics, 'emphasisFixers', 'leading-asterisk-em')
461
+ }
462
+ }
463
+ if ((repairMask & INLINE_REPAIR_TRAILING_STRONG) &&
464
+ fixTrailingStrong(children, markChangedFrom)) {
465
+ changed = true
466
+ bumpPostprocessMetric(metrics, 'emphasisFixers', 'trailing-strong')
467
+ }
468
+ const shouldAttemptSanitize = forceBalanceSanitize ||
469
+ changed ||
470
+ facts.hasAsteriskWrapperImbalance ||
471
+ (repairMask & INLINE_REPAIR_BALANCE_SANITIZE)
472
+ if (!shouldAttemptSanitize) {
473
+ bumpPostprocessMetric(metrics, 'emphasisSanitize', 'skipped-balanced')
474
+ return changed
475
+ }
476
+ bumpPostprocessMetric(metrics, 'emphasisSanitize', 'attempted')
477
+ if (forceBalanceSanitize || changed) {
478
+ bumpPostprocessMetric(metrics, 'emphasisSanitize', 'attempted-after-change')
479
+ } else {
480
+ bumpPostprocessMetric(metrics, 'emphasisSanitize', 'attempted-pre-scan-risk')
481
+ }
482
+ if (sanitizeEmStrongBalance(children, markChangedFrom)) {
483
+ changed = true
484
+ bumpPostprocessMetric(metrics, 'emphasisFixers', 'sanitize-em-strong-balance')
485
+ bumpPostprocessMetric(metrics, 'emphasisSanitize', 'repaired')
486
+ } else {
487
+ bumpPostprocessMetric(metrics, 'emphasisSanitize', 'no-change')
499
488
  }
500
- if (fixTrailingStrong(children, markChangedFrom)) changed = true
501
- if (sanitizeEmStrongBalance(children, markChangedFrom)) changed = true
502
489
  return changed
503
490
  }
504
491
 
@@ -507,33 +494,17 @@ const shouldRunInlineCollapsedRefRepair = (facts, state) => {
507
494
  return getReferenceCount(state) > 0
508
495
  }
509
496
 
510
- const applyCollapsedRefRepairFacts = (facts) => {
511
- if (!facts) return
512
- facts.hasLinkOpen = true
513
- facts.hasLinkClose = true
514
- }
515
-
516
- const rewriteInlineCollapsedReferences = (children, facts, state, markChangedFrom) => {
517
- const changed = convertCollapsedReferenceLinks(
518
- children,
519
- state,
520
- facts,
521
- markChangedFrom
522
- )
523
- if (!changed) return false
524
- applyCollapsedRefRepairFacts(facts)
525
- return true
526
- }
527
-
528
497
  const runInlineCollapsedRefStage = (children, facts, state) => {
529
498
  if (!shouldRunInlineCollapsedRefRepair(facts, state)) return false
530
499
  const markChangedFrom = createInlineChangeMarker(facts)
531
- if (!rewriteInlineCollapsedReferences(children, facts, state, markChangedFrom)) return false
500
+ if (!convertCollapsedReferenceLinks(children, state, facts, markChangedFrom)) return false
501
+ facts.hasLinkOpen = true
502
+ facts.hasLinkClose = true
532
503
  finalizeInlineLinkRepairStage(children, facts, markChangedFrom)
533
504
  return true
534
505
  }
535
506
 
536
- const shouldSkipInlinePostprocessToken = (children, facts, isJapaneseMode) => {
507
+ const shouldSkipInlinePostprocessToken = (facts, isJapaneseMode) => {
537
508
  if (!facts.hasEmphasis &&
538
509
  !facts.hasBracketText &&
539
510
  !facts.hasLinkOpen &&
@@ -541,8 +512,7 @@ const shouldSkipInlinePostprocessToken = (children, facts, isJapaneseMode) => {
541
512
  !facts.hasCodeInline) {
542
513
  return true
543
514
  }
544
- if (isJapaneseMode &&
545
- !hasJapaneseContextInRange(children, 0, children.length - 1)) {
515
+ if (isJapaneseMode && !facts.hasJapaneseContext) {
546
516
  return true
547
517
  }
548
518
  return false
@@ -569,7 +539,7 @@ const runInlineCoreRepairStages = (
569
539
  return false
570
540
  }
571
541
  if (runInlineBrokenRefRepairStage(children, facts, inlineContent, state)) changed = true
572
- if (runInlineEmphasisRepairStage(children, facts, state, isJapaneseMode)) changed = true
542
+ if (runInlineEmphasisRepairStage(children, facts, state, isJapaneseMode, changed)) changed = true
573
543
  return changed
574
544
  }
575
545
 
@@ -583,8 +553,8 @@ const processInlinePostprocessToken = (
583
553
  ) => {
584
554
  if (!token || token.type !== 'inline' || !token.children || token.children.length === 0) return
585
555
  const children = token.children
586
- const facts = buildInlinePostprocessFacts(children, inlineContent)
587
- if (shouldSkipInlinePostprocessToken(children, facts, isJapaneseMode)) return
556
+ const facts = buildInlinePostprocessFacts(children, inlineContent, isJapaneseMode)
557
+ if (shouldSkipInlinePostprocessToken(facts, isJapaneseMode)) return
588
558
  const changed = runInlineCoreRepairStages(
589
559
  children,
590
560
  facts,
@@ -1,10 +1,10 @@
1
- const CHAR_ASTERISK = 0x2A // *
2
- const CHAR_SPACE = 0x20 // ' '
3
- const CHAR_TAB = 0x09 // '\t'
4
- const CHAR_NEWLINE = 0x0A // '\n'
5
- const CHAR_IDEOGRAPHIC_SPACE = 0x3000 // fullwidth space
6
- const MODE_FLAG_COMPATIBLE = 1 << 0
7
- const MODE_FLAG_AGGRESSIVE = 1 << 1
1
+ const CHAR_ASTERISK = 0x2A // *
2
+ const CHAR_SPACE = 0x20 // ' '
3
+ const CHAR_TAB = 0x09 // '\t'
4
+ const CHAR_NEWLINE = 0x0A // '\n'
5
+ const CHAR_IDEOGRAPHIC_SPACE = 0x3000 // fullwidth space
6
+ const MODE_FLAG_COMPATIBLE = 1 << 0
7
+ const MODE_FLAG_AGGRESSIVE = 1 << 1
8
8
  const MODE_FLAG_JAPANESE_BASE = 1 << 2
9
9
  const MODE_FLAG_JAPANESE_PLUS = 1 << 3
10
10
  const MODE_FLAG_JAPANESE_ANY = MODE_FLAG_JAPANESE_BASE | MODE_FLAG_JAPANESE_PLUS
@@ -16,25 +16,40 @@ const VALID_CANONICAL_MODES = new Set([
16
16
  'japanese-boundary',
17
17
  'japanese-boundary-guard'
18
18
  ])
19
- const REG_JAPANESE = /[\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Han}\u3000-\u303F\uFF00-\uFFEF]/u
20
- const REG_ATTRS = /{[^{}\n!@#%^&*()]+?}$/
21
-
19
+ const REG_JAPANESE = /[\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Han}\u3000-\u303F\uFF00-\uFFEF]/u
20
+ const REG_ATTRS = /{[^{}\n!@#%^&*()]+?}$/
21
+
22
22
  const isJapaneseChar = (ch) => {
23
- if (!ch) return false
24
- const code = typeof ch === 'string' ? ch.charCodeAt(0) : ch
25
- if (code < 128) return false
26
- if (code >= 0x3040 && code <= 0x309F) return true
27
- if (code >= 0x30A0 && code <= 0x30FF) return true
28
- // Han + CJK punctuation/fullwidth ranges are common hot-path hits.
29
- // Keep these as cheap numeric checks before the fallback regex.
30
- if (code >= 0x3400 && code <= 0x4DBF) return true
31
- if (code >= 0x4E00 && code <= 0x9FFF) return true
32
- if (code >= 0xF900 && code <= 0xFAFF) return true
33
- if (code >= 0x3000 && code <= 0x303F) return true
34
- if (code >= 0xFF00 && code <= 0xFFEF) return true
23
+ if (!ch) return false
24
+ const code = typeof ch === 'string' ? ch.charCodeAt(0) : ch
25
+ if (code < 128) return false
26
+ if (code >= 0x3040 && code <= 0x309F) return true
27
+ if (code >= 0x30A0 && code <= 0x30FF) return true
28
+ // Han + CJK punctuation/fullwidth ranges are common hot-path hits.
29
+ // Keep these as cheap numeric checks before the fallback regex.
30
+ if (code >= 0x3400 && code <= 0x4DBF) return true
31
+ if (code >= 0x4E00 && code <= 0x9FFF) return true
32
+ if (code >= 0xF900 && code <= 0xFAFF) return true
33
+ if (code >= 0x3000 && code <= 0x303F) return true
34
+ if (code >= 0xFF00 && code <= 0xFFEF) return true
35
35
  return REG_JAPANESE.test(String.fromCharCode(code))
36
36
  }
37
37
 
38
+ const isAsciiWordCode = (code) => {
39
+ return (code >= 0x30 && code <= 0x39) ||
40
+ (code >= 0x41 && code <= 0x5A) ||
41
+ (code >= 0x61 && code <= 0x7A)
42
+ }
43
+
44
+ const isSoftSpaceCode = (code) => {
45
+ return code === CHAR_SPACE || code === CHAR_TAB || code === CHAR_IDEOGRAPHIC_SPACE
46
+ }
47
+
48
+ const cloneMap = (map) => {
49
+ if (!map || !Array.isArray(map)) return null
50
+ return [map[0], map[1]]
51
+ }
52
+
38
53
  const hasCjkBreaksRule = (md) => {
39
54
  if (!md || !md.core || !md.core.ruler || !Array.isArray(md.core.ruler.__rules__)) return false
40
55
  if (md.__strongJaHasCjkBreaks === true) return true
@@ -59,7 +74,7 @@ const hasCjkBreaksRule = (md) => {
59
74
  const isCjkBreaksRuleName = (name) => {
60
75
  return typeof name === 'string' && REG_CJK_BREAKS_RULE_NAME.test(name)
61
76
  }
62
-
77
+
63
78
  const resolveMode = (opt) => {
64
79
  const raw = opt && typeof opt.mode === 'string' ? opt.mode : 'japanese'
65
80
  const normalized = raw.toLowerCase()
@@ -70,9 +85,9 @@ const resolveMode = (opt) => {
70
85
  `mditStrongJa: unknown mode "${raw}". Valid modes: japanese, japanese-boundary, japanese-boundary-guard, aggressive, compatible`
71
86
  )
72
87
  }
73
-
74
- const getModeFlags = (mode) => {
75
- switch (mode) {
88
+
89
+ const getModeFlags = (mode) => {
90
+ switch (mode) {
76
91
  case 'compatible':
77
92
  return MODE_FLAG_COMPATIBLE
78
93
  case 'aggressive':
@@ -81,26 +96,26 @@ const getModeFlags = (mode) => {
81
96
  return MODE_FLAG_JAPANESE_BASE
82
97
  case 'japanese-boundary-guard':
83
98
  return MODE_FLAG_JAPANESE_PLUS
84
- default:
85
- return 0
86
- }
87
- }
88
-
89
- const deriveModeInfo = (opt) => {
90
- if (!opt || typeof opt !== 'object') return opt
91
- const rawMode = opt.mode
92
- if (opt.__strongJaModeRaw === rawMode &&
93
- typeof opt.__strongJaMode === 'string' &&
94
- typeof opt.__strongJaModeFlags === 'number') {
95
- return opt
96
- }
97
- const mode = resolveMode(opt)
98
- opt.__strongJaModeRaw = rawMode
99
- opt.__strongJaMode = mode
100
- opt.__strongJaModeFlags = getModeFlags(mode)
101
- return opt
102
- }
103
-
99
+ default:
100
+ return 0
101
+ }
102
+ }
103
+
104
+ const deriveModeInfo = (opt) => {
105
+ if (!opt || typeof opt !== 'object') return opt
106
+ const rawMode = opt.mode
107
+ if (opt.__strongJaModeRaw === rawMode &&
108
+ typeof opt.__strongJaMode === 'string' &&
109
+ typeof opt.__strongJaModeFlags === 'number') {
110
+ return opt
111
+ }
112
+ const mode = resolveMode(opt)
113
+ opt.__strongJaModeRaw = rawMode
114
+ opt.__strongJaMode = mode
115
+ opt.__strongJaModeFlags = getModeFlags(mode)
116
+ return opt
117
+ }
118
+
104
119
  const deriveOptionInfo = (opt) => {
105
120
  if (!opt || typeof opt !== 'object') return opt
106
121
  deriveModeInfo(opt)
@@ -135,21 +150,29 @@ const hasRuntimeOverride = (override) => {
135
150
 
136
151
  const getRuntimeOpt = (state, baseOpt) => {
137
152
  const override = state && state.env ? state.env.__strongJaTokenOpt : null
138
- if (!hasRuntimeOverride(override)) return deriveOptionInfo(baseOpt)
139
- if (state.__strongJaTokenRuntimeOpt &&
153
+ const hasOverride = hasRuntimeOverride(override)
154
+ if (state &&
155
+ state.__strongJaTokenRuntimeOpt &&
140
156
  state.__strongJaTokenRuntimeBase === baseOpt &&
141
- state.__strongJaTokenRuntimeOverride === override) {
157
+ state.__strongJaTokenRuntimeOverride === override &&
158
+ state.__strongJaTokenRuntimeHasOverride === hasOverride) {
142
159
  return state.__strongJaTokenRuntimeOpt
143
160
  }
144
- const merged = baseOpt && typeof baseOpt === 'object' ? { ...baseOpt } : {}
145
- if (HAS_OWN.call(override, 'mode') && override.mode !== undefined) merged.mode = override.mode
146
- if (HAS_OWN.call(override, 'postprocess') && override.postprocess !== undefined) merged.postprocess = override.postprocess
147
- state.__strongJaTokenRuntimeOpt = deriveOptionInfo(merged)
161
+ let resolved = deriveOptionInfo(baseOpt)
162
+ if (hasOverride) {
163
+ const merged = baseOpt && typeof baseOpt === 'object' ? { ...baseOpt } : {}
164
+ if (HAS_OWN.call(override, 'mode') && override.mode !== undefined) merged.mode = override.mode
165
+ if (HAS_OWN.call(override, 'postprocess') && override.postprocess !== undefined) merged.postprocess = override.postprocess
166
+ resolved = deriveOptionInfo(merged)
167
+ }
168
+ if (!state) return resolved
169
+ state.__strongJaTokenRuntimeOpt = resolved
148
170
  state.__strongJaTokenRuntimeBase = baseOpt
149
171
  state.__strongJaTokenRuntimeOverride = override
150
- return state.__strongJaTokenRuntimeOpt
172
+ state.__strongJaTokenRuntimeHasOverride = hasOverride
173
+ return resolved
151
174
  }
152
-
175
+
153
176
  const getReferenceCount = (state) => {
154
177
  if (!state) return 0
155
178
  let referenceCount = state.__strongJaReferenceCount
@@ -167,74 +190,77 @@ const getReferenceCount = (state) => {
167
190
  return referenceCount
168
191
  }
169
192
 
170
- function normalizeCoreRulesBeforePostprocess(value) {
171
- if (!value) return []
172
- const list = Array.isArray(value) ? value : [value]
173
- const normalized = []
174
- const seen = new Set()
175
- for (let idx = 0; idx < list.length; idx++) {
176
- const raw = list[idx]
177
- if (typeof raw !== 'string') continue
178
- const trimmed = raw.trim()
179
- if (!trimmed || seen.has(trimmed)) continue
180
- seen.add(trimmed)
181
- normalized.push(trimmed)
182
- }
183
- return normalized
184
- }
185
-
186
- function ensureCoreRuleOrder(md, ruleNames, targetRuleName) {
187
- if (!md || !md.core || !md.core.ruler) return
188
- if (!ruleNames || ruleNames.length === 0) return
189
- for (let idx = 0; idx < ruleNames.length; idx++) {
190
- moveRuleBefore(md.core.ruler, ruleNames[idx], targetRuleName)
191
- }
192
- }
193
-
194
- function moveRuleBefore(ruler, ruleName, beforeName) {
195
- if (!ruler || !ruler.__rules__) return
196
- const rules = ruler.__rules__
197
- let fromIdx = -1
198
- let beforeIdx = -1
199
- for (let idx = 0; idx < rules.length; idx++) {
200
- if (rules[idx].name === ruleName) fromIdx = idx
201
- if (rules[idx].name === beforeName) beforeIdx = idx
202
- if (fromIdx !== -1 && beforeIdx !== -1) break
203
- }
204
- // Ensure ruleName is before beforeName; keep existing order if already earlier.
205
- if (fromIdx === -1 || beforeIdx === -1 || fromIdx < beforeIdx) return
206
-
207
- const rule = rules.splice(fromIdx, 1)[0]
208
- rules.splice(beforeIdx, 0, rule)
209
- ruler.__cache__ = null
210
- }
211
-
212
- function moveRuleAfter(ruler, ruleName, afterName) {
213
- if (!ruler || !ruler.__rules__) return
214
- const rules = ruler.__rules__
215
- let fromIdx = -1
216
- let afterIdx = -1
217
- for (let idx = 0; idx < rules.length; idx++) {
218
- if (rules[idx].name === ruleName) fromIdx = idx
219
- if (rules[idx].name === afterName) afterIdx = idx
220
- if (fromIdx !== -1 && afterIdx !== -1) break
221
- }
222
- if (fromIdx === -1 || afterIdx === -1 || fromIdx === afterIdx + 1) return
223
-
224
- const rule = rules.splice(fromIdx, 1)[0]
225
- const targetIdx = fromIdx < afterIdx ? afterIdx - 1 : afterIdx
226
- rules.splice(targetIdx + 1, 0, rule)
227
- ruler.__cache__ = null
228
- }
229
-
230
- export {
231
- CHAR_ASTERISK,
232
- CHAR_SPACE,
233
- CHAR_TAB,
234
- CHAR_NEWLINE,
235
- CHAR_IDEOGRAPHIC_SPACE,
193
+ function normalizeCoreRulesBeforePostprocess(value) {
194
+ if (!value) return []
195
+ const list = Array.isArray(value) ? value : [value]
196
+ const normalized = []
197
+ const seen = new Set()
198
+ for (let idx = 0; idx < list.length; idx++) {
199
+ const raw = list[idx]
200
+ if (typeof raw !== 'string') continue
201
+ const trimmed = raw.trim()
202
+ if (!trimmed || seen.has(trimmed)) continue
203
+ seen.add(trimmed)
204
+ normalized.push(trimmed)
205
+ }
206
+ return normalized
207
+ }
208
+
209
+ function ensureCoreRuleOrder(md, ruleNames, targetRuleName) {
210
+ if (!md || !md.core || !md.core.ruler) return
211
+ if (!ruleNames || ruleNames.length === 0) return
212
+ for (let idx = 0; idx < ruleNames.length; idx++) {
213
+ moveRuleBefore(md.core.ruler, ruleNames[idx], targetRuleName)
214
+ }
215
+ }
216
+
217
+ function moveRuleBefore(ruler, ruleName, beforeName) {
218
+ if (!ruler || !ruler.__rules__) return
219
+ const rules = ruler.__rules__
220
+ let fromIdx = -1
221
+ let beforeIdx = -1
222
+ for (let idx = 0; idx < rules.length; idx++) {
223
+ if (rules[idx].name === ruleName) fromIdx = idx
224
+ if (rules[idx].name === beforeName) beforeIdx = idx
225
+ if (fromIdx !== -1 && beforeIdx !== -1) break
226
+ }
227
+ // Ensure ruleName is before beforeName; keep existing order if already earlier.
228
+ if (fromIdx === -1 || beforeIdx === -1 || fromIdx < beforeIdx) return
229
+
230
+ const rule = rules.splice(fromIdx, 1)[0]
231
+ rules.splice(beforeIdx, 0, rule)
232
+ ruler.__cache__ = null
233
+ }
234
+
235
+ function moveRuleAfter(ruler, ruleName, afterName) {
236
+ if (!ruler || !ruler.__rules__) return
237
+ const rules = ruler.__rules__
238
+ let fromIdx = -1
239
+ let afterIdx = -1
240
+ for (let idx = 0; idx < rules.length; idx++) {
241
+ if (rules[idx].name === ruleName) fromIdx = idx
242
+ if (rules[idx].name === afterName) afterIdx = idx
243
+ if (fromIdx !== -1 && afterIdx !== -1) break
244
+ }
245
+ if (fromIdx === -1 || afterIdx === -1 || fromIdx === afterIdx + 1) return
246
+
247
+ const rule = rules.splice(fromIdx, 1)[0]
248
+ const targetIdx = fromIdx < afterIdx ? afterIdx - 1 : afterIdx
249
+ rules.splice(targetIdx + 1, 0, rule)
250
+ ruler.__cache__ = null
251
+ }
252
+
253
+ export {
254
+ CHAR_ASTERISK,
255
+ CHAR_SPACE,
256
+ CHAR_TAB,
257
+ CHAR_NEWLINE,
258
+ CHAR_IDEOGRAPHIC_SPACE,
236
259
  REG_ATTRS,
237
260
  isJapaneseChar,
261
+ isAsciiWordCode,
262
+ isSoftSpaceCode,
263
+ cloneMap,
238
264
  hasCjkBreaksRule,
239
265
  isCjkBreaksRuleName,
240
266
  resolveMode,
@@ -243,14 +269,14 @@ export {
243
269
  deriveOptionInfo,
244
270
  hasRuntimeOverride,
245
271
  MODE_FLAG_COMPATIBLE,
246
- MODE_FLAG_AGGRESSIVE,
247
- MODE_FLAG_JAPANESE_BASE,
248
- MODE_FLAG_JAPANESE_PLUS,
272
+ MODE_FLAG_AGGRESSIVE,
273
+ MODE_FLAG_JAPANESE_BASE,
274
+ MODE_FLAG_JAPANESE_PLUS,
249
275
  MODE_FLAG_JAPANESE_ANY,
250
276
  getRuntimeOpt,
251
277
  getReferenceCount,
252
- normalizeCoreRulesBeforePostprocess,
253
- ensureCoreRuleOrder,
254
- moveRuleBefore,
255
- moveRuleAfter
256
- }
278
+ normalizeCoreRulesBeforePostprocess,
279
+ ensureCoreRuleOrder,
280
+ moveRuleBefore,
281
+ moveRuleAfter
282
+ }