@peaceroad/markdown-it-strong-ja 0.9.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/token-compat.js +1 -6
- package/src/token-core.js +33 -40
- package/src/token-link-utils.js +1 -6
- package/src/token-postprocess/broken-ref.js +113 -24
- package/src/token-postprocess/emphasis-balance.js +50 -0
- package/src/token-postprocess/fastpaths.js +1 -5
- package/src/token-postprocess/guards.js +68 -10
- package/src/token-postprocess/orchestrator.js +89 -119
- package/src/token-utils.js +155 -129
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@peaceroad/markdown-it-strong-ja",
|
|
3
3
|
"description": "Extends asterisk emphasis handling for Japanese text while keeping markdown-it behavior as close as practical.",
|
|
4
|
-
"version": "0.9.0",
|
|
4
|
+
"version": "0.9.1",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"type": "module",
|
|
7
7
|
"files": [
|
|
@@ -26,6 +26,7 @@
|
|
|
26
26
|
"test:all": "node test/test-all.js",
|
|
27
27
|
"bench:scan": "node test/material/perf-scan-delims.mjs",
|
|
28
28
|
"bench:postprocess": "node test/material/perf-postprocess.mjs",
|
|
29
|
+
"bench:isolated": "node test/material/bench-isolated.mjs",
|
|
29
30
|
"analyze:postprocess-calls": "node test/material/analyze-postprocess-calls.mjs",
|
|
30
31
|
"analyze:fastpath": "node test/material/analyze-fastpath-hits.mjs"
|
|
31
32
|
},
|
|
@@ -46,4 +47,3 @@
|
|
|
46
47
|
"p7d-markdown-it-p-captions": "^0.21.0"
|
|
47
48
|
}
|
|
48
49
|
}
|
|
49
|
-
|
package/src/token-compat.js
CHANGED
|
@@ -2,6 +2,7 @@ import Token from 'markdown-it/lib/token.mjs'
|
|
|
2
2
|
import {
|
|
3
3
|
REG_ATTRS,
|
|
4
4
|
isJapaneseChar,
|
|
5
|
+
isAsciiWordCode,
|
|
5
6
|
hasCjkBreaksRule,
|
|
6
7
|
isCjkBreaksRuleName,
|
|
7
8
|
getRuntimeOpt,
|
|
@@ -9,12 +10,6 @@ import {
|
|
|
9
10
|
moveRuleAfter
|
|
10
11
|
} from './token-utils.js'
|
|
11
12
|
|
|
12
|
-
const isAsciiWordCode = (code) => {
|
|
13
|
-
return (code >= 0x30 && code <= 0x39) ||
|
|
14
|
-
(code >= 0x41 && code <= 0x5A) ||
|
|
15
|
-
(code >= 0x61 && code <= 0x7A)
|
|
16
|
-
}
|
|
17
|
-
|
|
18
13
|
const trimTrailingSpaceTab = (text) => {
|
|
19
14
|
if (!text) return text
|
|
20
15
|
let end = text.length
|
package/src/token-core.js
CHANGED
|
@@ -2,16 +2,15 @@ import { isWhiteSpace } from 'markdown-it/lib/common/utils.mjs'
|
|
|
2
2
|
import Token from 'markdown-it/lib/token.mjs'
|
|
3
3
|
import {
|
|
4
4
|
CHAR_ASTERISK,
|
|
5
|
-
CHAR_SPACE,
|
|
6
|
-
CHAR_TAB,
|
|
7
5
|
CHAR_NEWLINE,
|
|
8
|
-
CHAR_IDEOGRAPHIC_SPACE,
|
|
9
6
|
isJapaneseChar,
|
|
7
|
+
isAsciiWordCode,
|
|
8
|
+
isSoftSpaceCode,
|
|
10
9
|
MODE_FLAG_COMPATIBLE,
|
|
11
10
|
MODE_FLAG_AGGRESSIVE,
|
|
12
11
|
MODE_FLAG_JAPANESE_PLUS,
|
|
13
|
-
|
|
14
|
-
|
|
12
|
+
hasRuntimeOverride,
|
|
13
|
+
getRuntimeOpt
|
|
15
14
|
} from './token-utils.js'
|
|
16
15
|
|
|
17
16
|
const SCAN_DELIMS_PATCHED = Symbol.for('strongJaTokenScanDelimsPatched')
|
|
@@ -20,10 +19,6 @@ const PREV_STAR_HAS_OPENER = 1
|
|
|
20
19
|
const PREV_STAR_HAS_JP_BETWEEN = 2
|
|
21
20
|
const SCAN_DELIMS_LOOKUP_KEY = Symbol.for('strongJaTokenScanDelimsLookup')
|
|
22
21
|
|
|
23
|
-
const isSoftSpaceCode = (code) => {
|
|
24
|
-
return code === CHAR_SPACE || code === CHAR_TAB || code === CHAR_IDEOGRAPHIC_SPACE
|
|
25
|
-
}
|
|
26
|
-
|
|
27
22
|
const isPlusQuoteWrapperOpen = (code) => {
|
|
28
23
|
return code === 0x2018 || // ‘
|
|
29
24
|
code === 0x201C || // “
|
|
@@ -256,12 +251,6 @@ const isSingleStarClosingBoundary = (code) => {
|
|
|
256
251
|
isClosingBracketLike(code)
|
|
257
252
|
}
|
|
258
253
|
|
|
259
|
-
const isAsciiAlphaNum = (code) => {
|
|
260
|
-
return (code >= 0x30 && code <= 0x39) ||
|
|
261
|
-
(code >= 0x41 && code <= 0x5A) ||
|
|
262
|
-
(code >= 0x61 && code <= 0x7A)
|
|
263
|
-
}
|
|
264
|
-
|
|
265
254
|
const isAsciiGuardOpenWrapper = (code) => {
|
|
266
255
|
return code === 0x22 || // "
|
|
267
256
|
code === 0x27 || // '
|
|
@@ -370,7 +359,7 @@ const hasAsciiStartAfterOptionalOpenWrappers = (src, index, max, lookupCache = n
|
|
|
370
359
|
if (i === -1) return false
|
|
371
360
|
}
|
|
372
361
|
if (i < 0 || i >= max) return false
|
|
373
|
-
return isAsciiAlphaNum(src.charCodeAt(i))
|
|
362
|
+
return isAsciiWordCode(src.charCodeAt(i))
|
|
374
363
|
}
|
|
375
364
|
|
|
376
365
|
const hasAsciiEndBeforeOptionalCloseWrappers = (src, index, lookupCache = null) => {
|
|
@@ -383,7 +372,7 @@ const hasAsciiEndBeforeOptionalCloseWrappers = (src, index, lookupCache = null)
|
|
|
383
372
|
if (i === -1) return false
|
|
384
373
|
}
|
|
385
374
|
if (i < 0) return false
|
|
386
|
-
return isAsciiAlphaNum(src.charCodeAt(i))
|
|
375
|
+
return isAsciiWordCode(src.charCodeAt(i))
|
|
387
376
|
}
|
|
388
377
|
|
|
389
378
|
const isMarkdownStructuralOpenWrapper = (code) => {
|
|
@@ -870,8 +859,10 @@ const patchScanDelims = (md) => {
|
|
|
870
859
|
if (!aggressiveMode && count === 1) {
|
|
871
860
|
// Keep local directionality to avoid degrading markdown-it-valid runs,
|
|
872
861
|
// e.g. `[。*a**](u)` where the first `*` should remain opener-only.
|
|
873
|
-
const
|
|
874
|
-
const
|
|
862
|
+
const rightIsOpenWrapper = isWrapperOpenLike(nextChar)
|
|
863
|
+
const leftIsCloseWrapper = isWrapperCloseLike(lastChar)
|
|
864
|
+
const rightIsBoundary = isSingleStarClosingBoundary(nextChar) || rightIsOpenWrapper
|
|
865
|
+
const leftIsBoundary = isSingleStarBoundary(lastChar) || leftIsCloseWrapper
|
|
875
866
|
if (leftJapanese && !rightJapanese && !rightIsBoundary) {
|
|
876
867
|
prevStarFlags = ensurePrevStarFlags(src, start, prevStarFlags)
|
|
877
868
|
if ((prevStarFlags & PREV_STAR_HAS_OPENER) === 0) {
|
|
@@ -880,28 +871,30 @@ const patchScanDelims = (md) => {
|
|
|
880
871
|
} else if (!leftJapanese && rightJapanese && !leftIsBoundary) {
|
|
881
872
|
relaxedOpen = false
|
|
882
873
|
}
|
|
883
|
-
const rightIsOpenWrapper = isWrapperOpenLike(nextChar)
|
|
884
|
-
const leftIsCloseWrapper = isWrapperCloseLike(lastChar)
|
|
885
|
-
prevStarFlags = ensurePrevStarFlags(src, start, prevStarFlags)
|
|
886
|
-
const hasPrevJapaneseOpener = (prevStarFlags & PREV_STAR_HAS_OPENER) !== 0
|
|
887
|
-
const hasJapaneseSincePrevStar = (prevStarFlags & PREV_STAR_HAS_JP_BETWEEN) !== 0
|
|
888
874
|
const leftIsExtraClosePunct = isExtraSingleStarClosePunct(lastChar)
|
|
889
|
-
const
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
875
|
+
const canCheckForceOpen =
|
|
876
|
+
leftJapanese && rightIsOpenWrapper && !isMarkdownStructuralOpenWrapper(nextChar)
|
|
877
|
+
const canCheckForceClose =
|
|
878
|
+
(leftIsCloseWrapper && rightJapanese) ||
|
|
879
|
+
((leftIsCloseWrapper || leftIsExtraClosePunct) && !rightJapanese && !rightIsBoundary)
|
|
880
|
+
if (canCheckForceOpen || canCheckForceClose) {
|
|
881
|
+
prevStarFlags = ensurePrevStarFlags(src, start, prevStarFlags)
|
|
882
|
+
const hasPrevJapaneseOpener = (prevStarFlags & PREV_STAR_HAS_OPENER) !== 0
|
|
883
|
+
const hasJapaneseSincePrevStar = (prevStarFlags & PREV_STAR_HAS_JP_BETWEEN) !== 0
|
|
884
|
+
const canForceCloseByPunct = leftIsExtraClosePunct && hasJapaneseSincePrevStar
|
|
885
|
+
if (canCheckForceOpen && !hasPrevJapaneseOpener) {
|
|
886
|
+
forceOpen = true
|
|
887
|
+
forceClose = false
|
|
888
|
+
} else if (leftIsCloseWrapper && rightJapanese && hasPrevJapaneseOpener) {
|
|
889
|
+
forceOpen = false
|
|
890
|
+
forceClose = true
|
|
891
|
+
} else if ((leftIsCloseWrapper || canForceCloseByPunct) &&
|
|
892
|
+
!rightJapanese &&
|
|
893
|
+
!rightIsBoundary &&
|
|
894
|
+
hasPrevJapaneseOpener) {
|
|
895
|
+
forceOpen = false
|
|
896
|
+
forceClose = true
|
|
897
|
+
}
|
|
905
898
|
}
|
|
906
899
|
}
|
|
907
900
|
const finalOpen = forceOpen === null ? ((base && base.can_open) || relaxedOpen) : forceOpen
|
package/src/token-link-utils.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import Token from 'markdown-it/lib/token.mjs'
|
|
2
2
|
import { isWhiteSpace } from 'markdown-it/lib/common/utils.mjs'
|
|
3
|
-
import { getReferenceCount } from './token-utils.js'
|
|
3
|
+
import { cloneMap, getReferenceCount } from './token-utils.js'
|
|
4
4
|
|
|
5
5
|
const CHAR_OPEN_BRACKET = 0x5B // [
|
|
6
6
|
const CHAR_CLOSE_BRACKET = 0x5D // ]
|
|
@@ -80,11 +80,6 @@ const getNormalizeRef = (state) => {
|
|
|
80
80
|
}
|
|
81
81
|
|
|
82
82
|
|
|
83
|
-
const cloneMap = (map) => {
|
|
84
|
-
if (!map || !Array.isArray(map)) return null
|
|
85
|
-
return [map[0], map[1]]
|
|
86
|
-
}
|
|
87
|
-
|
|
88
83
|
const getMapFromTokenRange = (tokens, startIdx, endIdx) => {
|
|
89
84
|
if (!tokens || startIdx > endIdx) return null
|
|
90
85
|
let startLine = null
|
|
@@ -115,14 +115,14 @@ const expandSegmentEndForWrapperBalance = (tokens, startIdx, endIdx) => {
|
|
|
115
115
|
return balance.total > 0 ? -1 : expandedEnd
|
|
116
116
|
}
|
|
117
117
|
|
|
118
|
-
const bumpBrokenRefMetric = (metrics, bucket, key) => {
|
|
119
|
-
if (!metrics || !bucket || !key) return
|
|
118
|
+
const bumpBrokenRefMetric = (metrics, bucket, key, delta = 1) => {
|
|
119
|
+
if (!metrics || !bucket || !key || delta <= 0) return
|
|
120
120
|
let table = metrics[bucket]
|
|
121
121
|
if (!table || typeof table !== 'object') {
|
|
122
122
|
table = Object.create(null)
|
|
123
123
|
metrics[bucket] = table
|
|
124
124
|
}
|
|
125
|
-
table[key] = (table[key] || 0) + 1
|
|
125
|
+
table[key] = (table[key] || 0) + delta
|
|
126
126
|
}
|
|
127
127
|
|
|
128
128
|
const ensureBrokenRefLinkCloseMap = (tokens, facts = null, hooks = null, fallbackCache = null) => {
|
|
@@ -190,6 +190,7 @@ const resolveBrokenRefCandidateGuardFlow = (
|
|
|
190
190
|
children,
|
|
191
191
|
brokenRefCandidate,
|
|
192
192
|
segmentEnd,
|
|
193
|
+
metrics = null,
|
|
193
194
|
facts = null,
|
|
194
195
|
hooks = null,
|
|
195
196
|
fallbackCache = null
|
|
@@ -203,6 +204,10 @@ const resolveBrokenRefCandidateGuardFlow = (
|
|
|
203
204
|
if (!wrapperSignals.hasTextMarker) {
|
|
204
205
|
return BROKEN_REF_FLOW_SKIP_NO_TEXT_MARKER
|
|
205
206
|
}
|
|
207
|
+
if (!hasBrokenRefActiveFastPathTokenSignal(wrapperSignals)) {
|
|
208
|
+
bumpBrokenRefMetric(metrics, 'brokenRefCandidateFlow', 'no-active-signature')
|
|
209
|
+
return BROKEN_REF_FLOW_SKIP_NO_ACTIVE_SIGNATURE
|
|
210
|
+
}
|
|
206
211
|
const wrapperPrefixStats = ensureBrokenRefWrapperPrefixStats(children, facts, hooks, fallbackCache)
|
|
207
212
|
if (!shouldAttemptBrokenRefRewrite(
|
|
208
213
|
children,
|
|
@@ -214,6 +219,7 @@ const resolveBrokenRefCandidateGuardFlow = (
|
|
|
214
219
|
)) {
|
|
215
220
|
return BROKEN_REF_FLOW_SKIP_GUARD
|
|
216
221
|
}
|
|
222
|
+
bumpBrokenRefMetric(metrics, 'brokenRefCandidateFlow', 'guard-passed')
|
|
217
223
|
return null
|
|
218
224
|
}
|
|
219
225
|
|
|
@@ -232,12 +238,16 @@ const resolveBrokenRefFastPathFlow = (
|
|
|
232
238
|
metrics,
|
|
233
239
|
bumpBrokenRefMetric
|
|
234
240
|
)
|
|
241
|
+
bumpBrokenRefMetric(metrics, 'brokenRefCandidateFlow', 'fastpath-dispatch')
|
|
235
242
|
if (fastPathResult === BROKEN_REF_FAST_PATH_RESULT_NO_ACTIVE_SIGNATURE) {
|
|
243
|
+
bumpBrokenRefMetric(metrics, 'brokenRefCandidateFlow', 'no-active-signature')
|
|
236
244
|
return BROKEN_REF_FLOW_SKIP_NO_ACTIVE_SIGNATURE
|
|
237
245
|
}
|
|
238
246
|
if (fastPathResult === BROKEN_REF_FAST_PATH_RESULT_NO_MATCH) {
|
|
247
|
+
bumpBrokenRefMetric(metrics, 'brokenRefCandidateFlow', 'no-fastpath-match')
|
|
239
248
|
return BROKEN_REF_FLOW_SKIP_NO_FASTPATH_MATCH
|
|
240
249
|
}
|
|
250
|
+
bumpBrokenRefMetric(metrics, 'brokenRefCandidateFlow', 'repaired')
|
|
241
251
|
return BROKEN_REF_FLOW_REPAIRED
|
|
242
252
|
}
|
|
243
253
|
|
|
@@ -256,6 +266,7 @@ const runBrokenRefCandidateRewrite = (
|
|
|
256
266
|
children,
|
|
257
267
|
brokenRefCandidate,
|
|
258
268
|
segmentEnd,
|
|
269
|
+
metrics,
|
|
259
270
|
facts,
|
|
260
271
|
hooks,
|
|
261
272
|
fallbackCache
|
|
@@ -308,7 +319,7 @@ const createBrokenRefPassSignals = (seedSignals = null) => {
|
|
|
308
319
|
const observeBrokenRefTextToken = (passSignals, candidateState, text, tokenIdx, scanState) => {
|
|
309
320
|
const hasOpenBracket = text.indexOf('[') !== -1
|
|
310
321
|
const hasCloseBracket = text.indexOf(']') !== -1
|
|
311
|
-
if (!passSignals.hasBracketText && (hasOpenBracket || hasCloseBracket)) {
|
|
322
|
+
if (passSignals && !passSignals.hasBracketText && (hasOpenBracket || hasCloseBracket)) {
|
|
312
323
|
passSignals.hasBracketText = true
|
|
313
324
|
}
|
|
314
325
|
if (candidateState.start === -1) {
|
|
@@ -383,6 +394,7 @@ const tryRepairBrokenRefCandidateAtLinkOpen = (
|
|
|
383
394
|
const closeIdx = linkCloseMap.get(childIdx) ?? -1
|
|
384
395
|
if (closeIdx === -1) return null
|
|
385
396
|
bumpBrokenRefMetric(metrics, 'brokenRefFlow', 'candidate')
|
|
397
|
+
bumpBrokenRefMetric(metrics, 'brokenRefCandidateFlow', 'candidate')
|
|
386
398
|
const flowResult = runBrokenRefCandidateRewrite(
|
|
387
399
|
children,
|
|
388
400
|
brokenRefCandidate,
|
|
@@ -437,46 +449,123 @@ const runBrokenRefRepairPass = (children, scanState, metrics = null, facts = nul
|
|
|
437
449
|
return buildBrokenRefRepairPassResult(false, passSignals)
|
|
438
450
|
}
|
|
439
451
|
|
|
440
|
-
const
|
|
452
|
+
const hasPotentialBrokenRefRepairPass = (children, scanState) => {
|
|
441
453
|
resetBrokenRefScanState(scanState)
|
|
442
|
-
let maxRepairPass = 0
|
|
443
454
|
for (let j = 0; j < children.length; j++) {
|
|
444
455
|
const child = children[j]
|
|
445
456
|
if (!child || child.type !== 'text' || !child.content) continue
|
|
446
457
|
if (child.content.indexOf('[') === -1) continue
|
|
447
458
|
if (scanBrokenRefState(child.content, scanState).brokenEnd) {
|
|
448
|
-
|
|
459
|
+
return true
|
|
449
460
|
}
|
|
450
461
|
}
|
|
451
|
-
return
|
|
462
|
+
return false
|
|
463
|
+
}
|
|
464
|
+
|
|
465
|
+
const hasBrokenRefActiveFastPathTokenSignal = (wrapperSignals) => {
|
|
466
|
+
if (!wrapperSignals) return false
|
|
467
|
+
// Current broken-ref fast paths are all strong-token driven.
|
|
468
|
+
return wrapperSignals.strongOpenInRange > 0 || wrapperSignals.strongCloseInRange > 0
|
|
452
469
|
}
|
|
453
470
|
|
|
454
|
-
const
|
|
471
|
+
const countGuardedBrokenRefRepairPasses = (children, scanState, facts = null, hooks = null) => {
|
|
472
|
+
resetBrokenRefScanState(scanState)
|
|
473
|
+
const brokenRefCandidate = resetBrokenRefCandidateState({ start: -1, depth: 0, startTextOffset: 0 })
|
|
474
|
+
const fallbackCache = {
|
|
475
|
+
linkCloseMap: undefined,
|
|
476
|
+
wrapperPrefixStats: undefined
|
|
477
|
+
}
|
|
455
478
|
let repairPassCount = 0
|
|
479
|
+
for (let j = 0; j < children.length; j++) {
|
|
480
|
+
const child = children[j]
|
|
481
|
+
if (!child) continue
|
|
482
|
+
if (child.type === 'text' && child.content) {
|
|
483
|
+
observeBrokenRefTextToken(null, brokenRefCandidate, child.content, j, scanState)
|
|
484
|
+
}
|
|
485
|
+
if (child.type !== 'link_open' || brokenRefCandidate.start === -1) continue
|
|
486
|
+
if (brokenRefCandidate.depth <= 0) {
|
|
487
|
+
resetBrokenRefCandidateState(brokenRefCandidate)
|
|
488
|
+
continue
|
|
489
|
+
}
|
|
490
|
+
const linkCloseMap = ensureBrokenRefLinkCloseMap(children, facts, hooks, fallbackCache)
|
|
491
|
+
const closeIdx = linkCloseMap.get(j) ?? -1
|
|
492
|
+
if (closeIdx === -1) continue
|
|
493
|
+
const segmentEnd = resolveBrokenRefSegmentEnd(children, brokenRefCandidate, closeIdx)
|
|
494
|
+
const wrapperSignals = buildBrokenRefWrapperRangeSignals(
|
|
495
|
+
children,
|
|
496
|
+
brokenRefCandidate.start,
|
|
497
|
+
segmentEnd,
|
|
498
|
+
brokenRefCandidate.startTextOffset
|
|
499
|
+
)
|
|
500
|
+
if (!wrapperSignals.hasTextMarker || !hasBrokenRefActiveFastPathTokenSignal(wrapperSignals)) {
|
|
501
|
+
resetBrokenRefCandidateState(brokenRefCandidate)
|
|
502
|
+
continue
|
|
503
|
+
}
|
|
504
|
+
const wrapperPrefixStats = ensureBrokenRefWrapperPrefixStats(children, facts, hooks, fallbackCache)
|
|
505
|
+
if (shouldAttemptBrokenRefRewrite(
|
|
506
|
+
children,
|
|
507
|
+
brokenRefCandidate.start,
|
|
508
|
+
segmentEnd,
|
|
509
|
+
brokenRefCandidate.startTextOffset,
|
|
510
|
+
wrapperPrefixStats,
|
|
511
|
+
wrapperSignals
|
|
512
|
+
)) {
|
|
513
|
+
repairPassCount++
|
|
514
|
+
}
|
|
515
|
+
resetBrokenRefCandidateState(brokenRefCandidate)
|
|
516
|
+
}
|
|
517
|
+
return repairPassCount
|
|
518
|
+
}
|
|
519
|
+
|
|
520
|
+
const buildBrokenRefRepairsResult = (changed, passSignals) => {
|
|
521
|
+
return {
|
|
522
|
+
changed,
|
|
523
|
+
hasBracketText: passSignals.hasBracketText,
|
|
524
|
+
hasEmphasis: passSignals.hasEmphasis,
|
|
525
|
+
hasLinkClose: passSignals.hasLinkClose
|
|
526
|
+
}
|
|
527
|
+
}
|
|
528
|
+
|
|
529
|
+
const runBrokenRefRepairs = (children, scanState, metrics = null, facts = null, hooks = null) => {
|
|
530
|
+
const seedSignals = createBrokenRefPassSignals(createBrokenRefSignalSeed(facts))
|
|
531
|
+
if (!hasPotentialBrokenRefRepairPass(children, scanState)) {
|
|
532
|
+
return buildBrokenRefRepairsResult(false, seedSignals)
|
|
533
|
+
}
|
|
534
|
+
|
|
456
535
|
let changed = false
|
|
457
|
-
|
|
458
|
-
|
|
536
|
+
bumpBrokenRefMetric(metrics, 'brokenRefPasses', 'budgeted')
|
|
537
|
+
bumpBrokenRefMetric(metrics, 'brokenRefPasses', 'executed')
|
|
538
|
+
|
|
539
|
+
let pass = runBrokenRefRepairPass(children, scanState, metrics, facts, hooks)
|
|
540
|
+
if (!pass.didRepair) {
|
|
541
|
+
bumpBrokenRefMetric(metrics, 'brokenRefPasses', 'stopped-no-repair')
|
|
542
|
+
return buildBrokenRefRepairsResult(changed, pass)
|
|
543
|
+
}
|
|
544
|
+
|
|
545
|
+
changed = true
|
|
546
|
+
bumpBrokenRefMetric(metrics, 'brokenRefPasses', 'repaired')
|
|
547
|
+
|
|
548
|
+
const remainingBudget = countGuardedBrokenRefRepairPasses(children, scanState, facts, hooks)
|
|
549
|
+
if (remainingBudget > 0) {
|
|
550
|
+
bumpBrokenRefMetric(metrics, 'brokenRefPasses', 'budgeted', remainingBudget)
|
|
551
|
+
}
|
|
552
|
+
|
|
553
|
+
let repairPassCount = 0
|
|
554
|
+
while (repairPassCount < remainingBudget) {
|
|
555
|
+
bumpBrokenRefMetric(metrics, 'brokenRefPasses', 'executed')
|
|
556
|
+
pass = runBrokenRefRepairPass(children, scanState, metrics, facts, hooks)
|
|
459
557
|
if (!pass.didRepair) {
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
hasBracketText: pass.hasBracketText,
|
|
463
|
-
hasEmphasis: pass.hasEmphasis,
|
|
464
|
-
hasLinkClose: pass.hasLinkClose
|
|
465
|
-
}
|
|
558
|
+
bumpBrokenRefMetric(metrics, 'brokenRefPasses', 'stopped-no-repair')
|
|
559
|
+
return buildBrokenRefRepairsResult(changed, pass)
|
|
466
560
|
}
|
|
467
561
|
changed = true
|
|
468
562
|
repairPassCount++
|
|
563
|
+
bumpBrokenRefMetric(metrics, 'brokenRefPasses', 'repaired')
|
|
469
564
|
}
|
|
470
565
|
const finalSignals = collectBrokenRefPassSignals(children, createBrokenRefSignalSeed(facts))
|
|
471
|
-
return
|
|
472
|
-
changed,
|
|
473
|
-
hasBracketText: finalSignals.hasBracketText,
|
|
474
|
-
hasEmphasis: finalSignals.hasEmphasis,
|
|
475
|
-
hasLinkClose: finalSignals.hasLinkClose
|
|
476
|
-
}
|
|
566
|
+
return buildBrokenRefRepairsResult(changed, finalSignals)
|
|
477
567
|
}
|
|
478
568
|
|
|
479
569
|
export {
|
|
480
|
-
computeMaxBrokenRefRepairPass,
|
|
481
570
|
runBrokenRefRepairs
|
|
482
571
|
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
const fallbackMarkupByType = (type) => {
|
|
2
|
+
if (type === 'strong_open' || type === 'strong_close') return '**'
|
|
3
|
+
if (type === 'em_open' || type === 'em_close') return '*'
|
|
4
|
+
return ''
|
|
5
|
+
}
|
|
6
|
+
|
|
7
|
+
const makeTokenLiteralText = (token) => {
|
|
8
|
+
if (!token) return
|
|
9
|
+
const literal = token.markup || fallbackMarkupByType(token.type)
|
|
10
|
+
token.type = 'text'
|
|
11
|
+
token.tag = ''
|
|
12
|
+
token.nesting = 0
|
|
13
|
+
token.content = literal
|
|
14
|
+
token.markup = ''
|
|
15
|
+
token.info = ''
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
const sanitizeEmStrongBalance = (tokens, onChangeStart = null) => {
|
|
19
|
+
if (!tokens || tokens.length === 0) return false
|
|
20
|
+
const stack = []
|
|
21
|
+
let changed = false
|
|
22
|
+
for (let i = 0; i < tokens.length; i++) {
|
|
23
|
+
const token = tokens[i]
|
|
24
|
+
if (!token || !token.type) continue
|
|
25
|
+
if (token.type === 'strong_open' || token.type === 'em_open') {
|
|
26
|
+
stack.push({ type: token.type, idx: i })
|
|
27
|
+
continue
|
|
28
|
+
}
|
|
29
|
+
if (token.type !== 'strong_close' && token.type !== 'em_close') continue
|
|
30
|
+
const expected = token.type === 'strong_close' ? 'strong_open' : 'em_open'
|
|
31
|
+
if (stack.length > 0 && stack[stack.length - 1].type === expected) {
|
|
32
|
+
stack.pop()
|
|
33
|
+
continue
|
|
34
|
+
}
|
|
35
|
+
if (onChangeStart) onChangeStart(i)
|
|
36
|
+
makeTokenLiteralText(token)
|
|
37
|
+
changed = true
|
|
38
|
+
}
|
|
39
|
+
for (let i = stack.length - 1; i >= 0; i--) {
|
|
40
|
+
const entry = stack[i]
|
|
41
|
+
const token = tokens[entry.idx]
|
|
42
|
+
if (!token) continue
|
|
43
|
+
if (onChangeStart) onChangeStart(entry.idx)
|
|
44
|
+
makeTokenLiteralText(token)
|
|
45
|
+
changed = true
|
|
46
|
+
}
|
|
47
|
+
return changed
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
export { sanitizeEmStrongBalance }
|
|
@@ -1,9 +1,5 @@
|
|
|
1
1
|
import Token from 'markdown-it/lib/token.mjs'
|
|
2
|
-
|
|
3
|
-
const cloneMap = (map) => {
|
|
4
|
-
if (!map || !Array.isArray(map)) return null
|
|
5
|
-
return [map[0], map[1]]
|
|
6
|
-
}
|
|
2
|
+
import { cloneMap } from '../token-utils.js'
|
|
7
3
|
|
|
8
4
|
const cloneTextLike = (source, content) => {
|
|
9
5
|
const token = new Token('text', '', 0)
|
|
@@ -1,6 +1,11 @@
|
|
|
1
1
|
import { isJapaneseChar } from '../token-utils.js'
|
|
2
2
|
|
|
3
3
|
const CHAR_ASTERISK = 0x2A // *
|
|
4
|
+
const INLINE_REPAIR_EM_OUTER_STRONG_SEQUENCE = 1 << 0
|
|
5
|
+
const INLINE_REPAIR_TAIL_AFTER_LINK = 1 << 1
|
|
6
|
+
const INLINE_REPAIR_LEADING_ASTERISK_EM = 1 << 2
|
|
7
|
+
const INLINE_REPAIR_TRAILING_STRONG = 1 << 3
|
|
8
|
+
const INLINE_REPAIR_BALANCE_SANITIZE = 1 << 4
|
|
4
9
|
|
|
5
10
|
const hasMarkerChars = (text) => {
|
|
6
11
|
return !!text && text.indexOf('*') !== -1
|
|
@@ -389,13 +394,9 @@ const hasBrokenRefImmediateRewriteSignal = (wrapperSignals) => {
|
|
|
389
394
|
return wrapperSignals.hasImbalance && hasBrokenRefExplicitAsteriskSignal(wrapperSignals)
|
|
390
395
|
}
|
|
391
396
|
|
|
392
|
-
const shouldRejectBalancedBrokenRefRewrite = (wrapperSignals) => {
|
|
393
|
-
return !wrapperSignals.hasImbalance && hasBrokenRefExplicitAsteriskSignal(wrapperSignals)
|
|
394
|
-
}
|
|
395
|
-
|
|
396
397
|
const shouldAttemptBrokenRefRewriteFromSignals = (wrapperSignals) => {
|
|
397
398
|
if (hasBrokenRefImmediateRewriteSignal(wrapperSignals)) return true
|
|
398
|
-
if (
|
|
399
|
+
if (!wrapperSignals.hasImbalance && hasBrokenRefExplicitAsteriskSignal(wrapperSignals)) return false
|
|
399
400
|
return hasBrokenRefStrongRunEvidence(wrapperSignals)
|
|
400
401
|
}
|
|
401
402
|
|
|
@@ -413,16 +414,47 @@ const shouldAttemptBrokenRefRewrite = (
|
|
|
413
414
|
return shouldAttemptBrokenRefRewriteFromSignals(signals)
|
|
414
415
|
}
|
|
415
416
|
|
|
416
|
-
const scanInlinePostprocessSignals = (children) => {
|
|
417
|
+
const scanInlinePostprocessSignals = (children, collectJapaneseContext = false) => {
|
|
417
418
|
let hasEmphasis = false
|
|
418
419
|
let hasLinkOpen = false
|
|
419
420
|
let hasLinkClose = false
|
|
420
421
|
let hasCodeInline = false
|
|
422
|
+
let hasJapaneseContext = false
|
|
423
|
+
let hasTextStrongMarker = false
|
|
424
|
+
let strongOpenCount = 0
|
|
425
|
+
let strongCloseCount = 0
|
|
426
|
+
let emOpenCount = 0
|
|
427
|
+
let emCloseCount = 0
|
|
428
|
+
let hasAsteriskWrapperImbalance = false
|
|
429
|
+
const emphasisStack = []
|
|
421
430
|
for (let j = 0; j < children.length; j++) {
|
|
422
431
|
const child = children[j]
|
|
423
432
|
if (!child) continue
|
|
424
|
-
if (!
|
|
433
|
+
if (collectJapaneseContext && !hasJapaneseContext && tokenHasJapaneseChars(child)) {
|
|
434
|
+
hasJapaneseContext = true
|
|
435
|
+
}
|
|
436
|
+
if (!hasTextStrongMarker && child.type === 'text' && child.content && child.content.indexOf('**') !== -1) {
|
|
437
|
+
hasTextStrongMarker = true
|
|
438
|
+
}
|
|
439
|
+
const isAsteriskEmphasis = isAsteriskEmphasisToken(child)
|
|
440
|
+
if (isAsteriskEmphasis) {
|
|
425
441
|
hasEmphasis = true
|
|
442
|
+
if (child.type === 'strong_open') strongOpenCount++
|
|
443
|
+
else if (child.type === 'strong_close') strongCloseCount++
|
|
444
|
+
else if (child.type === 'em_open') emOpenCount++
|
|
445
|
+
else if (child.type === 'em_close') emCloseCount++
|
|
446
|
+
if (!hasAsteriskWrapperImbalance) {
|
|
447
|
+
if (child.type === 'strong_open' || child.type === 'em_open') {
|
|
448
|
+
emphasisStack.push(child.type)
|
|
449
|
+
} else {
|
|
450
|
+
const expected = child.type === 'strong_close' ? 'strong_open' : 'em_open'
|
|
451
|
+
if (emphasisStack.length > 0 && emphasisStack[emphasisStack.length - 1] === expected) {
|
|
452
|
+
emphasisStack.pop()
|
|
453
|
+
} else {
|
|
454
|
+
hasAsteriskWrapperImbalance = true
|
|
455
|
+
}
|
|
456
|
+
}
|
|
457
|
+
}
|
|
426
458
|
}
|
|
427
459
|
if (!hasLinkOpen && child.type === 'link_open') {
|
|
428
460
|
hasLinkOpen = true
|
|
@@ -433,13 +465,34 @@ const scanInlinePostprocessSignals = (children) => {
|
|
|
433
465
|
if (!hasCodeInline && child.type === 'code_inline') {
|
|
434
466
|
hasCodeInline = true
|
|
435
467
|
}
|
|
436
|
-
|
|
468
|
+
}
|
|
469
|
+
if (!hasAsteriskWrapperImbalance && emphasisStack.length > 0) {
|
|
470
|
+
hasAsteriskWrapperImbalance = true
|
|
471
|
+
}
|
|
472
|
+
let repairMask = 0
|
|
473
|
+
if (emOpenCount >= 2 && emCloseCount >= 2 && strongOpenCount > 0) {
|
|
474
|
+
repairMask |= INLINE_REPAIR_EM_OUTER_STRONG_SEQUENCE
|
|
475
|
+
}
|
|
476
|
+
if (hasLinkClose && strongCloseCount > 0) {
|
|
477
|
+
repairMask |= INLINE_REPAIR_TAIL_AFTER_LINK
|
|
478
|
+
}
|
|
479
|
+
if (hasLinkClose && emCloseCount > 0) {
|
|
480
|
+
repairMask |= INLINE_REPAIR_LEADING_ASTERISK_EM
|
|
481
|
+
}
|
|
482
|
+
if (emOpenCount > 0 && emCloseCount > 0 && hasTextStrongMarker) {
|
|
483
|
+
repairMask |= INLINE_REPAIR_TRAILING_STRONG
|
|
484
|
+
}
|
|
485
|
+
if (hasAsteriskWrapperImbalance) {
|
|
486
|
+
repairMask |= INLINE_REPAIR_BALANCE_SANITIZE
|
|
437
487
|
}
|
|
438
488
|
return {
|
|
439
489
|
hasEmphasis,
|
|
440
490
|
hasLinkOpen,
|
|
441
491
|
hasLinkClose,
|
|
442
|
-
hasCodeInline
|
|
492
|
+
hasCodeInline,
|
|
493
|
+
hasJapaneseContext,
|
|
494
|
+
repairMask,
|
|
495
|
+
hasAsteriskWrapperImbalance
|
|
443
496
|
}
|
|
444
497
|
}
|
|
445
498
|
|
|
@@ -451,5 +504,10 @@ export {
|
|
|
451
504
|
buildAsteriskWrapperPrefixStats,
|
|
452
505
|
buildBrokenRefWrapperRangeSignals,
|
|
453
506
|
shouldAttemptBrokenRefRewrite,
|
|
454
|
-
scanInlinePostprocessSignals
|
|
507
|
+
scanInlinePostprocessSignals,
|
|
508
|
+
INLINE_REPAIR_EM_OUTER_STRONG_SEQUENCE,
|
|
509
|
+
INLINE_REPAIR_TAIL_AFTER_LINK,
|
|
510
|
+
INLINE_REPAIR_LEADING_ASTERISK_EM,
|
|
511
|
+
INLINE_REPAIR_TRAILING_STRONG,
|
|
512
|
+
INLINE_REPAIR_BALANCE_SANITIZE
|
|
455
513
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import Token from 'markdown-it/lib/token.mjs'
|
|
2
2
|
import { buildLinkCloseMap, convertCollapsedReferenceLinks, mergeBrokenMarksAroundLinks } from '../token-link-utils.js'
|
|
3
|
-
import {
|
|
3
|
+
import { runBrokenRefRepairs } from './broken-ref.js'
|
|
4
4
|
import {
|
|
5
5
|
rebuildInlineLevels,
|
|
6
6
|
rebuildInlineLevelsFrom,
|
|
@@ -11,68 +11,35 @@ import {
|
|
|
11
11
|
import {
|
|
12
12
|
getRuntimeOpt,
|
|
13
13
|
hasRuntimeOverride,
|
|
14
|
-
getReferenceCount
|
|
14
|
+
getReferenceCount,
|
|
15
|
+
isAsciiWordCode,
|
|
16
|
+
isSoftSpaceCode,
|
|
17
|
+
cloneMap
|
|
15
18
|
} from '../token-utils.js'
|
|
16
19
|
import {
|
|
17
20
|
hasMarkerChars,
|
|
18
21
|
hasJapaneseContextInRange,
|
|
19
22
|
hasEmphasisSignalInRange,
|
|
20
23
|
buildAsteriskWrapperPrefixStats,
|
|
21
|
-
scanInlinePostprocessSignals
|
|
24
|
+
scanInlinePostprocessSignals,
|
|
25
|
+
INLINE_REPAIR_EM_OUTER_STRONG_SEQUENCE,
|
|
26
|
+
INLINE_REPAIR_TAIL_AFTER_LINK,
|
|
27
|
+
INLINE_REPAIR_LEADING_ASTERISK_EM,
|
|
28
|
+
INLINE_REPAIR_TRAILING_STRONG,
|
|
29
|
+
INLINE_REPAIR_BALANCE_SANITIZE
|
|
22
30
|
} from './guards.js'
|
|
23
31
|
import {
|
|
24
32
|
tryFixTailPatternTokenOnly,
|
|
25
33
|
tryFixTailDanglingStrongCloseTokenOnly
|
|
26
34
|
} from './fastpaths.js'
|
|
35
|
+
import { sanitizeEmStrongBalance } from './emphasis-balance.js'
|
|
27
36
|
|
|
28
|
-
const
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
const makeTokenLiteralText = (token) => {
|
|
35
|
-
if (!token) return
|
|
36
|
-
const literal = token.markup || fallbackMarkupByType(token.type)
|
|
37
|
-
token.type = 'text'
|
|
38
|
-
token.tag = ''
|
|
39
|
-
token.nesting = 0
|
|
40
|
-
token.content = literal
|
|
41
|
-
token.markup = ''
|
|
42
|
-
token.info = ''
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
const sanitizeEmStrongBalance = (tokens, onChangeStart = null) => {
|
|
46
|
-
if (!tokens || tokens.length === 0) return false
|
|
47
|
-
const stack = []
|
|
48
|
-
let changed = false
|
|
49
|
-
for (let i = 0; i < tokens.length; i++) {
|
|
50
|
-
const token = tokens[i]
|
|
51
|
-
if (!token || !token.type) continue
|
|
52
|
-
if (token.type === 'strong_open' || token.type === 'em_open') {
|
|
53
|
-
stack.push({ type: token.type, idx: i })
|
|
54
|
-
continue
|
|
55
|
-
}
|
|
56
|
-
if (token.type !== 'strong_close' && token.type !== 'em_close') continue
|
|
57
|
-
const expected = token.type === 'strong_close' ? 'strong_open' : 'em_open'
|
|
58
|
-
if (stack.length > 0 && stack[stack.length - 1].type === expected) {
|
|
59
|
-
stack.pop()
|
|
60
|
-
continue
|
|
61
|
-
}
|
|
62
|
-
if (onChangeStart) onChangeStart(i)
|
|
63
|
-
makeTokenLiteralText(token)
|
|
64
|
-
changed = true
|
|
65
|
-
}
|
|
66
|
-
for (let i = stack.length - 1; i >= 0; i--) {
|
|
67
|
-
const entry = stack[i]
|
|
68
|
-
const token = tokens[entry.idx]
|
|
69
|
-
if (!token) continue
|
|
70
|
-
if (onChangeStart) onChangeStart(entry.idx)
|
|
71
|
-
makeTokenLiteralText(token)
|
|
72
|
-
changed = true
|
|
73
|
-
}
|
|
74
|
-
return changed
|
|
75
|
-
}
|
|
37
|
+
const INLINE_REPAIR_ALL_EMPHASIS_FIXERS =
|
|
38
|
+
INLINE_REPAIR_EM_OUTER_STRONG_SEQUENCE |
|
|
39
|
+
INLINE_REPAIR_TAIL_AFTER_LINK |
|
|
40
|
+
INLINE_REPAIR_LEADING_ASTERISK_EM |
|
|
41
|
+
INLINE_REPAIR_TRAILING_STRONG |
|
|
42
|
+
INLINE_REPAIR_BALANCE_SANITIZE
|
|
76
43
|
|
|
77
44
|
const getPostprocessMetrics = (state) => {
|
|
78
45
|
if (!state || !state.env) return null
|
|
@@ -81,14 +48,17 @@ const getPostprocessMetrics = (state) => {
|
|
|
81
48
|
return metrics
|
|
82
49
|
}
|
|
83
50
|
|
|
84
|
-
const buildInlinePostprocessFacts = (children, inlineContent) => {
|
|
85
|
-
const preScan = scanInlinePostprocessSignals(children)
|
|
51
|
+
const buildInlinePostprocessFacts = (children, inlineContent, collectJapaneseContext) => {
|
|
52
|
+
const preScan = scanInlinePostprocessSignals(children, collectJapaneseContext)
|
|
86
53
|
return {
|
|
87
54
|
hasBracketText: inlineContent.indexOf('[') !== -1 || inlineContent.indexOf(']') !== -1,
|
|
88
55
|
hasEmphasis: preScan.hasEmphasis,
|
|
56
|
+
hasAsteriskWrapperImbalance: preScan.hasAsteriskWrapperImbalance,
|
|
89
57
|
hasLinkOpen: preScan.hasLinkOpen,
|
|
90
58
|
hasLinkClose: preScan.hasLinkClose,
|
|
91
59
|
hasCodeInline: preScan.hasCodeInline,
|
|
60
|
+
hasJapaneseContext: preScan.hasJapaneseContext,
|
|
61
|
+
repairMask: preScan.repairMask,
|
|
92
62
|
linkCloseMap: undefined,
|
|
93
63
|
wrapperPrefixStats: undefined,
|
|
94
64
|
rebuildLevelStart: undefined
|
|
@@ -159,14 +129,14 @@ const BROKEN_REF_REPAIR_HOOKS = {
|
|
|
159
129
|
markLevelRebuildFrom: markInlineLevelRebuildFrom
|
|
160
130
|
}
|
|
161
131
|
|
|
162
|
-
const bumpPostprocessMetric = (metrics, bucket, key) => {
|
|
163
|
-
if (!metrics || !bucket || !key) return
|
|
132
|
+
const bumpPostprocessMetric = (metrics, bucket, key, delta = 1) => {
|
|
133
|
+
if (!metrics || !bucket || !key || delta <= 0) return
|
|
164
134
|
let table = metrics[bucket]
|
|
165
135
|
if (!table || typeof table !== 'object') {
|
|
166
136
|
table = Object.create(null)
|
|
167
137
|
metrics[bucket] = table
|
|
168
138
|
}
|
|
169
|
-
table[key] = (table[key] || 0) +
|
|
139
|
+
table[key] = (table[key] || 0) + delta
|
|
170
140
|
}
|
|
171
141
|
|
|
172
142
|
const scanTailRepairCandidateAfterLinkClose = (tokens, linkCloseIdx) => {
|
|
@@ -232,11 +202,6 @@ const fixTailAfterLinkStrongClose = (tokens, isJapaneseMode, metrics = null, onC
|
|
|
232
202
|
return false
|
|
233
203
|
}
|
|
234
204
|
|
|
235
|
-
const cloneMap = (map) => {
|
|
236
|
-
if (!map || !Array.isArray(map)) return null
|
|
237
|
-
return [map[0], map[1]]
|
|
238
|
-
}
|
|
239
|
-
|
|
240
205
|
const cloneTextToken = (source, content) => {
|
|
241
206
|
const token = new Token('text', '', 0)
|
|
242
207
|
Object.assign(token, source)
|
|
@@ -245,19 +210,9 @@ const cloneTextToken = (source, content) => {
|
|
|
245
210
|
return token
|
|
246
211
|
}
|
|
247
212
|
|
|
248
|
-
const isSoftSpaceCode = (code) => {
|
|
249
|
-
return code === 0x20 || code === 0x09 || code === 0x3000
|
|
250
|
-
}
|
|
251
|
-
|
|
252
213
|
const CHAR_ASTERISK = 0x2A // *
|
|
253
214
|
const CHAR_BACKSLASH = 0x5C // \
|
|
254
215
|
|
|
255
|
-
const isAsciiWordCode = (code) => {
|
|
256
|
-
return (code >= 0x30 && code <= 0x39) ||
|
|
257
|
-
(code >= 0x41 && code <= 0x5A) ||
|
|
258
|
-
(code >= 0x61 && code <= 0x7A)
|
|
259
|
-
}
|
|
260
|
-
|
|
261
216
|
const textEndsAsciiWord = (text) => {
|
|
262
217
|
if (!text || text.length === 0) return false
|
|
263
218
|
return isAsciiWordCode(text.charCodeAt(text.length - 1))
|
|
@@ -459,46 +414,78 @@ const shouldRunInlineBrokenRefRepair = (facts, inlineContent, state) => {
|
|
|
459
414
|
return getReferenceCount(state) > 0
|
|
460
415
|
}
|
|
461
416
|
|
|
462
|
-
const applyBrokenRefRepairFacts = (facts, repairs) => {
|
|
463
|
-
if (!facts || !repairs) return
|
|
464
|
-
facts.hasBracketText = repairs.hasBracketText
|
|
465
|
-
facts.hasEmphasis = repairs.hasEmphasis
|
|
466
|
-
facts.hasLinkClose = repairs.hasLinkClose
|
|
467
|
-
}
|
|
468
|
-
|
|
469
|
-
const createBrokenRefScanState = () => {
|
|
470
|
-
return { depth: 0, brokenEnd: false, tailOpen: -1 }
|
|
471
|
-
}
|
|
472
|
-
|
|
473
417
|
const runInlineBrokenRefRepairStage = (children, facts, inlineContent, state) => {
|
|
474
418
|
if (!shouldRunInlineBrokenRefRepair(facts, inlineContent, state)) return false
|
|
475
|
-
const scanState =
|
|
476
|
-
const maxRepairPass = computeMaxBrokenRefRepairPass(children, scanState)
|
|
477
|
-
if (maxRepairPass <= 0) return false
|
|
419
|
+
const scanState = { depth: 0, brokenEnd: false, tailOpen: -1 }
|
|
478
420
|
const repairs = runBrokenRefRepairs(
|
|
479
421
|
children,
|
|
480
|
-
maxRepairPass,
|
|
481
422
|
scanState,
|
|
482
423
|
getPostprocessMetrics(state),
|
|
483
424
|
facts,
|
|
484
425
|
BROKEN_REF_REPAIR_HOOKS
|
|
485
426
|
)
|
|
486
|
-
|
|
427
|
+
facts.hasBracketText = repairs.hasBracketText
|
|
428
|
+
facts.hasEmphasis = repairs.hasEmphasis
|
|
429
|
+
facts.hasLinkClose = repairs.hasLinkClose
|
|
487
430
|
return repairs.changed
|
|
488
431
|
}
|
|
489
432
|
|
|
490
|
-
const runInlineEmphasisRepairStage = (
|
|
433
|
+
const runInlineEmphasisRepairStage = (
|
|
434
|
+
children,
|
|
435
|
+
facts,
|
|
436
|
+
state,
|
|
437
|
+
isJapaneseMode,
|
|
438
|
+
forceBalanceSanitize = false
|
|
439
|
+
) => {
|
|
491
440
|
if (!facts.hasEmphasis) return false
|
|
492
441
|
let changed = false
|
|
493
442
|
const markChangedFrom = createInlineChangeMarker(facts)
|
|
494
|
-
|
|
443
|
+
const metrics = getPostprocessMetrics(state)
|
|
444
|
+
const repairMask = forceBalanceSanitize
|
|
445
|
+
? INLINE_REPAIR_ALL_EMPHASIS_FIXERS
|
|
446
|
+
: (facts.repairMask || 0)
|
|
447
|
+
if ((repairMask & INLINE_REPAIR_EM_OUTER_STRONG_SEQUENCE) &&
|
|
448
|
+
fixEmOuterStrongSequence(children, markChangedFrom)) {
|
|
449
|
+
changed = true
|
|
450
|
+
bumpPostprocessMetric(metrics, 'emphasisFixers', 'em-outer-strong-sequence')
|
|
451
|
+
}
|
|
495
452
|
if (facts.hasLinkClose) {
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
453
|
+
if ((repairMask & INLINE_REPAIR_TAIL_AFTER_LINK) &&
|
|
454
|
+
fixTailAfterLinkStrongClose(children, isJapaneseMode, metrics, markChangedFrom)) {
|
|
455
|
+
changed = true
|
|
456
|
+
}
|
|
457
|
+
if ((repairMask & INLINE_REPAIR_LEADING_ASTERISK_EM) &&
|
|
458
|
+
fixLeadingAsteriskEm(children, markChangedFrom)) {
|
|
459
|
+
changed = true
|
|
460
|
+
bumpPostprocessMetric(metrics, 'emphasisFixers', 'leading-asterisk-em')
|
|
461
|
+
}
|
|
462
|
+
}
|
|
463
|
+
if ((repairMask & INLINE_REPAIR_TRAILING_STRONG) &&
|
|
464
|
+
fixTrailingStrong(children, markChangedFrom)) {
|
|
465
|
+
changed = true
|
|
466
|
+
bumpPostprocessMetric(metrics, 'emphasisFixers', 'trailing-strong')
|
|
467
|
+
}
|
|
468
|
+
const shouldAttemptSanitize = forceBalanceSanitize ||
|
|
469
|
+
changed ||
|
|
470
|
+
facts.hasAsteriskWrapperImbalance ||
|
|
471
|
+
(repairMask & INLINE_REPAIR_BALANCE_SANITIZE)
|
|
472
|
+
if (!shouldAttemptSanitize) {
|
|
473
|
+
bumpPostprocessMetric(metrics, 'emphasisSanitize', 'skipped-balanced')
|
|
474
|
+
return changed
|
|
475
|
+
}
|
|
476
|
+
bumpPostprocessMetric(metrics, 'emphasisSanitize', 'attempted')
|
|
477
|
+
if (forceBalanceSanitize || changed) {
|
|
478
|
+
bumpPostprocessMetric(metrics, 'emphasisSanitize', 'attempted-after-change')
|
|
479
|
+
} else {
|
|
480
|
+
bumpPostprocessMetric(metrics, 'emphasisSanitize', 'attempted-pre-scan-risk')
|
|
481
|
+
}
|
|
482
|
+
if (sanitizeEmStrongBalance(children, markChangedFrom)) {
|
|
483
|
+
changed = true
|
|
484
|
+
bumpPostprocessMetric(metrics, 'emphasisFixers', 'sanitize-em-strong-balance')
|
|
485
|
+
bumpPostprocessMetric(metrics, 'emphasisSanitize', 'repaired')
|
|
486
|
+
} else {
|
|
487
|
+
bumpPostprocessMetric(metrics, 'emphasisSanitize', 'no-change')
|
|
499
488
|
}
|
|
500
|
-
if (fixTrailingStrong(children, markChangedFrom)) changed = true
|
|
501
|
-
if (sanitizeEmStrongBalance(children, markChangedFrom)) changed = true
|
|
502
489
|
return changed
|
|
503
490
|
}
|
|
504
491
|
|
|
@@ -507,33 +494,17 @@ const shouldRunInlineCollapsedRefRepair = (facts, state) => {
|
|
|
507
494
|
return getReferenceCount(state) > 0
|
|
508
495
|
}
|
|
509
496
|
|
|
510
|
-
const applyCollapsedRefRepairFacts = (facts) => {
|
|
511
|
-
if (!facts) return
|
|
512
|
-
facts.hasLinkOpen = true
|
|
513
|
-
facts.hasLinkClose = true
|
|
514
|
-
}
|
|
515
|
-
|
|
516
|
-
const rewriteInlineCollapsedReferences = (children, facts, state, markChangedFrom) => {
|
|
517
|
-
const changed = convertCollapsedReferenceLinks(
|
|
518
|
-
children,
|
|
519
|
-
state,
|
|
520
|
-
facts,
|
|
521
|
-
markChangedFrom
|
|
522
|
-
)
|
|
523
|
-
if (!changed) return false
|
|
524
|
-
applyCollapsedRefRepairFacts(facts)
|
|
525
|
-
return true
|
|
526
|
-
}
|
|
527
|
-
|
|
528
497
|
const runInlineCollapsedRefStage = (children, facts, state) => {
|
|
529
498
|
if (!shouldRunInlineCollapsedRefRepair(facts, state)) return false
|
|
530
499
|
const markChangedFrom = createInlineChangeMarker(facts)
|
|
531
|
-
if (!
|
|
500
|
+
if (!convertCollapsedReferenceLinks(children, state, facts, markChangedFrom)) return false
|
|
501
|
+
facts.hasLinkOpen = true
|
|
502
|
+
facts.hasLinkClose = true
|
|
532
503
|
finalizeInlineLinkRepairStage(children, facts, markChangedFrom)
|
|
533
504
|
return true
|
|
534
505
|
}
|
|
535
506
|
|
|
536
|
-
const shouldSkipInlinePostprocessToken = (
|
|
507
|
+
const shouldSkipInlinePostprocessToken = (facts, isJapaneseMode) => {
|
|
537
508
|
if (!facts.hasEmphasis &&
|
|
538
509
|
!facts.hasBracketText &&
|
|
539
510
|
!facts.hasLinkOpen &&
|
|
@@ -541,8 +512,7 @@ const shouldSkipInlinePostprocessToken = (children, facts, isJapaneseMode) => {
|
|
|
541
512
|
!facts.hasCodeInline) {
|
|
542
513
|
return true
|
|
543
514
|
}
|
|
544
|
-
if (isJapaneseMode &&
|
|
545
|
-
!hasJapaneseContextInRange(children, 0, children.length - 1)) {
|
|
515
|
+
if (isJapaneseMode && !facts.hasJapaneseContext) {
|
|
546
516
|
return true
|
|
547
517
|
}
|
|
548
518
|
return false
|
|
@@ -569,7 +539,7 @@ const runInlineCoreRepairStages = (
|
|
|
569
539
|
return false
|
|
570
540
|
}
|
|
571
541
|
if (runInlineBrokenRefRepairStage(children, facts, inlineContent, state)) changed = true
|
|
572
|
-
if (runInlineEmphasisRepairStage(children, facts, state, isJapaneseMode)) changed = true
|
|
542
|
+
if (runInlineEmphasisRepairStage(children, facts, state, isJapaneseMode, changed)) changed = true
|
|
573
543
|
return changed
|
|
574
544
|
}
|
|
575
545
|
|
|
@@ -583,8 +553,8 @@ const processInlinePostprocessToken = (
|
|
|
583
553
|
) => {
|
|
584
554
|
if (!token || token.type !== 'inline' || !token.children || token.children.length === 0) return
|
|
585
555
|
const children = token.children
|
|
586
|
-
const facts = buildInlinePostprocessFacts(children, inlineContent)
|
|
587
|
-
if (shouldSkipInlinePostprocessToken(
|
|
556
|
+
const facts = buildInlinePostprocessFacts(children, inlineContent, isJapaneseMode)
|
|
557
|
+
if (shouldSkipInlinePostprocessToken(facts, isJapaneseMode)) return
|
|
588
558
|
const changed = runInlineCoreRepairStages(
|
|
589
559
|
children,
|
|
590
560
|
facts,
|
package/src/token-utils.js
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
const CHAR_ASTERISK = 0x2A // *
|
|
2
|
-
const CHAR_SPACE = 0x20 // ' '
|
|
3
|
-
const CHAR_TAB = 0x09 // '\t'
|
|
4
|
-
const CHAR_NEWLINE = 0x0A // '\n'
|
|
5
|
-
const CHAR_IDEOGRAPHIC_SPACE = 0x3000 // fullwidth space
|
|
6
|
-
const MODE_FLAG_COMPATIBLE = 1 << 0
|
|
7
|
-
const MODE_FLAG_AGGRESSIVE = 1 << 1
|
|
1
|
+
const CHAR_ASTERISK = 0x2A // *
|
|
2
|
+
const CHAR_SPACE = 0x20 // ' '
|
|
3
|
+
const CHAR_TAB = 0x09 // '\t'
|
|
4
|
+
const CHAR_NEWLINE = 0x0A // '\n'
|
|
5
|
+
const CHAR_IDEOGRAPHIC_SPACE = 0x3000 // fullwidth space
|
|
6
|
+
const MODE_FLAG_COMPATIBLE = 1 << 0
|
|
7
|
+
const MODE_FLAG_AGGRESSIVE = 1 << 1
|
|
8
8
|
const MODE_FLAG_JAPANESE_BASE = 1 << 2
|
|
9
9
|
const MODE_FLAG_JAPANESE_PLUS = 1 << 3
|
|
10
10
|
const MODE_FLAG_JAPANESE_ANY = MODE_FLAG_JAPANESE_BASE | MODE_FLAG_JAPANESE_PLUS
|
|
@@ -16,25 +16,40 @@ const VALID_CANONICAL_MODES = new Set([
|
|
|
16
16
|
'japanese-boundary',
|
|
17
17
|
'japanese-boundary-guard'
|
|
18
18
|
])
|
|
19
|
-
const REG_JAPANESE = /[\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Han}\u3000-\u303F\uFF00-\uFFEF]/u
|
|
20
|
-
const REG_ATTRS = /{[^{}\n!@#%^&*()]+?}$/
|
|
21
|
-
|
|
19
|
+
const REG_JAPANESE = /[\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Han}\u3000-\u303F\uFF00-\uFFEF]/u
|
|
20
|
+
const REG_ATTRS = /{[^{}\n!@#%^&*()]+?}$/
|
|
21
|
+
|
|
22
22
|
const isJapaneseChar = (ch) => {
|
|
23
|
-
if (!ch) return false
|
|
24
|
-
const code = typeof ch === 'string' ? ch.charCodeAt(0) : ch
|
|
25
|
-
if (code < 128) return false
|
|
26
|
-
if (code >= 0x3040 && code <= 0x309F) return true
|
|
27
|
-
if (code >= 0x30A0 && code <= 0x30FF) return true
|
|
28
|
-
// Han + CJK punctuation/fullwidth ranges are common hot-path hits.
|
|
29
|
-
// Keep these as cheap numeric checks before the fallback regex.
|
|
30
|
-
if (code >= 0x3400 && code <= 0x4DBF) return true
|
|
31
|
-
if (code >= 0x4E00 && code <= 0x9FFF) return true
|
|
32
|
-
if (code >= 0xF900 && code <= 0xFAFF) return true
|
|
33
|
-
if (code >= 0x3000 && code <= 0x303F) return true
|
|
34
|
-
if (code >= 0xFF00 && code <= 0xFFEF) return true
|
|
23
|
+
if (!ch) return false
|
|
24
|
+
const code = typeof ch === 'string' ? ch.charCodeAt(0) : ch
|
|
25
|
+
if (code < 128) return false
|
|
26
|
+
if (code >= 0x3040 && code <= 0x309F) return true
|
|
27
|
+
if (code >= 0x30A0 && code <= 0x30FF) return true
|
|
28
|
+
// Han + CJK punctuation/fullwidth ranges are common hot-path hits.
|
|
29
|
+
// Keep these as cheap numeric checks before the fallback regex.
|
|
30
|
+
if (code >= 0x3400 && code <= 0x4DBF) return true
|
|
31
|
+
if (code >= 0x4E00 && code <= 0x9FFF) return true
|
|
32
|
+
if (code >= 0xF900 && code <= 0xFAFF) return true
|
|
33
|
+
if (code >= 0x3000 && code <= 0x303F) return true
|
|
34
|
+
if (code >= 0xFF00 && code <= 0xFFEF) return true
|
|
35
35
|
return REG_JAPANESE.test(String.fromCharCode(code))
|
|
36
36
|
}
|
|
37
37
|
|
|
38
|
+
const isAsciiWordCode = (code) => {
|
|
39
|
+
return (code >= 0x30 && code <= 0x39) ||
|
|
40
|
+
(code >= 0x41 && code <= 0x5A) ||
|
|
41
|
+
(code >= 0x61 && code <= 0x7A)
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
const isSoftSpaceCode = (code) => {
|
|
45
|
+
return code === CHAR_SPACE || code === CHAR_TAB || code === CHAR_IDEOGRAPHIC_SPACE
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
const cloneMap = (map) => {
|
|
49
|
+
if (!map || !Array.isArray(map)) return null
|
|
50
|
+
return [map[0], map[1]]
|
|
51
|
+
}
|
|
52
|
+
|
|
38
53
|
const hasCjkBreaksRule = (md) => {
|
|
39
54
|
if (!md || !md.core || !md.core.ruler || !Array.isArray(md.core.ruler.__rules__)) return false
|
|
40
55
|
if (md.__strongJaHasCjkBreaks === true) return true
|
|
@@ -59,7 +74,7 @@ const hasCjkBreaksRule = (md) => {
|
|
|
59
74
|
const isCjkBreaksRuleName = (name) => {
|
|
60
75
|
return typeof name === 'string' && REG_CJK_BREAKS_RULE_NAME.test(name)
|
|
61
76
|
}
|
|
62
|
-
|
|
77
|
+
|
|
63
78
|
const resolveMode = (opt) => {
|
|
64
79
|
const raw = opt && typeof opt.mode === 'string' ? opt.mode : 'japanese'
|
|
65
80
|
const normalized = raw.toLowerCase()
|
|
@@ -70,9 +85,9 @@ const resolveMode = (opt) => {
|
|
|
70
85
|
`mditStrongJa: unknown mode "${raw}". Valid modes: japanese, japanese-boundary, japanese-boundary-guard, aggressive, compatible`
|
|
71
86
|
)
|
|
72
87
|
}
|
|
73
|
-
|
|
74
|
-
const getModeFlags = (mode) => {
|
|
75
|
-
switch (mode) {
|
|
88
|
+
|
|
89
|
+
const getModeFlags = (mode) => {
|
|
90
|
+
switch (mode) {
|
|
76
91
|
case 'compatible':
|
|
77
92
|
return MODE_FLAG_COMPATIBLE
|
|
78
93
|
case 'aggressive':
|
|
@@ -81,26 +96,26 @@ const getModeFlags = (mode) => {
|
|
|
81
96
|
return MODE_FLAG_JAPANESE_BASE
|
|
82
97
|
case 'japanese-boundary-guard':
|
|
83
98
|
return MODE_FLAG_JAPANESE_PLUS
|
|
84
|
-
default:
|
|
85
|
-
return 0
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
const deriveModeInfo = (opt) => {
|
|
90
|
-
if (!opt || typeof opt !== 'object') return opt
|
|
91
|
-
const rawMode = opt.mode
|
|
92
|
-
if (opt.__strongJaModeRaw === rawMode &&
|
|
93
|
-
typeof opt.__strongJaMode === 'string' &&
|
|
94
|
-
typeof opt.__strongJaModeFlags === 'number') {
|
|
95
|
-
return opt
|
|
96
|
-
}
|
|
97
|
-
const mode = resolveMode(opt)
|
|
98
|
-
opt.__strongJaModeRaw = rawMode
|
|
99
|
-
opt.__strongJaMode = mode
|
|
100
|
-
opt.__strongJaModeFlags = getModeFlags(mode)
|
|
101
|
-
return opt
|
|
102
|
-
}
|
|
103
|
-
|
|
99
|
+
default:
|
|
100
|
+
return 0
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
const deriveModeInfo = (opt) => {
|
|
105
|
+
if (!opt || typeof opt !== 'object') return opt
|
|
106
|
+
const rawMode = opt.mode
|
|
107
|
+
if (opt.__strongJaModeRaw === rawMode &&
|
|
108
|
+
typeof opt.__strongJaMode === 'string' &&
|
|
109
|
+
typeof opt.__strongJaModeFlags === 'number') {
|
|
110
|
+
return opt
|
|
111
|
+
}
|
|
112
|
+
const mode = resolveMode(opt)
|
|
113
|
+
opt.__strongJaModeRaw = rawMode
|
|
114
|
+
opt.__strongJaMode = mode
|
|
115
|
+
opt.__strongJaModeFlags = getModeFlags(mode)
|
|
116
|
+
return opt
|
|
117
|
+
}
|
|
118
|
+
|
|
104
119
|
const deriveOptionInfo = (opt) => {
|
|
105
120
|
if (!opt || typeof opt !== 'object') return opt
|
|
106
121
|
deriveModeInfo(opt)
|
|
@@ -135,21 +150,29 @@ const hasRuntimeOverride = (override) => {
|
|
|
135
150
|
|
|
136
151
|
const getRuntimeOpt = (state, baseOpt) => {
|
|
137
152
|
const override = state && state.env ? state.env.__strongJaTokenOpt : null
|
|
138
|
-
|
|
139
|
-
if (state
|
|
153
|
+
const hasOverride = hasRuntimeOverride(override)
|
|
154
|
+
if (state &&
|
|
155
|
+
state.__strongJaTokenRuntimeOpt &&
|
|
140
156
|
state.__strongJaTokenRuntimeBase === baseOpt &&
|
|
141
|
-
state.__strongJaTokenRuntimeOverride === override
|
|
157
|
+
state.__strongJaTokenRuntimeOverride === override &&
|
|
158
|
+
state.__strongJaTokenRuntimeHasOverride === hasOverride) {
|
|
142
159
|
return state.__strongJaTokenRuntimeOpt
|
|
143
160
|
}
|
|
144
|
-
|
|
145
|
-
if (
|
|
146
|
-
|
|
147
|
-
|
|
161
|
+
let resolved = deriveOptionInfo(baseOpt)
|
|
162
|
+
if (hasOverride) {
|
|
163
|
+
const merged = baseOpt && typeof baseOpt === 'object' ? { ...baseOpt } : {}
|
|
164
|
+
if (HAS_OWN.call(override, 'mode') && override.mode !== undefined) merged.mode = override.mode
|
|
165
|
+
if (HAS_OWN.call(override, 'postprocess') && override.postprocess !== undefined) merged.postprocess = override.postprocess
|
|
166
|
+
resolved = deriveOptionInfo(merged)
|
|
167
|
+
}
|
|
168
|
+
if (!state) return resolved
|
|
169
|
+
state.__strongJaTokenRuntimeOpt = resolved
|
|
148
170
|
state.__strongJaTokenRuntimeBase = baseOpt
|
|
149
171
|
state.__strongJaTokenRuntimeOverride = override
|
|
150
|
-
|
|
172
|
+
state.__strongJaTokenRuntimeHasOverride = hasOverride
|
|
173
|
+
return resolved
|
|
151
174
|
}
|
|
152
|
-
|
|
175
|
+
|
|
153
176
|
const getReferenceCount = (state) => {
|
|
154
177
|
if (!state) return 0
|
|
155
178
|
let referenceCount = state.__strongJaReferenceCount
|
|
@@ -167,74 +190,77 @@ const getReferenceCount = (state) => {
|
|
|
167
190
|
return referenceCount
|
|
168
191
|
}
|
|
169
192
|
|
|
170
|
-
function normalizeCoreRulesBeforePostprocess(value) {
|
|
171
|
-
if (!value) return []
|
|
172
|
-
const list = Array.isArray(value) ? value : [value]
|
|
173
|
-
const normalized = []
|
|
174
|
-
const seen = new Set()
|
|
175
|
-
for (let idx = 0; idx < list.length; idx++) {
|
|
176
|
-
const raw = list[idx]
|
|
177
|
-
if (typeof raw !== 'string') continue
|
|
178
|
-
const trimmed = raw.trim()
|
|
179
|
-
if (!trimmed || seen.has(trimmed)) continue
|
|
180
|
-
seen.add(trimmed)
|
|
181
|
-
normalized.push(trimmed)
|
|
182
|
-
}
|
|
183
|
-
return normalized
|
|
184
|
-
}
|
|
185
|
-
|
|
186
|
-
function ensureCoreRuleOrder(md, ruleNames, targetRuleName) {
|
|
187
|
-
if (!md || !md.core || !md.core.ruler) return
|
|
188
|
-
if (!ruleNames || ruleNames.length === 0) return
|
|
189
|
-
for (let idx = 0; idx < ruleNames.length; idx++) {
|
|
190
|
-
moveRuleBefore(md.core.ruler, ruleNames[idx], targetRuleName)
|
|
191
|
-
}
|
|
192
|
-
}
|
|
193
|
-
|
|
194
|
-
function moveRuleBefore(ruler, ruleName, beforeName) {
|
|
195
|
-
if (!ruler || !ruler.__rules__) return
|
|
196
|
-
const rules = ruler.__rules__
|
|
197
|
-
let fromIdx = -1
|
|
198
|
-
let beforeIdx = -1
|
|
199
|
-
for (let idx = 0; idx < rules.length; idx++) {
|
|
200
|
-
if (rules[idx].name === ruleName) fromIdx = idx
|
|
201
|
-
if (rules[idx].name === beforeName) beforeIdx = idx
|
|
202
|
-
if (fromIdx !== -1 && beforeIdx !== -1) break
|
|
203
|
-
}
|
|
204
|
-
// Ensure ruleName is before beforeName; keep existing order if already earlier.
|
|
205
|
-
if (fromIdx === -1 || beforeIdx === -1 || fromIdx < beforeIdx) return
|
|
206
|
-
|
|
207
|
-
const rule = rules.splice(fromIdx, 1)[0]
|
|
208
|
-
rules.splice(beforeIdx, 0, rule)
|
|
209
|
-
ruler.__cache__ = null
|
|
210
|
-
}
|
|
211
|
-
|
|
212
|
-
function moveRuleAfter(ruler, ruleName, afterName) {
|
|
213
|
-
if (!ruler || !ruler.__rules__) return
|
|
214
|
-
const rules = ruler.__rules__
|
|
215
|
-
let fromIdx = -1
|
|
216
|
-
let afterIdx = -1
|
|
217
|
-
for (let idx = 0; idx < rules.length; idx++) {
|
|
218
|
-
if (rules[idx].name === ruleName) fromIdx = idx
|
|
219
|
-
if (rules[idx].name === afterName) afterIdx = idx
|
|
220
|
-
if (fromIdx !== -1 && afterIdx !== -1) break
|
|
221
|
-
}
|
|
222
|
-
if (fromIdx === -1 || afterIdx === -1 || fromIdx === afterIdx + 1) return
|
|
223
|
-
|
|
224
|
-
const rule = rules.splice(fromIdx, 1)[0]
|
|
225
|
-
const targetIdx = fromIdx < afterIdx ? afterIdx - 1 : afterIdx
|
|
226
|
-
rules.splice(targetIdx + 1, 0, rule)
|
|
227
|
-
ruler.__cache__ = null
|
|
228
|
-
}
|
|
229
|
-
|
|
230
|
-
export {
|
|
231
|
-
CHAR_ASTERISK,
|
|
232
|
-
CHAR_SPACE,
|
|
233
|
-
CHAR_TAB,
|
|
234
|
-
CHAR_NEWLINE,
|
|
235
|
-
CHAR_IDEOGRAPHIC_SPACE,
|
|
193
|
+
function normalizeCoreRulesBeforePostprocess(value) {
|
|
194
|
+
if (!value) return []
|
|
195
|
+
const list = Array.isArray(value) ? value : [value]
|
|
196
|
+
const normalized = []
|
|
197
|
+
const seen = new Set()
|
|
198
|
+
for (let idx = 0; idx < list.length; idx++) {
|
|
199
|
+
const raw = list[idx]
|
|
200
|
+
if (typeof raw !== 'string') continue
|
|
201
|
+
const trimmed = raw.trim()
|
|
202
|
+
if (!trimmed || seen.has(trimmed)) continue
|
|
203
|
+
seen.add(trimmed)
|
|
204
|
+
normalized.push(trimmed)
|
|
205
|
+
}
|
|
206
|
+
return normalized
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
function ensureCoreRuleOrder(md, ruleNames, targetRuleName) {
|
|
210
|
+
if (!md || !md.core || !md.core.ruler) return
|
|
211
|
+
if (!ruleNames || ruleNames.length === 0) return
|
|
212
|
+
for (let idx = 0; idx < ruleNames.length; idx++) {
|
|
213
|
+
moveRuleBefore(md.core.ruler, ruleNames[idx], targetRuleName)
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
function moveRuleBefore(ruler, ruleName, beforeName) {
|
|
218
|
+
if (!ruler || !ruler.__rules__) return
|
|
219
|
+
const rules = ruler.__rules__
|
|
220
|
+
let fromIdx = -1
|
|
221
|
+
let beforeIdx = -1
|
|
222
|
+
for (let idx = 0; idx < rules.length; idx++) {
|
|
223
|
+
if (rules[idx].name === ruleName) fromIdx = idx
|
|
224
|
+
if (rules[idx].name === beforeName) beforeIdx = idx
|
|
225
|
+
if (fromIdx !== -1 && beforeIdx !== -1) break
|
|
226
|
+
}
|
|
227
|
+
// Ensure ruleName is before beforeName; keep existing order if already earlier.
|
|
228
|
+
if (fromIdx === -1 || beforeIdx === -1 || fromIdx < beforeIdx) return
|
|
229
|
+
|
|
230
|
+
const rule = rules.splice(fromIdx, 1)[0]
|
|
231
|
+
rules.splice(beforeIdx, 0, rule)
|
|
232
|
+
ruler.__cache__ = null
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
function moveRuleAfter(ruler, ruleName, afterName) {
|
|
236
|
+
if (!ruler || !ruler.__rules__) return
|
|
237
|
+
const rules = ruler.__rules__
|
|
238
|
+
let fromIdx = -1
|
|
239
|
+
let afterIdx = -1
|
|
240
|
+
for (let idx = 0; idx < rules.length; idx++) {
|
|
241
|
+
if (rules[idx].name === ruleName) fromIdx = idx
|
|
242
|
+
if (rules[idx].name === afterName) afterIdx = idx
|
|
243
|
+
if (fromIdx !== -1 && afterIdx !== -1) break
|
|
244
|
+
}
|
|
245
|
+
if (fromIdx === -1 || afterIdx === -1 || fromIdx === afterIdx + 1) return
|
|
246
|
+
|
|
247
|
+
const rule = rules.splice(fromIdx, 1)[0]
|
|
248
|
+
const targetIdx = fromIdx < afterIdx ? afterIdx - 1 : afterIdx
|
|
249
|
+
rules.splice(targetIdx + 1, 0, rule)
|
|
250
|
+
ruler.__cache__ = null
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
export {
|
|
254
|
+
CHAR_ASTERISK,
|
|
255
|
+
CHAR_SPACE,
|
|
256
|
+
CHAR_TAB,
|
|
257
|
+
CHAR_NEWLINE,
|
|
258
|
+
CHAR_IDEOGRAPHIC_SPACE,
|
|
236
259
|
REG_ATTRS,
|
|
237
260
|
isJapaneseChar,
|
|
261
|
+
isAsciiWordCode,
|
|
262
|
+
isSoftSpaceCode,
|
|
263
|
+
cloneMap,
|
|
238
264
|
hasCjkBreaksRule,
|
|
239
265
|
isCjkBreaksRuleName,
|
|
240
266
|
resolveMode,
|
|
@@ -243,14 +269,14 @@ export {
|
|
|
243
269
|
deriveOptionInfo,
|
|
244
270
|
hasRuntimeOverride,
|
|
245
271
|
MODE_FLAG_COMPATIBLE,
|
|
246
|
-
MODE_FLAG_AGGRESSIVE,
|
|
247
|
-
MODE_FLAG_JAPANESE_BASE,
|
|
248
|
-
MODE_FLAG_JAPANESE_PLUS,
|
|
272
|
+
MODE_FLAG_AGGRESSIVE,
|
|
273
|
+
MODE_FLAG_JAPANESE_BASE,
|
|
274
|
+
MODE_FLAG_JAPANESE_PLUS,
|
|
249
275
|
MODE_FLAG_JAPANESE_ANY,
|
|
250
276
|
getRuntimeOpt,
|
|
251
277
|
getReferenceCount,
|
|
252
|
-
normalizeCoreRulesBeforePostprocess,
|
|
253
|
-
ensureCoreRuleOrder,
|
|
254
|
-
moveRuleBefore,
|
|
255
|
-
moveRuleAfter
|
|
256
|
-
}
|
|
278
|
+
normalizeCoreRulesBeforePostprocess,
|
|
279
|
+
ensureCoreRuleOrder,
|
|
280
|
+
moveRuleBefore,
|
|
281
|
+
moveRuleAfter
|
|
282
|
+
}
|