@peaceroad/markdown-it-strong-ja 0.8.1 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -4
- package/index.js +9 -17
- package/package.json +5 -5
- package/src/token-compat.js +13 -17
- package/src/token-core.js +148 -56
- package/src/token-link-utils.js +13 -16
- package/src/token-postprocess/broken-ref.js +123 -27
- package/src/token-postprocess/emphasis-balance.js +50 -0
- package/src/token-postprocess/fastpaths.js +1 -5
- package/src/token-postprocess/guards.js +121 -107
- package/src/token-postprocess/orchestrator.js +110 -169
- package/src/token-utils.js +182 -139
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { buildLinkCloseMap } from '../token-link-utils.js'
|
|
2
2
|
import {
|
|
3
3
|
isAsteriskEmphasisToken,
|
|
4
|
-
|
|
4
|
+
buildBrokenRefWrapperRangeSignals,
|
|
5
5
|
buildAsteriskWrapperPrefixStats,
|
|
6
6
|
shouldAttemptBrokenRefRewrite
|
|
7
7
|
} from './guards.js'
|
|
@@ -115,14 +115,14 @@ const expandSegmentEndForWrapperBalance = (tokens, startIdx, endIdx) => {
|
|
|
115
115
|
return balance.total > 0 ? -1 : expandedEnd
|
|
116
116
|
}
|
|
117
117
|
|
|
118
|
-
const bumpBrokenRefMetric = (metrics, bucket, key) => {
|
|
119
|
-
if (!metrics || !bucket || !key) return
|
|
118
|
+
const bumpBrokenRefMetric = (metrics, bucket, key, delta = 1) => {
|
|
119
|
+
if (!metrics || !bucket || !key || delta <= 0) return
|
|
120
120
|
let table = metrics[bucket]
|
|
121
121
|
if (!table || typeof table !== 'object') {
|
|
122
122
|
table = Object.create(null)
|
|
123
123
|
metrics[bucket] = table
|
|
124
124
|
}
|
|
125
|
-
table[key] = (table[key] || 0) +
|
|
125
|
+
table[key] = (table[key] || 0) + delta
|
|
126
126
|
}
|
|
127
127
|
|
|
128
128
|
const ensureBrokenRefLinkCloseMap = (tokens, facts = null, hooks = null, fallbackCache = null) => {
|
|
@@ -190,23 +190,36 @@ const resolveBrokenRefCandidateGuardFlow = (
|
|
|
190
190
|
children,
|
|
191
191
|
brokenRefCandidate,
|
|
192
192
|
segmentEnd,
|
|
193
|
+
metrics = null,
|
|
193
194
|
facts = null,
|
|
194
195
|
hooks = null,
|
|
195
196
|
fallbackCache = null
|
|
196
197
|
) => {
|
|
197
|
-
|
|
198
|
+
const wrapperSignals = buildBrokenRefWrapperRangeSignals(
|
|
199
|
+
children,
|
|
200
|
+
brokenRefCandidate.start,
|
|
201
|
+
segmentEnd,
|
|
202
|
+
brokenRefCandidate.startTextOffset
|
|
203
|
+
)
|
|
204
|
+
if (!wrapperSignals.hasTextMarker) {
|
|
198
205
|
return BROKEN_REF_FLOW_SKIP_NO_TEXT_MARKER
|
|
199
206
|
}
|
|
207
|
+
if (!hasBrokenRefActiveFastPathTokenSignal(wrapperSignals)) {
|
|
208
|
+
bumpBrokenRefMetric(metrics, 'brokenRefCandidateFlow', 'no-active-signature')
|
|
209
|
+
return BROKEN_REF_FLOW_SKIP_NO_ACTIVE_SIGNATURE
|
|
210
|
+
}
|
|
200
211
|
const wrapperPrefixStats = ensureBrokenRefWrapperPrefixStats(children, facts, hooks, fallbackCache)
|
|
201
212
|
if (!shouldAttemptBrokenRefRewrite(
|
|
202
213
|
children,
|
|
203
214
|
brokenRefCandidate.start,
|
|
204
215
|
segmentEnd,
|
|
205
216
|
brokenRefCandidate.startTextOffset,
|
|
206
|
-
wrapperPrefixStats
|
|
217
|
+
wrapperPrefixStats,
|
|
218
|
+
wrapperSignals
|
|
207
219
|
)) {
|
|
208
220
|
return BROKEN_REF_FLOW_SKIP_GUARD
|
|
209
221
|
}
|
|
222
|
+
bumpBrokenRefMetric(metrics, 'brokenRefCandidateFlow', 'guard-passed')
|
|
210
223
|
return null
|
|
211
224
|
}
|
|
212
225
|
|
|
@@ -225,12 +238,16 @@ const resolveBrokenRefFastPathFlow = (
|
|
|
225
238
|
metrics,
|
|
226
239
|
bumpBrokenRefMetric
|
|
227
240
|
)
|
|
241
|
+
bumpBrokenRefMetric(metrics, 'brokenRefCandidateFlow', 'fastpath-dispatch')
|
|
228
242
|
if (fastPathResult === BROKEN_REF_FAST_PATH_RESULT_NO_ACTIVE_SIGNATURE) {
|
|
243
|
+
bumpBrokenRefMetric(metrics, 'brokenRefCandidateFlow', 'no-active-signature')
|
|
229
244
|
return BROKEN_REF_FLOW_SKIP_NO_ACTIVE_SIGNATURE
|
|
230
245
|
}
|
|
231
246
|
if (fastPathResult === BROKEN_REF_FAST_PATH_RESULT_NO_MATCH) {
|
|
247
|
+
bumpBrokenRefMetric(metrics, 'brokenRefCandidateFlow', 'no-fastpath-match')
|
|
232
248
|
return BROKEN_REF_FLOW_SKIP_NO_FASTPATH_MATCH
|
|
233
249
|
}
|
|
250
|
+
bumpBrokenRefMetric(metrics, 'brokenRefCandidateFlow', 'repaired')
|
|
234
251
|
return BROKEN_REF_FLOW_REPAIRED
|
|
235
252
|
}
|
|
236
253
|
|
|
@@ -249,6 +266,7 @@ const runBrokenRefCandidateRewrite = (
|
|
|
249
266
|
children,
|
|
250
267
|
brokenRefCandidate,
|
|
251
268
|
segmentEnd,
|
|
269
|
+
metrics,
|
|
252
270
|
facts,
|
|
253
271
|
hooks,
|
|
254
272
|
fallbackCache
|
|
@@ -301,7 +319,7 @@ const createBrokenRefPassSignals = (seedSignals = null) => {
|
|
|
301
319
|
const observeBrokenRefTextToken = (passSignals, candidateState, text, tokenIdx, scanState) => {
|
|
302
320
|
const hasOpenBracket = text.indexOf('[') !== -1
|
|
303
321
|
const hasCloseBracket = text.indexOf(']') !== -1
|
|
304
|
-
if (!passSignals.hasBracketText && (hasOpenBracket || hasCloseBracket)) {
|
|
322
|
+
if (passSignals && !passSignals.hasBracketText && (hasOpenBracket || hasCloseBracket)) {
|
|
305
323
|
passSignals.hasBracketText = true
|
|
306
324
|
}
|
|
307
325
|
if (candidateState.start === -1) {
|
|
@@ -376,6 +394,7 @@ const tryRepairBrokenRefCandidateAtLinkOpen = (
|
|
|
376
394
|
const closeIdx = linkCloseMap.get(childIdx) ?? -1
|
|
377
395
|
if (closeIdx === -1) return null
|
|
378
396
|
bumpBrokenRefMetric(metrics, 'brokenRefFlow', 'candidate')
|
|
397
|
+
bumpBrokenRefMetric(metrics, 'brokenRefCandidateFlow', 'candidate')
|
|
379
398
|
const flowResult = runBrokenRefCandidateRewrite(
|
|
380
399
|
children,
|
|
381
400
|
brokenRefCandidate,
|
|
@@ -430,46 +449,123 @@ const runBrokenRefRepairPass = (children, scanState, metrics = null, facts = nul
|
|
|
430
449
|
return buildBrokenRefRepairPassResult(false, passSignals)
|
|
431
450
|
}
|
|
432
451
|
|
|
433
|
-
const
|
|
452
|
+
const hasPotentialBrokenRefRepairPass = (children, scanState) => {
|
|
434
453
|
resetBrokenRefScanState(scanState)
|
|
435
|
-
let maxRepairPass = 0
|
|
436
454
|
for (let j = 0; j < children.length; j++) {
|
|
437
455
|
const child = children[j]
|
|
438
456
|
if (!child || child.type !== 'text' || !child.content) continue
|
|
439
457
|
if (child.content.indexOf('[') === -1) continue
|
|
440
458
|
if (scanBrokenRefState(child.content, scanState).brokenEnd) {
|
|
441
|
-
|
|
459
|
+
return true
|
|
442
460
|
}
|
|
443
461
|
}
|
|
444
|
-
return
|
|
462
|
+
return false
|
|
463
|
+
}
|
|
464
|
+
|
|
465
|
+
const hasBrokenRefActiveFastPathTokenSignal = (wrapperSignals) => {
|
|
466
|
+
if (!wrapperSignals) return false
|
|
467
|
+
// Current broken-ref fast paths are all strong-token driven.
|
|
468
|
+
return wrapperSignals.strongOpenInRange > 0 || wrapperSignals.strongCloseInRange > 0
|
|
445
469
|
}
|
|
446
470
|
|
|
447
|
-
const
|
|
471
|
+
const countGuardedBrokenRefRepairPasses = (children, scanState, facts = null, hooks = null) => {
|
|
472
|
+
resetBrokenRefScanState(scanState)
|
|
473
|
+
const brokenRefCandidate = resetBrokenRefCandidateState({ start: -1, depth: 0, startTextOffset: 0 })
|
|
474
|
+
const fallbackCache = {
|
|
475
|
+
linkCloseMap: undefined,
|
|
476
|
+
wrapperPrefixStats: undefined
|
|
477
|
+
}
|
|
448
478
|
let repairPassCount = 0
|
|
479
|
+
for (let j = 0; j < children.length; j++) {
|
|
480
|
+
const child = children[j]
|
|
481
|
+
if (!child) continue
|
|
482
|
+
if (child.type === 'text' && child.content) {
|
|
483
|
+
observeBrokenRefTextToken(null, brokenRefCandidate, child.content, j, scanState)
|
|
484
|
+
}
|
|
485
|
+
if (child.type !== 'link_open' || brokenRefCandidate.start === -1) continue
|
|
486
|
+
if (brokenRefCandidate.depth <= 0) {
|
|
487
|
+
resetBrokenRefCandidateState(brokenRefCandidate)
|
|
488
|
+
continue
|
|
489
|
+
}
|
|
490
|
+
const linkCloseMap = ensureBrokenRefLinkCloseMap(children, facts, hooks, fallbackCache)
|
|
491
|
+
const closeIdx = linkCloseMap.get(j) ?? -1
|
|
492
|
+
if (closeIdx === -1) continue
|
|
493
|
+
const segmentEnd = resolveBrokenRefSegmentEnd(children, brokenRefCandidate, closeIdx)
|
|
494
|
+
const wrapperSignals = buildBrokenRefWrapperRangeSignals(
|
|
495
|
+
children,
|
|
496
|
+
brokenRefCandidate.start,
|
|
497
|
+
segmentEnd,
|
|
498
|
+
brokenRefCandidate.startTextOffset
|
|
499
|
+
)
|
|
500
|
+
if (!wrapperSignals.hasTextMarker || !hasBrokenRefActiveFastPathTokenSignal(wrapperSignals)) {
|
|
501
|
+
resetBrokenRefCandidateState(brokenRefCandidate)
|
|
502
|
+
continue
|
|
503
|
+
}
|
|
504
|
+
const wrapperPrefixStats = ensureBrokenRefWrapperPrefixStats(children, facts, hooks, fallbackCache)
|
|
505
|
+
if (shouldAttemptBrokenRefRewrite(
|
|
506
|
+
children,
|
|
507
|
+
brokenRefCandidate.start,
|
|
508
|
+
segmentEnd,
|
|
509
|
+
brokenRefCandidate.startTextOffset,
|
|
510
|
+
wrapperPrefixStats,
|
|
511
|
+
wrapperSignals
|
|
512
|
+
)) {
|
|
513
|
+
repairPassCount++
|
|
514
|
+
}
|
|
515
|
+
resetBrokenRefCandidateState(brokenRefCandidate)
|
|
516
|
+
}
|
|
517
|
+
return repairPassCount
|
|
518
|
+
}
|
|
519
|
+
|
|
520
|
+
const buildBrokenRefRepairsResult = (changed, passSignals) => {
|
|
521
|
+
return {
|
|
522
|
+
changed,
|
|
523
|
+
hasBracketText: passSignals.hasBracketText,
|
|
524
|
+
hasEmphasis: passSignals.hasEmphasis,
|
|
525
|
+
hasLinkClose: passSignals.hasLinkClose
|
|
526
|
+
}
|
|
527
|
+
}
|
|
528
|
+
|
|
529
|
+
const runBrokenRefRepairs = (children, scanState, metrics = null, facts = null, hooks = null) => {
|
|
530
|
+
const seedSignals = createBrokenRefPassSignals(createBrokenRefSignalSeed(facts))
|
|
531
|
+
if (!hasPotentialBrokenRefRepairPass(children, scanState)) {
|
|
532
|
+
return buildBrokenRefRepairsResult(false, seedSignals)
|
|
533
|
+
}
|
|
534
|
+
|
|
449
535
|
let changed = false
|
|
450
|
-
|
|
451
|
-
|
|
536
|
+
bumpBrokenRefMetric(metrics, 'brokenRefPasses', 'budgeted')
|
|
537
|
+
bumpBrokenRefMetric(metrics, 'brokenRefPasses', 'executed')
|
|
538
|
+
|
|
539
|
+
let pass = runBrokenRefRepairPass(children, scanState, metrics, facts, hooks)
|
|
540
|
+
if (!pass.didRepair) {
|
|
541
|
+
bumpBrokenRefMetric(metrics, 'brokenRefPasses', 'stopped-no-repair')
|
|
542
|
+
return buildBrokenRefRepairsResult(changed, pass)
|
|
543
|
+
}
|
|
544
|
+
|
|
545
|
+
changed = true
|
|
546
|
+
bumpBrokenRefMetric(metrics, 'brokenRefPasses', 'repaired')
|
|
547
|
+
|
|
548
|
+
const remainingBudget = countGuardedBrokenRefRepairPasses(children, scanState, facts, hooks)
|
|
549
|
+
if (remainingBudget > 0) {
|
|
550
|
+
bumpBrokenRefMetric(metrics, 'brokenRefPasses', 'budgeted', remainingBudget)
|
|
551
|
+
}
|
|
552
|
+
|
|
553
|
+
let repairPassCount = 0
|
|
554
|
+
while (repairPassCount < remainingBudget) {
|
|
555
|
+
bumpBrokenRefMetric(metrics, 'brokenRefPasses', 'executed')
|
|
556
|
+
pass = runBrokenRefRepairPass(children, scanState, metrics, facts, hooks)
|
|
452
557
|
if (!pass.didRepair) {
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
hasBracketText: pass.hasBracketText,
|
|
456
|
-
hasEmphasis: pass.hasEmphasis,
|
|
457
|
-
hasLinkClose: pass.hasLinkClose
|
|
458
|
-
}
|
|
558
|
+
bumpBrokenRefMetric(metrics, 'brokenRefPasses', 'stopped-no-repair')
|
|
559
|
+
return buildBrokenRefRepairsResult(changed, pass)
|
|
459
560
|
}
|
|
460
561
|
changed = true
|
|
461
562
|
repairPassCount++
|
|
563
|
+
bumpBrokenRefMetric(metrics, 'brokenRefPasses', 'repaired')
|
|
462
564
|
}
|
|
463
565
|
const finalSignals = collectBrokenRefPassSignals(children, createBrokenRefSignalSeed(facts))
|
|
464
|
-
return
|
|
465
|
-
changed,
|
|
466
|
-
hasBracketText: finalSignals.hasBracketText,
|
|
467
|
-
hasEmphasis: finalSignals.hasEmphasis,
|
|
468
|
-
hasLinkClose: finalSignals.hasLinkClose
|
|
469
|
-
}
|
|
566
|
+
return buildBrokenRefRepairsResult(changed, finalSignals)
|
|
470
567
|
}
|
|
471
568
|
|
|
472
569
|
export {
|
|
473
|
-
computeMaxBrokenRefRepairPass,
|
|
474
570
|
runBrokenRefRepairs
|
|
475
571
|
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
const fallbackMarkupByType = (type) => {
|
|
2
|
+
if (type === 'strong_open' || type === 'strong_close') return '**'
|
|
3
|
+
if (type === 'em_open' || type === 'em_close') return '*'
|
|
4
|
+
return ''
|
|
5
|
+
}
|
|
6
|
+
|
|
7
|
+
const makeTokenLiteralText = (token) => {
|
|
8
|
+
if (!token) return
|
|
9
|
+
const literal = token.markup || fallbackMarkupByType(token.type)
|
|
10
|
+
token.type = 'text'
|
|
11
|
+
token.tag = ''
|
|
12
|
+
token.nesting = 0
|
|
13
|
+
token.content = literal
|
|
14
|
+
token.markup = ''
|
|
15
|
+
token.info = ''
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
const sanitizeEmStrongBalance = (tokens, onChangeStart = null) => {
|
|
19
|
+
if (!tokens || tokens.length === 0) return false
|
|
20
|
+
const stack = []
|
|
21
|
+
let changed = false
|
|
22
|
+
for (let i = 0; i < tokens.length; i++) {
|
|
23
|
+
const token = tokens[i]
|
|
24
|
+
if (!token || !token.type) continue
|
|
25
|
+
if (token.type === 'strong_open' || token.type === 'em_open') {
|
|
26
|
+
stack.push({ type: token.type, idx: i })
|
|
27
|
+
continue
|
|
28
|
+
}
|
|
29
|
+
if (token.type !== 'strong_close' && token.type !== 'em_close') continue
|
|
30
|
+
const expected = token.type === 'strong_close' ? 'strong_open' : 'em_open'
|
|
31
|
+
if (stack.length > 0 && stack[stack.length - 1].type === expected) {
|
|
32
|
+
stack.pop()
|
|
33
|
+
continue
|
|
34
|
+
}
|
|
35
|
+
if (onChangeStart) onChangeStart(i)
|
|
36
|
+
makeTokenLiteralText(token)
|
|
37
|
+
changed = true
|
|
38
|
+
}
|
|
39
|
+
for (let i = stack.length - 1; i >= 0; i--) {
|
|
40
|
+
const entry = stack[i]
|
|
41
|
+
const token = tokens[entry.idx]
|
|
42
|
+
if (!token) continue
|
|
43
|
+
if (onChangeStart) onChangeStart(entry.idx)
|
|
44
|
+
makeTokenLiteralText(token)
|
|
45
|
+
changed = true
|
|
46
|
+
}
|
|
47
|
+
return changed
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
export { sanitizeEmStrongBalance }
|
|
@@ -1,9 +1,5 @@
|
|
|
1
1
|
import Token from 'markdown-it/lib/token.mjs'
|
|
2
|
-
|
|
3
|
-
const cloneMap = (map) => {
|
|
4
|
-
if (!map || !Array.isArray(map)) return null
|
|
5
|
-
return [map[0], map[1]]
|
|
6
|
-
}
|
|
2
|
+
import { cloneMap } from '../token-utils.js'
|
|
7
3
|
|
|
8
4
|
const cloneTextLike = (source, content) => {
|
|
9
5
|
const token = new Token('text', '', 0)
|
|
@@ -1,17 +1,16 @@
|
|
|
1
1
|
import { isJapaneseChar } from '../token-utils.js'
|
|
2
2
|
|
|
3
|
+
const CHAR_ASTERISK = 0x2A // *
|
|
4
|
+
const INLINE_REPAIR_EM_OUTER_STRONG_SEQUENCE = 1 << 0
|
|
5
|
+
const INLINE_REPAIR_TAIL_AFTER_LINK = 1 << 1
|
|
6
|
+
const INLINE_REPAIR_LEADING_ASTERISK_EM = 1 << 2
|
|
7
|
+
const INLINE_REPAIR_TRAILING_STRONG = 1 << 3
|
|
8
|
+
const INLINE_REPAIR_BALANCE_SANITIZE = 1 << 4
|
|
9
|
+
|
|
3
10
|
const hasMarkerChars = (text) => {
|
|
4
11
|
return !!text && text.indexOf('*') !== -1
|
|
5
12
|
}
|
|
6
13
|
|
|
7
|
-
const contentHasMarkerCharsFrom = (content, from) => {
|
|
8
|
-
if (!content) return false
|
|
9
|
-
const start = from > 0 ? from : 0
|
|
10
|
-
if (start === 0) return hasMarkerChars(content)
|
|
11
|
-
if (start >= content.length) return false
|
|
12
|
-
return content.indexOf('*', start) !== -1
|
|
13
|
-
}
|
|
14
|
-
|
|
15
14
|
const isAsteriskEmphasisToken = (token) => {
|
|
16
15
|
if (!token || !token.type) return false
|
|
17
16
|
if (token.type !== 'strong_open' &&
|
|
@@ -78,20 +77,6 @@ const hasEmphasisSignalInRange = (tokens, startIdx, endIdx) => {
|
|
|
78
77
|
return false
|
|
79
78
|
}
|
|
80
79
|
|
|
81
|
-
const hasTextMarkerCharsInRange = (tokens, startIdx, endIdx, firstTextOffset = 0) => {
|
|
82
|
-
if (!tokens || startIdx < 0 || endIdx < startIdx) return false
|
|
83
|
-
for (let i = startIdx; i <= endIdx && i < tokens.length; i++) {
|
|
84
|
-
const token = tokens[i]
|
|
85
|
-
if (!token || token.type !== 'text' || !token.content) continue
|
|
86
|
-
if (i === startIdx && firstTextOffset > 0) {
|
|
87
|
-
if (contentHasMarkerCharsFrom(token.content, firstTextOffset)) return true
|
|
88
|
-
continue
|
|
89
|
-
}
|
|
90
|
-
if (textTokenHasMarkerChars(token)) return true
|
|
91
|
-
}
|
|
92
|
-
return false
|
|
93
|
-
}
|
|
94
|
-
|
|
95
80
|
const isStrongRunSoftSpace = (code) => {
|
|
96
81
|
return code === 0x20 || code === 0x09 || code === 0x0A || code === 0x3000
|
|
97
82
|
}
|
|
@@ -107,21 +92,24 @@ const isStrongRunTextLike = (code) => {
|
|
|
107
92
|
return isStrongRunAsciiWord(code) || isJapaneseChar(code)
|
|
108
93
|
}
|
|
109
94
|
|
|
110
|
-
const countDelimiterLikeStrongRuns = (content,
|
|
95
|
+
const countDelimiterLikeStrongRuns = (content, from = 0, limit = 0) => {
|
|
111
96
|
let at = from > 0 ? from : 0
|
|
112
97
|
const len = content.length
|
|
113
|
-
const markerCode = marker.charCodeAt(0)
|
|
114
98
|
let count = 0
|
|
115
|
-
while (at < len) {
|
|
116
|
-
|
|
117
|
-
|
|
99
|
+
while (at + 1 < len) {
|
|
100
|
+
if (content.charCodeAt(at) !== CHAR_ASTERISK ||
|
|
101
|
+
content.charCodeAt(at + 1) !== CHAR_ASTERISK) {
|
|
102
|
+
at++
|
|
103
|
+
continue
|
|
104
|
+
}
|
|
105
|
+
const pos = at
|
|
118
106
|
const prevCode = pos > 0 ? content.charCodeAt(pos - 1) : 0
|
|
119
|
-
const nextPos = pos +
|
|
107
|
+
const nextPos = pos + 2
|
|
120
108
|
const nextCode = nextPos < len ? content.charCodeAt(nextPos) : 0
|
|
121
|
-
const prevSameMarker = prevCode ===
|
|
122
|
-
const nextSameMarker = nextCode ===
|
|
109
|
+
const prevSameMarker = prevCode === CHAR_ASTERISK
|
|
110
|
+
const nextSameMarker = nextCode === CHAR_ASTERISK
|
|
123
111
|
if (prevSameMarker || nextSameMarker) {
|
|
124
|
-
at = pos +
|
|
112
|
+
at = pos + 2
|
|
125
113
|
continue
|
|
126
114
|
}
|
|
127
115
|
const prevSoft = prevCode !== 0 && isStrongRunSoftSpace(prevCode)
|
|
@@ -131,92 +119,50 @@ const countDelimiterLikeStrongRuns = (content, marker, from = 0, limit = 0) => {
|
|
|
131
119
|
const nextTextLike = isStrongRunTextLike(nextCode)
|
|
132
120
|
const hasTextNeighbor = prevTextLike || nextTextLike
|
|
133
121
|
if (!hasTextNeighbor) {
|
|
134
|
-
at = pos +
|
|
122
|
+
at = pos + 2
|
|
135
123
|
continue
|
|
136
124
|
}
|
|
137
125
|
const atBoundary = prevCode === 0 || nextCode === 0
|
|
138
126
|
if (!atBoundary && (!prevTextLike || !nextTextLike)) {
|
|
139
|
-
at = pos +
|
|
127
|
+
at = pos + 2
|
|
140
128
|
continue
|
|
141
129
|
}
|
|
142
130
|
if (hasPrevOrNext && !prevSoft && !nextSoft) {
|
|
143
131
|
count++
|
|
144
132
|
if (limit > 0 && count >= limit) return count
|
|
145
133
|
}
|
|
146
|
-
at = pos +
|
|
134
|
+
at = pos + 2
|
|
147
135
|
}
|
|
148
136
|
return count
|
|
149
137
|
}
|
|
150
138
|
|
|
151
|
-
const countStrongMarkerRunsInTextRange = (tokens, startIdx, endIdx, firstTextOffset = 0, limit = 0) => {
|
|
152
|
-
if (!tokens || startIdx < 0 || endIdx < startIdx) return 0
|
|
153
|
-
let total = 0
|
|
154
|
-
for (let i = startIdx; i <= endIdx && i < tokens.length; i++) {
|
|
155
|
-
const token = tokens[i]
|
|
156
|
-
if (!token || token.type !== 'text' || !token.content) continue
|
|
157
|
-
const content = token.content
|
|
158
|
-
const scanFrom = i === startIdx && firstTextOffset > 0 ? firstTextOffset : 0
|
|
159
|
-
if (scanFrom >= content.length) continue
|
|
160
|
-
const remain = limit > 0 ? (limit - total) : 0
|
|
161
|
-
total += countDelimiterLikeStrongRuns(content, '**', scanFrom, remain)
|
|
162
|
-
if (limit > 0 && total >= limit) {
|
|
163
|
-
return total
|
|
164
|
-
}
|
|
165
|
-
}
|
|
166
|
-
return total
|
|
167
|
-
}
|
|
168
|
-
|
|
169
139
|
const buildAsteriskWrapperPrefixStats = (tokens) => {
|
|
170
140
|
const len = Array.isArray(tokens) ? tokens.length : 0
|
|
171
141
|
const strongDepthPrefix = new Array(len + 1)
|
|
172
142
|
const emDepthPrefix = new Array(len + 1)
|
|
173
|
-
const strongOpenPrefix = new Array(len + 1)
|
|
174
|
-
const strongClosePrefix = new Array(len + 1)
|
|
175
|
-
const emOpenPrefix = new Array(len + 1)
|
|
176
|
-
const emClosePrefix = new Array(len + 1)
|
|
177
143
|
let strongDepth = 0
|
|
178
144
|
let emDepthCount = 0
|
|
179
|
-
let strongOpenCount = 0
|
|
180
|
-
let strongCloseCount = 0
|
|
181
|
-
let emOpenCount = 0
|
|
182
|
-
let emCloseCount = 0
|
|
183
145
|
strongDepthPrefix[0] = 0
|
|
184
146
|
emDepthPrefix[0] = 0
|
|
185
|
-
strongOpenPrefix[0] = 0
|
|
186
|
-
strongClosePrefix[0] = 0
|
|
187
|
-
emOpenPrefix[0] = 0
|
|
188
|
-
emClosePrefix[0] = 0
|
|
189
147
|
for (let i = 0; i < len; i++) {
|
|
190
148
|
const token = tokens[i]
|
|
191
149
|
if (token && token.type && isAsteriskEmphasisToken(token)) {
|
|
192
150
|
if (token.type === 'strong_open') {
|
|
193
151
|
strongDepth++
|
|
194
|
-
strongOpenCount++
|
|
195
152
|
} else if (token.type === 'strong_close') {
|
|
196
153
|
if (strongDepth > 0) strongDepth--
|
|
197
|
-
strongCloseCount++
|
|
198
154
|
} else if (token.type === 'em_open') {
|
|
199
155
|
emDepthCount++
|
|
200
|
-
emOpenCount++
|
|
201
156
|
} else if (token.type === 'em_close') {
|
|
202
157
|
if (emDepthCount > 0) emDepthCount--
|
|
203
|
-
emCloseCount++
|
|
204
158
|
}
|
|
205
159
|
}
|
|
206
160
|
strongDepthPrefix[i + 1] = strongDepth
|
|
207
161
|
emDepthPrefix[i + 1] = emDepthCount
|
|
208
|
-
strongOpenPrefix[i + 1] = strongOpenCount
|
|
209
|
-
strongClosePrefix[i + 1] = strongCloseCount
|
|
210
|
-
emOpenPrefix[i + 1] = emOpenCount
|
|
211
|
-
emClosePrefix[i + 1] = emCloseCount
|
|
212
162
|
}
|
|
213
163
|
return {
|
|
214
164
|
strongDepth: strongDepthPrefix,
|
|
215
|
-
emDepth: emDepthPrefix
|
|
216
|
-
strongOpen: strongOpenPrefix,
|
|
217
|
-
strongClose: strongClosePrefix,
|
|
218
|
-
emOpen: emOpenPrefix,
|
|
219
|
-
emClose: emClosePrefix
|
|
165
|
+
emDepth: emDepthPrefix
|
|
220
166
|
}
|
|
221
167
|
}
|
|
222
168
|
|
|
@@ -229,6 +175,8 @@ const createBrokenRefWrapperRangeSignals = () => {
|
|
|
229
175
|
hasUnderscoreText: false,
|
|
230
176
|
hasCodeInline: false,
|
|
231
177
|
hasUnderscoreEmphasisToken: false,
|
|
178
|
+
hasTextMarker: false,
|
|
179
|
+
strongRunCount: 0,
|
|
232
180
|
strongOpenInRange: 0,
|
|
233
181
|
strongCloseInRange: 0,
|
|
234
182
|
emOpenInRange: 0,
|
|
@@ -239,17 +187,25 @@ const createBrokenRefWrapperRangeSignals = () => {
|
|
|
239
187
|
const updateBrokenRefTextRangeSignals = (signals, token, tokenIdx, startIdx, firstTextOffset) => {
|
|
240
188
|
if (!token || token.type !== 'text' || !token.content) return
|
|
241
189
|
const content = token.content
|
|
190
|
+
const scanFrom = tokenIdx === startIdx && firstTextOffset > 0 ? firstTextOffset : 0
|
|
242
191
|
// Keep this at 0 (instead of firstTextOffset) so historical fail-safe
|
|
243
192
|
// behavior around noisy leading chains in the first text token stays unchanged.
|
|
244
193
|
if (!signals.hasLongStarNoise && content.indexOf('***') !== -1) {
|
|
245
194
|
signals.hasLongStarNoise = true
|
|
246
195
|
}
|
|
247
196
|
if (!signals.hasUnderscoreText) {
|
|
248
|
-
const scanFrom = tokenIdx === startIdx && firstTextOffset > 0 ? firstTextOffset : 0
|
|
249
197
|
if (scanFrom < content.length && content.indexOf('_', scanFrom) !== -1) {
|
|
250
198
|
signals.hasUnderscoreText = true
|
|
251
199
|
}
|
|
252
200
|
}
|
|
201
|
+
if (!signals.hasTextMarker) {
|
|
202
|
+
signals.hasTextMarker = scanFrom === 0
|
|
203
|
+
? textTokenHasMarkerChars(token)
|
|
204
|
+
: content.indexOf('*', scanFrom) !== -1
|
|
205
|
+
}
|
|
206
|
+
if (signals.strongRunCount < 2 && scanFrom < content.length) {
|
|
207
|
+
signals.strongRunCount += countDelimiterLikeStrongRuns(content, scanFrom, 2 - signals.strongRunCount)
|
|
208
|
+
}
|
|
253
209
|
}
|
|
254
210
|
|
|
255
211
|
const updateBrokenRefWrapperTokenSignals = (signals, token, isAsteriskEmphasis) => {
|
|
@@ -343,18 +299,10 @@ const hasPreexistingWrapperCloseOnlyInRange = (tokens, startIdx, endIdx, prefixS
|
|
|
343
299
|
const hasPrefix =
|
|
344
300
|
!!prefixStats &&
|
|
345
301
|
Array.isArray(prefixStats.strongDepth) &&
|
|
346
|
-
Array.isArray(prefixStats.emDepth)
|
|
347
|
-
Array.isArray(prefixStats.strongOpen) &&
|
|
348
|
-
Array.isArray(prefixStats.strongClose) &&
|
|
349
|
-
Array.isArray(prefixStats.emOpen) &&
|
|
350
|
-
Array.isArray(prefixStats.emClose)
|
|
302
|
+
Array.isArray(prefixStats.emDepth)
|
|
351
303
|
if (hasPrefix &&
|
|
352
304
|
startIdx < prefixStats.strongDepth.length &&
|
|
353
|
-
startIdx < prefixStats.emDepth.length
|
|
354
|
-
(endIdx + 1) < prefixStats.strongOpen.length &&
|
|
355
|
-
(endIdx + 1) < prefixStats.strongClose.length &&
|
|
356
|
-
(endIdx + 1) < prefixStats.emOpen.length &&
|
|
357
|
-
(endIdx + 1) < prefixStats.emClose.length) {
|
|
305
|
+
startIdx < prefixStats.emDepth.length) {
|
|
358
306
|
if (needsStrongCloseOnly) {
|
|
359
307
|
preStrongDepth = prefixStats.strongDepth[startIdx] || 0
|
|
360
308
|
if (preStrongDepth > 0) return true
|
|
@@ -434,8 +382,8 @@ const isLowConfidenceBrokenRefRange = (tokens, startIdx, endIdx, firstTextOffset
|
|
|
434
382
|
return hasBrokenRefLowConfidenceWrapperRisk(tokens, startIdx, endIdx, wrapperPrefixStats, signals)
|
|
435
383
|
}
|
|
436
384
|
|
|
437
|
-
const hasBrokenRefStrongRunEvidence = (
|
|
438
|
-
return
|
|
385
|
+
const hasBrokenRefStrongRunEvidence = (wrapperSignals) => {
|
|
386
|
+
return !!wrapperSignals && wrapperSignals.strongRunCount >= 2
|
|
439
387
|
}
|
|
440
388
|
|
|
441
389
|
const hasBrokenRefExplicitAsteriskSignal = (wrapperSignals) => {
|
|
@@ -446,31 +394,67 @@ const hasBrokenRefImmediateRewriteSignal = (wrapperSignals) => {
|
|
|
446
394
|
return wrapperSignals.hasImbalance && hasBrokenRefExplicitAsteriskSignal(wrapperSignals)
|
|
447
395
|
}
|
|
448
396
|
|
|
449
|
-
const
|
|
450
|
-
return !wrapperSignals.hasImbalance && hasBrokenRefExplicitAsteriskSignal(wrapperSignals)
|
|
451
|
-
}
|
|
452
|
-
|
|
453
|
-
const shouldAttemptBrokenRefRewriteFromSignals = (tokens, startIdx, endIdx, firstTextOffset, wrapperSignals) => {
|
|
397
|
+
const shouldAttemptBrokenRefRewriteFromSignals = (wrapperSignals) => {
|
|
454
398
|
if (hasBrokenRefImmediateRewriteSignal(wrapperSignals)) return true
|
|
455
|
-
if (
|
|
456
|
-
return hasBrokenRefStrongRunEvidence(
|
|
399
|
+
if (!wrapperSignals.hasImbalance && hasBrokenRefExplicitAsteriskSignal(wrapperSignals)) return false
|
|
400
|
+
return hasBrokenRefStrongRunEvidence(wrapperSignals)
|
|
457
401
|
}
|
|
458
402
|
|
|
459
|
-
const shouldAttemptBrokenRefRewrite = (
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
403
|
+
const shouldAttemptBrokenRefRewrite = (
|
|
404
|
+
tokens,
|
|
405
|
+
startIdx,
|
|
406
|
+
endIdx,
|
|
407
|
+
firstTextOffset = 0,
|
|
408
|
+
wrapperPrefixStats = null,
|
|
409
|
+
wrapperSignals = null
|
|
410
|
+
) => {
|
|
411
|
+
const signals = wrapperSignals || buildBrokenRefWrapperRangeSignals(tokens, startIdx, endIdx, firstTextOffset)
|
|
412
|
+
if (!signals.hasTextMarker) return false
|
|
413
|
+
if (isLowConfidenceBrokenRefRange(tokens, startIdx, endIdx, firstTextOffset, wrapperPrefixStats, signals)) return false
|
|
414
|
+
return shouldAttemptBrokenRefRewriteFromSignals(signals)
|
|
463
415
|
}
|
|
464
416
|
|
|
465
|
-
const scanInlinePostprocessSignals = (children) => {
|
|
417
|
+
const scanInlinePostprocessSignals = (children, collectJapaneseContext = false) => {
|
|
466
418
|
let hasEmphasis = false
|
|
467
419
|
let hasLinkOpen = false
|
|
468
420
|
let hasLinkClose = false
|
|
421
|
+
let hasCodeInline = false
|
|
422
|
+
let hasJapaneseContext = false
|
|
423
|
+
let hasTextStrongMarker = false
|
|
424
|
+
let strongOpenCount = 0
|
|
425
|
+
let strongCloseCount = 0
|
|
426
|
+
let emOpenCount = 0
|
|
427
|
+
let emCloseCount = 0
|
|
428
|
+
let hasAsteriskWrapperImbalance = false
|
|
429
|
+
const emphasisStack = []
|
|
469
430
|
for (let j = 0; j < children.length; j++) {
|
|
470
431
|
const child = children[j]
|
|
471
432
|
if (!child) continue
|
|
472
|
-
if (!
|
|
433
|
+
if (collectJapaneseContext && !hasJapaneseContext && tokenHasJapaneseChars(child)) {
|
|
434
|
+
hasJapaneseContext = true
|
|
435
|
+
}
|
|
436
|
+
if (!hasTextStrongMarker && child.type === 'text' && child.content && child.content.indexOf('**') !== -1) {
|
|
437
|
+
hasTextStrongMarker = true
|
|
438
|
+
}
|
|
439
|
+
const isAsteriskEmphasis = isAsteriskEmphasisToken(child)
|
|
440
|
+
if (isAsteriskEmphasis) {
|
|
473
441
|
hasEmphasis = true
|
|
442
|
+
if (child.type === 'strong_open') strongOpenCount++
|
|
443
|
+
else if (child.type === 'strong_close') strongCloseCount++
|
|
444
|
+
else if (child.type === 'em_open') emOpenCount++
|
|
445
|
+
else if (child.type === 'em_close') emCloseCount++
|
|
446
|
+
if (!hasAsteriskWrapperImbalance) {
|
|
447
|
+
if (child.type === 'strong_open' || child.type === 'em_open') {
|
|
448
|
+
emphasisStack.push(child.type)
|
|
449
|
+
} else {
|
|
450
|
+
const expected = child.type === 'strong_close' ? 'strong_open' : 'em_open'
|
|
451
|
+
if (emphasisStack.length > 0 && emphasisStack[emphasisStack.length - 1] === expected) {
|
|
452
|
+
emphasisStack.pop()
|
|
453
|
+
} else {
|
|
454
|
+
hasAsteriskWrapperImbalance = true
|
|
455
|
+
}
|
|
456
|
+
}
|
|
457
|
+
}
|
|
474
458
|
}
|
|
475
459
|
if (!hasLinkOpen && child.type === 'link_open') {
|
|
476
460
|
hasLinkOpen = true
|
|
@@ -478,12 +462,37 @@ const scanInlinePostprocessSignals = (children) => {
|
|
|
478
462
|
if (!hasLinkClose && child.type === 'link_close') {
|
|
479
463
|
hasLinkClose = true
|
|
480
464
|
}
|
|
481
|
-
if (
|
|
465
|
+
if (!hasCodeInline && child.type === 'code_inline') {
|
|
466
|
+
hasCodeInline = true
|
|
467
|
+
}
|
|
468
|
+
}
|
|
469
|
+
if (!hasAsteriskWrapperImbalance && emphasisStack.length > 0) {
|
|
470
|
+
hasAsteriskWrapperImbalance = true
|
|
471
|
+
}
|
|
472
|
+
let repairMask = 0
|
|
473
|
+
if (emOpenCount >= 2 && emCloseCount >= 2 && strongOpenCount > 0) {
|
|
474
|
+
repairMask |= INLINE_REPAIR_EM_OUTER_STRONG_SEQUENCE
|
|
475
|
+
}
|
|
476
|
+
if (hasLinkClose && strongCloseCount > 0) {
|
|
477
|
+
repairMask |= INLINE_REPAIR_TAIL_AFTER_LINK
|
|
478
|
+
}
|
|
479
|
+
if (hasLinkClose && emCloseCount > 0) {
|
|
480
|
+
repairMask |= INLINE_REPAIR_LEADING_ASTERISK_EM
|
|
481
|
+
}
|
|
482
|
+
if (emOpenCount > 0 && emCloseCount > 0 && hasTextStrongMarker) {
|
|
483
|
+
repairMask |= INLINE_REPAIR_TRAILING_STRONG
|
|
484
|
+
}
|
|
485
|
+
if (hasAsteriskWrapperImbalance) {
|
|
486
|
+
repairMask |= INLINE_REPAIR_BALANCE_SANITIZE
|
|
482
487
|
}
|
|
483
488
|
return {
|
|
484
489
|
hasEmphasis,
|
|
485
490
|
hasLinkOpen,
|
|
486
|
-
hasLinkClose
|
|
491
|
+
hasLinkClose,
|
|
492
|
+
hasCodeInline,
|
|
493
|
+
hasJapaneseContext,
|
|
494
|
+
repairMask,
|
|
495
|
+
hasAsteriskWrapperImbalance
|
|
487
496
|
}
|
|
488
497
|
}
|
|
489
498
|
|
|
@@ -492,8 +501,13 @@ export {
|
|
|
492
501
|
isAsteriskEmphasisToken,
|
|
493
502
|
hasJapaneseContextInRange,
|
|
494
503
|
hasEmphasisSignalInRange,
|
|
495
|
-
hasTextMarkerCharsInRange,
|
|
496
504
|
buildAsteriskWrapperPrefixStats,
|
|
505
|
+
buildBrokenRefWrapperRangeSignals,
|
|
497
506
|
shouldAttemptBrokenRefRewrite,
|
|
498
|
-
scanInlinePostprocessSignals
|
|
507
|
+
scanInlinePostprocessSignals,
|
|
508
|
+
INLINE_REPAIR_EM_OUTER_STRONG_SEQUENCE,
|
|
509
|
+
INLINE_REPAIR_TAIL_AFTER_LINK,
|
|
510
|
+
INLINE_REPAIR_LEADING_ASTERISK_EM,
|
|
511
|
+
INLINE_REPAIR_TRAILING_STRONG,
|
|
512
|
+
INLINE_REPAIR_BALANCE_SANITIZE
|
|
499
513
|
}
|