@peaceroad/markdown-it-strong-ja 0.8.0 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -2
- package/index.js +36 -19
- package/package.json +8 -6
- package/src/token-compat.js +2 -8
- package/src/token-core.js +71 -57
- package/src/token-link-utils.js +381 -190
- package/src/token-postprocess/broken-ref.js +475 -0
- package/src/token-postprocess/guards.js +176 -113
- package/src/token-postprocess/orchestrator.js +311 -372
- package/src/token-utils.js +42 -13
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { isJapaneseChar
|
|
1
|
+
import { isJapaneseChar } from '../token-utils.js'
|
|
2
2
|
|
|
3
3
|
const hasMarkerChars = (text) => {
|
|
4
4
|
return !!text && text.indexOf('*') !== -1
|
|
@@ -220,8 +220,8 @@ const buildAsteriskWrapperPrefixStats = (tokens) => {
|
|
|
220
220
|
}
|
|
221
221
|
}
|
|
222
222
|
|
|
223
|
-
const
|
|
224
|
-
|
|
223
|
+
const createBrokenRefWrapperRangeSignals = () => {
|
|
224
|
+
return {
|
|
225
225
|
hasLeadingUnmatchedClose: false,
|
|
226
226
|
hasImbalance: false,
|
|
227
227
|
hasAsteriskEmphasisToken: false,
|
|
@@ -234,83 +234,109 @@ const buildBrokenRefWrapperRangeSignals = (tokens, startIdx, endIdx, firstTextOf
|
|
|
234
234
|
emOpenInRange: 0,
|
|
235
235
|
emCloseInRange: 0
|
|
236
236
|
}
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
const updateBrokenRefTextRangeSignals = (signals, token, tokenIdx, startIdx, firstTextOffset) => {
|
|
240
|
+
if (!token || token.type !== 'text' || !token.content) return
|
|
241
|
+
const content = token.content
|
|
242
|
+
// Keep this at 0 (instead of firstTextOffset) so historical fail-safe
|
|
243
|
+
// behavior around noisy leading chains in the first text token stays unchanged.
|
|
244
|
+
if (!signals.hasLongStarNoise && content.indexOf('***') !== -1) {
|
|
245
|
+
signals.hasLongStarNoise = true
|
|
246
|
+
}
|
|
247
|
+
if (!signals.hasUnderscoreText) {
|
|
248
|
+
const scanFrom = tokenIdx === startIdx && firstTextOffset > 0 ? firstTextOffset : 0
|
|
249
|
+
if (scanFrom < content.length && content.indexOf('_', scanFrom) !== -1) {
|
|
250
|
+
signals.hasUnderscoreText = true
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
const updateBrokenRefWrapperTokenSignals = (signals, token, isAsteriskEmphasis) => {
|
|
256
|
+
if (!signals.hasCodeInline && token.type === 'code_inline') {
|
|
257
|
+
signals.hasCodeInline = true
|
|
258
|
+
}
|
|
259
|
+
if (isAsteriskEmphasis) {
|
|
260
|
+
signals.hasAsteriskEmphasisToken = true
|
|
261
|
+
}
|
|
262
|
+
if (!signals.hasUnderscoreEmphasisToken &&
|
|
263
|
+
(token.type === 'strong_open' ||
|
|
264
|
+
token.type === 'strong_close' ||
|
|
265
|
+
token.type === 'em_open' ||
|
|
266
|
+
token.type === 'em_close') &&
|
|
267
|
+
(token.markup === '_' || token.markup === '__')) {
|
|
268
|
+
signals.hasUnderscoreEmphasisToken = true
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
const updateBrokenRefWrapperRangeDepthSignals = (signals, token, wrapperState, isAsteriskEmphasis) => {
|
|
273
|
+
if (!isAsteriskEmphasis) return
|
|
274
|
+
let depthKey = ''
|
|
275
|
+
if (token.type === 'strong_open' || token.type === 'strong_close') {
|
|
276
|
+
depthKey = 'strongDepth'
|
|
277
|
+
} else if (token.type === 'em_open' || token.type === 'em_close') {
|
|
278
|
+
depthKey = 'emDepth'
|
|
279
|
+
} else {
|
|
280
|
+
return
|
|
281
|
+
}
|
|
282
|
+
const isOpen = token.type.endsWith('_open')
|
|
283
|
+
if (!wrapperState.sawWrapper) {
|
|
284
|
+
wrapperState.sawWrapper = true
|
|
285
|
+
if (!isOpen) signals.hasLeadingUnmatchedClose = true
|
|
286
|
+
}
|
|
287
|
+
if (isOpen) {
|
|
288
|
+
wrapperState.sawOpen = true
|
|
289
|
+
signals.hasLeadingUnmatchedClose = false
|
|
290
|
+
wrapperState[depthKey]++
|
|
291
|
+
} else if (wrapperState[depthKey] <= 0) {
|
|
292
|
+
signals.hasImbalance = true
|
|
293
|
+
} else {
|
|
294
|
+
wrapperState[depthKey]--
|
|
295
|
+
}
|
|
296
|
+
if (token.type === 'strong_open') signals.strongOpenInRange++
|
|
297
|
+
else if (token.type === 'strong_close') signals.strongCloseInRange++
|
|
298
|
+
else if (token.type === 'em_open') signals.emOpenInRange++
|
|
299
|
+
else if (token.type === 'em_close') signals.emCloseInRange++
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
const finalizeBrokenRefWrapperRangeSignals = (signals, wrapperState) => {
|
|
303
|
+
if (!wrapperState.sawWrapper || wrapperState.sawOpen) {
|
|
304
|
+
signals.hasLeadingUnmatchedClose = false
|
|
305
|
+
}
|
|
306
|
+
if (!signals.hasImbalance &&
|
|
307
|
+
(wrapperState.strongDepth !== 0 || wrapperState.emDepth !== 0)) {
|
|
308
|
+
signals.hasImbalance = true
|
|
309
|
+
}
|
|
310
|
+
return signals
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
const buildBrokenRefWrapperRangeSignals = (tokens, startIdx, endIdx, firstTextOffset = 0) => {
|
|
314
|
+
const signals = createBrokenRefWrapperRangeSignals()
|
|
315
|
+
if (!tokens || startIdx < 0 || endIdx < startIdx) return signals
|
|
316
|
+
const wrapperState = { sawWrapper: false, sawOpen: false, strongDepth: 0, emDepth: 0 }
|
|
241
317
|
for (let i = startIdx; i <= endIdx && i < tokens.length; i++) {
|
|
242
318
|
const token = tokens[i]
|
|
243
319
|
if (!token || !token.type) continue
|
|
244
|
-
if (!out.hasCodeInline && token.type === 'code_inline') {
|
|
245
|
-
out.hasCodeInline = true
|
|
246
|
-
}
|
|
247
320
|
const isAsteriskEmphasis = isAsteriskEmphasisToken(token)
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
token.type === 'strong_close' ||
|
|
252
|
-
token.type === 'em_open' ||
|
|
253
|
-
token.type === 'em_close') &&
|
|
254
|
-
(token.markup === '_' || token.markup === '__')) {
|
|
255
|
-
out.hasUnderscoreEmphasisToken = true
|
|
256
|
-
}
|
|
257
|
-
if (token.type === 'text' && token.content) {
|
|
258
|
-
const content = token.content
|
|
259
|
-
// Keep this at 0 (instead of firstTextOffset) so historical fail-safe
|
|
260
|
-
// behavior around noisy leading chains in the first text token stays unchanged.
|
|
261
|
-
if (!out.hasLongStarNoise && content.indexOf('***') !== -1) {
|
|
262
|
-
out.hasLongStarNoise = true
|
|
263
|
-
}
|
|
264
|
-
if (!out.hasUnderscoreText) {
|
|
265
|
-
const scanFrom = i === startIdx && firstTextOffset > 0 ? firstTextOffset : 0
|
|
266
|
-
if (scanFrom < content.length && content.indexOf('_', scanFrom) !== -1) {
|
|
267
|
-
out.hasUnderscoreText = true
|
|
268
|
-
}
|
|
269
|
-
}
|
|
270
|
-
}
|
|
271
|
-
if ((token.type === 'strong_open' || token.type === 'strong_close' || token.type === 'em_open' || token.type === 'em_close') &&
|
|
272
|
-
!isAsteriskEmphasis) {
|
|
273
|
-
continue
|
|
274
|
-
}
|
|
275
|
-
const base = getInlineWrapperBase(token.type)
|
|
276
|
-
if (!base) continue
|
|
277
|
-
const isOpen = token.type.endsWith('_open')
|
|
278
|
-
if (!sawWrapper) {
|
|
279
|
-
sawWrapper = true
|
|
280
|
-
if (!isOpen) out.hasLeadingUnmatchedClose = true
|
|
281
|
-
}
|
|
282
|
-
if (isOpen) {
|
|
283
|
-
sawOpen = true
|
|
284
|
-
out.hasLeadingUnmatchedClose = false
|
|
285
|
-
depthMap.set(base, (depthMap.get(base) || 0) + 1)
|
|
286
|
-
} else {
|
|
287
|
-
const prev = depthMap.get(base) || 0
|
|
288
|
-
if (prev <= 0) {
|
|
289
|
-
out.hasImbalance = true
|
|
290
|
-
} else {
|
|
291
|
-
depthMap.set(base, prev - 1)
|
|
292
|
-
}
|
|
293
|
-
}
|
|
294
|
-
if (token.type === 'strong_open') out.strongOpenInRange++
|
|
295
|
-
else if (token.type === 'strong_close') out.strongCloseInRange++
|
|
296
|
-
else if (token.type === 'em_open') out.emOpenInRange++
|
|
297
|
-
else if (token.type === 'em_close') out.emCloseInRange++
|
|
298
|
-
}
|
|
299
|
-
if (!sawWrapper || sawOpen) out.hasLeadingUnmatchedClose = false
|
|
300
|
-
if (!out.hasImbalance) {
|
|
301
|
-
for (const depth of depthMap.values()) {
|
|
302
|
-
if (depth !== 0) {
|
|
303
|
-
out.hasImbalance = true
|
|
304
|
-
break
|
|
305
|
-
}
|
|
306
|
-
}
|
|
321
|
+
updateBrokenRefWrapperTokenSignals(signals, token, isAsteriskEmphasis)
|
|
322
|
+
updateBrokenRefTextRangeSignals(signals, token, i, startIdx, firstTextOffset)
|
|
323
|
+
updateBrokenRefWrapperRangeDepthSignals(signals, token, wrapperState, isAsteriskEmphasis)
|
|
307
324
|
}
|
|
308
|
-
return
|
|
325
|
+
return finalizeBrokenRefWrapperRangeSignals(signals, wrapperState)
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
const hasRangeCloseOnlyWrapperSignals = (signals) => {
|
|
329
|
+
if (!signals) return false
|
|
330
|
+
return (signals.strongCloseInRange > 0 && signals.strongOpenInRange === 0) ||
|
|
331
|
+
(signals.emCloseInRange > 0 && signals.emOpenInRange === 0)
|
|
309
332
|
}
|
|
310
333
|
|
|
311
334
|
const hasPreexistingWrapperCloseOnlyInRange = (tokens, startIdx, endIdx, prefixStats = null, wrapperSignals = null) => {
|
|
312
335
|
if (!tokens || startIdx <= 0 || endIdx < startIdx) return false
|
|
313
336
|
const signals = wrapperSignals || buildBrokenRefWrapperRangeSignals(tokens, startIdx, endIdx, 0)
|
|
337
|
+
if (!hasRangeCloseOnlyWrapperSignals(signals)) return false
|
|
338
|
+
const needsStrongCloseOnly = signals.strongCloseInRange > 0 && signals.strongOpenInRange === 0
|
|
339
|
+
const needsEmCloseOnly = signals.emCloseInRange > 0 && signals.emOpenInRange === 0
|
|
314
340
|
|
|
315
341
|
let preStrongDepth = 0
|
|
316
342
|
let preEmDepth = 0
|
|
@@ -329,23 +355,20 @@ const hasPreexistingWrapperCloseOnlyInRange = (tokens, startIdx, endIdx, prefixS
|
|
|
329
355
|
(endIdx + 1) < prefixStats.strongClose.length &&
|
|
330
356
|
(endIdx + 1) < prefixStats.emOpen.length &&
|
|
331
357
|
(endIdx + 1) < prefixStats.emClose.length) {
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
const strongOpensInRange = signals.strongOpenInRange
|
|
336
|
-
const strongClosesInRange = signals.strongCloseInRange
|
|
337
|
-
if (strongClosesInRange > 0 && strongOpensInRange === 0) return true
|
|
358
|
+
if (needsStrongCloseOnly) {
|
|
359
|
+
preStrongDepth = prefixStats.strongDepth[startIdx] || 0
|
|
360
|
+
if (preStrongDepth > 0) return true
|
|
338
361
|
}
|
|
339
|
-
if (
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
if (emClosesInRange > 0 && emOpensInRange === 0) return true
|
|
362
|
+
if (needsEmCloseOnly) {
|
|
363
|
+
preEmDepth = prefixStats.emDepth[startIdx] || 0
|
|
364
|
+
if (preEmDepth > 0) return true
|
|
343
365
|
}
|
|
344
366
|
return false
|
|
345
|
-
}
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
367
|
+
}
|
|
368
|
+
for (let i = 0; i < startIdx && i < tokens.length; i++) {
|
|
369
|
+
const token = tokens[i]
|
|
370
|
+
if (!token || !token.type || !isAsteriskEmphasisToken(token)) continue
|
|
371
|
+
if (needsStrongCloseOnly) {
|
|
349
372
|
if (token.type === 'strong_open') {
|
|
350
373
|
preStrongDepth++
|
|
351
374
|
continue
|
|
@@ -354,46 +377,95 @@ const hasPreexistingWrapperCloseOnlyInRange = (tokens, startIdx, endIdx, prefixS
|
|
|
354
377
|
if (preStrongDepth > 0) preStrongDepth--
|
|
355
378
|
continue
|
|
356
379
|
}
|
|
380
|
+
}
|
|
381
|
+
if (needsEmCloseOnly) {
|
|
357
382
|
if (token.type === 'em_open') {
|
|
358
383
|
preEmDepth++
|
|
359
384
|
continue
|
|
360
385
|
}
|
|
361
|
-
if (token.type === 'em_close') {
|
|
362
|
-
|
|
386
|
+
if (token.type === 'em_close' && preEmDepth > 0) {
|
|
387
|
+
preEmDepth--
|
|
363
388
|
}
|
|
364
389
|
}
|
|
365
390
|
}
|
|
366
|
-
if (
|
|
367
|
-
if (
|
|
391
|
+
if (needsStrongCloseOnly && preStrongDepth > 0) return true
|
|
392
|
+
if (needsEmCloseOnly && preEmDepth > 0) return true
|
|
368
393
|
return false
|
|
369
394
|
}
|
|
370
395
|
|
|
396
|
+
const hasBrokenRefLowConfidenceTextNoise = (signals) => {
|
|
397
|
+
return signals.hasLongStarNoise || signals.hasUnderscoreText
|
|
398
|
+
}
|
|
399
|
+
|
|
400
|
+
const hasBrokenRefLowConfidenceInlineSyntax = (signals) => {
|
|
401
|
+
return signals.hasCodeInline || signals.hasUnderscoreEmphasisToken
|
|
402
|
+
}
|
|
403
|
+
|
|
404
|
+
const hasBrokenRefLowConfidenceNoise = (signals) => {
|
|
405
|
+
return hasBrokenRefLowConfidenceTextNoise(signals) || hasBrokenRefLowConfidenceInlineSyntax(signals)
|
|
406
|
+
}
|
|
407
|
+
|
|
408
|
+
const hasBrokenRefCloseOnlyWrapperRisk = (
|
|
409
|
+
tokens,
|
|
410
|
+
startIdx,
|
|
411
|
+
endIdx,
|
|
412
|
+
wrapperPrefixStats = null,
|
|
413
|
+
wrapperSignals = null
|
|
414
|
+
) => {
|
|
415
|
+
const signals = wrapperSignals || buildBrokenRefWrapperRangeSignals(tokens, startIdx, endIdx, 0)
|
|
416
|
+
return hasPreexistingWrapperCloseOnlyInRange(tokens, startIdx, endIdx, wrapperPrefixStats, signals)
|
|
417
|
+
}
|
|
418
|
+
|
|
419
|
+
const hasBrokenRefLowConfidenceWrapperRisk = (
|
|
420
|
+
tokens,
|
|
421
|
+
startIdx,
|
|
422
|
+
endIdx,
|
|
423
|
+
wrapperPrefixStats = null,
|
|
424
|
+
wrapperSignals = null
|
|
425
|
+
) => {
|
|
426
|
+
const signals = wrapperSignals || buildBrokenRefWrapperRangeSignals(tokens, startIdx, endIdx, 0)
|
|
427
|
+
if (signals.hasLeadingUnmatchedClose) return true
|
|
428
|
+
return hasBrokenRefCloseOnlyWrapperRisk(tokens, startIdx, endIdx, wrapperPrefixStats, signals)
|
|
429
|
+
}
|
|
430
|
+
|
|
371
431
|
const isLowConfidenceBrokenRefRange = (tokens, startIdx, endIdx, firstTextOffset = 0, wrapperPrefixStats = null, wrapperSignals = null) => {
|
|
372
432
|
const signals = wrapperSignals || buildBrokenRefWrapperRangeSignals(tokens, startIdx, endIdx, firstTextOffset)
|
|
373
|
-
if (signals
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
433
|
+
if (hasBrokenRefLowConfidenceNoise(signals)) return true
|
|
434
|
+
return hasBrokenRefLowConfidenceWrapperRisk(tokens, startIdx, endIdx, wrapperPrefixStats, signals)
|
|
435
|
+
}
|
|
436
|
+
|
|
437
|
+
const hasBrokenRefStrongRunEvidence = (tokens, startIdx, endIdx, firstTextOffset = 0) => {
|
|
438
|
+
return countStrongMarkerRunsInTextRange(tokens, startIdx, endIdx, firstTextOffset, 2) >= 2
|
|
439
|
+
}
|
|
440
|
+
|
|
441
|
+
const hasBrokenRefExplicitAsteriskSignal = (wrapperSignals) => {
|
|
442
|
+
return wrapperSignals.hasAsteriskEmphasisToken
|
|
443
|
+
}
|
|
444
|
+
|
|
445
|
+
const hasBrokenRefImmediateRewriteSignal = (wrapperSignals) => {
|
|
446
|
+
return wrapperSignals.hasImbalance && hasBrokenRefExplicitAsteriskSignal(wrapperSignals)
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
const shouldRejectBalancedBrokenRefRewrite = (wrapperSignals) => {
|
|
450
|
+
return !wrapperSignals.hasImbalance && hasBrokenRefExplicitAsteriskSignal(wrapperSignals)
|
|
451
|
+
}
|
|
452
|
+
|
|
453
|
+
const shouldAttemptBrokenRefRewriteFromSignals = (tokens, startIdx, endIdx, firstTextOffset, wrapperSignals) => {
|
|
454
|
+
if (hasBrokenRefImmediateRewriteSignal(wrapperSignals)) return true
|
|
455
|
+
if (shouldRejectBalancedBrokenRefRewrite(wrapperSignals)) return false
|
|
456
|
+
return hasBrokenRefStrongRunEvidence(tokens, startIdx, endIdx, firstTextOffset)
|
|
378
457
|
}
|
|
379
458
|
|
|
380
459
|
const shouldAttemptBrokenRefRewrite = (tokens, startIdx, endIdx, firstTextOffset = 0, wrapperPrefixStats = null) => {
|
|
381
460
|
const wrapperSignals = buildBrokenRefWrapperRangeSignals(tokens, startIdx, endIdx, firstTextOffset)
|
|
382
461
|
if (isLowConfidenceBrokenRefRange(tokens, startIdx, endIdx, firstTextOffset, wrapperPrefixStats, wrapperSignals)) return false
|
|
383
|
-
|
|
384
|
-
if (wrapperSignals.hasAsteriskEmphasisToken) return true
|
|
385
|
-
return countStrongMarkerRunsInTextRange(tokens, startIdx, endIdx, firstTextOffset, 2) >= 2
|
|
386
|
-
}
|
|
387
|
-
if (wrapperSignals.hasAsteriskEmphasisToken) return false
|
|
388
|
-
return countStrongMarkerRunsInTextRange(tokens, startIdx, endIdx, firstTextOffset, 2) >= 2
|
|
462
|
+
return shouldAttemptBrokenRefRewriteFromSignals(tokens, startIdx, endIdx, firstTextOffset, wrapperSignals)
|
|
389
463
|
}
|
|
390
464
|
|
|
391
|
-
const scanInlinePostprocessSignals = (children
|
|
392
|
-
let hasBracketText = hasBracketTextInContent
|
|
465
|
+
const scanInlinePostprocessSignals = (children) => {
|
|
393
466
|
let hasEmphasis = false
|
|
394
467
|
let hasLinkOpen = false
|
|
395
468
|
let hasLinkClose = false
|
|
396
|
-
let hasCodeInline = false
|
|
397
469
|
for (let j = 0; j < children.length; j++) {
|
|
398
470
|
const child = children[j]
|
|
399
471
|
if (!child) continue
|
|
@@ -406,21 +478,12 @@ const scanInlinePostprocessSignals = (children, hasBracketTextInContent = false)
|
|
|
406
478
|
if (!hasLinkClose && child.type === 'link_close') {
|
|
407
479
|
hasLinkClose = true
|
|
408
480
|
}
|
|
409
|
-
if (
|
|
410
|
-
hasCodeInline = true
|
|
411
|
-
}
|
|
412
|
-
if (hasBracketText || child.type !== 'text' || !child.content) continue
|
|
413
|
-
if (child.content.indexOf('[') !== -1 || child.content.indexOf(']') !== -1) {
|
|
414
|
-
hasBracketText = true
|
|
415
|
-
}
|
|
416
|
-
if (hasEmphasis && hasBracketText && hasLinkOpen && hasLinkClose) break
|
|
481
|
+
if (hasEmphasis && hasLinkOpen && hasLinkClose) break
|
|
417
482
|
}
|
|
418
483
|
return {
|
|
419
|
-
hasBracketText,
|
|
420
484
|
hasEmphasis,
|
|
421
485
|
hasLinkOpen,
|
|
422
|
-
hasLinkClose
|
|
423
|
-
hasCodeInline
|
|
486
|
+
hasLinkClose
|
|
424
487
|
}
|
|
425
488
|
}
|
|
426
489
|
|