@peaceroad/markdown-it-strong-ja 0.8.0 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import { isJapaneseChar, getInlineWrapperBase } from '../token-utils.js'
1
+ import { isJapaneseChar } from '../token-utils.js'
2
2
 
3
3
  const hasMarkerChars = (text) => {
4
4
  return !!text && text.indexOf('*') !== -1
@@ -220,8 +220,8 @@ const buildAsteriskWrapperPrefixStats = (tokens) => {
220
220
  }
221
221
  }
222
222
 
223
- const buildBrokenRefWrapperRangeSignals = (tokens, startIdx, endIdx, firstTextOffset = 0) => {
224
- const out = {
223
+ const createBrokenRefWrapperRangeSignals = () => {
224
+ return {
225
225
  hasLeadingUnmatchedClose: false,
226
226
  hasImbalance: false,
227
227
  hasAsteriskEmphasisToken: false,
@@ -234,83 +234,109 @@ const buildBrokenRefWrapperRangeSignals = (tokens, startIdx, endIdx, firstTextOf
234
234
  emOpenInRange: 0,
235
235
  emCloseInRange: 0
236
236
  }
237
- if (!tokens || startIdx < 0 || endIdx < startIdx) return out
238
- const depthMap = new Map()
239
- let sawWrapper = false
240
- let sawOpen = false
237
+ }
238
+
239
+ const updateBrokenRefTextRangeSignals = (signals, token, tokenIdx, startIdx, firstTextOffset) => {
240
+ if (!token || token.type !== 'text' || !token.content) return
241
+ const content = token.content
242
+ // Keep this at 0 (instead of firstTextOffset) so historical fail-safe
243
+ // behavior around noisy leading chains in the first text token stays unchanged.
244
+ if (!signals.hasLongStarNoise && content.indexOf('***') !== -1) {
245
+ signals.hasLongStarNoise = true
246
+ }
247
+ if (!signals.hasUnderscoreText) {
248
+ const scanFrom = tokenIdx === startIdx && firstTextOffset > 0 ? firstTextOffset : 0
249
+ if (scanFrom < content.length && content.indexOf('_', scanFrom) !== -1) {
250
+ signals.hasUnderscoreText = true
251
+ }
252
+ }
253
+ }
254
+
255
+ const updateBrokenRefWrapperTokenSignals = (signals, token, isAsteriskEmphasis) => {
256
+ if (!signals.hasCodeInline && token.type === 'code_inline') {
257
+ signals.hasCodeInline = true
258
+ }
259
+ if (isAsteriskEmphasis) {
260
+ signals.hasAsteriskEmphasisToken = true
261
+ }
262
+ if (!signals.hasUnderscoreEmphasisToken &&
263
+ (token.type === 'strong_open' ||
264
+ token.type === 'strong_close' ||
265
+ token.type === 'em_open' ||
266
+ token.type === 'em_close') &&
267
+ (token.markup === '_' || token.markup === '__')) {
268
+ signals.hasUnderscoreEmphasisToken = true
269
+ }
270
+ }
271
+
272
+ const updateBrokenRefWrapperRangeDepthSignals = (signals, token, wrapperState, isAsteriskEmphasis) => {
273
+ if (!isAsteriskEmphasis) return
274
+ let depthKey = ''
275
+ if (token.type === 'strong_open' || token.type === 'strong_close') {
276
+ depthKey = 'strongDepth'
277
+ } else if (token.type === 'em_open' || token.type === 'em_close') {
278
+ depthKey = 'emDepth'
279
+ } else {
280
+ return
281
+ }
282
+ const isOpen = token.type.endsWith('_open')
283
+ if (!wrapperState.sawWrapper) {
284
+ wrapperState.sawWrapper = true
285
+ if (!isOpen) signals.hasLeadingUnmatchedClose = true
286
+ }
287
+ if (isOpen) {
288
+ wrapperState.sawOpen = true
289
+ signals.hasLeadingUnmatchedClose = false
290
+ wrapperState[depthKey]++
291
+ } else if (wrapperState[depthKey] <= 0) {
292
+ signals.hasImbalance = true
293
+ } else {
294
+ wrapperState[depthKey]--
295
+ }
296
+ if (token.type === 'strong_open') signals.strongOpenInRange++
297
+ else if (token.type === 'strong_close') signals.strongCloseInRange++
298
+ else if (token.type === 'em_open') signals.emOpenInRange++
299
+ else if (token.type === 'em_close') signals.emCloseInRange++
300
+ }
301
+
302
+ const finalizeBrokenRefWrapperRangeSignals = (signals, wrapperState) => {
303
+ if (!wrapperState.sawWrapper || wrapperState.sawOpen) {
304
+ signals.hasLeadingUnmatchedClose = false
305
+ }
306
+ if (!signals.hasImbalance &&
307
+ (wrapperState.strongDepth !== 0 || wrapperState.emDepth !== 0)) {
308
+ signals.hasImbalance = true
309
+ }
310
+ return signals
311
+ }
312
+
313
+ const buildBrokenRefWrapperRangeSignals = (tokens, startIdx, endIdx, firstTextOffset = 0) => {
314
+ const signals = createBrokenRefWrapperRangeSignals()
315
+ if (!tokens || startIdx < 0 || endIdx < startIdx) return signals
316
+ const wrapperState = { sawWrapper: false, sawOpen: false, strongDepth: 0, emDepth: 0 }
241
317
  for (let i = startIdx; i <= endIdx && i < tokens.length; i++) {
242
318
  const token = tokens[i]
243
319
  if (!token || !token.type) continue
244
- if (!out.hasCodeInline && token.type === 'code_inline') {
245
- out.hasCodeInline = true
246
- }
247
320
  const isAsteriskEmphasis = isAsteriskEmphasisToken(token)
248
- if (isAsteriskEmphasis) out.hasAsteriskEmphasisToken = true
249
- if (!out.hasUnderscoreEmphasisToken &&
250
- (token.type === 'strong_open' ||
251
- token.type === 'strong_close' ||
252
- token.type === 'em_open' ||
253
- token.type === 'em_close') &&
254
- (token.markup === '_' || token.markup === '__')) {
255
- out.hasUnderscoreEmphasisToken = true
256
- }
257
- if (token.type === 'text' && token.content) {
258
- const content = token.content
259
- // Keep this at 0 (instead of firstTextOffset) so historical fail-safe
260
- // behavior around noisy leading chains in the first text token stays unchanged.
261
- if (!out.hasLongStarNoise && content.indexOf('***') !== -1) {
262
- out.hasLongStarNoise = true
263
- }
264
- if (!out.hasUnderscoreText) {
265
- const scanFrom = i === startIdx && firstTextOffset > 0 ? firstTextOffset : 0
266
- if (scanFrom < content.length && content.indexOf('_', scanFrom) !== -1) {
267
- out.hasUnderscoreText = true
268
- }
269
- }
270
- }
271
- if ((token.type === 'strong_open' || token.type === 'strong_close' || token.type === 'em_open' || token.type === 'em_close') &&
272
- !isAsteriskEmphasis) {
273
- continue
274
- }
275
- const base = getInlineWrapperBase(token.type)
276
- if (!base) continue
277
- const isOpen = token.type.endsWith('_open')
278
- if (!sawWrapper) {
279
- sawWrapper = true
280
- if (!isOpen) out.hasLeadingUnmatchedClose = true
281
- }
282
- if (isOpen) {
283
- sawOpen = true
284
- out.hasLeadingUnmatchedClose = false
285
- depthMap.set(base, (depthMap.get(base) || 0) + 1)
286
- } else {
287
- const prev = depthMap.get(base) || 0
288
- if (prev <= 0) {
289
- out.hasImbalance = true
290
- } else {
291
- depthMap.set(base, prev - 1)
292
- }
293
- }
294
- if (token.type === 'strong_open') out.strongOpenInRange++
295
- else if (token.type === 'strong_close') out.strongCloseInRange++
296
- else if (token.type === 'em_open') out.emOpenInRange++
297
- else if (token.type === 'em_close') out.emCloseInRange++
298
- }
299
- if (!sawWrapper || sawOpen) out.hasLeadingUnmatchedClose = false
300
- if (!out.hasImbalance) {
301
- for (const depth of depthMap.values()) {
302
- if (depth !== 0) {
303
- out.hasImbalance = true
304
- break
305
- }
306
- }
321
+ updateBrokenRefWrapperTokenSignals(signals, token, isAsteriskEmphasis)
322
+ updateBrokenRefTextRangeSignals(signals, token, i, startIdx, firstTextOffset)
323
+ updateBrokenRefWrapperRangeDepthSignals(signals, token, wrapperState, isAsteriskEmphasis)
307
324
  }
308
- return out
325
+ return finalizeBrokenRefWrapperRangeSignals(signals, wrapperState)
326
+ }
327
+
328
+ const hasRangeCloseOnlyWrapperSignals = (signals) => {
329
+ if (!signals) return false
330
+ return (signals.strongCloseInRange > 0 && signals.strongOpenInRange === 0) ||
331
+ (signals.emCloseInRange > 0 && signals.emOpenInRange === 0)
309
332
  }
310
333
 
311
334
  const hasPreexistingWrapperCloseOnlyInRange = (tokens, startIdx, endIdx, prefixStats = null, wrapperSignals = null) => {
312
335
  if (!tokens || startIdx <= 0 || endIdx < startIdx) return false
313
336
  const signals = wrapperSignals || buildBrokenRefWrapperRangeSignals(tokens, startIdx, endIdx, 0)
337
+ if (!hasRangeCloseOnlyWrapperSignals(signals)) return false
338
+ const needsStrongCloseOnly = signals.strongCloseInRange > 0 && signals.strongOpenInRange === 0
339
+ const needsEmCloseOnly = signals.emCloseInRange > 0 && signals.emOpenInRange === 0
314
340
 
315
341
  let preStrongDepth = 0
316
342
  let preEmDepth = 0
@@ -329,23 +355,20 @@ const hasPreexistingWrapperCloseOnlyInRange = (tokens, startIdx, endIdx, prefixS
329
355
  (endIdx + 1) < prefixStats.strongClose.length &&
330
356
  (endIdx + 1) < prefixStats.emOpen.length &&
331
357
  (endIdx + 1) < prefixStats.emClose.length) {
332
- preStrongDepth = prefixStats.strongDepth[startIdx] || 0
333
- preEmDepth = prefixStats.emDepth[startIdx] || 0
334
- if (preStrongDepth > 0) {
335
- const strongOpensInRange = signals.strongOpenInRange
336
- const strongClosesInRange = signals.strongCloseInRange
337
- if (strongClosesInRange > 0 && strongOpensInRange === 0) return true
358
+ if (needsStrongCloseOnly) {
359
+ preStrongDepth = prefixStats.strongDepth[startIdx] || 0
360
+ if (preStrongDepth > 0) return true
338
361
  }
339
- if (preEmDepth > 0) {
340
- const emOpensInRange = signals.emOpenInRange
341
- const emClosesInRange = signals.emCloseInRange
342
- if (emClosesInRange > 0 && emOpensInRange === 0) return true
362
+ if (needsEmCloseOnly) {
363
+ preEmDepth = prefixStats.emDepth[startIdx] || 0
364
+ if (preEmDepth > 0) return true
343
365
  }
344
366
  return false
345
- } else {
346
- for (let i = 0; i < startIdx && i < tokens.length; i++) {
347
- const token = tokens[i]
348
- if (!token || !token.type || !isAsteriskEmphasisToken(token)) continue
367
+ }
368
+ for (let i = 0; i < startIdx && i < tokens.length; i++) {
369
+ const token = tokens[i]
370
+ if (!token || !token.type || !isAsteriskEmphasisToken(token)) continue
371
+ if (needsStrongCloseOnly) {
349
372
  if (token.type === 'strong_open') {
350
373
  preStrongDepth++
351
374
  continue
@@ -354,46 +377,95 @@ const hasPreexistingWrapperCloseOnlyInRange = (tokens, startIdx, endIdx, prefixS
354
377
  if (preStrongDepth > 0) preStrongDepth--
355
378
  continue
356
379
  }
380
+ }
381
+ if (needsEmCloseOnly) {
357
382
  if (token.type === 'em_open') {
358
383
  preEmDepth++
359
384
  continue
360
385
  }
361
- if (token.type === 'em_close') {
362
- if (preEmDepth > 0) preEmDepth--
386
+ if (token.type === 'em_close' && preEmDepth > 0) {
387
+ preEmDepth--
363
388
  }
364
389
  }
365
390
  }
366
- if (preStrongDepth > 0 && signals.strongCloseInRange > 0 && signals.strongOpenInRange === 0) return true
367
- if (preEmDepth > 0 && signals.emCloseInRange > 0 && signals.emOpenInRange === 0) return true
391
+ if (needsStrongCloseOnly && preStrongDepth > 0) return true
392
+ if (needsEmCloseOnly && preEmDepth > 0) return true
368
393
  return false
369
394
  }
370
395
 
396
+ const hasBrokenRefLowConfidenceTextNoise = (signals) => {
397
+ return signals.hasLongStarNoise || signals.hasUnderscoreText
398
+ }
399
+
400
+ const hasBrokenRefLowConfidenceInlineSyntax = (signals) => {
401
+ return signals.hasCodeInline || signals.hasUnderscoreEmphasisToken
402
+ }
403
+
404
+ const hasBrokenRefLowConfidenceNoise = (signals) => {
405
+ return hasBrokenRefLowConfidenceTextNoise(signals) || hasBrokenRefLowConfidenceInlineSyntax(signals)
406
+ }
407
+
408
+ const hasBrokenRefCloseOnlyWrapperRisk = (
409
+ tokens,
410
+ startIdx,
411
+ endIdx,
412
+ wrapperPrefixStats = null,
413
+ wrapperSignals = null
414
+ ) => {
415
+ const signals = wrapperSignals || buildBrokenRefWrapperRangeSignals(tokens, startIdx, endIdx, 0)
416
+ return hasPreexistingWrapperCloseOnlyInRange(tokens, startIdx, endIdx, wrapperPrefixStats, signals)
417
+ }
418
+
419
+ const hasBrokenRefLowConfidenceWrapperRisk = (
420
+ tokens,
421
+ startIdx,
422
+ endIdx,
423
+ wrapperPrefixStats = null,
424
+ wrapperSignals = null
425
+ ) => {
426
+ const signals = wrapperSignals || buildBrokenRefWrapperRangeSignals(tokens, startIdx, endIdx, 0)
427
+ if (signals.hasLeadingUnmatchedClose) return true
428
+ return hasBrokenRefCloseOnlyWrapperRisk(tokens, startIdx, endIdx, wrapperPrefixStats, signals)
429
+ }
430
+
371
431
  const isLowConfidenceBrokenRefRange = (tokens, startIdx, endIdx, firstTextOffset = 0, wrapperPrefixStats = null, wrapperSignals = null) => {
372
432
  const signals = wrapperSignals || buildBrokenRefWrapperRangeSignals(tokens, startIdx, endIdx, firstTextOffset)
373
- if (signals.hasLongStarNoise) return true
374
- if (signals.hasUnderscoreText || signals.hasCodeInline || signals.hasUnderscoreEmphasisToken) return true
375
- if (signals.hasLeadingUnmatchedClose) return true
376
- if (hasPreexistingWrapperCloseOnlyInRange(tokens, startIdx, endIdx, wrapperPrefixStats, signals)) return true
377
- return false
433
+ if (hasBrokenRefLowConfidenceNoise(signals)) return true
434
+ return hasBrokenRefLowConfidenceWrapperRisk(tokens, startIdx, endIdx, wrapperPrefixStats, signals)
435
+ }
436
+
437
+ const hasBrokenRefStrongRunEvidence = (tokens, startIdx, endIdx, firstTextOffset = 0) => {
438
+ return countStrongMarkerRunsInTextRange(tokens, startIdx, endIdx, firstTextOffset, 2) >= 2
439
+ }
440
+
441
+ const hasBrokenRefExplicitAsteriskSignal = (wrapperSignals) => {
442
+ return wrapperSignals.hasAsteriskEmphasisToken
443
+ }
444
+
445
+ const hasBrokenRefImmediateRewriteSignal = (wrapperSignals) => {
446
+ return wrapperSignals.hasImbalance && hasBrokenRefExplicitAsteriskSignal(wrapperSignals)
447
+ }
448
+
449
+ const shouldRejectBalancedBrokenRefRewrite = (wrapperSignals) => {
450
+ return !wrapperSignals.hasImbalance && hasBrokenRefExplicitAsteriskSignal(wrapperSignals)
451
+ }
452
+
453
+ const shouldAttemptBrokenRefRewriteFromSignals = (tokens, startIdx, endIdx, firstTextOffset, wrapperSignals) => {
454
+ if (hasBrokenRefImmediateRewriteSignal(wrapperSignals)) return true
455
+ if (shouldRejectBalancedBrokenRefRewrite(wrapperSignals)) return false
456
+ return hasBrokenRefStrongRunEvidence(tokens, startIdx, endIdx, firstTextOffset)
378
457
  }
379
458
 
380
459
  const shouldAttemptBrokenRefRewrite = (tokens, startIdx, endIdx, firstTextOffset = 0, wrapperPrefixStats = null) => {
381
460
  const wrapperSignals = buildBrokenRefWrapperRangeSignals(tokens, startIdx, endIdx, firstTextOffset)
382
461
  if (isLowConfidenceBrokenRefRange(tokens, startIdx, endIdx, firstTextOffset, wrapperPrefixStats, wrapperSignals)) return false
383
- if (wrapperSignals.hasImbalance) {
384
- if (wrapperSignals.hasAsteriskEmphasisToken) return true
385
- return countStrongMarkerRunsInTextRange(tokens, startIdx, endIdx, firstTextOffset, 2) >= 2
386
- }
387
- if (wrapperSignals.hasAsteriskEmphasisToken) return false
388
- return countStrongMarkerRunsInTextRange(tokens, startIdx, endIdx, firstTextOffset, 2) >= 2
462
+ return shouldAttemptBrokenRefRewriteFromSignals(tokens, startIdx, endIdx, firstTextOffset, wrapperSignals)
389
463
  }
390
464
 
391
- const scanInlinePostprocessSignals = (children, hasBracketTextInContent = false) => {
392
- let hasBracketText = hasBracketTextInContent
465
+ const scanInlinePostprocessSignals = (children) => {
393
466
  let hasEmphasis = false
394
467
  let hasLinkOpen = false
395
468
  let hasLinkClose = false
396
- let hasCodeInline = false
397
469
  for (let j = 0; j < children.length; j++) {
398
470
  const child = children[j]
399
471
  if (!child) continue
@@ -406,21 +478,12 @@ const scanInlinePostprocessSignals = (children, hasBracketTextInContent = false)
406
478
  if (!hasLinkClose && child.type === 'link_close') {
407
479
  hasLinkClose = true
408
480
  }
409
- if (!hasCodeInline && child.type === 'code_inline') {
410
- hasCodeInline = true
411
- }
412
- if (hasBracketText || child.type !== 'text' || !child.content) continue
413
- if (child.content.indexOf('[') !== -1 || child.content.indexOf(']') !== -1) {
414
- hasBracketText = true
415
- }
416
- if (hasEmphasis && hasBracketText && hasLinkOpen && hasLinkClose) break
481
+ if (hasEmphasis && hasLinkOpen && hasLinkClose) break
417
482
  }
418
483
  return {
419
- hasBracketText,
420
484
  hasEmphasis,
421
485
  hasLinkOpen,
422
- hasLinkClose,
423
- hasCodeInline
486
+ hasLinkClose
424
487
  }
425
488
  }
426
489