@peaceroad/markdown-it-strong-ja 0.7.2 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,499 @@
1
+ import { isJapaneseChar } from '../token-utils.js'
2
+
3
/** True when `text` is a non-empty string containing at least one '*'. */
const hasMarkerChars = (text) => Boolean(text) && text.includes('*')
6
+
7
/**
 * True when `content` contains '*' at or after index `from`
 * (a non-positive `from` scans the whole string).
 */
const contentHasMarkerCharsFrom = (content, from) => {
  if (!content) return false
  const begin = from > 0 ? from : 0
  if (begin === 0) return hasMarkerChars(content)
  return begin < content.length && content.indexOf('*', begin) !== -1
}
14
+
15
/**
 * True for strong/em open/close tokens produced by '*' / '**' markers.
 * Underscore-marked emphasis ('_' / '__') is explicitly excluded.
 */
const isAsteriskEmphasisToken = (token) => {
  const type = token && token.type
  const isEmphasisType =
    type === 'strong_open' || type === 'strong_close' ||
    type === 'em_open' || type === 'em_close'
  if (!isEmphasisType) return false
  const markup = token.markup
  return !(typeof markup === 'string' && markup.indexOf('_') !== -1)
}
26
+
27
/**
 * Memoized check: does a text token's content contain '*'?
 * The answer is cached on the token itself and invalidated whenever
 * the token's content string changes.
 */
const textTokenHasMarkerChars = (token) => {
  if (!token || token.type !== 'text' || !token.content) return false
  const { content } = token
  const cacheValid =
    token.__strongJaMarkerSource === content &&
    typeof token.__strongJaHasMarkerChars === 'boolean'
  if (cacheValid) return token.__strongJaHasMarkerChars
  const result = hasMarkerChars(content)
  token.__strongJaMarkerSource = content
  token.__strongJaHasMarkerChars = result
  return result
}
39
+
40
/**
 * Memoized check: does a text/code_inline token contain a Japanese char?
 * Scans UTF-16 units with isJapaneseChar; the result is cached on the token
 * and invalidated when the content string changes.
 */
const tokenHasJapaneseChars = (token) => {
  const eligible =
    token &&
    (token.type === 'text' || token.type === 'code_inline') &&
    Boolean(token.content)
  if (!eligible) return false
  const { content } = token
  if (token.__strongJaJapaneseSource === content &&
      typeof token.__strongJaHasJapaneseChar === 'boolean') {
    return token.__strongJaHasJapaneseChar
  }
  let found = false
  for (let idx = 0, n = content.length; idx < n; idx++) {
    if (isJapaneseChar(content.charCodeAt(idx))) {
      found = true
      break
    }
  }
  token.__strongJaJapaneseSource = content
  token.__strongJaHasJapaneseChar = found
  return found
}
60
+
61
/**
 * True when any token in [startIdx, endIdx] (clamped to the array bounds)
 * contains Japanese characters.
 */
const hasJapaneseContextInRange = (tokens, startIdx, endIdx) => {
  if (!tokens || startIdx < 0 || endIdx < startIdx) return false
  const last = Math.min(endIdx, tokens.length - 1)
  for (let idx = startIdx; idx <= last; idx++) {
    if (tokenHasJapaneseChars(tokens[idx])) return true
  }
  return false
}
69
+
70
/**
 * True when the token range contains either an asterisk emphasis token
 * or a text token whose content includes '*'.
 */
const hasEmphasisSignalInRange = (tokens, startIdx, endIdx) => {
  if (!tokens || startIdx < 0 || endIdx < startIdx) return false
  for (let idx = startIdx; idx <= endIdx && idx < tokens.length; idx++) {
    const current = tokens[idx]
    if (!current) continue
    if (isAsteriskEmphasisToken(current) || textTokenHasMarkerChars(current)) {
      return true
    }
  }
  return false
}
80
+
81
/**
 * True when a text token in the range contains '*'; the first token is
 * scanned only from `firstTextOffset` when that offset is positive.
 */
const hasTextMarkerCharsInRange = (tokens, startIdx, endIdx, firstTextOffset = 0) => {
  if (!tokens || startIdx < 0 || endIdx < startIdx) return false
  for (let idx = startIdx; idx <= endIdx && idx < tokens.length; idx++) {
    const current = tokens[idx]
    if (!current || current.type !== 'text' || !current.content) continue
    const useOffset = idx === startIdx && firstTextOffset > 0
    if (useOffset) {
      if (contentHasMarkerCharsFrom(current.content, firstTextOffset)) return true
    } else if (textTokenHasMarkerChars(current)) {
      return true
    }
  }
  return false
}
94
+
95
/** Soft whitespace around a marker: space, tab, LF, or ideographic space (U+3000). */
const isStrongRunSoftSpace = (code) => [0x20, 0x09, 0x0A, 0x3000].includes(code)
98
+
99
/** ASCII digit or letter (0-9, A-Z, a-z) by char code. */
const isStrongRunAsciiWord = (code) => {
  const isDigit = code >= 0x30 && code <= 0x39
  const isUpper = code >= 0x41 && code <= 0x5A
  const isLower = code >= 0x61 && code <= 0x7A
  return isDigit || isUpper || isLower
}
104
+
105
/** "Text-like" neighbor: ASCII word char or Japanese char; 0/NaN means no char. */
const isStrongRunTextLike = (code) => {
  if (!code) return false
  if (isStrongRunAsciiWord(code)) return true
  return isJapaneseChar(code)
}
109
+
110
// Counts occurrences of `marker` in `content` (scanning from `from`) that look
// like genuine emphasis delimiters rather than stray punctuation. A hit must:
//   - not touch another marker character (longer runs like '***' are skipped),
//   - have at least one text-like neighbor (ASCII word char or Japanese char),
//   - when mid-string, be text-like on BOTH sides (a string boundary relaxes
//     this to one side),
//   - not be flanked by soft space (space/tab/LF/U+3000) on either side.
// Stops early and returns as soon as `limit` (> 0) hits are found.
const countDelimiterLikeStrongRuns = (content, marker, from = 0, limit = 0) => {
  let at = from > 0 ? from : 0
  const len = content.length
  const markerCode = marker.charCodeAt(0)
  let count = 0
  while (at < len) {
    const pos = content.indexOf(marker, at)
    if (pos === -1) break
    // Neighbor char codes; 0 stands for "no character" (string boundary).
    const prevCode = pos > 0 ? content.charCodeAt(pos - 1) : 0
    const nextPos = pos + marker.length
    const nextCode = nextPos < len ? content.charCodeAt(nextPos) : 0
    const prevSameMarker = prevCode === markerCode
    const nextSameMarker = nextCode === markerCode
    if (prevSameMarker || nextSameMarker) {
      // Part of a longer marker run (e.g. '***') — not a clean delimiter.
      at = pos + marker.length
      continue
    }
    const prevSoft = prevCode !== 0 && isStrongRunSoftSpace(prevCode)
    const nextSoft = nextCode !== 0 && isStrongRunSoftSpace(nextCode)
    const hasPrevOrNext = prevCode !== 0 || nextCode !== 0
    const prevTextLike = isStrongRunTextLike(prevCode)
    const nextTextLike = isStrongRunTextLike(nextCode)
    const hasTextNeighbor = prevTextLike || nextTextLike
    if (!hasTextNeighbor) {
      at = pos + marker.length
      continue
    }
    // Mid-string markers must be surrounded by text on both sides; at a
    // string boundary a single text-like neighbor is enough.
    const atBoundary = prevCode === 0 || nextCode === 0
    if (!atBoundary && (!prevTextLike || !nextTextLike)) {
      at = pos + marker.length
      continue
    }
    if (hasPrevOrNext && !prevSoft && !nextSoft) {
      count++
      if (limit > 0 && count >= limit) return count
    }
    at = pos + marker.length
  }
  return count
}
150
+
151
/**
 * Sums delimiter-like '**' runs over the text tokens in [startIdx, endIdx].
 * The first token is scanned from `firstTextOffset` when positive; counting
 * stops early once `limit` (> 0) is reached.
 */
const countStrongMarkerRunsInTextRange = (tokens, startIdx, endIdx, firstTextOffset = 0, limit = 0) => {
  if (!tokens || startIdx < 0 || endIdx < startIdx) return 0
  let total = 0
  for (let idx = startIdx; idx <= endIdx && idx < tokens.length; idx++) {
    const current = tokens[idx]
    if (!current || current.type !== 'text' || !current.content) continue
    const text = current.content
    const offset = idx === startIdx && firstTextOffset > 0 ? firstTextOffset : 0
    if (offset >= text.length) continue
    const remaining = limit > 0 ? limit - total : 0
    total += countDelimiterLikeStrongRuns(text, '**', offset, remaining)
    if (limit > 0 && total >= limit) return total
  }
  return total
}
168
+
169
// Builds prefix-sum statistics over asterisk ('*'/'**') emphasis tokens.
// Every returned array has length tokens.length + 1; index i holds the value
// accumulated over tokens[0..i) — i.e. the state *before* token i:
//   strongDepth / emDepth — open-minus-close nesting depth (clamped at 0),
//   strongOpen / strongClose / emOpen / emClose — running event counts.
// Underscore-marked emphasis is excluded via isAsteriskEmphasisToken.
const buildAsteriskWrapperPrefixStats = (tokens) => {
  const len = Array.isArray(tokens) ? tokens.length : 0
  const strongDepthPrefix = new Array(len + 1)
  const emDepthPrefix = new Array(len + 1)
  const strongOpenPrefix = new Array(len + 1)
  const strongClosePrefix = new Array(len + 1)
  const emOpenPrefix = new Array(len + 1)
  const emClosePrefix = new Array(len + 1)
  let strongDepth = 0
  let emDepthCount = 0
  let strongOpenCount = 0
  let strongCloseCount = 0
  let emOpenCount = 0
  let emCloseCount = 0
  strongDepthPrefix[0] = 0
  emDepthPrefix[0] = 0
  strongOpenPrefix[0] = 0
  strongClosePrefix[0] = 0
  emOpenPrefix[0] = 0
  emClosePrefix[0] = 0
  for (let i = 0; i < len; i++) {
    const token = tokens[i]
    if (token && token.type && isAsteriskEmphasisToken(token)) {
      if (token.type === 'strong_open') {
        strongDepth++
        strongOpenCount++
      } else if (token.type === 'strong_close') {
        // Clamp at zero so stray closes cannot drive the depth negative.
        if (strongDepth > 0) strongDepth--
        strongCloseCount++
      } else if (token.type === 'em_open') {
        emDepthCount++
        emOpenCount++
      } else if (token.type === 'em_close') {
        if (emDepthCount > 0) emDepthCount--
        emCloseCount++
      }
    }
    strongDepthPrefix[i + 1] = strongDepth
    emDepthPrefix[i + 1] = emDepthCount
    strongOpenPrefix[i + 1] = strongOpenCount
    strongClosePrefix[i + 1] = strongCloseCount
    emOpenPrefix[i + 1] = emOpenCount
    emClosePrefix[i + 1] = emCloseCount
  }
  return {
    strongDepth: strongDepthPrefix,
    emDepth: emDepthPrefix,
    strongOpen: strongOpenPrefix,
    strongClose: strongClosePrefix,
    emOpen: emOpenPrefix,
    emClose: emClosePrefix
  }
}
222
+
223
/** Fresh, all-clear signal record for a broken-reference wrapper range scan. */
const createBrokenRefWrapperRangeSignals = () => ({
  hasLeadingUnmatchedClose: false,
  hasImbalance: false,
  hasAsteriskEmphasisToken: false,
  hasLongStarNoise: false,
  hasUnderscoreText: false,
  hasCodeInline: false,
  hasUnderscoreEmphasisToken: false,
  strongOpenInRange: 0,
  strongCloseInRange: 0,
  emOpenInRange: 0,
  emCloseInRange: 0
})
238
+
239
/**
 * Folds a text token's noise signals into `signals`:
 * '***' runs and underscores (the latter honoring `firstTextOffset`
 * on the first token of the range).
 */
const updateBrokenRefTextRangeSignals = (signals, token, tokenIdx, startIdx, firstTextOffset) => {
  if (!token || token.type !== 'text' || !token.content) return
  const text = token.content
  // The '***' scan deliberately starts at 0 (not firstTextOffset) so the
  // historical fail-safe around noisy leading chains stays unchanged.
  if (!signals.hasLongStarNoise && text.includes('***')) {
    signals.hasLongStarNoise = true
  }
  if (signals.hasUnderscoreText) return
  const scanFrom = tokenIdx === startIdx && firstTextOffset > 0 ? firstTextOffset : 0
  if (scanFrom < text.length && text.indexOf('_', scanFrom) !== -1) {
    signals.hasUnderscoreText = true
  }
}
254
+
255
/**
 * Folds per-token wrapper signals into `signals`: inline code presence,
 * asterisk emphasis presence, and underscore-marked emphasis presence.
 */
const updateBrokenRefWrapperTokenSignals = (signals, token, isAsteriskEmphasis) => {
  if (token.type === 'code_inline') signals.hasCodeInline = true
  if (isAsteriskEmphasis) signals.hasAsteriskEmphasisToken = true
  if (signals.hasUnderscoreEmphasisToken) return
  const emphasisTypes = ['strong_open', 'strong_close', 'em_open', 'em_close']
  const underscoreMarkup = token.markup === '_' || token.markup === '__'
  if (underscoreMarkup && emphasisTypes.indexOf(token.type) !== -1) {
    signals.hasUnderscoreEmphasisToken = true
  }
}
271
+
272
/**
 * Tracks strong/em nesting depth for an asterisk emphasis token and records
 * depth-based signals: a close seen before any open in the range sets
 * hasLeadingUnmatchedClose (cleared again once an open appears), and a close
 * with zero depth marks hasImbalance. Per-type open/close counters are bumped.
 */
const updateBrokenRefWrapperRangeDepthSignals = (signals, token, wrapperState, isAsteriskEmphasis) => {
  if (!isAsteriskEmphasis) return
  const type = token.type
  let depthKey
  if (type === 'strong_open' || type === 'strong_close') depthKey = 'strongDepth'
  else if (type === 'em_open' || type === 'em_close') depthKey = 'emDepth'
  else return
  const opening = type.endsWith('_open')
  if (!wrapperState.sawWrapper) {
    wrapperState.sawWrapper = true
    if (!opening) signals.hasLeadingUnmatchedClose = true
  }
  if (opening) {
    wrapperState.sawOpen = true
    signals.hasLeadingUnmatchedClose = false
    wrapperState[depthKey] += 1
  } else if (wrapperState[depthKey] > 0) {
    wrapperState[depthKey] -= 1
  } else {
    signals.hasImbalance = true
  }
  switch (type) {
    case 'strong_open': signals.strongOpenInRange++; break
    case 'strong_close': signals.strongCloseInRange++; break
    case 'em_open': signals.emOpenInRange++; break
    case 'em_close': signals.emCloseInRange++; break
  }
}
301
+
302
/**
 * Final pass after a range scan: clear the leading-close flag when no wrapper
 * was seen at all or an open eventually appeared, and mark imbalance when any
 * depth counter is left non-zero.
 */
const finalizeBrokenRefWrapperRangeSignals = (signals, wrapperState) => {
  const leadingCloseIrrelevant = !wrapperState.sawWrapper || wrapperState.sawOpen
  if (leadingCloseIrrelevant) signals.hasLeadingUnmatchedClose = false
  const unclosedDepth = wrapperState.strongDepth !== 0 || wrapperState.emDepth !== 0
  if (unclosedDepth) signals.hasImbalance = true
  return signals
}
312
+
313
/**
 * Scans tokens[startIdx..endIdx] and aggregates all broken-reference wrapper
 * signals (noise, inline syntax, depth/imbalance) into one record.
 */
const buildBrokenRefWrapperRangeSignals = (tokens, startIdx, endIdx, firstTextOffset = 0) => {
  const signals = createBrokenRefWrapperRangeSignals()
  if (!tokens || startIdx < 0 || endIdx < startIdx) return signals
  const wrapperState = { sawWrapper: false, sawOpen: false, strongDepth: 0, emDepth: 0 }
  const last = Math.min(endIdx, tokens.length - 1)
  for (let idx = startIdx; idx <= last; idx++) {
    const current = tokens[idx]
    if (!current || !current.type) continue
    const asteriskEmphasis = isAsteriskEmphasisToken(current)
    updateBrokenRefWrapperTokenSignals(signals, current, asteriskEmphasis)
    updateBrokenRefTextRangeSignals(signals, current, idx, startIdx, firstTextOffset)
    updateBrokenRefWrapperRangeDepthSignals(signals, current, wrapperState, asteriskEmphasis)
  }
  return finalizeBrokenRefWrapperRangeSignals(signals, wrapperState)
}
327
+
328
/** True when the range saw strong or em closes without any matching opens. */
const hasRangeCloseOnlyWrapperSignals = (signals) => {
  if (!signals) return false
  const strongCloseOnly = signals.strongCloseInRange > 0 && signals.strongOpenInRange === 0
  const emCloseOnly = signals.emCloseInRange > 0 && signals.emOpenInRange === 0
  return strongCloseOnly || emCloseOnly
}
333
+
334
// Reports whether a close-only wrapper signal inside [startIdx, endIdx] is
// explained by an asterisk emphasis wrapper opened BEFORE the range — i.e.
// the range's close token pairs with a pre-existing open.
// Fast path: read the depth-before-startIdx from `prefixStats` when its
// arrays exist and cover both ends of the range.
// Slow path: replay tokens[0..startIdx) and track only the depth(s) needed.
const hasPreexistingWrapperCloseOnlyInRange = (tokens, startIdx, endIdx, prefixStats = null, wrapperSignals = null) => {
  // startIdx <= 0 means nothing precedes the range, so no pre-existing open.
  if (!tokens || startIdx <= 0 || endIdx < startIdx) return false
  const signals = wrapperSignals || buildBrokenRefWrapperRangeSignals(tokens, startIdx, endIdx, 0)
  if (!hasRangeCloseOnlyWrapperSignals(signals)) return false
  const needsStrongCloseOnly = signals.strongCloseInRange > 0 && signals.strongOpenInRange === 0
  const needsEmCloseOnly = signals.emCloseInRange > 0 && signals.emOpenInRange === 0

  let preStrongDepth = 0
  let preEmDepth = 0
  // Validate the prefix-stat arrays before trusting them.
  const hasPrefix =
    !!prefixStats &&
    Array.isArray(prefixStats.strongDepth) &&
    Array.isArray(prefixStats.emDepth) &&
    Array.isArray(prefixStats.strongOpen) &&
    Array.isArray(prefixStats.strongClose) &&
    Array.isArray(prefixStats.emOpen) &&
    Array.isArray(prefixStats.emClose)
  if (hasPrefix &&
    startIdx < prefixStats.strongDepth.length &&
    startIdx < prefixStats.emDepth.length &&
    (endIdx + 1) < prefixStats.strongOpen.length &&
    (endIdx + 1) < prefixStats.strongClose.length &&
    (endIdx + 1) < prefixStats.emOpen.length &&
    (endIdx + 1) < prefixStats.emClose.length) {
    // Depth recorded at startIdx is the nesting level *before* the range.
    if (needsStrongCloseOnly) {
      preStrongDepth = prefixStats.strongDepth[startIdx] || 0
      if (preStrongDepth > 0) return true
    }
    if (needsEmCloseOnly) {
      preEmDepth = prefixStats.emDepth[startIdx] || 0
      if (preEmDepth > 0) return true
    }
    return false
  }
  // Fallback: linear replay of the tokens before the range.
  for (let i = 0; i < startIdx && i < tokens.length; i++) {
    const token = tokens[i]
    if (!token || !token.type || !isAsteriskEmphasisToken(token)) continue
    if (needsStrongCloseOnly) {
      if (token.type === 'strong_open') {
        preStrongDepth++
        continue
      }
      if (token.type === 'strong_close') {
        // Clamp at zero, matching buildAsteriskWrapperPrefixStats.
        if (preStrongDepth > 0) preStrongDepth--
        continue
      }
    }
    if (needsEmCloseOnly) {
      if (token.type === 'em_open') {
        preEmDepth++
        continue
      }
      if (token.type === 'em_close' && preEmDepth > 0) {
        preEmDepth--
      }
    }
  }
  if (needsStrongCloseOnly && preStrongDepth > 0) return true
  if (needsEmCloseOnly && preEmDepth > 0) return true
  return false
}
395
+
396
/** Text-level noise ('***' runs or underscores) that lowers rewrite confidence. */
const hasBrokenRefLowConfidenceTextNoise = (signals) =>
  signals.hasLongStarNoise || signals.hasUnderscoreText
399
+
400
/** Inline syntax (code spans or underscore emphasis) that lowers rewrite confidence. */
const hasBrokenRefLowConfidenceInlineSyntax = (signals) =>
  signals.hasCodeInline || signals.hasUnderscoreEmphasisToken
403
+
404
/** Any low-confidence signal: text noise or risky inline syntax. */
const hasBrokenRefLowConfidenceNoise = (signals) =>
  hasBrokenRefLowConfidenceTextNoise(signals) || hasBrokenRefLowConfidenceInlineSyntax(signals)
407
+
408
/**
 * True when a close-only wrapper inside the range pairs with emphasis opened
 * before the range — rewriting here could sever that pairing.
 */
const hasBrokenRefCloseOnlyWrapperRisk = (tokens, startIdx, endIdx, wrapperPrefixStats = null, wrapperSignals = null) => {
  const signals = wrapperSignals || buildBrokenRefWrapperRangeSignals(tokens, startIdx, endIdx, 0)
  return hasPreexistingWrapperCloseOnlyInRange(tokens, startIdx, endIdx, wrapperPrefixStats, signals)
}
418
+
419
/**
 * Wrapper-related risk: a leading unmatched close in the range, or a
 * close-only wrapper that pairs with emphasis opened before the range.
 */
const hasBrokenRefLowConfidenceWrapperRisk = (tokens, startIdx, endIdx, wrapperPrefixStats = null, wrapperSignals = null) => {
  const signals = wrapperSignals || buildBrokenRefWrapperRangeSignals(tokens, startIdx, endIdx, 0)
  return signals.hasLeadingUnmatchedClose ||
    hasBrokenRefCloseOnlyWrapperRisk(tokens, startIdx, endIdx, wrapperPrefixStats, signals)
}
430
+
431
/** Combined gate: noise signals or wrapper risk make the range low confidence. */
const isLowConfidenceBrokenRefRange = (tokens, startIdx, endIdx, firstTextOffset = 0, wrapperPrefixStats = null, wrapperSignals = null) => {
  const signals = wrapperSignals || buildBrokenRefWrapperRangeSignals(tokens, startIdx, endIdx, firstTextOffset)
  return hasBrokenRefLowConfidenceNoise(signals) ||
    hasBrokenRefLowConfidenceWrapperRisk(tokens, startIdx, endIdx, wrapperPrefixStats, signals)
}
436
+
437
/** At least two delimiter-like '**' runs in the text range count as evidence. */
const hasBrokenRefStrongRunEvidence = (tokens, startIdx, endIdx, firstTextOffset = 0) =>
  countStrongMarkerRunsInTextRange(tokens, startIdx, endIdx, firstTextOffset, 2) >= 2
440
+
441
/** Whether the scanned range contained any asterisk emphasis token. */
const hasBrokenRefExplicitAsteriskSignal = (wrapperSignals) => wrapperSignals.hasAsteriskEmphasisToken
444
+
445
/** Imbalanced asterisk emphasis in the range justifies an immediate rewrite. */
const hasBrokenRefImmediateRewriteSignal = (wrapperSignals) =>
  wrapperSignals.hasImbalance && hasBrokenRefExplicitAsteriskSignal(wrapperSignals)
448
+
449
/** Balanced asterisk emphasis in the range means the rewrite should be skipped. */
const shouldRejectBalancedBrokenRefRewrite = (wrapperSignals) =>
  !wrapperSignals.hasImbalance && hasBrokenRefExplicitAsteriskSignal(wrapperSignals)
452
+
453
/**
 * Decision from precomputed signals: rewrite immediately on imbalance, skip
 * when emphasis is balanced, otherwise require strong-run text evidence.
 */
const shouldAttemptBrokenRefRewriteFromSignals = (tokens, startIdx, endIdx, firstTextOffset, wrapperSignals) => {
  const immediate = hasBrokenRefImmediateRewriteSignal(wrapperSignals)
  if (immediate) return true
  const balancedReject = shouldRejectBalancedBrokenRefRewrite(wrapperSignals)
  return balancedReject
    ? false
    : hasBrokenRefStrongRunEvidence(tokens, startIdx, endIdx, firstTextOffset)
}
458
+
459
/**
 * Top-level gate for the broken-reference rewrite: scan the range once,
 * bail out on low-confidence signals, then decide from the scanned signals.
 */
const shouldAttemptBrokenRefRewrite = (tokens, startIdx, endIdx, firstTextOffset = 0, wrapperPrefixStats = null) => {
  const wrapperSignals = buildBrokenRefWrapperRangeSignals(tokens, startIdx, endIdx, firstTextOffset)
  const lowConfidence = isLowConfidenceBrokenRefRange(
    tokens, startIdx, endIdx, firstTextOffset, wrapperPrefixStats, wrapperSignals
  )
  if (lowConfidence) return false
  return shouldAttemptBrokenRefRewriteFromSignals(tokens, startIdx, endIdx, firstTextOffset, wrapperSignals)
}
464
+
465
/**
 * Single pass over inline children collecting postprocess signals:
 * asterisk emphasis presence and link open/close presence. Stops early
 * once all three have been seen.
 */
const scanInlinePostprocessSignals = (children) => {
  const found = { hasEmphasis: false, hasLinkOpen: false, hasLinkClose: false }
  for (const child of children) {
    if (!child) continue
    if (!found.hasEmphasis && isAsteriskEmphasisToken(child)) found.hasEmphasis = true
    if (child.type === 'link_open') found.hasLinkOpen = true
    if (child.type === 'link_close') found.hasLinkClose = true
    if (found.hasEmphasis && found.hasLinkOpen && found.hasLinkClose) break
  }
  return found
}
489
+
490
// Public surface: marker/emphasis predicates, range scanners, the prefix-stat
// builder, and the broken-reference rewrite gate used by the inline rules.
export {
  hasMarkerChars,
  isAsteriskEmphasisToken,
  hasJapaneseContextInRange,
  hasEmphasisSignalInRange,
  hasTextMarkerCharsInRange,
  buildAsteriskWrapperPrefixStats,
  shouldAttemptBrokenRefRewrite,
  scanInlinePostprocessSignals
}