@peaceroad/markdown-it-strong-ja 0.4.3 → 0.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/index.js +207 -123
  2. package/package.json +1 -1
package/index.js CHANGED
@@ -1,6 +1,5 @@
1
- // Character code constants
2
1
  const CHAR_ASTERISK = 0x2A // *
3
- const CHAR_UNDERSCORE = 0x5F // _
2
+ //const CHAR_UNDERSCORE = 0x5F // _
4
3
  const CHAR_BACKSLASH = 0x5C // \
5
4
  const CHAR_BACKTICK = 0x60 // `
6
5
  const CHAR_DOLLAR = 0x24 // $
@@ -20,9 +19,14 @@ const hasBackslash = (state, start) => {
20
19
  let slashNum = 0
21
20
  let i = start - 1
22
21
  const src = state.src
23
- while(i >= 0) {
24
- if (src.charCodeAt(i) === CHAR_BACKSLASH) { slashNum++; i--; continue }
25
- break
22
+ // Early exit if no backslash at all
23
+ if (i < 0 || src.charCodeAt(i) !== CHAR_BACKSLASH) {
24
+ return false
25
+ }
26
+ // Count consecutive backslashes efficiently
27
+ while (i >= 0 && src.charCodeAt(i) === CHAR_BACKSLASH) {
28
+ slashNum++
29
+ i--
26
30
  }
27
31
  return slashNum % 2 === 1
28
32
  }
@@ -159,6 +163,26 @@ const hasNextSymbol = (state, n, max, symbol, noMark) => {
159
163
  return [nextSymbolPos, noMark]
160
164
  }
161
165
 
166
+ const processSymbolPair = (state, n, srcLen, symbol, noMark, textStart, pushInlines) => {
167
+ const [nextSymbolPos, newNoMark] = hasNextSymbol(state, n, srcLen, symbol, noMark)
168
+ if (nextSymbolPos !== -1) {
169
+ if (nextSymbolPos === srcLen - 1) {
170
+ pushInlines(textStart, nextSymbolPos, nextSymbolPos - textStart + 1, 'text')
171
+ return { shouldBreak: true, newN: nextSymbolPos + 1, newNoMark }
172
+ }
173
+ return { shouldBreak: false, shouldContinue: true, newN: nextSymbolPos + 1, newNoMark }
174
+ }
175
+ return { shouldBreak: false, shouldContinue: false, newN: n, newNoMark }
176
+ }
177
+
178
+ const processTextSegment = (inlines, textStart, n, noMark) => {
179
+ if (n !== 0 && noMark.length !== 0) {
180
+ pushInlines(inlines, textStart, n - 1, n - textStart, 'text')
181
+ return ''
182
+ }
183
+ return noMark
184
+ }
185
+
162
186
  const createInlines = (state, start, max, opt) => {
163
187
  const src = state.src
164
188
  const srcLen = max
@@ -168,83 +192,111 @@ const createInlines = (state, start, max, opt) => {
168
192
  let noMark = ''
169
193
  let textStart = n
170
194
 
195
+ // Infinite loop prevention
196
+ const maxIterations = srcLen * 2 // Safe upper bound
197
+ let iterations = 0
198
+
171
199
  while (n < srcLen) {
200
+ // Prevent infinite loops
201
+ iterations++
202
+ if (iterations > maxIterations) {
203
+ // Add remaining text as-is and exit safely
204
+ if (textStart < srcLen) {
205
+ pushInlines(inlines, textStart, srcLen - 1, srcLen - textStart, 'text')
206
+ }
207
+ break
208
+ }
209
+
172
210
  const currentChar = src.charCodeAt(n)
173
- let nextSymbolPos = -1
211
+
212
+ // Unified escape check
213
+ let isEscaped = false
214
+ if (currentChar === CHAR_ASTERISK || currentChar === CHAR_BACKTICK ||
215
+ (opt.dollarMath && currentChar === CHAR_DOLLAR) ||
216
+ (htmlEnabled && currentChar === CHAR_LT)) {
217
+ isEscaped = hasBackslash(state, n)
218
+ }
174
219
 
175
- // Inline code (backticks)
176
- if (currentChar === CHAR_BACKTICK && !hasBackslash(state, n)) {
177
- [nextSymbolPos, noMark] = hasNextSymbol(state, n, srcLen, CHAR_BACKTICK, noMark)
178
- if (nextSymbolPos !== -1) {
179
- if (nextSymbolPos === srcLen - 1) {
180
- pushInlines(inlines, textStart, nextSymbolPos, nextSymbolPos - textStart + 1, 'text')
220
+ // Asterisk handling
221
+ if (currentChar === CHAR_ASTERISK) {
222
+ if (!isEscaped) {
223
+ noMark = processTextSegment(inlines, textStart, n, noMark)
224
+ if (n === srcLen - 1) {
225
+ pushInlines(inlines, n, n, 1, '')
181
226
  break
182
227
  }
183
- n = nextSymbolPos + 1
228
+ let i = n + 1
229
+ while (i < srcLen && src.charCodeAt(i) === CHAR_ASTERISK) {
230
+ i++
231
+ }
232
+ if (i === srcLen) {
233
+ pushInlines(inlines, n, i - 1, i - n, '')
234
+ } else {
235
+ pushInlines(inlines, n, i - 1, i - n, '')
236
+ textStart = i
237
+ }
238
+ n = i
184
239
  continue
185
240
  }
186
241
  }
187
242
 
188
- // Inline math ($...$)
189
- if (opt.dollarMath && currentChar === CHAR_DOLLAR && !hasBackslash(state, n)) {
190
- [nextSymbolPos, noMark] = hasNextSymbol(state, n, srcLen, CHAR_DOLLAR, noMark)
191
- if (nextSymbolPos !== -1) {
192
- if (nextSymbolPos === srcLen - 1) {
193
- pushInlines(inlines, textStart, nextSymbolPos, nextSymbolPos - textStart + 1, 'text')
194
- break
243
+ // Inline code (backticks)
244
+ if (currentChar === CHAR_BACKTICK) {
245
+ if (!isEscaped) {
246
+ const result = processSymbolPair(state, n, srcLen, CHAR_BACKTICK, noMark, textStart,
247
+ (start, end, len, type) => pushInlines(inlines, start, end, len, type))
248
+ if (result.shouldBreak) break
249
+ if (result.shouldContinue) {
250
+ n = result.newN
251
+ noMark = result.newNoMark
252
+ continue
195
253
  }
196
- n = nextSymbolPos + 1
197
- continue
254
+ noMark = result.newNoMark
198
255
  }
199
256
  }
200
257
 
201
- // HTML tags
202
- if (htmlEnabled && currentChar === CHAR_LT && !hasBackslash(state, n)) {
203
- for (let i = n + 1; i < srcLen; i++) {
204
- if (src.charCodeAt(i) === CHAR_GT && !hasBackslash(state, i)) {
205
- if (noMark.length !== 0) {
206
- pushInlines(inlines, textStart, n - 1, n - textStart, 'text')
207
- noMark = ''
208
- }
209
- let tag = src.slice(n + 1, i)
210
- let tagType
211
- if (tag.charCodeAt(0) === CHAR_SLASH) {
212
- tag = tag.slice(1)
213
- tagType = 'close'
214
- } else {
215
- tagType = 'open'
216
- }
217
- pushInlines(inlines, n, i, i - n + 1, 'html_inline', tag, tagType)
218
- textStart = i + 1
219
- n = i + 1
220
- break
258
+ // Inline math ($...$)
259
+ if (opt.dollarMath && currentChar === CHAR_DOLLAR) {
260
+ if (!isEscaped) {
261
+ const result = processSymbolPair(state, n, srcLen, CHAR_DOLLAR, noMark, textStart,
262
+ (start, end, len, type) => pushInlines(inlines, start, end, len, type))
263
+ if (result.shouldBreak) break
264
+ if (result.shouldContinue) {
265
+ n = result.newN
266
+ noMark = result.newNoMark
267
+ continue
221
268
  }
269
+ noMark = result.newNoMark
222
270
  }
223
- continue
224
271
  }
225
272
 
226
- // Asterisk handling
227
- if (currentChar === CHAR_ASTERISK && !hasBackslash(state, n)) {
228
- if (n !== 0 && noMark.length !== 0) {
229
- pushInlines(inlines, textStart, n - 1, n - textStart, 'text')
230
- noMark = ''
231
- }
232
- if (n === srcLen - 1) {
233
- pushInlines(inlines, n, n, 1, '')
234
- break
235
- }
236
- let i = n + 1
237
- while (i < srcLen && src.charCodeAt(i) === CHAR_ASTERISK) {
238
- i++
239
- }
240
- if (i === srcLen) {
241
- pushInlines(inlines, n, i - 1, i - n, '')
242
- } else {
243
- pushInlines(inlines, n, i - 1, i - n, '')
244
- textStart = i
273
+ // HTML tags
274
+ if (htmlEnabled && currentChar === CHAR_LT) {
275
+ if (!isEscaped) {
276
+ let foundClosingTag = false
277
+ for (let i = n + 1; i < srcLen; i++) {
278
+ if (src.charCodeAt(i) === CHAR_GT && !hasBackslash(state, i)) {
279
+ noMark = processTextSegment(inlines, textStart, n, noMark)
280
+ let tag = src.slice(n + 1, i)
281
+ let tagType
282
+ if (tag.charCodeAt(0) === CHAR_SLASH) {
283
+ tag = tag.slice(1)
284
+ tagType = 'close'
285
+ } else {
286
+ tagType = 'open'
287
+ }
288
+ pushInlines(inlines, n, i, i - n + 1, 'html_inline', tag, tagType)
289
+ textStart = i + 1
290
+ n = i + 1
291
+ foundClosingTag = true
292
+ break
293
+ }
294
+ }
295
+ if (foundClosingTag) {
296
+ continue
297
+ }
298
+ // If no closing tag found, treat as regular character to prevent infinite loops
245
299
  }
246
- n = i
247
- continue
248
300
  }
249
301
 
250
302
  // Regular character
@@ -259,19 +311,31 @@ const createInlines = (state, start, max, opt) => {
259
311
  }
260
312
 
261
313
  const pushMark = (marks, opts) => {
262
- let left = 0, right = marks.length
314
+ // Maintain sorted order during insertion
315
+ const newMark = {
316
+ nest: opts.nest,
317
+ s: opts.s,
318
+ e: opts.e,
319
+ len: opts.len,
320
+ oLen: opts.oLen,
321
+ type: opts.type
322
+ }
323
+ // Binary search for insertion point to maintain sorted order
324
+ let left = 0
325
+ let right = marks.length
263
326
  while (left < right) {
264
- const mid = (left + right) >> 1
265
- if (marks[mid].s > opts.s) {
266
- right = mid
267
- } else {
327
+ const mid = Math.floor((left + right) / 2)
328
+ if (marks[mid].s <= newMark.s) {
268
329
  left = mid + 1
330
+ } else {
331
+ right = mid
269
332
  }
270
333
  }
271
- marks.splice(left, 0, { ...opts });
334
+
335
+ marks.splice(left, 0, newMark)
272
336
  }
273
337
 
274
- const setStrong = (state, inlines, marks, n, memo, opt) => {
338
+ const setStrong = (state, inlines, marks, n, memo, opt, nestTracker) => {
275
339
  if (opt.disallowMixed === true) {
276
340
  let i = n + 1
277
341
  const inlinesLength = inlines.length
@@ -305,7 +369,7 @@ const setStrong = (state, inlines, marks, n, memo, opt) => {
305
369
  if (insideTagsIsClose === 0) { i++; continue }
306
370
  }
307
371
 
308
- nest = checkNest(inlines, marks, n, i)
372
+ nest = checkNest(inlines, marks, n, i, nestTracker)
309
373
  if (nest === -1) return [n, nest]
310
374
 
311
375
  if (inlines[i].len === 1 && inlines[n].len > 2) {
@@ -330,7 +394,7 @@ const setStrong = (state, inlines, marks, n, memo, opt) => {
330
394
  inlines[i].len -= 1
331
395
  if (inlines[i].len > 0) inlines[i].sp += 1
332
396
  if (insideTagsIsClose === 1) {
333
- const [newN, newNest] = setEm(state, inlines, marks, n, memo, opt)
397
+ const [newN, newNest] = setEm(state, inlines, marks, n, memo, opt, null, nestTracker)
334
398
  n = newN
335
399
  nest = newNest
336
400
  }
@@ -376,7 +440,7 @@ const setStrong = (state, inlines, marks, n, memo, opt) => {
376
440
 
377
441
  if (inlines[n].len === 1 && inlines[i].len > 0) {
378
442
  nest++
379
- const [newN, newNest] = setEm(state, inlines, marks, n, memo, opt, nest)
443
+ const [newN, newNest] = setEm(state, inlines, marks, n, memo, opt, nest, nestTracker)
380
444
  n = newN
381
445
  nest = newNest
382
446
  }
@@ -405,18 +469,41 @@ const checkInsideTags = (inlines, i, memo) => {
405
469
  if (memo.htmlTags[tagName] < 0) {
406
470
  return -1
407
471
  }
408
- const closeAllTags = Object.values(memo.htmlTags).every(val => val === 0)
409
- if (closeAllTags) return 1
410
- return 0
472
+
473
+ // Direct check instead of Object.values().every()
474
+ for (const count of Object.values(memo.htmlTags)) {
475
+ if (count !== 0) return 0
476
+ }
477
+ return 1
411
478
  }
412
479
 
480
+ // Check if character is ASCII punctuation or space
481
+ // Covers: !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ and space
413
482
  const isPunctuation = (ch) => {
414
- return REG_PUNCTUATION.test(ch)
483
+ if (!ch) return false
484
+ const code = ch.charCodeAt(0)
485
+ // ASCII punctuation: !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
486
+ return (code >= 33 && code <= 47) || (code >= 58 && code <= 64) ||
487
+ (code >= 91 && code <= 96) || (code >= 123 && code <= 126) || code === 32
415
488
  }
489
+
490
+ // Check if character is Japanese (hiragana, katakana, kanji, punctuation, symbols, format chars, emoji)
491
+ // Uses fast Unicode range checks for common cases, falls back to REG_JAPANESE for complex Unicode
416
492
  const isJapanese = (ch) => {
417
- return REG_JAPANESE.test(ch)
493
+ if (!ch) return false
494
+ const code = ch.charCodeAt(0)
495
+ // Fast ASCII check first
496
+ if (code < 128) return false
497
+ // Hiragana: U+3040-U+309F, Katakana: U+30A0-U+30FF, Kanji: U+4E00-U+9FAF
498
+ return (code >= 0x3040 && code <= 0x309F) ||
499
+ (code >= 0x30A0 && code <= 0x30FF) ||
500
+ (code >= 0x4E00 && code <= 0x9FAF) ||
501
+ // Fallback to regex for complex Unicode cases
502
+ REG_JAPANESE.test(ch)
418
503
  }
419
504
 
505
+ // Check if character is English (letters, numbers) or other non-Japanese characters
506
+ // Uses REG_JAPANESE and REG_PUNCTUATION to exclude Japanese and punctuation characters
420
507
  const isEnglish = (ch) => {
421
508
  if (!ch) return false
422
509
  const code = ch.charCodeAt(0)
@@ -461,9 +548,6 @@ const hasPunctuationOrNonJapanese = (state, inlines, n, i, opt) => {
461
548
  const checkCloseNextChar = (isPunctuation(closeNextChar) || i === inlines.length - 1)
462
549
 
463
550
  if (opt.disallowMixed === false) {
464
- const openPrevChar = src[inlines[n].s - 1] || ''
465
- const closeNextChar = src[inlines[i].e + 1] || ''
466
-
467
551
  if (isEnglish(openPrevChar) || isEnglish(closeNextChar)) {
468
552
  const contentBetween = src.slice(inlines[n].e + 1, inlines[i].s)
469
553
  if (REG_MARKDOWN_HTML.test(contentBetween)) {
@@ -476,7 +560,7 @@ const hasPunctuationOrNonJapanese = (state, inlines, n, i, opt) => {
476
560
  return result
477
561
  }
478
562
 
479
- const setEm = (state, inlines, marks, n, memo, opt, sNest) => {
563
+ const setEm = (state, inlines, marks, n, memo, opt, sNest, nestTracker) => {
480
564
  if (opt.disallowMixed === true && !sNest) {
481
565
  let i = n + 1
482
566
  const inlinesLength = inlines.length
@@ -531,7 +615,7 @@ const setEm = (state, inlines, marks, n, memo, opt, sNest) => {
531
615
  if (sNest) {
532
616
  nest = sNest - 1
533
617
  } else {
534
- nest = checkNest(inlines, marks, n, i)
618
+ nest = checkNest(inlines, marks, n, i, nestTracker)
535
619
  }
536
620
  if (nest === -1) return [n, nest]
537
621
 
@@ -603,60 +687,60 @@ const setText = (inlines, marks, n, nest) => {
603
687
  inlines[n].len = 0
604
688
  }
605
689
 
606
- const checkNest = (inlines, marks, n, i) => {
607
- let nest = 1
608
- let isRange = true
609
- if (marks.length === 0) return nest
610
- let strongNest = 0
611
- let emNest = 0
612
- let j = 0
613
- const marksLength = marks.length
614
- while (j < marksLength) {
615
- if (marks[j].s <= inlines[n].s) {
616
- if (marks[j].type === 'strong_open') strongNest++
617
- if (marks[j].type === 'strong_close') strongNest--
618
- if (marks[j].type === 'em_open') emNest++
619
- if (marks[j].type === 'em_close') emNest--
620
- } else { break }
621
- j++
622
- }
623
- let parentNest = strongNest + emNest
624
- let parentCloseN = j
625
- if (parentCloseN < marksLength) {
626
- while (parentCloseN < marksLength) {
627
- if (marks[parentCloseN].nest === parentNest) break
628
- parentCloseN++
629
- }
630
- if (parentCloseN > marksLength - 1) {
631
- isRange = true
632
- } else {
633
- if (marks[parentCloseN].s < inlines[i].s) isRange = false
634
- }
690
+ // Nest state management
691
+ const createNestTracker = () => {
692
+ return {
693
+ strongNest: 0,
694
+ emNest: 0,
695
+ markIndex: 0
635
696
  }
697
+ }
636
698
 
637
- if (isRange) {
638
- nest = parentNest + 1
639
- } else {
640
- nest = -1
699
+ const updateNestTracker = (tracker, marks, targetPos) => {
700
+ while (tracker.markIndex < marks.length && marks[tracker.markIndex].s <= targetPos) {
701
+ const mark = marks[tracker.markIndex]
702
+ if (mark.type === 'strong_open') tracker.strongNest++
703
+ else if (mark.type === 'strong_close') tracker.strongNest--
704
+ else if (mark.type === 'em_open') tracker.emNest++
705
+ else if (mark.type === 'em_close') tracker.emNest--
706
+ tracker.markIndex++
641
707
  }
642
- return nest
708
+ }
709
+
710
+ const checkNest = (inlines, marks, n, i, nestTracker) => {
711
+ if (marks.length === 0) return 1
712
+ // Update nest state up to current position
713
+ updateNestTracker(nestTracker, marks, inlines[n].s)
714
+
715
+ const parentNest = nestTracker.strongNest + nestTracker.emNest
716
+ // Check if there's a conflicting close mark before the end position
717
+ let parentCloseN = nestTracker.markIndex
718
+ while (parentCloseN < marks.length) {
719
+ if (marks[parentCloseN].nest === parentNest) break
720
+ parentCloseN++
721
+ }
722
+ if (parentCloseN < marks.length && marks[parentCloseN].s < inlines[i].s) {
723
+ return -1
724
+ }
725
+ return parentNest + 1
643
726
  }
644
727
 
645
728
  const createMarks = (state, inlines, start, end, memo, opt) => {
646
729
  let marks = []
647
730
  let n = start
731
+ const nestTracker = createNestTracker()
648
732
 
649
733
  while (n < end) {
650
734
  if (inlines[n].type !== '') { n++; continue }
651
735
  let nest = 0
652
736
 
653
737
  if (inlines[n].len > 1) {
654
- const [newN, newNest] = setStrong(state, inlines, marks, n, memo, opt)
738
+ const [newN, newNest] = setStrong(state, inlines, marks, n, memo, opt, nestTracker)
655
739
  n = newN
656
740
  nest = newNest
657
741
  }
658
742
  if (inlines[n].len !== 0) {
659
- const [newN2, newNest2] = setEm(state, inlines, marks, n, memo, opt)
743
+ const [newN2, newNest2] = setEm(state, inlines, marks, n, memo, opt, null, nestTracker)
660
744
  n = newN2
661
745
  nest = newNest2
662
746
  }
@@ -668,9 +752,8 @@ const createMarks = (state, inlines, start, end, memo, opt) => {
668
752
  return marks
669
753
  }
670
754
 
671
-
672
755
  const mergeInlinesAndMarks = (inlines, marks) => {
673
- marks.sort((a, b) => a.s - b.s)
756
+ // marks array is already sorted, skip sorting
674
757
  const merged = []
675
758
  let markIndex = 0
676
759
  for (const token of inlines) {
@@ -761,4 +844,5 @@ const mditStrongJa = (md, option) => {
761
844
  return strongJa(state, silent, opt)
762
845
  })
763
846
  }
764
- export default mditStrongJa
847
+
848
+ export default mditStrongJa
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@peaceroad/markdown-it-strong-ja",
3
3
  "description": "This is a plugin for markdown-it. It is an alternative to the standard `**` (strong) and `*` (em) processing. It also processes strings that cannot be converted by the standard.",
4
- "version": "0.4.3",
4
+ "version": "0.4.5",
5
5
  "main": "index.js",
6
6
  "type": "module",
7
7
  "files": [