@peaceroad/markdown-it-numbering-ul-regarded-as-ol 0.4.0 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -7
- package/index.js +62 -27
- package/package.json +3 -2
- package/src/list-helpers.js +5 -3
- package/src/phase0-description-list.js +37 -15
- package/src/phase1-analyze.js +44 -131
- package/src/phase2-convert.js +53 -69
- package/src/phase3-attributes.js +21 -29
- package/src/phase5-spans.js +13 -14
- package/src/preprocess-literal-lists.js +25 -36
- package/src/types-utility.js +95 -236
package/src/phase5-spans.js
CHANGED
|
@@ -9,16 +9,12 @@ import { buildListCloseIndexMap, findMatchingClose } from './list-helpers.js'
|
|
|
9
9
|
* @param {Array} tokens - Token array
|
|
10
10
|
* @param {Object} opt - Options
|
|
11
11
|
*/
|
|
12
|
-
export function generateSpans(tokens, opt) {
|
|
12
|
+
export function generateSpans(tokens, opt, initialListCloseByOpen = null) {
|
|
13
13
|
if (opt.useCounterStyle) {
|
|
14
14
|
return
|
|
15
15
|
}
|
|
16
|
-
const
|
|
17
|
-
|
|
18
|
-
? rawSpanClass.trim()
|
|
19
|
-
: ''
|
|
20
|
-
const spanClass = normalizedSpanClass || 'li-num'
|
|
21
|
-
let listCloseByOpen = null
|
|
16
|
+
const spanClass = opt.markerSpanClass || 'li-num'
|
|
17
|
+
let listCloseByOpen = initialListCloseByOpen
|
|
22
18
|
const getListCloseByOpen = () => {
|
|
23
19
|
if (!listCloseByOpen) {
|
|
24
20
|
listCloseByOpen = buildListCloseIndexMap(tokens).listCloseByOpen
|
|
@@ -58,15 +54,17 @@ function addMarkerSpans(tokens, listToken, listIndex, markerInfo, opt, spanClass
|
|
|
58
54
|
}
|
|
59
55
|
if (listCloseIndex === -1) return
|
|
60
56
|
|
|
57
|
+
const markerCount = markerInfo.markers.length
|
|
58
|
+
const listItemLevel = (listToken.level || 0) + 1
|
|
61
59
|
let markerIndex = 0
|
|
62
60
|
let inListItem = false
|
|
63
61
|
let listItemInlineFound = false
|
|
64
62
|
|
|
65
|
-
for (let i = listIndex + 1; i < listCloseIndex && markerIndex <
|
|
63
|
+
for (let i = listIndex + 1; i < listCloseIndex && markerIndex < markerCount; i++) {
|
|
66
64
|
const token = tokens[i]
|
|
67
65
|
|
|
68
66
|
// When list_item_open is found, prepare to add marker to next inline
|
|
69
|
-
if (token.type === 'list_item_open' && token.level ===
|
|
67
|
+
if (token.type === 'list_item_open' && token.level === listItemLevel) {
|
|
70
68
|
inListItem = true
|
|
71
69
|
listItemInlineFound = false
|
|
72
70
|
}
|
|
@@ -77,12 +75,13 @@ function addMarkerSpans(tokens, listToken, listIndex, markerInfo, opt, spanClass
|
|
|
77
75
|
listItemInlineFound = true
|
|
78
76
|
continue
|
|
79
77
|
}
|
|
78
|
+
const TokenClass = token.constructor
|
|
80
79
|
// Insert span_open, text, span_close before inline token
|
|
81
|
-
const spanOpen = new
|
|
80
|
+
const spanOpen = new TokenClass('span_open', 'span', 1)
|
|
82
81
|
spanOpen.attrSet('class', spanClass)
|
|
83
82
|
spanOpen.attrSet('aria-hidden', 'true')
|
|
84
83
|
|
|
85
|
-
const text = new
|
|
84
|
+
const text = new TokenClass('text', '', 0)
|
|
86
85
|
|
|
87
86
|
// Determine marker content
|
|
88
87
|
// If marker.number exists, get correct symbol based on it
|
|
@@ -112,7 +111,7 @@ function addMarkerSpans(tokens, listToken, listIndex, markerInfo, opt, spanClass
|
|
|
112
111
|
// Use markerContent as-is (prefix+symbol+suffix) for alwaysMarkerSpan
|
|
113
112
|
text.content = markerContent
|
|
114
113
|
|
|
115
|
-
const spanClose = new
|
|
114
|
+
const spanClose = new TokenClass('span_close', 'span', -1)
|
|
116
115
|
|
|
117
116
|
// Initialize children if not exist
|
|
118
117
|
if (!token.children) {
|
|
@@ -124,14 +123,14 @@ function addMarkerSpans(tokens, listToken, listIndex, markerInfo, opt, spanClass
|
|
|
124
123
|
|
|
125
124
|
// Add space before content if exists
|
|
126
125
|
if (token.content) {
|
|
127
|
-
const spaceToken = new
|
|
126
|
+
const spaceToken = new TokenClass('text', '', 0)
|
|
128
127
|
spaceToken.content = ' '
|
|
129
128
|
token.children.push(spaceToken)
|
|
130
129
|
}
|
|
131
130
|
listItemInlineFound = true // First inline of this list_item is processed
|
|
132
131
|
}
|
|
133
132
|
// When list_item_close is found, go to next list_item
|
|
134
|
-
else if (token.type === 'list_item_close' && token.level ===
|
|
133
|
+
else if (token.type === 'list_item_close' && token.level === listItemLevel) {
|
|
135
134
|
inListItem = false
|
|
136
135
|
markerIndex++ // Move to next marker
|
|
137
136
|
}
|
|
@@ -91,46 +91,32 @@ const hasLikelyLiteralMarkerToken = (token) => {
|
|
|
91
91
|
return core.length <= 4
|
|
92
92
|
}
|
|
93
93
|
|
|
94
|
-
const
|
|
94
|
+
const scanLiteralLineHints = (content) => {
|
|
95
95
|
if (typeof content !== 'string' || !content.includes('\n')) {
|
|
96
|
-
return false
|
|
96
|
+
return { hasInlineLiteralHint: false, hasOverIndentedMarkerHint: false, lines: null }
|
|
97
97
|
}
|
|
98
98
|
const lines = content.split('\n')
|
|
99
|
+
let hasInlineLiteralHint = false
|
|
100
|
+
let hasOverIndentedMarkerHint = false
|
|
99
101
|
for (let i = 1; i < lines.length; i++) {
|
|
100
102
|
const line = lines[i]
|
|
101
103
|
if (!line || line.trim().length === 0) {
|
|
102
104
|
continue
|
|
103
105
|
}
|
|
104
106
|
const tokenInfo = getLineTokenWithIndent(line)
|
|
105
|
-
if (!tokenInfo || tokenInfo.
|
|
106
|
-
continue
|
|
107
|
-
}
|
|
108
|
-
if (hasLikelyLiteralMarkerToken(tokenInfo.token)) {
|
|
109
|
-
return true
|
|
110
|
-
}
|
|
111
|
-
}
|
|
112
|
-
return false
|
|
113
|
-
}
|
|
114
|
-
|
|
115
|
-
const hasOverIndentedMarkerLikeLine = (content) => {
|
|
116
|
-
if (typeof content !== 'string' || !content.includes('\n')) {
|
|
117
|
-
return false
|
|
118
|
-
}
|
|
119
|
-
const lines = content.split('\n')
|
|
120
|
-
for (let i = 1; i < lines.length; i++) {
|
|
121
|
-
const line = lines[i]
|
|
122
|
-
if (!line || line.trim().length === 0) {
|
|
107
|
+
if (!tokenInfo || !hasLikelyLiteralMarkerToken(tokenInfo.token)) {
|
|
123
108
|
continue
|
|
124
109
|
}
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
110
|
+
if (tokenInfo.indentWidth > MAX_LITERAL_INLINE_INDENT) {
|
|
111
|
+
hasOverIndentedMarkerHint = true
|
|
112
|
+
} else {
|
|
113
|
+
hasInlineLiteralHint = true
|
|
128
114
|
}
|
|
129
|
-
if (
|
|
130
|
-
|
|
115
|
+
if (hasInlineLiteralHint && hasOverIndentedMarkerHint) {
|
|
116
|
+
break
|
|
131
117
|
}
|
|
132
118
|
}
|
|
133
|
-
return
|
|
119
|
+
return { hasInlineLiteralHint, hasOverIndentedMarkerHint, lines }
|
|
134
120
|
}
|
|
135
121
|
|
|
136
122
|
/**
|
|
@@ -196,19 +182,20 @@ export function normalizeLiteralOrderedLists(tokens, opt) {
|
|
|
196
182
|
j = paragraphCloseIdx + 1
|
|
197
183
|
continue
|
|
198
184
|
}
|
|
199
|
-
|
|
185
|
+
const literalHints = scanLiteralLineHints(inlineToken.content)
|
|
186
|
+
if (!literalHints.hasInlineLiteralHint) {
|
|
200
187
|
j = paragraphCloseIdx + 1
|
|
201
188
|
continue
|
|
202
189
|
}
|
|
203
190
|
// Be conservative when deeply-indented marker-like lines are present.
|
|
204
191
|
// These lines are often code blocks; partial literal conversion is more surprising
|
|
205
192
|
// than preserving markdown-it's original rendering in this ambiguous case.
|
|
206
|
-
if (
|
|
193
|
+
if (literalHints.hasOverIndentedMarkerHint) {
|
|
207
194
|
j = paragraphCloseIdx + 1
|
|
208
195
|
continue
|
|
209
196
|
}
|
|
210
197
|
const baseLine = tokens[j].map ? tokens[j].map[0] : null
|
|
211
|
-
const segments = parseSegments(
|
|
198
|
+
const segments = parseSegments(literalHints.lines, markerWidth, baseLine)
|
|
212
199
|
if (!segments.hasLiteral) {
|
|
213
200
|
j = paragraphCloseIdx + 1
|
|
214
201
|
continue
|
|
@@ -247,12 +234,15 @@ export function normalizeLiteralOrderedLists(tokens, opt) {
|
|
|
247
234
|
}
|
|
248
235
|
}
|
|
249
236
|
|
|
250
|
-
function parseSegments(
|
|
251
|
-
if (!
|
|
237
|
+
function parseSegments(contentOrLines, markerWidth, baseLine = null) {
|
|
238
|
+
if (!contentOrLines) {
|
|
252
239
|
return { hasLiteral: false, list: [{ type: 'text', text: '', tight: false }] }
|
|
253
240
|
}
|
|
254
241
|
|
|
255
|
-
const lines =
|
|
242
|
+
const lines = Array.isArray(contentOrLines) ? contentOrLines : contentOrLines.split('\n')
|
|
243
|
+
if (lines.length === 0) {
|
|
244
|
+
return { hasLiteral: false, list: [{ type: 'text', text: '', tight: false }] }
|
|
245
|
+
}
|
|
256
246
|
const literalCache = new Array(lines.length)
|
|
257
247
|
const segments = []
|
|
258
248
|
let buffer = []
|
|
@@ -280,8 +270,7 @@ function parseSegments(content, markerWidth, baseLine = null) {
|
|
|
280
270
|
|
|
281
271
|
while (idx < lines.length) {
|
|
282
272
|
const isFirstLine = idx === 0
|
|
283
|
-
|
|
284
|
-
if (isFirstLine && trimmedLine.length > 0) {
|
|
273
|
+
if (isFirstLine && lines[idx].trim().length > 0) {
|
|
285
274
|
buffer.push(lines[idx])
|
|
286
275
|
idx++
|
|
287
276
|
continue
|
|
@@ -334,7 +323,7 @@ function detectLiteralLine(line, markerWidth) {
|
|
|
334
323
|
if (!trimmed.startsWith(markerInfo.marker)) {
|
|
335
324
|
return null
|
|
336
325
|
}
|
|
337
|
-
const remainder = trimmed.slice(markerInfo.marker.length).
|
|
326
|
+
const remainder = trimmed.slice(markerInfo.marker.length).trimStart()
|
|
338
327
|
const safeMarkerWidth = Number.isFinite(markerWidth) ? markerWidth : 1
|
|
339
328
|
return {
|
|
340
329
|
indent: safeMarkerWidth + indentWidth,
|
|
@@ -458,7 +447,7 @@ function buildReplacementTokens(segments, listItemLevel, TokenClass, paragraphOp
|
|
|
458
447
|
for (const listNode of segment.lists) {
|
|
459
448
|
const relativeIndex = tokens.length
|
|
460
449
|
tokens.push(...buildListTokens(listNode, listItemLevel + 1, TokenClass))
|
|
461
|
-
literalListPositions.push({ relativeIndex
|
|
450
|
+
literalListPositions.push({ relativeIndex })
|
|
462
451
|
}
|
|
463
452
|
templateUsed = true
|
|
464
453
|
}
|
package/src/types-utility.js
CHANGED
|
@@ -2,27 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
import types from '../listTypes.json' with { type: 'json' }
|
|
4
4
|
|
|
5
|
-
/**
|
|
6
|
-
* Check if a marker type is convertible in default mode
|
|
7
|
-
* Exotic markers that aren't commonly used are excluded from conversion
|
|
8
|
-
* @param {string} markerType - The marker type name (e.g., 'decimal', 'lower-greek')
|
|
9
|
-
* @returns {boolean} True if the marker type should be converted in default mode
|
|
10
|
-
*/
|
|
11
|
-
export const isConvertibleMarkerType = (markerType) => {
|
|
12
|
-
if (!markerType) return false
|
|
13
|
-
|
|
14
|
-
// Exclude exotic markers that should remain as <ul> in default mode
|
|
15
|
-
// These are rarely used and may not be well-supported
|
|
16
|
-
const excludedTypes = [
|
|
17
|
-
'fullwidth-lower-roman',
|
|
18
|
-
'fullwidth-upper-roman',
|
|
19
|
-
'squared-upper-latin',
|
|
20
|
-
'filled-squared-upper-latin'
|
|
21
|
-
]
|
|
22
|
-
|
|
23
|
-
return !excludedTypes.includes(markerType)
|
|
24
|
-
}
|
|
25
|
-
|
|
26
5
|
const escapeRegExp = (string) => {
|
|
27
6
|
return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
|
|
28
7
|
}
|
|
@@ -304,10 +283,9 @@ export const detectSequencePattern = (allContents) => {
|
|
|
304
283
|
const allSame = pureSymbols.every(s => s === pureSymbols[0])
|
|
305
284
|
|
|
306
285
|
// Cache type lookups
|
|
307
|
-
const
|
|
308
|
-
const
|
|
309
|
-
const
|
|
310
|
-
const upperRomanType = typeInfoByName.get('upper-roman')
|
|
286
|
+
const irohaType = _TYPE_INFO_BY_NAME.get('katakana-iroha')
|
|
287
|
+
const katakanaType = _TYPE_INFO_BY_NAME.get('katakana')
|
|
288
|
+
const upperRomanType = _TYPE_INFO_BY_NAME.get('upper-roman')
|
|
311
289
|
|
|
312
290
|
// Get symbol sequences from listTypes.json
|
|
313
291
|
if (irohaType?.symbols) {
|
|
@@ -407,8 +385,7 @@ export const detectMarkerTypeWithContext = (content, contextResult = null) => {
|
|
|
407
385
|
* @returns {string|null} The symbol for that number, or null if not found
|
|
408
386
|
*/
|
|
409
387
|
export const getSymbolForNumber = (markerType, number) => {
|
|
410
|
-
const
|
|
411
|
-
const typeInfo = typeInfoByName.get(markerType)
|
|
388
|
+
const typeInfo = _TYPE_INFO_BY_NAME.get(markerType)
|
|
412
389
|
if (!typeInfo) {
|
|
413
390
|
return null
|
|
414
391
|
}
|
|
@@ -444,51 +421,19 @@ export const getSymbolForNumber = (markerType, number) => {
|
|
|
444
421
|
return null
|
|
445
422
|
}
|
|
446
423
|
|
|
447
|
-
|
|
448
|
-
* Get the default prefix/suffix pattern for a marker type
|
|
449
|
-
* @param {string} markerType - The marker type name (e.g., 'lower-roman', 'decimal')
|
|
450
|
-
* @returns {Object} Object with prefix and suffix properties
|
|
451
|
-
*/
|
|
452
|
-
export const getDefaultPatternForType = (markerType) => {
|
|
453
|
-
const { typeInfoByName } = getTypeSeparation()
|
|
454
|
-
const typeInfo = typeInfoByName.get(markerType)
|
|
455
|
-
if (!typeInfo) {
|
|
456
|
-
return { prefix: '', suffix: '.' }
|
|
457
|
-
}
|
|
458
|
-
|
|
459
|
-
// Get patterns for this type (prefer `pattern` property)
|
|
460
|
-
const patternRef = typeInfo.pattern || null
|
|
461
|
-
const patterns = getPatternsByName(patternRef)
|
|
462
|
-
if (!patterns || patterns.length === 0) {
|
|
463
|
-
return { prefix: '', suffix: '.' }
|
|
464
|
-
}
|
|
465
|
-
|
|
466
|
-
// Return the first pattern as the default
|
|
467
|
-
return {
|
|
468
|
-
prefix: patterns[0].prefix || '',
|
|
469
|
-
suffix: patterns[0].suffix || '.'
|
|
470
|
-
}
|
|
471
|
-
}
|
|
472
|
-
|
|
473
|
-
const prefixs = [
|
|
424
|
+
const prefixLabels = [
|
|
474
425
|
['(', 'round'],
|
|
475
|
-
|
|
476
|
-
//['{', 'curly'],
|
|
477
|
-
//['<', 'angle'],
|
|
478
|
-
['(', 'fullround'],
|
|
426
|
+
['(', 'fullround']
|
|
479
427
|
]
|
|
480
428
|
|
|
481
|
-
const
|
|
429
|
+
const suffixLabels = [
|
|
482
430
|
[')', 'round'],
|
|
483
|
-
|
|
484
|
-
//['}', 'curly'],
|
|
485
|
-
//['>', 'angle'],
|
|
486
|
-
[')', 'fullround'],
|
|
431
|
+
[')', 'fullround']
|
|
487
432
|
]
|
|
488
433
|
|
|
489
434
|
// Build Maps for O(1) lookups (faster than .find on every call)
|
|
490
|
-
const prefixMap = new Map(
|
|
491
|
-
const suffixMap = new Map(
|
|
435
|
+
const prefixMap = new Map(prefixLabels)
|
|
436
|
+
const suffixMap = new Map(suffixLabels)
|
|
492
437
|
|
|
493
438
|
const generateClassName = (baseClass, prefix, suffix) => {
|
|
494
439
|
// fast path: no prefix and no suffix
|
|
@@ -523,8 +468,7 @@ const CUSTOM_TYPES_NO_SUFFIX = new Set([
|
|
|
523
468
|
])
|
|
524
469
|
|
|
525
470
|
export const getTypeAttributes = (markerType, markerInfo = null, opt = {}) => {
|
|
526
|
-
const
|
|
527
|
-
const type = typeInfoByName.get(markerType)
|
|
471
|
+
const type = _TYPE_INFO_BY_NAME.get(markerType)
|
|
528
472
|
if (!type) {
|
|
529
473
|
return { type: '1', class: 'ol-decimal', suffix: '.' }
|
|
530
474
|
}
|
|
@@ -637,20 +581,13 @@ const createPatternTail = (pattern) => {
|
|
|
637
581
|
// Process patterns for symbols
|
|
638
582
|
const processSymbolPatterns = (patterns, symbols, typePatterns, type) => {
|
|
639
583
|
// Pre-compute escaped prefixes, suffixes and regex tail once
|
|
640
|
-
const patternCache =
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
suffix: pattern.suffix,
|
|
648
|
-
space: pattern.space,
|
|
649
|
-
escapedPrefix,
|
|
650
|
-
escapedSuffix,
|
|
651
|
-
tail
|
|
652
|
-
})
|
|
653
|
-
})
|
|
584
|
+
const patternCache = typePatterns.map(pattern => ({
|
|
585
|
+
prefix: pattern.prefix,
|
|
586
|
+
suffix: pattern.suffix,
|
|
587
|
+
escapedPrefix: pattern.prefix ? escapeRegExp(pattern.prefix) : '',
|
|
588
|
+
escapedSuffix: pattern.suffix ? escapeRegExp(pattern.suffix) : '',
|
|
589
|
+
tail: createPatternTail(pattern)
|
|
590
|
+
}))
|
|
654
591
|
|
|
655
592
|
// Use pre-computed cache for faster pattern generation
|
|
656
593
|
const symbolsLength = symbols.length
|
|
@@ -661,7 +598,7 @@ const processSymbolPatterns = (patterns, symbols, typePatterns, type) => {
|
|
|
661
598
|
const processedSym = sym.replace(/^\\\\/,'\\')
|
|
662
599
|
|
|
663
600
|
for (let patternIndex = 0; patternIndex < patternsLength; patternIndex++) {
|
|
664
|
-
const cached = patternCache
|
|
601
|
+
const cached = patternCache[patternIndex]
|
|
665
602
|
// Original suffix variant
|
|
666
603
|
const symbolPartOrig = cached.escapedPrefix + processedSym + cached.escapedSuffix
|
|
667
604
|
const regexStrOrig = `^(${symbolPartOrig})${cached.tail}`
|
|
@@ -767,16 +704,13 @@ export const compiledTypes = (() => {
|
|
|
767
704
|
}
|
|
768
705
|
})()
|
|
769
706
|
|
|
770
|
-
//
|
|
771
|
-
// Build a map of compiled types by name once for fast lookups
|
|
707
|
+
// Build a map of compiled types by name once for fast lookups.
|
|
772
708
|
const _COMPILED_BY_NAME = (() => {
|
|
773
709
|
const m = new Map()
|
|
774
710
|
for (const t of compiledTypes()) m.set(t.name, t)
|
|
775
711
|
return m
|
|
776
712
|
})()
|
|
777
713
|
|
|
778
|
-
export const compiledTypesByName = () => _COMPILED_BY_NAME
|
|
779
|
-
|
|
780
714
|
// Build a flattened pattern list (preserve previous priority: sortedSymbolTypes then rangeBasedTypes)
|
|
781
715
|
const _FLATTENED_PATTERNS = (() => {
|
|
782
716
|
const arr = []
|
|
@@ -791,8 +725,6 @@ const _FLATTENED_PATTERNS = (() => {
|
|
|
791
725
|
suffix: p.suffix,
|
|
792
726
|
typeName: compiledType.name,
|
|
793
727
|
symbolIndex: p.symbolIndex,
|
|
794
|
-
num: p.num,
|
|
795
|
-
isRange: p.isRange,
|
|
796
728
|
compiled: compiled || null
|
|
797
729
|
})
|
|
798
730
|
}
|
|
@@ -800,6 +732,74 @@ const _FLATTENED_PATTERNS = (() => {
|
|
|
800
732
|
return arr
|
|
801
733
|
})()
|
|
802
734
|
|
|
735
|
+
const _TYPE_INFO_BY_NAME = getTypeSeparation().typeInfoByName
|
|
736
|
+
const ASCII_DIGIT_LEADS = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
|
|
737
|
+
|
|
738
|
+
const getFirstCodePointChar = (text) => {
|
|
739
|
+
if (typeof text !== 'string' || text.length === 0) {
|
|
740
|
+
return null
|
|
741
|
+
}
|
|
742
|
+
const firstCodePoint = text.codePointAt(0)
|
|
743
|
+
if (firstCodePoint === undefined) {
|
|
744
|
+
return null
|
|
745
|
+
}
|
|
746
|
+
return firstCodePoint > 0xFFFF ? text.slice(0, 2) : text[0]
|
|
747
|
+
}
|
|
748
|
+
|
|
749
|
+
const buildEntryLeadingChars = (entry) => {
|
|
750
|
+
const prefixedLead = getFirstCodePointChar(entry.prefix)
|
|
751
|
+
if (prefixedLead) {
|
|
752
|
+
return [prefixedLead]
|
|
753
|
+
}
|
|
754
|
+
|
|
755
|
+
const typeInfo = _TYPE_INFO_BY_NAME.get(entry.typeName)
|
|
756
|
+
if (!typeInfo) {
|
|
757
|
+
return []
|
|
758
|
+
}
|
|
759
|
+
|
|
760
|
+
if (Array.isArray(typeInfo.symbols)) {
|
|
761
|
+
const symbol = typeInfo.symbols[entry.symbolIndex]
|
|
762
|
+
const symbolLead = getFirstCodePointChar(symbol)
|
|
763
|
+
return symbolLead ? [symbolLead] : []
|
|
764
|
+
}
|
|
765
|
+
|
|
766
|
+
if (!Array.isArray(typeInfo.range) || typeInfo.range.length !== 2) {
|
|
767
|
+
return []
|
|
768
|
+
}
|
|
769
|
+
|
|
770
|
+
if (typeof typeInfo.range[0] === 'number') {
|
|
771
|
+
return ASCII_DIGIT_LEADS
|
|
772
|
+
}
|
|
773
|
+
|
|
774
|
+
const start = typeInfo.range[0]?.codePointAt(0)
|
|
775
|
+
const end = typeInfo.range[1]?.codePointAt(0)
|
|
776
|
+
if (typeof start !== 'number' || typeof end !== 'number' || end < start) {
|
|
777
|
+
return []
|
|
778
|
+
}
|
|
779
|
+
|
|
780
|
+
const leadingChars = []
|
|
781
|
+
for (let codePoint = start; codePoint <= end; codePoint++) {
|
|
782
|
+
leadingChars.push(String.fromCodePoint(codePoint))
|
|
783
|
+
}
|
|
784
|
+
return leadingChars
|
|
785
|
+
}
|
|
786
|
+
|
|
787
|
+
const _FLATTENED_PATTERNS_BY_LEAD = (() => {
|
|
788
|
+
const buckets = new Map()
|
|
789
|
+
for (const entry of _FLATTENED_PATTERNS) {
|
|
790
|
+
const leadingChars = buildEntryLeadingChars(entry)
|
|
791
|
+
for (const leadingChar of leadingChars) {
|
|
792
|
+
let bucket = buckets.get(leadingChar)
|
|
793
|
+
if (!bucket) {
|
|
794
|
+
bucket = []
|
|
795
|
+
buckets.set(leadingChar, bucket)
|
|
796
|
+
}
|
|
797
|
+
bucket.push(entry)
|
|
798
|
+
}
|
|
799
|
+
}
|
|
800
|
+
return buckets
|
|
801
|
+
})()
|
|
802
|
+
|
|
803
803
|
const tryMatchAgainstType = (trimmed, typeName) => {
|
|
804
804
|
if (!typeName) return null
|
|
805
805
|
const compiled = _COMPILED_BY_NAME.get(typeName)
|
|
@@ -813,7 +813,12 @@ const tryMatchAgainstType = (trimmed, typeName) => {
|
|
|
813
813
|
|
|
814
814
|
// Fast matcher over flattened list
|
|
815
815
|
const tryMatchAgainstFlattened = (trimmed) => {
|
|
816
|
-
|
|
816
|
+
const leadingChar = getFirstCodePointChar(trimmed)
|
|
817
|
+
const candidates = leadingChar ? _FLATTENED_PATTERNS_BY_LEAD.get(leadingChar) : null
|
|
818
|
+
if (!candidates) {
|
|
819
|
+
return null
|
|
820
|
+
}
|
|
821
|
+
for (const entry of candidates) {
|
|
817
822
|
const m = matchRegexEntry(trimmed, entry.typeName, entry)
|
|
818
823
|
if (m) return m
|
|
819
824
|
}
|
|
@@ -828,155 +833,9 @@ const matchRegexEntry = (trimmed, typeName, entry) => {
|
|
|
828
833
|
|
|
829
834
|
const detectedMarker = result[1]
|
|
830
835
|
const pureSymbol = extractPureSymbol(detectedMarker, entry.prefix, entry.suffix)
|
|
831
|
-
const
|
|
832
|
-
const typeInfo = typeInfoByName.get(typeName)
|
|
836
|
+
const typeInfo = _TYPE_INFO_BY_NAME.get(typeName)
|
|
833
837
|
const compiledForCalc = entry.compiled || _COMPILED_BY_NAME.get(typeName)
|
|
834
838
|
const number = calculateNumber(typeInfo, pureSymbol, compiledForCalc)
|
|
835
839
|
|
|
836
840
|
return createMarkerResult(typeName, detectedMarker, number, entry.prefix, entry.suffix)
|
|
837
841
|
}
|
|
838
|
-
|
|
839
|
-
// Analyze list context to determine optimal marker type for ambiguous cases
|
|
840
|
-
export const analyzeListMarkerContext = (markerInfos) => {
|
|
841
|
-
if (!markerInfos || markerInfos.length === 0) return markerInfos
|
|
842
|
-
|
|
843
|
-
const { symbolBasedTypes, typeInfoByName } = getTypeSeparation()
|
|
844
|
-
|
|
845
|
-
// Create typeInfo lookup cache
|
|
846
|
-
const typeInfoCache = new Map()
|
|
847
|
-
for (const compiledType of symbolBasedTypes) {
|
|
848
|
-
const typeInfo = typeInfoByName.get(compiledType.name)
|
|
849
|
-
if (typeInfo?.symbols) {
|
|
850
|
-
typeInfoCache.set(compiledType.name, typeInfo)
|
|
851
|
-
}
|
|
852
|
-
}
|
|
853
|
-
|
|
854
|
-
// Group markers by possible types
|
|
855
|
-
const candidateTypes = new Map()
|
|
856
|
-
|
|
857
|
-
markerInfos.forEach((markerInfo, index) => {
|
|
858
|
-
if (!markerInfo.marker) return
|
|
859
|
-
|
|
860
|
-
// Extract the actual symbol without prefix/suffix
|
|
861
|
-
const actualSymbol = extractPureSymbol(markerInfo.marker, markerInfo.prefix, markerInfo.suffix)
|
|
862
|
-
|
|
863
|
-
// Find all possible types for this marker
|
|
864
|
-
const possibleTypes = []
|
|
865
|
-
for (const [typeName, typeInfo] of typeInfoCache) {
|
|
866
|
-
let symbolIndex = -1
|
|
867
|
-
const compiled = _COMPILED_BY_NAME.get(typeName)
|
|
868
|
-
if (compiled && compiled.symbolIndexMap) {
|
|
869
|
-
const idx = compiled.symbolIndexMap.get(actualSymbol)
|
|
870
|
-
symbolIndex = idx !== undefined ? idx : -1
|
|
871
|
-
} else {
|
|
872
|
-
symbolIndex = typeInfo.symbols.indexOf(actualSymbol)
|
|
873
|
-
}
|
|
874
|
-
|
|
875
|
-
if (symbolIndex !== -1) {
|
|
876
|
-
const expectedNumber = symbolIndex + getStartValue(typeInfo)
|
|
877
|
-
|
|
878
|
-
possibleTypes.push({
|
|
879
|
-
typeName,
|
|
880
|
-
symbolIndex,
|
|
881
|
-
expectedNumber,
|
|
882
|
-
actualPosition: index + 1
|
|
883
|
-
})
|
|
884
|
-
}
|
|
885
|
-
}
|
|
886
|
-
|
|
887
|
-
possibleTypes.forEach(pt => {
|
|
888
|
-
if (!candidateTypes.has(pt.typeName)) {
|
|
889
|
-
candidateTypes.set(pt.typeName, {
|
|
890
|
-
matches: 0,
|
|
891
|
-
totalItems: markerInfos.length,
|
|
892
|
-
positions: []
|
|
893
|
-
})
|
|
894
|
-
}
|
|
895
|
-
|
|
896
|
-
const candidate = candidateTypes.get(pt.typeName)
|
|
897
|
-
candidate.matches++
|
|
898
|
-
candidate.positions.push({
|
|
899
|
-
index,
|
|
900
|
-
expectedNumber: pt.expectedNumber,
|
|
901
|
-
actualPosition: pt.actualPosition,
|
|
902
|
-
marker: markerInfo.marker
|
|
903
|
-
})
|
|
904
|
-
})
|
|
905
|
-
})
|
|
906
|
-
|
|
907
|
-
// Score each candidate type
|
|
908
|
-
let bestType = null
|
|
909
|
-
let bestScore = -1
|
|
910
|
-
|
|
911
|
-
for (const [typeName, candidate] of candidateTypes) {
|
|
912
|
-
let score = 0
|
|
913
|
-
|
|
914
|
-
// Check if positions form a consecutive sequence starting from 1
|
|
915
|
-
candidate.positions.sort((a, b) => a.index - b.index)
|
|
916
|
-
let isConsecutiveFrom1 = true
|
|
917
|
-
let expectedStart = 1
|
|
918
|
-
|
|
919
|
-
for (let i = 0; i < candidate.positions.length; i++) {
|
|
920
|
-
const pos = candidate.positions[i]
|
|
921
|
-
if (pos.expectedNumber !== expectedStart + i) {
|
|
922
|
-
isConsecutiveFrom1 = false
|
|
923
|
-
break
|
|
924
|
-
}
|
|
925
|
-
}
|
|
926
|
-
|
|
927
|
-
// Higher score for consecutive sequences starting from 1
|
|
928
|
-
if (isConsecutiveFrom1 && candidate.positions.length > 0 && candidate.positions[0].expectedNumber === 1) {
|
|
929
|
-
score += 100
|
|
930
|
-
}
|
|
931
|
-
|
|
932
|
-
// Higher score for more matches
|
|
933
|
-
score += candidate.matches * 10
|
|
934
|
-
|
|
935
|
-
// Higher score for covering all items
|
|
936
|
-
if (candidate.matches === candidate.totalItems) {
|
|
937
|
-
score += 50
|
|
938
|
-
}
|
|
939
|
-
|
|
940
|
-
if (score > bestScore) {
|
|
941
|
-
bestScore = score
|
|
942
|
-
bestType = typeName
|
|
943
|
-
}
|
|
944
|
-
}
|
|
945
|
-
|
|
946
|
-
// If we found a better type, update all marker infos
|
|
947
|
-
if (bestType && candidateTypes.get(bestType).matches > 0) {
|
|
948
|
-
const typeInfo = typeInfoCache.get(bestType)
|
|
949
|
-
if (typeInfo) {
|
|
950
|
-
const updatedMarkerInfos = markerInfos.map((markerInfo, index) => {
|
|
951
|
-
if (!markerInfo.marker) return markerInfo
|
|
952
|
-
|
|
953
|
-
// Extract the actual symbol without prefix/suffix
|
|
954
|
-
const actualSymbol = extractPureSymbol(markerInfo.marker, markerInfo.prefix, markerInfo.suffix)
|
|
955
|
-
|
|
956
|
-
// Use precomputed symbolIndexMap if available
|
|
957
|
-
const compiled = _COMPILED_BY_NAME.get(bestType)
|
|
958
|
-
let symbolIndex = -1
|
|
959
|
-
if (compiled && compiled.symbolIndexMap) {
|
|
960
|
-
const idx = compiled.symbolIndexMap.get(actualSymbol)
|
|
961
|
-
symbolIndex = idx !== undefined ? idx : -1
|
|
962
|
-
} else {
|
|
963
|
-
symbolIndex = typeInfo.symbols.indexOf(actualSymbol)
|
|
964
|
-
}
|
|
965
|
-
if (symbolIndex !== -1) {
|
|
966
|
-
const number = calculateNumber(typeInfo, actualSymbol)
|
|
967
|
-
|
|
968
|
-
return {
|
|
969
|
-
...markerInfo,
|
|
970
|
-
type: bestType,
|
|
971
|
-
number: number
|
|
972
|
-
}
|
|
973
|
-
}
|
|
974
|
-
return markerInfo
|
|
975
|
-
})
|
|
976
|
-
|
|
977
|
-
return updatedMarkerInfos
|
|
978
|
-
}
|
|
979
|
-
}
|
|
980
|
-
|
|
981
|
-
return markerInfos
|
|
982
|
-
}
|