@peaceroad/markdown-it-strong-ja 0.7.2 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +314 -195
- package/index.js +18 -48
- package/package.json +23 -5
- package/src/token-compat.js +77 -22
- package/src/token-core.js +467 -92
- package/src/token-link-utils.js +104 -400
- package/src/token-postprocess/fastpaths.js +349 -0
- package/src/token-postprocess/guards.js +436 -0
- package/src/token-postprocess/orchestrator.js +733 -0
- package/src/token-postprocess.js +1 -334
- package/src/token-utils.js +192 -148
package/src/token-utils.js
CHANGED
|
@@ -1,166 +1,210 @@
|
|
|
1
|
-
const CHAR_ASTERISK = 0x2A // *
|
|
2
|
-
const
|
|
3
|
-
const
|
|
4
|
-
const
|
|
5
|
-
const
|
|
6
|
-
const
|
|
7
|
-
const
|
|
8
|
-
const
|
|
9
|
-
|
|
10
|
-
const
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
1
|
+
// Character codes compared against `charCodeAt` results.
const CHAR_ASTERISK = 0x2A // *
const CHAR_SPACE = 0x20 // ' '
const CHAR_TAB = 0x09 // '\t'
const CHAR_NEWLINE = 0x0A // '\n'
const CHAR_IDEOGRAPHIC_SPACE = 0x3000 // fullwidth space

// One bit per canonical mode so a mode can be tested with a single mask.
const MODE_FLAG_COMPATIBLE = 0b0001
const MODE_FLAG_AGGRESSIVE = 0b0010
const MODE_FLAG_JAPANESE_BASE = 0b0100
const MODE_FLAG_JAPANESE_PLUS = 0b1000
// Mask covering both japanese-boundary flavours.
const MODE_FLAG_JAPANESE_ANY = MODE_FLAG_JAPANESE_BASE | MODE_FLAG_JAPANESE_PLUS

// Matches `cjk_breaks` as a whole name segment: bounded by start/end of the
// string or by `_` / `-`.
const REG_CJK_BREAKS_RULE_NAME = /(^|[_-])cjk_breaks([_-]|$)/

// Mode names accepted after lower-casing.
const VALID_CANONICAL_MODES = new Set([
  'compatible',
  'aggressive',
  'japanese-boundary',
  'japanese-boundary-guard'
])

// Hiragana, Katakana and Han scripts plus the U+3000-303F and U+FF00-FFEF ranges.
const REG_JAPANESE = /[\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Han}\u3000-\u303F\uFF00-\uFFEF]/u

// A `{...}` group at the very end of the string whose content contains none of
// `{ } \n ! @ # % ^ & * ( )`.
const REG_ATTRS = /{[^{}\n!@#%^&*()]+?}$/
|
|
20
|
+
|
|
15
21
|
// Reports whether `ch` is a Japanese-script character or CJK
// punctuation/fullwidth form.
//
// `ch` may be a string (its first code point is inspected) or a numeric
// character code. Falsy input (empty string, 0, null, undefined) and ASCII
// yield false.
const isJapaneseChar = (ch) => {
  if (!ch) return false
  // Read a whole code point: charCodeAt would hand back a lone surrogate for
  // astral characters such as U+20BB7 (CJK Extension B), which never matches.
  const code = typeof ch === 'string' ? ch.codePointAt(0) : ch
  if (code < 128) return false
  if (code >= 0x3040 && code <= 0x309F) return true
  if (code >= 0x30A0 && code <= 0x30FF) return true
  // Han + CJK punctuation/fullwidth ranges are common hot-path hits.
  // Keep these as cheap numeric checks before the fallback regex.
  if (code >= 0x3400 && code <= 0x4DBF) return true
  if (code >= 0x4E00 && code <= 0x9FFF) return true
  if (code >= 0xF900 && code <= 0xFAFF) return true
  if (code >= 0x3000 && code <= 0x303F) return true
  if (code >= 0xFF00 && code <= 0xFFEF) return true
  // String.fromCodePoint throws a RangeError for non-integers and values above
  // U+10FFFF; for those keep the lenient fromCharCode conversion instead.
  const isCodePoint = Number.isInteger(code) && code <= 0x10FFFF
  const text = isCodePoint ? String.fromCodePoint(code) : String.fromCharCode(code)
  return REG_JAPANESE.test(text)
}
|
|
24
36
|
|
|
37
|
+
// Strips the `_open` / `_close` suffix from a token type and returns what is
// left. Returns '' for non-strings, the empty string, link tokens, and types
// that carry neither suffix.
const getInlineWrapperBase = (type) => {
  if (typeof type !== 'string' || type === '') return ''
  if (type === 'link_open' || type === 'link_close') return ''

  const isOpen = type.endsWith('_open')
  if (!isOpen && !type.endsWith('_close')) return ''
  // '_open' is 5 characters long, '_close' is 6.
  return type.slice(0, isOpen ? -5 : -6)
}
|
|
44
|
+
|
|
25
45
|
// Reports whether any core rule registered on `md` has a cjk_breaks-style name.
//
// The answer is memoised on `md` itself:
//   - `__strongJaHasCjkBreaks`       last computed result
//   - `__strongJaCjkBreaksRuleCount` number of core rules at that time
// A cached `true` is returned as-is; a cached `false` is only trusted while
// the core rule count is unchanged, otherwise the rules are rescanned.
const hasCjkBreaksRule = (md) => {
  const rules = md && md.core && md.core.ruler ? md.core.ruler.__rules__ : null
  if (!Array.isArray(rules)) return false

  const cached = md.__strongJaHasCjkBreaks
  if (cached === true) return true
  if (cached === false && md.__strongJaCjkBreaksRuleCount === rules.length) return false

  const found = rules.some((rule) => {
    return Boolean(rule) && typeof rule.name === 'string' && isCjkBreaksRuleName(rule.name)
  })
  md.__strongJaHasCjkBreaks = found
  md.__strongJaCjkBreaksRuleCount = rules.length
  return found
}
|
|
52
65
|
|
|
66
|
+
// True when `name` is a string matching REG_CJK_BREAKS_RULE_NAME; any
// non-string yields false.
const isCjkBreaksRuleName = (name) => {
  if (typeof name !== 'string') return false
  return REG_CJK_BREAKS_RULE_NAME.test(name)
}
|
|
69
|
+
|
|
53
70
|
// Maps `opt.mode` to its canonical mode name.
//
// A missing or non-string mode is treated as 'japanese'. Matching is
// case-insensitive, and 'japanese' resolves to 'japanese-boundary-guard'.
// Throws an Error naming the valid modes when the value is not recognised.
const resolveMode = (opt) => {
  let raw = 'japanese'
  if (opt && typeof opt.mode === 'string') raw = opt.mode

  const key = raw.toLowerCase()
  if (key !== 'japanese' && !VALID_CANONICAL_MODES.has(key)) {
    throw new Error(
      `mditStrongJa: unknown mode "${raw}". Valid modes: japanese, japanese-boundary, japanese-boundary-guard, aggressive, compatible`
    )
  }
  // `japanese` resolves to the guard mode.
  return key === 'japanese' ? 'japanese-boundary-guard' : key
}
|
|
70
|
-
|
|
71
|
-
const
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
if (!
|
|
88
|
-
const
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
if (
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
}
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
rules
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
80
|
+
|
|
81
|
+
// Returns the MODE_FLAG_* bit for a canonical mode name, or 0 when the name
// is not one of the four canonical modes.
const getModeFlags = (mode) => {
  if (mode === 'compatible') return MODE_FLAG_COMPATIBLE
  if (mode === 'aggressive') return MODE_FLAG_AGGRESSIVE
  if (mode === 'japanese-boundary') return MODE_FLAG_JAPANESE_BASE
  if (mode === 'japanese-boundary-guard') return MODE_FLAG_JAPANESE_PLUS
  return 0
}
|
|
95
|
+
|
|
96
|
+
// Annotates `opt` in place with its resolved mode and returns the same object.
//
// Written fields:
//   - `__strongJaModeRaw`   the `opt.mode` value the cache was computed from
//   - `__strongJaMode`      result of resolveMode(opt)
//   - `__strongJaModeFlags` result of getModeFlags for that mode
// Non-object input is returned untouched. The work is skipped when the cached
// raw mode still equals `opt.mode` and both derived fields have the expected
// types. Errors thrown by resolveMode propagate before anything is written.
const deriveModeInfo = (opt) => {
  if (!opt || typeof opt !== 'object') return opt

  const rawMode = opt.mode
  const cacheIsFresh = opt.__strongJaModeRaw === rawMode &&
    typeof opt.__strongJaMode === 'string' &&
    typeof opt.__strongJaModeFlags === 'number'

  if (!cacheIsFresh) {
    const mode = resolveMode(opt)
    opt.__strongJaModeRaw = rawMode
    opt.__strongJaMode = mode
    opt.__strongJaModeFlags = getModeFlags(mode)
  }
  return opt
}
|
|
110
|
+
|
|
111
|
+
// Returns the options to use for `state`.
//
// Without a truthy `state.env.__strongJaTokenOpt` this is simply
// deriveModeInfo(baseOpt). Otherwise the override is shallow-merged over
// `baseOpt` (override keys win), passed through deriveModeInfo, and memoised
// on `state` together with the base/override identities it was built from, so
// repeated calls with the same pair reuse the merged object.
const getRuntimeOpt = (state, baseOpt) => {
  const override = state && state.env ? state.env.__strongJaTokenOpt : undefined
  if (!override) return deriveModeInfo(baseOpt)

  const cacheHit = Boolean(state.__strongJaTokenRuntimeOpt) &&
    state.__strongJaTokenRuntimeBase === baseOpt &&
    state.__strongJaTokenRuntimeOverride === override
  if (cacheHit) return state.__strongJaTokenRuntimeOpt

  state.__strongJaTokenRuntimeOpt = deriveModeInfo({ ...baseOpt, ...override })
  state.__strongJaTokenRuntimeBase = baseOpt
  state.__strongJaTokenRuntimeOverride = override
  return state.__strongJaTokenRuntimeOpt
}
|
|
125
|
+
|
|
126
|
+
// Normalises a rule-name option into a clean array of names.
//
// Accepts a single value or an array. Non-string entries are ignored, names
// are trimmed, empty names are dropped and duplicates are removed while
// keeping first-seen order. Any falsy input yields [].
function normalizeCoreRulesBeforePostprocess(value) {
  if (!value) return []

  const candidates = Array.isArray(value) ? value : [value]
  // A Set keeps insertion order, which gives first-seen de-duplication.
  const unique = new Set()
  for (const entry of candidates) {
    if (typeof entry !== 'string') continue
    const name = entry.trim()
    if (name) unique.add(name)
  }
  return [...unique]
}
|
|
141
|
+
|
|
142
|
+
// Calls moveRuleBefore on `md.core.ruler` once per entry of `ruleNames`, in
// order, each time with `targetRuleName` as the rule to precede.
// Does nothing when `md` has no core ruler or `ruleNames` is empty/missing.
function ensureCoreRuleOrder(md, ruleNames, targetRuleName) {
  const ruler = md && md.core ? md.core.ruler : null
  if (!ruler) return
  if (!ruleNames || ruleNames.length === 0) return

  // Index loop so array-likes work the same as real arrays.
  for (let pos = 0; pos < ruleNames.length; pos++) {
    moveRuleBefore(ruler, ruleNames[pos], targetRuleName)
  }
}
|
|
149
|
+
|
|
150
|
+
// Ensures the rule named `ruleName` sits before the rule named `beforeName`
// in `ruler.__rules__`, mutating the array in place.
//
// Nothing happens (and the ruler's `__cache__` is left alone) when the ruler
// or its rule list is missing, when either name is not found, when the rule is
// already earlier, or when both names refer to the same rule. After an actual
// move `ruler.__cache__` is reset to null.
function moveRuleBefore(ruler, ruleName, beforeName) {
  if (!ruler || !ruler.__rules__) return
  const rules = ruler.__rules__
  let fromIdx = -1
  let beforeIdx = -1
  for (let idx = 0; idx < rules.length; idx++) {
    const entry = rules[idx]
    // Tolerate empty slots instead of throwing on `.name`.
    if (!entry) continue
    if (entry.name === ruleName) fromIdx = idx
    if (entry.name === beforeName) beforeIdx = idx
    if (fromIdx !== -1 && beforeIdx !== -1) break
  }
  // Keep existing order if already earlier. `<=` also covers the self-move
  // case, which would otherwise splice the rule back into the same slot and
  // throw away the cache for nothing.
  if (fromIdx === -1 || beforeIdx === -1 || fromIdx <= beforeIdx) return

  const rule = rules.splice(fromIdx, 1)[0]
  rules.splice(beforeIdx, 0, rule)
  ruler.__cache__ = null
}
|
|
167
|
+
|
|
168
|
+
// Moves the rule named `ruleName` so that it sits immediately after the rule
// named `afterName` in `ruler.__rules__`, mutating the array in place.
//
// Nothing happens (and the ruler's `__cache__` is left alone) when the ruler
// or its rule list is missing, when either name is not found, when the rule is
// already directly after the anchor, or when both names refer to the same
// rule. After an actual move `ruler.__cache__` is reset to null.
function moveRuleAfter(ruler, ruleName, afterName) {
  if (!ruler || !ruler.__rules__) return
  const rules = ruler.__rules__
  let fromIdx = -1
  let afterIdx = -1
  for (let idx = 0; idx < rules.length; idx++) {
    const entry = rules[idx]
    // Tolerate empty slots instead of throwing on `.name`.
    if (!entry) continue
    if (entry.name === ruleName) fromIdx = idx
    if (entry.name === afterName) afterIdx = idx
    if (fromIdx !== -1 && afterIdx !== -1) break
  }
  if (fromIdx === -1 || afterIdx === -1) return
  // A rule cannot be placed after itself; without this guard it would be
  // removed and reinserted one slot later, swapping it with its successor.
  if (fromIdx === afterIdx || fromIdx === afterIdx + 1) return

  const rule = rules.splice(fromIdx, 1)[0]
  // Removing an earlier element shifts the anchor one slot to the left.
  const targetIdx = fromIdx < afterIdx ? afterIdx - 1 : afterIdx
  rules.splice(targetIdx + 1, 0, rule)
  ruler.__cache__ = null
}
|
|
185
|
+
|
|
186
|
+
export {
|
|
187
|
+
CHAR_ASTERISK,
|
|
188
|
+
CHAR_SPACE,
|
|
189
|
+
CHAR_TAB,
|
|
190
|
+
CHAR_NEWLINE,
|
|
191
|
+
CHAR_IDEOGRAPHIC_SPACE,
|
|
153
192
|
REG_ATTRS,
|
|
154
|
-
hasJapaneseText,
|
|
155
193
|
isJapaneseChar,
|
|
194
|
+
getInlineWrapperBase,
|
|
156
195
|
hasCjkBreaksRule,
|
|
157
|
-
|
|
158
|
-
findNextNonSpace,
|
|
196
|
+
isCjkBreaksRuleName,
|
|
159
197
|
resolveMode,
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
198
|
+
getModeFlags,
|
|
199
|
+
deriveModeInfo,
|
|
200
|
+
MODE_FLAG_COMPATIBLE,
|
|
201
|
+
MODE_FLAG_AGGRESSIVE,
|
|
202
|
+
MODE_FLAG_JAPANESE_BASE,
|
|
203
|
+
MODE_FLAG_JAPANESE_PLUS,
|
|
204
|
+
MODE_FLAG_JAPANESE_ANY,
|
|
205
|
+
getRuntimeOpt,
|
|
206
|
+
normalizeCoreRulesBeforePostprocess,
|
|
207
|
+
ensureCoreRuleOrder,
|
|
208
|
+
moveRuleBefore,
|
|
209
|
+
moveRuleAfter
|
|
210
|
+
}
|