@peaceroad/markdown-it-strong-ja 0.7.2 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +326 -195
- package/index.js +27 -40
- package/package.json +26 -6
- package/src/token-compat.js +71 -22
- package/src/token-core.js +521 -132
- package/src/token-link-utils.js +434 -539
- package/src/token-postprocess/broken-ref.js +475 -0
- package/src/token-postprocess/fastpaths.js +349 -0
- package/src/token-postprocess/guards.js +499 -0
- package/src/token-postprocess/orchestrator.js +672 -0
- package/src/token-postprocess.js +1 -334
- package/src/token-utils.js +215 -142
package/src/token-utils.js
CHANGED
|
@@ -1,166 +1,239 @@
|
|
|
1
|
-
const CHAR_ASTERISK = 0x2A // *
|
|
2
|
-
const
|
|
3
|
-
const
|
|
4
|
-
const
|
|
5
|
-
const
|
|
6
|
-
const
|
|
7
|
-
const
|
|
8
|
-
const
|
|
9
|
-
|
|
10
|
-
const
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
1
|
+
// UTF-16 code units of the delimiter and whitespace characters.
const CHAR_ASTERISK = 0x2a // '*'
const CHAR_SPACE = 0x20 // ' '
const CHAR_TAB = 0x09 // '\t'
const CHAR_NEWLINE = 0x0a // '\n'
const CHAR_IDEOGRAPHIC_SPACE = 0x3000 // U+3000 ideographic (fullwidth) space

// One bit per mode so that mode checks can be done with bitwise tests.
const MODE_FLAG_COMPATIBLE = 0b0001
const MODE_FLAG_AGGRESSIVE = 0b0010
const MODE_FLAG_JAPANESE_BASE = 0b0100
const MODE_FLAG_JAPANESE_PLUS = 0b1000
// Mask that matches either of the two Japanese flags.
const MODE_FLAG_JAPANESE_ANY = MODE_FLAG_JAPANESE_PLUS | MODE_FLAG_JAPANESE_BASE

// "cjk_breaks" either as the whole name or delimited by '_' / '-' inside a longer name.
const REG_CJK_BREAKS_RULE_NAME = /(^|[_-])cjk_breaks([_-]|$)/

// Mode names accepted after lower-casing (the `japanese` alias is resolved separately).
const VALID_CANONICAL_MODES = new Set(
  ['compatible', 'aggressive', 'japanese-boundary', 'japanese-boundary-guard']
)

// Hiragana, Katakana, Han, plus the U+3000-303F and U+FF00-FFEF blocks.
const REG_JAPANESE = /[\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Han}\u3000-\u303F\uFF00-\uFFEF]/u

// A trailing `{...}` group whose content has no braces, newline, or the listed punctuation.
const REG_ATTRS = /{[^{}\n!@#%^&*()]+?}$/
|
|
20
|
+
|
|
15
21
|
// Reports whether a character is Japanese text or CJK punctuation.
//
// `ch` may be a string (its first code point is inspected) or a numeric
// code unit / code point. Falsy input ('' / 0 / null / undefined) and ASCII
// yield false.
//
// Code points above U+FFFF are handled explicitly: `charCodeAt` and
// `String.fromCharCode` only see 16 bits, which would make astral Han
// (e.g. U+20000) look non-Japanese and would wrap a number such as 0x13042
// around to U+3042.
const isJapaneseChar = (ch) => {
  if (!ch) return false
  const code = typeof ch === 'string' ? ch.codePointAt(0) : ch
  if (code < 128) return false
  if (code >= 0x3040 && code <= 0x309F) return true
  if (code >= 0x30A0 && code <= 0x30FF) return true
  // Han + CJK punctuation/fullwidth ranges are common hot-path hits.
  // Keep these as cheap numeric checks before the fallback regex.
  if (code >= 0x3400 && code <= 0x4DBF) return true
  if (code >= 0x4E00 && code <= 0x9FFF) return true
  if (code >= 0xF900 && code <= 0xFAFF) return true
  if (code >= 0x3000 && code <= 0x303F) return true
  if (code >= 0xFF00 && code <= 0xFFEF) return true
  if (code > 0xFFFF) {
    // String.fromCodePoint throws on non-integers and values past U+10FFFF.
    if (!Number.isInteger(code) || code > 0x10FFFF) return false
    return REG_JAPANESE.test(String.fromCodePoint(code))
  }
  return REG_JAPANESE.test(String.fromCharCode(code))
}
|
|
24
36
|
|
|
25
37
|
// Reports whether the markdown-it instance has a core rule whose name
// matches the cjk_breaks naming pattern.
//
// The answer is memoized on `md`: a positive result is kept as-is, while a
// negative result is only trusted for as long as the number of core rules
// is unchanged. Returns false when `md.core.ruler.__rules__` is not an array.
const hasCjkBreaksRule = (md) => {
  const ruleList = md && md.core && md.core.ruler ? md.core.ruler.__rules__ : undefined
  if (!Array.isArray(ruleList)) return false
  if (md.__strongJaHasCjkBreaks === true) return true

  const total = ruleList.length
  const knownMiss = md.__strongJaHasCjkBreaks === false &&
    md.__strongJaCjkBreaksRuleCount === total
  if (knownMiss) return false

  const found = ruleList.some((entry) => {
    return Boolean(entry) && typeof entry.name === 'string' && isCjkBreaksRuleName(entry.name)
  })
  md.__strongJaHasCjkBreaks = found
  md.__strongJaCjkBreaksRuleCount = total
  return found
}
|
|
52
57
|
|
|
58
|
+
// True when `name` is a string matching REG_CJK_BREAKS_RULE_NAME;
// any non-string value yields false.
const isCjkBreaksRuleName = (name) => {
  if (typeof name !== 'string') return false
  return REG_CJK_BREAKS_RULE_NAME.test(name)
}
|
|
61
|
+
|
|
53
62
|
// Turns `opt.mode` into a canonical mode name.
//
// A missing option object or a non-string `mode` falls back to 'japanese'.
// Matching is case-insensitive. Throws an Error naming the valid modes when
// the value is not recognized.
const resolveMode = (opt) => {
  let requested = 'japanese'
  if (opt && typeof opt.mode === 'string') requested = opt.mode

  const key = requested.toLowerCase()
  // `japanese` is an alias for the guard mode.
  if (key === 'japanese') return 'japanese-boundary-guard'
  if (!VALID_CANONICAL_MODES.has(key)) {
    throw new Error(
      `mditStrongJa: unknown mode "${requested}". Valid modes: japanese, japanese-boundary, japanese-boundary-guard, aggressive, compatible`
    )
  }
  return key
}
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
72
|
+
|
|
73
|
+
// Maps a canonical mode name to its MODE_FLAG_* bit.
// Any other value (including non-canonical aliases) maps to 0.
const getModeFlags = (mode) => {
  if (mode === 'compatible') return MODE_FLAG_COMPATIBLE
  if (mode === 'aggressive') return MODE_FLAG_AGGRESSIVE
  if (mode === 'japanese-boundary') return MODE_FLAG_JAPANESE_BASE
  if (mode === 'japanese-boundary-guard') return MODE_FLAG_JAPANESE_PLUS
  return 0
}
|
|
87
|
+
|
|
88
|
+
// Caches the resolved mode on the option object itself and returns it.
//
// Writes `__strongJaModeRaw` (the `mode` value the cache was built from),
// `__strongJaMode` (canonical name) and `__strongJaModeFlags` (bit flags).
// The cached values are reused while `opt.mode` is unchanged. Non-object
// input is returned untouched. Errors from resolveMode propagate before
// anything is written.
const deriveModeInfo = (opt) => {
  const isOptionObject = Boolean(opt) && typeof opt === 'object'
  if (!isOptionObject) return opt

  const requestedMode = opt.mode
  const cacheValid = opt.__strongJaModeRaw === requestedMode &&
    typeof opt.__strongJaMode === 'string' &&
    typeof opt.__strongJaModeFlags === 'number'

  if (!cacheValid) {
    const canonical = resolveMode(opt)
    opt.__strongJaModeRaw = requestedMode
    opt.__strongJaMode = canonical
    opt.__strongJaModeFlags = getModeFlags(canonical)
  }
  return opt
}
|
|
102
|
+
|
|
103
|
+
// Caches every option-derived value on the option object and returns it.
//
// After deriveModeInfo has refreshed the mode fields, this computes:
//   __strongJaIsCompatibleMode        - compatible flag is set
//   __strongJaPostprocessActive       - `postprocess` is not false and mode is not compatible
//   __strongJaIsJapaneseMode          - either Japanese flag is set
//   __strongJaStrictAsciiCodeGuard    - the JAPANESE_PLUS flag is set
//   __strongJaStrictAsciiStrongGuard  - the aggressive flag is NOT set
//   __strongJaNormalizedCoreRulesBeforePostprocess - normalized rule-name list
//
// The cached values are reused only while `postprocess`,
// `coreRulesBeforePostprocess` AND the mode flags they were computed from
// are unchanged. Tracking the mode flags matters because every boolean
// above depends on them: without it, changing `opt.mode` (or spreading an
// already-derived object together with a `mode` override) would refresh the
// flags but keep returning the booleans of the previous mode.
// Non-object input is returned untouched.
const deriveOptionInfo = (opt) => {
  if (!opt || typeof opt !== 'object') return opt
  deriveModeInfo(opt)
  const rawPostprocess = opt.postprocess
  const rawCoreRules = opt.coreRulesBeforePostprocess
  const modeFlags = opt.__strongJaModeFlags
  if (opt.__strongJaPlanModeFlags === modeFlags &&
      opt.__strongJaPlanPostprocessRaw === rawPostprocess &&
      opt.__strongJaPlanCoreRulesRaw === rawCoreRules &&
      typeof opt.__strongJaPostprocessActive === 'boolean' &&
      typeof opt.__strongJaIsCompatibleMode === 'boolean' &&
      typeof opt.__strongJaIsJapaneseMode === 'boolean' &&
      typeof opt.__strongJaStrictAsciiCodeGuard === 'boolean' &&
      typeof opt.__strongJaStrictAsciiStrongGuard === 'boolean' &&
      Array.isArray(opt.__strongJaNormalizedCoreRulesBeforePostprocess)) {
    return opt
  }
  // Record the inputs this plan was built from so the check above can
  // detect any later change.
  opt.__strongJaPlanModeFlags = modeFlags
  opt.__strongJaPlanPostprocessRaw = rawPostprocess
  opt.__strongJaPlanCoreRulesRaw = rawCoreRules
  opt.__strongJaIsCompatibleMode = (modeFlags & MODE_FLAG_COMPATIBLE) !== 0
  opt.__strongJaPostprocessActive = rawPostprocess !== false && !opt.__strongJaIsCompatibleMode
  opt.__strongJaIsJapaneseMode = (modeFlags & MODE_FLAG_JAPANESE_ANY) !== 0
  opt.__strongJaStrictAsciiCodeGuard = (modeFlags & MODE_FLAG_JAPANESE_PLUS) !== 0
  opt.__strongJaStrictAsciiStrongGuard = (modeFlags & MODE_FLAG_AGGRESSIVE) === 0
  opt.__strongJaNormalizedCoreRulesBeforePostprocess = normalizeCoreRulesBeforePostprocess(rawCoreRules)
  return opt
}
|
|
127
128
|
|
|
128
|
-
|
|
129
|
-
if (!
|
|
130
|
-
const
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
129
|
+
// Returns the effective, derived options for one parse.
//
// Without a per-parse override (`state.env.__strongJaTokenOpt`) this is
// `baseOpt` passed through deriveOptionInfo. With an override, the result
// is a shallow merge (override keys win) that is memoized on `state` and
// reused while both the base object and the override object are the same
// instances.
const getRuntimeOpt = (state, baseOpt) => {
  const override = state && state.env ? state.env.__strongJaTokenOpt : undefined
  if (!override) return deriveOptionInfo(baseOpt)

  const cached = state.__strongJaTokenRuntimeOpt
  const cacheIsFresh = Boolean(cached) &&
    state.__strongJaTokenRuntimeBase === baseOpt &&
    state.__strongJaTokenRuntimeOverride === override
  if (cacheIsFresh) return cached

  const runtimeOpt = deriveOptionInfo({ ...baseOpt, ...override })
  state.__strongJaTokenRuntimeOpt = runtimeOpt
  state.__strongJaTokenRuntimeBase = baseOpt
  state.__strongJaTokenRuntimeOverride = override
  return runtimeOpt
}
|
|
143
|
+
|
|
144
|
+
// Number of own enumerable keys in `state.env.references`, memoized on
// `state.__strongJaReferenceCount`. A missing state yields 0 (nothing is
// cached in that case); missing env/references also count as 0.
const getReferenceCount = (state) => {
  if (!state) return 0

  const memo = state.__strongJaReferenceCount
  if (memo !== undefined) return memo

  const refs = state.env && state.env.references
  let total = 0
  if (refs) total = Object.keys(refs).length
  state.__strongJaReferenceCount = total
  return total
}
|
|
145
153
|
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
154
|
+
// Normalizes the `coreRulesBeforePostprocess` option into a clean list.
//
// Accepts a single value or an array. Non-string entries are ignored,
// strings are trimmed, empty results are dropped, and duplicates are
// removed while keeping first-seen order. Falsy input yields [].
function normalizeCoreRulesBeforePostprocess(value) {
  if (!value) return []

  const candidates = Array.isArray(value) ? value : [value]
  const unique = new Set()
  for (const candidate of candidates) {
    if (typeof candidate !== 'string') continue
    const name = candidate.trim()
    if (name) unique.add(name)
  }
  return Array.from(unique)
}
|
|
169
|
+
|
|
170
|
+
// Calls moveRuleBefore on `md.core.ruler` for each entry of `ruleNames`,
// in order, with `targetRuleName` as the anchor. Does nothing when the core
// ruler is missing or `ruleNames` is empty/missing.
function ensureCoreRuleOrder(md, ruleNames, targetRuleName) {
  const hasRuler = Boolean(md && md.core && md.core.ruler)
  if (!hasRuler) return
  const hasNames = Boolean(ruleNames) && ruleNames.length !== 0
  if (!hasNames) return

  let position = 0
  while (position < ruleNames.length) {
    moveRuleBefore(md.core.ruler, ruleNames[position], targetRuleName)
    position += 1
  }
}
|
|
177
|
+
|
|
178
|
+
// Moves the rule named `ruleName` so that it sits directly in front of the
// rule named `beforeName` inside `ruler.__rules__`, then resets
// `ruler.__cache__`. Nothing happens when either rule is missing or when
// `ruleName` already comes earlier than `beforeName`.
function moveRuleBefore(ruler, ruleName, beforeName) {
  const rules = ruler ? ruler.__rules__ : undefined
  if (!rules) return

  let sourcePos = -1
  let anchorPos = -1
  // Scan only until both positions are known.
  for (let idx = 0; idx < rules.length && (sourcePos === -1 || anchorPos === -1); idx += 1) {
    const { name } = rules[idx]
    if (name === ruleName) sourcePos = idx
    if (name === beforeName) anchorPos = idx
  }

  const bothFound = sourcePos !== -1 && anchorPos !== -1
  if (!bothFound || sourcePos < anchorPos) return

  // The source sits at or after the anchor, so removing it leaves the
  // anchor index valid for the re-insert.
  const [moved] = rules.splice(sourcePos, 1)
  rules.splice(anchorPos, 0, moved)
  ruler.__cache__ = null
}
|
|
195
|
+
|
|
196
|
+
// Moves the rule named `ruleName` so that it sits directly behind the rule
// named `afterName` inside `ruler.__rules__`, then resets `ruler.__cache__`.
//
// Nothing happens (order and cache are left alone) when:
//   - the ruler or its rule list is missing,
//   - either rule cannot be found,
//   - the rule is already directly behind the anchor, or
//   - `ruleName` and `afterName` resolve to the same rule. A rule cannot be
//     placed after itself; without this guard the splice arithmetic below
//     would push it one slot further down the list.
function moveRuleAfter(ruler, ruleName, afterName) {
  if (!ruler || !ruler.__rules__) return
  const rules = ruler.__rules__
  let fromIdx = -1
  let afterIdx = -1
  for (let idx = 0; idx < rules.length; idx++) {
    if (rules[idx].name === ruleName) fromIdx = idx
    if (rules[idx].name === afterName) afterIdx = idx
    if (fromIdx !== -1 && afterIdx !== -1) break
  }
  if (fromIdx === -1 || afterIdx === -1) return
  if (fromIdx === afterIdx || fromIdx === afterIdx + 1) return

  const rule = rules.splice(fromIdx, 1)[0]
  // Removing an earlier entry shifts the anchor one slot to the left.
  const targetIdx = fromIdx < afterIdx ? afterIdx - 1 : afterIdx
  rules.splice(targetIdx + 1, 0, rule)
  ruler.__cache__ = null
}
|
|
213
|
+
|
|
214
|
+
export {
|
|
215
|
+
CHAR_ASTERISK,
|
|
216
|
+
CHAR_SPACE,
|
|
217
|
+
CHAR_TAB,
|
|
218
|
+
CHAR_NEWLINE,
|
|
219
|
+
CHAR_IDEOGRAPHIC_SPACE,
|
|
153
220
|
REG_ATTRS,
|
|
154
|
-
hasJapaneseText,
|
|
155
221
|
isJapaneseChar,
|
|
156
222
|
hasCjkBreaksRule,
|
|
157
|
-
|
|
158
|
-
findNextNonSpace,
|
|
223
|
+
isCjkBreaksRuleName,
|
|
159
224
|
resolveMode,
|
|
160
|
-
|
|
225
|
+
getModeFlags,
|
|
226
|
+
deriveModeInfo,
|
|
227
|
+
deriveOptionInfo,
|
|
228
|
+
MODE_FLAG_COMPATIBLE,
|
|
229
|
+
MODE_FLAG_AGGRESSIVE,
|
|
230
|
+
MODE_FLAG_JAPANESE_BASE,
|
|
231
|
+
MODE_FLAG_JAPANESE_PLUS,
|
|
232
|
+
MODE_FLAG_JAPANESE_ANY,
|
|
161
233
|
getRuntimeOpt,
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
234
|
+
getReferenceCount,
|
|
235
|
+
normalizeCoreRulesBeforePostprocess,
|
|
236
|
+
ensureCoreRuleOrder,
|
|
237
|
+
moveRuleBefore,
|
|
238
|
+
moveRuleAfter
|
|
239
|
+
}
|