npm - @peaceroad/markdown-it-strong-ja - Versions diffs - 0.7.2 → 0.8.1 - Mend

@peaceroad/markdown-it-strong-ja 0.7.2 → 0.8.1

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (12)

package/README.md +326 -195
package/index.js +27 -40
package/package.json +26 -6
package/src/token-compat.js +71 -22
package/src/token-core.js +521 -132
package/src/token-link-utils.js +434 -539
package/src/token-postprocess/broken-ref.js +475 -0
package/src/token-postprocess/fastpaths.js +349 -0
package/src/token-postprocess/guards.js +499 -0
package/src/token-postprocess/orchestrator.js +672 -0
package/src/token-postprocess.js +1 -334
package/src/token-utils.js +215 -142

package/index.js CHANGED Viewed

@@ -1,62 +1,49 @@
-import { hasCjkBreaksRule, normalizeCoreRulesBeforePostprocess, ensureCoreRuleOrder, resolveMode } from './src/token-utils.js'
+import { hasCjkBreaksRule, ensureCoreRuleOrder, deriveOptionInfo } from './src/token-utils.js'
 import { patchScanDelims } from './src/token-core.js'
 import { registerTokenCompat } from './src/token-compat.js'
 import { registerTokenPostprocess } from './src/token-postprocess.js'
-const buildNoLinkCacheKey = (opt) => {
-  const mode = resolveMode(opt)
-  const mditAttrs = opt && opt.mditAttrs === false ? '0' : '1'
-  const mdBreaks = opt && opt.mdBreaks === true ? '1' : '0'
-  return `${mode}|${mditAttrs}|${mdBreaks}`
+const DEFAULT_OPTION = {
+  mditAttrs: true, // assume markdown-it-attrs integration by default
+  mode: 'japanese', // 'japanese'(->japanese-boundary-guard) | 'japanese-boundary' | 'japanese-boundary-guard' | 'aggressive' | 'compatible'
+  coreRulesBeforePostprocess: [], // e.g. ['cjk_breaks'] to keep rules ahead of postprocess
+  postprocess: true, // enable link/ref reconstruction pass
+  patchCorePush: true // keep restore-softbreaks after late cjk_breaks
 }
-const getNoLinkMdInstance = (md, opt) => {
-  const baseOpt = opt || md.__strongJaTokenOpt || { mode: 'japanese' }
-  const key = buildNoLinkCacheKey(baseOpt)
-  if (!md.__strongJaTokenNoLinkCache) {
-    md.__strongJaTokenNoLinkCache = new Map()
+const buildNormalizedOption = (md, option) => {
+  const opt = { ...DEFAULT_OPTION }
+  if (option) Object.assign(opt, option)
+  opt.hasCjkBreaks = hasCjkBreaksRule(md)
+  deriveOptionInfo(opt)
+  return opt
+}
+const writeSharedOption = (target, source) => {
+  for (const key of Object.keys(target)) {
+    delete target[key]
   }
-  const cache = md.__strongJaTokenNoLinkCache
-  if (cache.has(key)) return cache.get(key)
-  const noLink = new md.constructor(md.options)
-  mditStrongJa(noLink, { ...baseOpt, _skipPostprocess: true })
-  noLink.inline.ruler.disable(['link'])
-  cache.set(key, noLink)
-  return noLink
+  Object.assign(target, source)
+  return target
 }
 const mditStrongJa = (md, option) => {
   if (option && typeof option.engine === 'string' && option.engine !== 'token') {
     throw new Error('mditStrongJa: legacy engine was removed; use token (default)')
   }
-  const opt = {
-    mditAttrs: true, // assume markdown-it-attrs integration by default
-    mdBreaks: md.options.breaks, // inherit md.options.breaks for compat handling
-    mode: 'japanese', // 'japanese' | 'aggressive' | 'compatible' (pairing behavior)
-    coreRulesBeforePostprocess: [], // e.g. ['cjk_breaks'] to keep rules ahead of postprocess
-    postprocess: true, // enable link/ref reconstruction pass
-    patchCorePush: true // keep restore-softbreaks after late cjk_breaks
-  }
-  if (option) Object.assign(opt, option)
-  opt.hasCjkBreaks = hasCjkBreaksRule(md)
+  const nextOpt = buildNormalizedOption(md, option)
+  const opt = md.__strongJaTokenOpt && typeof md.__strongJaTokenOpt === 'object'
+    ? writeSharedOption(md.__strongJaTokenOpt, nextOpt)
+    : nextOpt
   md.__strongJaTokenOpt = opt
   patchScanDelims(md)
   registerTokenCompat(md, opt)
-  if (!opt._skipPostprocess) {
-    registerTokenPostprocess(md, opt, getNoLinkMdInstance)
-    const rawCoreRules = opt.coreRulesBeforePostprocess
-    const hasCoreRuleConfig = Array.isArray(rawCoreRules)
-      ? rawCoreRules.length > 0
-      : !!rawCoreRules
-    const coreRulesBeforePostprocess = hasCoreRuleConfig
-      ? normalizeCoreRulesBeforePostprocess(rawCoreRules)
-      : []
-    ensureCoreRuleOrder(md, coreRulesBeforePostprocess, 'strong_ja_token_postprocess')
-  }
+  registerTokenPostprocess(md, opt)
+  ensureCoreRuleOrder(md, opt.__strongJaNormalizedCoreRulesBeforePostprocess, 'strong_ja_token_postprocess')
   return md
 }
-export default mditStrongJa
+export default mditStrongJa

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@peaceroad/markdown-it-strong-ja",
-  "description": "This is a plugin for markdown-it. It is an alternative to the standard `**` (strong)  and `*` (em) processing. It also processes strings that cannot be converted by the standard.",
-  "version": "0.7.2",
+  "description": "Extends asterisk emphasis handling for Japanese text while keeping markdown-it behavior as close as practical.",
+  "version": "0.8.1",
   "main": "index.js",
   "type": "module",
   "files": [
@@ -11,7 +11,23 @@
     "LICENSE"
   ],
   "scripts": {
-    "test": "node test/test.js"
+    "test": "node test/test.js",
+    "test:fixtures": "node test/test-fixtures.js",
+    "test:edge": "node test/test-edge.js",
+    "test:postprocess": "node test/post-processing.test.js",
+    "test:postprocess-fastpath": "node test/post-processing-fastpath.test.js",
+    "test:postprocess-fastpath-roster": "node test/post-processing-fastpath-roster.test.js",
+    "test:postprocess-flow": "node test/post-processing-flow.test.js",
+    "test:postprocess-link-helper": "node test/post-processing-link-helper.test.js",
+    "test:postprocess-gate": "node test/postprocess-gate.js",
+    "test:tokenonly-progress": "node test/post-processing-progress.test.js",
+    "test:readme": "node test/test-readme.js",
+    "test:map": "node test/test-map.js",
+    "test:all": "node test/test-all.js",
+    "bench:scan": "node test/material/perf-scan-delims.mjs",
+    "bench:postprocess": "node test/material/perf-postprocess.mjs",
+    "analyze:postprocess-calls": "node test/material/analyze-postprocess-calls.mjs",
+    "analyze:fastpath": "node test/material/analyze-fastpath-hits.mjs"
   },
   "repository": "https://github.com/peaceroad/p7d-markdown-it-strong-ja.git",
   "author": "peaceroad <peaceroad@gmail.com>",
@@ -20,10 +36,14 @@
     "markdown-it": "^14.1.0"
   },
   "devDependencies": {
-    "@peaceroad/markdown-it-cjk-breaks-mod": "^0.1.5",
-    "@peaceroad/markdown-it-hr-sandwiched-semantic-container": "^0.8.2",
+    "@peaceroad/markdown-it-cjk-breaks-mod": "^0.1.8",
+    "@peaceroad/markdown-it-hr-sandwiched-semantic-container": "^0.10.0",
+    "@peaceroad/markdown-it-renderer-image": "^0.12.0",
+    "@peaceroad/markdown-it-renderer-inline-text": "^0.7.0",
     "markdown-it-attrs": "^4.3.1",
     "markdown-it-sub": "^2.0.0",
-    "markdown-it-sup": "^2.0.0"
+    "markdown-it-sup": "^2.0.0",
+    "p7d-markdown-it-p-captions": "^0.21.0"
   }
 }

package/src/token-compat.js CHANGED Viewed

@@ -3,6 +3,7 @@ import {
   REG_ATTRS,
   isJapaneseChar,
   hasCjkBreaksRule,
+  isCjkBreaksRuleName,
   getRuntimeOpt,
   moveRuleAfter
 } from './token-utils.js'
@@ -13,14 +14,43 @@ const isAsciiWordCode = (code) => {
     (code >= 0x61 && code <= 0x7A)
 }
+const trimTrailingSpaceTab = (text) => {
+  if (!text) return text
+  let end = text.length
+  while (end > 0) {
+    const code = text.charCodeAt(end - 1)
+    if (code !== 0x20 && code !== 0x09) break
+    end--
+  }
+  return end === text.length ? text : text.slice(0, end)
+}
 const registerTokenCompat = (md, baseOpt) => {
-  const hasTextJoinRule = Array.isArray(md.core?.ruler?.__rules__)
-    ? md.core.ruler.__rules__.some((rule) => rule && rule.name === 'text_join')
-    : false
+  const isCompatibleMode = (state) => {
+    const override = state && state.env && state.env.__strongJaTokenOpt
+    if (!override) return baseOpt.__strongJaIsCompatibleMode === true
+    const opt = getRuntimeOpt(state, baseOpt)
+    return opt.__strongJaIsCompatibleMode === true
+  }
+  let hasTextJoinRule = false
+  const coreRules = md.core && md.core.ruler && Array.isArray(md.core.ruler.__rules__)
+    ? md.core.ruler.__rules__
+    : null
+  if (coreRules) {
+    for (let i = 0; i < coreRules.length; i++) {
+      const rule = coreRules[i]
+      if (rule && rule.name === 'text_join') {
+        hasTextJoinRule = true
+        break
+      }
+    }
+  }
   if (!md.__strongJaTokenTrimTrailingRegistered) {
     md.__strongJaTokenTrimTrailingRegistered = true
     const trimInlineTrailingSpaces = (state) => {
+      if (isCompatibleMode(state)) return
       if (!state || !state.tokens) return
       for (let i = 0; i < state.tokens.length; i++) {
         const token = state.tokens[i]
@@ -32,7 +62,9 @@ const registerTokenCompat = (md, baseOpt) => {
         if (idx < 0) continue
         const tail = token.children[idx]
         if (!tail || tail.type !== 'text' || !tail.content) continue
-        const trimmed = tail.content.replace(/[ \t]+$/, '')
+        const lastCode = tail.content.charCodeAt(tail.content.length - 1)
+        if (lastCode !== 0x20 && lastCode !== 0x09) continue
+        const trimmed = trimTrailingSpaceTab(tail.content)
         if (trimmed !== tail.content) {
           tail.content = trimmed
         }
@@ -48,6 +80,7 @@ const registerTokenCompat = (md, baseOpt) => {
   if (!md.__strongJaTokenSoftbreakSpacingRegistered) {
     md.__strongJaTokenSoftbreakSpacingRegistered = true
     const normalizeSoftbreakSpacing = (state) => {
+      if (isCompatibleMode(state)) return
       if (!state) return
       if (baseOpt.hasCjkBreaks !== true && state.md) {
         baseOpt.hasCjkBreaks = hasCjkBreaksRule(state.md)
@@ -57,29 +90,37 @@ const registerTokenCompat = (md, baseOpt) => {
       for (let i = 0; i < state.tokens.length; i++) {
         const token = state.tokens[i]
         if (!token || token.type !== 'inline' || !token.children || token.children.length === 0) continue
+        const children = token.children
         let hasEmphasis = false
-        for (let j = 0; j < token.children.length; j++) {
-          const child = token.children[j]
+        let hasBreakCandidate = false
+        for (let j = 0; j < children.length; j++) {
+          const child = children[j]
           if (!child) continue
           if (child.type === 'strong_open' || child.type === 'strong_close' || child.type === 'em_open' || child.type === 'em_close') {
             hasEmphasis = true
-            break
           }
+          if (!hasBreakCandidate &&
+              (child.type === 'softbreak' ||
+                (child.type === 'text' && child.content && child.content.indexOf('\n') !== -1))) {
+            hasBreakCandidate = true
+          }
+          if (hasEmphasis && hasBreakCandidate) break
         }
         if (!hasEmphasis) continue
-        for (let j = 0; j < token.children.length; j++) {
-          const child = token.children[j]
+        if (!hasBreakCandidate) continue
+        for (let j = 0; j < children.length; j++) {
+          const child = children[j]
           if (!child) continue
           if (child.type === 'softbreak') {
-            const prevToken = token.children[j - 1]
-            const nextToken = token.children[j + 1]
+            const prevToken = children[j - 1]
+            const nextToken = children[j + 1]
             if (!prevToken || !nextToken) continue
             if (prevToken.type !== 'text' || !prevToken.content) continue
             if (nextToken.type !== 'text' || !nextToken.content) continue
             const prevCharCode = prevToken.content.charCodeAt(prevToken.content.length - 1)
             const nextCharCode = nextToken.content.charCodeAt(0)
             const isAsciiWord = isAsciiWordCode(nextCharCode)
-            const shouldReplace = isAsciiWord && nextCharCode !== 0x7B && nextCharCode !== 0x5C &&
+            const shouldReplace = isAsciiWord &&
               isJapaneseChar(prevCharCode) && !isJapaneseChar(nextCharCode)
             if (!shouldReplace) continue
             child.type = 'text'
@@ -90,7 +131,6 @@ const registerTokenCompat = (md, baseOpt) => {
             continue
           }
           if (child.type !== 'text' || !child.content) continue
-          if (!hasEmphasis) continue
           if (child.content.indexOf('\n') === -1) continue
           let normalized = ''
           for (let idx = 0; idx < child.content.length; idx++) {
@@ -99,7 +139,7 @@ const registerTokenCompat = (md, baseOpt) => {
               const prevCharCode = idx > 0 ? child.content.charCodeAt(idx - 1) : 0
               const nextCharCode = idx + 1 < child.content.length ? child.content.charCodeAt(idx + 1) : 0
               const isAsciiWord = isAsciiWordCode(nextCharCode)
-              const shouldReplace = isAsciiWord && nextCharCode !== 0x7B && nextCharCode !== 0x5C &&
+              const shouldReplace = isAsciiWord &&
                 isJapaneseChar(prevCharCode) && !isJapaneseChar(nextCharCode)
               if (shouldReplace) {
                 normalized += ' '
@@ -122,9 +162,13 @@ const registerTokenCompat = (md, baseOpt) => {
   }
   const restoreSoftbreaksAfterCjk = (state) => {
+    if (isCompatibleMode(state)) return
     if (!state) return
-    const opt = getRuntimeOpt(state, baseOpt)
-    if (opt.mditAttrs !== false) return
+    const overrideOpt = state.env && state.env.__strongJaTokenOpt
+    if (overrideOpt) {
+      const opt = getRuntimeOpt(state, baseOpt)
+      if (opt.mditAttrs !== false) return
+    }
     if (!state.md || state.md.__strongJaRestoreSoftbreaksForAttrs !== true) return
     if (baseOpt.hasCjkBreaks !== true && state.md) {
       baseOpt.hasCjkBreaks = hasCjkBreaksRule(state.md)
@@ -167,16 +211,17 @@ const registerTokenCompat = (md, baseOpt) => {
     if (added !== false) {
       md.__strongJaTokenRestoreRegistered = true
       md.__strongJaRestoreSoftbreaksForAttrs = baseOpt.mditAttrs === false
-      if (baseOpt.hasCjkBreaks) {
-        moveRuleAfter(md.core.ruler, 'strong_ja_restore_softbreaks', 'cjk_breaks')
-      }
       if (baseOpt.patchCorePush !== false && !md.__strongJaTokenPatchCorePush) {
         md.__strongJaTokenPatchCorePush = true
         const originalPush = md.core.ruler.push.bind(md.core.ruler)
         md.core.ruler.push = (name, fn, options) => {
           const res = originalPush(name, fn, options)
-          if (name && name.indexOf && name.indexOf('cjk_breaks') !== -1) {
+          if (isCjkBreaksRuleName(name)) {
             baseOpt.hasCjkBreaks = true
+            md.__strongJaHasCjkBreaks = true
+            if (Array.isArray(md.core.ruler.__rules__)) {
+              md.__strongJaCjkBreaksRuleCount = md.core.ruler.__rules__.length
+            }
             moveRuleAfter(md.core.ruler, 'strong_ja_restore_softbreaks', name)
           }
           return res
@@ -192,9 +237,13 @@ const registerTokenCompat = (md, baseOpt) => {
   if (baseOpt.mditAttrs !== false && !md.__strongJaTokenPreAttrsRegistered) {
     md.__strongJaTokenPreAttrsRegistered = true
     md.core.ruler.before('linkify', 'strong_ja_token_pre_attrs', (state) => {
+      if (isCompatibleMode(state)) return
       if (!state || !state.tokens) return
-      const opt = getRuntimeOpt(state, baseOpt)
-      if (opt.mditAttrs === false) return
+      const overrideOpt = state.env && state.env.__strongJaTokenOpt
+      if (overrideOpt) {
+        const opt = getRuntimeOpt(state, baseOpt)
+        if (opt.mditAttrs === false) return
+      }
       for (let i = 0; i < state.tokens.length; i++) {
         const token = state.tokens[i]
         if (!token || token.type !== 'inline' || !token.children || token.children.length === 0) continue