npm - @dr-ishaan/rehype-perfect-code-blocks - Versions diffs - 1.2.2 → 1.3.0 - Mend

@dr-ishaan/rehype-perfect-code-blocks 1.2.2 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

package/src/shiki.ts CHANGED Viewed

@@ -17,6 +17,7 @@ import type { Element, Root } from 'hast';
 import { fromHtml } from 'hast-util-from-html';
 import { visit } from 'unist-util-visit';
 import type { PerfectCodeOptions } from './types.js';
+import { computeThemeAwareDefaults } from './color-utils.js';
 import {
   transformerNotationDiff,
   transformerNotationFocus,
@@ -66,6 +67,58 @@ type ShikiHighlighter = {
 const highlighterCache = new Map<string, Promise<ShikiHighlighter>>();
+// ───────────────────────────────────────────────────────────────────────────
+// Pattern 1 (adopted from expressive-code): Mutually exclusive highlighter
+// task queue.
+//
+// All highlighter operations (createHighlighter, loadLanguage, loadTheme,
+// codeToHast, codeToHtml) are wrapped in `runHighlighterTask(() => ...)`.
+// This serializes them globally, preventing race conditions in parallel
+// static-site builds where multiple unified pipelines share the same
+// module-level highlighter cache.
+//
+// Without this queue, if pipeline A calls `loadLanguage('ts')` and pipeline
+// B calls `codeToHast(code, { lang: 'ts' })` on the same tick, B may run
+// before A's load completes and fall back to plaintext — the "issue #13"
+// class of bug. The queue makes all operations globally sequential.
+//
+// Tradeoff: slight throughput reduction in parallel builds; correctness >
+// throughput for syntax highlighting.
+// ───────────────────────────────────────────────────────────────────────────
+/** One queued unit of work plus the settle callbacks of the promise handed to its caller. */
+type QueueTask = {
+  taskFn: () => Promise<unknown>;
+  resolve: (v: unknown) => void;
+  reject: (e: unknown) => void;
+};
+// FIFO of tasks waiting for their turn.
+const taskQueue: QueueTask[] = [];
+// True while a drain is in flight; reset by processQueue once the queue is empty.
+let processingQueue = false;
+/**
+ * Take the task at the head of the queue, run it, forward its outcome to the
+ * caller-facing promise, then recurse to the next task. When the queue is
+ * empty the `processingQueue` flag is cleared and the drain stops.
+ *
+ * The task is invoked from a promise callback (never synchronously), so a
+ * synchronous throw inside `taskFn` is reported through `reject` as well.
+ */
+function processQueue(): void {
+  const job = taskQueue.shift();
+  if (job === undefined) {
+    processingQueue = false;
+    return;
+  }
+  // Build a handler that settles the caller's promise first and only then
+  // starts the next queued task.
+  const settleWith = (settle: (value: unknown) => void) => (value: unknown): void => {
+    settle(value);
+    processQueue();
+  };
+  Promise.resolve()
+    .then(() => job.taskFn())
+    .then(settleWith(job.resolve), settleWith(job.reject));
+}
+/**
+ * Enqueue `taskFn` on the mutually exclusive highlighter queue and return a
+ * promise for its outcome. Tasks run one at a time in submission order; a
+ * task starts only after the previous one has resolved or rejected.
+ *
+ * NOTE: a task must not await another `runHighlighterTask` call from inside
+ * `taskFn` — the inner task would be queued behind the outer one, which in
+ * turn never settles.
+ *
+ * @param taskFn Async work to run while holding the queue.
+ * @returns A promise that settles with the result (or rejection) of `taskFn`.
+ */
+export function runHighlighterTask<T>(taskFn: () => Promise<T>): Promise<T> {
+  const outcome = new Promise<T>((resolve, reject) => {
+    const entry: QueueTask = {
+      taskFn: taskFn as () => Promise<unknown>,
+      resolve: resolve as (v: unknown) => void,
+      reject,
+    };
+    taskQueue.push(entry);
+  });
+  // The executor above has already run synchronously, so the entry is queued;
+  // start a drain unless one is already in flight.
+  if (!processingQueue) {
+    processingQueue = true;
+    processQueue();
+  }
+  return outcome;
+}
 async function getHighlighter(
   themeKeys: string[],
   langs: string[],
@@ -79,7 +132,9 @@ async function getHighlighter(
   const cacheKey = `${themeKeys.join(',')}|${[...safeLangs].sort().join(',')}|${regexEngine ?? 'onig'}`;
   let promise = highlighterCache.get(cacheKey);
   if (!promise) {
-    promise = (async () => {
+    // Wrap the highlighter creation in the task queue so concurrent
+    // pipeline instances don't race on Shiki's internal singleton state.
+    promise = runHighlighterTask(async () => {
       if (userGetHighlighter) {
         return (await userGetHighlighter({ themes: themeKeys, langs: safeLangs })) as ShikiHighlighter;
       }
@@ -100,12 +155,39 @@ async function getHighlighter(
       }
       const all = await shiki.createHighlighter(createOpts as unknown as Parameters<typeof shiki.createHighlighter>[0]);
       return all as unknown as ShikiHighlighter;
-    })();
+    });
     highlighterCache.set(cacheKey, promise);
   }
   return promise;
 }
+/**
+ * Pattern 3 (adopted from VitePress): Dispose all cached highlighters and
+ * clear the cache. Call this in long-running dev servers when the theme
+ * changes, or during cleanup of a build pipeline, to release the WASM
+ * engine + loaded grammars + theme cache held by Shiki.
+ *
+ * Disposal is best-effort and never throws or rejects: a highlighter whose
+ * creation is still pending is disposed once it resolves, highlighters that
+ * failed to create are skipped, and an error thrown by `dispose()` itself is
+ * swallowed so that cleanup cannot surface as an unhandled promise rejection.
+ *
+ * After calling this, the next render will create a fresh highlighter.
+ *
+ * @example
+ *   // In a Vite plugin's `configureServer(server)` hook:
+ *   import { disposeHighlighter } from '@dr-ishaan/rehype-perfect-code-blocks';
+ *   server.httpServer?.once('close', () => disposeHighlighter());
+ */
+export function disposeHighlighter(): void {
+  for (const promise of highlighterCache.values()) {
+    // The promise may still be pending; if so, this disposes on resolve.
+    void promise
+      .then((h) => {
+        const maybeDisposable = h as unknown as { dispose?: () => void };
+        if (typeof maybeDisposable.dispose === 'function') maybeDisposable.dispose();
+      })
+      // Covers both a failed highlighter creation (nothing to dispose) and a
+      // throwing dispose() — a two-argument then() would miss the latter.
+      .catch(() => {});
+  }
+  highlighterCache.clear();
+}
 /** Filter out languages that aren't bundled with Shiki (avoids sync throws). */
 function filterBundledLangs(langs: string[]): string[] {
   // Always keep plaintext variants (special — don't require a bundle).
@@ -421,17 +503,22 @@ export async function runShikiOnRawBlocks(
   // Lazily load any langs not yet loaded. Shiki's `loadLanguage` throws
   // synchronously for bundled-but-unknown langs (e.g. typos), so wrap each
   // call in its own try/catch and use Promise.allSettled to swallow rejects.
+  //
+  // Wrapped in `runHighlighterTask` so concurrent pipeline instances don't
+  // race on Shiki's internal language registry. (Pattern 1)
   const loaded = new Set(highlighter.getLoadedLanguages());
   const missing = [...langSet].filter((l) => !loaded.has(l));
   if (missing.length > 0) {
-    const results = await Promise.allSettled(
-      missing.map((l) => {
-        try {
-          return Promise.resolve(highlighter.loadLanguage(l));
-        } catch {
-          return Promise.resolve();
-        }
-      })
+    const results = await runHighlighterTask(() =>
+      Promise.allSettled(
+        missing.map((l) => {
+          try {
+            return Promise.resolve(highlighter.loadLanguage(l));
+          } catch {
+            return Promise.resolve();
+          }
+        })
+      )
     );
     // Log failed language loads (competitor analysis: EC does this, improves DX).
     const failed: string[] = [];
@@ -642,11 +729,71 @@ export async function runShikiOnRawBlocks(
         // language-* class and the Shiki lang we actually used.
         (newCode.properties as Record<string, unknown>).dataLanguage = normalizedRawLang;
       }
+      // Pattern 2: Apply theme-aware --pcb-* defaults as inline styles on the
+      // <pre> element. The static dist/styles.css ships its own defaults, but
+      // those are generic; the runtime overrides them here based on the loaded
+      // Shiki theme so colors look good with ANY theme out of the box.
+      //
+      // We compute the defaults once per highlighter and theme configuration and
+      // cache them in a WeakMap keyed by the highlighter to avoid recomputing per block.
+      if (typeof newPre.properties === 'object' && newPre.properties !== null) {
+        const themeDefaults = getThemeAwareDefaults(highlighter, themeKeys);
+        if (themeDefaults) {
+          const existingStyle = (newPre.properties as { style?: string }).style;
+          // Prepend our defaults so user-provided inline styles (if any) win.
+          (newPre.properties as { style?: string }).style = themeDefaults + (existingStyle ? `;${existingStyle}` : '');
+        }
+      }
       Object.assign(pre, newPre);
     }
   }
 }
+// Cache theme-aware defaults per highlighter instance + resolved theme name,
+// so we don't recompute them for every code block on the page. The WeakMap
+// lets the entries go away together with the highlighter.
+const themeDefaultsCache = new WeakMap<object, Map<string, string>>();
+/**
+ * Return the inline-style string of theme-aware defaults for the given
+ * highlighter, derived from the FIRST entry of `themeKeys`.
+ *
+ * The result is cached per highlighter under that first theme name. Only the
+ * first key influences the output, so it alone forms the cache key: a sorted
+ * join of all keys would let `['dark', 'light']` and `['light', 'dark']`
+ * share one entry even though they resolve different themes.
+ *
+ * @param highlighter Highlighter whose `getTheme(name)` (when present) is used to look up the theme.
+ * @param themeKeys Configured theme names; only `themeKeys[0]` is consulted.
+ * @returns The computed defaults, or `''` when there is no theme name, the
+ *   highlighter has no `getTheme`, the lookup yields nothing, or either the
+ *   lookup or the computation throws.
+ */
+function getThemeAwareDefaults(highlighter: ShikiHighlighter, themeKeys: string[]): string {
+  // Use the highlighter object as the WeakMap key.
+  const hlKey = highlighter as unknown as object;
+  let perHl = themeDefaultsCache.get(hlKey);
+  if (!perHl) {
+    perHl = new Map();
+    themeDefaultsCache.set(hlKey, perHl);
+  }
+  const themeName = themeKeys[0];
+  const cacheKey = themeName ?? '';
+  const cached = perHl.get(cacheKey);
+  if (cached !== undefined) return cached;
+  let defaults = '';
+  try {
+    const hlAny = highlighter as unknown as { getTheme?: (name: string) => unknown };
+    const theme: unknown =
+      themeName && typeof hlAny.getTheme === 'function' ? hlAny.getTheme(themeName) : null;
+    if (theme) defaults = computeThemeAwareDefaults(theme);
+  } catch {
+    // Best-effort: a failing theme lookup or computation means "no defaults".
+    defaults = '';
+  }
+  // Empty results are cached too, so a failing theme is not retried per block.
+  perHl.set(cacheKey, defaults);
+  return defaults;
+}
 function hasShikiMarker(className: unknown): boolean {
   if (!className) return false;
   const arr = Array.isArray(className) ? className : String(className).split(/\s+/);

package/src/transformer.ts CHANGED Viewed

@@ -21,6 +21,7 @@
 import type { Element, ElementContent, Properties, Root, Text } from 'hast';
 import { visit } from 'unist-util-visit';
 import { parseMeta } from './meta.js';
+import { wordDiff, hasChanges } from './word-diff.js';
 import type { PerfectCodeOptions, ResolvedBlock, MagicComment, ParsedMeta } from './types.js';
 /** Default inline SVG copy icon (16x16 GitHub octicon copy). */
@@ -185,6 +186,7 @@ export function rehypePerfectCodeBlocks(userOptions: PerfectCodeOptions = {}) {
     lineNumbersStart: 1,
     highlight: true,
     diff: true,
+    wordDiff: false,
     focus: true,
     errorLevels: true,
     wrap: false,
@@ -443,9 +445,18 @@ async function transformPre(
   // Filter out trailing empty line (from trailing newline in source).
   const filteredLines = filterTrailingEmpty(lineSpans);
+  // Pattern 5 (selective adoption from expressive-code): word-level diff.
+  // When `wordDiff` is enabled and `diff` is also true, scan for adjacent
+  // `pcb__line--del` / `pcb__line--add` pairs and wrap the changed words
+  // in `<mark class="pcb__word-diff--del">` / `<mark class="pcb__word-diff--add">`
+  // elements so readers can see exactly what changed within each diff line.
+  const linesForCollapse = opts.wordDiff && opts.diff
+    ? applyWordDiff(filteredLines)
+    : filteredLines;
   // Apply per-line collapsible sections (meta `collapse="5-12,20-30"`).
   // Wraps matching line ranges in <details><summary>N collapsed lines</summary>...</details>.
-  const collapsedLines = wrapCollapsedSections(filteredLines, meta, opts, resolved.lineNumbersStart);
+  const collapsedLines = wrapCollapsedSections(linesForCollapse, meta, opts, resolved.lineNumbersStart);
   // Call onVisitLine / onVisitHighlightedLine hooks.
   filteredLines.forEach((line, i) => {
@@ -498,6 +509,10 @@ async function transformPre(
   // Build code <pre><code> with line spans.
   // When keepBackground is true, preserve Shiki's inline `style` (which includes
   // background-color + color from the theme) on the new <pre>.
+  // Pattern 2: Always preserve our theme-aware --pcb-* defaults (set by
+  // shiki.ts:getThemeAwareDefaults) even when keepBackground is false —
+  // these are NOT Shiki's background/color styles, they're our CSS variable
+  // defaults that make the code block legible with any theme.
   const newCode = h('code', codeDataProps, collapsedLines);
   const newPreProps: Record<string, unknown> = {};
   if (preLevelClasses.size > 0) {
@@ -505,6 +520,15 @@ async function transformPre(
   }
   if (opts.keepBackground && pre.properties?.style) {
     newPreProps.style = pre.properties.style;
+  } else if (pre.properties?.style) {
+    // keepBackground is false — strip Shiki's bg/color inline styles but
+    // preserve our --pcb-* defaults (Pattern 2).
+    const originalStyle = pre.properties.style as string;
+    const pcbVars = originalStyle
+      .split(';')
+      .filter((part: string) => part.trim().startsWith('--pcb-'))
+      .join(';');
+    if (pcbVars) newPreProps.style = pcbVars;
   }
   // Don't carry over Shiki's tabindex — it causes unwanted focus rings on the
   // inner <pre>. The figure itself is not focusable; only the copy button is.
@@ -1121,3 +1145,87 @@ function h(tag: string, props: Record<string, unknown> = {}, children: ElementCo
 /** Create a hast text node whose content is `value`. */
 function hText(value: string): Text {
   const node: Text = { type: 'text', value };
   return node;
 }
+/* ---------- Pattern 5: word-level diff (selective adoption from expressive-code) ---------- */
+/**
+ * Return the plain text of a line span for diff comparison: the values of
+ * all text nodes beneath `line`, concatenated in document order. Nodes that
+ * are neither text nor element contribute nothing.
+ */
+function extractLineText(line: Element): string {
+  let text = '';
+  // Explicit depth-first walk; children are pushed in reverse so that they
+  // are popped (and therefore visited) in document order.
+  const pending: ElementContent[] = [...line.children].reverse();
+  for (let node = pending.pop(); node !== undefined; node = pending.pop()) {
+    if (node.type === 'text') {
+      text += node.value;
+    } else if (node.type === 'element') {
+      for (let k = node.children.length - 1; k >= 0; k--) pending.push(node.children[k]);
+    }
+  }
+  return text;
+}
+/**
+ * Swap the contents of a line's `.pcb__code` wrapper for `newChildren`.
+ * The first direct `<span>` child of `line` whose class list includes
+ * `pcb__code` is kept as the wrapper and only its children are replaced;
+ * when no such child exists the line is left untouched.
+ */
+function replaceCodeChildren(line: Element, newChildren: ElementContent[]): void {
+  for (const child of line.children) {
+    if (child.type !== 'element' || child.tagName !== 'span') continue;
+    const classList = (child.properties?.className as string[] | undefined) ?? [];
+    if (classList.includes('pcb__code')) {
+      child.children = newChildren;
+      return;
+    }
+  }
+}
+/** Return the direct `.pcb__code` span child of a line span, if it has one. */
+function findWordDiffCodeSpan(line: Element): Element | undefined {
+  return line.children.find(
+    (c): c is Element =>
+      c.type === 'element' &&
+      c.tagName === 'span' &&
+      ((c.properties?.className as string[] | undefined) ?? []).includes('pcb__code')
+  );
+}
+/**
+ * Turn diff tokens into `[start, end)` character ranges of the changed text
+ * on one side. Offsets are relative to that side's own line text, which is
+ * made up of the 'equal' tokens plus the tokens of type `side`.
+ */
+function wordDiffRanges(tokens: ReturnType<typeof wordDiff>, side: 'del' | 'add'): Array<[number, number]> {
+  const ranges: Array<[number, number]> = [];
+  let offset = 0;
+  for (const token of tokens) {
+    // Tokens of the opposite side are not part of this line's text.
+    if (token.type !== 'equal' && token.type !== side) continue;
+    const end = offset + token.text.length;
+    if (token.type === side) ranges.push([offset, end]);
+    offset = end;
+  }
+  return ranges;
+}
+/**
+ * Rebuild `nodes` with every character that falls inside one of `ranges`
+ * wrapped in `<mark class="pcb__word-diff {modifierClass}">`. Elements are
+ * copied with their properties intact and only text nodes are split, so
+ * existing token spans around the text survive. `ranges` must be sorted,
+ * non-overlapping offsets into the concatenated text of `nodes`.
+ */
+function markWordDiffRanges(
+  nodes: ElementContent[],
+  ranges: ReadonlyArray<readonly [number, number]>,
+  modifierClass: string
+): ElementContent[] {
+  let offset = 0; // offset of the next unvisited character in the line text
+  let rangeIndex = 0; // first range that may still overlap upcoming text
+  const rebuild = (node: ElementContent): ElementContent[] => {
+    if (node.type === 'element') {
+      return [{ ...node, children: node.children.flatMap(rebuild) }];
+    }
+    if (node.type !== 'text') return [node];
+    const start = offset;
+    const end = start + node.value.length;
+    offset = end;
+    const pieces: ElementContent[] = [];
+    let pos = start;
+    while (pos < end) {
+      // Drop ranges that end at or before the current position.
+      while (rangeIndex < ranges.length && ranges[rangeIndex][1] <= pos) rangeIndex++;
+      const range = rangeIndex < ranges.length ? ranges[rangeIndex] : undefined;
+      if (!range || range[0] >= end) {
+        // No further change inside this text node: keep the rest as plain text.
+        pieces.push(hText(node.value.slice(pos - start)));
+        break;
+      }
+      if (range[0] > pos) {
+        pieces.push(hText(node.value.slice(pos - start, range[0] - start)));
+        pos = range[0];
+      }
+      // A range may continue into the next text node; mark only what is in this one.
+      const stop = Math.min(range[1], end);
+      pieces.push(
+        h('mark', { className: ['pcb__word-diff', modifierClass] }, [hText(node.value.slice(pos - start, stop - start))])
+      );
+      pos = stop;
+    }
+    return pieces;
+  };
+  return nodes.flatMap(rebuild);
+}
+/**
+ * Apply word-level diff highlighting to adjacent `pcb__line--del` / `pcb__line--add`
+ * pairs. For each pair, compute the per-word diff between the del line's code text
+ * and the add line's code text, then wrap the changed words in
+ * `<mark class="pcb__word-diff pcb__word-diff--del">` (del line) or
+ * `<mark class="pcb__word-diff pcb__word-diff--add">` (add line).
+ *
+ * Only the text inside each line's `.pcb__code` wrapper is compared and rewritten,
+ * so anything else in the line span never leaks into the code. The existing
+ * children of the wrapper are kept and only their text nodes are split around
+ * the changed ranges, which preserves the per-token elements already present.
+ *
+ * Only adjacent del→add pairs are processed (the common case for unified diffs).
+ * Standalone del or add lines (no adjacent counterpart), and pairs where either
+ * line has no `.pcb__code` wrapper, are left unchanged.
+ *
+ * This is a post-processing step that runs after `toLineSpans` and before
+ * `wrapCollapsedSections`. It mutates the line spans in place.
+ *
+ * @param lines Line spans of one code block, in order.
+ * @returns The same `lines` array.
+ */
+function applyWordDiff(lines: Element[]): Element[] {
+  for (let i = 0; i < lines.length - 1; i++) {
+    const cur = lines[i];
+    const next = lines[i + 1];
+    const curClasses = (cur.properties?.className as string[] | undefined) ?? [];
+    const nextClasses = (next.properties?.className as string[] | undefined) ?? [];
+    if (!curClasses.includes('pcb__line--del') || !nextClasses.includes('pcb__line--add')) continue;
+    const delCode = findWordDiffCodeSpan(cur);
+    const addCode = findWordDiffCodeSpan(next);
+    if (!delCode || !addCode) continue;
+    const tokens = wordDiff(extractLineText(delCode), extractLineText(addCode));
+    if (!hasChanges(tokens)) continue;
+    replaceCodeChildren(
+      cur,
+      markWordDiffRanges(delCode.children, wordDiffRanges(tokens, 'del'), 'pcb__word-diff--del')
+    );
+    replaceCodeChildren(
+      next,
+      markWordDiffRanges(addCode.children, wordDiffRanges(tokens, 'add'), 'pcb__word-diff--add')
+    );
+  }
+  return lines;
+}

package/src/types.ts CHANGED Viewed

@@ -51,6 +51,18 @@ export interface PerfectCodeOptions {
   highlight?: boolean;
   /** Enable +/- diff line coloring AND // [!code ++] / [!code --] notation. Default: true */
   diff?: boolean;
+  /**
+   * Pattern 5 (selective adoption from expressive-code): Enable word-level diff
+   * highlighting. When `diff` is also true and a code block contains adjacent
+   * `+`/`-` diff lines, the plugin computes the per-word diff between the
+   * removed and added lines and wraps changed words in `<mark class="pcb__word-diff--add">`
+   * / `<mark class="pcb__word-diff--del">` elements. This makes it easy for
+   * readers to see exactly what changed, not just which lines changed.
+   *
+   * Uses a simple LCS-based word diff algorithm (no external deps).
+   * Default: false (opt-in; adds a small per-block cost when diff lines are present).
+   */
+  wordDiff?: boolean;
   /** Enable // [!code focus] notation. Default: true */
   focus?: boolean;
   /** Enable // [!code error] / [!code warning] notations. Default: true */

package/src/word-diff.ts ADDED Viewed

@@ -0,0 +1,143 @@
+/**
+ * Word-level diff utility for Pattern 5 (selective adoption from expressive-code).
+ *
+ * Computes a per-word diff between two lines of code using a simple LCS-based
+ * algorithm. Used to highlight the specific words that changed within `+`/`-`
+ * diff lines, so readers can see exactly what was added/removed rather than
+ * just which lines changed.
+ *
+ * Algorithm: split each line into tokens (words + whitespace + punctuation),
+ * compute the LCS (Longest Common Subsequence) between the two token arrays,
+ * then walk both arrays emitting add/remove/equal markers.
+ *
+ * No external dependencies — this is ~80 lines of self-contained code.
+ */
+/** One run of text in a word-level diff result. */
+export interface DiffToken {
+  /** The text of this run (adjacent tokens of the same type are merged by `wordDiff`). */
+  text: string;
+  /** 'equal' = present in both lines, 'add' = only in the new line, 'del' = only in the old line. */
+  type: 'equal' | 'add' | 'del';
+}
+// One token per match: a whitespace run, a word run, or any other single
+// code point. The `u` flag makes the final `.` consume a whole code point, so
+// characters outside the BMP (e.g. emoji) are never split into lone surrogates.
+const WORD_DIFF_TOKEN_RE = /\s+|[\p{L}\p{M}\p{N}_.]+|./gu;
+/**
+ * Split a code line into tokens for diffing. Each token is either:
+ *   - a run of whitespace
+ *   - a run of word characters: Unicode letters, marks and digits, underscore,
+ *     and `.` (so a method chain like `a.b.c` stays one token)
+ *   - any other single character (one full code point)
+ *
+ * Concatenating the returned tokens always reproduces `line` exactly.
+ *
+ * This produces reasonable word-level diffs for most code without being
+ * overly granular (character-level) or too coarse (line-level).
+ *
+ * @param line The text to split; may be empty.
+ * @returns The tokens in order; an empty array for an empty string.
+ */
+function tokenize(line: string): string[] {
+  // `match` with a global regex returns null (not an empty array) when nothing matches.
+  return line.match(WORD_DIFF_TOKEN_RE) ?? [];
+}
+/**
+ * Build the dynamic-programming table for the Longest Common Subsequence of
+ * two token arrays.
+ *
+ * @returns A `(a.length + 1) × (b.length + 1)` matrix in which `table[i][j]`
+ *   is the LCS length of the prefixes `a[0..i)` and `b[0..j)`; row 0 and
+ *   column 0 are all zeros.
+ */
+function lcsTable(a: string[], b: string[]): number[][] {
+  const width = b.length + 1;
+  // Row 0: the empty prefix of `a` shares nothing with any prefix of `b`.
+  const table: number[][] = [new Array<number>(width).fill(0)];
+  for (let row = 1; row <= a.length; row++) {
+    const above = table[row - 1];
+    const current: number[] = [0];
+    for (let col = 1; col < width; col++) {
+      const extended =
+        a[row - 1] === b[col - 1]
+          ? above[col - 1] + 1
+          : Math.max(above[col], current[col - 1]);
+      current.push(extended);
+    }
+    table.push(current);
+  }
+  return table;
+}
+/**
+ * Compute a word-level diff between two strings.
+ *
+ * Both strings are tokenized and aligned along their longest common
+ * subsequence. In the returned list, the 'equal' and 'del' texts concatenate
+ * to `oldStr` and the 'equal' and 'add' texts concatenate to `newStr`.
+ * Neighbouring tokens of the same type are merged into one entry.
+ *
+ * @param oldStr The "before" line (typically the `-` line, without the prefix)
+ * @param newStr The "after" line (typically the `+` line, without the prefix)
+ * @returns Array of diff tokens
+ *
+ * @example
+ *   wordDiff('const x = 1', 'const y = 2')
+ *   // → [
+ *   //   { text: 'const ', type: 'equal' },
+ *   //   { text: 'x', type: 'del' },
+ *   //   { text: 'y', type: 'add' },
+ *   //   { text: ' = ', type: 'equal' },
+ *   //   { text: '1', type: 'del' },
+ *   //   { text: '2', type: 'add' },
+ *   // ]
+ */
+export function wordDiff(oldStr: string, newStr: string): DiffToken[] {
+  const oldTokens = tokenize(oldStr);
+  const newTokens = tokenize(newStr);
+  const table = lcsTable(oldTokens, newTokens);
+  // Walk the table from its bottom-right corner; this yields the edit
+  // operations last-to-first.
+  const backwards: DiffToken[] = [];
+  for (let i = oldTokens.length, j = newTokens.length; i > 0 || j > 0; ) {
+    if (i > 0 && j > 0 && oldTokens[i - 1] === newTokens[j - 1]) {
+      backwards.push({ text: oldTokens[i - 1], type: 'equal' });
+      i -= 1;
+      j -= 1;
+      continue;
+    }
+    const takeAdd = j > 0 && (i === 0 || table[i][j - 1] >= table[i - 1][j]);
+    if (takeAdd) {
+      backwards.push({ text: newTokens[j - 1], type: 'add' });
+      j -= 1;
+    } else {
+      backwards.push({ text: oldTokens[i - 1], type: 'del' });
+      i -= 1;
+    }
+  }
+  // Replay the operations first-to-last, folding runs of the same type into
+  // a single token to reduce output size.
+  const merged: DiffToken[] = [];
+  for (let k = backwards.length - 1; k >= 0; k--) {
+    const { text, type } = backwards[k];
+    const tail = merged[merged.length - 1];
+    if (tail !== undefined && tail.type === type) {
+      tail.text += text;
+    } else {
+      merged.push({ text, type });
+    }
+  }
+  return merged;
+}
+/**
+ * Report whether a diff result contains at least one token that is not
+ * 'equal' (i.e. something was added or removed). Callers use this to skip
+ * wrapping when the two lines are identical.
+ */
+export function hasChanges(tokens: DiffToken[]): boolean {
+  for (const { type } of tokens) {
+    if (type !== 'equal') return true;
+  }
+  return false;
+}