@dr-ishaan/rehype-perfect-code-blocks 1.1.7 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/shiki.ts CHANGED
@@ -16,7 +16,6 @@
16
16
  import type { Element, Root } from 'hast';
17
17
  import { fromHtml } from 'hast-util-from-html';
18
18
  import { visit } from 'unist-util-visit';
19
- import { createRequire } from 'node:module';
20
19
  import type { PerfectCodeOptions } from './types.js';
21
20
  import {
22
21
  transformerNotationDiff,
@@ -31,7 +30,25 @@ import {
31
30
  transformerRemoveNotationEscape,
32
31
  } from '@shikijs/transformers';
33
32
 
34
- const require = createRequire(import.meta.url);
33
// Synchronous `require`, used only to look up Shiki's bundled-language list.
//
// Under Node.js ESM it is built with `createRequire(import.meta.url)`.
// Where `node:module` cannot be imported (edge runtimes, browsers, Deno) it
// stays `null`, and `filterBundledLangs` then lets every language through,
// relying on the try/catch around `codeToHast` to deal with unknown ones.
let syncRequire: ((id: string) => unknown) | null = null;
try {
  // A static `import ... from 'node:module'` would fail at module-load time
  // outside Node.js, so the built-in is loaded dynamically (top-level await)
  // and a failed import is converted to `null` instead of rejecting.
  const nodeModuleApi = (await import('node:module').catch(() => null)) as
    | { createRequire?: (url: string) => (id: string) => unknown }
    | null;
  syncRequire = nodeModuleApi?.createRequire
    ? nodeModuleApi.createRequire(import.meta.url)
    : null;
} catch {
  // `createRequire` itself threw — behave as if it were unavailable.
  syncRequire = null;
}
35
52
 
36
53
  // Use a permissive type for ShikiTransformer to avoid cross-package type
37
54
  // identity issues when @shikijs/transformers and shiki bundle different copies
@@ -91,9 +108,17 @@ async function getHighlighter(
91
108
 
92
109
  /** Filter out languages that aren't bundled with Shiki (avoids sync throws). */
93
110
  function filterBundledLangs(langs: string[]): string[] {
111
+ // Always keep plaintext variants (special — don't require a bundle).
112
+ const alwaysKeep = new Set(['plaintext', 'text', 'txt', 'ansi']);
94
113
  let bundled: Set<string>;
114
+ if (!syncRequire) {
115
+ // Edge runtime / browser — can't read shiki's bundle list synchronously.
116
+ // Pass through everything; the try/catch around codeToHast handles
117
+ // unknown langs by falling back to plaintext.
118
+ return langs;
119
+ }
95
120
  try {
96
- const shiki = require('shiki') as {
121
+ const shiki = syncRequire('shiki') as {
97
122
  bundledLanguages?: Record<string, unknown>;
98
123
  bundledLanguagesAlias?: Record<string, unknown>;
99
124
  };
@@ -102,13 +127,11 @@ function filterBundledLangs(langs: string[]): string[] {
102
127
  ...Object.keys(shiki.bundledLanguagesAlias ?? {}),
103
128
  ]);
104
129
  } catch {
105
- bundled = new Set();
130
+ bundled = new Set(alwaysKeep);
131
+ return langs.filter((l) => bundled.has(l) || bundled.has(l.toLowerCase()));
106
132
  }
107
- // Always keep plaintext variants (special — don't require a bundle).
108
- bundled.add('plaintext');
109
- bundled.add('text');
110
- bundled.add('txt');
111
- bundled.add('ansi');
133
+ // Always keep plaintext variants.
134
+ for (const p of alwaysKeep) bundled.add(p);
112
135
  return langs.filter((l) => bundled.has(l) || bundled.has(l.toLowerCase()));
113
136
  }
114
137
 
@@ -120,6 +143,14 @@ async function buildTransformers(
120
143
  const transformers: unknown[] = [];
121
144
  void metaStr;
122
145
 
146
+ // If user wants full manual control, only push their transformers.
147
+ if (opts.disableAutoTransformers) {
148
+ if (opts.shiki.transformers) {
149
+ transformers.push(...opts.shiki.transformers);
150
+ }
151
+ return transformers;
152
+ }
153
+
123
154
  // Always remove the escape marker `// [\!code xxx]` first so other
124
155
  // notation transformers can read what's left.
125
156
  transformers.push(transformerRemoveNotationEscape());
@@ -129,6 +160,8 @@ async function buildTransformers(
129
160
  transformers.push(
130
161
  transformerMetaHighlight({
131
162
  className: 'pcb__line--hl',
163
+ // Issue #11 from competitor analysis: support zero-indexed line numbers.
164
+ zeroIndexed: opts.zeroIndexed === true,
132
165
  })
133
166
  );
134
167
  }
@@ -217,14 +250,100 @@ async function buildTransformers(
217
250
  }
218
251
  }
219
252
 
220
- // User-provided transformers
221
- if (opts.shiki.transformers) {
222
- transformers.push(...opts.shiki.transformers);
253
+ // Custom notations: map custom // [!code xxx] markers to CSS classes.
254
+ // (Previously this was `void customNotations` — now actually wired up.)
255
+ if (opts.customNotations && Object.keys(opts.customNotations).length > 0) {
256
+ try {
257
+ const { transformerNotationMap } = await import('@shikijs/transformers');
258
+ // Build the classMap in the format transformerNotationMap expects:
259
+ // { markerName: [classList] }
260
+ const classMap: Record<string, string[]> = {};
261
+ for (const [marker, cls] of Object.entries(opts.customNotations)) {
262
+ classMap[marker] = [cls];
263
+ }
264
+ transformers.push(
265
+ (transformerNotationMap as (opts: { classMap: Record<string, string[]>; matchAlgorithm: 'v3' }) => unknown)({
266
+ classMap,
267
+ matchAlgorithm: 'v3',
268
+ })
269
+ );
270
+ } catch {
271
+ // transformerNotationMap not available in this @shikijs/transformers version.
272
+ }
273
+ }
274
+
275
+ // Remove comments from rendered code (// ..., # ..., /* ... */, <!-- ... -->)
276
+ if (opts.removeComments) {
277
+ try {
278
+ const { transformerRemoveComments } = await import('@shikijs/transformers');
279
+ transformers.push(transformerRemoveComments());
280
+ } catch {
281
+ // Module not available — skip silently.
282
+ }
283
+ }
284
+
285
+ // Remove line breaks (joins all lines into one)
286
+ if (opts.removeLineBreaks) {
287
+ try {
288
+ const { transformerRemoveLineBreak } = await import('@shikijs/transformers');
289
+ transformers.push(transformerRemoveLineBreak());
290
+ } catch {
291
+ // Module not available — skip silently.
292
+ }
293
+ }
294
+
295
+ // Programmatic per-line class assignment (transformerCompactLineOptions)
296
+ if (opts.lineOptions && opts.lineOptions.length > 0) {
297
+ try {
298
+ const { transformerCompactLineOptions } = await import('@shikijs/transformers');
299
+ transformers.push(transformerCompactLineOptions(opts.lineOptions));
300
+ } catch {
301
+ // Module not available — skip silently.
302
+ }
303
+ }
304
+
305
+ // ANSI escape sequence stripping for terminal output.
306
+ // We use a custom transformer (not in @shikijs/transformers) that walks
307
+ // all text nodes and removes `\x1b\[[0-9;]*[a-zA-Z]` sequences.
308
+ // Applied only when the lang is 'ansi' (which is in the default terminalLangs).
309
+ // (The actual per-block application happens in runShikiOnRawBlocks based on lang.)
310
+
311
+ // User-provided transformers — 'before' or 'after' (default) our auto-registered ones.
312
+ const userTransformers = opts.shiki.transformers ?? [];
313
+ if (opts.shiki.transformerOrder === 'before') {
314
+ transformers.unshift(...userTransformers);
315
+ } else {
316
+ transformers.push(...userTransformers);
223
317
  }
224
318
 
225
319
  return transformers;
226
320
  }
227
321
 
322
/**
 * Build a transformer that strips ANSI escape sequences from text nodes.
 * Used for `lang: 'ansi'` blocks (terminal output with color codes).
 *
 * Sequences removed:
 *  - CSI: `ESC [`, parameter bytes (`0-9 : ; < = > ?`), intermediate bytes
 *    (space through `/`), then one final byte (`@` through `~`) — e.g.
 *    `\x1b[31m`, `\x1b[?25l`, `\x1b[3~`.
 *  - OSC: `ESC ]` up to a BEL (`\x07`) or ST (`ESC \`) terminator — e.g.
 *    window titles and `\x1b]8;;url\x07` hyperlinks.
 * An unterminated OSC sequence is left untouched rather than guessing where
 * it ends.
 *
 * @returns A transformer object (`name` plus a `code` hook). The hook mutates
 *   the tree it is given in place and returns that same tree. Typed as
 *   `unknown`, like the other entries collected in the `unknown[]`
 *   transformer list in this file.
 */
function createAnsiStripTransformer(): unknown {
  // Compiled once per transformer instead of once per text node.
  const ansiEscapePattern =
    /\x1b(?:\[[0-?]*[ -/]*[@-~]|\][^\x07\x1b]*(?:\x07|\x1b\\))/g;

  // Named so it does not shadow the `visit` imported from 'unist-util-visit'.
  const stripTextNodes = (node: unknown): void => {
    if (!node || typeof node !== 'object') return;
    const n = node as { type?: string; value?: string; children?: unknown[] };
    if (n.type === 'text' && typeof n.value === 'string') {
      n.value = n.value.replace(ansiEscapePattern, '');
    }
    if (Array.isArray(n.children)) {
      for (const child of n.children) stripTextNodes(child);
    }
  };

  return {
    name: 'pcb:ansi-strip',
    code(hast: unknown) {
      stripTextNodes(hast);
      return hast;
    },
  };
}
346
+
228
347
  /**
229
348
  * Walk the tree; for every <pre><code> that does NOT yet look Shiki-processed
230
349
  * (i.e. no `astro-code` / `shiki` class), tokenize it via Shiki and replace
@@ -246,14 +365,24 @@ export async function runShikiOnRawBlocks(
246
365
 
247
366
  if (targets.length === 0) return;
248
367
 
249
- // Build theme keys (one or two)
368
+ // Build theme keys — supports single (string), dual ({light,dark}), and
369
+ // multi-theme (Record<string,string> with 3+ entries) for advanced use cases.
250
370
  const themeSpec = opts.shiki.theme;
251
- const themeKeys: string[] =
252
- typeof themeSpec === 'string'
253
- ? [themeSpec]
254
- : themeSpec
255
- ? [themeSpec.dark, themeSpec.light]
256
- : ['github-dark'];
371
+ let themeKeys: string[];
372
+ let isMultiTheme = false;
373
+ if (typeof themeSpec === 'string') {
374
+ themeKeys = [themeSpec];
375
+ } else if (themeSpec && typeof themeSpec === 'object') {
376
+ if ('light' in themeSpec && 'dark' in themeSpec && Object.keys(themeSpec).length === 2) {
377
+ themeKeys = [themeSpec.dark, themeSpec.light];
378
+ } else {
379
+ // Multi-theme: any object other than exactly { light, dark } (typically 3+ named themes).
380
+ themeKeys = Object.values(themeSpec);
381
+ isMultiTheme = true;
382
+ }
383
+ } else {
384
+ themeKeys = ['github-dark'];
385
+ }
257
386
 
258
387
  // Collect all langs needed for these blocks
259
388
  const langSet = new Set<string>(opts.shiki.langs ?? []);
@@ -287,7 +416,7 @@ export async function runShikiOnRawBlocks(
287
416
  const loaded = new Set(highlighter.getLoadedLanguages());
288
417
  const missing = [...langSet].filter((l) => !loaded.has(l));
289
418
  if (missing.length > 0) {
290
- await Promise.allSettled(
419
+ const results = await Promise.allSettled(
291
420
  missing.map((l) => {
292
421
  try {
293
422
  return Promise.resolve(highlighter.loadLanguage(l));
@@ -296,10 +425,27 @@ export async function runShikiOnRawBlocks(
296
425
  }
297
426
  })
298
427
  );
428
+ // Log failed language loads (competitor analysis: EC does this, improves DX).
429
+ const failed: string[] = [];
430
+ results.forEach((r, i) => {
431
+ if (r.status === 'rejected') failed.push(missing[i]);
432
+ });
433
+ if (failed.length > 0) {
434
+ const logger = opts.logger ?? console;
435
+ logger.warn(
436
+ `[rehype-perfect-code-blocks] Failed to load languages: ${failed.join(', ')}. ` +
437
+ `Falling back to plaintext for these blocks. ` +
438
+ `Check for typos or install the language grammar.`
439
+ );
440
+ }
299
441
  }
300
442
 
301
443
  // Apply language aliases (e.g., { ts: 'typescript' }).
302
444
  const langAlias = opts.languageAliases ?? {};
445
+ // Resolve the logger once.
446
+ const logger = opts.logger ?? console;
447
+ // Track which langs we've already warned about (avoid duplicate warnings).
448
+ const warnedLangs = new Set<string>();
303
449
 
304
450
  for (const pre of targets) {
305
451
  const code = pre.children.find(
@@ -307,7 +453,14 @@ export async function runShikiOnRawBlocks(
307
453
  );
308
454
  if (!code) continue;
309
455
 
310
- const text = extractText(code);
456
+ // Normalize line endings: \r\n and \r → \n (prevents \r artifacts in output).
457
+ let text = extractText(code).replace(/\r\n?/g, '\n');
458
+
459
+ // tabWidth normalization: replace tabs with N spaces before tokenization.
460
+ if (opts.tabWidth && opts.tabWidth > 0) {
461
+ text = text.replace(/\t/g, ' '.repeat(opts.tabWidth));
462
+ }
463
+
311
464
  const langClass = (code.properties?.className as string[] | undefined)?.[0] ?? '';
312
465
  const rawLang = (langClass.match(/^language-(.+)$/) ?? [])[1] ?? 'plaintext';
313
466
  const lang = langAlias[rawLang] ?? rawLang;
@@ -316,10 +469,31 @@ export async function runShikiOnRawBlocks(
316
469
  (pre.properties?.dataMeta as string | undefined) ??
317
470
  '';
318
471
 
472
+ // Terminal <placeholder> workaround: Shiki mis-highlights shell snippets
473
+ // containing `<user>@<host>`. Temporarily replace `<...>` with a sentinel,
474
+ // then restore after tokenization.
475
+ const isTerminalLang = opts.terminalLangs.includes(lang);
476
+ let placeholderMap: Map<string, string> | null = null;
477
+ if (isTerminalLang && /<([^>]*[^>\s])>/.test(text)) {
478
+ placeholderMap = new Map();
479
+ let i = 0;
480
+ text = text.replace(/<([^>]*[^>\s])>/g, (match, inner) => {
481
+ const sentinel = `\u0000PCB_PH_${i++}\u0000`;
482
+ placeholderMap!.set(sentinel, `<${inner}>`);
483
+ return sentinel;
484
+ });
485
+ }
486
+
319
487
  const transformers = await buildTransformers(opts, metaStr);
320
488
 
321
- // Build codeToHast/codeToHtml options. Use `themes` (plural) for dual-theme output
322
- // so Shiki emits `--shiki-light` / `--shiki-dark` CSS vars.
489
+ // For 'ansi' lang, add the ANSI escape-sequence stripper transformer.
490
+ if (lang === 'ansi') {
491
+ transformers.push(createAnsiStripTransformer());
492
+ }
493
+
494
+ // Build codeToHast/codeToHtml options. Use `themes` (plural) for dual-theme
495
+ // and multi-theme output so Shiki emits `--shiki-light` / `--shiki-dark` /
496
+ // `--shiki-<name>` CSS vars.
323
497
  const shikiOpts: Record<string, unknown> = {
324
498
  lang,
325
499
  meta: { __raw: metaStr },
@@ -327,6 +501,11 @@ export async function runShikiOnRawBlocks(
327
501
  };
328
502
  if (typeof themeSpec === 'string') {
329
503
  shikiOpts.theme = themeSpec;
504
+ } else if (isMultiTheme) {
505
+ // Multi-theme (3+ themes): pass the full Record as `themes`.
506
+ shikiOpts.themes = themeSpec;
507
+ // Don't inline any single theme — emit all variants as CSS vars.
508
+ shikiOpts.defaultColor = false;
330
509
  } else {
331
510
  shikiOpts.themes = themeSpec;
332
511
  shikiOpts.defaultColor = 'dark'; // tells Shiki which color to inline by default
@@ -344,29 +523,57 @@ export async function runShikiOnRawBlocks(
344
523
  // `className`, `aria-hidden` instead of `ariaHidden`). Normalize them
345
524
  // so the rest of our pipeline (which expects hast property names) works.
346
525
  normalizeHast(hastRoot);
526
+ // Restore terminal <placeholder> sentinels back to original text.
527
+ if (placeholderMap) {
528
+ restorePlaceholders(hastRoot, placeholderMap);
529
+ }
347
530
  newPre = hastRoot.children.find(
348
531
  (c): c is Element => c.type === 'element' && c.tagName === 'pre'
349
532
  ) ?? null;
350
533
  } else {
351
534
  const html = highlighter.codeToHtml(text, shikiOpts);
352
- const fragment = fromHtml(html, { fragment: true });
535
+ let htmlOut = html;
536
+ if (placeholderMap) {
537
+ for (const [sentinel, original] of placeholderMap) {
538
+ htmlOut = htmlOut.split(sentinel).join(original);
539
+ }
540
+ }
541
+ const fragment = fromHtml(htmlOut, { fragment: true });
353
542
  newPre = fragment.children.find(
354
543
  (c): c is Element => c.type === 'element' && c.tagName === 'pre'
355
544
  ) ?? null;
356
545
  }
357
- } catch {
546
+ } catch (err) {
547
+ // Log unknown-language fallbacks (once per lang).
548
+ const langKey = lang;
549
+ if (!warnedLangs.has(langKey) && langKey !== 'plaintext') {
550
+ warnedLangs.add(langKey);
551
+ logger.warn(
552
+ `[rehype-perfect-code-blocks] Failed to tokenize language "${langKey}" ` +
553
+ `(${err instanceof Error ? err.message : String(err)}). Falling back to plaintext.`
554
+ );
555
+ }
358
556
  // Fallback: plaintext
359
557
  try {
360
558
  const fallbackOpts = { ...shikiOpts, lang: 'plaintext' };
361
559
  if (useHast) {
362
560
  const hastRoot = highlighter.codeToHast(text, fallbackOpts) as { type: 'root'; children: Element[] };
363
561
  normalizeHast(hastRoot);
562
+ if (placeholderMap) {
563
+ restorePlaceholders(hastRoot, placeholderMap);
564
+ }
364
565
  newPre = hastRoot.children.find(
365
566
  (c): c is Element => c.type === 'element' && c.tagName === 'pre'
366
567
  ) ?? null;
367
568
  } else {
368
569
  const html = highlighter.codeToHtml(text, fallbackOpts);
369
- const fragment = fromHtml(html, { fragment: true });
570
+ let htmlOut = html;
571
+ if (placeholderMap) {
572
+ for (const [sentinel, original] of placeholderMap) {
573
+ htmlOut = htmlOut.split(sentinel).join(original);
574
+ }
575
+ }
576
+ const fragment = fromHtml(htmlOut, { fragment: true });
370
577
  newPre = fragment.children.find(
371
578
  (c): c is Element => c.type === 'element' && c.tagName === 'pre'
372
579
  ) ?? null;
@@ -465,3 +672,28 @@ function normalizeHast(node: unknown): void {
465
672
  for (const child of n.children) normalizeHast(child);
466
673
  }
467
674
  }
675
+
676
/**
 * Restore terminal `<placeholder>` sentinels back to their original text.
 *
 * Used after Shiki tokenization to undo the temporary sentinel substitution
 * applied to shell/terminal blocks (so `<user>@<host>`-style text is not
 * mis-highlighted). The tree is mutated in place.
 *
 * Restoration runs in two passes over the tree's text nodes, in document
 * order:
 *  1. Every sentinel that lies wholly inside one text node is replaced there.
 *  2. Any sentinel that only appears when adjacent text nodes are read
 *     together (i.e. it was split across several nodes) is replaced too: the
 *     original text is written into the first node involved and the sentinel
 *     fragments are removed from the following ones.
 *
 * @param node - Root of the HAST (sub)tree to process; non-object values are
 *   ignored.
 * @param map - Sentinel string → original `<...>` text.
 */
function restorePlaceholders(node: unknown, map: Map<string, string>): void {
  if (map.size === 0) return;

  const textNodes = collectPlaceholderTextNodes(node);
  if (textNodes.length === 0) return;

  // Pass 1: sentinels contained in a single text node.
  for (const textNode of textNodes) {
    let value = textNode.value;
    for (const [sentinel, original] of map) {
      // split/join avoids regex special-char issues with sentinels.
      value = value.split(sentinel).join(original);
    }
    textNode.value = value;
  }

  // Pass 2: sentinels split across adjacent text nodes. `joined` mirrors the
  // concatenation of all text node values and is kept in sync after each
  // splice so offsets stay valid.
  let joined = textNodes.map((t) => t.value).join('');
  for (const [sentinel, original] of map) {
    if (sentinel === '') continue;
    let start = joined.indexOf(sentinel);
    while (start !== -1) {
      const end = start + sentinel.length;
      splicePlaceholderAcrossNodes(textNodes, start, end, original);
      joined = joined.slice(0, start) + original + joined.slice(end);
      // Resume after the inserted text so a replacement is never re-scanned.
      start = joined.indexOf(sentinel, start + original.length);
    }
  }
}

/** Collect, in document order, every text node (string `value`) under `node`. */
function collectPlaceholderTextNodes(
  node: unknown,
  out: { value: string }[] = []
): { value: string }[] {
  if (!node || typeof node !== 'object') return out;
  const n = node as { type?: string; value?: unknown; children?: unknown };
  if (n.type === 'text' && typeof n.value === 'string') {
    out.push(n as { value: string });
  }
  if (Array.isArray(n.children)) {
    for (const child of n.children) collectPlaceholderTextNodes(child, out);
  }
  return out;
}

/**
 * Replace the range [start, end) of the concatenated text of `nodes` with
 * `replacement`, writing it into the first node that overlaps the range.
 */
function splicePlaceholderAcrossNodes(
  nodes: { value: string }[],
  start: number,
  end: number,
  replacement: string
): void {
  let offset = 0;
  let inserted = false;
  for (const textNode of nodes) {
    const nodeStart = offset;
    const nodeEnd = nodeStart + textNode.value.length;
    offset = nodeEnd;
    if (nodeEnd <= start) continue; // entirely before the range
    if (nodeStart >= end) break; // entirely after the range
    const head = textNode.value.slice(0, Math.max(0, start - nodeStart));
    const tail = textNode.value.slice(Math.max(0, end - nodeStart));
    textNode.value = inserted ? head + tail : head + replacement + tail;
    inserted = true;
  }
}