docrev 0.9.17 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/build.ts CHANGED
@@ -10,6 +10,7 @@
10
10
 
11
11
  import * as fs from 'fs';
12
12
  import * as path from 'path';
13
+ import { fileURLToPath } from 'url';
13
14
  import { execSync, spawn, ChildProcess } from 'child_process';
14
15
  import YAML from 'yaml';
15
16
  import { stripAnnotations } from './annotations.js';
@@ -24,6 +25,13 @@ import { buildImageRegistry, writeImageRegistry } from './image-registry.js';
24
25
  import type { Author, JournalFormatting } from './types.js';
25
26
  import { getJournalProfile } from './journals.js';
26
27
  import { resolveCSL } from './csl.js';
28
+ import {
29
+ type MacroDef,
30
+ mergeMacros,
31
+ generateLatexPreamble,
32
+ writeMacrosSidecar,
33
+ getMacroFilterPath,
34
+ } from './macros.js';
27
35
 
28
36
  // =============================================================================
29
37
  // Constants
@@ -32,8 +40,13 @@ import { resolveCSL } from './csl.js';
32
40
  /** Supported output formats */
33
41
  const SUPPORTED_FORMATS = ['pdf', 'docx', 'tex', 'beamer', 'pptx'] as const;
34
42
 
35
- /** Maximum title length for output filename */
36
- const MAX_TITLE_FILENAME_LENGTH = 50;
43
+ /**
44
+ * Maximum length for slugified-title output filenames. Only used when no
45
+ * explicit `output:` filename is configured. Long titles are truncated at the
46
+ * last `-` boundary at-or-before this length so words stay intact (the old
47
+ * blind `.slice(0, 50)` cut mid-word).
48
+ */
49
+ const MAX_TITLE_FILENAME_LENGTH = 80;
37
50
 
38
51
  // =============================================================================
39
52
  // Interfaces
@@ -69,6 +82,8 @@ export interface PdfConfig {
69
82
  sansfont?: string;
70
83
  /** Monospace font (xelatex/lualatex only). */
71
84
  monofont?: string;
85
+ /** Extra pandoc args appended for this format (after top-level pandocArgs). */
86
+ pandocArgs?: string[];
72
87
  }
73
88
 
74
89
  export interface DocxConfig {
@@ -76,10 +91,19 @@ export interface DocxConfig {
76
91
  keepComments?: boolean;
77
92
  affiliationNewline?: boolean;
78
93
  toc?: boolean;
94
+ pandocArgs?: string[];
95
+ /**
96
+ * Auto-translate the common-shape raw `\begin{figure}...\end{figure}` block
97
+ * to portable `![caption](path){#fig:label width=N%}` markdown so figures
98
+ * survive the docx build (pandoc otherwise drops raw LaTeX silently).
99
+ * Default true. Set false to opt out — blocks then warn and are left alone.
100
+ */
101
+ translateRawFigures?: boolean;
79
102
  }
80
103
 
81
104
  export interface TexConfig {
82
105
  standalone?: boolean;
106
+ pandocArgs?: string[];
83
107
  }
84
108
 
85
109
  export interface BeamerConfig {
@@ -91,6 +115,7 @@ export interface BeamerConfig {
91
115
  section?: boolean;
92
116
  notes?: string | false;
93
117
  fit_images?: boolean;
118
+ pandocArgs?: string[];
94
119
  }
95
120
 
96
121
  export interface PptxConfig {
@@ -106,6 +131,7 @@ export interface PptxConfig {
106
131
  accent?: string;
107
132
  enabled?: boolean;
108
133
  };
134
+ pandocArgs?: string[];
109
135
  }
110
136
 
111
137
  export interface TablesConfig {
@@ -137,12 +163,32 @@ export interface BuildConfig {
137
163
  pptx: PptxConfig;
138
164
  tables: TablesConfig;
139
165
  postprocess: PostprocessConfig;
166
+ /**
167
+ * User-declared placeholder macros. Merged with the built-in macros
168
+ * (currently \tofill). Each entry overrides a built-in by name.
169
+ *
170
+ * See lib/macros.ts for the per-format rendering rules.
171
+ */
172
+ macros?: MacroDef[];
140
173
  /**
141
174
  * Directory (relative to the project) where final outputs land. Created on
142
175
  * demand. Set to null/empty to keep outputs alongside paper.md (legacy
143
176
  * behavior).
144
177
  */
145
178
  outputDir?: string | null;
179
+ /**
180
+ * Per-format output filenames. Keys are format names (pdf/docx/tex/beamer/
181
+ * pptx); values are paths. Relative paths resolve under outputDir; absolute
182
+ * paths are honored as-is. Extension is added if missing. CLI `-o` wins
183
+ * over this map.
184
+ */
185
+ output?: Record<string, string>;
186
+ /**
187
+ * Extra pandoc args applied to every format. Format-specific args
188
+ * (e.g. docx.pandocArgs) are appended *after* these, and CLI --pandoc-arg
189
+ * values are appended last.
190
+ */
191
+ pandocArgs?: string[];
146
192
  _configPath?: string | null;
147
193
  }
148
194
 
@@ -156,8 +202,20 @@ export interface BuildResult {
156
202
  interface BuildOptions {
157
203
  verbose?: boolean;
158
204
  config?: BuildConfig;
205
+ /**
206
+ * Internal: forces the exact output path. Used by dual-mode/temp builds that
207
+ * route to specific temp files. Bypasses the `output:` resolver.
208
+ */
159
209
  outputPath?: string;
210
+ /**
211
+ * CLI override (`-o, --output <path>`). Beats `config.output[format]` but
212
+ * loses to `options.outputPath`. Relative paths resolve under outputDir;
213
+ * absolute paths bypass outputDir.
214
+ */
215
+ output?: string;
160
216
  crossref?: boolean;
217
+ /** Extra pandoc args from CLI (--pandoc-arg). Appended after config args. */
218
+ pandocArgs?: string[];
161
219
  _refsAutoInjected?: boolean;
162
220
  _forwardRefsResolved?: number;
163
221
  }
@@ -236,6 +294,7 @@ export const DEFAULT_CONFIG: BuildConfig = {
236
294
  keepComments: false,
237
295
  affiliationNewline: true,
238
296
  toc: false,
297
+ translateRawFigures: true,
239
298
  },
240
299
  tex: {
241
300
  standalone: true,
@@ -269,6 +328,9 @@ export const DEFAULT_CONFIG: BuildConfig = {
269
328
  beamer: null,
270
329
  all: null, // Runs after any format
271
330
  },
331
+ // Placeholder/highlight macros. Defaults are the built-ins from
332
+ // lib/macros.ts; users append their own here.
333
+ macros: [],
272
334
  // Final outputs land here (created on demand). Set to null or '' to keep
273
335
  // outputs in the project root.
274
336
  outputDir: 'output',
@@ -341,6 +403,20 @@ export function mergeJournalFormatting(config: BuildConfig, formatting: JournalF
341
403
  return merged;
342
404
  }
343
405
 
406
/**
 * Normalise the pandoc passthrough-args key on a parsed config object, in place.
 *
 * - Copies hyphenated `pandoc-args` to camelCase `pandocArgs` unless the
 *   camelCase key is already set, then removes the hyphenated key.
 * - Coerces whichever value ends up under `pandocArgs` to a `string[]`:
 *   a scalar becomes a one-element list, numbers/booleans (YAML parses an
 *   unquoted `- 80` as a number) are stringified, and null/undefined/object
 *   entries are dropped so they never reach the pandoc command line.
 *
 * Idempotent: running it again on an already-normalised object is a no-op.
 *
 * @param obj - Parsed config (or per-format sub-config). Non-objects are ignored.
 */
function normalizePandocArgsKey(obj: Record<string, unknown>): void {
  if (!obj || typeof obj !== 'object') return;

  const hyphenated = obj['pandoc-args'];
  if (hyphenated !== undefined && obj.pandocArgs === undefined) {
    obj.pandocArgs = hyphenated;
  }
  delete obj['pandoc-args'];

  const current = obj.pandocArgs;
  if (current === undefined) return;

  // Coerce the camelCase form too: `pandocArgs: "--toc"` would otherwise stay
  // a string and be spread character-by-character by downstream consumers.
  const items: unknown[] = Array.isArray(current) ? current : [current];
  const cleaned: string[] = [];
  for (const item of items) {
    if (typeof item === 'string') {
      cleaned.push(item);
    } else if (typeof item === 'number' || typeof item === 'boolean') {
      cleaned.push(String(item));
    }
    // null / undefined / nested structures are not valid CLI args: dropped.
  }
  obj.pandocArgs = cleaned;
}
419
+
344
420
  /**
345
421
  * Load rev.yaml config from directory
346
422
  * @param directory - Project directory path
@@ -363,6 +439,16 @@ export function loadConfig(directory: string): BuildConfig {
363
439
  const content = fs.readFileSync(configPath, 'utf-8');
364
440
  const userConfig = YAML.parse(content) || {};
365
441
 
442
+ // Accept hyphenated `pandoc-args` (the form pandoc itself uses) in addition
443
+ // to camelCase `pandocArgs`. Hyphenated is what we document; camelCase is
444
+ // accepted for users who already prefer that convention.
445
+ normalizePandocArgsKey(userConfig);
446
+ for (const fmt of ['pdf', 'docx', 'tex', 'beamer', 'pptx'] as const) {
447
+ if (userConfig[fmt] && typeof userConfig[fmt] === 'object') {
448
+ normalizePandocArgsKey(userConfig[fmt]);
449
+ }
450
+ }
451
+
366
452
  // Deep merge with defaults
367
453
  let config: BuildConfig = {
368
454
  ...DEFAULT_CONFIG,
@@ -830,6 +916,14 @@ export function applyFormatTransforms(
830
916
  } else if (format === 'docx') {
831
917
  content = convertDynamicRefsToDisplay(content, registry);
832
918
 
919
+ // Pandoc strips raw LaTeX in docx output. Translate the common
920
+ // `\begin{figure}...\end{figure}` shape to portable markdown so figures
921
+ // actually appear; exotic blocks are left alone (warned about in build()).
922
+ if (config.docx?.translateRawFigures !== false) {
923
+ const { translated } = translateRawLatexFigures(content);
924
+ content = translated;
925
+ }
926
+
833
927
  if (hasNumberedAffiliations(config)) {
834
928
  const mdBlock = generateMarkdownAuthorBlock(config);
835
929
  content = content.replace(/^(---\r?\n[\s\S]*?---\r?\n)/, `$1\n${mdBlock}\n`);
@@ -897,8 +991,207 @@ function convertDynamicRefsToDisplay(text: string, registry: Registry): string {
897
991
  return result;
898
992
  }
899
993
 
994
+ // =============================================================================
995
+ // Raw LaTeX figure detection / translation (docx)
996
+ // =============================================================================
997
+
998
+ /**
999
+ * A raw LaTeX `\begin{figure}...\end{figure}` block found in source markdown.
1000
+ * `exotic` blocks contain features we don't auto-translate (multiple
1001
+ * `\includegraphics`, `\subfloat`, `\rotatebox`, unrecognised width units);
1002
+ * pandoc strips raw LaTeX silently in docx output, so users get warned about
1003
+ * anything that won't be translated.
1004
+ */
1005
+ export interface RawLatexFigure {
1006
+ file?: string;
1007
+ line: number;
1008
+ block: string;
1009
+ exotic: boolean;
1010
+ }
1011
+
1012
/**
 * Build a fresh global regex matching a raw LaTeX figure environment:
 * `\begin{figure}` or `\begin{figure*}`, an optional `[placement]` argument,
 * then the shortest run of text up to the next `\end{figure}` / `\end{figure*}`.
 *
 * A new RegExp is returned on every call so callers never share `lastIndex`
 * state between scans.
 */
function makeRawFigureRegex(): RegExp {
  const figureEnvironment = /\\begin\{figure\*?\}(?:\[[^\]]*\])?[\s\S]*?\\end\{figure\*?\}/g;
  return figureEnvironment;
}
1016
+
1017
/**
 * Convert a LaTeX width spec to a markdown image attribute value.
 * - `0.8\textwidth` → `80%` (also `\linewidth`, `\columnwidth`)
 * - `\linewidth` → `100%`
 * - `8cm`, `2in`, `12pt` → kept verbatim (inner whitespace removed)
 *
 * The numeric part must be a well-formed, positive decimal (`8`, `0.8`, `.5`,
 * `8.`); malformed numbers such as `.` or `1.2.3` and zero widths are rejected.
 *
 * @param raw - The text after `width=` in an `\includegraphics` option list.
 * @returns The attribute value, or null for anything we don't translate
 *          (the enclosing block then stays "exotic").
 */
function convertLatexWidth(raw: string): string | null {
  const spec = raw.trim();

  // Coefficient × relative length
  const relative = /^(\d+(?:\.\d*)?|\.\d+)\s*\\(textwidth|linewidth|columnwidth)$/.exec(spec);
  if (relative) {
    const percent = Math.round(parseFloat(relative[1]!) * 100);
    if (!Number.isFinite(percent) || percent <= 0) return null;
    return `${percent}%`;
  }

  // Bare relative length
  if (/^\\(textwidth|linewidth|columnwidth)$/.test(spec)) return '100%';

  // Absolute units
  const absolute = /^(\d+(?:\.\d*)?|\.\d+)\s*(cm|mm|in|pt|px|em|ex)$/.exec(spec);
  if (absolute) {
    const magnitude = parseFloat(absolute[1]!);
    if (!Number.isFinite(magnitude) || magnitude <= 0) return null;
    return `${absolute[1]!}${absolute[2]!}`;
  }

  return null;
}
1039
+
1040
/**
 * Extract the balanced `{...}` argument of the first usable occurrence of
 * `command` in `text`.
 *
 * An occurrence is usable when:
 * - it is the whole command name, not a prefix of a longer one (looking for
 *   `\caption` must not stop at `\captionsetup{...}`);
 * - after an optional `*`, optional whitespace and any number of optional
 *   `[...]` arguments (e.g. `\caption[short]{long}`), a `{` follows;
 * - that `{` has a matching `}`.
 * Unusable occurrences are skipped and the search continues.
 *
 * Backslash-escaped characters (`\{`, `\}`, `\]`) never count as delimiters.
 *
 * @param text - LaTeX source to search.
 * @param command - Command including its backslash, e.g. `\\caption`.
 * @returns The argument text without the outer braces, or null if none found.
 */
function extractBracedArg(text: string, command: string): string | null {
  const skipSpace = (pos: number): number => {
    while (pos < text.length && /\s/.test(text[pos]!)) pos++;
    return pos;
  };

  // Index of the `closer` that ends the group opened at `open`, or -1.
  // Nested `{...}` groups are skipped so a `]` or `}` inside them is ignored.
  const findClose = (open: number, closer: '}' | ']'): number => {
    let braceDepth = 0;
    for (let pos = open + 1; pos < text.length; pos++) {
      const ch = text[pos]!;
      if (ch === '\\') {
        pos++; // skip the escaped character
        continue;
      }
      if (ch === closer && braceDepth === 0) return pos;
      if (ch === '{') braceDepth++;
      else if (ch === '}' && braceDepth > 0) braceDepth--;
    }
    return -1;
  };

  const nameEndsWithLetter = /[A-Za-z]$/.test(command);
  let searchFrom = 0;
  while (searchFrom <= text.length) {
    const idx = text.indexOf(command, searchFrom);
    if (idx === -1) return null;
    searchFrom = idx + 1;

    let i = idx + command.length;
    // A following letter means we matched the prefix of a longer command.
    if (nameEndsWithLetter && /[A-Za-z@]/.test(text[i] ?? '')) continue;
    if (text[i] === '*') i++; // starred variant, e.g. \caption*{...}
    i = skipSpace(i);

    let malformed = false;
    while (text[i] === '[') {
      const close = findClose(i, ']');
      if (close === -1) {
        malformed = true;
        break;
      }
      i = skipSpace(close + 1);
    }
    if (malformed || text[i] !== '{') continue;

    const end = findClose(i, '}');
    if (end !== -1) return text.slice(i + 1, end);
  }
  return null;
}
1062
+
1063
/**
 * Decide whether a `\begin{figure}` block is outside what the auto-translator
 * handles. A block is exotic when any of these hold:
 * - it uses `\subfloat` or `\rotatebox`;
 * - it does not contain exactly one `\includegraphics`;
 * - that `\includegraphics` has no parseable `{path}` argument;
 * - its `width=` option cannot be converted by `convertLatexWidth`.
 */
function isExoticFigureBlock(block: string): boolean {
  const usesUnsupportedWrapper = /\\subfloat\b/.test(block) || /\\rotatebox\b/.test(block);
  if (usesUnsupportedWrapper) return true;

  const graphicsCount = (block.match(/\\includegraphics\b/g) || []).length;
  if (graphicsCount !== 1) return true;

  const graphics = /\\includegraphics\s*(?:\[([^\]]*)\])?\s*\{([^}]+)\}/.exec(block);
  if (graphics === null) return true;

  // Only a width that is present but untranslatable makes the block exotic;
  // a missing width is fine.
  const widthSpec = /(?:^|,)\s*width\s*=\s*([^,]+)/.exec(graphics[1] || '');
  return widthSpec !== null && !convertLatexWidth(widthSpec[1]!);
}
1076
+
1077
/**
 * Scan markdown for raw LaTeX figure environments that contain an
 * `\includegraphics`. Environments without one are ignored.
 *
 * @param content - Markdown source to scan.
 * @param file - Optional label copied onto every result.
 * @returns One entry per matching block. `line` is the 1-based line of the
 *          block's `\begin{figure}` within `content`; `exotic` is the verdict
 *          of `isExoticFigureBlock`.
 */
export function detectRawLatexFigures(content: string, file?: string): RawLatexFigure[] {
  const found: RawLatexFigure[] = [];
  const scanner = makeRawFigureRegex();
  for (let hit = scanner.exec(content); hit !== null; hit = scanner.exec(content)) {
    const block = hit[0];
    if (block.indexOf('\\includegraphics') === -1) continue;
    // Line number = newlines preceding the match, plus one.
    const newlinesBefore = (content.slice(0, hit.index).match(/\n/g) || []).length;
    found.push({
      file,
      line: newlinesBefore + 1,
      block,
      exotic: isExoticFigureBlock(block),
    });
  }
  return found;
}
1094
+
1095
/**
 * Translate the common case — a `\begin{figure}...\end{figure}` block holding
 * a single `\includegraphics` with optional `\caption{...}` and `\label{...}`
 * — to portable `![caption](path){#fig:label width=N%}` markdown.
 *
 * Blocks without `\includegraphics`, and exotic blocks (see
 * `isExoticFigureBlock`), are returned untouched.
 *
 * Caption handling: a `\label{...}` nested inside the caption (the common
 * `\caption{Text\label{fig:x}}` idiom) is removed from the caption text, and
 * whitespace runs, including newlines, are collapsed to single spaces so the
 * image stays on one line. The label itself still becomes the `#id`.
 *
 * Label handling: a label without a `prefix:` gets `fig:` prepended.
 *
 * @param content - Markdown source possibly containing raw LaTeX figures.
 * @returns The rewritten text and the number of blocks that were translated.
 */
export function translateRawLatexFigures(content: string): { translated: string; translatedCount: number } {
  let translatedCount = 0;
  const re = makeRawFigureRegex();
  const translated = content.replace(re, (block) => {
    if (!block.includes('\\includegraphics')) return block;
    if (isExoticFigureBlock(block)) return block;

    const inc = block.match(/\\includegraphics\s*(?:\[([^\]]*)\])?\s*\{([^}]+)\}/);
    if (!inc) return block;
    const optsStr = inc[1] || '';
    const imgPath = inc[2]!.trim();

    let width: string | undefined;
    const widthMatch = optsStr.match(/(?:^|,)\s*width\s*=\s*([^,]+)/);
    if (widthMatch) {
      const w = convertLatexWidth(widthMatch[1]!);
      if (!w) return block; // already filtered by isExoticFigureBlock, defensive
      width = w;
    }

    // Strip a \label nested in the caption so it doesn't leak into the
    // rendered caption as literal text, then flatten to a single line.
    const caption = (extractBracedArg(block, '\\caption') ?? '')
      .replace(/\\label\s*\{[^{}]*\}/g, '')
      .replace(/\s+/g, ' ')
      .trim();
    const labelRaw = extractBracedArg(block, '\\label');

    const attrs: string[] = [];
    if (labelRaw) {
      const label = labelRaw.trim();
      const labelWithPrefix = /^[a-z]+:/i.test(label) ? label : `fig:${label}`;
      attrs.push(`#${labelWithPrefix}`);
    }
    if (width) attrs.push(`width=${width}`);

    translatedCount++;
    const attrStr = attrs.length > 0 ? ` {${attrs.join(' ')}}` : '';
    return `![${caption}](${imgPath})${attrStr}`;
  });
  return { translated, translatedCount };
}
1138
+
1139
/**
 * Render the multi-line warning listing raw LaTeX figure blocks that won't
 * appear in docx output.
 *
 * @param figs - Blocks to report; each contributes one location line, followed
 *               by the image path when an `\includegraphics{...}` is found.
 * @param translateEnabled - Selects the stated reason: true means the blocks
 *               were too complex to auto-translate, false means translation
 *               was switched off.
 * @returns Header line, one line per block, then a two-line hint, joined by `\n`.
 */
function formatRawLatexFigureWarning(figs: RawLatexFigure[], translateEnabled: boolean): string {
  let reason = 'translateRawFigures: false';
  if (translateEnabled) {
    reason = 'too complex to auto-translate';
  }

  const entries = figs.map((fig) => {
    const where = fig.file ? `${fig.file}:${fig.line}` : `line ${fig.line}`;
    const graphic = /\\includegraphics\s*(?:\[[^\]]*\])?\s*\{([^}]+)\}/.exec(fig.block);
    const target = graphic ? ` ${graphic[1]!.trim()}` : '';
    return ` ${where}${target}`;
  });

  return [
    `${figs.length} raw LaTeX figure block(s) won't render in docx (${reason}).`,
    ...entries,
    ' Hint: use ![caption](path){#fig:label width=80%} for format-portable figures,',
    ' or pass --pandoc-arg=--lua-filter=<your.lua> to translate them yourself.',
  ].join('\n');
}
1159
+
1160
/**
 * Read every section file returned by `findSections(directory, config.sections)`
 * and build the docx warning for raw LaTeX figure blocks found in them.
 *
 * With `config.docx.translateRawFigures` on (anything but `false`), only
 * exotic blocks are reported, since the rest are auto-translated. With it
 * off, every detected block is reported.
 *
 * Sections that don't exist or can't be read are skipped silently.
 *
 * @returns The formatted warning, or null when no block needs reporting.
 */
export function collectRawLatexFigureWarning(directory: string, config: BuildConfig): string | null {
  const translateEnabled = config.docx?.translateRawFigures !== false;
  const atRisk: RawLatexFigure[] = [];

  for (const section of findSections(directory, config.sections)) {
    const sectionPath = path.join(directory, section);
    if (!fs.existsSync(sectionPath)) continue;

    let detected: RawLatexFigure[];
    try {
      detected = detectRawLatexFigures(fs.readFileSync(sectionPath, 'utf-8'), section);
    } catch {
      // ignore unreadable sections
      continue;
    }

    for (const fig of detected) {
      // Keep everything when translation is off; otherwise only the exotic
      // leftovers the translator will not rewrite.
      if (!translateEnabled || fig.exotic) {
        atRisk.push(fig);
      }
    }
  }

  return atRisk.length === 0 ? null : formatRawLatexFigureWarning(atRisk, translateEnabled);
}
1187
+
900
1188
  /**
901
- * Build pandoc arguments for format
1189
+ * Build pandoc arguments for format.
1190
+ *
1191
+ * Returns only the built-in args derived from config. Passthrough args
1192
+ * (config.pandocArgs, config[format].pandocArgs, CLI --pandoc-arg) are
1193
+ * appended later in runPandoc so they win against pptx/crossref defaults
1194
+ * added there.
902
1195
  */
903
1196
  export function buildPandocArgs(format: string, config: BuildConfig, outputPath: string): string[] {
904
1197
  const args: string[] = [];
@@ -1016,6 +1309,30 @@ export function buildPandocArgs(format: string, config: BuildConfig, outputPath:
1016
1309
  return args;
1017
1310
  }
1018
1311
 
1312
/**
 * Collect passthrough pandoc args for a format in the canonical order:
 * top-level `config.pandocArgs` → `config[format].pandocArgs` → `extraArgs`
 * (CLI). Later entries win for repeated flags.
 *
 * Each source is coerced defensively: a bare string is treated as a single
 * argument (never spread into characters), numbers/booleans are stringified,
 * and null/undefined/object entries are dropped.
 *
 * @param format - Output format name; used as the key of the per-format config.
 * @param config - Build configuration.
 * @param extraArgs - Extra args from the CLI, appended last.
 * @returns A new array; the inputs are not mutated.
 */
export function collectPandocPassthroughArgs(
  format: string,
  config: BuildConfig,
  extraArgs: string[] = []
): string[] {
  const toArgList = (value: unknown): string[] => {
    if (value === undefined || value === null) return [];
    const items: unknown[] = Array.isArray(value) ? value : [value];
    const out: string[] = [];
    for (const item of items) {
      if (typeof item === 'string') out.push(item);
      else if (typeof item === 'number' || typeof item === 'boolean') out.push(String(item));
    }
    return out;
  };

  // The per-format section is looked up dynamically; anything that is not a
  // plain object (missing section, null, a scalar) contributes nothing.
  const section = (config as unknown as Record<string, unknown>)[format];
  const formatArgs =
    section !== null && typeof section === 'object'
      ? (section as { pandocArgs?: unknown }).pandocArgs
      : undefined;

  return [
    ...toArgList(config.pandocArgs),
    ...toArgList(formatArgs),
    ...toArgList(extraArgs),
  ];
}
1335
+
1019
1336
  /**
1020
1337
  * Write crossref.yaml if needed
1021
1338
  */
@@ -1048,6 +1365,92 @@ export function resolveOutputDir(directory: string, config: BuildConfig): string
1048
1365
  return path.isAbsolute(out) ? out : path.join(directory, out);
1049
1366
  }
1050
1367
 
1368
/** File extension (with leading dot) for each supported pandoc format. */
const FORMAT_EXTENSIONS: Record<string, string> = {
  tex: '.tex',
  pdf: '.pdf',
  docx: '.docx',
  beamer: '.pdf', // beamer outputs PDF
  pptx: '.pptx',
};

/**
 * Get the file extension for a format, defaulting to `.pdf` for anything
 * not listed in FORMAT_EXTENSIONS.
 *
 * Only own keys of the table are consulted, so format strings that happen to
 * name an inherited object member (`constructor`, `toString`, `__proto__`)
 * fall back to `.pdf` instead of returning a non-string.
 */
export function getFormatExtension(format: string): string {
  const isKnown = Object.prototype.hasOwnProperty.call(FORMAT_EXTENSIONS, format);
  const known = isKnown ? FORMAT_EXTENSIONS[format] : undefined;
  return known ?? '.pdf';
}
1381
+
1382
/**
 * Slugify a title for use as a default output filename.
 *
 * Lowercases, replaces every run of characters outside `a-z0-9` with a single
 * `-`, and trims leading/trailing dashes. An empty title or empty slug yields
 * `paper`.
 *
 * Slugs longer than MAX_TITLE_FILENAME_LENGTH are truncated at the last word
 * boundary at-or-before that length so words stay whole:
 * - if the character right after the cut is `-`, the cut already ends on a
 *   complete word and is returned as-is;
 * - otherwise the cut is shortened to its last `-`, provided at least half of
 *   the maximum length survives;
 * - otherwise (degenerate titles with few or no separators) it is hard-cut.
 */
export function slugifyTitle(title: string): string {
  if (!title) return 'paper';
  const slug = title.toLowerCase().replace(/[^a-z0-9]+/g, '-').replace(/^-+|-+$/g, '');
  if (!slug) return 'paper';
  if (slug.length <= MAX_TITLE_FILENAME_LENGTH) return slug;

  const cut = slug.slice(0, MAX_TITLE_FILENAME_LENGTH);
  // Boundary exactly at the limit: the last word fits in full, keep it.
  // (Dash runs are collapsed above, so `cut` cannot itself end in `-` here.)
  if (slug[MAX_TITLE_FILENAME_LENGTH] === '-') return cut;

  const lastDash = cut.lastIndexOf('-');
  // Only truncate at a hyphen if it leaves a reasonable amount of content.
  if (lastDash >= MAX_TITLE_FILENAME_LENGTH / 2) {
    return slug.slice(0, lastDash);
  }
  return cut;
}
1402
+
1403
/**
 * Return `name` guaranteed to end with `ext`, compared case-insensitively.
 *
 * A name that already carries the extension comes back unchanged, with its
 * original casing. Any other name, whether extension-less or carrying a
 * different extension, gets `ext` appended rather than substituted: building
 * tex with `output.docx` yields `output.docx.tex`, which makes the
 * misconfiguration visible instead of guessing what to strip.
 */
function ensureExtension(name: string, ext: string): string {
  const alreadySuffixed = name.toLowerCase().endsWith(ext.toLowerCase());
  return alreadySuffixed ? name : `${name}${ext}`;
}
1416
+
1417
/**
 * Resolve the final output path for a build.
 *
 * Priority: `cliOverride` (-o flag) > `config.output[format]` > slugified
 * title fallback. An empty override or a non-string config entry counts as
 * not set. (`options.outputPath`, the internal force, is applied by the
 * caller before this resolver is consulted.)
 *
 * Explicit names:
 * - relative paths resolve under outputDir, keeping their subdirectories
 *   (`drafts/paper.pdf` → `<outputDir>/drafts/paper.pdf`);
 * - absolute paths bypass outputDir;
 * - only the format's own extension is stripped before `suffix` is inserted,
 *   so dotted names survive (`paper.v2` → `paper.v2.pdf`) and a wrong
 *   extension stays visible (`output.docx` for tex → `output.docx.tex`).
 *
 * The fallback path always lives under outputDir.
 *
 * @param directory - Project directory.
 * @param config - Build configuration (reads `output`, `title`, outputDir).
 * @param format - Output format name.
 * @param options - `cliOverride`: the `-o` value. `suffix`: text inserted
 *        before the extension (e.g. "-changes", "-slides").
 *        NOTE(review): the suffix is applied to explicit names as well as to
 *        the slug fallback. Earlier documentation said it was suppressed for
 *        explicit names; confirm which behaviour callers expect.
 */
export function resolveOutputPath(
  directory: string,
  config: BuildConfig,
  format: string,
  options: { cliOverride?: string; suffix?: string } = {}
): string {
  const { cliOverride, suffix = '' } = options;
  const ext = getFormatExtension(format);

  const configured: unknown = config.output?.[format];
  const explicit = cliOverride || (typeof configured === 'string' ? configured : '');
  if (explicit) {
    const explicitDir = path.dirname(explicit);
    const baseDir = path.isAbsolute(explicit)
      ? explicitDir
      : path.join(resolveOutputDir(directory, config), explicitDir);

    const baseName = path.basename(explicit);
    // Strip only a matching extension so the suffix lands before it; any
    // other dotted tail is part of the name the user asked for.
    const hasFormatExt = ext.length > 0 && baseName.toLowerCase().endsWith(ext.toLowerCase());
    const stem = hasFormatExt ? baseName.slice(0, baseName.length - ext.length) : baseName;
    return path.join(baseDir, ensureExtension(`${stem}${suffix}`, ext));
  }

  const slug = slugifyTitle(config.title);
  return path.join(resolveOutputDir(directory, config), `${slug}${suffix}${ext}`);
}
1453
+
1051
1454
  /**
1052
1455
  * Run pandoc build
1053
1456
  */
@@ -1058,28 +1461,16 @@ export async function runPandoc(
1058
1461
  options: BuildOptions = {}
1059
1462
  ): Promise<PandocResult> {
1060
1463
  const directory = path.dirname(inputPath);
1061
- const baseName = config.title
1062
- ? config.title.toLowerCase().replace(/[^a-z0-9]+/g, '-').slice(0, 50)
1063
- : 'paper';
1064
-
1065
- // Map format to file extension
1066
- const extMap: Record<string, string> = {
1067
- tex: '.tex',
1068
- pdf: '.pdf',
1069
- docx: '.docx',
1070
- beamer: '.pdf', // beamer outputs PDF
1071
- pptx: '.pptx',
1072
- };
1073
- const ext = extMap[format] || '.pdf';
1074
1464
 
1075
- // For beamer, use -slides suffix to distinguish from regular PDF
1465
+ // outputPath (internal force) wins over the resolver. For beamer, we keep
1466
+ // the `-slides` suffix on the slug fallback to distinguish from a regular
1467
+ // PDF build; when the user supplies an explicit name, they pick their own.
1076
1468
  const suffix = format === 'beamer' ? '-slides' : '';
1077
- // Allow custom output path via options. Auto-named outputs go through the
1078
- // configured outputDir (default 'output/'); explicit paths are honored as-is
1079
- // so callers can route temp/intermediate artefacts where they want.
1080
1469
  const outputPath = options.outputPath
1081
- ? options.outputPath
1082
- : path.join(resolveOutputDir(directory, config), `${baseName}${suffix}${ext}`);
1470
+ ?? resolveOutputPath(directory, config, format, {
1471
+ cliOverride: options.output,
1472
+ suffix,
1473
+ });
1083
1474
 
1084
1475
  if (!options.outputPath) {
1085
1476
  const outDir = path.dirname(outputPath);
@@ -1130,13 +1521,50 @@ export async function runPandoc(
1130
1521
  args.push('--reference-doc', referenceDoc);
1131
1522
  }
1132
1523
 
1133
- // Add color filter for PPTX (handles [text]{color=#RRGGBB} syntax)
1134
- const colorFilterPath = path.join(path.dirname(new URL(import.meta.url).pathname.replace(/^\/([A-Z]:)/, '$1')), 'pptx-color-filter.lua');
1524
+ // Add color filter for PPTX (handles [text]{color=#RRGGBB} syntax).
1525
+ // fileURLToPath handles Windows paths with spaces — the old
1526
+ // `new URL(...).pathname` returned URL-encoded `%20` and fs.existsSync
1527
+ // silently failed.
1528
+ const colorFilterPath = path.join(
1529
+ path.dirname(fileURLToPath(import.meta.url)),
1530
+ 'pptx-color-filter.lua'
1531
+ );
1135
1532
  if (fs.existsSync(colorFilterPath)) {
1136
1533
  args.push('--lua-filter', colorFilterPath);
1137
1534
  }
1138
1535
  }
1139
1536
 
1537
+ // Wire placeholder macros (built-in \tofill plus user-declared entries).
1538
+ // - docx/html: lua filter expands \name{X} to format-specific raw runs.
1539
+ // - pdf/tex/beamer: inject a \providecommand preamble so LaTeX renders it
1540
+ // directly. `\providecommand` is non-clobbering, so a user who already
1541
+ // has `\providecommand{\tofill}{...}` in their own header keeps theirs.
1542
+ //
1543
+ // Sidecar path is passed to the lua filter via DOCREV_MACROS_FILE in the
1544
+ // child env (not pandoc metadata) because pandoc walks RawInline/RawBlock
1545
+ // BEFORE Meta — by the time a Meta handler could read the path, the inline
1546
+ // expansion has already happened.
1547
+ const macroTempFiles: string[] = [];
1548
+ let macroEnvFile: string | null = null;
1549
+ const macros = mergeMacros((config as { macros?: unknown }).macros);
1550
+ if (macros.length > 0) {
1551
+ if (format === 'docx' || format === 'html' || format === 'html5' || format === 'html4') {
1552
+ const sidecarPath = writeMacrosSidecar(directory, macros);
1553
+ macroTempFiles.push(sidecarPath);
1554
+ macroEnvFile = sidecarPath;
1555
+ const filterPath = getMacroFilterPath();
1556
+ if (fs.existsSync(filterPath)) {
1557
+ args.push('--lua-filter', filterPath);
1558
+ }
1559
+ } else if (format === 'pdf' || format === 'tex' || format === 'beamer') {
1560
+ const preamble = generateLatexPreamble(macros);
1561
+ const preamblePath = path.join(directory, '.macros.tex');
1562
+ fs.writeFileSync(preamblePath, preamble, 'utf-8');
1563
+ macroTempFiles.push(preamblePath);
1564
+ args.push('-H', path.basename(preamblePath));
1565
+ }
1566
+ }
1567
+
1140
1568
  // Add crossref metadata file if exists (skip for slides - they don't use crossref)
1141
1569
  if (format !== 'beamer' && format !== 'pptx') {
1142
1570
  const crossrefPath = path.join(directory, 'crossref.yaml');
@@ -1146,13 +1574,27 @@ export async function runPandoc(
1146
1574
  }
1147
1575
  }
1148
1576
 
1577
+ // Passthrough args go last so they win against built-in defaults.
1578
+ args.push(...collectPandocPassthroughArgs(format, config, options.pandocArgs));
1579
+
1149
1580
  // Input file (use basename since we set cwd to directory)
1150
1581
  args.push(path.basename(inputPath));
1151
1582
 
1583
+ if (options.verbose) {
1584
+ const quoted = args.map(a => /[\s"'$`]/.test(a) ? `"${a.replace(/"/g, '\\"')}"` : a).join(' ');
1585
+ console.error(`[pandoc ${format}] (cwd: ${directory})`);
1586
+ console.error(` pandoc ${quoted}`);
1587
+ }
1588
+
1152
1589
  return new Promise((resolve) => {
1590
+ const pandocEnv: NodeJS.ProcessEnv = { ...process.env };
1591
+ if (macroEnvFile) {
1592
+ pandocEnv.DOCREV_MACROS_FILE = macroEnvFile;
1593
+ }
1153
1594
  const pandoc: ChildProcess = spawn('pandoc', args, {
1154
1595
  cwd: directory,
1155
1596
  stdio: ['ignore', 'pipe', 'pipe'],
1597
+ env: pandocEnv,
1156
1598
  });
1157
1599
 
1158
1600
  let stderr = '';
@@ -1160,7 +1602,18 @@ export async function runPandoc(
1160
1602
  stderr += data.toString();
1161
1603
  });
1162
1604
 
1605
+ const cleanupMacroTempFiles = (): void => {
1606
+ for (const tmp of macroTempFiles) {
1607
+ try {
1608
+ fs.unlinkSync(tmp);
1609
+ } catch {
1610
+ // ignore — best-effort cleanup
1611
+ }
1612
+ }
1613
+ };
1614
+
1163
1615
  pandoc.on('close', async (code) => {
1616
+ cleanupMacroTempFiles();
1164
1617
  if (code === 0) {
1165
1618
  // For PPTX, post-process to add slide numbers, buildup colors, and logos
1166
1619
  if (format === 'pptx') {
@@ -1210,6 +1663,7 @@ export async function runPandoc(
1210
1663
  });
1211
1664
 
1212
1665
  pandoc.on('error', (err) => {
1666
+ cleanupMacroTempFiles();
1213
1667
  resolve({ outputPath, success: false, error: err.message });
1214
1668
  });
1215
1669
  });
@@ -1265,6 +1719,12 @@ export async function build(
1265
1719
  if ((imageReg as any).figures?.length > 0) {
1266
1720
  writeImageRegistry(directory, imageReg);
1267
1721
  }
1722
+
1723
+ // Warn about raw LaTeX figure blocks that won't render in docx (pandoc
1724
+ // drops them silently). With auto-translate on (default), this surfaces
1725
+ // only the exotic leftovers; with it off, every block.
1726
+ const rawFigWarning = collectRawLatexFigureWarning(directory, config);
1727
+ if (rawFigWarning) warnings.push(rawFigWarning);
1268
1728
  }
1269
1729
 
1270
1730
  const results: BuildResult[] = [];