docrev 0.9.18 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. package/.gitattributes +1 -1
  2. package/CHANGELOG.md +173 -149
  3. package/PLAN-tables-and-postprocess.md +850 -850
  4. package/README.md +431 -406
  5. package/bin/rev.js +11 -11
  6. package/bin/rev.ts +145 -145
  7. package/completions/rev.bash +127 -127
  8. package/completions/rev.ps1 +210 -210
  9. package/completions/rev.zsh +207 -207
  10. package/dist/lib/build.d.ts +8 -0
  11. package/dist/lib/build.d.ts.map +1 -1
  12. package/dist/lib/build.js +62 -6
  13. package/dist/lib/build.js.map +1 -1
  14. package/dist/lib/commands/context.d.ts +1 -1
  15. package/dist/lib/commands/context.d.ts.map +1 -1
  16. package/dist/lib/commands/context.js +1 -1
  17. package/dist/lib/commands/context.js.map +1 -1
  18. package/dist/lib/commands/sections.js +7 -7
  19. package/dist/lib/commands/sections.js.map +1 -1
  20. package/dist/lib/commands/sync.d.ts.map +1 -1
  21. package/dist/lib/commands/sync.js +15 -14
  22. package/dist/lib/commands/sync.js.map +1 -1
  23. package/dist/lib/commands/utilities.js +164 -164
  24. package/dist/lib/commands/verify-anchors.js +6 -6
  25. package/dist/lib/commands/verify-anchors.js.map +1 -1
  26. package/dist/lib/commands/word-tools.js +8 -8
  27. package/dist/lib/grammar.js +3 -3
  28. package/dist/lib/macro-filter.lua +201 -0
  29. package/dist/lib/macros.d.ts +102 -0
  30. package/dist/lib/macros.d.ts.map +1 -0
  31. package/dist/lib/macros.js +218 -0
  32. package/dist/lib/macros.js.map +1 -0
  33. package/dist/lib/pdf-comments.js +44 -44
  34. package/dist/lib/plugins.js +57 -57
  35. package/dist/lib/pptx-color-filter.lua +37 -0
  36. package/dist/lib/pptx-themes.js +115 -115
  37. package/dist/lib/schema.d.ts.map +1 -1
  38. package/dist/lib/schema.js +34 -0
  39. package/dist/lib/schema.js.map +1 -1
  40. package/dist/lib/sections.d.ts +35 -0
  41. package/dist/lib/sections.d.ts.map +1 -1
  42. package/dist/lib/sections.js +81 -0
  43. package/dist/lib/sections.js.map +1 -1
  44. package/dist/lib/spelling.js +2 -2
  45. package/dist/lib/templates.js +387 -387
  46. package/dist/lib/themes.js +51 -51
  47. package/eslint.config.js +27 -27
  48. package/lib/anchor-match.ts +276 -276
  49. package/lib/annotations.ts +644 -644
  50. package/lib/build.ts +1766 -1694
  51. package/lib/citations.ts +160 -160
  52. package/lib/commands/build.ts +855 -855
  53. package/lib/commands/citations.ts +515 -515
  54. package/lib/commands/comments.ts +1050 -1050
  55. package/lib/commands/context.ts +176 -174
  56. package/lib/commands/core.ts +309 -309
  57. package/lib/commands/doi.ts +435 -435
  58. package/lib/commands/file-ops.ts +372 -372
  59. package/lib/commands/history.ts +320 -320
  60. package/lib/commands/index.ts +87 -87
  61. package/lib/commands/init.ts +259 -259
  62. package/lib/commands/merge-resolve.ts +378 -378
  63. package/lib/commands/preview.ts +178 -178
  64. package/lib/commands/project-info.ts +244 -244
  65. package/lib/commands/quality.ts +517 -517
  66. package/lib/commands/response.ts +454 -454
  67. package/lib/commands/section-boundaries.ts +82 -82
  68. package/lib/commands/sections.ts +451 -451
  69. package/lib/commands/sync.ts +709 -706
  70. package/lib/commands/text-ops.ts +449 -449
  71. package/lib/commands/utilities.ts +448 -448
  72. package/lib/commands/verify-anchors.ts +272 -272
  73. package/lib/commands/word-tools.ts +340 -340
  74. package/lib/comment-realign.ts +517 -517
  75. package/lib/config.ts +84 -84
  76. package/lib/crossref.ts +781 -781
  77. package/lib/csl.ts +191 -191
  78. package/lib/dependencies.ts +98 -98
  79. package/lib/diff-engine.ts +465 -465
  80. package/lib/doi-cache.ts +115 -115
  81. package/lib/doi.ts +897 -897
  82. package/lib/equations.ts +506 -506
  83. package/lib/errors.ts +346 -346
  84. package/lib/format.ts +541 -541
  85. package/lib/git.ts +326 -326
  86. package/lib/grammar.ts +303 -303
  87. package/lib/image-registry.ts +180 -180
  88. package/lib/import.ts +911 -911
  89. package/lib/journals.ts +543 -543
  90. package/lib/macro-filter.lua +201 -0
  91. package/lib/macros.ts +273 -0
  92. package/lib/merge.ts +633 -633
  93. package/lib/orcid.ts +144 -144
  94. package/lib/pdf-comments.ts +263 -263
  95. package/lib/pdf-import.ts +524 -524
  96. package/lib/plugins.ts +362 -362
  97. package/lib/postprocess.ts +188 -188
  98. package/lib/pptx-color-filter.lua +37 -37
  99. package/lib/pptx-template.ts +469 -469
  100. package/lib/pptx-themes.ts +483 -483
  101. package/lib/protect-restore.ts +520 -520
  102. package/lib/rate-limiter.ts +94 -94
  103. package/lib/response.ts +197 -197
  104. package/lib/restore-references.ts +240 -240
  105. package/lib/review.ts +327 -327
  106. package/lib/schema.ts +488 -454
  107. package/lib/scientific-words.ts +73 -73
  108. package/lib/sections.ts +425 -335
  109. package/lib/slides.ts +756 -756
  110. package/lib/spelling.ts +334 -334
  111. package/lib/templates.ts +526 -526
  112. package/lib/themes.ts +742 -742
  113. package/lib/trackchanges.ts +247 -247
  114. package/lib/tui.ts +450 -450
  115. package/lib/types.ts +550 -550
  116. package/lib/undo.ts +250 -250
  117. package/lib/utils.ts +69 -69
  118. package/lib/variables.ts +179 -179
  119. package/lib/word-extraction.ts +806 -806
  120. package/lib/word.ts +643 -643
  121. package/lib/wordcomments.ts +840 -840
  122. package/package.json +137 -137
  123. package/scripts/postbuild.js +47 -28
  124. package/skill/REFERENCE.md +539 -539
  125. package/skill/SKILL.md +295 -295
  126. package/tsconfig.json +26 -26
  127. package/types/index.d.ts +525 -525
  128. package/issues.md +0 -180
  129. package/site/assets/extra.css +0 -208
  130. package/site/commands.html +0 -926
  131. package/site/configuration.html +0 -469
  132. package/site/index.html +0 -288
  133. package/site/troubleshooting.html +0 -461
  134. package/site/workflow.html +0 -518
package/lib/build.ts CHANGED
@@ -1,1694 +1,1766 @@
1
- /**
2
- * Build system - combines sections → paper.md → PDF/DOCX/TEX
3
- *
4
- * Features:
5
- * - Reads rev.yaml config
6
- * - Combines section files into paper.md (persisted)
7
- * - Strips annotations appropriately per output format
8
- * - Runs pandoc with crossref filter
9
- */
10
-
11
- import * as fs from 'fs';
12
- import * as path from 'path';
13
- import { execSync, spawn, ChildProcess } from 'child_process';
14
- import YAML from 'yaml';
15
- import { stripAnnotations } from './annotations.js';
16
- import { buildRegistry, labelToDisplay, detectDynamicRefs, resolveForwardRefs, resolveSupplementaryRefs } from './crossref.js';
17
- import { processVariables, hasVariables } from './variables.js';
18
- import { processSlideMarkdown, hasSlideSyntax } from './slides.js';
19
- import { generatePptxTemplate, templateNeedsRegeneration, injectMediaIntoPptx, injectSlideNumbers, applyThemeFonts, applyCentering, applyBuildupColors } from './pptx-template.js';
20
- import { getThemePath, getThemeNames, PPTX_THEMES } from './pptx-themes.js';
21
- import { runPostprocess } from './postprocess.js';
22
- import { hasPandoc, hasPandocCrossref, hasLatex } from './dependencies.js';
23
- import { buildImageRegistry, writeImageRegistry } from './image-registry.js';
24
- import type { Author, JournalFormatting } from './types.js';
25
- import { getJournalProfile } from './journals.js';
26
- import { resolveCSL } from './csl.js';
27
-
28
- // =============================================================================
29
- // Constants
30
- // =============================================================================
31
-
32
/** Output formats supported by the build. */
const SUPPORTED_FORMATS = ['pdf', 'docx', 'tex', 'beamer', 'pptx'] as const;

/**
 * Upper bound on the length of an output filename derived from a slugified
 * title. It applies only when no explicit `output:` filename is configured.
 * Over-long titles are cut at the last `-` at or before this length so words
 * stay whole (the earlier blind `.slice(0, 50)` could cut mid-word).
 */
const MAX_TITLE_FILENAME_LENGTH = 80;
42
-
43
- // =============================================================================
44
- // Interfaces
45
- // =============================================================================
46
-
47
/** Cross-reference settings; every field is optional. */
export interface CrossrefConfig {
  figureTitle?: string;
  tableTitle?: string;
  /** One prefix, or a list of prefixes (the defaults are `[singular, plural]` pairs). */
  figPrefix?: string | string[];
  /** One prefix, or a list of prefixes (the defaults are `[singular, plural]` pairs). */
  tblPrefix?: string | string[];
  /** One prefix, or a list of prefixes (the defaults are `[singular, plural]` pairs). */
  secPrefix?: string | string[];
  linkReferences?: boolean;
}

/** Options for PDF output. */
export interface PdfConfig {
  template?: string | null;
  headerIncludes?: string | null;
  documentclass?: string;
  fontsize?: string;
  geometry?: string;
  linestretch?: number;
  numbersections?: boolean;
  toc?: boolean;
  /**
   * LaTeX engine: pdflatex (default), xelatex, lualatex, tectonic, etc.
   * xelatex/lualatex are needed for native UTF-8 rendering of Latin-Extended
   * diacritics (Czech/Polish/Croatian/Spanish author names, species epithets).
   */
  engine?: string;
  /** Roman/serif main font (xelatex/lualatex only; uses fontspec). */
  mainfont?: string;
  /** Sans-serif font (xelatex/lualatex only). */
  sansfont?: string;
  /** Monospace font (xelatex/lualatex only). */
  monofont?: string;
  /** Extra pandoc args for this format, appended after the top-level pandocArgs. */
  pandocArgs?: string[];
}

/** Options for DOCX output. */
export interface DocxConfig {
  reference?: string | null;
  keepComments?: boolean;
  /** When not `false`, affiliation lines in the DOCX author block end with a hard line break. */
  affiliationNewline?: boolean;
  toc?: boolean;
  pandocArgs?: string[];
  /**
   * Auto-translate the common-shape raw `\begin{figure}...\end{figure}` block
   * into portable `![caption](path){#fig:label width=N%}` markdown so figures
   * survive the docx build (pandoc otherwise drops raw LaTeX silently).
   * Defaults to true. Set to false to opt out; such blocks then produce a
   * warning and are left untouched.
   */
  translateRawFigures?: boolean;
}

/** Options for TeX output. */
export interface TexConfig {
  standalone?: boolean;
  pandocArgs?: string[];
}

/** Options for Beamer slide output. */
export interface BeamerConfig {
  theme?: string;
  colortheme?: string | null;
  fonttheme?: string | null;
  /** For example '169' for 16:9 or '43' for 4:3. */
  aspectratio?: string | null;
  /** 'horizontal', 'vertical', 'frame' or 'empty'. */
  navigation?: string | null;
  /** Section divider slides. */
  section?: boolean;
  /** 'show' (presenter view), 'only' (notes only), 'hide', or false. */
  notes?: string | false;
  /** Scale images to fit within slide bounds. */
  fit_images?: boolean;
  pandocArgs?: string[];
}

/** Options for PowerPoint output. */
export interface PptxConfig {
  /** Built-in theme name: default, dark, academic, minimal, corporate. */
  theme?: string;
  /** Custom reference-doc; overrides `theme`. */
  reference?: string | null;
  /** Directory with logo images (e.g. logo-left.png, logo-right.png). */
  media?: string | null;
  colors?: {
    default?: string;
    title?: string;
  };
  buildup?: {
    grey?: string;
    accent?: string;
    enabled?: boolean;
  };
  pandocArgs?: string[];
}

/** Table formatting options. */
export interface TablesConfig {
  /** Column headers whose columns receive nowrap formatting. */
  nowrap?: string[];
}

/**
 * Postprocess script per output format. The `all` entry runs after any
 * format. Additional string keys are permitted by the index signature.
 */
export interface PostprocessConfig {
  pdf?: string | null;
  docx?: string | null;
  tex?: string | null;
  pptx?: string | null;
  beamer?: string | null;
  all?: string | null;
  [key: string]: string | null | undefined;
}
142
-
143
/** Complete build configuration: defaults merged with the contents of rev.yaml. */
export interface BuildConfig {
  title: string;
  /** Each author is either a plain name or a structured `Author` record. */
  authors: (string | Author)[];
  /** Affiliation key mapped to affiliation text. */
  affiliations: Record<string, string>;
  /** Section file names, in build order. */
  sections: string[];
  bibliography: string | null;
  csl: string | null;
  crossref: CrossrefConfig;
  pdf: PdfConfig;
  docx: DocxConfig;
  tex: TexConfig;
  beamer: BeamerConfig;
  pptx: PptxConfig;
  tables: TablesConfig;
  postprocess: PostprocessConfig;
  /**
   * Directory (relative to the project) that receives final outputs. It is
   * created on demand. Null or empty keeps outputs next to paper.md (legacy
   * behavior).
   */
  outputDir?: string | null;
  /**
   * Output filename per format. Keys are format names (pdf/docx/tex/beamer/
   * pptx) and values are paths. Relative paths resolve under outputDir,
   * absolute paths are used as-is, and a missing extension is added. The CLI
   * `-o` flag takes precedence over this map.
   */
  output?: Record<string, string>;
  /**
   * Extra pandoc args applied to every format. Format-specific args (e.g.
   * docx.pandocArgs) are appended after these, and CLI --pandoc-arg values
   * are appended last.
   */
  pandocArgs?: string[];
  /** Path of the rev.yaml this config was loaded from, or null when none was found. */
  _configPath?: string | null;
}
179
-
180
/** Outcome of building one output format. */
export interface BuildResult {
  format: string;
  success: boolean;
  outputPath?: string;
  error?: string;
}

/** Options accepted by the build entry points. */
interface BuildOptions {
  verbose?: boolean;
  config?: BuildConfig;
  /**
   * Internal: forces the exact output path. Used by dual-mode/temp builds
   * that route to specific temp files. Bypasses the `output:` resolver.
   */
  outputPath?: string;
  /**
   * CLI override (`-o, --output <path>`). Wins over `config.output[format]`
   * but loses to `options.outputPath`. Relative paths resolve under
   * outputDir; absolute paths bypass outputDir.
   */
  output?: string;
  crossref?: boolean;
  /** Extra pandoc args from the CLI (--pandoc-arg); appended after config args. */
  pandocArgs?: string[];
  /** Set when a References block was auto-injected while combining sections. */
  _refsAutoInjected?: boolean;
  /** Number of forward references resolved while combining sections. */
  _forwardRefsResolved?: number;
}

/** Options for combining sections; same shape as BuildOptions. */
interface CombineOptions extends BuildOptions {
  _refsAutoInjected?: boolean;
}

/** Extra context handed to template-variable processing. */
interface VariablesContext {
  sectionContents: string[];
}

/** Outcome of a single pandoc run. */
interface PandocResult {
  outputPath: string;
  success: boolean;
  error?: string;
}

/** Aggregate outcome of a build across formats. */
interface FullBuildResult {
  results: BuildResult[];
  paperPath: string;
  warnings: string[];
  forwardRefsResolved: number;
  refsAutoInjected?: boolean;
}

/** A dynamic reference found in text. */
interface DynamicRef {
  type: string;
  label: string;
  match: string;
  position: number;
}

/** Registry of figure/table/equation anchors, with optional number-to-label lookups. */
interface Registry {
  figures: Map<string, unknown>;
  tables: Map<string, unknown>;
  equations: Map<string, unknown>;
  byNumber: {
    fig?: Map<number, string>;
    figS?: Map<number, string>;
    tbl?: Map<number, string>;
    tblS?: Map<number, string>;
    eq?: Map<number, string>;
  };
}
249
-
250
/**
 * Baseline rev.yaml configuration. Values here apply whenever the project
 * has no rev.yaml or leaves the corresponding key unset.
 */
export const DEFAULT_CONFIG: BuildConfig = {
  title: 'Untitled Document',
  authors: [],
  affiliations: {},
  sections: [],
  bibliography: null,
  csl: null,

  crossref: {
    figureTitle: 'Figure',
    tableTitle: 'Table',
    figPrefix: ['Fig.', 'Figs.'],
    tblPrefix: ['Table', 'Tables'],
    secPrefix: ['Section', 'Sections'],
    linkReferences: true,
  },

  pdf: {
    template: null,
    documentclass: 'article',
    fontsize: '12pt',
    geometry: 'margin=1in',
    linestretch: 1.5,
    numbersections: false,
    toc: false,
  },

  docx: {
    reference: null,
    keepComments: false,
    affiliationNewline: true,
    toc: false,
    translateRawFigures: true,
  },

  tex: {
    standalone: true,
  },

  // ---- Slide formats ----
  beamer: {
    theme: 'default',
    colortheme: null,
    fonttheme: null,
    // '169' selects 16:9, '43' selects 4:3
    aspectratio: null,
    // 'horizontal', 'vertical', 'frame' or 'empty'
    navigation: null,
    // emit section divider slides
    section: true,
    // 'show' (presenter view), 'only' (notes only), 'hide', or false
    notes: 'show',
    // scale images so they fit within slide bounds
    fit_images: true,
  },
  pptx: {
    // built-in theme: default, dark, academic, minimal, corporate
    theme: 'default',
    // custom reference-doc (overrides theme)
    reference: null,
    // directory with logo images (e.g., logo-left.png, logo-right.png)
    media: null,
  },

  // ---- Table formatting ----
  tables: {
    // column headers that get nowrap formatting (converts Normal() → $\mathcal{N}()$ etc.)
    nowrap: [],
  },

  // ---- Postprocess scripts ----
  postprocess: {
    pdf: null,
    docx: null,
    tex: null,
    pptx: null,
    beamer: null,
    // runs after any format
    all: null,
  },

  // Final outputs are written here (created on demand). Use null or '' to
  // keep outputs in the project root.
  outputDir: 'output',
};
320
-
321
- // =============================================================================
322
- // Public API
323
- // =============================================================================
324
-
325
/**
 * Layer a journal profile's formatting onto a config.
 * Precedence: DEFAULT_CONFIG < journal formatting < explicit rev.yaml settings.
 *
 * A field counts as "not explicitly set" when its current value serializes
 * (via JSON.stringify) to the same text as the DEFAULT_CONFIG value, so a
 * user who explicitly writes the default value is treated the same as one
 * who left the field out.
 *
 * @param config - Config already merged with defaults
 * @param formatting - Formatting block of the journal profile
 * @param directory - Project directory, passed to resolveCSL
 * @returns A shallow copy of `config` with journal values applied
 */
export function mergeJournalFormatting(config: BuildConfig, formatting: JournalFormatting, directory: string): BuildConfig {
  // Copies `current` and overwrites each field that still equals its default
  // with the journal's value. Undefined journal values are ignored.
  const overlayUnset = (
    current: object | null | undefined,
    defaults: object,
    journal: object,
  ): Record<string, unknown> => {
    const userValues = (current || {}) as Record<string, unknown>;
    const defaultValues = defaults as Record<string, unknown>;
    const result: Record<string, unknown> = { ...userValues };
    for (const [field, journalValue] of Object.entries(journal)) {
      if (journalValue === undefined) continue;
      if (JSON.stringify(userValues[field]) === JSON.stringify(defaultValues[field])) {
        result[field] = journalValue;
      }
    }
    return result;
  };

  const merged = { ...config };

  // CSL applies only when the user has not chosen one. If the style cannot be
  // resolved locally the bare name is kept: pandoc --citeproc can sometimes
  // resolve it, and the user can fetch it with rev profiles --fetch-csl.
  if (formatting.csl && !config.csl) {
    const resolved = resolveCSL(formatting.csl, directory);
    merged.csl = resolved ? resolved : formatting.csl;
  }

  if (formatting.pdf) {
    merged.pdf = overlayUnset(config.pdf, DEFAULT_CONFIG.pdf, formatting.pdf) as PdfConfig;
  }
  if (formatting.docx) {
    merged.docx = overlayUnset(config.docx, DEFAULT_CONFIG.docx, formatting.docx) as DocxConfig;
  }
  if (formatting.crossref) {
    merged.crossref = overlayUnset(config.crossref, DEFAULT_CONFIG.crossref, formatting.crossref) as CrossrefConfig;
  }

  return merged;
}
387
-
388
/**
 * In-place: move `pandoc-args` to `pandocArgs` on an object.
 *
 * - An existing `pandocArgs` is never overwritten; the hyphenated key is
 *   simply removed.
 * - A non-array value is wrapped in a one-element array.
 * - A null value (what YAML yields for `pandoc-args:` with nothing after it)
 *   means "no args": the hyphenated key is removed and `pandocArgs` is left
 *   unset, rather than becoming `[null]`.
 *
 * Idempotent. Does nothing when `obj` is not an object or has no
 * `pandoc-args` value.
 *
 * @param obj - Parsed config (or per-format sub-config) to normalize
 */
function normalizePandocArgsKey(obj: Record<string, unknown>): void {
  if (!obj || typeof obj !== 'object') return;
  const hyphenated = obj['pandoc-args'];
  if (hyphenated === undefined) return;
  if (hyphenated !== null && obj.pandocArgs === undefined) {
    obj.pandocArgs = Array.isArray(hyphenated) ? hyphenated : [hyphenated];
  }
  delete obj['pandoc-args'];
}
401
-
402
/**
 * Load rev.yaml from a project directory and merge it over DEFAULT_CONFIG.
 *
 * Nested sections (crossref, pdf, docx, tex, beamer, pptx, tables,
 * postprocess) are merged one level deep; all other keys from rev.yaml
 * replace the default wholesale. When rev.yaml names a `journal` whose
 * profile has formatting, that formatting is layered in through
 * mergeJournalFormatting.
 *
 * @param directory - Project directory path
 * @returns Merged config; `_configPath` is the rev.yaml path, or null when the file does not exist
 * @throws {TypeError} If directory is not a string
 * @throws {Error} If rev.yaml exists but cannot be read or parsed, or its top level is not a mapping
 */
export function loadConfig(directory: string): BuildConfig {
  if (typeof directory !== 'string') {
    throw new TypeError(`directory must be a string, got ${typeof directory}`);
  }

  const configPath = path.join(directory, 'rev.yaml');

  if (!fs.existsSync(configPath)) {
    return { ...DEFAULT_CONFIG, _configPath: null };
  }

  try {
    const content = fs.readFileSync(configPath, 'utf-8');
    const parsed: unknown = YAML.parse(content) || {};

    // A scalar or list at the top level would otherwise be spread into the
    // config as index keys and silently ignored; reject it instead.
    if (typeof parsed !== 'object' || Array.isArray(parsed)) {
      throw new Error('top level must be a mapping of keys to values');
    }
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- untyped user YAML
    const userConfig = parsed as Record<string, any>;

    // Accept hyphenated `pandoc-args` (the form pandoc itself uses) as well as
    // camelCase `pandocArgs`. Hyphenated is the documented form; camelCase is
    // accepted for users who already prefer that convention.
    normalizePandocArgsKey(userConfig);
    for (const fmt of SUPPORTED_FORMATS) {
      if (userConfig[fmt] && typeof userConfig[fmt] === 'object') {
        normalizePandocArgsKey(userConfig[fmt]);
      }
    }

    // One-level-deep merge with defaults
    let config: BuildConfig = {
      ...DEFAULT_CONFIG,
      ...userConfig,
      crossref: { ...DEFAULT_CONFIG.crossref, ...userConfig.crossref },
      pdf: { ...DEFAULT_CONFIG.pdf, ...userConfig.pdf },
      docx: { ...DEFAULT_CONFIG.docx, ...userConfig.docx },
      tex: { ...DEFAULT_CONFIG.tex, ...userConfig.tex },
      beamer: { ...DEFAULT_CONFIG.beamer, ...userConfig.beamer },
      pptx: { ...DEFAULT_CONFIG.pptx, ...userConfig.pptx },
      tables: { ...DEFAULT_CONFIG.tables, ...userConfig.tables },
      postprocess: { ...DEFAULT_CONFIG.postprocess, ...userConfig.postprocess },
      _configPath: configPath,
    };

    // Journal formatting sits between DEFAULT_CONFIG and the user's settings
    if (userConfig.journal) {
      const profile = getJournalProfile(userConfig.journal);
      if (profile?.formatting) {
        config = mergeJournalFormatting(config, profile.formatting, directory);
      }
    }

    return config;
  } catch (err) {
    // Anything can be thrown; only read `.message` from real Error objects.
    const reason = err instanceof Error ? err.message : String(err);
    throw new Error(`Failed to parse rev.yaml: ${reason}`);
  }
}
463
-
464
/**
 * Work out which section files make up the document, and in what order.
 *
 * Sources, in priority order:
 * 1. `configSections` (from rev.yaml), in the given order. Files that do not
 *    exist are dropped with a console warning.
 * 2. `sections.yaml` in the directory: the keys of its `sections` map,
 *    sorted by each entry's `order` (entries without one sort as 999), with
 *    missing files dropped. A section entry with no metadata is tolerated.
 * 3. Every `.md` file in the directory except paper.md, readme.md and
 *    claude.md (matched case-insensitively), sorted alphabetically.
 *
 * A sections.yaml that cannot be read or parsed falls through to step 3;
 * the error is only printed when the DEBUG environment variable is set.
 *
 * @param directory - Project directory path
 * @param configSections - Sections from rev.yaml (optional)
 * @returns Ordered list of section file names
 * @throws {TypeError} If directory is not a string
 */
export function findSections(directory: string, configSections: string[] = []): string[] {
  if (typeof directory !== 'string') {
    throw new TypeError(`directory must be a string, got ${typeof directory}`);
  }

  // 1. Explicit list from rev.yaml
  if (configSections.length > 0) {
    return configSections.filter((section) => {
      if (fs.existsSync(path.join(directory, section))) return true;
      console.warn(`Warning: Section file not found: ${section}`);
      return false;
    });
  }

  // 2. sections.yaml
  const sectionsYamlPath = path.join(directory, 'sections.yaml');
  if (fs.existsSync(sectionsYamlPath)) {
    try {
      const sectionsConfig = YAML.parse(fs.readFileSync(sectionsYamlPath, 'utf-8'));
      // `?.` covers an empty sections.yaml (parses to null) and entries with
      // no metadata (`intro.md:`), which previously threw and discarded the
      // whole file's ordering.
      const declared = sectionsConfig?.sections;
      if (declared) {
        // eslint-disable-next-line @typescript-eslint/no-explicit-any -- untyped user YAML
        const orderOf = (meta: any): number => meta?.order ?? 999;
        return Object.entries(declared)
          .sort((a, b) => orderOf(a[1]) - orderOf(b[1]))
          .map(([file]) => file)
          .filter((file) => fs.existsSync(path.join(directory, file)));
      }
    } catch (e) {
      if (process.env.DEBUG) {
        const reason = e instanceof Error ? e.message : String(e);
        console.warn('build: YAML parse error in sections.yaml:', reason);
      }
    }
  }

  // 3. Fallback: all markdown files except the special ones, alphabetically
  const exclude = ['paper.md', 'readme.md', 'claude.md'];
  return fs
    .readdirSync(directory)
    .filter((f) => f.endsWith('.md') && !exclude.includes(f.toLowerCase()))
    .sort();
}
520
-
521
/**
 * Concatenate the project's section files into `paper.md` and write it to
 * the project directory.
 *
 * Steps: emit YAML frontmatter built from the config; append each section
 * with its own frontmatter stripped; when a bibliography file exists, no
 * section name looks like a references section, and no explicit
 * `::: {#refs}` div has been seen, insert a References block just before
 * the first supplementary/appendix section; expand template variables; and,
 * when pandoc-crossref is available, resolve forward and supplementary
 * references.
 *
 * Side effects on `options`: `_refsAutoInjected` is set to true when the
 * References block is inserted, and `_forwardRefsResolved` receives the
 * number of forward references resolved (only when non-zero).
 *
 * NOTE(review): the explicit-refs-div check only covers sections read up to
 * and including the supplementary one, so a `::: {#refs}` div in a later
 * section does not suppress the injection.
 *
 * @param directory - Project directory
 * @param config - Build configuration
 * @param options - Combine options; mutated as described above
 * @returns Path of the written paper.md
 * @throws {Error} If no section files are found
 */
export function combineSections(directory: string, config: BuildConfig, options: CombineOptions = {}): string {
  const sections = findSections(directory, config.sections);
  if (sections.length === 0) {
    throw new Error('No section files found. Create .md files or specify sections in rev.yaml');
  }

  // The document opens with the YAML frontmatter
  const parts: string[] = ['---', YAML.stringify(buildFrontmatter(config)).trim(), '---', ''];

  // Raw section bodies, kept for variable processing
  const sectionContents: string[] = [];

  // Case-insensitive "file name contains any of these fragments"
  const mentions = (name: string, ...fragments: string[]): boolean => {
    const lowered = name.toLowerCase();
    return fragments.some((fragment) => lowered.includes(fragment));
  };

  // Pandoc puts the bibliography at the very end by default, which is wrong
  // when supplementary material follows; find out whether we must place it.
  const hasRefsSection = sections.some((name) => mentions(name, 'reference', 'refs'));
  const suppIndex = sections.findIndex((name) => mentions(name, 'supp', 'appendix'));
  const hasBibliography = config.bibliography && fs.existsSync(path.join(directory, config.bibliography));

  // Becomes true once any section read so far contains an explicit refs div
  let hasExplicitRefsDiv = false;

  for (const [i, section] of sections.entries()) {
    if (!section) continue;

    // Section files may carry their own frontmatter; drop it
    const content = stripFrontmatter(fs.readFileSync(path.join(directory, section), 'utf-8'));
    sectionContents.push(content);

    if (content.includes('::: {#refs}')) {
      hasExplicitRefsDiv = true;
    }

    const injectRefs = i === suppIndex && hasBibliography && !hasRefsSection && !hasExplicitRefsDiv;
    if (injectRefs) {
      parts.push('# References\n', '::: {#refs}', ':::', '', '');
      options._refsAutoInjected = true;
    }

    // Section body followed by a blank-line separator
    parts.push(content.trim(), '', '');
  }

  let paperContent = parts.join('\n');

  // Expand template variables, if the text contains any
  if (hasVariables(paperContent)) {
    paperContent = processVariables(paperContent, config as any, { sectionContents });
  }

  if (hasPandocCrossref()) {
    const registry = buildRegistry(directory, sections);

    // Forward references (used before their anchor is defined) are a
    // pandoc-crossref limitation with multi-file documents.
    const forward = resolveForwardRefs(paperContent, registry);
    if (forward.resolved.length > 0) {
      paperContent = forward.text;
      options._forwardRefsResolved = forward.resolved.length;
    }

    // pandoc-crossref numbers all figures sequentially and cannot produce
    // "Figure S1". Supplementary refs are resolved to plain text and their
    // {#fig:...} attributes removed so crossref ignores them.
    const supplementary = resolveSupplementaryRefs(paperContent, registry);
    if (supplementary.resolved.length > 0) {
      paperContent = supplementary.text;
    }
  }

  const paperPath = path.join(directory, 'paper.md');
  fs.writeFileSync(paperPath, paperContent, 'utf-8');
  return paperPath;
}
621
-
622
/**
 * Assemble the YAML frontmatter object for paper.md.
 *
 * Only truthy values are emitted, in the order title, author, bibliography,
 * csl. The author list is left out when numbered affiliations are in use,
 * because the author block is then injected separately per format.
 *
 * @param config - Build configuration
 * @returns Plain object ready for YAML serialization
 */
function buildFrontmatter(config: BuildConfig): Record<string, unknown> {
  const { title, authors, bibliography, csl } = config;
  const fm: Record<string, unknown> = {};

  if (title) fm.title = title;

  const hasAuthors = authors && authors.length > 0;
  if (hasAuthors && !hasNumberedAffiliations(config)) {
    fm.author = authors;
  }

  if (bibliography) fm.bibliography = bibliography;
  if (csl) fm.csl = csl;

  return fm;
}
646
-
647
/**
 * Remove a leading YAML frontmatter block from markdown content.
 *
 * The block must start on the very first line with `---` and end at the
 * first later line that is exactly `---`. Recognized forms:
 * - LF or CRLF line endings;
 * - an empty block (`---` immediately followed by `---`);
 * - a closing fence that is the last line of the file with no trailing
 *   newline;
 * - an optional UTF-8 byte-order mark before the opening fence (removed
 *   together with the block).
 *
 * @param content - Markdown text
 * @returns The text after the frontmatter, or `content` unchanged when it has none
 */
function stripFrontmatter(content: string): string {
  const match = content.match(/^\uFEFF?---\r?\n(?:[\s\S]*?\r?\n)?---(?:\r?\n|$)/);
  return match ? content.slice(match[0].length) : content;
}
657
-
658
/**
 * Tell whether the config uses numbered-affiliation mode: a non-empty
 * `affiliations` map is defined and at least one structured author lists
 * one or more affiliation keys.
 *
 * Tolerates what untyped YAML can produce: a missing or null `authors`
 * value (e.g. `authors:` with nothing after it) and null entries inside the
 * list both count as "no affiliations" instead of throwing.
 *
 * @param config - Build configuration
 * @returns true when numbered affiliations should be rendered
 */
function hasNumberedAffiliations(config: BuildConfig): boolean {
  const { affiliations, authors } = config;
  if (!affiliations || Object.keys(affiliations).length === 0) return false;
  if (!Array.isArray(authors)) return false;
  return authors.some(
    (a) => a !== null && typeof a === 'object' && Array.isArray(a.affiliations) && a.affiliations.length > 0,
  );
}
666
-
667
/**
 * Build the LaTeX author block (authblk package) with numbered affiliation
 * marks. The result is LaTeX source meant to be injected via header-includes.
 *
 * Affiliations are numbered 1..n in the order of their keys in
 * `config.affiliations`. Plain-string authors get a bare `\author{}`.
 * Structured authors get their affiliation numbers as the optional argument
 * (keys not present in the map are ignored) and, when flagged as
 * corresponding, a `\thanks{}` note that includes the email if one is set.
 *
 * NOTE(review): names, emails and affiliation text are inserted verbatim;
 * nothing here escapes LaTeX special characters.
 *
 * @param config - Build configuration
 * @returns LaTeX lines joined with newlines
 */
function generateLatexAuthorBlock(config: BuildConfig): string {
  // affiliation key → 1-based number
  const keyToNum = new Map<string, number>(
    Object.keys(config.affiliations).map((key, idx): [string, number] => [key, idx + 1]),
  );

  const preamble = [
    '\\usepackage{authblk}',
    '\\renewcommand\\Authfont{\\normalsize}',
    '\\renewcommand\\Affilfont{\\small}',
    '',
  ];

  const authorLines = config.authors.map((author) => {
    if (typeof author === 'string') {
      return `\\author{${author}}`;
    }

    const marks = (author.affiliations || [])
      .map((key) => keyToNum.get(key))
      .filter((n): n is number => n !== undefined);
    const markStr = marks.length > 0 ? `[${marks.join(',')}]` : '';

    let thanks = '';
    if (author.corresponding) {
      thanks = author.email
        ? `\\thanks{Corresponding author: ${author.email}}`
        : '\\thanks{Corresponding author}';
    }

    return `\\author${markStr}{${author.name}${thanks}}`;
  });

  const affiliationLines: string[] = [];
  for (const [key, text] of Object.entries(config.affiliations)) {
    const num = keyToNum.get(key);
    if (num === undefined) continue;
    affiliationLines.push(`\\affil[${num}]{${text}}`);
  }

  return [...preamble, ...authorLines, ...affiliationLines].join('\n');
}
713
-
714
/**
 * Build the markdown author block used for DOCX output, with superscript
 * affiliation marks. The result is markdown meant to be inserted after the
 * YAML frontmatter.
 *
 * Layout: one comma-separated author line (`Name^1,2^`, with `\*` added to
 * the superscript of corresponding authors), a blank line, one line per
 * affiliation (`^1^ Department of ...`), then, if the first corresponding
 * author has an email, a blank line and a corresponding-author note. The
 * block ends with a trailing empty line.
 *
 * Unless `config.docx.affiliationNewline` is `false`, every affiliation line
 * except the last ends with a backslash.
 *
 * @param config - Build configuration
 * @returns Markdown lines joined with newlines
 */
function generateMarkdownAuthorBlock(config: BuildConfig): string {
  const affiliationEntries = Object.entries(config.affiliations);

  // affiliation key → 1-based number, in declaration order
  const keyToNum = new Map<string, number>();
  affiliationEntries.forEach(([key], i) => keyToNum.set(key, i + 1));

  // Author line: Name^1,2^, Name^3^, ...
  const byline = config.authors
    .map((author) => {
      if (typeof author === 'string') return author;

      const superParts = (author.affiliations || [])
        .map((key) => keyToNum.get(key))
        .filter((n): n is number => n !== undefined)
        .map(String);
      if (author.corresponding) superParts.push('\\*');

      return superParts.length > 0 ? `${author.name}^${superParts.join(',')}^` : author.name;
    })
    .join(', ');

  const lines: string[] = [byline, ''];

  // Affiliation lines: ^1^ Department of ...
  const useLineBreaks = config.docx.affiliationNewline !== false;
  const lastIdx = affiliationEntries.length - 1;
  for (const [idx, [key, text]] of affiliationEntries.entries()) {
    const num = keyToNum.get(key);
    if (num === undefined) continue;
    const suffix = useLineBreaks && idx !== lastIdx ? '\\' : '';
    lines.push(`^${num}^ ${text}${suffix}`);
  }

  // Note for the first author flagged as corresponding, if it has an email
  const corresponding = config.authors.find((a) => typeof a !== 'string' && a.corresponding) as Author | undefined;
  if (corresponding?.email) {
    lines.push('', `^\\*^ Corresponding author: ${corresponding.email}`);
  }

  lines.push('');
  return lines.join('\n');
}
769
-
770
/**
 * Rewrite distribution notation inside selected pipe-table columns as LaTeX
 * math (PDF/TeX output only).
 *
 * A column is selected when its lower-cased header text contains one of the
 * lower-cased `tablesConfig.nowrap` patterns. Within selected columns, cells
 * that are empty or already start with `$` or `\` are left alone; otherwise:
 *   - `Half-Normal(x)`             → `$\text{Half-Normal}(x)$`
 *   - `Normal(x, y)`               → `$\mathcal{N}(x, y)$`
 *   - `Student-t(df, loc, scale)`  → `$t_{df}(loc, scale)$`
 *   - `Gamma(a, b)`                → `$\text{Gamma}(a, b)$`
 *   - `Exponential(x)`             → `$\text{Exp}(x)$`
 *
 * Argument lists may contain one level of nested parentheses, e.g.
 * `Normal(log(2), 1)`. (The previous `[^)]+` pattern stopped at the first
 * `)` and emitted unbalanced math such as `$\mathcal{N}(log(2)$, 1)`.)
 *
 * @param content - Markdown content
 * @param tablesConfig - tables config from rev.yaml
 * @param format - output format (pdf, docx, etc.)
 * @returns processed content; unchanged for non-PDF/TeX formats or when no
 *          nowrap patterns are configured
 */
export function processTablesForFormat(content: string, tablesConfig: TablesConfig, format: string): string {
  // Only process for PDF/TeX output
  if (format !== 'pdf' && format !== 'tex') {
    return content;
  }

  // Check if we have nowrap columns configured
  if (!tablesConfig?.nowrap?.length) {
    return content;
  }

  const nowrapPatterns = tablesConfig.nowrap.map((p) => p.toLowerCase());

  // Rewrite rules, applied in order. Compound names (Half-Normal) must come
  // before simple names (Normal). Each argument group accepts plain
  // characters or one balanced `( ... )` group.
  const distributionRules: Array<[RegExp, string]> = [
    [/Half-Normal\(((?:[^()]|\([^()]*\))+)\)/g, '$\\text{Half-Normal}($1)$'],
    [/Normal\(((?:[^()]|\([^()]*\))+)\)/g, '$\\mathcal{N}($1)$'],
    [/Student-t\((\d+),\s*((?:[^()]|\([^()]*\))+)\)/g, '$t_{$1}($2)$'],
    [/Gamma\(((?:[^()]|\([^()]*\))+)\)/g, '$\\text{Gamma}($1)$'],
    [/Exponential\(((?:[^()]|\([^()]*\))+)\)/g, '$\\text{Exp}($1)$'],
  ];

  // Match pipe tables: header row, separator row, body rows
  // Header:    | Col1 | Col2 | Col3 |
  // Separator: |:-----|:-----|:-----|
  // Body:      | val1 | val2 | val3 |
  const tableRegex = /^(\|[^\n]+\|\r?\n\|[-:| ]+\|\r?\n)((?:\|[^\n]+\|\r?\n?)+)/gm;

  return content.replace(tableRegex, (match: string, headerAndSep: string, body: string) => {
    // First line of the captured prefix is the header row.
    const headerLine = headerAndSep.split(/\r?\n/)[0] ?? '';

    // Parse header cells to find nowrap column indices
    const headerCells = headerLine
      .split('|')
      .slice(1, -1)
      .map((c: string) => c.trim().toLowerCase());

    const nowrapCols: number[] = [];
    headerCells.forEach((cell: string, i: number) => {
      if (nowrapPatterns.some((p) => cell.includes(p))) {
        nowrapCols.push(i);
      }
    });

    // If no nowrap columns found in this table, return unchanged
    if (nowrapCols.length === 0) {
      return match;
    }

    // Process body rows
    const bodyLines = body.split(/\r?\n/).filter((l: string) => l.trim());
    const processedBody = bodyLines
      .map((row: string) => {
        // cells[0] is empty (before first |), cells[last] is empty (after last |)
        const cells = row.split('|');

        nowrapCols.forEach((colIdx) => {
          const cellIdx = colIdx + 1; // Account for empty first element
          const rawCell = cells[cellIdx];
          if (rawCell === undefined) return;

          const cellContent = rawCell.trim();

          // Skip if empty, already math, or already has LaTeX commands
          if (!cellContent || cellContent.startsWith('$') || cellContent.startsWith('\\')) {
            return;
          }

          let processed = cellContent;
          for (const [pattern, replacement] of distributionRules) {
            processed = processed.replace(pattern, replacement);
          }

          // Update cell with padding
          cells[cellIdx] = ` ${processed} `;
        });

        return cells.join('|');
      })
      .join('\n');

    return headerAndSep + processedBody + '\n';
  });
}
869
-
870
/**
 * Apply format-specific transforms (table normalization, author blocks,
 * crossref display conversion, slide syntax). Caller is responsible for
 * stripping annotations beforehand — the dual-output paths keep comments
 * in the markdown stream and need to apply these transforms separately
 * from annotation handling.
 *
 * Per format:
 *   - `pdf` / `tex`: table processing, then (with numbered affiliations) the
 *     LaTeX author block is appended to the YAML frontmatter as
 *     `header-includes`.
 *   - `docx`: dynamic refs are converted to display text, raw LaTeX figures
 *     are translated unless `docx.translateRawFigures` is `false`, then (with
 *     numbered affiliations) the markdown author block is inserted after the
 *     frontmatter.
 *   - `beamer` / `pptx`: slide markdown is processed when slide syntax is
 *     present.
 *   - anything else: content is returned unchanged.
 *
 * @param content - Markdown content (annotations already stripped as needed)
 * @param format - Output format
 * @param config - Build config
 * @param registry - Crossref registry for the project
 * @returns Transformed markdown
 */
export function applyFormatTransforms(
  content: string,
  format: string,
  config: BuildConfig,
  registry: Registry
): string {
  if (format === 'pdf' || format === 'tex') {
    content = processTablesForFormat(content, config.tables, format);

    if (hasNumberedAffiliations(config)) {
      const latexBlock = generateLatexAuthorBlock(config);
      // Inject just before the closing `---` of the leading frontmatter block.
      content = content.replace(/^(---\r?\n[\s\S]*?)(---\r?\n)/, (_match, yamlContent, closing) => {
        return `${yamlContent}header-includes: |\n${latexBlock.split('\n').map(l => ' ' + l).join('\n')}\n${closing}`;
      });
    }
  } else if (format === 'docx') {
    content = convertDynamicRefsToDisplay(content, registry);

    // Pandoc strips raw LaTeX in docx output. Translate the common
    // `\begin{figure}...\end{figure}` shape to portable markdown so figures
    // actually appear; exotic blocks are left alone (warned about in build()).
    if (config.docx?.translateRawFigures !== false) {
      const { translated } = translateRawLatexFigures(content);
      content = translated;
    }

    if (hasNumberedAffiliations(config)) {
      const mdBlock = generateMarkdownAuthorBlock(config);
      // Use a function replacer: with a replacement *string*, sequences such
      // as `$&`, `$'` or `$1` inside author names, affiliations or emails
      // would be expanded as replacement patterns and corrupt the output.
      content = content.replace(
        /^(---\r?\n[\s\S]*?---\r?\n)/,
        (frontmatter: string) => `${frontmatter}\n${mdBlock}\n`
      );
    }
  } else if (format === 'beamer' || format === 'pptx') {
    if (hasSlideSyntax(content)) {
      content = processSlideMarkdown(content, format);
    }
  }

  return content;
}
921
-
922
/**
 * Prepare paper.md for a specific output format.
 *
 * Reads `paperPath`, strips annotations (docx honours
 * `config.docx.keepComments`; every other format strips with defaults),
 * applies the shared format transforms, and writes the result next to the
 * source as `.paper-<format>.md`.
 *
 * @param paperPath - Path to the combined paper markdown
 * @param format - Output format
 * @param config - Build config
 * @param _options - Unused
 * @returns Path of the prepared file that was written
 */
export function prepareForFormat(
  paperPath: string,
  format: string,
  config: BuildConfig,
  _options: BuildOptions = {}
): string {
  const directory = path.dirname(paperPath);
  const source = fs.readFileSync(paperPath, 'utf-8');

  // The registry drives reference conversion; sections come from config so
  // files are ordered the way the project declares them.
  const registry = buildRegistry(directory, config.sections);

  const stripped =
    format === 'docx'
      ? stripAnnotations(source, { keepComments: config.docx.keepComments })
      : stripAnnotations(source);

  const prepared = applyFormatTransforms(stripped, format, config, registry);

  const preparedPath = path.join(directory, `.paper-${format}.md`);
  fs.writeFileSync(preparedPath, prepared, 'utf-8');
  return preparedPath;
}
954
-
955
/**
 * Convert @fig:label references to display format (Figure 1).
 *
 * References are substituted from last to first so the recorded positions of
 * earlier references stay valid while the string is being rewritten.
 * References for which no display text is produced are left as-is.
 */
function convertDynamicRefsToDisplay(text: string, registry: Registry): string {
  return detectDynamicRefs(text).reduceRight((rewritten: string, ref) => {
    if (!ref) return rewritten;

    const display = labelToDisplay(ref.type, ref.label, registry as any);
    if (!display) return rewritten;

    const refEnd = ref.position + ref.match.length;
    return rewritten.slice(0, ref.position) + display + rewritten.slice(refEnd);
  }, text);
}
975
-
976
- // =============================================================================
977
- // Raw LaTeX figure detection / translation (docx)
978
- // =============================================================================
979
-
980
/**
 * A raw LaTeX `\begin{figure}...\end{figure}` block found in source markdown.
 *
 * Pandoc strips raw LaTeX silently in docx output, so blocks that cannot be
 * auto-translated are surfaced to the user as warnings.
 */
export interface RawLatexFigure {
  /** Source file the block was found in, when the caller supplied one. */
  file?: string;
  /** 1-based line (within the scanned content) where the block begins. */
  line: number;
  /** Full text of the matched figure block. */
  block: string;
  /**
   * True when the block uses features that are not auto-translated
   * (multiple `\includegraphics`, `\subfloat`, `\rotatebox`, unrecognised
   * width units).
   */
  exotic: boolean;
}
993
-
994
/**
 * Build a fresh global regex matching `\begin{figure}` / `\begin{figure*}`
 * (with an optional `[placement]` argument) up to the nearest
 * `\end{figure}` / `\end{figure*}`. A new instance is returned on every call
 * so callers never share `lastIndex` state.
 */
function makeRawFigureRegex(): RegExp {
  const figureBlock = /\\begin\{figure\*?\}(?:\[[^\]]*\])?[\s\S]*?\\end\{figure\*?\}/g;
  return figureBlock;
}
998
-
999
/**
 * Convert a LaTeX width spec to a markdown image attribute value.
 * - `0.8\textwidth` → `80%`
 * - `\linewidth` → `100%`
 * - `8cm`, `2in`, `12pt` → kept verbatim (inner whitespace removed)
 *
 * Numbers must be well-formed decimals (`8`, `0.8`, `.8`, `8.`). Malformed
 * input such as `1.2.3cm` or `..pt` is rejected; the previous `[\d.]+`
 * pattern let those through verbatim (and read `1.2.3\textwidth` as 120%).
 *
 * @param raw - Value of the `width=` option of `\includegraphics`
 * @returns The converted width, or null for anything we don't translate
 *          (the block stays "exotic").
 */
function convertLatexWidth(raw: string): string | null {
  const trimmed = raw.trim();

  // Coefficient × relative length
  const rel = trimmed.match(/^(\d+(?:\.\d*)?|\.\d+)\s*\\(textwidth|linewidth|columnwidth)$/);
  if (rel) {
    const pct = Math.round(parseFloat(rel[1]!) * 100);
    // Zero (or otherwise unusable) widths are not worth translating.
    if (!isFinite(pct) || pct <= 0) return null;
    return `${pct}%`;
  }

  // Bare relative length
  if (/^\\(textwidth|linewidth|columnwidth)$/.test(trimmed)) return '100%';

  // Absolute units
  if (/^(?:\d+(?:\.\d*)?|\.\d+)\s*(?:cm|mm|in|pt|px|em|ex)$/.test(trimmed)) {
    return trimmed.replace(/\s+/g, '');
  }

  return null;
}
1021
-
1022
/**
 * Extract the balanced `{...}` argument that follows `command` in `text`.
 *
 * Occurrences of `command` are tried left to right. An occurrence is used
 * only when:
 *   - it is not the prefix of a longer command name (so `\caption` does not
 *     match `\captionsetup`);
 *   - after an optional `*` and an optional `[...]` argument, the next
 *     non-whitespace character is `{`;
 *   - that brace group is balanced (backslash-escaped characters such as
 *     `\}` are skipped).
 *
 * @param text - LaTeX source to search
 * @param command - Command including the backslash, e.g. `\caption`
 * @returns The argument text without the outer braces (may be empty), or
 *          null when no usable occurrence exists.
 */
function extractBracedArg(text: string, command: string): string | null {
  const skipWhitespace = (pos: number): number => {
    while (pos < text.length && /\s/.test(text[pos]!)) pos++;
    return pos;
  };

  // Scan from `open` (index of the opening delimiter) to its matching
  // `close`; returns the index of the closer, or -1 when unterminated.
  // Braces nested inside a bracket group shield any `]` they contain.
  const findClose = (open: number, close: '}' | ']'): number => {
    let braceDepth = 0;
    for (let pos = open + 1; pos < text.length; pos++) {
      const ch = text[pos]!;
      if (ch === '\\') { pos++; continue; }
      if (ch === '{') braceDepth++;
      else if (ch === '}') {
        if (braceDepth === 0) return close === '}' ? pos : -1;
        braceDepth--;
      } else if (ch === ']' && close === ']' && braceDepth === 0) {
        return pos;
      }
    }
    return -1;
  };

  let idx = text.indexOf(command);
  while (idx !== -1) {
    let i = idx + command.length;

    // A following letter means this is a different, longer command.
    const isLongerCommand = i < text.length && /[A-Za-z@]/.test(text[i]!);
    if (!isLongerCommand) {
      if (text[i] === '*') i++; // starred variant, e.g. \caption*
      i = skipWhitespace(i);

      // Optional argument, e.g. \caption[short]{long}
      if (text[i] === '[') {
        const optEnd = findClose(i, ']');
        i = optEnd === -1 ? text.length : skipWhitespace(optEnd + 1);
      }

      if (text[i] === '{') {
        const end = findClose(i, '}');
        if (end !== -1) return text.slice(i + 1, end);
      }
    }

    // An empty command cannot advance; stop instead of looping forever.
    if (command.length === 0) break;
    idx = text.indexOf(command, idx + command.length);
  }
  return null;
}
1044
-
1045
/**
 * True if a `\begin{figure}` block contains features we don't auto-translate:
 * `\subfloat`, `\rotatebox`, anything other than exactly one
 * `\includegraphics`, an `\includegraphics` whose path argument cannot be
 * parsed, or a `width=` option that `convertLatexWidth` rejects.
 */
function isExoticFigureBlock(block: string): boolean {
  const usesUnsupportedMacro = /\\subfloat\b/.test(block) || /\\rotatebox\b/.test(block);
  if (usesUnsupportedMacro) return true;

  const graphicsCount = block.match(/\\includegraphics\b/g)?.length ?? 0;
  if (graphicsCount !== 1) return true;

  const graphic = block.match(/\\includegraphics\s*(?:\[([^\]]*)\])?\s*\{([^}]+)\}/);
  if (!graphic) return true;

  // Only the width option matters; other options are ignored here.
  const options = graphic[1] || '';
  const widthSpec = options.match(/(?:^|,)\s*width\s*=\s*([^,]+)/);
  return widthSpec ? !convertLatexWidth(widthSpec[1]!) : false;
}
1058
-
1059
/**
 * Find raw LaTeX figure blocks containing `\includegraphics` in markdown.
 *
 * @param content - Markdown to scan
 * @param file - Optional file name, attached verbatim to each result
 * @returns One entry per matching block; `line` is 1-based within `content`
 *          and points at the line where `\begin{figure}` sits.
 */
export function detectRawLatexFigures(content: string, file?: string): RawLatexFigure[] {
  const found: RawLatexFigure[] = [];
  for (const hit of content.matchAll(makeRawFigureRegex())) {
    const block = hit[0];
    // Figure environments without an image are of no interest here.
    if (!block.includes('\\includegraphics')) continue;

    // Number of lines up to and including the one holding the match start.
    const line = content.slice(0, hit.index).split(/\r?\n/).length;
    found.push({ file, line, block, exotic: isExoticFigureBlock(block) });
  }
  return found;
}
1076
-
1077
/**
 * Translate the 80% case: single `\includegraphics` figure with optional
 * `\caption{...}` and `\label{...}`, wrapped in `\begin{figure}...\end{figure}`,
 * to portable `![caption](path){#fig:label width=N%}` markdown. Exotic blocks
 * (see `isExoticFigureBlock`) and blocks without `\includegraphics` are left
 * untouched.
 *
 * Labels that lack a `prefix:` get `fig:` prepended.
 *
 * @param content - Markdown possibly containing raw LaTeX figure blocks
 * @returns The rewritten markdown and the number of blocks translated
 */
export function translateRawLatexFigures(content: string): { translated: string; translatedCount: number } {
  let translatedCount = 0;

  const translated = content.replace(makeRawFigureRegex(), (figureBlock) => {
    if (!figureBlock.includes('\\includegraphics') || isExoticFigureBlock(figureBlock)) {
      return figureBlock;
    }

    const graphic = figureBlock.match(/\\includegraphics\s*(?:\[([^\]]*)\])?\s*\{([^}]+)\}/);
    if (!graphic) return figureBlock;
    const imagePath = graphic[2]!.trim();

    const widthSpec = (graphic[1] || '').match(/(?:^|,)\s*width\s*=\s*([^,]+)/);
    const width = widthSpec ? convertLatexWidth(widthSpec[1]!) : undefined;
    // Defensive: untranslatable widths are already filtered by isExoticFigureBlock.
    if (widthSpec && !width) return figureBlock;

    const caption = (extractBracedArg(figureBlock, '\\caption') ?? '').trim();
    const rawLabel = extractBracedArg(figureBlock, '\\label');

    const attributes: string[] = [];
    if (rawLabel) {
      const label = rawLabel.trim();
      const hasPrefix = /^[a-z]+:/i.test(label);
      attributes.push(`#${hasPrefix ? label : `fig:${label}`}`);
    }
    if (width) attributes.push(`width=${width}`);

    translatedCount += 1;
    const attributeBlock = attributes.length > 0 ? `{${attributes.join(' ')}}` : '';
    return `![${caption}](${imagePath})${attributeBlock}`;
  });

  return { translated, translatedCount };
}
1120
-
1121
/**
 * Format the warning surfaced for raw LaTeX figure blocks that won't render
 * in docx.
 *
 * @param figs - Blocks to report; each is listed as `file:line` (or
 *               `line N` without a file) followed by the image path when one
 *               can be parsed out of the block
 * @param translateEnabled - Whether auto-translate ran (true = the listed
 *               blocks are exotic leftovers; false = no translation was
 *               attempted)
 * @returns Multi-line warning text ending with a two-line hint
 */
function formatRawLatexFigureWarning(figs: RawLatexFigure[], translateEnabled: boolean): string {
  const reason = translateEnabled ? 'too complex to auto-translate' : 'translateRawFigures: false';

  const entries = figs.map((fig) => {
    const location = fig.file ? `${fig.file}:${fig.line}` : `line ${fig.line}`;
    const graphic = fig.block.match(/\\includegraphics\s*(?:\[[^\]]*\])?\s*\{([^}]+)\}/);
    const imagePath = graphic ? ` ${graphic[1]!.trim()}` : '';
    return ` ${location}${imagePath}`;
  });

  return [
    `${figs.length} raw LaTeX figure block(s) won't render in docx (${reason}).`,
    ...entries,
    ' Hint: use ![caption](path){#fig:label width=80%} for format-portable figures,',
    ' or pass --pandoc-arg=--lua-filter=<your.lua> to translate them yourself.',
  ].join('\n');
}
1141
-
1142
/**
 * Walk section files and gather a warning for any raw LaTeX figure blocks that
 * won't survive the docx build.
 *
 * With auto-translate on (`docx.translateRawFigures` not `false`) only exotic
 * blocks are reported, since the rest get rewritten; when the user opted out,
 * every detected block is reported. Missing or unreadable section files are
 * skipped.
 *
 * @param directory - Project directory containing the section files
 * @param config - Build config; reads `sections` and `docx.translateRawFigures`
 * @returns The formatted warning, or null when there's nothing to warn about
 */
export function collectRawLatexFigureWarning(directory: string, config: BuildConfig): string | null {
  const translateEnabled = config.docx?.translateRawFigures !== false;
  const flagged: RawLatexFigure[] = [];

  for (const section of findSections(directory, config.sections)) {
    const sectionPath = path.join(directory, section);
    if (!fs.existsSync(sectionPath)) continue;

    try {
      const detected = detectRawLatexFigures(fs.readFileSync(sectionPath, 'utf-8'), section);
      // Keep everything when translation is off; otherwise only the exotic leftovers.
      flagged.push(...detected.filter((fig) => !translateEnabled || fig.exotic));
    } catch {
      // ignore unreadable sections
    }
  }

  return flagged.length === 0 ? null : formatRawLatexFigureWarning(flagged, translateEnabled);
}
1169
-
1170
/**
 * Build pandoc arguments for format.
 *
 * Returns only the built-in args derived from config. Passthrough args
 * (config.pandocArgs, config[format].pandocArgs, CLI --pandoc-arg) are
 * appended later in runPandoc so they win against pptx/crossref defaults
 * added there.
 *
 * Argument order: writer (`-t`), output (`-o`), crossref filter, citeproc,
 * then format-specific options.
 *
 * @param format - One of tex, pdf, docx, beamer, pptx (other values get no
 *                 `-t` and no format-specific options)
 * @param config - Build config
 * @param outputPath - Output path, passed to pandoc unchanged
 * @returns Argument list (without the input file)
 */
export function buildPandocArgs(format: string, config: BuildConfig, outputPath: string): string[] {
  const args: string[] = [];
  const setVar = (assignment: string): void => {
    args.push('-V', assignment);
  };
  const isSlides = format === 'beamer' || format === 'pptx';

  // Output format
  switch (format) {
    case 'tex':
      args.push('-t', 'latex');
      if (config.tex.standalone) {
        args.push('-s');
      }
      break;
    case 'pdf':
    case 'docx':
    case 'beamer':
    case 'pptx':
      args.push('-t', format);
      break;
  }

  // Output file. runPandoc sets cwd to the project directory and passes a
  // path relative to that cwd; passing it through here unchanged lets pandoc
  // write to subdirectories like output/<title-slug>.<ext>.
  args.push('-o', outputPath);

  // Crossref filter (if available) - skip for slides
  if (hasPandocCrossref() && !isSlides) {
    args.push('--filter', 'pandoc-crossref');
  }

  // Bibliography
  if (config.bibliography) {
    args.push('--citeproc');
  }

  // Format-specific options
  switch (format) {
    case 'pdf': {
      const pdf = config.pdf;
      if (pdf.template) args.push('--template', pdf.template);
      if (pdf.engine) args.push(`--pdf-engine=${pdf.engine}`);
      if (pdf.mainfont) setVar(`mainfont=${pdf.mainfont}`);
      if (pdf.sansfont) setVar(`sansfont=${pdf.sansfont}`);
      if (pdf.monofont) setVar(`monofont=${pdf.monofont}`);
      // These three are always emitted, whatever their configured value.
      setVar(`documentclass=${pdf.documentclass}`);
      setVar(`fontsize=${pdf.fontsize}`);
      setVar(`geometry:${pdf.geometry}`);
      if (pdf.headerIncludes) args.push('-H', pdf.headerIncludes);
      if (pdf.linestretch !== 1) setVar(`linestretch=${pdf.linestretch}`);
      if (pdf.numbersections) args.push('--number-sections');
      if (pdf.toc) args.push('--toc');
      break;
    }

    case 'docx': {
      if (config.docx.reference) args.push('--reference-doc', config.docx.reference);
      if (config.docx.toc) args.push('--toc');
      break;
    }

    case 'beamer': {
      // Beamer slide options
      const beamer = config.beamer || {};
      if (beamer.theme) setVar(`theme=${beamer.theme}`);
      if (beamer.colortheme) setVar(`colortheme=${beamer.colortheme}`);
      if (beamer.fonttheme) setVar(`fonttheme=${beamer.fonttheme}`);
      if (beamer.aspectratio) setVar(`aspectratio=${beamer.aspectratio}`);
      if (beamer.navigation) setVar(`navigation=${beamer.navigation}`);

      // Speaker notes - default to 'show' which creates presenter view PDF
      // Options: 'show' (dual screen), 'only' (notes only), 'hide' (no notes), false (disabled)
      const notesMode = beamer.notes !== undefined ? beamer.notes : 'show';
      if (notesMode && notesMode !== 'hide') {
        setVar(`classoption=notes=${notesMode}`);
      }

      // Fit images within slide bounds (default: true)
      if (beamer.fit_images !== false) {
        const fitImagesHeader = [
          '\\makeatletter',
          '\\def\\maxwidth{\\ifdim\\Gin@nat@width>\\linewidth\\linewidth\\else\\Gin@nat@width\\fi}',
          '\\def\\maxheight{\\ifdim\\Gin@nat@height>0.75\\textheight 0.75\\textheight\\else\\Gin@nat@height\\fi}',
          '\\makeatother',
          '\\setkeys{Gin}{width=\\maxwidth,height=\\maxheight,keepaspectratio}',
        ].join('\n');
        setVar(`header-includes=${fitImagesHeader}`);
      }

      // Slides need standalone
      args.push('-s');
      break;
    }

    case 'pptx':
      // PowerPoint options - handled separately in preparePptxTemplate
      // Reference doc is set by caller after template generation
      break;
  }

  return args;
}
1293
-
1294
/**
 * Collect passthrough pandoc args for a format in the canonical order:
 * top-level config → format-specific config → CLI extras. Later wins for
 * repeated flags.
 *
 * @param format - Output format; also the config key whose `pandocArgs` is read
 * @param config - Build config
 * @param extraArgs - Extra args (e.g. from the CLI), appended last
 * @returns A new array; inputs are not modified
 */
export function collectPandocPassthroughArgs(
  format: string,
  config: BuildConfig,
  extraArgs: string[] = []
): string[] {
  const perFormat = (config as unknown as Record<string, { pandocArgs?: string[] } | undefined>)[format];
  return [
    ...(config.pandocArgs || []),
    ...(perFormat?.pandocArgs || []),
    ...extraArgs,
  ];
}
1317
-
1318
/**
 * Write `crossref.yaml` into `directory` if needed: only when the file does
 * not exist yet and pandoc-crossref is available. An existing file is never
 * overwritten.
 */
function ensureCrossrefConfig(directory: string, config: BuildConfig): void {
  const crossrefPath = path.join(directory, 'crossref.yaml');

  if (fs.existsSync(crossrefPath)) return;
  if (!hasPandocCrossref()) return;

  fs.writeFileSync(crossrefPath, YAML.stringify(config.crossref), 'utf-8');
}
1328
-
1329
/**
 * Get install instructions for a missing dependency.
 *
 * @param tool - Tool name (`pandoc` or `latex`)
 * @returns The install URL for known tools, otherwise `'Check documentation'`
 */
function getInstallInstructions(tool: string): string {
  // A Map rather than a plain object: with an object literal, names such as
  // 'toString' or 'constructor' resolve to inherited members and would be
  // returned instead of the fallback text.
  const instructions = new Map<string, string>([
    ['pandoc', 'https://pandoc.org/installing.html'],
    ['latex', 'https://www.latex-project.org/get/'],
  ]);
  return instructions.get(tool) || 'Check documentation';
}
1339
-
1340
/**
 * Resolve the directory where final outputs should land.
 *
 * @param directory - Project directory
 * @param config - Build config; reads `outputDir`
 * @returns `config.outputDir` as-is when absolute, joined onto `directory`
 *          when relative, or `directory` itself when it is null/empty
 */
export function resolveOutputDir(directory: string, config: BuildConfig): string {
  const configured = config.outputDir;
  if (configured && path.isAbsolute(configured)) return configured;
  return configured ? path.join(directory, configured) : directory;
}
1349
-
1350
/** File extension (with leading dot) for each supported pandoc format. */
const FORMAT_EXTENSIONS: Record<string, string> = {
  tex: '.tex',
  pdf: '.pdf',
  docx: '.docx',
  beamer: '.pdf',
  pptx: '.pptx',
};

/**
 * Get file extension for a format, defaulting to `.pdf`.
 *
 * Only the table's own keys are consulted, so format names that collide with
 * inherited object members (`constructor`, `toString`, ...) fall back to
 * `.pdf` instead of returning a non-string value.
 *
 * @param format - Output format name
 * @returns Extension including the leading dot
 */
export function getFormatExtension(format: string): string {
  const known = Object.prototype.hasOwnProperty.call(FORMAT_EXTENSIONS, format)
    ? FORMAT_EXTENSIONS[format]
    : undefined;
  return known ?? '.pdf';
}
1363
-
1364
/**
 * Slugify a title for use as a default output filename.
 *
 * The title is lower-cased, every run of characters outside `a-z0-9` becomes
 * a single `-`, and leading/trailing dashes are removed. Slugs longer than
 * MAX_TITLE_FILENAME_LENGTH are cut at the last `-` inside that limit so
 * words stay whole — unless that dash sits in the first half of the limit,
 * in which case the slug is hard-cut at the limit (degenerate titles with few
 * or no separators).
 *
 * @param title - Document title
 * @returns The slug, or `'paper'` when the title is empty or slugifies to nothing
 */
export function slugifyTitle(title: string): string {
  const fallback = 'paper';
  if (!title) return fallback;

  const slug = title
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+|-+$/g, '');
  if (!slug) return fallback;
  if (slug.length <= MAX_TITLE_FILENAME_LENGTH) return slug;

  const hardCut = slug.slice(0, MAX_TITLE_FILENAME_LENGTH);
  const wordBoundary = hardCut.lastIndexOf('-');
  return wordBoundary >= MAX_TITLE_FILENAME_LENGTH / 2 ? slug.slice(0, wordBoundary) : hardCut;
}
1384
-
1385
/**
 * Make sure `name` ends with `ext`, comparing case-insensitively.
 *
 * A name that already carries the extension (in any letter case) is returned
 * untouched. Otherwise `ext` is appended — never substituted — so a name with
 * a different extension (e.g. `output.docx` when building tex) becomes
 * `output.docx.tex`, which is loud enough to flag the misconfiguration.
 *
 * @param name - File name to check
 * @param ext - Required extension, including the leading dot
 * @returns `name`, with `ext` appended when it was missing
 */
function ensureExtension(name: string, ext: string): string {
  const alreadyHasExtension = name.toLowerCase().endsWith(ext.toLowerCase());
  return alreadyHasExtension ? name : `${name}${ext}`;
}
1398
-
1399
/**
 * Resolve the final output path for a build.
 *
 * Name source, in priority order: `options.cliOverride` (-o flag) >
 * `config.output[format]` > slugified `config.title`. (Callers that need to
 * force a path bypass this function altogether.)
 *
 * Explicit name (CLI or config):
 *   - an absolute path keeps its own directory; a relative one lands
 *     directly in the resolved output directory — only its base name is
 *     used;
 *   - the last extension of the base name is stripped, `suffix` is appended
 *     to the stem, and the format's canonical extension is ensured.
 *
 * Fallback: `<outputDir>/<slug><suffix><ext>`.
 *
 * NOTE(review): the suffix is applied in both branches. Earlier
 * documentation said it is suppressed for explicit names — confirm which
 * behaviour is intended before relying on either.
 *
 * @param directory - Project directory
 * @param config - Build config; reads `output`, `outputDir` and `title`
 * @param format - Output format; selects the extension and the `config.output` entry
 * @param options - `cliOverride`: name from the -o flag; `suffix`: text
 *                  appended before the extension (e.g. "-changes", "-slides")
 * @returns The resolved output path
 */
export function resolveOutputPath(
  directory: string,
  config: BuildConfig,
  format: string,
  options: { cliOverride?: string; suffix?: string } = {}
): string {
  const { cliOverride: fromCli, suffix: nameSuffix = '' } = options;
  const extension = getFormatExtension(format);
  const requested = fromCli ?? config.output?.[format];

  if (!requested) {
    const slug = slugifyTitle(config.title);
    return path.join(resolveOutputDir(directory, config), `${slug}${nameSuffix}${extension}`);
  }

  const targetDir = path.isAbsolute(requested)
    ? path.dirname(requested)
    : resolveOutputDir(directory, config);
  // Drop the trailing ".ext" (if any) so the suffix lands before the extension.
  const stem = path.basename(requested).replace(/\.[^./\\]+$/, '');
  return path.join(targetDir, ensureExtension(`${stem}${nameSuffix}`, extension));
}
1435
-
1436
- /**
1437
- * Run pandoc build
1438
- */
1439
- export async function runPandoc(
1440
- inputPath: string,
1441
- format: string,
1442
- config: BuildConfig,
1443
- options: BuildOptions = {}
1444
- ): Promise<PandocResult> {
1445
- const directory = path.dirname(inputPath);
1446
-
1447
- // outputPath (internal force) wins over the resolver. For beamer, we keep
1448
- // the `-slides` suffix on the slug fallback to distinguish from a regular
1449
- // PDF build; when the user supplies an explicit name, they pick their own.
1450
- const suffix = format === 'beamer' ? '-slides' : '';
1451
- const outputPath = options.outputPath
1452
- ?? resolveOutputPath(directory, config, format, {
1453
- cliOverride: options.output,
1454
- suffix,
1455
- });
1456
-
1457
- if (!options.outputPath) {
1458
- const outDir = path.dirname(outputPath);
1459
- if (!fs.existsSync(outDir)) {
1460
- fs.mkdirSync(outDir, { recursive: true });
1461
- }
1462
- }
1463
-
1464
- // Ensure crossref.yaml exists
1465
- ensureCrossrefConfig(directory, config);
1466
-
1467
- // Pandoc runs with cwd = directory, so pass the output path relative to it.
1468
- const args = buildPandocArgs(format, config, path.relative(directory, outputPath) || path.basename(outputPath));
1469
-
1470
- // Handle PPTX reference template and themes
1471
- let pptxMediaDir: string | null = null;
1472
- if (format === 'pptx') {
1473
- const pptx = config.pptx || {};
1474
-
1475
- // Determine media directory (default: pptx/media or slides/media)
1476
- let mediaDir = pptx.media;
1477
- if (!mediaDir) {
1478
- if (fs.existsSync(path.join(directory, 'pptx', 'media'))) {
1479
- mediaDir = path.join(directory, 'pptx', 'media');
1480
- } else if (fs.existsSync(path.join(directory, 'slides', 'media'))) {
1481
- mediaDir = path.join(directory, 'slides', 'media');
1482
- }
1483
- } else if (!path.isAbsolute(mediaDir)) {
1484
- mediaDir = path.join(directory, mediaDir);
1485
- }
1486
- pptxMediaDir = mediaDir || null;
1487
-
1488
- // Determine reference doc: custom reference overrides theme
1489
- let referenceDoc: string | null = null;
1490
- if (pptx.reference && fs.existsSync(path.join(directory, pptx.reference))) {
1491
- // Custom reference doc takes precedence
1492
- referenceDoc = path.join(directory, pptx.reference);
1493
- } else {
1494
- // Use built-in theme (default: 'default')
1495
- const themeName = pptx.theme || 'default';
1496
- const themePath = getThemePath(themeName);
1497
- if (themePath && fs.existsSync(themePath)) {
1498
- referenceDoc = themePath;
1499
- }
1500
- }
1501
-
1502
- if (referenceDoc) {
1503
- args.push('--reference-doc', referenceDoc);
1504
- }
1505
-
1506
- // Add color filter for PPTX (handles [text]{color=#RRGGBB} syntax)
1507
- const colorFilterPath = path.join(path.dirname(new URL(import.meta.url).pathname.replace(/^\/([A-Z]:)/, '$1')), 'pptx-color-filter.lua');
1508
- if (fs.existsSync(colorFilterPath)) {
1509
- args.push('--lua-filter', colorFilterPath);
1510
- }
1511
- }
1512
-
1513
- // Add crossref metadata file if exists (skip for slides - they don't use crossref)
1514
- if (format !== 'beamer' && format !== 'pptx') {
1515
- const crossrefPath = path.join(directory, 'crossref.yaml');
1516
- if (fs.existsSync(crossrefPath) && hasPandocCrossref()) {
1517
- // Use basename since we set cwd to directory
1518
- args.push('--metadata-file', 'crossref.yaml');
1519
- }
1520
- }
1521
-
1522
- // Passthrough args go last so they win against built-in defaults.
1523
- args.push(...collectPandocPassthroughArgs(format, config, options.pandocArgs));
1524
-
1525
- // Input file (use basename since we set cwd to directory)
1526
- args.push(path.basename(inputPath));
1527
-
1528
- if (options.verbose) {
1529
- const quoted = args.map(a => /[\s"'$`]/.test(a) ? `"${a.replace(/"/g, '\\"')}"` : a).join(' ');
1530
- console.error(`[pandoc ${format}] (cwd: ${directory})`);
1531
- console.error(` pandoc ${quoted}`);
1532
- }
1533
-
1534
- return new Promise((resolve) => {
1535
- const pandoc: ChildProcess = spawn('pandoc', args, {
1536
- cwd: directory,
1537
- stdio: ['ignore', 'pipe', 'pipe'],
1538
- });
1539
-
1540
- let stderr = '';
1541
- pandoc.stderr?.on('data', (data) => {
1542
- stderr += data.toString();
1543
- });
1544
-
1545
- pandoc.on('close', async (code) => {
1546
- if (code === 0) {
1547
- // For PPTX, post-process to add slide numbers, buildup colors, and logos
1548
- if (format === 'pptx') {
1549
- try {
1550
- // Inject slide numbers into content slides only
1551
- await injectSlideNumbers(outputPath);
1552
- } catch (e) {
1553
- // Slide number injection failed but output was created
1554
- }
1555
- try {
1556
- // Apply colors (default text color, title color, buildup greying)
1557
- const pptxConfig = config.pptx || {};
1558
- const colorsConfig = pptxConfig.colors || {};
1559
- const buildupConfig = pptxConfig.buildup || {};
1560
- // Merge colors and buildup config for applyBuildupColors
1561
- const colorConfig = {
1562
- default: colorsConfig.default,
1563
- title: colorsConfig.title,
1564
- grey: buildupConfig.grey,
1565
- accent: buildupConfig.accent,
1566
- enabled: buildupConfig.enabled
1567
- };
1568
- await applyBuildupColors(outputPath, colorConfig);
1569
- } catch (e) {
1570
- // Color application failed but output was created
1571
- }
1572
- // Inject logos into cover slide (if media dir configured)
1573
- if (pptxMediaDir) {
1574
- try {
1575
- await injectMediaIntoPptx(outputPath, pptxMediaDir);
1576
- } catch (e) {
1577
- // Logo injection failed but output was created
1578
- }
1579
- }
1580
- }
1581
-
1582
- // Run user postprocess scripts
1583
- const postResult = await runPostprocess(outputPath, format, config as unknown as Parameters<typeof runPostprocess>[2], options);
1584
- if (!postResult.success && options.verbose) {
1585
- console.error(`Postprocess warning: ${postResult.error}`);
1586
- }
1587
-
1588
- resolve({ outputPath, success: true });
1589
- } else {
1590
- resolve({ outputPath, success: false, error: stderr || `Exit code ${code}` });
1591
- }
1592
- });
1593
-
1594
- pandoc.on('error', (err) => {
1595
- resolve({ outputPath, success: false, error: err.message });
1596
- });
1597
- });
1598
- }
1599
-
1600
/**
 * Full build pipeline.
 *
 * Checks the external tools, combines the section files into paper.md and
 * then, for each requested format, prepares a format-specific copy and runs
 * pandoc on it.
 *
 * @param directory - Project directory
 * @param formats - Formats to build; `'all'` expands to pdf, docx and tex
 * @param options - Build options; `options.config` is used instead of loading
 *   the config from `directory` when provided
 * @returns Per-format results, the combined paper path, warnings, and the
 *   forward-reference / auto-injected-references bookkeeping
 * @throws {Error} If pandoc is not available
 */
export async function build(
  directory: string,
  formats: string[] = ['pdf', 'docx'],
  options: BuildOptions = {}
): Promise<FullBuildResult> {
  // Pandoc is mandatory; everything else only degrades the output.
  if (!hasPandoc()) {
    throw new Error(`Pandoc not found. Install with: ${getInstallInstructions('pandoc')}\nOr run: rev doctor`);
  }

  const warnings: string[] = [];
  const wantsAll = formats.includes('all');

  // LaTeX only matters when a PDF is going to be produced
  if ((formats.includes('pdf') || wantsAll) && !hasLatex()) {
    warnings.push(`LaTeX not found - PDF generation may fail. Install with: ${getInstallInstructions('latex')}`);
  }

  if (!hasPandocCrossref()) {
    warnings.push('pandoc-crossref not found - figure/table numbering will not work');
  }

  // A config handed in by the caller wins over the one on disk
  const config = options.config || loadConfig(directory);

  // Combine sections into paper.md; combineSections reports back through
  // the underscore-prefixed fields of the options copy.
  const combineOptions: CombineOptions = { ...options };
  const paperPath = combineSections(directory, config, combineOptions);
  const forwardRefsResolved = combineOptions._forwardRefsResolved || 0;
  const refsAutoInjected = combineOptions._refsAutoInjected || false;

  // Expand 'all' to the concrete format list
  const targets = wantsAll ? ['pdf', 'docx', 'tex'] : formats;

  if (targets.includes('docx')) {
    // Persist the image registry so a later import can restore proper image
    // syntax from the Word document.
    const paperContent = fs.readFileSync(paperPath, 'utf-8');
    const crossrefReg = buildRegistry(directory, config.sections);
    const imageReg = buildImageRegistry(paperContent, crossrefReg as any);
    if ((imageReg as any).figures?.length > 0) {
      writeImageRegistry(directory, imageReg);
    }

    // Raw LaTeX figure blocks are dropped silently by pandoc in docx output,
    // so surface whatever the helper reports.
    const rawFigWarning = collectRawLatexFigureWarning(directory, config);
    if (rawFigWarning) {
      warnings.push(rawFigWarning);
    }
  }

  const results: BuildResult[] = [];

  for (const format of targets) {
    // Format-specific temporary copy of the paper
    const preparedPath = prepareForFormat(paperPath, format, config, options);

    const outcome = await runPandoc(preparedPath, format, config, options);
    results.push({ format, ...outcome });

    // Best-effort removal of the temporary copy
    try {
      fs.unlinkSync(preparedPath);
    } catch {
      // Ignore cleanup errors
    }
  }

  return { results, paperPath, warnings, forwardRefsResolved, refsAutoInjected };
}
1678
-
1679
/**
 * Render build results as a human-readable summary, one line per format.
 *
 * Successful builds show the output file's base name; failed builds show the
 * recorded error text.
 *
 * @param results - Results returned by the build
 * @returns Newline-joined summary (empty string for no results)
 */
export function formatBuildResults(results: BuildResult[]): string {
  return results
    .map((r) => {
      const label = r.format.toUpperCase();
      return r.success
        ? ` ${label}: ${path.basename(r.outputPath!)}`
        : ` ${label}: FAILED - ${r.error}`;
    })
    .join('\n');
}
1
+ /**
2
+ * Build system - combines sections → paper.md → PDF/DOCX/TEX
3
+ *
4
+ * Features:
5
+ * - Reads rev.yaml config
6
+ * - Combines section files into paper.md (persisted)
7
+ * - Strips annotations appropriately per output format
8
+ * - Runs pandoc with crossref filter
9
+ */
10
+
11
+ import * as fs from 'fs';
12
+ import * as path from 'path';
13
+ import { fileURLToPath } from 'url';
14
+ import { execSync, spawn, ChildProcess } from 'child_process';
15
+ import YAML from 'yaml';
16
+ import { stripAnnotations } from './annotations.js';
17
+ import { buildRegistry, labelToDisplay, detectDynamicRefs, resolveForwardRefs, resolveSupplementaryRefs } from './crossref.js';
18
+ import { processVariables, hasVariables } from './variables.js';
19
+ import { processSlideMarkdown, hasSlideSyntax } from './slides.js';
20
+ import { generatePptxTemplate, templateNeedsRegeneration, injectMediaIntoPptx, injectSlideNumbers, applyThemeFonts, applyCentering, applyBuildupColors } from './pptx-template.js';
21
+ import { getThemePath, getThemeNames, PPTX_THEMES } from './pptx-themes.js';
22
+ import { runPostprocess } from './postprocess.js';
23
+ import { hasPandoc, hasPandocCrossref, hasLatex } from './dependencies.js';
24
+ import { buildImageRegistry, writeImageRegistry } from './image-registry.js';
25
+ import type { Author, JournalFormatting } from './types.js';
26
+ import { getJournalProfile } from './journals.js';
27
+ import { resolveCSL } from './csl.js';
28
+ import {
29
+ type MacroDef,
30
+ mergeMacros,
31
+ generateLatexPreamble,
32
+ writeMacrosSidecar,
33
+ getMacroFilterPath,
34
+ } from './macros.js';
35
+
36
+ // =============================================================================
37
+ // Constants
38
+ // =============================================================================
39
+
40
+ /** Supported output formats */
41
+ const SUPPORTED_FORMATS = ['pdf', 'docx', 'tex', 'beamer', 'pptx'] as const;
42
+
43
+ /**
44
+ * Maximum length for slugified-title output filenames. Only used when no
45
+ * explicit `output:` filename is configured. Long titles are truncated at the
46
+ * last `-` boundary at-or-before this length so words stay intact (the old
47
+ * blind `.slice(0, 50)` cut mid-word).
48
+ */
49
+ const MAX_TITLE_FILENAME_LENGTH = 80;
50
+
51
+ // =============================================================================
52
+ // Interfaces
53
+ // =============================================================================
54
+
55
+ export interface CrossrefConfig {
56
+ figureTitle?: string;
57
+ tableTitle?: string;
58
+ figPrefix?: string | string[];
59
+ tblPrefix?: string | string[];
60
+ secPrefix?: string | string[];
61
+ linkReferences?: boolean;
62
+ }
63
+
64
+ export interface PdfConfig {
65
+ template?: string | null;
66
+ headerIncludes?: string | null;
67
+ documentclass?: string;
68
+ fontsize?: string;
69
+ geometry?: string;
70
+ linestretch?: number;
71
+ numbersections?: boolean;
72
+ toc?: boolean;
73
+ /**
74
+ * LaTeX engine: pdflatex (default), xelatex, lualatex, tectonic, etc.
75
+ * xelatex/lualatex are required for native UTF-8 rendering of Latin-Extended
76
+ * diacritics (Czech/Polish/Croatian/Spanish author names, species epithets).
77
+ */
78
+ engine?: string;
79
+ /** Roman/serif main font (xelatex/lualatex only — uses fontspec). */
80
+ mainfont?: string;
81
+ /** Sans-serif font (xelatex/lualatex only). */
82
+ sansfont?: string;
83
+ /** Monospace font (xelatex/lualatex only). */
84
+ monofont?: string;
85
+ /** Extra pandoc args appended for this format (after top-level pandocArgs). */
86
+ pandocArgs?: string[];
87
+ }
88
+
89
+ export interface DocxConfig {
90
+ reference?: string | null;
91
+ keepComments?: boolean;
92
+ affiliationNewline?: boolean;
93
+ toc?: boolean;
94
+ pandocArgs?: string[];
95
+ /**
96
+ * Auto-translate the common-shape raw `\begin{figure}...\end{figure}` block
97
+ * to portable `![caption](path){#fig:label width=N%}` markdown so figures
98
+ * survive the docx build (pandoc otherwise drops raw LaTeX silently).
99
+ * Default true. Set false to opt out — blocks then warn and are left alone.
100
+ */
101
+ translateRawFigures?: boolean;
102
+ }
103
+
104
+ export interface TexConfig {
105
+ standalone?: boolean;
106
+ pandocArgs?: string[];
107
+ }
108
+
109
+ export interface BeamerConfig {
110
+ theme?: string;
111
+ colortheme?: string | null;
112
+ fonttheme?: string | null;
113
+ aspectratio?: string | null;
114
+ navigation?: string | null;
115
+ section?: boolean;
116
+ notes?: string | false;
117
+ fit_images?: boolean;
118
+ pandocArgs?: string[];
119
+ }
120
+
121
+ export interface PptxConfig {
122
+ theme?: string;
123
+ reference?: string | null;
124
+ media?: string | null;
125
+ colors?: {
126
+ default?: string;
127
+ title?: string;
128
+ };
129
+ buildup?: {
130
+ grey?: string;
131
+ accent?: string;
132
+ enabled?: boolean;
133
+ };
134
+ pandocArgs?: string[];
135
+ }
136
+
137
+ export interface TablesConfig {
138
+ nowrap?: string[];
139
+ }
140
+
141
+ export interface PostprocessConfig {
142
+ pdf?: string | null;
143
+ docx?: string | null;
144
+ tex?: string | null;
145
+ pptx?: string | null;
146
+ beamer?: string | null;
147
+ all?: string | null;
148
+ [key: string]: string | null | undefined;
149
+ }
150
+
151
+ export interface BuildConfig {
152
+ title: string;
153
+ authors: (string | Author)[];
154
+ affiliations: Record<string, string>;
155
+ sections: string[];
156
+ bibliography: string | null;
157
+ csl: string | null;
158
+ crossref: CrossrefConfig;
159
+ pdf: PdfConfig;
160
+ docx: DocxConfig;
161
+ tex: TexConfig;
162
+ beamer: BeamerConfig;
163
+ pptx: PptxConfig;
164
+ tables: TablesConfig;
165
+ postprocess: PostprocessConfig;
166
+ /**
167
+ * User-declared placeholder macros. Merged with the built-in macros
168
+ * (currently \tofill). Each entry overrides a built-in by name.
169
+ *
170
+ * See lib/macros.ts for the per-format rendering rules.
171
+ */
172
+ macros?: MacroDef[];
173
+ /**
174
+ * Directory (relative to the project) where final outputs land. Created on
175
+ * demand. Set to null/empty to keep outputs alongside paper.md (legacy
176
+ * behavior).
177
+ */
178
+ outputDir?: string | null;
179
+ /**
180
+ * Per-format output filenames. Keys are format names (pdf/docx/tex/beamer/
181
+ * pptx); values are paths. Relative paths resolve under outputDir; absolute
182
+ * paths are honored as-is. Extension is added if missing. CLI `-o` wins
183
+ * over this map.
184
+ */
185
+ output?: Record<string, string>;
186
+ /**
187
+ * Extra pandoc args applied to every format. Format-specific args
188
+ * (e.g. docx.pandocArgs) are appended *after* these, and CLI --pandoc-arg
189
+ * values are appended last.
190
+ */
191
+ pandocArgs?: string[];
192
+ _configPath?: string | null;
193
+ }
194
+
195
+ export interface BuildResult {
196
+ format: string;
197
+ success: boolean;
198
+ outputPath?: string;
199
+ error?: string;
200
+ }
201
+
202
+ interface BuildOptions {
203
+ verbose?: boolean;
204
+ config?: BuildConfig;
205
+ /**
206
+ * Internal: forces the exact output path. Used by dual-mode/temp builds that
207
+ * route to specific temp files. Bypasses the `output:` resolver.
208
+ */
209
+ outputPath?: string;
210
+ /**
211
+ * CLI override (`-o, --output <path>`). Beats `config.output[format]` but
212
+ * loses to `options.outputPath`. Relative paths resolve under outputDir;
213
+ * absolute paths bypass outputDir.
214
+ */
215
+ output?: string;
216
+ crossref?: boolean;
217
+ /** Extra pandoc args from CLI (--pandoc-arg). Appended after config args. */
218
+ pandocArgs?: string[];
219
+ _refsAutoInjected?: boolean;
220
+ _forwardRefsResolved?: number;
221
+ }
222
+
223
+ interface CombineOptions extends BuildOptions {
224
+ _refsAutoInjected?: boolean;
225
+ }
226
+
227
+ interface VariablesContext {
228
+ sectionContents: string[];
229
+ }
230
+
231
+ interface PandocResult {
232
+ outputPath: string;
233
+ success: boolean;
234
+ error?: string;
235
+ }
236
+
237
+ interface FullBuildResult {
238
+ results: BuildResult[];
239
+ paperPath: string;
240
+ warnings: string[];
241
+ forwardRefsResolved: number;
242
+ refsAutoInjected?: boolean;
243
+ }
244
+
245
+ interface DynamicRef {
246
+ type: string;
247
+ label: string;
248
+ match: string;
249
+ position: number;
250
+ }
251
+
252
+ interface Registry {
253
+ figures: Map<string, unknown>;
254
+ tables: Map<string, unknown>;
255
+ equations: Map<string, unknown>;
256
+ byNumber: {
257
+ fig?: Map<number, string>;
258
+ figS?: Map<number, string>;
259
+ tbl?: Map<number, string>;
260
+ tblS?: Map<number, string>;
261
+ eq?: Map<number, string>;
262
+ };
263
+ }
264
+
265
+ /**
266
+ * Default rev.yaml configuration
267
+ */
268
+ export const DEFAULT_CONFIG: BuildConfig = {
269
+ title: 'Untitled Document',
270
+ authors: [],
271
+ affiliations: {},
272
+ sections: [],
273
+ bibliography: null,
274
+ csl: null,
275
+ crossref: {
276
+ figureTitle: 'Figure',
277
+ tableTitle: 'Table',
278
+ figPrefix: ['Fig.', 'Figs.'],
279
+ tblPrefix: ['Table', 'Tables'],
280
+ secPrefix: ['Section', 'Sections'],
281
+ linkReferences: true,
282
+ },
283
+ pdf: {
284
+ template: null,
285
+ documentclass: 'article',
286
+ fontsize: '12pt',
287
+ geometry: 'margin=1in',
288
+ linestretch: 1.5,
289
+ numbersections: false,
290
+ toc: false,
291
+ },
292
+ docx: {
293
+ reference: null,
294
+ keepComments: false,
295
+ affiliationNewline: true,
296
+ toc: false,
297
+ translateRawFigures: true,
298
+ },
299
+ tex: {
300
+ standalone: true,
301
+ },
302
+ // Slide formats
303
+ beamer: {
304
+ theme: 'default',
305
+ colortheme: null,
306
+ fonttheme: null,
307
+ aspectratio: null, // '169' for 16:9, '43' for 4:3
308
+ navigation: null, // 'horizontal', 'vertical', 'frame', 'empty'
309
+ section: true, // section divider slides
310
+ notes: 'show', // 'show' (presenter view), 'only' (notes only), 'hide', or false
311
+ fit_images: true, // scale images to fit within slide bounds
312
+ },
313
+ pptx: {
314
+ theme: 'default', // Built-in theme: default, dark, academic, minimal, corporate
315
+ reference: null, // Custom reference-doc (overrides theme)
316
+ media: null, // directory with logo images (e.g., logo-left.png, logo-right.png)
317
+ },
318
+ // Table formatting
319
+ tables: {
320
+ nowrap: [], // Column headers to apply nowrap formatting (converts Normal() → $\mathcal{N}()$ etc.)
321
+ },
322
+ // Postprocess scripts
323
+ postprocess: {
324
+ pdf: null,
325
+ docx: null,
326
+ tex: null,
327
+ pptx: null,
328
+ beamer: null,
329
+ all: null, // Runs after any format
330
+ },
331
+ // Placeholder/highlight macros. Defaults are the built-ins from
332
+ // lib/macros.ts; users append their own here.
333
+ macros: [],
334
+ // Final outputs land here (created on demand). Set to null or '' to keep
335
+ // outputs in the project root.
336
+ outputDir: 'output',
337
+ };
338
+
339
+ // =============================================================================
340
+ // Public API
341
+ // =============================================================================
342
+
343
/**
 * Layer a journal profile's formatting onto a config.
 * Priority: DEFAULT_CONFIG < journal formatting < rev.yaml explicit settings.
 *
 * A field counts as "explicitly set" when its current value differs from the
 * corresponding DEFAULT_CONFIG value (compared via JSON serialization).
 *
 * @param config - Config already merged with the defaults
 * @param formatting - Formatting block of the journal profile
 * @param directory - Project directory, passed to the CSL resolver
 * @returns A shallow copy of `config` with the journal values applied
 */
export function mergeJournalFormatting(config: BuildConfig, formatting: JournalFormatting, directory: string): BuildConfig {
  // Copy `user`, then take each defined journal value whose slot still holds
  // the default.
  const fillUnset = (user: object | undefined, defaults: object, journal: object): Record<string, unknown> => {
    const userValues = (user || {}) as Record<string, unknown>;
    const defaultValues = defaults as Record<string, unknown>;
    const result: Record<string, unknown> = { ...user };
    for (const [key, value] of Object.entries(journal)) {
      if (value === undefined) continue;
      if (JSON.stringify(userValues[key]) === JSON.stringify(defaultValues[key])) {
        result[key] = value;
      }
    }
    return result;
  };

  const merged = { ...config };

  // CSL: only when the user has not chosen one. Prefer the locally resolved
  // path and otherwise keep the bare name — pandoc --citeproc can sometimes
  // resolve it, and the user can fetch it with rev profiles --fetch-csl.
  if (formatting.csl && !config.csl) {
    merged.csl = resolveCSL(formatting.csl, directory) || formatting.csl;
  }

  if (formatting.pdf) {
    merged.pdf = fillUnset(config.pdf, DEFAULT_CONFIG.pdf, formatting.pdf) as PdfConfig;
  }

  if (formatting.docx) {
    merged.docx = fillUnset(config.docx, DEFAULT_CONFIG.docx, formatting.docx) as DocxConfig;
  }

  if (formatting.crossref) {
    merged.crossref = fillUnset(config.crossref, DEFAULT_CONFIG.crossref, formatting.crossref) as CrossrefConfig;
  }

  return merged;
}
405
+
406
/**
 * In-place: move hyphenated `pandoc-args` to camelCase `pandocArgs` on an
 * object and normalize the result to a list of strings. Idempotent.
 *
 * - An existing `pandocArgs` value wins over `pandoc-args`.
 * - A single scalar is coerced into a one-element array.
 * - null entries are dropped and the remaining entries are stringified, so a
 *   YAML number such as `80` becomes the string `"80"`.
 * - An empty key (`pandoc-args:` with no value parses to null) is treated as
 *   absent instead of producing `[null]`.
 * - The hyphenated key is always removed.
 *
 * @param obj - Parsed config object (or per-format sub-object) to normalize;
 *   anything that is not an object is ignored
 */
function normalizePandocArgsKey(obj: Record<string, unknown>): void {
  if (!obj || typeof obj !== 'object') return;

  const toArgList = (value: unknown): string[] =>
    (Array.isArray(value) ? value : [value])
      .filter((item) => item !== null && item !== undefined)
      .map((item) => String(item));

  const hyphenated = obj['pandoc-args'];
  if (obj.pandocArgs !== undefined && obj.pandocArgs !== null) {
    obj.pandocArgs = toArgList(obj.pandocArgs);
  } else if (hyphenated !== undefined && hyphenated !== null) {
    obj.pandocArgs = toArgList(hyphenated);
  }
  delete obj['pandoc-args'];
}
419
+
420
/**
 * Load rev.yaml from a project directory and merge it with the defaults.
 *
 * When rev.yaml is absent the defaults are returned with `_configPath: null`.
 * When the file names a `journal` whose profile has formatting, that
 * formatting is layered in between the defaults and the user's settings.
 *
 * @param directory - Project directory path
 * @returns Merged config with defaults
 * @throws {TypeError} If directory is not a string
 * @throws {Error} If rev.yaml exists but loading it fails
 */
export function loadConfig(directory: string): BuildConfig {
  if (typeof directory !== 'string') {
    throw new TypeError(`directory must be a string, got ${typeof directory}`);
  }

  const configPath = path.join(directory, 'rev.yaml');
  if (!fs.existsSync(configPath)) {
    return { ...DEFAULT_CONFIG, _configPath: null };
  }

  try {
    const userConfig = YAML.parse(fs.readFileSync(configPath, 'utf-8')) || {};

    // Hyphenated `pandoc-args` (pandoc's own spelling, and the documented
    // one) is accepted next to camelCase `pandocArgs`, both at the top level
    // and inside each per-format section.
    normalizePandocArgsKey(userConfig);
    (['pdf', 'docx', 'tex', 'beamer', 'pptx'] as const).forEach((fmt) => {
      const formatSection = userConfig[fmt];
      if (formatSection && typeof formatSection === 'object') {
        normalizePandocArgsKey(formatSection);
      }
    });

    // One level deep: each nested section is merged key by key
    const merged: BuildConfig = {
      ...DEFAULT_CONFIG,
      ...userConfig,
      crossref: { ...DEFAULT_CONFIG.crossref, ...userConfig.crossref },
      pdf: { ...DEFAULT_CONFIG.pdf, ...userConfig.pdf },
      docx: { ...DEFAULT_CONFIG.docx, ...userConfig.docx },
      tex: { ...DEFAULT_CONFIG.tex, ...userConfig.tex },
      beamer: { ...DEFAULT_CONFIG.beamer, ...userConfig.beamer },
      pptx: { ...DEFAULT_CONFIG.pptx, ...userConfig.pptx },
      tables: { ...DEFAULT_CONFIG.tables, ...userConfig.tables },
      postprocess: { ...DEFAULT_CONFIG.postprocess, ...userConfig.postprocess },
      _configPath: configPath,
    };

    // Journal formatting sits between DEFAULT_CONFIG and the user settings
    const journalFormatting = userConfig.journal
      ? getJournalProfile(userConfig.journal)?.formatting
      : undefined;

    return journalFormatting
      ? mergeJournalFormatting(merged, journalFormatting, directory)
      : merged;
  } catch (err) {
    throw new Error(`Failed to parse rev.yaml: ${(err as Error).message}`);
  }
}
481
+
482
/**
 * Find section files in a directory.
 *
 * Resolution order:
 *  1. `configSections` (from rev.yaml), in the given order; missing files are
 *     skipped with a warning.
 *  2. The `sections` map of sections.yaml, sorted by each entry's `order`
 *     (entries without one sort as 999); missing files are skipped.
 *  3. Every `.md` file in the directory except paper.md, readme.md and
 *     claude.md (case-insensitive), sorted alphabetically.
 *
 * @param directory - Project directory path
 * @param configSections - Sections from rev.yaml (optional)
 * @returns Ordered list of section file names
 * @throws {TypeError} If directory is not a string
 */
export function findSections(directory: string, configSections: string[] = []): string[] {
  if (typeof directory !== 'string') {
    throw new TypeError(`directory must be a string, got ${typeof directory}`);
  }

  // If sections specified in config, use that order
  if (configSections.length > 0) {
    const sections: string[] = [];
    for (const section of configSections) {
      if (fs.existsSync(path.join(directory, section))) {
        sections.push(section);
      } else {
        console.warn(`Warning: Section file not found: ${section}`);
      }
    }
    return sections;
  }

  // Try sections.yaml
  const sectionsYamlPath = path.join(directory, 'sections.yaml');
  if (fs.existsSync(sectionsYamlPath)) {
    try {
      const sectionsConfig = YAML.parse(fs.readFileSync(sectionsYamlPath, 'utf-8'));
      const declared = sectionsConfig?.sections;
      if (declared) {
        // An entry written without a value (`intro.md:`) parses to null, so
        // read `order` null-safely; otherwise one bare entry would throw and
        // discard the whole declared order in favor of the glob below.
        return Object.entries(declared)
          .sort((a: [string, any], b: [string, any]) => (a[1]?.order ?? 999) - (b[1]?.order ?? 999))
          .map(([file]) => file)
          .filter((f) => fs.existsSync(path.join(directory, f)));
      }
    } catch (e) {
      // Best effort: an unreadable sections.yaml falls through to the glob
      if (process.env.DEBUG) {
        const error = e as Error;
        console.warn('build: YAML parse error in sections.yaml:', error.message);
      }
    }
  }

  // Default: find all .md files except special ones
  const exclude = ['paper.md', 'readme.md', 'claude.md'];
  const files = fs
    .readdirSync(directory)
    .filter((f) => f.endsWith('.md') && !exclude.includes(f.toLowerCase()));

  // Sort alphabetically as fallback
  return files.sort();
}
538
+
539
/**
 * Combine section files into paper.md.
 *
 * Writes `<directory>/paper.md` consisting of generated YAML frontmatter
 * followed by every section (its own frontmatter stripped), separated by
 * blank lines. Template variables are expanded when present. When
 * pandoc-crossref is available, forward references and supplementary
 * references are resolved in the combined text.
 *
 * A `# References` heading with an empty `#refs` div is injected before the
 * first supplementary/appendix section when a bibliography file exists, no
 * section file name looks like a references section, and no section contains
 * an explicit `#refs` div. All sections are scanned for that div before
 * assembly, so a div placed after the supplementary section also suppresses
 * the injection.
 *
 * Side effects on `options`: sets `_refsAutoInjected` when references were
 * injected and `_forwardRefsResolved` to the number of resolved forward refs.
 *
 * @param directory - Project directory
 * @param config - Build config
 * @param options - Combine options (mutated as described above)
 * @returns Path of the written paper.md
 * @throws {Error} If no section files are found
 */
export function combineSections(directory: string, config: BuildConfig, options: CombineOptions = {}): string {
  const sections = findSections(directory, config.sections);

  if (sections.length === 0) {
    throw new Error('No section files found. Create .md files or specify sections in rev.yaml');
  }

  const parts: string[] = [];

  // Add YAML frontmatter
  const frontmatter = buildFrontmatter(config);
  parts.push('---');
  parts.push(YAML.stringify(frontmatter).trim());
  parts.push('---');
  parts.push('');

  // Check if we need to auto-inject references before supplementary.
  // Pandoc places refs at the end by default, which breaks when supplementary follows.
  const hasRefsSection = sections.some(s =>
    s.toLowerCase().includes('reference') || s.toLowerCase().includes('refs')
  );
  const suppIndex = sections.findIndex(s =>
    s.toLowerCase().includes('supp') || s.toLowerCase().includes('appendix')
  );
  const hasBibliography = config.bibliography && fs.existsSync(path.join(directory, config.bibliography));

  // Read every section up front (own frontmatter removed) so the refs-div
  // check below sees the whole document, not just the sections that precede
  // the supplementary one.
  const loaded: (string | null)[] = sections.map((section) =>
    section ? stripFrontmatter(fs.readFileSync(path.join(directory, section), 'utf-8')) : null
  );

  // Section contents, kept for variable processing
  const sectionContents = loaded.filter((c): c is string => c !== null);

  // Explicit refs div in any section; accepts `::: {#refs}` and `:::{#refs}`
  const refsDivPattern = /:{3,}[ \t]*\{#refs\}/;
  const hasExplicitRefsDiv = sectionContents.some((c) => refsDivPattern.test(c));

  // Combine sections
  for (let i = 0; i < loaded.length; i++) {
    const content = loaded[i];
    if (content === null || content === undefined) continue;

    // Auto-inject references before supplementary if needed
    if (i === suppIndex && hasBibliography && !hasRefsSection && !hasExplicitRefsDiv) {
      parts.push('# References\n');
      parts.push('::: {#refs}');
      parts.push(':::');
      parts.push('');
      parts.push('');
      options._refsAutoInjected = true;
    }

    parts.push(content.trim());
    parts.push('');
    parts.push(''); // Double newline between sections
  }

  let paperContent = parts.join('\n');

  // Process template variables if any exist
  if (hasVariables(paperContent)) {
    paperContent = processVariables(paperContent, config as any, { sectionContents });
  }

  // Resolve forward references (refs that appear before their anchor definition).
  // This fixes a pandoc-crossref limitation with multi-file documents.
  if (hasPandocCrossref()) {
    const registry = buildRegistry(directory, sections);
    const { text, resolved } = resolveForwardRefs(paperContent, registry);
    if (resolved.length > 0) {
      paperContent = text;
      // Store resolved count for optional reporting
      options._forwardRefsResolved = resolved.length;
    }

    // Resolve supplementary references and strip their anchors.
    // pandoc-crossref cannot produce "Figure S1" numbering — it numbers all
    // figures sequentially. Supplementary refs are resolved to plain text and
    // their {#fig:...} attributes removed so crossref ignores them.
    const supp = resolveSupplementaryRefs(paperContent, registry);
    if (supp.resolved.length > 0) {
      paperContent = supp.text;
    }
  }

  const paperPath = path.join(directory, 'paper.md');

  fs.writeFileSync(paperPath, paperContent, 'utf-8');

  return paperPath;
}
639
+
640
/**
 * Build the YAML frontmatter object for paper.md from the config.
 *
 * Emits, in this order and only when set: `title`, `author`, `bibliography`,
 * `csl`. Authors are left out when numbered affiliations are in use, because
 * the author block is then injected separately per format.
 *
 * @param config - Build config
 * @returns Plain object ready to be serialized as YAML
 */
function buildFrontmatter(config: BuildConfig): Record<string, unknown> {
  const { title, authors, bibliography, csl } = config;

  const authorsInFrontmatter =
    Boolean(authors && authors.length > 0) && !hasNumberedAffiliations(config);

  return {
    ...(title ? { title } : {}),
    ...(authorsInFrontmatter ? { author: authors } : {}),
    ...(bibliography ? { bibliography } : {}),
    ...(csl ? { csl } : {}),
  };
}
664
+
665
/**
 * Strip a leading YAML frontmatter block from content.
 *
 * The block must start on the very first line with `---` and end at the next
 * line consisting of `---`. LF and CRLF line endings are both accepted. The
 * closing delimiter may be the last line of the content without a trailing
 * newline, and the block may be empty (`---` immediately followed by `---`).
 *
 * @param content - Raw section file content
 * @returns Content with the frontmatter block removed, or the input unchanged
 *   when it does not start with a frontmatter block
 */
function stripFrontmatter(content: string): string {
  // Optional body (lazy, ends on a newline) between the two delimiters; the
  // closing delimiter is followed by a newline or the end of the content.
  const match = content.match(/^---\r?\n(?:[\s\S]*?\r?\n)?---(?:\r?\n|$)/);
  return match ? content.slice(match[0].length) : content;
}
675
+
676
/**
 * Tell whether the config uses numbered affiliation mode: a non-empty
 * `affiliations` map is defined and at least one object-form author lists
 * one or more affiliation keys.
 *
 * @param config - Build config
 * @returns true when numbered affiliations are in use
 */
function hasNumberedAffiliations(config: BuildConfig): boolean {
  const affiliationMap = config.affiliations;
  const hasMap = Boolean(affiliationMap) && Object.keys(affiliationMap).length > 0;
  if (!hasMap) return false;

  for (const author of config.authors) {
    // Plain-string authors cannot carry affiliation keys
    if (typeof author === 'string') continue;
    if (author.affiliations && author.affiliations.length > 0) return true;
  }
  return false;
}
684
+
685
/**
 * Generate the LaTeX author block (authblk package) with numbered
 * superscript affiliations, for injection via header-includes.
 *
 * Affiliations are numbered 1..n in the key order of `config.affiliations`.
 * Affiliation keys on an author that are not in that map are ignored. A
 * corresponding author gets a `\thanks` footnote, including the e-mail
 * address when one is set. Names, e-mail addresses and affiliation text are
 * inserted verbatim.
 *
 * @param config - Build config
 * @returns LaTeX source, one command per line
 */
function generateLatexAuthorBlock(config: BuildConfig): string {
  // Affiliation key -> 1-based number
  const affiliationKeys = Object.keys(config.affiliations);
  const numberOf = new Map<string, number>(
    affiliationKeys.map((key, idx): [string, number] => [key, idx + 1])
  );

  const out: string[] = [
    '\\usepackage{authblk}',
    '\\renewcommand\\Authfont{\\normalsize}',
    '\\renewcommand\\Affilfont{\\small}',
    '',
  ];

  // Authors
  for (const author of config.authors) {
    if (typeof author === 'string') {
      out.push(`\\author{${author}}`);
      continue;
    }

    const numbers = (author.affiliations || [])
      .map((key) => numberOf.get(key))
      .filter((n): n is number => n !== undefined);
    const optionalArg = numbers.length > 0 ? `[${numbers.join(',')}]` : '';

    let thanks = '';
    if (author.corresponding) {
      thanks = author.email
        ? `\\thanks{Corresponding author: ${author.email}}`
        : '\\thanks{Corresponding author}';
    }

    out.push(`\\author${optionalArg}{${author.name}${thanks}}`);
  }

  // Affiliations, in the same order that produced the numbering
  affiliationKeys.forEach((key, idx) => {
    out.push(`\\affil[${idx + 1}]{${config.affiliations[key]}}`);
  });

  return out.join('\n');
}
731
+
732
/**
 * Generate the markdown author block for DOCX output with superscript
 * affiliations, to be inserted after the YAML frontmatter.
 *
 * Layout: one comma-separated author line (`Name^1,2^`, with `\*` appended to
 * the superscript of corresponding authors), a blank line, one `^n^ text`
 * line per affiliation, and — when the first corresponding author has an
 * e-mail address — a blank line plus a corresponding-author note. Unless
 * `docx.affiliationNewline` is false, every affiliation line except the last
 * ends with a backslash.
 *
 * @param config - Build config
 * @returns Markdown text ending with a newline
 */
function generateMarkdownAuthorBlock(config: BuildConfig): string {
  // Affiliation key -> 1-based number
  const numberOf = new Map<string, number>();
  Object.keys(config.affiliations).forEach((key, index) => numberOf.set(key, index + 1));

  // Author line: Name^1,2^, Name^3^, ...
  const renderAuthor = (author: string | Author): string => {
    if (typeof author === 'string') return author;

    const superscripts = (author.affiliations || [])
      .map((key) => numberOf.get(key))
      .filter((n): n is number => n !== undefined)
      .map(String);
    if (author.corresponding) superscripts.push('\\*');

    return superscripts.length > 0
      ? `${author.name}^${superscripts.join(',')}^`
      : author.name;
  };

  const out: string[] = [config.authors.map((author) => renderAuthor(author)).join(', '), ''];

  // Affiliation lines: ^1^ Department of ...
  const affiliationEntries = Object.entries(config.affiliations);
  const breakLines = config.docx.affiliationNewline !== false;
  affiliationEntries.forEach(([, text], idx) => {
    const needsBreak = breakLines && idx < affiliationEntries.length - 1;
    out.push(`^${idx + 1}^ ${text}${needsBreak ? '\\' : ''}`);
  });

  // Corresponding author footnote (first corresponding author only)
  const contact = config.authors.find(
    (a) => typeof a !== 'string' && a.corresponding
  ) as Author | undefined;
  if (contact?.email) {
    out.push('', `^\\*^ Corresponding author: ${contact.email}`);
  }

  out.push('');
  return out.join('\n');
}
787
+
788
/**
 * Rewrite distribution notation in selected pipe-table columns as LaTeX math.
 *
 * A column is selected when its header text contains (case-insensitively) one
 * of the `tablesConfig.nowrap` patterns. In the body cells of those columns,
 * `Half-Normal(..)`, `Normal(..)`, `Student-t(df, ..)`, `Gamma(..)` and
 * `Exponential(..)` are converted to inline math. Cells that are empty or that
 * already start with `$` or `\` are left alone.
 *
 * Every row keeps its own line terminator, so CRLF tables stay CRLF and a
 * table that ends the document without a newline does not gain one.
 *
 * @param content - Markdown content
 * @param tablesConfig - tables config from rev.yaml
 * @param format - output format (pdf, docx, etc.); only `pdf` and `tex` are processed
 * @returns processed content
 */
export function processTablesForFormat(content: string, tablesConfig: TablesConfig, format: string): string {
  // Only process for PDF/TeX output
  if (format !== 'pdf' && format !== 'tex') {
    return content;
  }

  // Check if we have nowrap columns configured
  if (!tablesConfig?.nowrap?.length) {
    return content;
  }

  const nowrapPatterns = tablesConfig.nowrap.map((p) => p.toLowerCase());

  // Convert distribution notation to LaTeX math.
  // Order matters: compound names (Half-Normal) must come before simple names (Normal).
  const distributionsToMath = (cell: string): string =>
    cell
      // Half-Normal(x) → $\text{Half-Normal}(x)$
      .replace(/Half-Normal\(([^)]+)\)/g, '$\\text{Half-Normal}($1)$')
      // Normal(x, y) → $\mathcal{N}(x, y)$
      .replace(/Normal\(([^)]+)\)/g, '$\\mathcal{N}($1)$')
      // Student-t(df, loc, scale) → $t_{df}(loc, scale)$
      .replace(/Student-t\((\d+),\s*([^)]+)\)/g, '$t_{$1}($2)$')
      // Gamma(a, b) → $\text{Gamma}(a, b)$
      .replace(/Gamma\(([^)]+)\)/g, '$\\text{Gamma}($1)$')
      // Exponential(x) → $\text{Exp}(x)$
      .replace(/Exponential\(([^)]+)\)/g, '$\\text{Exp}($1)$');

  // Match pipe tables: header row, separator row, body rows
  // Header:    | Col1 | Col2 | Col3 |
  // Separator: |:-----|:-----|:-----|
  // Body:      | val1 | val2 | val3 |
  const tableRegex = /^(\|[^\n]+\|\r?\n\|[-:| ]+\|\r?\n)((?:\|[^\n]+\|\r?\n?)+)/gm;

  return content.replace(tableRegex, (match: string, headerAndSep: string, body: string) => {
    // First line of the header+separator capture is the header row.
    const headerLine = headerAndSep.split(/\r?\n/)[0] ?? '';

    // Parse header cells to find nowrap column indices
    const nowrapCols: number[] = [];
    headerLine
      .split('|')
      .slice(1, -1)
      .forEach((cell, i) => {
        const name = cell.trim().toLowerCase();
        if (nowrapPatterns.some((p) => name.includes(p))) {
          nowrapCols.push(i);
        }
      });

    // If no nowrap columns found in this table, return unchanged
    if (nowrapCols.length === 0) {
      return match;
    }

    // Rewrite each row in place. Matching only the non-terminator runs leaves
    // the original "\n" / "\r\n" (or the absence of a final newline) untouched.
    const processedBody = body.replace(/[^\r\n]+/g, (row) => {
      // cells[0] is empty (before first |), cells[last] is empty (after last |)
      const cells = row.split('|');

      for (const colIdx of nowrapCols) {
        const cellIdx = colIdx + 1; // Account for empty first element
        const cell = cells[cellIdx];
        if (cell === undefined) continue;

        const cellContent = cell.trim();
        // Skip if empty, already math, or already has LaTeX commands
        if (!cellContent || cellContent.startsWith('$') || cellContent.startsWith('\\')) {
          continue;
        }

        // Update cell with padding
        cells[cellIdx] = ` ${distributionsToMath(cellContent)} `;
      }

      return cells.join('|');
    });

    return headerAndSep + processedBody;
  });
}
887
+
888
/**
 * Apply format-specific transforms (table normalization, author blocks,
 * crossref display conversion, slide syntax). Caller is responsible for
 * stripping annotations beforehand — the dual-output paths keep comments
 * in the markdown stream and need to apply these transforms separately
 * from annotation handling.
 *
 * Per format:
 * - `pdf` / `tex`: table processing, plus a LaTeX author block injected as
 *   `header-includes` just before the closing `---` of the frontmatter.
 * - `docx`: dynamic refs converted to display text, raw LaTeX figures
 *   translated (unless `docx.translateRawFigures` is false), plus a markdown
 *   author block inserted after the frontmatter.
 * - `beamer` / `pptx`: slide markdown processing when slide syntax is present.
 * - anything else: content is returned unchanged.
 *
 * @param content - Markdown content (annotations already stripped as needed)
 * @param format - Output format
 * @param config - Build config
 * @param registry - Crossref registry for the project
 * @returns Transformed markdown
 */
export function applyFormatTransforms(
  content: string,
  format: string,
  config: BuildConfig,
  registry: Registry
): string {
  if (format === 'pdf' || format === 'tex') {
    content = processTablesForFormat(content, config.tables, format);

    if (hasNumberedAffiliations(config)) {
      const latexBlock = generateLatexAuthorBlock(config);
      content = content.replace(/^(---\r?\n[\s\S]*?)(---\r?\n)/, (_match, yamlContent, closing) => {
        return `${yamlContent}header-includes: |\n${latexBlock.split('\n').map(l => ' ' + l).join('\n')}\n${closing}`;
      });
    }
  } else if (format === 'docx') {
    content = convertDynamicRefsToDisplay(content, registry);

    // Pandoc strips raw LaTeX in docx output. Translate the common
    // `\begin{figure}...\end{figure}` shape to portable markdown so figures
    // actually appear; exotic blocks are left alone (warned about in build()).
    if (config.docx?.translateRawFigures !== false) {
      const { translated } = translateRawLatexFigures(content);
      content = translated;
    }

    if (hasNumberedAffiliations(config)) {
      const mdBlock = generateMarkdownAuthorBlock(config);
      // Use a function replacer: the author block is user-supplied text, and
      // in a replacement *string* sequences such as `$&`, `$1` or `$$` inside
      // a name, email or affiliation would be expanded instead of inserted.
      content = content.replace(
        /^(---\r?\n[\s\S]*?---\r?\n)/,
        (frontmatter: string) => `${frontmatter}\n${mdBlock}\n`
      );
    }
  } else if (format === 'beamer' || format === 'pptx') {
    if (hasSlideSyntax(content)) {
      content = processSlideMarkdown(content, format);
    }
  }

  return content;
}
939
+
940
/**
 * Prepare paper.md for a specific output format.
 *
 * Reads the combined paper, strips annotations (docx honours
 * `config.docx.keepComments`), applies the shared format transforms and
 * writes the result next to the input as `.paper-<format>.md`.
 *
 * @param paperPath - Path to the combined markdown file
 * @param format - Output format
 * @param config - Build config
 * @param _options - Accepted for call-site compatibility; not used here
 * @returns Path of the prepared temporary markdown file
 */
export function prepareForFormat(
  paperPath: string,
  format: string,
  config: BuildConfig,
  _options: BuildOptions = {}
): string {
  const directory = path.dirname(paperPath);
  const source = fs.readFileSync(paperPath, 'utf-8');

  // The registry drives reference conversion; sections come from config so
  // files are visited in the configured order.
  const registry = buildRegistry(directory, config.sections);

  // Annotation stripping is the only step that differs for docx.
  const stripped =
    format === 'docx'
      ? stripAnnotations(source, { keepComments: config.docx.keepComments })
      : stripAnnotations(source);

  const transformed = applyFormatTransforms(stripped, format, config, registry);

  // Temporary file lives beside the input so relative paths keep working.
  const preparedPath = path.join(directory, `.paper-${format}.md`);
  fs.writeFileSync(preparedPath, transformed, 'utf-8');
  return preparedPath;
}
972
+
973
/**
 * Replace dynamic references such as `@fig:label` with their display text
 * (e.g. "Figure 1"). References the registry cannot resolve are left as-is.
 */
function convertDynamicRefsToDisplay(text: string, registry: Registry): string {
  // Fold from the last reference to the first so the recorded positions of
  // the earlier references stay valid while later ones are spliced.
  return detectDynamicRefs(text).reduceRight((current, ref) => {
    if (!ref) return current;

    const display = labelToDisplay(ref.type, ref.label, registry as any);
    if (!display) return current;

    const before = current.slice(0, ref.position);
    const after = current.slice(ref.position + ref.match.length);
    return before + display + after;
  }, text);
}
993
+
994
+ // =============================================================================
995
+ // Raw LaTeX figure detection / translation (docx)
996
+ // =============================================================================
997
+
998
+ /**
999
+ * A raw LaTeX `\begin{figure}...\end{figure}` block found in source markdown.
1000
+ * `exotic` blocks contain features we don't auto-translate (multiple
1001
+ * `\includegraphics`, `\subfloat`, `\rotatebox`, unrecognised width units);
1002
+ * pandoc strips raw LaTeX silently in docx output, so users get warned about
1003
+ * anything that won't be translated.
1004
+ */
1005
+ export interface RawLatexFigure {
1006
+ file?: string; // file name passed to the detector, when one was supplied
1007
+ line: number; // 1-based line of `\begin{figure}` within the scanned content
1008
+ block: string; // full matched text, from `\begin{figure}` through `\end{figure}`
1009
+ exotic: boolean; // true when the block will not be auto-translated
1010
+ }
1011
+
1012
/**
 * Build a fresh global regex matching whole `\begin{figure}` /
 * `\begin{figure*}` … `\end{figure}` blocks, including an optional `[...]`
 * placement argument. A new instance is returned on every call, so callers
 * never share `lastIndex` state.
 */
function makeRawFigureRegex(): RegExp {
  const figureBlockPattern = /\\begin\{figure\*?\}(?:\[[^\]]*\])?[\s\S]*?\\end\{figure\*?\}/g;
  return figureBlockPattern;
}
1016
+
1017
/**
 * Convert a LaTeX width spec to a markdown image attribute value.
 * - `0.8\textwidth` → `80%`
 * - `\linewidth` → `100%`
 * - `8cm`, `2in`, `12pt` → kept verbatim (inner whitespace removed)
 *
 * The numeric part must be a well-formed decimal (`8`, `0.8`, `.8`, `8.`).
 * Malformed numbers such as `1.2.3` or `..` are rejected instead of being
 * partially parsed or passed through.
 *
 * @param raw - Width value as written in the `\includegraphics` options
 * @returns The markdown width, or null for anything we don't translate
 *          (the block then stays "exotic").
 */
function convertLatexWidth(raw: string): string | null {
  const trimmed = raw.trim();

  // Coefficient × relative length
  const rel = /^(\d+\.?\d*|\.\d+)\s*\\(?:textwidth|linewidth|columnwidth)$/.exec(trimmed);
  if (rel) {
    const pct = Math.round(Number(rel[1]) * 100);
    // A coefficient that rounds to 0% (or is not a number) is not usable.
    return Number.isFinite(pct) && pct > 0 ? `${pct}%` : null;
  }

  // Bare relative length
  if (/^\\(textwidth|linewidth|columnwidth)$/.test(trimmed)) return '100%';

  // Absolute units
  if (/^(?:\d+\.?\d*|\.\d+)\s*(?:cm|mm|in|pt|px|em|ex)$/.test(trimmed)) {
    return trimmed.replace(/\s+/g, '');
  }

  return null;
}
1039
+
1040
/**
 * Extract the balanced `{...}` argument that follows `command` in `text`.
 *
 * Occurrences of `command` are tried in order until one yields an argument:
 * - an occurrence that is only the prefix of a longer control word (e.g.
 *   `\caption` inside `\captionsetup`) is skipped;
 * - one optional `[...]` argument between the command and the brace is
 *   skipped, so `\caption[short]{long}` yields `long`;
 * - whitespace before the `[` or `{` is allowed;
 * - a backslash escapes the next character, so `\{` and `\}` do not affect
 *   brace depth.
 *
 * @param text - Text to search
 * @param command - Command including its backslash, e.g. `\caption`
 * @returns The argument without its outer braces, or null when no occurrence
 *          of the command is followed by a balanced braced argument.
 */
function extractBracedArg(text: string, command: string): string | null {
  const commandEndsWithLetter = /[A-Za-z]$/.test(command);
  let searchFrom = 0;

  while (searchFrom <= text.length) {
    const idx = text.indexOf(command, searchFrom);
    if (idx === -1) return null;
    // Always advance by at least one character so an empty command terminates.
    searchFrom = idx + Math.max(command.length, 1);

    let i = idx + command.length;

    // `\caption` followed by a letter is a different command (`\captionsetup`).
    if (commandEndsWithLetter && /[A-Za-z]/.test(text.charAt(i))) continue;

    while (i < text.length && /\s/.test(text.charAt(i))) i++;

    // Optional argument, e.g. the short caption in `\caption[short]{long}`.
    if (text.charAt(i) === '[') {
      const close = text.indexOf(']', i);
      if (close === -1) continue;
      i = close + 1;
      while (i < text.length && /\s/.test(text.charAt(i))) i++;
    }

    if (text.charAt(i) !== '{') continue;

    const start = i + 1;
    let depth = 1;
    for (i = start; i < text.length; i++) {
      const ch = text.charAt(i);
      if (ch === '\\') {
        i++; // skip the escaped character as well
        continue;
      }
      if (ch === '{') {
        depth++;
      } else if (ch === '}') {
        depth--;
        if (depth === 0) return text.slice(start, i);
      }
    }
    // Unbalanced from this occurrence; fall through and try the next one.
  }

  return null;
}
1062
+
1063
/**
 * True if a `\begin{figure}` block contains features we don't auto-translate:
 * `\subfloat`, `\rotatebox`, zero or several `\includegraphics`, an
 * `\includegraphics` whose `{path}` cannot be parsed, or a `width=` option
 * that `convertLatexWidth` rejects.
 */
function isExoticFigureBlock(block: string): boolean {
  const usesUnsupportedCommand = /\\subfloat\b/.test(block) || /\\rotatebox\b/.test(block);
  if (usesUnsupportedCommand) return true;

  // Exactly one image is required for the single-image markdown form.
  const includeCount = block.match(/\\includegraphics\b/g)?.length ?? 0;
  if (includeCount !== 1) return true;

  const parsed = /\\includegraphics\s*(?:\[([^\]]*)\])?\s*\{([^}]+)\}/.exec(block);
  if (parsed === null) return true;

  // Only the width option is inspected; a missing width is fine.
  const options = parsed[1] || '';
  const widthSpec = /(?:^|,)\s*width\s*=\s*([^,]+)/.exec(options);
  return widthSpec !== null && !convertLatexWidth(widthSpec[1]!);
}
1076
+
1077
/**
 * List the raw LaTeX figure blocks in `content` that contain
 * `\includegraphics`.
 *
 * @param content - Markdown to scan
 * @param file - Optional file name copied onto every result
 * @returns One entry per block; `line` is the 1-based line of
 *          `\begin{figure}` within `content`.
 */
export function detectRawLatexFigures(content: string, file?: string): RawLatexFigure[] {
  return Array.from(content.matchAll(makeRawFigureRegex()))
    .filter((hit) => hit[0].includes('\\includegraphics'))
    .map((hit) => {
      const block = hit[0];
      // Number of lines up to and including the one where the block starts.
      const line = content.slice(0, hit.index).split(/\r?\n/).length;
      return { file, line, block, exotic: isExoticFigureBlock(block) };
    });
}
1094
+
1095
/**
 * Translate the 80% case: single `\includegraphics` figure with optional
 * `\caption{...}` and `\label{...}`, wrapped in `\begin{figure}...\end{figure}`,
 * to portable `![caption](path){#fig:label width=N%}` markdown. Exotic blocks
 * (see `isExoticFigureBlock`) are left untouched.
 *
 * The attribute block is emitted directly after the closing `)` with no space
 * in between: pandoc only reads `{...}` as image attributes when it follows
 * the image immediately, otherwise it is rendered as literal text.
 *
 * @param content - Markdown that may contain raw LaTeX figure blocks
 * @returns The rewritten markdown and the number of blocks that were translated
 */
export function translateRawLatexFigures(content: string): { translated: string; translatedCount: number } {
  let translatedCount = 0;

  const translated = content.replace(makeRawFigureRegex(), (block) => {
    if (!block.includes('\\includegraphics')) return block;
    if (isExoticFigureBlock(block)) return block;

    const inc = /\\includegraphics\s*(?:\[([^\]]*)\])?\s*\{([^}]+)\}/.exec(block);
    if (!inc) return block;
    const optsStr = inc[1] || '';
    const imgPath = inc[2]!.trim();

    // Width is optional; an unconvertible width leaves the block as-is
    // (already filtered by isExoticFigureBlock, kept as a defensive check).
    const widthMatch = /(?:^|,)\s*width\s*=\s*([^,]+)/.exec(optsStr);
    const width = widthMatch ? convertLatexWidth(widthMatch[1]!) : undefined;
    if (widthMatch && !width) return block;

    const caption = (extractBracedArg(block, '\\caption') ?? '').trim();
    const labelRaw = extractBracedArg(block, '\\label');

    const attrs: string[] = [];
    if (labelRaw) {
      const label = labelRaw.trim();
      // Labels that already carry a `prefix:` keep it; bare labels become `fig:`.
      attrs.push(/^[a-z]+:/i.test(label) ? `#${label}` : `#fig:${label}`);
    }
    if (width) attrs.push(`width=${width}`);

    translatedCount++;
    const attrStr = attrs.length > 0 ? `{${attrs.join(' ')}}` : '';
    return `![${caption}](${imgPath})${attrStr}`;
  });

  return { translated, translatedCount };
}
1138
+
1139
/**
 * Render the warning shown for raw LaTeX figure blocks that won't appear in
 * docx output: a summary line, one line per block (location plus image path
 * when one can be read from the block) and a two-line hint.
 *
 * @param figs - Blocks to report
 * @param translateEnabled - true when auto-translate ran (the listed blocks
 *        are exotic leftovers); false when no translation was attempted
 */
function formatRawLatexFigureWarning(figs: RawLatexFigure[], translateEnabled: boolean): string {
  const reason = translateEnabled ? 'too complex to auto-translate' : 'translateRawFigures: false';
  const summary = `${figs.length} raw LaTeX figure block(s) won't render in docx (${reason}).`;

  const entries = figs.map((fig) => {
    const where = fig.file ? `${fig.file}:${fig.line}` : `line ${fig.line}`;
    const include = /\\includegraphics\s*(?:\[[^\]]*\])?\s*\{([^}]+)\}/.exec(fig.block);
    const imageInfo = include ? ` ${include[1]!.trim()}` : '';
    return ` ${where}${imageInfo}`;
  });

  return [
    summary,
    ...entries,
    ' Hint: use ![caption](path){#fig:label width=80%} for format-portable figures,',
    ' or pass --pandoc-arg=--lua-filter=<your.lua> to translate them yourself.',
  ].join('\n');
}
1159
+
1160
/**
 * Scan the project's section files for raw LaTeX figure blocks that won't
 * survive the docx build and format them into a single warning.
 *
 * @param directory - Project directory containing the section files
 * @param config - Build config (`sections`, `docx.translateRawFigures`)
 * @returns The warning text, or null when there is nothing to warn about
 */
export function collectRawLatexFigureWarning(directory: string, config: BuildConfig): string | null {
  const translateEnabled = config.docx?.translateRawFigures !== false;

  // With auto-translate on, non-exotic blocks are rewritten cleanly and only
  // exotic leftovers are reported; with it off, every block is reported.
  const needsWarning = (fig: RawLatexFigure): boolean => !translateEnabled || fig.exotic;

  const flagged: RawLatexFigure[] = [];
  for (const section of findSections(directory, config.sections)) {
    const sectionPath = path.join(directory, section);
    if (!fs.existsSync(sectionPath)) continue;
    try {
      const text = fs.readFileSync(sectionPath, 'utf-8');
      flagged.push(...detectRawLatexFigures(text, section).filter(needsWarning));
    } catch {
      // ignore unreadable sections
    }
  }

  return flagged.length > 0 ? formatRawLatexFigureWarning(flagged, translateEnabled) : null;
}
1187
+
1188
/**
 * Build the built-in pandoc arguments for a format.
 *
 * Only arguments derived from config are returned here. Passthrough args
 * (config.pandocArgs, config[format].pandocArgs, CLI --pandoc-arg) are
 * appended later in runPandoc so they win against pptx/crossref defaults
 * added there.
 *
 * @param format - Output format (`tex`, `pdf`, `docx`, `beamer`, `pptx`, ...)
 * @param config - Build config
 * @param outputPath - Output path, passed to `-o` unchanged
 * @returns Argument list in the order pandoc should receive it
 */
export function buildPandocArgs(format: string, config: BuildConfig, outputPath: string): string[] {
  const args: string[] = [];
  const pushVariable = (assignment: string): void => {
    args.push('-V', assignment);
  };
  const isSlideFormat = format === 'beamer' || format === 'pptx';

  // Output format
  switch (format) {
    case 'tex':
      args.push('-t', 'latex');
      if (config.tex.standalone) args.push('-s');
      break;
    case 'pdf':
    case 'docx':
    case 'beamer':
    case 'pptx':
      args.push('-t', format);
      break;
    default:
      break;
  }

  // Output file. runPandoc sets cwd to the project directory and passes a
  // path relative to that cwd; passing it through here unchanged lets pandoc
  // write to subdirectories like output/<title-slug>.<ext>.
  args.push('-o', outputPath);

  // Crossref filter (if available) - skip for slides
  if (hasPandocCrossref() && !isSlideFormat) {
    args.push('--filter', 'pandoc-crossref');
  }

  // Bibliography
  if (config.bibliography) {
    args.push('--citeproc');
  }

  // Format-specific options
  if (format === 'pdf') {
    const pdf = config.pdf;
    if (pdf.template) args.push('--template', pdf.template);
    if (pdf.engine) args.push(`--pdf-engine=${pdf.engine}`);
    if (pdf.mainfont) pushVariable(`mainfont=${pdf.mainfont}`);
    if (pdf.sansfont) pushVariable(`sansfont=${pdf.sansfont}`);
    if (pdf.monofont) pushVariable(`monofont=${pdf.monofont}`);
    pushVariable(`documentclass=${pdf.documentclass}`);
    pushVariable(`fontsize=${pdf.fontsize}`);
    pushVariable(`geometry:${pdf.geometry}`);
    if (pdf.headerIncludes) args.push('-H', pdf.headerIncludes);
    if (pdf.linestretch !== 1) pushVariable(`linestretch=${pdf.linestretch}`);
    if (pdf.numbersections) args.push('--number-sections');
    if (pdf.toc) args.push('--toc');
    return args;
  }

  if (format === 'docx') {
    const docx = config.docx;
    if (docx.reference) args.push('--reference-doc', docx.reference);
    if (docx.toc) args.push('--toc');
    return args;
  }

  if (format === 'beamer') {
    // Beamer slide options
    const beamer = config.beamer || {};
    if (beamer.theme) pushVariable(`theme=${beamer.theme}`);
    if (beamer.colortheme) pushVariable(`colortheme=${beamer.colortheme}`);
    if (beamer.fonttheme) pushVariable(`fonttheme=${beamer.fonttheme}`);
    if (beamer.aspectratio) pushVariable(`aspectratio=${beamer.aspectratio}`);
    if (beamer.navigation) pushVariable(`navigation=${beamer.navigation}`);

    // Speaker notes - default to 'show' which creates presenter view PDF
    // Options: 'show' (dual screen), 'only' (notes only), 'hide' (no notes), false (disabled)
    const notesMode = beamer.notes === undefined ? 'show' : beamer.notes;
    if (notesMode && notesMode !== 'hide') {
      pushVariable(`classoption=notes=${notesMode}`);
    }

    // Fit images within slide bounds (default: true)
    if (beamer.fit_images !== false) {
      const fitImagesHeader = [
        '\\makeatletter',
        '\\def\\maxwidth{\\ifdim\\Gin@nat@width>\\linewidth\\linewidth\\else\\Gin@nat@width\\fi}',
        '\\def\\maxheight{\\ifdim\\Gin@nat@height>0.75\\textheight 0.75\\textheight\\else\\Gin@nat@height\\fi}',
        '\\makeatother',
        '\\setkeys{Gin}{width=\\maxwidth,height=\\maxheight,keepaspectratio}',
      ].join('\n');
      pushVariable(`header-includes=${fitImagesHeader}`);
    }

    // Slides need standalone
    args.push('-s');
    return args;
  }

  // pptx: PowerPoint options are handled separately in preparePptxTemplate;
  // the reference doc is set by the caller after template generation.
  return args;
}
1311
+
1312
/**
 * Gather the passthrough pandoc args that apply to `format`, in canonical
 * order: top-level `config.pandocArgs`, then `config[format].pandocArgs`,
 * then the CLI extras. For repeated flags the later entry wins.
 *
 * @param format - Output format, also used as the config key to look up
 * @param config - Build config
 * @param extraArgs - Extra args supplied on the command line
 * @returns A new array; the inputs are not modified
 */
export function collectPandocPassthroughArgs(
  format: string,
  config: BuildConfig,
  extraArgs: string[] = []
): string[] {
  // Missing or empty lists contribute nothing.
  const orNone = (list: string[] | null | undefined): string[] =>
    list && list.length > 0 ? list : [];

  const perFormat = (config as unknown as Record<string, { pandocArgs?: string[] } | undefined>)[format];

  return [
    ...orNone(config.pandocArgs),
    ...orNone(perFormat?.pandocArgs),
    ...(extraArgs.length > 0 ? extraArgs : []),
  ];
}
1335
+
1336
/**
 * Create `crossref.yaml` in the project directory from `config.crossref`
 * when the file does not exist yet and pandoc-crossref is available.
 * An existing file is never overwritten.
 */
function ensureCrossrefConfig(directory: string, config: BuildConfig): void {
  const crossrefPath = path.join(directory, 'crossref.yaml');

  if (fs.existsSync(crossrefPath)) return;
  if (!hasPandocCrossref()) return;

  const serialized = YAML.stringify(config.crossref);
  fs.writeFileSync(crossrefPath, serialized, 'utf-8');
}
1346
+
1347
/**
 * Get install instructions for a missing dependency.
 *
 * The lookup uses a Map rather than a plain object so that tool names which
 * happen to match inherited object members (`toString`, `constructor`, ...)
 * fall back to the generic message instead of returning a function.
 *
 * @param tool - Tool name, e.g. `pandoc` or `latex`
 * @returns A URL for known tools, otherwise `'Check documentation'`
 */
function getInstallInstructions(tool: string): string {
  const instructions = new Map<string, string>([
    ['pandoc', 'https://pandoc.org/installing.html'],
    ['latex', 'https://www.latex-project.org/get/'],
  ]);
  return instructions.get(tool) ?? 'Check documentation';
}
1357
+
1358
/**
 * Work out the directory where final outputs should be written.
 *
 * @param directory - Project directory
 * @param config - Build config; `outputDir` may be absolute, relative to the
 *                 project directory, or null/empty
 * @returns `outputDir` itself when absolute, `directory/outputDir` when
 *          relative, and `directory` when it is not set
 */
export function resolveOutputDir(directory: string, config: BuildConfig): string {
  const { outputDir } = config;
  if (outputDir && path.isAbsolute(outputDir)) return outputDir;
  return outputDir ? path.join(directory, outputDir) : directory;
}
1367
+
1368
/** File extension (with leading dot) for each supported pandoc format. */
const FORMAT_EXTENSIONS: Record<string, string> = {
  tex: '.tex',
  pdf: '.pdf',
  docx: '.docx',
  beamer: '.pdf',
  pptx: '.pptx',
};

/**
 * Get file extension for a format, defaulting to `.pdf`.
 *
 * Only the table's own keys are consulted, so a format name that matches an
 * inherited object member (`constructor`, `toString`, ...) gets the default
 * instead of a non-string value.
 *
 * @param format - Output format name
 * @returns Extension including the leading dot
 */
export function getFormatExtension(format: string): string {
  const isKnown = Object.prototype.hasOwnProperty.call(FORMAT_EXTENSIONS, format);
  return (isKnown ? FORMAT_EXTENSIONS[format] : undefined) ?? '.pdf';
}
1381
+
1382
/**
 * Slugify a title for use as a default output filename. Lowercases, replaces
 * non-alphanumeric runs with `-`, trims leading/trailing `-`, and truncates
 * at the last `-` boundary at-or-before MAX_TITLE_FILENAME_LENGTH so words
 * stay whole.
 *
 * A `-` sitting exactly at index MAX_TITLE_FILENAME_LENGTH counts as a
 * boundary: the first MAX characters already end on a whole word and are
 * returned in full rather than losing their last word.
 *
 * @param title - Document title; may be empty
 * @returns The slug, or `'paper'` when the title has no usable characters
 */
export function slugifyTitle(title: string): string {
  if (!title) return 'paper';
  const slug = title.toLowerCase().replace(/[^a-z0-9]+/g, '-').replace(/^-+|-+$/g, '');
  if (!slug) return 'paper';
  if (slug.length <= MAX_TITLE_FILENAME_LENGTH) return slug;

  // Search the untruncated slug so a hyphen right at the limit is seen.
  const boundary = slug.lastIndexOf('-', MAX_TITLE_FILENAME_LENGTH);

  // Only truncate at a hyphen if it leaves a reasonable amount of content.
  // Otherwise hard-cut (handles degenerate titles with no spaces at all).
  if (boundary >= MAX_TITLE_FILENAME_LENGTH / 2) {
    return slug.slice(0, boundary);
  }
  return slug.slice(0, MAX_TITLE_FILENAME_LENGTH);
}
1402
+
1403
/**
 * Make sure `name` ends with `ext`, compared case-insensitively.
 *
 * A name that already carries the extension is returned untouched (its
 * original casing is kept). Any other name — no extension, or a different
 * one — gets `ext` appended rather than substituted, so `output.docx` built
 * as tex becomes `output.docx.tex`, which is loud enough to flag the
 * misconfiguration.
 */
function ensureExtension(name: string, ext: string): string {
  const alreadyPresent = name.toLowerCase().endsWith(ext.toLowerCase());
  return alreadyPresent ? name : `${name}${ext}`;
}
1416
+
1417
/**
 * Resolve the final output path for a build.
 *
 * Priority: `cliOverride` (-o flag) > `config.output[format]` > slugified
 * title fallback. (An internally forced `options.outputPath` is handled by
 * runPandoc before this function is consulted.)
 *
 * Relative paths from `cliOverride`/`config.output` resolve under outputDir;
 * absolute paths bypass outputDir. The fallback path always lives under
 * outputDir. For explicit names, any extension the user wrote is replaced by
 * the format's canonical one.
 *
 * @param directory - Project directory
 * @param config - Build config (`output`, `outputDir`, `title`)
 * @param format - Output format; decides the file extension
 * @param options - `cliOverride`: explicit name from the CLI.
 *                  `suffix`: appended before the extension of the title
 *                  fallback (e.g. "-changes", "-slides"); suppressed when the
 *                  user supplied an explicit name via CLI or config — they
 *                  pick their own suffix.
 * @returns Output path joined under the chosen base directory
 */
export function resolveOutputPath(
  directory: string,
  config: BuildConfig,
  format: string,
  options: { cliOverride?: string; suffix?: string } = {}
): string {
  const { cliOverride, suffix = '' } = options;
  const ext = getFormatExtension(format);

  const explicit = cliOverride ?? config.output?.[format];
  if (explicit) {
    const baseDir = path.isAbsolute(explicit)
      ? path.dirname(explicit)
      : resolveOutputDir(directory, config);
    const stem = path.basename(explicit).replace(/\.[^./\\]+$/, '');
    // No suffix here: an explicitly chosen name is used as given.
    return path.join(baseDir, ensureExtension(stem, ext));
  }

  const slug = slugifyTitle(config.title);
  return path.join(resolveOutputDir(directory, config), `${slug}${suffix}${ext}`);
}
1453
+
1454
+ /**
1455
+ * Run pandoc build
1456
+ */
1457
+ export async function runPandoc(
1458
+ inputPath: string,
1459
+ format: string,
1460
+ config: BuildConfig,
1461
+ options: BuildOptions = {}
1462
+ ): Promise<PandocResult> {
1463
+ const directory = path.dirname(inputPath);
1464
+
1465
+ // outputPath (internal force) wins over the resolver. For beamer, we keep
1466
+ // the `-slides` suffix on the slug fallback to distinguish from a regular
1467
+ // PDF build; when the user supplies an explicit name, they pick their own.
1468
+ const suffix = format === 'beamer' ? '-slides' : '';
1469
+ const outputPath = options.outputPath
1470
+ ?? resolveOutputPath(directory, config, format, {
1471
+ cliOverride: options.output,
1472
+ suffix,
1473
+ });
1474
+
1475
+ if (!options.outputPath) {
1476
+ const outDir = path.dirname(outputPath);
1477
+ if (!fs.existsSync(outDir)) {
1478
+ fs.mkdirSync(outDir, { recursive: true });
1479
+ }
1480
+ }
1481
+
1482
+ // Ensure crossref.yaml exists
1483
+ ensureCrossrefConfig(directory, config);
1484
+
1485
+ // Pandoc runs with cwd = directory, so pass the output path relative to it.
1486
+ const args = buildPandocArgs(format, config, path.relative(directory, outputPath) || path.basename(outputPath));
1487
+
1488
+ // Handle PPTX reference template and themes
1489
+ let pptxMediaDir: string | null = null;
1490
+ if (format === 'pptx') {
1491
+ const pptx = config.pptx || {};
1492
+
1493
+ // Determine media directory (default: pptx/media or slides/media)
1494
+ let mediaDir = pptx.media;
1495
+ if (!mediaDir) {
1496
+ if (fs.existsSync(path.join(directory, 'pptx', 'media'))) {
1497
+ mediaDir = path.join(directory, 'pptx', 'media');
1498
+ } else if (fs.existsSync(path.join(directory, 'slides', 'media'))) {
1499
+ mediaDir = path.join(directory, 'slides', 'media');
1500
+ }
1501
+ } else if (!path.isAbsolute(mediaDir)) {
1502
+ mediaDir = path.join(directory, mediaDir);
1503
+ }
1504
+ pptxMediaDir = mediaDir || null;
1505
+
1506
+ // Determine reference doc: custom reference overrides theme
1507
+ let referenceDoc: string | null = null;
1508
+ if (pptx.reference && fs.existsSync(path.join(directory, pptx.reference))) {
1509
+ // Custom reference doc takes precedence
1510
+ referenceDoc = path.join(directory, pptx.reference);
1511
+ } else {
1512
+ // Use built-in theme (default: 'default')
1513
+ const themeName = pptx.theme || 'default';
1514
+ const themePath = getThemePath(themeName);
1515
+ if (themePath && fs.existsSync(themePath)) {
1516
+ referenceDoc = themePath;
1517
+ }
1518
+ }
1519
+
1520
+ if (referenceDoc) {
1521
+ args.push('--reference-doc', referenceDoc);
1522
+ }
1523
+
1524
+ // Add color filter for PPTX (handles [text]{color=#RRGGBB} syntax).
1525
+ // fileURLToPath handles Windows paths with spaces — the old
1526
+ // `new URL(...).pathname` returned URL-encoded `%20` and fs.existsSync
1527
+ // silently failed.
1528
+ const colorFilterPath = path.join(
1529
+ path.dirname(fileURLToPath(import.meta.url)),
1530
+ 'pptx-color-filter.lua'
1531
+ );
1532
+ if (fs.existsSync(colorFilterPath)) {
1533
+ args.push('--lua-filter', colorFilterPath);
1534
+ }
1535
+ }
1536
+
1537
+ // Wire placeholder macros (built-in \tofill plus user-declared entries).
1538
+ // - docx/html: lua filter expands \name{X} to format-specific raw runs.
1539
+ // - pdf/tex/beamer: inject a \providecommand preamble so LaTeX renders it
1540
+ // directly. `\providecommand` is non-clobbering, so a user who already
1541
+ // has `\providecommand{\tofill}{...}` in their own header keeps theirs.
1542
+ //
1543
+ // Sidecar path is passed to the lua filter via DOCREV_MACROS_FILE in the
1544
+ // child env (not pandoc metadata) because pandoc walks RawInline/RawBlock
1545
+ // BEFORE Meta — by the time a Meta handler could read the path, the inline
1546
+ // expansion has already happened.
1547
+ const macroTempFiles: string[] = [];
1548
+ let macroEnvFile: string | null = null;
1549
+ const macros = mergeMacros((config as { macros?: unknown }).macros);
1550
+ if (macros.length > 0) {
1551
+ if (format === 'docx' || format === 'html' || format === 'html5' || format === 'html4') {
1552
+ const sidecarPath = writeMacrosSidecar(directory, macros);
1553
+ macroTempFiles.push(sidecarPath);
1554
+ macroEnvFile = sidecarPath;
1555
+ const filterPath = getMacroFilterPath();
1556
+ if (fs.existsSync(filterPath)) {
1557
+ args.push('--lua-filter', filterPath);
1558
+ }
1559
+ } else if (format === 'pdf' || format === 'tex' || format === 'beamer') {
1560
+ const preamble = generateLatexPreamble(macros);
1561
+ const preamblePath = path.join(directory, '.macros.tex');
1562
+ fs.writeFileSync(preamblePath, preamble, 'utf-8');
1563
+ macroTempFiles.push(preamblePath);
1564
+ args.push('-H', path.basename(preamblePath));
1565
+ }
1566
+ }
1567
+
1568
+ // Add crossref metadata file if exists (skip for slides - they don't use crossref)
1569
+ if (format !== 'beamer' && format !== 'pptx') {
1570
+ const crossrefPath = path.join(directory, 'crossref.yaml');
1571
+ if (fs.existsSync(crossrefPath) && hasPandocCrossref()) {
1572
+ // Use basename since we set cwd to directory
1573
+ args.push('--metadata-file', 'crossref.yaml');
1574
+ }
1575
+ }
1576
+
1577
+ // Passthrough args go last so they win against built-in defaults.
1578
+ args.push(...collectPandocPassthroughArgs(format, config, options.pandocArgs));
1579
+
1580
+ // Input file (use basename since we set cwd to directory)
1581
+ args.push(path.basename(inputPath));
1582
+
1583
+ if (options.verbose) {
1584
+ const quoted = args.map(a => /[\s"'$`]/.test(a) ? `"${a.replace(/"/g, '\\"')}"` : a).join(' ');
1585
+ console.error(`[pandoc ${format}] (cwd: ${directory})`);
1586
+ console.error(` pandoc ${quoted}`);
1587
+ }
1588
+
1589
+ return new Promise((resolve) => {
1590
+ const pandocEnv: NodeJS.ProcessEnv = { ...process.env };
1591
+ if (macroEnvFile) {
1592
+ pandocEnv.DOCREV_MACROS_FILE = macroEnvFile;
1593
+ }
1594
+ const pandoc: ChildProcess = spawn('pandoc', args, {
1595
+ cwd: directory,
1596
+ stdio: ['ignore', 'pipe', 'pipe'],
1597
+ env: pandocEnv,
1598
+ });
1599
+
1600
+ let stderr = '';
1601
+ pandoc.stderr?.on('data', (data) => {
1602
+ stderr += data.toString();
1603
+ });
1604
+
1605
+ const cleanupMacroTempFiles = (): void => {
1606
+ for (const tmp of macroTempFiles) {
1607
+ try {
1608
+ fs.unlinkSync(tmp);
1609
+ } catch {
1610
+ // ignore — best-effort cleanup
1611
+ }
1612
+ }
1613
+ };
1614
+
1615
+ pandoc.on('close', async (code) => {
1616
+ cleanupMacroTempFiles();
1617
+ if (code === 0) {
1618
+ // For PPTX, post-process to add slide numbers, buildup colors, and logos
1619
+ if (format === 'pptx') {
1620
+ try {
1621
+ // Inject slide numbers into content slides only
1622
+ await injectSlideNumbers(outputPath);
1623
+ } catch (e) {
1624
+ // Slide number injection failed but output was created
1625
+ }
1626
+ try {
1627
+ // Apply colors (default text color, title color, buildup greying)
1628
+ const pptxConfig = config.pptx || {};
1629
+ const colorsConfig = pptxConfig.colors || {};
1630
+ const buildupConfig = pptxConfig.buildup || {};
1631
+ // Merge colors and buildup config for applyBuildupColors
1632
+ const colorConfig = {
1633
+ default: colorsConfig.default,
1634
+ title: colorsConfig.title,
1635
+ grey: buildupConfig.grey,
1636
+ accent: buildupConfig.accent,
1637
+ enabled: buildupConfig.enabled
1638
+ };
1639
+ await applyBuildupColors(outputPath, colorConfig);
1640
+ } catch (e) {
1641
+ // Color application failed but output was created
1642
+ }
1643
+ // Inject logos into cover slide (if media dir configured)
1644
+ if (pptxMediaDir) {
1645
+ try {
1646
+ await injectMediaIntoPptx(outputPath, pptxMediaDir);
1647
+ } catch (e) {
1648
+ // Logo injection failed but output was created
1649
+ }
1650
+ }
1651
+ }
1652
+
1653
+ // Run user postprocess scripts
1654
+ const postResult = await runPostprocess(outputPath, format, config as unknown as Parameters<typeof runPostprocess>[2], options);
1655
+ if (!postResult.success && options.verbose) {
1656
+ console.error(`Postprocess warning: ${postResult.error}`);
1657
+ }
1658
+
1659
+ resolve({ outputPath, success: true });
1660
+ } else {
1661
+ resolve({ outputPath, success: false, error: stderr || `Exit code ${code}` });
1662
+ }
1663
+ });
1664
+
1665
+ pandoc.on('error', (err) => {
1666
+ cleanupMacroTempFiles();
1667
+ resolve({ outputPath, success: false, error: err.message });
1668
+ });
1669
+ });
1670
+ }
1671
+
1672
/**
 * Full build pipeline: combine the section files into a single paper file,
 * then render every requested format with pandoc.
 *
 * @param directory - Project directory holding the sections and config.
 * @param formats - Formats to build. If the list contains `'all'`, the whole
 *   list is replaced by `['pdf', 'docx', 'tex']`.
 * @param options - Build options. `options.config`, when present, is used
 *   instead of loading the config from `directory`.
 * @returns The per-format results, the combined paper path, any warnings
 *   collected along the way, and the forward-reference / auto-injected
 *   reference bookkeeping reported by the combine step.
 * @throws Error when pandoc is not installed.
 */
export async function build(
  directory: string,
  formats: string[] = ['pdf', 'docx'],
  options: BuildOptions = {}
): Promise<FullBuildResult> {
  const warnings: string[] = [];

  // Expand 'all' up front into a local list instead of reassigning the
  // caller-visible parameter.
  const targetFormats = formats.includes('all') ? ['pdf', 'docx', 'tex'] : formats;

  // Check pandoc
  if (!hasPandoc()) {
    const instruction = getInstallInstructions('pandoc');
    throw new Error(`Pandoc not found. Install with: ${instruction}\nOr run: rev doctor`);
  }

  // Check LaTeX if PDF is requested ('all' always includes pdf)
  if (targetFormats.includes('pdf') && !hasLatex()) {
    warnings.push(`LaTeX not found - PDF generation may fail. Install with: ${getInstallInstructions('latex')}`);
  }

  // Check pandoc-crossref
  if (!hasPandocCrossref()) {
    warnings.push('pandoc-crossref not found - figure/table numbering will not work');
  }

  // Load config (use passed config if provided, otherwise load from file)
  const config = options.config || loadConfig(directory);

  // Combine sections → paper.md. The combine step is handed a shallow copy of
  // the options; the `_forwardRefsResolved` / `_refsAutoInjected` fields are
  // read back from that copy afterwards.
  const buildOptions: CombineOptions = { ...options };
  const paperPath = combineSections(directory, config, buildOptions);
  const forwardRefsResolved = buildOptions._forwardRefsResolved || 0;
  const refsAutoInjected = buildOptions._refsAutoInjected || false;

  // Build and save image registry when DOCX is being built
  // This allows import to restore proper image syntax from Word documents
  if (targetFormats.includes('docx')) {
    const paperContent = fs.readFileSync(paperPath, 'utf-8');
    const crossrefReg = buildRegistry(directory, config.sections);
    const imageReg = buildImageRegistry(paperContent, crossrefReg as any);
    if ((imageReg as any).figures?.length > 0) {
      writeImageRegistry(directory, imageReg);
    }

    // Warn about raw LaTeX figure blocks that won't render in docx (pandoc
    // drops them silently). With auto-translate on (default), this surfaces
    // only the exotic leftovers; with it off, every block.
    const rawFigWarning = collectRawLatexFigureWarning(directory, config);
    if (rawFigWarning) warnings.push(rawFigWarning);
  }

  const results: BuildResult[] = [];

  for (const format of targetFormats) {
    // Prepare format-specific version
    const preparedPath = prepareForFormat(paperPath, format, config, options);

    try {
      // Run pandoc
      const result = await runPandoc(preparedPath, format, config, options);
      results.push({ format, ...result });
    } finally {
      // Clean up the temp file even when runPandoc rejects, so a failed
      // build does not leave the prepared file behind.
      try {
        fs.unlinkSync(preparedPath);
      } catch {
        // Ignore cleanup errors
      }
    }
  }

  return { results, paperPath, warnings, forwardRefsResolved, refsAutoInjected };
}
1750
+
1751
/**
 * Render build results as a status summary, one line per result.
 *
 * A successful result is shown as the upper-cased format followed by the base
 * name of its output file; a failed result is shown as the upper-cased format
 * followed by `FAILED - ` and the recorded error.
 *
 * @param results - Per-format build results to summarize.
 * @returns The summary lines joined with newlines (an empty string when
 *   `results` is empty).
 */
export function formatBuildResults(results: BuildResult[]): string {
  const describe = (entry: BuildResult): string =>
    entry.success
      ? ` ${entry.format.toUpperCase()}: ${path.basename(entry.outputPath!)}`
      : ` ${entry.format.toUpperCase()}: FAILED - ${entry.error}`;

  return results.map(describe).join('\n');
}