docrev 0.10.0 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. package/.gitattributes +1 -1
  2. package/CHANGELOG.md +173 -164
  3. package/PLAN-tables-and-postprocess.md +850 -850
  4. package/README.md +431 -431
  5. package/bin/rev.js +11 -11
  6. package/bin/rev.ts +145 -145
  7. package/completions/rev.bash +127 -127
  8. package/completions/rev.ps1 +210 -210
  9. package/completions/rev.zsh +207 -207
  10. package/dist/lib/anchor-match.d.ts +1 -1
  11. package/dist/lib/anchor-match.d.ts.map +1 -1
  12. package/dist/lib/anchor-match.js +17 -47
  13. package/dist/lib/anchor-match.js.map +1 -1
  14. package/dist/lib/build.js +4 -4
  15. package/dist/lib/commands/context.d.ts +1 -1
  16. package/dist/lib/commands/context.d.ts.map +1 -1
  17. package/dist/lib/commands/context.js +1 -1
  18. package/dist/lib/commands/context.js.map +1 -1
  19. package/dist/lib/commands/sections.js +7 -7
  20. package/dist/lib/commands/sections.js.map +1 -1
  21. package/dist/lib/commands/sync.d.ts.map +1 -1
  22. package/dist/lib/commands/sync.js +15 -14
  23. package/dist/lib/commands/sync.js.map +1 -1
  24. package/dist/lib/commands/utilities.js +164 -164
  25. package/dist/lib/commands/verify-anchors.js +6 -6
  26. package/dist/lib/commands/verify-anchors.js.map +1 -1
  27. package/dist/lib/commands/word-tools.js +8 -8
  28. package/dist/lib/grammar.js +3 -3
  29. package/dist/lib/macro-filter.lua +201 -201
  30. package/dist/lib/pdf-comments.js +44 -44
  31. package/dist/lib/plugins.js +57 -57
  32. package/dist/lib/pptx-color-filter.lua +37 -37
  33. package/dist/lib/pptx-themes.js +115 -115
  34. package/dist/lib/sections.d.ts +35 -0
  35. package/dist/lib/sections.d.ts.map +1 -1
  36. package/dist/lib/sections.js +81 -0
  37. package/dist/lib/sections.js.map +1 -1
  38. package/dist/lib/spelling.js +2 -2
  39. package/dist/lib/templates.js +387 -387
  40. package/dist/lib/themes.js +51 -51
  41. package/docs-src/build.py +113 -113
  42. package/docs-src/extra.css +208 -208
  43. package/docs-src/md-to-html.lua +6 -6
  44. package/docs-src/template.html +116 -116
  45. package/eslint.config.js +27 -27
  46. package/lib/anchor-match.ts +276 -308
  47. package/lib/annotations.ts +644 -644
  48. package/lib/build.ts +1766 -1766
  49. package/lib/citations.ts +160 -160
  50. package/lib/commands/build.ts +855 -855
  51. package/lib/commands/citations.ts +515 -515
  52. package/lib/commands/comments.ts +1050 -1050
  53. package/lib/commands/context.ts +176 -174
  54. package/lib/commands/core.ts +309 -309
  55. package/lib/commands/doi.ts +435 -435
  56. package/lib/commands/file-ops.ts +372 -372
  57. package/lib/commands/history.ts +320 -320
  58. package/lib/commands/index.ts +87 -87
  59. package/lib/commands/init.ts +259 -259
  60. package/lib/commands/merge-resolve.ts +378 -378
  61. package/lib/commands/preview.ts +178 -178
  62. package/lib/commands/project-info.ts +244 -244
  63. package/lib/commands/quality.ts +517 -517
  64. package/lib/commands/response.ts +454 -454
  65. package/lib/commands/section-boundaries.ts +82 -82
  66. package/lib/commands/sections.ts +451 -451
  67. package/lib/commands/sync.ts +709 -706
  68. package/lib/commands/text-ops.ts +449 -449
  69. package/lib/commands/utilities.ts +448 -448
  70. package/lib/commands/verify-anchors.ts +272 -272
  71. package/lib/commands/word-tools.ts +340 -340
  72. package/lib/comment-realign.ts +517 -517
  73. package/lib/config.ts +84 -84
  74. package/lib/crossref.ts +781 -781
  75. package/lib/csl.ts +191 -191
  76. package/lib/dependencies.ts +98 -98
  77. package/lib/diff-engine.ts +465 -465
  78. package/lib/doi-cache.ts +115 -115
  79. package/lib/doi.ts +897 -897
  80. package/lib/equations.ts +506 -506
  81. package/lib/errors.ts +346 -346
  82. package/lib/format.ts +541 -541
  83. package/lib/git.ts +326 -326
  84. package/lib/grammar.ts +303 -303
  85. package/lib/image-registry.ts +180 -180
  86. package/lib/import.ts +911 -911
  87. package/lib/journals.ts +543 -543
  88. package/lib/macro-filter.lua +201 -201
  89. package/lib/macros.ts +273 -273
  90. package/lib/merge.ts +633 -633
  91. package/lib/orcid.ts +144 -144
  92. package/lib/pdf-comments.ts +263 -263
  93. package/lib/pdf-import.ts +524 -524
  94. package/lib/plugins.ts +362 -362
  95. package/lib/postprocess.ts +188 -188
  96. package/lib/pptx-color-filter.lua +37 -37
  97. package/lib/pptx-template.ts +469 -469
  98. package/lib/pptx-themes.ts +483 -483
  99. package/lib/protect-restore.ts +520 -520
  100. package/lib/rate-limiter.ts +94 -94
  101. package/lib/response.ts +197 -197
  102. package/lib/restore-references.ts +240 -240
  103. package/lib/review.ts +327 -327
  104. package/lib/schema.ts +488 -488
  105. package/lib/scientific-words.ts +73 -73
  106. package/lib/sections.ts +425 -335
  107. package/lib/slides.ts +756 -756
  108. package/lib/spelling.ts +334 -334
  109. package/lib/templates.ts +526 -526
  110. package/lib/themes.ts +742 -742
  111. package/lib/trackchanges.ts +247 -247
  112. package/lib/tui.ts +450 -450
  113. package/lib/types.ts +550 -550
  114. package/lib/undo.ts +250 -250
  115. package/lib/utils.ts +69 -69
  116. package/lib/variables.ts +179 -179
  117. package/lib/word-extraction.ts +806 -806
  118. package/lib/word.ts +643 -643
  119. package/lib/wordcomments.ts +840 -840
  120. package/mkdocs.yml +64 -64
  121. package/package.json +137 -137
  122. package/scripts/postbuild.js +47 -47
  123. package/skill/REFERENCE.md +539 -539
  124. package/skill/SKILL.md +295 -295
  125. package/tsconfig.json +26 -26
  126. package/types/index.d.ts +525 -525
package/lib/build.ts CHANGED
@@ -1,1766 +1,1766 @@
1
- /**
2
- * Build system - combines sections → paper.md → PDF/DOCX/TEX
3
- *
4
- * Features:
5
- * - Reads rev.yaml config
6
- * - Combines section files into paper.md (persisted)
7
- * - Strips annotations appropriately per output format
8
- * - Runs pandoc with crossref filter
9
- */
10
-
11
- import * as fs from 'fs';
12
- import * as path from 'path';
13
- import { fileURLToPath } from 'url';
14
- import { execSync, spawn, ChildProcess } from 'child_process';
15
- import YAML from 'yaml';
16
- import { stripAnnotations } from './annotations.js';
17
- import { buildRegistry, labelToDisplay, detectDynamicRefs, resolveForwardRefs, resolveSupplementaryRefs } from './crossref.js';
18
- import { processVariables, hasVariables } from './variables.js';
19
- import { processSlideMarkdown, hasSlideSyntax } from './slides.js';
20
- import { generatePptxTemplate, templateNeedsRegeneration, injectMediaIntoPptx, injectSlideNumbers, applyThemeFonts, applyCentering, applyBuildupColors } from './pptx-template.js';
21
- import { getThemePath, getThemeNames, PPTX_THEMES } from './pptx-themes.js';
22
- import { runPostprocess } from './postprocess.js';
23
- import { hasPandoc, hasPandocCrossref, hasLatex } from './dependencies.js';
24
- import { buildImageRegistry, writeImageRegistry } from './image-registry.js';
25
- import type { Author, JournalFormatting } from './types.js';
26
- import { getJournalProfile } from './journals.js';
27
- import { resolveCSL } from './csl.js';
28
- import {
29
- type MacroDef,
30
- mergeMacros,
31
- generateLatexPreamble,
32
- writeMacrosSidecar,
33
- getMacroFilterPath,
34
- } from './macros.js';
35
-
36
- // =============================================================================
37
- // Constants
38
- // =============================================================================
39
-
/**
 * Supported output formats.
 * Declared `as const` so the literal format names are preserved in the type.
 */
const SUPPORTED_FORMATS = ['pdf', 'docx', 'tex', 'beamer', 'pptx'] as const;

/**
 * Maximum length for slugified-title output filenames. Only used when no
 * explicit `output:` filename is configured. Long titles are truncated at the
 * last `-` boundary at-or-before this length so words stay intact (the old
 * blind `.slice(0, 50)` cut mid-word).
 */
const MAX_TITLE_FILENAME_LENGTH = 80;
50
-
51
- // =============================================================================
52
- // Interfaces
53
- // =============================================================================
54
-
/**
 * Cross-reference settings (the `crossref` block of rev.yaml).
 * Built-in values live in `DEFAULT_CONFIG.crossref`.
 */
export interface CrossrefConfig {
  /** Default: 'Figure'. */
  figureTitle?: string;
  /** Default: 'Table'. */
  tableTitle?: string;
  /** Default: ['Fig.', 'Figs.'] — presumably singular/plural forms; confirm against the crossref filter. */
  figPrefix?: string | string[];
  /** Default: ['Table', 'Tables']. */
  tblPrefix?: string | string[];
  /** Default: ['Section', 'Sections']. */
  secPrefix?: string | string[];
  /** Default: true. */
  linkReferences?: boolean;
}
63
-
/**
 * PDF output settings (the `pdf` block of rev.yaml).
 * Built-in values live in `DEFAULT_CONFIG.pdf`; journal profiles may override
 * fields the user left at those defaults (see `mergeJournalFormatting`).
 */
export interface PdfConfig {
  template?: string | null;
  headerIncludes?: string | null;
  /** Default: 'article'. */
  documentclass?: string;
  /** Default: '12pt'. */
  fontsize?: string;
  /** Default: 'margin=1in'. */
  geometry?: string;
  /** Default: 1.5. */
  linestretch?: number;
  /** Default: false. */
  numbersections?: boolean;
  /** Default: false. */
  toc?: boolean;
  /**
   * LaTeX engine: pdflatex (default), xelatex, lualatex, tectonic, etc.
   * xelatex/lualatex are required for native UTF-8 rendering of Latin-Extended
   * diacritics (Czech/Polish/Croatian/Spanish author names, species epithets).
   */
  engine?: string;
  /** Roman/serif main font (xelatex/lualatex only — uses fontspec). */
  mainfont?: string;
  /** Sans-serif font (xelatex/lualatex only). */
  sansfont?: string;
  /** Monospace font (xelatex/lualatex only). */
  monofont?: string;
  /** Extra pandoc args appended for this format (after top-level pandocArgs). */
  pandocArgs?: string[];
}
88
-
/**
 * DOCX output settings (the `docx` block of rev.yaml).
 * Built-in values live in `DEFAULT_CONFIG.docx`.
 */
export interface DocxConfig {
  reference?: string | null;
  /** Default: false. */
  keepComments?: boolean;
  /**
   * Default: true. When not `false`, every affiliation line except the last
   * in the generated markdown author block ends with a backslash line break.
   */
  affiliationNewline?: boolean;
  /** Default: false. */
  toc?: boolean;
  /** Extra pandoc args for this format (the hyphenated `pandoc-args` key is also accepted in rev.yaml). */
  pandocArgs?: string[];
  /**
   * Auto-translate the common-shape raw `\begin{figure}...\end{figure}` block
   * to portable `![caption](path){#fig:label width=N%}` markdown so figures
   * survive the docx build (pandoc otherwise drops raw LaTeX silently).
   * Default true. Set false to opt out — blocks then warn and are left alone.
   */
  translateRawFigures?: boolean;
}
103
-
/** LaTeX source output settings (the `tex` block of rev.yaml). */
export interface TexConfig {
  /** Default: true. */
  standalone?: boolean;
  /** Extra pandoc args for this format (the hyphenated `pandoc-args` key is also accepted in rev.yaml). */
  pandocArgs?: string[];
}
108
-
/**
 * Beamer slide settings (the `beamer` block of rev.yaml).
 * Built-in values live in `DEFAULT_CONFIG.beamer`.
 */
export interface BeamerConfig {
  /** Default: 'default'. */
  theme?: string;
  colortheme?: string | null;
  fonttheme?: string | null;
  /** '169' for 16:9, '43' for 4:3. */
  aspectratio?: string | null;
  /** 'horizontal', 'vertical', 'frame', 'empty'. */
  navigation?: string | null;
  /** Section divider slides. Default: true. */
  section?: boolean;
  /** 'show' (presenter view), 'only' (notes only), 'hide', or false. Default: 'show'. */
  notes?: string | false;
  /** Scale images to fit within slide bounds. Default: true. */
  fit_images?: boolean;
  /** Extra pandoc args for this format (the hyphenated `pandoc-args` key is also accepted in rev.yaml). */
  pandocArgs?: string[];
}
120
-
/**
 * PowerPoint output settings (the `pptx` block of rev.yaml).
 * Built-in values live in `DEFAULT_CONFIG.pptx`.
 */
export interface PptxConfig {
  /** Built-in theme: default, dark, academic, minimal, corporate. */
  theme?: string;
  /** Custom reference-doc (overrides theme). */
  reference?: string | null;
  /** Directory with logo images (e.g., logo-left.png, logo-right.png). */
  media?: string | null;
  // NOTE(review): the semantics of `colors` and `buildup` are not visible in
  // this part of the file — presumably consumed by the pptx template helpers.
  colors?: {
    default?: string;
    title?: string;
  };
  buildup?: {
    grey?: string;
    accent?: string;
    enabled?: boolean;
  };
  /** Extra pandoc args for this format (the hyphenated `pandoc-args` key is also accepted in rev.yaml). */
  pandocArgs?: string[];
}
136
-
/** Table formatting settings (the `tables` block of rev.yaml). */
export interface TablesConfig {
  /** Column headers to apply nowrap formatting to. Default: []. */
  nowrap?: string[];
}
140
-
/**
 * Postprocess scripts, keyed by output format (the `postprocess` block of
 * rev.yaml). Every built-in entry defaults to null.
 */
export interface PostprocessConfig {
  pdf?: string | null;
  docx?: string | null;
  tex?: string | null;
  pptx?: string | null;
  beamer?: string | null;
  /** Runs after any format. */
  all?: string | null;
  /** Allows lookup by an arbitrary format name. */
  [key: string]: string | null | undefined;
}
150
-
/**
 * Complete build configuration: `DEFAULT_CONFIG` overlaid with the user's
 * rev.yaml (see `loadConfig`).
 */
export interface BuildConfig {
  title: string;
  /** Plain name strings or structured `Author` entries. */
  authors: (string | Author)[];
  /** Affiliation key → affiliation text; key order determines the numbering in author blocks. */
  affiliations: Record<string, string>;
  /** Section file names in build order; when empty, sections are discovered (see `findSections`). */
  sections: string[];
  bibliography: string | null;
  csl: string | null;
  crossref: CrossrefConfig;
  pdf: PdfConfig;
  docx: DocxConfig;
  tex: TexConfig;
  beamer: BeamerConfig;
  pptx: PptxConfig;
  tables: TablesConfig;
  postprocess: PostprocessConfig;
  /**
   * User-declared placeholder macros. Merged with the built-in macros
   * (currently \tofill). Each entry overrides a built-in by name.
   *
   * See lib/macros.ts for the per-format rendering rules.
   */
  macros?: MacroDef[];
  /**
   * Directory (relative to the project) where final outputs land. Created on
   * demand. Set to null/empty to keep outputs alongside paper.md (legacy
   * behavior).
   */
  outputDir?: string | null;
  /**
   * Per-format output filenames. Keys are format names (pdf/docx/tex/beamer/
   * pptx); values are paths. Relative paths resolve under outputDir; absolute
   * paths are honored as-is. Extension is added if missing. CLI `-o` wins
   * over this map.
   */
  output?: Record<string, string>;
  /**
   * Extra pandoc args applied to every format. Format-specific args
   * (e.g. docx.pandocArgs) are appended *after* these, and CLI --pandoc-arg
   * values are appended last.
   */
  pandocArgs?: string[];
  /** Path of the rev.yaml this config was loaded from; null when no rev.yaml exists. */
  _configPath?: string | null;
}
194
-
/** Outcome of building a single output format. */
export interface BuildResult {
  /** Format name. */
  format: string;
  success: boolean;
  outputPath?: string;
  error?: string;
}
201
-
/** Per-invocation build options. */
interface BuildOptions {
  verbose?: boolean;
  config?: BuildConfig;
  /**
   * Internal: forces the exact output path. Used by dual-mode/temp builds that
   * route to specific temp files. Bypasses the `output:` resolver.
   */
  outputPath?: string;
  /**
   * CLI override (`-o, --output <path>`). Beats `config.output[format]` but
   * loses to `options.outputPath`. Relative paths resolve under outputDir;
   * absolute paths bypass outputDir.
   */
  output?: string;
  crossref?: boolean;
  /** Extra pandoc args from CLI (--pandoc-arg). Appended after config args. */
  pandocArgs?: string[];
  /** Out-parameter: set to true by `combineSections` when it injects a References section. */
  _refsAutoInjected?: boolean;
  /** Out-parameter: number of forward references `combineSections` resolved. */
  _forwardRefsResolved?: number;
}
222
-
/**
 * Options accepted by `combineSections`.
 * `_refsAutoInjected` repeats a field already inherited from `BuildOptions`.
 */
interface CombineOptions extends BuildOptions {
  _refsAutoInjected?: boolean;
}
226
-
/**
 * Context for template-variable processing.
 * NOTE(review): presumably the shape of the third argument `combineSections`
 * passes to `processVariables` — confirm.
 */
interface VariablesContext {
  /** One entry per section. */
  sectionContents: string[];
}
230
-
/** Result of a pandoc run. NOTE(review): the producer is outside this view — confirm. */
interface PandocResult {
  outputPath: string;
  success: boolean;
  error?: string;
}
236
-
/** Aggregate result of a build covering one or more formats. */
interface FullBuildResult {
  /** Per-format outcomes. */
  results: BuildResult[];
  paperPath: string;
  warnings: string[];
  forwardRefsResolved: number;
  refsAutoInjected?: boolean;
}
244
-
/**
 * A detected dynamic cross-reference.
 * NOTE(review): presumably mirrors what `detectDynamicRefs` in crossref.js
 * returns — confirm.
 */
interface DynamicRef {
  type: string;
  label: string;
  match: string;
  position: number;
}
251
-
/**
 * Cross-reference registry shape.
 * NOTE(review): presumably mirrors the value returned by `buildRegistry` in
 * crossref.js; the `S` variants look like supplementary numbering — confirm.
 */
interface Registry {
  figures: Map<string, unknown>;
  tables: Map<string, unknown>;
  equations: Map<string, unknown>;
  byNumber: {
    fig?: Map<number, string>;
    figS?: Map<number, string>;
    tbl?: Map<number, string>;
    tblS?: Map<number, string>;
    eq?: Map<number, string>;
  };
}
264
-
/**
 * Default rev.yaml configuration.
 *
 * `loadConfig` overlays the user's rev.yaml on top of these values, and
 * `mergeJournalFormatting` compares user values against them to decide
 * whether a journal profile may override a field.
 */
export const DEFAULT_CONFIG: BuildConfig = {
  title: 'Untitled Document',
  authors: [],
  affiliations: {},
  sections: [],
  bibliography: null,
  csl: null,
  crossref: {
    figureTitle: 'Figure',
    tableTitle: 'Table',
    figPrefix: ['Fig.', 'Figs.'],
    tblPrefix: ['Table', 'Tables'],
    secPrefix: ['Section', 'Sections'],
    linkReferences: true,
  },
  pdf: {
    template: null,
    documentclass: 'article',
    fontsize: '12pt',
    geometry: 'margin=1in',
    linestretch: 1.5,
    numbersections: false,
    toc: false,
  },
  docx: {
    reference: null,
    keepComments: false,
    affiliationNewline: true,
    toc: false,
    translateRawFigures: true,
  },
  tex: {
    standalone: true,
  },
  // Slide formats
  beamer: {
    theme: 'default',
    colortheme: null,
    fonttheme: null,
    aspectratio: null, // '169' for 16:9, '43' for 4:3
    navigation: null, // 'horizontal', 'vertical', 'frame', 'empty'
    section: true, // section divider slides
    notes: 'show', // 'show' (presenter view), 'only' (notes only), 'hide', or false
    fit_images: true, // scale images to fit within slide bounds
  },
  pptx: {
    theme: 'default', // Built-in theme: default, dark, academic, minimal, corporate
    reference: null, // Custom reference-doc (overrides theme)
    media: null, // directory with logo images (e.g., logo-left.png, logo-right.png)
  },
  // Table formatting
  tables: {
    nowrap: [], // Column headers to apply nowrap formatting (converts Normal() → $\mathcal{N}()$ etc.)
  },
  // Postprocess scripts
  postprocess: {
    pdf: null,
    docx: null,
    tex: null,
    pptx: null,
    beamer: null,
    all: null, // Runs after any format
  },
  // Placeholder/highlight macros. Defaults are the built-ins from
  // lib/macros.ts; users append their own here.
  macros: [],
  // Final outputs land here (created on demand). Set to null or '' to keep
  // outputs in the project root.
  outputDir: 'output',
};
338
-
339
- // =============================================================================
340
- // Public API
341
- // =============================================================================
342
-
/**
 * Overlay journal values onto one config block, touching only the keys the
 * user left at the built-in default.
 *
 * A key counts as "unset" when the user's value serialises (JSON) to the same
 * text as the built-in default — including the case where both are undefined.
 *
 * @param user - The user's (already default-merged) block; may be missing
 * @param defaults - The matching block of `DEFAULT_CONFIG`
 * @param journal - The journal profile's values for this block
 * @returns A fresh object; `user` is not mutated
 */
function applyJournalDefaults(
  user: object | undefined,
  defaults: object,
  journal: object,
): Record<string, unknown> {
  const current = (user || {}) as Record<string, unknown>;
  const builtIn = defaults as Record<string, unknown>;
  const out: Record<string, unknown> = { ...current };

  for (const [key, value] of Object.entries(journal)) {
    if (value !== undefined && JSON.stringify(current[key]) === JSON.stringify(builtIn[key])) {
      out[key] = value;
    }
  }
  return out;
}

/**
 * Merge journal formatting defaults into a config.
 * Priority: DEFAULT_CONFIG < journal formatting < rev.yaml explicit settings
 *
 * @param config - Config already merged with `DEFAULT_CONFIG`
 * @param formatting - Formatting section of the journal profile
 * @param directory - Project directory, used to resolve the CSL file locally
 * @returns A shallow copy of `config` with journal values applied; the
 *          `pdf`/`docx`/`crossref` blocks are replaced by new objects only
 *          when the profile defines that block
 */
export function mergeJournalFormatting(config: BuildConfig, formatting: JournalFormatting, directory: string): BuildConfig {
  const merged = { ...config };

  // CSL: only apply if user hasn't set one. If it cannot be resolved locally,
  // keep the bare name — pandoc --citeproc can sometimes resolve it, and the
  // user can fetch it with rev profiles --fetch-csl.
  if (formatting.csl && !config.csl) {
    merged.csl = resolveCSL(formatting.csl, directory) || formatting.csl;
  }

  // PDF / DOCX / crossref settings: merge only unset fields
  if (formatting.pdf) {
    merged.pdf = applyJournalDefaults(config.pdf, DEFAULT_CONFIG.pdf, formatting.pdf) as PdfConfig;
  }
  if (formatting.docx) {
    merged.docx = applyJournalDefaults(config.docx, DEFAULT_CONFIG.docx, formatting.docx) as DocxConfig;
  }
  if (formatting.crossref) {
    merged.crossref = applyJournalDefaults(config.crossref, DEFAULT_CONFIG.crossref, formatting.crossref) as CrossrefConfig;
  }

  return merged;
}
405
-
/**
 * In-place: copy `pandoc-args` → `pandocArgs` on an object (if not already set).
 * Idempotent. Coerces a single value into a one-element array.
 *
 * An empty YAML entry (`pandoc-args:` with no value parses to null) is treated
 * as "no args": the hyphenated key is removed and `pandocArgs` is left alone,
 * instead of producing `[null]`.
 *
 * @param obj - Config object (top level or a per-format block); mutated
 */
function normalizePandocArgsKey(obj: Record<string, unknown>): void {
  if (!obj || typeof obj !== 'object') return;

  const hyphenated = obj['pandoc-args'];
  if (hyphenated === undefined) return;

  // camelCase wins when both spellings are present.
  if (obj.pandocArgs === undefined && hyphenated !== null) {
    obj.pandocArgs = Array.isArray(hyphenated) ? hyphenated : [hyphenated];
  }
  delete obj['pandoc-args'];
}
419
-
/**
 * Load rev.yaml config from directory
 *
 * When rev.yaml is absent the defaults are returned with `_configPath: null`.
 * Otherwise the user's settings are laid over `DEFAULT_CONFIG` (one level deep
 * for the nested blocks), and — if a `journal` is named and has a profile with
 * formatting — the journal's values fill in whatever the user left at default.
 *
 * @param directory - Project directory path
 * @returns Merged config with defaults
 * @throws {TypeError} If directory is not a string
 * @throws {Error} If rev.yaml exists but cannot be parsed, or its top level is
 *         not a mapping (e.g. a bare string or a list)
 */
export function loadConfig(directory: string): BuildConfig {
  if (typeof directory !== 'string') {
    throw new TypeError(`directory must be a string, got ${typeof directory}`);
  }

  const configPath = path.join(directory, 'rev.yaml');

  if (!fs.existsSync(configPath)) {
    return { ...DEFAULT_CONFIG, _configPath: null };
  }

  try {
    const content = fs.readFileSync(configPath, 'utf-8');
    // An empty file parses to null; treat it as "no settings".
    const userConfig = YAML.parse(content) || {};

    // Spreading a string or array below would inject index keys ('0', '1', …)
    // into the config, so reject anything that is not a mapping.
    if (typeof userConfig !== 'object' || Array.isArray(userConfig)) {
      throw new Error('top level must be a mapping of settings');
    }

    // Accept hyphenated `pandoc-args` (the form pandoc itself uses) in addition
    // to camelCase `pandocArgs`. Hyphenated is what we document; camelCase is
    // accepted for users who already prefer that convention.
    normalizePandocArgsKey(userConfig);
    for (const fmt of SUPPORTED_FORMATS) {
      if (userConfig[fmt] && typeof userConfig[fmt] === 'object') {
        normalizePandocArgsKey(userConfig[fmt]);
      }
    }

    // Deep merge with defaults
    let config: BuildConfig = {
      ...DEFAULT_CONFIG,
      ...userConfig,
      crossref: { ...DEFAULT_CONFIG.crossref, ...userConfig.crossref },
      pdf: { ...DEFAULT_CONFIG.pdf, ...userConfig.pdf },
      docx: { ...DEFAULT_CONFIG.docx, ...userConfig.docx },
      tex: { ...DEFAULT_CONFIG.tex, ...userConfig.tex },
      beamer: { ...DEFAULT_CONFIG.beamer, ...userConfig.beamer },
      pptx: { ...DEFAULT_CONFIG.pptx, ...userConfig.pptx },
      tables: { ...DEFAULT_CONFIG.tables, ...userConfig.tables },
      postprocess: { ...DEFAULT_CONFIG.postprocess, ...userConfig.postprocess },
      _configPath: configPath,
    };

    // Apply journal formatting defaults (between DEFAULT_CONFIG and user settings)
    if (userConfig.journal) {
      const profile = getJournalProfile(userConfig.journal);
      if (profile?.formatting) {
        config = mergeJournalFormatting(config, profile.formatting, directory);
      }
    }

    return config;
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    throw new Error(`Failed to parse rev.yaml: ${message}`);
  }
}
481
-
/**
 * Find section files in directory
 *
 * Resolution order:
 *  1. `configSections`, when non-empty — order kept, missing files dropped
 *     with a warning.
 *  2. `sections.yaml` in the directory — entries sorted by their `order`
 *     field (entries without one sort as 999), missing files dropped.
 *  3. Every `.md` file except paper.md / readme.md / claude.md
 *     (case-insensitive), sorted alphabetically.
 *
 * @param directory - Project directory path
 * @param configSections - Sections from rev.yaml (optional)
 * @returns Ordered list of section file names
 * @throws {TypeError} If directory is not a string
 */
export function findSections(directory: string, configSections: string[] = []): string[] {
  if (typeof directory !== 'string') {
    throw new TypeError(`directory must be a string, got ${typeof directory}`);
  }

  // If sections specified in config, use that order
  if (configSections.length > 0) {
    return configSections.filter((section) => {
      if (fs.existsSync(path.join(directory, section))) return true;
      console.warn(`Warning: Section file not found: ${section}`);
      return false;
    });
  }

  // Try sections.yaml
  const sectionsYamlPath = path.join(directory, 'sections.yaml');
  if (fs.existsSync(sectionsYamlPath)) {
    try {
      const sectionsConfig = YAML.parse(fs.readFileSync(sectionsYamlPath, 'utf-8'));
      // An empty sections.yaml parses to null, hence the optional chain.
      const declared: unknown = sectionsConfig?.sections;
      if (declared && typeof declared === 'object') {
        // Entries may be null (`intro.md:` with no value) or lack `order`;
        // both sort last instead of aborting the whole file.
        const orderOf = (entry: unknown): number => {
          const raw = (entry as { order?: unknown } | null | undefined)?.order;
          const order = Number(raw ?? 999);
          return Number.isNaN(order) ? 999 : order;
        };
        return Object.entries(declared)
          .sort((a, b) => orderOf(a[1]) - orderOf(b[1]))
          .map(([file]) => file)
          .filter((f) => fs.existsSync(path.join(directory, f)));
      }
    } catch (e) {
      // Best effort: an unreadable sections.yaml falls through to discovery.
      if (process.env.DEBUG) {
        const message = e instanceof Error ? e.message : String(e);
        console.warn('build: YAML parse error in sections.yaml:', message);
      }
    }
  }

  // Default: find all .md files except special ones
  const exclude = ['paper.md', 'readme.md', 'claude.md'];
  const files = fs
    .readdirSync(directory)
    .filter((f) => f.endsWith('.md') && !exclude.includes(f.toLowerCase()));

  // Sort alphabetically as fallback
  return files.sort();
}
538
-
/**
 * Combine section files into paper.md
 *
 * Steps: emit YAML frontmatter built from `config`, append every section
 * (own frontmatter stripped) separated by blank lines, expand template
 * variables, and — when pandoc-crossref is available — resolve forward and
 * supplementary references. The result is written to `<directory>/paper.md`.
 *
 * When a bibliography file exists, no section file name mentions
 * references/refs, and no section contains an explicit `{#refs}` div, a
 * `# References` block is injected in front of the first supplementary or
 * appendix section so the bibliography does not land after it.
 *
 * @param directory - Project directory path
 * @param config - Merged build configuration
 * @param options - Mutated as an out-parameter: `_refsAutoInjected` and
 *        `_forwardRefsResolved` are set when the respective step did work
 * @returns Path of the written paper.md
 * @throws {Error} If no section files are found
 */
export function combineSections(directory: string, config: BuildConfig, options: CombineOptions = {}): string {
  const sections = findSections(directory, config.sections);

  if (sections.length === 0) {
    throw new Error('No section files found. Create .md files or specify sections in rev.yaml');
  }

  // Add YAML frontmatter
  const parts: string[] = ['---', YAML.stringify(buildFrontmatter(config)).trim(), '---', ''];

  // Check if we need to auto-inject references before supplementary
  // Pandoc places refs at the end by default, which breaks when supplementary follows
  const hasRefsSection = sections.some((s) => {
    const name = s.toLowerCase();
    return name.includes('reference') || name.includes('refs');
  });
  const suppIndex = sections.findIndex((s) => {
    const name = s.toLowerCase();
    return name.includes('supp') || name.includes('appendix');
  });
  const hasBibliography = config.bibliography && fs.existsSync(path.join(directory, config.bibliography));

  // Read every section up front so an explicit refs div is seen no matter
  // which section carries it — including ones after the supplementary.
  const loaded: Array<{ index: number; content: string }> = [];
  sections.forEach((section, index) => {
    if (!section) return;
    const raw = fs.readFileSync(path.join(directory, section), 'utf-8');
    // Remove any existing frontmatter from section files
    loaded.push({ index, content: stripFrontmatter(raw) });
  });

  // Contents in section order, also handed to variable processing
  const sectionContents = loaded.map((entry) => entry.content);

  // Pandoc accepts the fenced div with or without a space after the colons.
  const explicitRefsDiv = /:::[ \t]*\{#refs\}/;
  const hasExplicitRefsDiv = sectionContents.some((content) => explicitRefsDiv.test(content));
  const injectRefs = Boolean(hasBibliography) && !hasRefsSection && !hasExplicitRefsDiv;

  // Combine sections
  for (const { index, content } of loaded) {
    // Auto-inject references before supplementary if needed
    if (index === suppIndex && injectRefs) {
      parts.push('# References\n', '::: {#refs}', ':::', '', '');
      options._refsAutoInjected = true;
    }

    // Double newline between sections
    parts.push(content.trim(), '', '');
  }

  let paperContent = parts.join('\n');

  // Process template variables if any exist
  if (hasVariables(paperContent)) {
    paperContent = processVariables(paperContent, config as any, { sectionContents });
  }

  // Resolve forward references (refs that appear before their anchor definition)
  // This fixes pandoc-crossref limitation with multi-file documents
  if (hasPandocCrossref()) {
    const registry = buildRegistry(directory, sections);
    const { text, resolved } = resolveForwardRefs(paperContent, registry);
    if (resolved.length > 0) {
      paperContent = text;
      // Store resolved count for optional reporting
      options._forwardRefsResolved = resolved.length;
    }

    // Resolve supplementary references and strip their anchors.
    // pandoc-crossref cannot produce "Figure S1" numbering — it numbers all
    // figures sequentially. We resolve supplementary refs to plain text and
    // remove the {#fig:...} attributes so crossref ignores them.
    const supp = resolveSupplementaryRefs(paperContent, registry);
    if (supp.resolved.length > 0) {
      paperContent = supp.text;
    }
  }

  const paperPath = path.join(directory, 'paper.md');

  fs.writeFileSync(paperPath, paperContent, 'utf-8');

  return paperPath;
}
639
-
/**
 * Build YAML frontmatter from config
 *
 * Emits, in this order and only when truthy: `title`, `author`,
 * `bibliography`, `csl`.
 *
 * @param config - Merged build configuration
 * @returns Plain object ready for YAML serialisation
 */
function buildFrontmatter(config: BuildConfig): Record<string, unknown> {
  const { title, authors, bibliography, csl } = config;

  // Skip author in frontmatter when using numbered affiliations —
  // the author block is injected separately per format
  const includeAuthors = Boolean(authors && authors.length > 0 && !hasNumberedAffiliations(config));

  return {
    ...(title ? { title } : {}),
    ...(includeAuthors ? { author: authors } : {}),
    ...(bibliography ? { bibliography } : {}),
    ...(csl ? { csl } : {}),
  };
}
664
-
/**
 * Strip YAML frontmatter from content
 *
 * Frontmatter is recognised only at the very start: an opening `---` line,
 * then everything up to the first following line that consists of `---`
 * (trailing spaces/tabs allowed). Empty frontmatter (`---` directly followed
 * by `---`) and a closing fence that ends the file without a newline are
 * both handled. LF and CRLF line endings are accepted.
 *
 * @param content - Raw section text
 * @returns The text after the frontmatter, or `content` unchanged when there
 *          is no frontmatter
 */
function stripFrontmatter(content: string): string {
  // The lazy optional group makes the first `---` line after the opener the
  // closing fence, even when it follows immediately.
  const match = content.match(/^---\r?\n(?:[\s\S]*?\r?\n)??---[ \t]*(?:\r?\n|$)/);
  return match ? content.slice(match[0].length) : content;
}
675
-
/**
 * Check if config uses numbered affiliation mode
 * (authors have `affiliations` arrays and an affiliations map is defined)
 *
 * Tolerates configs straight from user YAML: a missing or non-array `authors`
 * value and null author entries yield `false` instead of throwing.
 *
 * @param config - Merged build configuration
 * @returns true when the affiliations map is non-empty and at least one
 *          structured author lists one or more affiliations
 */
function hasNumberedAffiliations(config: BuildConfig): boolean {
  if (!config.affiliations || Object.keys(config.affiliations).length === 0) return false;
  if (!Array.isArray(config.authors)) return false;

  return config.authors.some(
    (a) => a !== null && typeof a === 'object' && Array.isArray(a.affiliations) && a.affiliations.length > 0,
  );
}
684
-
/**
 * Generate LaTeX author block using authblk package for numbered superscript affiliations.
 * Returns LaTeX code to be injected via header-includes.
 *
 * Affiliations are numbered 1..n in the key order of `config.affiliations`;
 * author marks referring to unknown keys are dropped. Corresponding authors
 * get a `\thanks{}` note, including the e-mail when one is given.
 *
 * Characters that would break LaTeX are backslash-escaped unless already
 * escaped: `#`, `%`, `&` in names and affiliation text, and additionally `_`
 * and `$` in e-mail addresses. Other LaTeX markup is passed through untouched.
 *
 * @param config - Merged build configuration
 * @returns Newline-joined LaTeX preamble lines
 */
function generateLatexAuthorBlock(config: BuildConfig): string {
  // The lookbehind leaves sequences the user already escaped (e.g. `\&`) alone.
  const escapeText = (text: string): string => text.replace(/(?<!\\)([#%&])/g, '\\$1');
  const escapeEmail = (text: string): string => text.replace(/(?<!\\)([#$%&_])/g, '\\$1');

  const lines: string[] = [
    '\\usepackage{authblk}',
    '\\renewcommand\\Authfont{\\normalsize}',
    '\\renewcommand\\Affilfont{\\small}',
    '',
  ];

  // Map affiliation keys to numbers
  const keyToNum = new Map<string, number>();
  Object.keys(config.affiliations).forEach((key, i) => keyToNum.set(key, i + 1));

  // Authors
  for (const author of config.authors) {
    if (typeof author === 'string') {
      lines.push(`\\author{${escapeText(author)}}`);
      continue;
    }
    const marks = (author.affiliations || [])
      .map((k) => keyToNum.get(k))
      .filter((n): n is number => n !== undefined);

    const markStr = marks.length > 0 ? `[${marks.join(',')}]` : '';
    let nameStr = escapeText(`${author.name}`);
    if (author.corresponding && author.email) {
      nameStr += `\\thanks{Corresponding author: ${escapeEmail(`${author.email}`)}}`;
    } else if (author.corresponding) {
      nameStr += '\\thanks{Corresponding author}';
    }
    lines.push(`\\author${markStr}{${nameStr}}`);
  }

  // Affiliations
  for (const [key, text] of Object.entries(config.affiliations)) {
    const num = keyToNum.get(key);
    if (num !== undefined) {
      lines.push(`\\affil[${num}]{${escapeText(`${text}`)}}`);
    }
  }

  return lines.join('\n');
}
731
-
/**
 * Generate markdown author block for DOCX output with superscript affiliations.
 * Returns markdown text to insert after the YAML frontmatter.
 *
 * Layout: one comma-separated author line (`Name^1,2^`, with `\*` added for
 * corresponding authors), a blank line, one `^n^ text` line per affiliation,
 * and — when a corresponding author has an e-mail — a footnote line. The
 * footnote uses the first corresponding author that actually has an e-mail,
 * so a corresponding author without one no longer suppresses it.
 *
 * @param config - Merged build configuration
 * @returns Newline-joined markdown, ending with a trailing newline
 */
function generateMarkdownAuthorBlock(config: BuildConfig): string {
  const lines: string[] = [];

  // Map affiliation keys to numbers
  const keyToNum = new Map<string, number>();
  Object.keys(config.affiliations).forEach((key, i) => keyToNum.set(key, i + 1));

  // Author line: Name^1,2^, Name^3^, ...
  const authorParts = config.authors.map((author) => {
    if (typeof author === 'string') return author;

    const superParts = (author.affiliations || [])
      .map((k) => keyToNum.get(k))
      .filter((n): n is number => n !== undefined)
      .map(String);
    if (author.corresponding) superParts.push('\\*');

    return superParts.length > 0 ? `${author.name}^${superParts.join(',')}^` : author.name;
  });
  lines.push(authorParts.join(', '));
  lines.push('');

  // Affiliation lines: ^1^ Department of ...
  const affiliationEntries = Object.entries(config.affiliations);
  // Line breaks are on unless explicitly disabled; a missing docx block counts as "on".
  const useLineBreaks = config.docx?.affiliationNewline !== false;
  affiliationEntries.forEach(([key, text], idx) => {
    const num = keyToNum.get(key);
    if (num !== undefined) {
      const isLast = idx === affiliationEntries.length - 1;
      // A trailing backslash is a hard line break in pandoc markdown.
      const suffix = useLineBreaks && !isLast ? '\\' : '';
      lines.push(`^${num}^ ${text}${suffix}`);
    }
  });

  // Corresponding author footnote
  const corresponding = config.authors.find(
    (a): a is Author => typeof a !== 'string' && Boolean(a.corresponding) && Boolean(a.email),
  );
  if (corresponding) {
    lines.push('');
    lines.push(`^\\*^ Corresponding author: ${corresponding.email}`);
  }

  lines.push('');
  return lines.join('\n');
}
787
-
788
- /**
789
- * Process markdown tables to apply nowrap formatting to specified columns.
790
- * Converts distribution notation (Normal, Student-t, Gamma) to LaTeX math.
791
- * @param content - Markdown content
792
- * @param tablesConfig - tables config from rev.yaml
793
- * @param format - output format (pdf, docx, etc.)
794
- * @returns processed content
795
- */
796
- export function processTablesForFormat(content: string, tablesConfig: TablesConfig, format: string): string {
797
- // Only process for PDF/TeX output
798
- if (format !== 'pdf' && format !== 'tex') {
799
- return content;
800
- }
801
-
802
- // Check if we have nowrap columns configured
803
- if (!tablesConfig?.nowrap?.length) {
804
- return content;
805
- }
806
-
807
- const nowrapPatterns = tablesConfig.nowrap.map((p) => p.toLowerCase());
808
-
809
- // Match pipe tables: header row, separator row, body rows
810
- // Header: | Col1 | Col2 | Col3 |
811
- // Separator: |:-----|:-----|:-----|
812
- // Body: | val1 | val2 | val3 |
813
- const tableRegex = /^(\|[^\n]+\|\r?\n\|[-:| ]+\|\r?\n)((?:\|[^\n]+\|\r?\n?)+)/gm;
814
-
815
- return content.replace(tableRegex, (match, headerAndSep, body) => {
816
- // Split header from separator
817
- const lines = headerAndSep.split(/\r?\n/);
818
- const headerLine = lines[0] ?? '';
819
-
820
- // Parse header cells to find nowrap column indices
821
- const headerCells = headerLine
822
- .split('|')
823
- .slice(1, -1)
824
- .map((c: string) => c.trim().toLowerCase());
825
-
826
- const nowrapCols: number[] = [];
827
- headerCells.forEach((cell: string, i: number) => {
828
- if (nowrapPatterns.some((p) => cell.includes(p))) {
829
- nowrapCols.push(i);
830
- }
831
- });
832
-
833
- // If no nowrap columns found in this table, return unchanged
834
- if (nowrapCols.length === 0) {
835
- return match;
836
- }
837
-
838
- // Process body rows
839
- const bodyLines = body.split(/\r?\n/).filter((l: string) => l.trim());
840
- const processedBody = bodyLines
841
- .map((row: string) => {
842
- // Split row into cells, keeping the pipe structure
843
- const cells = row.split('|');
844
- // cells[0] is empty (before first |), cells[last] is empty (after last |)
845
-
846
- nowrapCols.forEach((colIdx) => {
847
- const cellIdx = colIdx + 1; // Account for empty first element
848
- if (cells[cellIdx] !== undefined) {
849
- const cellContent = cells[cellIdx].trim();
850
-
851
- // Skip if empty, already math, or already has LaTeX commands
852
- if (!cellContent || cellContent.startsWith('$') || cellContent.startsWith('\\')) {
853
- return;
854
- }
855
-
856
- // Convert distribution notation to LaTeX math
857
- // Order matters: compound names (Half-Normal) must come before simple names (Normal)
858
- let processed = cellContent;
859
-
860
- // Half-Normal(x) → $\text{Half-Normal}(x)$ (must come before Normal)
861
- processed = processed.replace(/Half-Normal\(([^)]+)\)/g, '$\\text{Half-Normal}($1)$');
862
-
863
- // Normal(x, y) → $\mathcal{N}(x, y)$
864
- processed = processed.replace(/Normal\(([^)]+)\)/g, '$\\mathcal{N}($1)$');
865
-
866
- // Student-t(df, loc, scale) → $t_{df}(loc, scale)$
867
- processed = processed.replace(/Student-t\((\d+),\s*([^)]+)\)/g, '$t_{$1}($2)$');
868
-
869
- // Gamma(a, b) → $\text{Gamma}(a, b)$
870
- processed = processed.replace(/Gamma\(([^)]+)\)/g, '$\\text{Gamma}($1)$');
871
-
872
- // Exponential(x) → $\text{Exp}(x)$
873
- processed = processed.replace(/Exponential\(([^)]+)\)/g, '$\\text{Exp}($1)$');
874
-
875
- // Update cell with padding
876
- cells[cellIdx] = ` ${processed} `;
877
- }
878
- });
879
-
880
- return cells.join('|');
881
- })
882
- .join('\n');
883
-
884
- return headerAndSep + processedBody + '\n';
885
- });
886
- }
887
-
888
- /**
889
- * Apply format-specific transforms (table normalization, author blocks,
890
- * crossref display conversion, slide syntax). Caller is responsible for
891
- * stripping annotations beforehand — the dual-output paths keep comments
892
- * in the markdown stream and need to apply these transforms separately
893
- * from annotation handling.
894
- *
895
- * @param content - Markdown content (annotations already stripped as needed)
896
- * @param format - Output format
897
- * @param config - Build config
898
- * @param registry - Crossref registry for the project
899
- * @returns Transformed markdown
900
- */
901
- export function applyFormatTransforms(
902
- content: string,
903
- format: string,
904
- config: BuildConfig,
905
- registry: Registry
906
- ): string {
907
- if (format === 'pdf' || format === 'tex') {
908
- content = processTablesForFormat(content, config.tables, format);
909
-
910
- if (hasNumberedAffiliations(config)) {
911
- const latexBlock = generateLatexAuthorBlock(config);
912
- content = content.replace(/^(---\r?\n[\s\S]*?)(---\r?\n)/, (_match, yamlContent, closing) => {
913
- return `${yamlContent}header-includes: |\n${latexBlock.split('\n').map(l => ' ' + l).join('\n')}\n${closing}`;
914
- });
915
- }
916
- } else if (format === 'docx') {
917
- content = convertDynamicRefsToDisplay(content, registry);
918
-
919
- // Pandoc strips raw LaTeX in docx output. Translate the common
920
- // `\begin{figure}...\end{figure}` shape to portable markdown so figures
921
- // actually appear; exotic blocks are left alone (warned about in build()).
922
- if (config.docx?.translateRawFigures !== false) {
923
- const { translated } = translateRawLatexFigures(content);
924
- content = translated;
925
- }
926
-
927
- if (hasNumberedAffiliations(config)) {
928
- const mdBlock = generateMarkdownAuthorBlock(config);
929
- content = content.replace(/^(---\r?\n[\s\S]*?---\r?\n)/, `$1\n${mdBlock}\n`);
930
- }
931
- } else if (format === 'beamer' || format === 'pptx') {
932
- if (hasSlideSyntax(content)) {
933
- content = processSlideMarkdown(content, format);
934
- }
935
- }
936
-
937
- return content;
938
- }
939
-
940
- /**
941
- * Prepare paper.md for specific output format
942
- */
943
- export function prepareForFormat(
944
- paperPath: string,
945
- format: string,
946
- config: BuildConfig,
947
- _options: BuildOptions = {}
948
- ): string {
949
- const directory = path.dirname(paperPath);
950
- let content = fs.readFileSync(paperPath, 'utf-8');
951
-
952
- // Build crossref registry for reference conversion
953
- // Pass sections from config to ensure correct file ordering
954
- const registry = buildRegistry(directory, config.sections);
955
-
956
- // Strip annotations per format
957
- if (format === 'docx') {
958
- content = stripAnnotations(content, { keepComments: config.docx.keepComments });
959
- } else {
960
- content = stripAnnotations(content);
961
- }
962
-
963
- // Apply shared format transforms
964
- content = applyFormatTransforms(content, format, config, registry);
965
-
966
- // Write to temporary file
967
- const preparedPath = path.join(directory, `.paper-${format}.md`);
968
- fs.writeFileSync(preparedPath, content, 'utf-8');
969
-
970
- return preparedPath;
971
- }
972
-
973
- /**
974
- * Convert @fig:label references to display format (Figure 1)
975
- */
976
- function convertDynamicRefsToDisplay(text: string, registry: Registry): string {
977
- const refs = detectDynamicRefs(text);
978
-
979
- // Process in reverse order to preserve positions
980
- let result = text;
981
- for (let i = refs.length - 1; i >= 0; i--) {
982
- const ref = refs[i];
983
- if (!ref) continue;
984
- const display = labelToDisplay(ref.type, ref.label, registry as any);
985
-
986
- if (display) {
987
- result = result.slice(0, ref.position) + display + result.slice(ref.position + ref.match.length);
988
- }
989
- }
990
-
991
- return result;
992
- }
993
-
994
- // =============================================================================
995
- // Raw LaTeX figure detection / translation (docx)
996
- // =============================================================================
997
-
998
- /**
999
- * A raw LaTeX `\begin{figure}...\end{figure}` block found in source markdown.
1000
- * `exotic` blocks contain features we don't auto-translate (multiple
1001
- * `\includegraphics`, `\subfloat`, `\rotatebox`, unrecognised width units);
1002
- * pandoc strips raw LaTeX silently in docx output, so users get warned about
1003
- * anything that won't be translated.
1004
- */
1005
- export interface RawLatexFigure {
1006
- file?: string;
1007
- line: number;
1008
- block: string;
1009
- exotic: boolean;
1010
- }
1011
-
1012
- /** Match `\begin{figure}` / `\begin{figure*}` … `\end{figure}` blocks. */
1013
- function makeRawFigureRegex(): RegExp {
1014
- return /\\begin\{figure\*?\}(?:\[[^\]]*\])?[\s\S]*?\\end\{figure\*?\}/g;
1015
- }
1016
-
1017
- /**
1018
- * Convert a LaTeX width spec to a markdown image attribute value.
1019
- * - `0.8\textwidth` → `80%`
1020
- * - `\linewidth` → `100%`
1021
- * - `8cm`, `2in`, `12pt` → kept verbatim
1022
- * Returns null for anything we don't translate (block stays "exotic").
1023
- */
1024
- function convertLatexWidth(raw: string): string | null {
1025
- const trimmed = raw.trim();
1026
- // Coefficient × relative length
1027
- const rel = trimmed.match(/^([\d.]+)\s*\\(textwidth|linewidth|columnwidth)$/);
1028
- if (rel) {
1029
- const pct = Math.round(parseFloat(rel[1]!) * 100);
1030
- if (!isFinite(pct) || pct <= 0) return null;
1031
- return `${pct}%`;
1032
- }
1033
- // Bare relative length
1034
- if (/^\\(textwidth|linewidth|columnwidth)$/.test(trimmed)) return '100%';
1035
- // Absolute units
1036
- if (/^[\d.]+\s*(cm|mm|in|pt|px|em|ex)$/.test(trimmed)) return trimmed.replace(/\s+/g, '');
1037
- return null;
1038
- }
1039
-
1040
- /** Extract a balanced `{...}` argument that follows `command` in `text`. */
1041
- function extractBracedArg(text: string, command: string): string | null {
1042
- const idx = text.indexOf(command);
1043
- if (idx === -1) return null;
1044
- let i = idx + command.length;
1045
- while (i < text.length && /\s/.test(text[i]!)) i++;
1046
- if (text[i] !== '{') return null;
1047
- i++;
1048
- const start = i;
1049
- let depth = 1;
1050
- while (i < text.length) {
1051
- const ch = text[i]!;
1052
- if (ch === '\\' && i + 1 < text.length) { i += 2; continue; }
1053
- if (ch === '{') depth++;
1054
- else if (ch === '}') {
1055
- depth--;
1056
- if (depth === 0) return text.slice(start, i);
1057
- }
1058
- i++;
1059
- }
1060
- return null;
1061
- }
1062
-
1063
- /** True if a `\begin{figure}` block contains features we don't auto-translate. */
1064
- function isExoticFigureBlock(block: string): boolean {
1065
- if (/\\subfloat\b/.test(block)) return true;
1066
- if (/\\rotatebox\b/.test(block)) return true;
1067
- const includes = (block.match(/\\includegraphics\b/g) || []).length;
1068
- if (includes !== 1) return true;
1069
- const m = block.match(/\\includegraphics\s*(?:\[([^\]]*)\])?\s*\{([^}]+)\}/);
1070
- if (!m) return true;
1071
- const opts = m[1] || '';
1072
- const widthMatch = opts.match(/(?:^|,)\s*width\s*=\s*([^,]+)/);
1073
- if (widthMatch && !convertLatexWidth(widthMatch[1]!)) return true;
1074
- return false;
1075
- }
1076
-
1077
- /**
1078
- * Find raw LaTeX figure blocks containing `\includegraphics` in markdown.
1079
- * `file`, if given, is attached to each result. `line` is 1-based within the
1080
- * supplied content (the line where `\begin{figure}` sits).
1081
- */
1082
- export function detectRawLatexFigures(content: string, file?: string): RawLatexFigure[] {
1083
- const figures: RawLatexFigure[] = [];
1084
- const re = makeRawFigureRegex();
1085
- let m: RegExpExecArray | null;
1086
- while ((m = re.exec(content)) !== null) {
1087
- const block = m[0];
1088
- if (!block.includes('\\includegraphics')) continue;
1089
- const line = content.slice(0, m.index).split(/\r?\n/).length;
1090
- figures.push({ file, line, block, exotic: isExoticFigureBlock(block) });
1091
- }
1092
- return figures;
1093
- }
1094
-
1095
- /**
1096
- * Translate the 80% case: single `\includegraphics` figure with optional
1097
- * `\caption{...}` and `\label{...}`, wrapped in `\begin{figure}...\end{figure}`,
1098
- * to portable `![caption](path){#fig:label width=N%}` markdown. Exotic blocks
1099
- * (see `isExoticFigureBlock`) are left untouched.
1100
- */
1101
- export function translateRawLatexFigures(content: string): { translated: string; translatedCount: number } {
1102
- let translatedCount = 0;
1103
- const re = makeRawFigureRegex();
1104
- const translated = content.replace(re, (block) => {
1105
- if (!block.includes('\\includegraphics')) return block;
1106
- if (isExoticFigureBlock(block)) return block;
1107
-
1108
- const inc = block.match(/\\includegraphics\s*(?:\[([^\]]*)\])?\s*\{([^}]+)\}/);
1109
- if (!inc) return block;
1110
- const optsStr = inc[1] || '';
1111
- const imgPath = inc[2]!.trim();
1112
-
1113
- let width: string | undefined;
1114
- const widthMatch = optsStr.match(/(?:^|,)\s*width\s*=\s*([^,]+)/);
1115
- if (widthMatch) {
1116
- const w = convertLatexWidth(widthMatch[1]!);
1117
- if (!w) return block; // already filtered by isExoticFigureBlock, defensive
1118
- width = w;
1119
- }
1120
-
1121
- const caption = (extractBracedArg(block, '\\caption') ?? '').trim();
1122
- const labelRaw = extractBracedArg(block, '\\label');
1123
-
1124
- const attrs: string[] = [];
1125
- if (labelRaw) {
1126
- const label = labelRaw.trim();
1127
- const labelWithPrefix = /^[a-z]+:/i.test(label) ? label : `fig:${label}`;
1128
- attrs.push(`#${labelWithPrefix}`);
1129
- }
1130
- if (width) attrs.push(`width=${width}`);
1131
-
1132
- translatedCount++;
1133
- const attrStr = attrs.length > 0 ? ` {${attrs.join(' ')}}` : '';
1134
- return `![${caption}](${imgPath})${attrStr}`;
1135
- });
1136
- return { translated, translatedCount };
1137
- }
1138
-
1139
- /**
1140
- * Format the warning surfaced for raw LaTeX figure blocks that won't render
1141
- * in docx. `translateEnabled` reflects whether auto-translate ran (true = the
1142
- * listed blocks are exotic leftovers; false = no translation was attempted).
1143
- */
1144
- function formatRawLatexFigureWarning(figs: RawLatexFigure[], translateEnabled: boolean): string {
1145
- const reason = translateEnabled ? 'too complex to auto-translate' : 'translateRawFigures: false';
1146
- const lines: string[] = [
1147
- `${figs.length} raw LaTeX figure block(s) won't render in docx (${reason}).`,
1148
- ];
1149
- for (const f of figs) {
1150
- const loc = f.file ? `${f.file}:${f.line}` : `line ${f.line}`;
1151
- const pathMatch = f.block.match(/\\includegraphics\s*(?:\[[^\]]*\])?\s*\{([^}]+)\}/);
1152
- const pathInfo = pathMatch ? ` ${pathMatch[1]!.trim()}` : '';
1153
- lines.push(` ${loc}${pathInfo}`);
1154
- }
1155
- lines.push(' Hint: use ![caption](path){#fig:label width=80%} for format-portable figures,');
1156
- lines.push(' or pass --pandoc-arg=--lua-filter=<your.lua> to translate them yourself.');
1157
- return lines.join('\n');
1158
- }
1159
-
1160
- /**
1161
- * Walk section files and gather a warning for any raw LaTeX figure blocks that
1162
- * won't survive the docx build. Returns null when there's nothing to warn about.
1163
- */
1164
- export function collectRawLatexFigureWarning(directory: string, config: BuildConfig): string | null {
1165
- const translateEnabled = config.docx?.translateRawFigures !== false;
1166
- const all: RawLatexFigure[] = [];
1167
- for (const section of findSections(directory, config.sections)) {
1168
- const sectionPath = path.join(directory, section);
1169
- if (!fs.existsSync(sectionPath)) continue;
1170
- try {
1171
- const content = fs.readFileSync(sectionPath, 'utf-8');
1172
- const figs = detectRawLatexFigures(content, section);
1173
- for (const f of figs) {
1174
- // When auto-translate is on, non-exotic blocks get rewritten cleanly —
1175
- // only the exotic leftovers need warning. When opted out, everything
1176
- // is at risk and we warn about every block.
1177
- if (translateEnabled && !f.exotic) continue;
1178
- all.push(f);
1179
- }
1180
- } catch {
1181
- // ignore unreadable sections
1182
- }
1183
- }
1184
- if (all.length === 0) return null;
1185
- return formatRawLatexFigureWarning(all, translateEnabled);
1186
- }
1187
-
1188
- /**
1189
- * Build pandoc arguments for format.
1190
- *
1191
- * Returns only the built-in args derived from config. Passthrough args
1192
- * (config.pandocArgs, config[format].pandocArgs, CLI --pandoc-arg) are
1193
- * appended later in runPandoc so they win against pptx/crossref defaults
1194
- * added there.
1195
- */
1196
- export function buildPandocArgs(format: string, config: BuildConfig, outputPath: string): string[] {
1197
- const args: string[] = [];
1198
-
1199
- // Output format
1200
- if (format === 'tex') {
1201
- args.push('-t', 'latex');
1202
- if (config.tex.standalone) {
1203
- args.push('-s');
1204
- }
1205
- } else if (format === 'pdf') {
1206
- args.push('-t', 'pdf');
1207
- } else if (format === 'docx') {
1208
- args.push('-t', 'docx');
1209
- } else if (format === 'beamer') {
1210
- args.push('-t', 'beamer');
1211
- } else if (format === 'pptx') {
1212
- args.push('-t', 'pptx');
1213
- }
1214
-
1215
- // Output file. runPandoc sets cwd to the project directory and passes a
1216
- // path relative to that cwd; passing it through here unchanged lets pandoc
1217
- // write to subdirectories like output/<title-slug>.<ext>.
1218
- args.push('-o', outputPath);
1219
-
1220
- // Crossref filter (if available) - skip for slides
1221
- if (hasPandocCrossref() && format !== 'beamer' && format !== 'pptx') {
1222
- args.push('--filter', 'pandoc-crossref');
1223
- }
1224
-
1225
- // Bibliography
1226
- if (config.bibliography) {
1227
- args.push('--citeproc');
1228
- }
1229
-
1230
- // Format-specific options
1231
- if (format === 'pdf') {
1232
- if (config.pdf.template) {
1233
- args.push('--template', config.pdf.template);
1234
- }
1235
- if (config.pdf.engine) {
1236
- args.push(`--pdf-engine=${config.pdf.engine}`);
1237
- }
1238
- if (config.pdf.mainfont) {
1239
- args.push('-V', `mainfont=${config.pdf.mainfont}`);
1240
- }
1241
- if (config.pdf.sansfont) {
1242
- args.push('-V', `sansfont=${config.pdf.sansfont}`);
1243
- }
1244
- if (config.pdf.monofont) {
1245
- args.push('-V', `monofont=${config.pdf.monofont}`);
1246
- }
1247
- args.push('-V', `documentclass=${config.pdf.documentclass}`);
1248
- args.push('-V', `fontsize=${config.pdf.fontsize}`);
1249
- args.push('-V', `geometry:${config.pdf.geometry}`);
1250
- if (config.pdf.headerIncludes) {
1251
- args.push('-H', config.pdf.headerIncludes);
1252
- }
1253
- if (config.pdf.linestretch !== 1) {
1254
- args.push('-V', `linestretch=${config.pdf.linestretch}`);
1255
- }
1256
- if (config.pdf.numbersections) {
1257
- args.push('--number-sections');
1258
- }
1259
- if (config.pdf.toc) {
1260
- args.push('--toc');
1261
- }
1262
- } else if (format === 'docx') {
1263
- if (config.docx.reference) {
1264
- args.push('--reference-doc', config.docx.reference);
1265
- }
1266
- if (config.docx.toc) {
1267
- args.push('--toc');
1268
- }
1269
- } else if (format === 'beamer') {
1270
- // Beamer slide options
1271
- const beamer = config.beamer || {};
1272
- if (beamer.theme) {
1273
- args.push('-V', `theme=${beamer.theme}`);
1274
- }
1275
- if (beamer.colortheme) {
1276
- args.push('-V', `colortheme=${beamer.colortheme}`);
1277
- }
1278
- if (beamer.fonttheme) {
1279
- args.push('-V', `fonttheme=${beamer.fonttheme}`);
1280
- }
1281
- if (beamer.aspectratio) {
1282
- args.push('-V', `aspectratio=${beamer.aspectratio}`);
1283
- }
1284
- if (beamer.navigation) {
1285
- args.push('-V', `navigation=${beamer.navigation}`);
1286
- }
1287
- // Speaker notes - default to 'show' which creates presenter view PDF
1288
- // Options: 'show' (dual screen), 'only' (notes only), 'hide' (no notes), false (disabled)
1289
- const notesMode = beamer.notes !== undefined ? beamer.notes : 'show';
1290
- if (notesMode && notesMode !== 'hide') {
1291
- args.push('-V', `classoption=notes=${notesMode}`);
1292
- }
1293
- // Fit images within slide bounds (default: true)
1294
- if (beamer.fit_images !== false) {
1295
- const fitImagesHeader = `\\makeatletter
1296
- \\def\\maxwidth{\\ifdim\\Gin@nat@width>\\linewidth\\linewidth\\else\\Gin@nat@width\\fi}
1297
- \\def\\maxheight{\\ifdim\\Gin@nat@height>0.75\\textheight 0.75\\textheight\\else\\Gin@nat@height\\fi}
1298
- \\makeatother
1299
- \\setkeys{Gin}{width=\\maxwidth,height=\\maxheight,keepaspectratio}`;
1300
- args.push('-V', `header-includes=${fitImagesHeader}`);
1301
- }
1302
- // Slides need standalone
1303
- args.push('-s');
1304
- } else if (format === 'pptx') {
1305
- // PowerPoint options - handled separately in preparePptxTemplate
1306
- // Reference doc is set by caller after template generation
1307
- }
1308
-
1309
- return args;
1310
- }
1311
-
1312
- /**
1313
- * Collect passthrough pandoc args for a format in the canonical order:
1314
- * top-level config → format-specific config → CLI extras. Later wins for
1315
- * repeated flags.
1316
- */
1317
- export function collectPandocPassthroughArgs(
1318
- format: string,
1319
- config: BuildConfig,
1320
- extraArgs: string[] = []
1321
- ): string[] {
1322
- const out: string[] = [];
1323
- if (config.pandocArgs && config.pandocArgs.length > 0) {
1324
- out.push(...config.pandocArgs);
1325
- }
1326
- const formatConfig = (config as unknown as Record<string, { pandocArgs?: string[] } | undefined>)[format];
1327
- if (formatConfig?.pandocArgs && formatConfig.pandocArgs.length > 0) {
1328
- out.push(...formatConfig.pandocArgs);
1329
- }
1330
- if (extraArgs.length > 0) {
1331
- out.push(...extraArgs);
1332
- }
1333
- return out;
1334
- }
1335
-
1336
- /**
1337
- * Write crossref.yaml if needed
1338
- */
1339
- function ensureCrossrefConfig(directory: string, config: BuildConfig): void {
1340
- const crossrefPath = path.join(directory, 'crossref.yaml');
1341
-
1342
- if (!fs.existsSync(crossrefPath) && hasPandocCrossref()) {
1343
- fs.writeFileSync(crossrefPath, YAML.stringify(config.crossref), 'utf-8');
1344
- }
1345
- }
1346
-
1347
- /**
1348
- * Get install instructions for missing dependency
1349
- */
1350
- function getInstallInstructions(tool: string): string {
1351
- const instructions: Record<string, string> = {
1352
- pandoc: 'https://pandoc.org/installing.html',
1353
- latex: 'https://www.latex-project.org/get/',
1354
- };
1355
- return instructions[tool] || 'Check documentation';
1356
- }
1357
-
1358
- /**
1359
- * Resolve the absolute directory where final outputs should land.
1360
- * Honors config.outputDir; falls back to the project directory when null/empty.
1361
- */
1362
- export function resolveOutputDir(directory: string, config: BuildConfig): string {
1363
- const out = config.outputDir;
1364
- if (!out) return directory;
1365
- return path.isAbsolute(out) ? out : path.join(directory, out);
1366
- }
1367
-
1368
- /** File extension (with leading dot) for each supported pandoc format. */
1369
- const FORMAT_EXTENSIONS: Record<string, string> = {
1370
- tex: '.tex',
1371
- pdf: '.pdf',
1372
- docx: '.docx',
1373
- beamer: '.pdf',
1374
- pptx: '.pptx',
1375
- };
1376
-
1377
- /** Get file extension for a format, defaulting to `.pdf`. */
1378
- export function getFormatExtension(format: string): string {
1379
- return FORMAT_EXTENSIONS[format] ?? '.pdf';
1380
- }
1381
-
1382
- /**
1383
- * Slugify a title for use as a default output filename. Lowercases, replaces
1384
- * non-alphanumeric runs with `-`, and truncates at the last `-` boundary
1385
- * at-or-before MAX_TITLE_FILENAME_LENGTH so words stay whole (the old blind
1386
- * `.slice` cut mid-word).
1387
- */
1388
- export function slugifyTitle(title: string): string {
1389
- if (!title) return 'paper';
1390
- const slug = title.toLowerCase().replace(/[^a-z0-9]+/g, '-').replace(/^-+|-+$/g, '');
1391
- if (!slug) return 'paper';
1392
- if (slug.length <= MAX_TITLE_FILENAME_LENGTH) return slug;
1393
- const cut = slug.slice(0, MAX_TITLE_FILENAME_LENGTH);
1394
- const lastDash = cut.lastIndexOf('-');
1395
- // Only truncate at a hyphen if it leaves a reasonable amount of content.
1396
- // Otherwise hard-cut (handles degenerate titles with no spaces at all).
1397
- if (lastDash >= MAX_TITLE_FILENAME_LENGTH / 2) {
1398
- return slug.slice(0, lastDash);
1399
- }
1400
- return cut;
1401
- }
1402
-
1403
- /**
1404
- * Ensure `name` ends with `ext` (case-insensitive). If the user already supplied
1405
- * the correct extension, return unchanged; if they supplied none or a different
1406
- * one, append the format's canonical extension.
1407
- *
1408
- * Different-extension case (e.g. `output.docx` when building tex): we append
1409
- * rather than replace, since stripping looks like an unsafe guess. The result
1410
- * `output.docx.tex` is loud enough to flag the misconfiguration.
1411
- */
1412
- function ensureExtension(name: string, ext: string): string {
1413
- if (name.toLowerCase().endsWith(ext.toLowerCase())) return name;
1414
- return name + ext;
1415
- }
1416
-
1417
- /**
1418
- * Resolve the final output path for a build.
1419
- *
1420
- * Priority: `options.outputPath` (internal force) > `cliOverride` (-o flag) >
1421
- * `config.output[format]` > slugified title fallback.
1422
- *
1423
- * Relative paths from `cliOverride`/`config.output` resolve under outputDir;
1424
- * absolute paths bypass outputDir. The fallback path always lives under
1425
- * outputDir.
1426
- *
1427
- * @param suffix - Appended before the extension (e.g. "-changes", "-slides").
1428
- * Suppressed when user supplied an explicit name via CLI or
1429
- * config — they pick their own suffix.
1430
- */
1431
- export function resolveOutputPath(
1432
- directory: string,
1433
- config: BuildConfig,
1434
- format: string,
1435
- options: { cliOverride?: string; suffix?: string } = {}
1436
- ): string {
1437
- const { cliOverride, suffix = '' } = options;
1438
- const ext = getFormatExtension(format);
1439
-
1440
- const explicit = cliOverride ?? config.output?.[format];
1441
- if (explicit) {
1442
- const baseDir = path.isAbsolute(explicit)
1443
- ? path.dirname(explicit)
1444
- : resolveOutputDir(directory, config);
1445
- const baseName = path.basename(explicit);
1446
- const stem = baseName.replace(/\.[^./\\]+$/, '');
1447
- return path.join(baseDir, ensureExtension(`${stem}${suffix}`, ext));
1448
- }
1449
-
1450
- const slug = slugifyTitle(config.title);
1451
- return path.join(resolveOutputDir(directory, config), `${slug}${suffix}${ext}`);
1452
- }
1453
-
1454
- /**
1455
- * Run pandoc build
1456
- */
1457
- export async function runPandoc(
1458
- inputPath: string,
1459
- format: string,
1460
- config: BuildConfig,
1461
- options: BuildOptions = {}
1462
- ): Promise<PandocResult> {
1463
- const directory = path.dirname(inputPath);
1464
-
1465
- // outputPath (internal force) wins over the resolver. For beamer, we keep
1466
- // the `-slides` suffix on the slug fallback to distinguish from a regular
1467
- // PDF build; when the user supplies an explicit name, they pick their own.
1468
- const suffix = format === 'beamer' ? '-slides' : '';
1469
- const outputPath = options.outputPath
1470
- ?? resolveOutputPath(directory, config, format, {
1471
- cliOverride: options.output,
1472
- suffix,
1473
- });
1474
-
1475
- if (!options.outputPath) {
1476
- const outDir = path.dirname(outputPath);
1477
- if (!fs.existsSync(outDir)) {
1478
- fs.mkdirSync(outDir, { recursive: true });
1479
- }
1480
- }
1481
-
1482
- // Ensure crossref.yaml exists
1483
- ensureCrossrefConfig(directory, config);
1484
-
1485
- // Pandoc runs with cwd = directory, so pass the output path relative to it.
1486
- const args = buildPandocArgs(format, config, path.relative(directory, outputPath) || path.basename(outputPath));
1487
-
1488
- // Handle PPTX reference template and themes
1489
- let pptxMediaDir: string | null = null;
1490
- if (format === 'pptx') {
1491
- const pptx = config.pptx || {};
1492
-
1493
- // Determine media directory (default: pptx/media or slides/media)
1494
- let mediaDir = pptx.media;
1495
- if (!mediaDir) {
1496
- if (fs.existsSync(path.join(directory, 'pptx', 'media'))) {
1497
- mediaDir = path.join(directory, 'pptx', 'media');
1498
- } else if (fs.existsSync(path.join(directory, 'slides', 'media'))) {
1499
- mediaDir = path.join(directory, 'slides', 'media');
1500
- }
1501
- } else if (!path.isAbsolute(mediaDir)) {
1502
- mediaDir = path.join(directory, mediaDir);
1503
- }
1504
- pptxMediaDir = mediaDir || null;
1505
-
1506
- // Determine reference doc: custom reference overrides theme
1507
- let referenceDoc: string | null = null;
1508
- if (pptx.reference && fs.existsSync(path.join(directory, pptx.reference))) {
1509
- // Custom reference doc takes precedence
1510
- referenceDoc = path.join(directory, pptx.reference);
1511
- } else {
1512
- // Use built-in theme (default: 'default')
1513
- const themeName = pptx.theme || 'default';
1514
- const themePath = getThemePath(themeName);
1515
- if (themePath && fs.existsSync(themePath)) {
1516
- referenceDoc = themePath;
1517
- }
1518
- }
1519
-
1520
- if (referenceDoc) {
1521
- args.push('--reference-doc', referenceDoc);
1522
- }
1523
-
1524
- // Add color filter for PPTX (handles [text]{color=#RRGGBB} syntax).
1525
- // fileURLToPath handles Windows paths with spaces — the old
1526
- // `new URL(...).pathname` returned URL-encoded `%20` and fs.existsSync
1527
- // silently failed.
1528
- const colorFilterPath = path.join(
1529
- path.dirname(fileURLToPath(import.meta.url)),
1530
- 'pptx-color-filter.lua'
1531
- );
1532
- if (fs.existsSync(colorFilterPath)) {
1533
- args.push('--lua-filter', colorFilterPath);
1534
- }
1535
- }
1536
-
1537
- // Wire placeholder macros (built-in \tofill plus user-declared entries).
1538
- // - docx/html: lua filter expands \name{X} to format-specific raw runs.
1539
- // - pdf/tex/beamer: inject a \providecommand preamble so LaTeX renders it
1540
- // directly. `\providecommand` is non-clobbering, so a user who already
1541
- // has `\providecommand{\tofill}{...}` in their own header keeps theirs.
1542
- //
1543
- // Sidecar path is passed to the lua filter via DOCREV_MACROS_FILE in the
1544
- // child env (not pandoc metadata) because pandoc walks RawInline/RawBlock
1545
- // BEFORE Meta — by the time a Meta handler could read the path, the inline
1546
- // expansion has already happened.
1547
- const macroTempFiles: string[] = [];
1548
- let macroEnvFile: string | null = null;
1549
- const macros = mergeMacros((config as { macros?: unknown }).macros);
1550
- if (macros.length > 0) {
1551
- if (format === 'docx' || format === 'html' || format === 'html5' || format === 'html4') {
1552
- const sidecarPath = writeMacrosSidecar(directory, macros);
1553
- macroTempFiles.push(sidecarPath);
1554
- macroEnvFile = sidecarPath;
1555
- const filterPath = getMacroFilterPath();
1556
- if (fs.existsSync(filterPath)) {
1557
- args.push('--lua-filter', filterPath);
1558
- }
1559
- } else if (format === 'pdf' || format === 'tex' || format === 'beamer') {
1560
- const preamble = generateLatexPreamble(macros);
1561
- const preamblePath = path.join(directory, '.macros.tex');
1562
- fs.writeFileSync(preamblePath, preamble, 'utf-8');
1563
- macroTempFiles.push(preamblePath);
1564
- args.push('-H', path.basename(preamblePath));
1565
- }
1566
- }
1567
-
1568
- // Add crossref metadata file if exists (skip for slides - they don't use crossref)
1569
- if (format !== 'beamer' && format !== 'pptx') {
1570
- const crossrefPath = path.join(directory, 'crossref.yaml');
1571
- if (fs.existsSync(crossrefPath) && hasPandocCrossref()) {
1572
- // Use basename since we set cwd to directory
1573
- args.push('--metadata-file', 'crossref.yaml');
1574
- }
1575
- }
1576
-
1577
- // Passthrough args go last so they win against built-in defaults.
1578
- args.push(...collectPandocPassthroughArgs(format, config, options.pandocArgs));
1579
-
1580
- // Input file (use basename since we set cwd to directory)
1581
- args.push(path.basename(inputPath));
1582
-
1583
- if (options.verbose) {
1584
- const quoted = args.map(a => /[\s"'$`]/.test(a) ? `"${a.replace(/"/g, '\\"')}"` : a).join(' ');
1585
- console.error(`[pandoc ${format}] (cwd: ${directory})`);
1586
- console.error(` pandoc ${quoted}`);
1587
- }
1588
-
1589
- return new Promise((resolve) => {
1590
- const pandocEnv: NodeJS.ProcessEnv = { ...process.env };
1591
- if (macroEnvFile) {
1592
- pandocEnv.DOCREV_MACROS_FILE = macroEnvFile;
1593
- }
1594
- const pandoc: ChildProcess = spawn('pandoc', args, {
1595
- cwd: directory,
1596
- stdio: ['ignore', 'pipe', 'pipe'],
1597
- env: pandocEnv,
1598
- });
1599
-
1600
- let stderr = '';
1601
- pandoc.stderr?.on('data', (data) => {
1602
- stderr += data.toString();
1603
- });
1604
-
1605
- const cleanupMacroTempFiles = (): void => {
1606
- for (const tmp of macroTempFiles) {
1607
- try {
1608
- fs.unlinkSync(tmp);
1609
- } catch {
1610
- // ignore — best-effort cleanup
1611
- }
1612
- }
1613
- };
1614
-
1615
- pandoc.on('close', async (code) => {
1616
- cleanupMacroTempFiles();
1617
- if (code === 0) {
1618
- // For PPTX, post-process to add slide numbers, buildup colors, and logos
1619
- if (format === 'pptx') {
1620
- try {
1621
- // Inject slide numbers into content slides only
1622
- await injectSlideNumbers(outputPath);
1623
- } catch (e) {
1624
- // Slide number injection failed but output was created
1625
- }
1626
- try {
1627
- // Apply colors (default text color, title color, buildup greying)
1628
- const pptxConfig = config.pptx || {};
1629
- const colorsConfig = pptxConfig.colors || {};
1630
- const buildupConfig = pptxConfig.buildup || {};
1631
- // Merge colors and buildup config for applyBuildupColors
1632
- const colorConfig = {
1633
- default: colorsConfig.default,
1634
- title: colorsConfig.title,
1635
- grey: buildupConfig.grey,
1636
- accent: buildupConfig.accent,
1637
- enabled: buildupConfig.enabled
1638
- };
1639
- await applyBuildupColors(outputPath, colorConfig);
1640
- } catch (e) {
1641
- // Color application failed but output was created
1642
- }
1643
- // Inject logos into cover slide (if media dir configured)
1644
- if (pptxMediaDir) {
1645
- try {
1646
- await injectMediaIntoPptx(outputPath, pptxMediaDir);
1647
- } catch (e) {
1648
- // Logo injection failed but output was created
1649
- }
1650
- }
1651
- }
1652
-
1653
- // Run user postprocess scripts
1654
- const postResult = await runPostprocess(outputPath, format, config as unknown as Parameters<typeof runPostprocess>[2], options);
1655
- if (!postResult.success && options.verbose) {
1656
- console.error(`Postprocess warning: ${postResult.error}`);
1657
- }
1658
-
1659
- resolve({ outputPath, success: true });
1660
- } else {
1661
- resolve({ outputPath, success: false, error: stderr || `Exit code ${code}` });
1662
- }
1663
- });
1664
-
1665
- pandoc.on('error', (err) => {
1666
- cleanupMacroTempFiles();
1667
- resolve({ outputPath, success: false, error: err.message });
1668
- });
1669
- });
1670
- }
1671
-
1672
/**
 * Full build pipeline: combine the section files into paper.md, then run
 * pandoc once per requested format.
 *
 * @param directory - Project directory
 * @param formats - Formats to build; if the list contains 'all' it is replaced by pdf, docx and tex
 * @param options - Build options; `options.config` is used instead of loading rev.yaml when present
 * @returns Per-format results, the combined paper path, collected warnings and reference counters
 * @throws {Error} If pandoc is not installed
 */
export async function build(
  directory: string,
  formats: string[] = ['pdf', 'docx'],
  options: BuildOptions = {}
): Promise<FullBuildResult> {
  const warnings: string[] = [];

  // Check pandoc
  if (!hasPandoc()) {
    const instruction = getInstallInstructions('pandoc');
    throw new Error(`Pandoc not found. Install with: ${instruction}\nOr run: rev doctor`);
  }

  // Check LaTeX if PDF is requested
  const wantsAll = formats.includes('all');
  if ((formats.includes('pdf') || wantsAll) && !hasLatex()) {
    warnings.push(`LaTeX not found - PDF generation may fail. Install with: ${getInstallInstructions('latex')}`);
  }

  // Check pandoc-crossref
  if (!hasPandocCrossref()) {
    warnings.push('pandoc-crossref not found - figure/table numbering will not work');
  }

  // Load config (use passed config if provided, otherwise load from file)
  const config = options.config || loadConfig(directory);

  // Combine sections → paper.md. combineSections reports back through the
  // underscore-prefixed fields of the options object it is given, so it gets
  // its own copy.
  const buildOptions: CombineOptions = { ...options };
  const paperPath = combineSections(directory, config, buildOptions);
  const forwardRefsResolved = buildOptions._forwardRefsResolved || 0;
  const refsAutoInjected = buildOptions._refsAutoInjected || false;

  // Expand 'all' to all formats (kept in a local so the parameter is not reassigned)
  const targets = wantsAll ? ['pdf', 'docx', 'tex'] : formats;

  // Build and save image registry when DOCX is being built
  // This allows import to restore proper image syntax from Word documents
  if (targets.includes('docx')) {
    const paperContent = fs.readFileSync(paperPath, 'utf-8');
    const crossrefReg = buildRegistry(directory, config.sections);
    const imageReg = buildImageRegistry(paperContent, crossrefReg as any);
    if ((imageReg as any).figures?.length > 0) {
      writeImageRegistry(directory, imageReg);
    }

    // Warn about raw LaTeX figure blocks that won't render in docx (pandoc
    // drops them silently). With auto-translate on (default), this surfaces
    // only the exotic leftovers; with it off, every block.
    const rawFigWarning = collectRawLatexFigureWarning(directory, config);
    if (rawFigWarning) warnings.push(rawFigWarning);
  }

  const results: BuildResult[] = [];

  for (const format of targets) {
    // Prepare format-specific version
    const preparedPath = prepareForFormat(paperPath, format, config, options);

    try {
      // Run pandoc
      const result = await runPandoc(preparedPath, format, config, options);
      results.push({ format, ...result });
    } finally {
      // Clean up the temp file even when the pandoc step throws, so a failed
      // build does not leave prepared files behind.
      try {
        fs.unlinkSync(preparedPath);
      } catch {
        // Ignore cleanup errors
      }
    }
  }

  return { results, paperPath, warnings, forwardRefsResolved, refsAutoInjected };
}
1750
-
1751
/**
 * Get build status summary.
 *
 * Produces one line per result: the upper-cased format followed by either the
 * output file name (success) or `FAILED - <error>` (failure).
 *
 * @param results - Per-format build results
 * @returns The lines joined with newlines; an empty string for an empty list
 */
export function formatBuildResults(results: BuildResult[]): string {
  const lines = results.map((r) => {
    const label = r.format.toUpperCase();
    if (r.success) {
      // outputPath is optional on BuildResult; path.basename(undefined) throws,
      // so fall back to a placeholder instead of asserting non-null.
      const fileName = r.outputPath ? path.basename(r.outputPath) : '(no output path)';
      return ` ${label}: ${fileName}`;
    }
    return ` ${label}: FAILED - ${r.error ?? 'unknown error'}`;
  });

  return lines.join('\n');
}
1
+ /**
2
+ * Build system - combines sections → paper.md → PDF/DOCX/TEX
3
+ *
4
+ * Features:
5
+ * - Reads rev.yaml config
6
+ * - Combines section files into paper.md (persisted)
7
+ * - Strips annotations appropriately per output format
8
+ * - Runs pandoc with crossref filter
9
+ */
10
+
11
+ import * as fs from 'fs';
12
+ import * as path from 'path';
13
+ import { fileURLToPath } from 'url';
14
+ import { execSync, spawn, ChildProcess } from 'child_process';
15
+ import YAML from 'yaml';
16
+ import { stripAnnotations } from './annotations.js';
17
+ import { buildRegistry, labelToDisplay, detectDynamicRefs, resolveForwardRefs, resolveSupplementaryRefs } from './crossref.js';
18
+ import { processVariables, hasVariables } from './variables.js';
19
+ import { processSlideMarkdown, hasSlideSyntax } from './slides.js';
20
+ import { generatePptxTemplate, templateNeedsRegeneration, injectMediaIntoPptx, injectSlideNumbers, applyThemeFonts, applyCentering, applyBuildupColors } from './pptx-template.js';
21
+ import { getThemePath, getThemeNames, PPTX_THEMES } from './pptx-themes.js';
22
+ import { runPostprocess } from './postprocess.js';
23
+ import { hasPandoc, hasPandocCrossref, hasLatex } from './dependencies.js';
24
+ import { buildImageRegistry, writeImageRegistry } from './image-registry.js';
25
+ import type { Author, JournalFormatting } from './types.js';
26
+ import { getJournalProfile } from './journals.js';
27
+ import { resolveCSL } from './csl.js';
28
+ import {
29
+ type MacroDef,
30
+ mergeMacros,
31
+ generateLatexPreamble,
32
+ writeMacrosSidecar,
33
+ getMacroFilterPath,
34
+ } from './macros.js';
35
+
36
+ // =============================================================================
37
+ // Constants
38
+ // =============================================================================
39
+
40
+ /** Supported output formats */
41
+ const SUPPORTED_FORMATS = ['pdf', 'docx', 'tex', 'beamer', 'pptx'] as const;
42
+
43
+ /**
44
+ * Maximum length for slugified-title output filenames. Only used when no
45
+ * explicit `output:` filename is configured. Long titles are truncated at the
46
+ * last `-` boundary at-or-before this length so words stay intact (the old
47
+ * blind `.slice(0, 50)` cut mid-word).
48
+ */
49
+ const MAX_TITLE_FILENAME_LENGTH = 80;
50
+
51
+ // =============================================================================
52
+ // Interfaces
53
+ // =============================================================================
54
+
55
+ export interface CrossrefConfig {
56
+ figureTitle?: string;
57
+ tableTitle?: string;
58
+ figPrefix?: string | string[];
59
+ tblPrefix?: string | string[];
60
+ secPrefix?: string | string[];
61
+ linkReferences?: boolean;
62
+ }
63
+
64
+ export interface PdfConfig {
65
+ template?: string | null;
66
+ headerIncludes?: string | null;
67
+ documentclass?: string;
68
+ fontsize?: string;
69
+ geometry?: string;
70
+ linestretch?: number;
71
+ numbersections?: boolean;
72
+ toc?: boolean;
73
+ /**
74
+ * LaTeX engine: pdflatex (default), xelatex, lualatex, tectonic, etc.
75
+ * xelatex/lualatex are required for native UTF-8 rendering of Latin-Extended
76
+ * diacritics (Czech/Polish/Croatian/Spanish author names, species epithets).
77
+ */
78
+ engine?: string;
79
+ /** Roman/serif main font (xelatex/lualatex only — uses fontspec). */
80
+ mainfont?: string;
81
+ /** Sans-serif font (xelatex/lualatex only). */
82
+ sansfont?: string;
83
+ /** Monospace font (xelatex/lualatex only). */
84
+ monofont?: string;
85
+ /** Extra pandoc args appended for this format (after top-level pandocArgs). */
86
+ pandocArgs?: string[];
87
+ }
88
+
89
+ export interface DocxConfig {
90
+ reference?: string | null;
91
+ keepComments?: boolean;
92
+ affiliationNewline?: boolean;
93
+ toc?: boolean;
94
+ pandocArgs?: string[];
95
+ /**
96
+ * Auto-translate the common-shape raw `\begin{figure}...\end{figure}` block
97
+ * to portable `![caption](path){#fig:label width=N%}` markdown so figures
98
+ * survive the docx build (pandoc otherwise drops raw LaTeX silently).
99
+ * Default true. Set false to opt out — blocks then warn and are left alone.
100
+ */
101
+ translateRawFigures?: boolean;
102
+ }
103
+
104
+ export interface TexConfig {
105
+ standalone?: boolean;
106
+ pandocArgs?: string[];
107
+ }
108
+
109
+ export interface BeamerConfig {
110
+ theme?: string;
111
+ colortheme?: string | null;
112
+ fonttheme?: string | null;
113
+ aspectratio?: string | null;
114
+ navigation?: string | null;
115
+ section?: boolean;
116
+ notes?: string | false;
117
+ fit_images?: boolean;
118
+ pandocArgs?: string[];
119
+ }
120
+
121
+ export interface PptxConfig {
122
+ theme?: string;
123
+ reference?: string | null;
124
+ media?: string | null;
125
+ colors?: {
126
+ default?: string;
127
+ title?: string;
128
+ };
129
+ buildup?: {
130
+ grey?: string;
131
+ accent?: string;
132
+ enabled?: boolean;
133
+ };
134
+ pandocArgs?: string[];
135
+ }
136
+
137
+ export interface TablesConfig {
138
+ nowrap?: string[];
139
+ }
140
+
141
+ export interface PostprocessConfig {
142
+ pdf?: string | null;
143
+ docx?: string | null;
144
+ tex?: string | null;
145
+ pptx?: string | null;
146
+ beamer?: string | null;
147
+ all?: string | null;
148
+ [key: string]: string | null | undefined;
149
+ }
150
+
151
+ export interface BuildConfig {
152
+ title: string;
153
+ authors: (string | Author)[];
154
+ affiliations: Record<string, string>;
155
+ sections: string[];
156
+ bibliography: string | null;
157
+ csl: string | null;
158
+ crossref: CrossrefConfig;
159
+ pdf: PdfConfig;
160
+ docx: DocxConfig;
161
+ tex: TexConfig;
162
+ beamer: BeamerConfig;
163
+ pptx: PptxConfig;
164
+ tables: TablesConfig;
165
+ postprocess: PostprocessConfig;
166
+ /**
167
+ * User-declared placeholder macros. Merged with the built-in macros
168
+ * (currently \tofill). Each entry overrides a built-in by name.
169
+ *
170
+ * See lib/macros.ts for the per-format rendering rules.
171
+ */
172
+ macros?: MacroDef[];
173
+ /**
174
+ * Directory (relative to the project) where final outputs land. Created on
175
+ * demand. Set to null/empty to keep outputs alongside paper.md (legacy
176
+ * behavior).
177
+ */
178
+ outputDir?: string | null;
179
+ /**
180
+ * Per-format output filenames. Keys are format names (pdf/docx/tex/beamer/
181
+ * pptx); values are paths. Relative paths resolve under outputDir; absolute
182
+ * paths are honored as-is. Extension is added if missing. CLI `-o` wins
183
+ * over this map.
184
+ */
185
+ output?: Record<string, string>;
186
+ /**
187
+ * Extra pandoc args applied to every format. Format-specific args
188
+ * (e.g. docx.pandocArgs) are appended *after* these, and CLI --pandoc-arg
189
+ * values are appended last.
190
+ */
191
+ pandocArgs?: string[];
192
+ _configPath?: string | null;
193
+ }
194
+
195
+ export interface BuildResult {
196
+ format: string;
197
+ success: boolean;
198
+ outputPath?: string;
199
+ error?: string;
200
+ }
201
+
202
+ interface BuildOptions {
203
+ verbose?: boolean;
204
+ config?: BuildConfig;
205
+ /**
206
+ * Internal: forces the exact output path. Used by dual-mode/temp builds that
207
+ * route to specific temp files. Bypasses the `output:` resolver.
208
+ */
209
+ outputPath?: string;
210
+ /**
211
+ * CLI override (`-o, --output <path>`). Beats `config.output[format]` but
212
+ * loses to `options.outputPath`. Relative paths resolve under outputDir;
213
+ * absolute paths bypass outputDir.
214
+ */
215
+ output?: string;
216
+ crossref?: boolean;
217
+ /** Extra pandoc args from CLI (--pandoc-arg). Appended after config args. */
218
+ pandocArgs?: string[];
219
+ _refsAutoInjected?: boolean;
220
+ _forwardRefsResolved?: number;
221
+ }
222
+
223
+ interface CombineOptions extends BuildOptions {
224
+ _refsAutoInjected?: boolean;
225
+ }
226
+
227
+ interface VariablesContext {
228
+ sectionContents: string[];
229
+ }
230
+
231
+ interface PandocResult {
232
+ outputPath: string;
233
+ success: boolean;
234
+ error?: string;
235
+ }
236
+
237
+ interface FullBuildResult {
238
+ results: BuildResult[];
239
+ paperPath: string;
240
+ warnings: string[];
241
+ forwardRefsResolved: number;
242
+ refsAutoInjected?: boolean;
243
+ }
244
+
245
+ interface DynamicRef {
246
+ type: string;
247
+ label: string;
248
+ match: string;
249
+ position: number;
250
+ }
251
+
252
+ interface Registry {
253
+ figures: Map<string, unknown>;
254
+ tables: Map<string, unknown>;
255
+ equations: Map<string, unknown>;
256
+ byNumber: {
257
+ fig?: Map<number, string>;
258
+ figS?: Map<number, string>;
259
+ tbl?: Map<number, string>;
260
+ tblS?: Map<number, string>;
261
+ eq?: Map<number, string>;
262
+ };
263
+ }
264
+
265
+ /**
266
+ * Default rev.yaml configuration
267
+ */
268
+ export const DEFAULT_CONFIG: BuildConfig = {
269
+ title: 'Untitled Document',
270
+ authors: [],
271
+ affiliations: {},
272
+ sections: [],
273
+ bibliography: null,
274
+ csl: null,
275
+ crossref: {
276
+ figureTitle: 'Figure',
277
+ tableTitle: 'Table',
278
+ figPrefix: ['Fig.', 'Figs.'],
279
+ tblPrefix: ['Table', 'Tables'],
280
+ secPrefix: ['Section', 'Sections'],
281
+ linkReferences: true,
282
+ },
283
+ pdf: {
284
+ template: null,
285
+ documentclass: 'article',
286
+ fontsize: '12pt',
287
+ geometry: 'margin=1in',
288
+ linestretch: 1.5,
289
+ numbersections: false,
290
+ toc: false,
291
+ },
292
+ docx: {
293
+ reference: null,
294
+ keepComments: false,
295
+ affiliationNewline: true,
296
+ toc: false,
297
+ translateRawFigures: true,
298
+ },
299
+ tex: {
300
+ standalone: true,
301
+ },
302
+ // Slide formats
303
+ beamer: {
304
+ theme: 'default',
305
+ colortheme: null,
306
+ fonttheme: null,
307
+ aspectratio: null, // '169' for 16:9, '43' for 4:3
308
+ navigation: null, // 'horizontal', 'vertical', 'frame', 'empty'
309
+ section: true, // section divider slides
310
+ notes: 'show', // 'show' (presenter view), 'only' (notes only), 'hide', or false
311
+ fit_images: true, // scale images to fit within slide bounds
312
+ },
313
+ pptx: {
314
+ theme: 'default', // Built-in theme: default, dark, academic, minimal, corporate
315
+ reference: null, // Custom reference-doc (overrides theme)
316
+ media: null, // directory with logo images (e.g., logo-left.png, logo-right.png)
317
+ },
318
+ // Table formatting
319
+ tables: {
320
+ nowrap: [], // Column headers to apply nowrap formatting (converts Normal() → $\mathcal{N}()$ etc.)
321
+ },
322
+ // Postprocess scripts
323
+ postprocess: {
324
+ pdf: null,
325
+ docx: null,
326
+ tex: null,
327
+ pptx: null,
328
+ beamer: null,
329
+ all: null, // Runs after any format
330
+ },
331
+ // Placeholder/highlight macros. Defaults are the built-ins from
332
+ // lib/macros.ts; users append their own here.
333
+ macros: [],
334
+ // Final outputs land here (created on demand). Set to null or '' to keep
335
+ // outputs in the project root.
336
+ outputDir: 'output',
337
+ };
338
+
339
+ // =============================================================================
340
+ // Public API
341
+ // =============================================================================
342
+
343
/**
 * Merge journal formatting defaults into a config.
 * Priority: DEFAULT_CONFIG < journal formatting < rev.yaml explicit settings
 *
 * The input config is not mutated: the result is a shallow copy whose `pdf`,
 * `docx` and `crossref` sections are replaced by fresh objects whenever the
 * journal supplies values for them.
 *
 * @param config - Config already merged with DEFAULT_CONFIG
 * @param formatting - Journal formatting block
 * @param directory - Project directory, passed to resolveCSL
 * @returns A new config with journal values filled into unset fields
 */
export function mergeJournalFormatting(config: BuildConfig, formatting: JournalFormatting, directory: string): BuildConfig {
  const merged = { ...config };

  // CSL: only apply if user hasn't set one. If the style cannot be resolved
  // locally, keep the bare name — pandoc --citeproc can sometimes resolve it,
  // and the user can fetch it with rev profiles --fetch-csl.
  if (formatting.csl && !config.csl) {
    merged.csl = resolveCSL(formatting.csl, directory) || formatting.csl;
  }

  // PDF settings: merge only unset fields
  if (formatting.pdf) {
    merged.pdf = { ...config.pdf };
    applyJournalDefaultsToSection(merged.pdf, config.pdf, DEFAULT_CONFIG.pdf, formatting.pdf);
  }

  // DOCX settings: merge only unset fields
  if (formatting.docx) {
    merged.docx = { ...config.docx };
    applyJournalDefaultsToSection(merged.docx, config.docx, DEFAULT_CONFIG.docx, formatting.docx);
  }

  // Crossref settings: merge only unset fields
  if (formatting.crossref) {
    merged.crossref = { ...config.crossref };
    applyJournalDefaultsToSection(merged.crossref, config.crossref, DEFAULT_CONFIG.crossref, formatting.crossref);
  }

  return merged;
}

/**
 * Copy journal values into `target` for every key the user left at its default.
 * A key counts as "unset" when the user's value serializes (JSON.stringify) to
 * the same text as the built-in default for that key.
 */
function applyJournalDefaultsToSection(
  target: object,
  userSection: object | undefined,
  defaultSection: object,
  journalSection: object
): void {
  const out = target as Record<string, unknown>;
  const userValues = (userSection ?? {}) as Record<string, unknown>;
  const defaultValues = defaultSection as Record<string, unknown>;

  for (const [key, value] of Object.entries(journalSection)) {
    if (value === undefined) continue;
    if (JSON.stringify(userValues[key]) === JSON.stringify(defaultValues[key])) {
      out[key] = value;
    }
  }
}
405
+
406
/**
 * In-place: copy `pandoc-args` → `pandocArgs` on an object (if not already set).
 * Idempotent. Coerces a single scalar into a one-element array.
 *
 * Empty entries are dropped: a bare `pandoc-args:` line parses to `null`, which
 * is treated as "no extra args" instead of producing `[null]`. Numbers and
 * booleans (e.g. the `80` in `[--columns, 80]`) are converted to strings.
 * The hyphenated key is always removed once it has been seen.
 *
 * @param obj - Parsed config object (or sub-section) to normalize
 */
function normalizePandocArgsKey(obj: Record<string, unknown>): void {
  if (!obj || typeof obj !== 'object') return;
  const hy = obj['pandoc-args'];
  if (hy === undefined) return;

  // camelCase wins when both spellings are present.
  if (obj.pandocArgs === undefined && hy !== null) {
    const entries: unknown[] = Array.isArray(hy) ? hy : [hy];
    obj.pandocArgs = entries
      .filter((entry) => entry !== null && entry !== undefined)
      .map((entry) => (typeof entry === 'number' || typeof entry === 'boolean' ? String(entry) : entry));
  }
  delete obj['pandoc-args'];
}
419
+
420
/**
 * Load rev.yaml config from directory
 *
 * When rev.yaml is absent, a copy of DEFAULT_CONFIG with `_configPath: null`
 * is returned. Otherwise the parsed file is merged over the defaults (one
 * level deep for the per-format sections) and, if a `journal` is named and a
 * profile with formatting is found, journal formatting is applied.
 *
 * @param directory - Project directory path
 * @returns Merged config with defaults
 * @throws {TypeError} If directory is not a string
 * @throws {Error} If rev.yaml exists but cannot be parsed, or its top-level
 *   value is not a mapping
 */
export function loadConfig(directory: string): BuildConfig {
  if (typeof directory !== 'string') {
    throw new TypeError(`directory must be a string, got ${typeof directory}`);
  }

  const configPath = path.join(directory, 'rev.yaml');

  if (!fs.existsSync(configPath)) {
    return { ...DEFAULT_CONFIG, _configPath: null };
  }

  try {
    const content = fs.readFileSync(configPath, 'utf-8');
    const userConfig = YAML.parse(content) || {};

    // A scalar or a sequence at the top level would otherwise be spread into
    // the config as numeric index keys and silently yield a garbage config.
    if (typeof userConfig !== 'object' || Array.isArray(userConfig)) {
      throw new Error('top-level value must be a mapping of settings');
    }

    // Accept hyphenated `pandoc-args` (the form pandoc itself uses) in addition
    // to camelCase `pandocArgs`. Hyphenated is what we document; camelCase is
    // accepted for users who already prefer that convention.
    normalizePandocArgsKey(userConfig);
    for (const fmt of ['pdf', 'docx', 'tex', 'beamer', 'pptx'] as const) {
      if (userConfig[fmt] && typeof userConfig[fmt] === 'object') {
        normalizePandocArgsKey(userConfig[fmt]);
      }
    }

    // Deep merge with defaults
    let config: BuildConfig = {
      ...DEFAULT_CONFIG,
      ...userConfig,
      crossref: { ...DEFAULT_CONFIG.crossref, ...userConfig.crossref },
      pdf: { ...DEFAULT_CONFIG.pdf, ...userConfig.pdf },
      docx: { ...DEFAULT_CONFIG.docx, ...userConfig.docx },
      tex: { ...DEFAULT_CONFIG.tex, ...userConfig.tex },
      beamer: { ...DEFAULT_CONFIG.beamer, ...userConfig.beamer },
      pptx: { ...DEFAULT_CONFIG.pptx, ...userConfig.pptx },
      tables: { ...DEFAULT_CONFIG.tables, ...userConfig.tables },
      postprocess: { ...DEFAULT_CONFIG.postprocess, ...userConfig.postprocess },
      _configPath: configPath,
    };

    // Apply journal formatting defaults (between DEFAULT_CONFIG and user settings)
    if (userConfig.journal) {
      const profile = getJournalProfile(userConfig.journal);
      if (profile?.formatting) {
        config = mergeJournalFormatting(config, profile.formatting, directory);
      }
    }

    return config;
  } catch (err) {
    const error = err as Error;
    throw new Error(`Failed to parse rev.yaml: ${error.message}`);
  }
}
481
+
482
/**
 * Find section files in directory
 *
 * Resolution order:
 *  1. `configSections` (rev.yaml order); missing files are warned about and dropped.
 *  2. `sections.yaml`, sorted by each entry's `order` (entries without a usable
 *     `order` sort last); missing files are dropped.
 *  3. Every `.md` file in the directory except paper.md / readme.md / claude.md
 *     (case-insensitive), sorted alphabetically.
 *
 * @param directory - Project directory path
 * @param configSections - Sections from rev.yaml (optional)
 * @returns Ordered list of section file names
 * @throws {TypeError} If directory is not a string
 */
export function findSections(directory: string, configSections: string[] = []): string[] {
  if (typeof directory !== 'string') {
    throw new TypeError(`directory must be a string, got ${typeof directory}`);
  }

  // If sections specified in config, use that order
  if (configSections.length > 0) {
    const sections: string[] = [];
    for (const section of configSections) {
      if (fs.existsSync(path.join(directory, section))) {
        sections.push(section);
      } else {
        console.warn(`Warning: Section file not found: ${section}`);
      }
    }
    return sections;
  }

  // Try sections.yaml
  const sectionsYamlPath = path.join(directory, 'sections.yaml');
  if (fs.existsSync(sectionsYamlPath)) {
    try {
      const sectionsConfig = YAML.parse(fs.readFileSync(sectionsYamlPath, 'utf-8'));
      // An empty file parses to null; `?.` keeps that from throwing.
      const declared: unknown = sectionsConfig?.sections;
      if (declared && typeof declared === 'object') {
        // Entries may have an empty value (`intro.md:` → null); those, and
        // entries whose `order` is not numeric, sort last instead of aborting
        // the whole lookup.
        const orderOf = (meta: unknown): number => {
          const raw = (meta as { order?: unknown } | null | undefined)?.order ?? 999;
          const order = Number(raw);
          return Number.isNaN(order) ? 999 : order;
        };
        return Object.entries(declared)
          .sort((a, b) => orderOf(a[1]) - orderOf(b[1]))
          .map(([file]) => file)
          .filter((file) => fs.existsSync(path.join(directory, file)));
      }
    } catch (e) {
      if (process.env.DEBUG) {
        const error = e as Error;
        console.warn('build: YAML parse error in sections.yaml:', error.message);
      }
    }
  }

  // Default: find all .md files except special ones
  const exclude = ['paper.md', 'readme.md', 'claude.md'];
  const files = fs
    .readdirSync(directory)
    .filter((f) => f.endsWith('.md') && !exclude.includes(f.toLowerCase()));

  // Sort alphabetically as fallback
  return files.sort();
}
538
+
539
/**
 * Combine section files into paper.md
 *
 * Writes `<directory>/paper.md` consisting of a YAML frontmatter block built
 * from the config followed by every section (own frontmatter stripped),
 * separated by blank lines. When a bibliography file exists, no section file
 * name looks like a references section and no section contains an explicit
 * `#refs` div, a "# References" block is injected right before the first
 * supplementary/appendix section.
 *
 * Side effects on `options`: sets `_refsAutoInjected` when the block was
 * injected and `_forwardRefsResolved` when forward references were rewritten.
 *
 * @param directory - Project directory
 * @param config - Build config
 * @param options - Combine options (mutated as described above)
 * @returns Path of the written paper.md
 * @throws {Error} If no section files are found
 */
export function combineSections(directory: string, config: BuildConfig, options: CombineOptions = {}): string {
  const sections = findSections(directory, config.sections);

  if (sections.length === 0) {
    throw new Error('No section files found. Create .md files or specify sections in rev.yaml');
  }

  const parts: string[] = [];

  // Add YAML frontmatter
  const frontmatter = buildFrontmatter(config);
  parts.push('---');
  parts.push(YAML.stringify(frontmatter).trim());
  parts.push('---');
  parts.push('');

  // Check if we need to auto-inject references before supplementary
  // Pandoc places refs at the end by default, which breaks when supplementary follows
  const hasRefsSection = sections.some(s =>
    s.toLowerCase().includes('reference') || s.toLowerCase().includes('refs')
  );
  const suppIndex = sections.findIndex(s =>
    s.toLowerCase().includes('supp') || s.toLowerCase().includes('appendix')
  );
  const hasBibliography = config.bibliography && fs.existsSync(path.join(directory, config.bibliography));

  // Read every section up front (frontmatter removed) so the explicit-refs
  // check below sees the whole document, not just the sections that precede
  // the supplementary one.
  const loaded: Array<{ index: number; content: string }> = [];
  sections.forEach((section, index) => {
    if (!section) return;
    const raw = fs.readFileSync(path.join(directory, section), 'utf-8');
    loaded.push({ index, content: stripFrontmatter(raw) });
  });

  // Section contents are also handed to variable processing
  const sectionContents = loaded.map((entry) => entry.content);

  // A fenced div carrying the `refs` id, with or without a space after the
  // colons (`::: {#refs}` and `:::{#refs}`) and with optional extra attributes.
  const refsDivPattern = /:{3,}[ \t]*\{[^}\n]*#refs(?![\w-])[^}\n]*\}/;
  const hasExplicitRefsDiv = sectionContents.some((content) => refsDivPattern.test(content));

  // Combine sections
  for (const { index, content } of loaded) {
    // Auto-inject references before supplementary if needed
    if (index === suppIndex && hasBibliography && !hasRefsSection && !hasExplicitRefsDiv) {
      parts.push('# References\n');
      parts.push('::: {#refs}');
      parts.push(':::');
      parts.push('');
      parts.push('');
      options._refsAutoInjected = true;
    }

    parts.push(content.trim());
    parts.push('');
    parts.push(''); // Double newline between sections
  }

  let paperContent = parts.join('\n');

  // Process template variables if any exist
  if (hasVariables(paperContent)) {
    paperContent = processVariables(paperContent, config as any, { sectionContents });
  }

  // Resolve forward references (refs that appear before their anchor definition)
  // This fixes pandoc-crossref limitation with multi-file documents
  if (hasPandocCrossref()) {
    const registry = buildRegistry(directory, sections);
    const { text, resolved } = resolveForwardRefs(paperContent, registry);
    if (resolved.length > 0) {
      paperContent = text;
      // Store resolved count for optional reporting
      options._forwardRefsResolved = resolved.length;
    }

    // Resolve supplementary references and strip their anchors.
    // pandoc-crossref cannot produce "Figure S1" numbering — it numbers all
    // figures sequentially. We resolve supplementary refs to plain text and
    // remove the {#fig:...} attributes so crossref ignores them.
    const supp = resolveSupplementaryRefs(paperContent, registry);
    if (supp.resolved.length > 0) {
      paperContent = supp.text;
    }
  }

  const paperPath = path.join(directory, 'paper.md');

  fs.writeFileSync(paperPath, paperContent, 'utf-8');

  return paperPath;
}
639
+
640
/**
 * Assemble the YAML frontmatter object for paper.md from the build config.
 *
 * Only truthy settings are emitted, in the order title, author, bibliography,
 * csl. The author list is left out when the config uses numbered
 * affiliations, because the author block is then injected separately per
 * format.
 */
function buildFrontmatter(config: BuildConfig): Record<string, unknown> {
  const { title, authors, bibliography, csl } = config;
  const frontmatter: Record<string, unknown> = {};

  if (title) {
    frontmatter.title = title;
  }

  const listsAuthors = (authors?.length ?? 0) > 0;
  if (listsAuthors && !hasNumberedAffiliations(config)) {
    frontmatter.author = authors;
  }

  if (bibliography) {
    frontmatter.bibliography = bibliography;
  }

  if (csl) {
    frontmatter.csl = csl;
  }

  return frontmatter;
}
664
+
665
/**
 * Strip YAML frontmatter from content
 *
 * Recognizes a block that opens with `---` on the first line (optionally
 * preceded by a UTF-8 byte-order mark) and closes with the next `---` line.
 * Handles LF and CRLF line endings, an empty block (`---` directly followed
 * by `---`), trailing spaces/tabs after the fences, and a closing fence that
 * is the last line of the file without a trailing newline.
 *
 * @param content - Raw section text
 * @returns The text after the frontmatter block, or the input unchanged when
 *   it does not start with one
 */
function stripFrontmatter(content: string): string {
  // The `---` alternative is tried first so an empty block closes immediately
  // instead of running on to a later `---` line (e.g. a horizontal rule).
  const match = content.match(
    /^\uFEFF?---[ \t]*\r?\n(?:---|[\s\S]*?\r?\n---)[ \t]*(?:\r?\n|$)/
  );
  return match ? content.slice(match[0].length) : content;
}
675
+
676
/**
 * Check if config uses numbered affiliation mode
 * (authors have `affiliations` arrays and an affiliations map is defined)
 *
 * Returns false — rather than throwing — when `authors` is not an array
 * (e.g. an empty `authors:` key in rev.yaml parses to null) and skips
 * null entries in the list.
 */
function hasNumberedAffiliations(config: BuildConfig): boolean {
  if (!config.affiliations || Object.keys(config.affiliations).length === 0) return false;
  if (!Array.isArray(config.authors)) return false;
  return config.authors.some(
    (a) => a != null && typeof a === 'object' && Boolean(a.affiliations) && a.affiliations.length > 0
  );
}
684
+
685
/**
 * Render the author/affiliation preamble for LaTeX output using the
 * `authblk` package (numbered superscript affiliations).
 *
 * Affiliations are numbered 1..n in the key order of `config.affiliations`.
 * String authors become a bare `\author{…}`; object authors get an optional
 * `[n,m]` mark list (unknown affiliation keys are ignored) and, when flagged
 * `corresponding`, a `\thanks{…}` note that includes the e-mail if present.
 *
 * @param config - Build configuration providing `authors` and `affiliations`
 * @returns LaTeX source, one command per line, joined with `\n`
 */
function generateLatexAuthorBlock(config: BuildConfig): string {
  const preamble = [
    '\\usepackage{authblk}',
    '\\renewcommand\\Authfont{\\normalsize}',
    '\\renewcommand\\Affilfont{\\small}',
    '',
  ];

  // Affiliation key -> 1-based number, in declaration order.
  const affiliationNumber = new Map<string, number>();
  for (const key of Object.keys(config.affiliations)) {
    affiliationNumber.set(key, affiliationNumber.size + 1);
  }

  const authorLines = config.authors.map((author) => {
    if (typeof author === 'string') {
      return `\\author{${author}}`;
    }

    const numbers: number[] = [];
    for (const key of author.affiliations || []) {
      const n = affiliationNumber.get(key);
      if (n !== undefined) numbers.push(n);
    }
    const optionalArg = numbers.length > 0 ? `[${numbers.join(',')}]` : '';

    let thanks = '';
    if (author.corresponding) {
      thanks = author.email
        ? `\\thanks{Corresponding author: ${author.email}}`
        : '\\thanks{Corresponding author}';
    }
    return `\\author${optionalArg}{${author.name}${thanks}}`;
  });

  // Entries iterate in the same order as the keys numbered above.
  const affiliationLines = Object.entries(config.affiliations).map(
    ([, text], index) => `\\affil[${index + 1}]{${text}}`
  );

  return [...preamble, ...authorLines, ...affiliationLines].join('\n');
}
731
+
732
/**
 * Render the author/affiliation block as markdown for DOCX output, using
 * pandoc superscripts (`Name^1,2^`) for affiliation marks.
 *
 * Layout, joined with `\n`:
 *   1. one line listing all authors, comma separated;
 *   2. a blank line;
 *   3. one `^n^ text` line per affiliation — every line but the last ends in
 *      a backslash (hard line break) unless `docx.affiliationNewline` is
 *      explicitly `false`;
 *   4. optionally a blank line plus a `^\*^ Corresponding author: …` footnote
 *      listing the e-mail of every corresponding author that has one;
 *   5. a trailing empty line.
 *
 * A missing `config.docx` section is treated as "use defaults".
 *
 * @param config - Build configuration providing `authors`, `affiliations`
 *                 and optional `docx` settings
 * @returns Markdown text to insert after the YAML frontmatter
 */
function generateMarkdownAuthorBlock(config: BuildConfig): string {
  const lines: string[] = [];

  // Affiliation key -> 1-based number, in declaration order.
  const affiliationEntries = Object.entries(config.affiliations);
  const keyToNum = new Map<string, number>();
  affiliationEntries.forEach(([key], i) => keyToNum.set(key, i + 1));

  // Author line: Name^1,2^, Name^3,\*^, ...
  const authorParts: string[] = [];
  const correspondingEmails: string[] = [];
  for (const author of config.authors) {
    if (typeof author === 'string') {
      authorParts.push(author);
      continue;
    }
    const superParts = (author.affiliations || [])
      .map((k) => keyToNum.get(k))
      .filter((n): n is number => n !== undefined)
      .map(String);
    if (author.corresponding) {
      superParts.push('\\*');
      // Every starred author contributes to the footnote, not just the first.
      if (author.email) correspondingEmails.push(author.email);
    }
    authorParts.push(
      superParts.length > 0 ? `${author.name}^${superParts.join(',')}^` : author.name
    );
  }
  lines.push(authorParts.join(', '));
  lines.push('');

  // Affiliation lines: ^1^ Department of ...
  const useLineBreaks = config.docx?.affiliationNewline !== false;
  affiliationEntries.forEach(([, text], idx) => {
    const isLast = idx === affiliationEntries.length - 1;
    const suffix = useLineBreaks && !isLast ? '\\' : '';
    lines.push(`^${idx + 1}^ ${text}${suffix}`);
  });

  // Corresponding author footnote
  if (correspondingEmails.length > 0) {
    lines.push('');
    lines.push(`^\\*^ Corresponding author: ${correspondingEmails.join(', ')}`);
  }

  lines.push('');
  return lines.join('\n');
}
787
+
788
/**
 * Rewrite distribution notation inside selected pipe-table columns as LaTeX
 * math, for PDF/TeX builds only.
 *
 * A column is selected when its header cell (lower-cased) contains any of the
 * `tablesConfig.nowrap` patterns (also lower-cased). In selected columns,
 * body cells that are non-empty and do not already start with `$` or `\`
 * have these notations converted:
 *   - `Half-Normal(x)`          -> `$\text{Half-Normal}(x)$`
 *   - `Normal(x, y)`            -> `$\mathcal{N}(x, y)$`
 *   - `Student-t(df, loc, s)`   -> `$t_{df}(loc, s)$`
 *   - `Gamma(a, b)`             -> `$\text{Gamma}(a, b)$`
 *   - `Exponential(x)`          -> `$\text{Exp}(x)$`
 *
 * A name is only converted when it is not glued to a preceding letter or
 * hyphen, so `LogNormal(…)`, `HalfNormal(…)` or `Inverse-Gamma(…)` are left
 * as written instead of being half-converted.
 *
 * Processed tables are re-emitted with `\n` between body rows and a trailing
 * `\n`; tables without a selected column are returned untouched.
 *
 * @param content - Markdown content
 * @param tablesConfig - tables config from rev.yaml
 * @param format - output format (pdf, docx, etc.)
 * @returns processed content
 */
export function processTablesForFormat(content: string, tablesConfig: TablesConfig, format: string): string {
  // Only process for PDF/TeX output
  if (format !== 'pdf' && format !== 'tex') {
    return content;
  }

  // Nothing to do without configured column patterns
  if (!tablesConfig?.nowrap?.length) {
    return content;
  }

  const nowrapPatterns = tablesConfig.nowrap.map((p) => p.toLowerCase());

  // Applied in order. `(?<![A-Za-z-])` rejects matches that are the tail of a
  // longer distribution name; Half-Normal is listed explicitly and first.
  const distributionRules: ReadonlyArray<readonly [RegExp, string]> = [
    [/(?<![A-Za-z-])Half-Normal\(([^)]+)\)/g, '$\\text{Half-Normal}($1)$'],
    [/(?<![A-Za-z-])Normal\(([^)]+)\)/g, '$\\mathcal{N}($1)$'],
    [/(?<![A-Za-z-])Student-t\((\d+),\s*([^)]+)\)/g, '$t_{$1}($2)$'],
    [/(?<![A-Za-z-])Gamma\(([^)]+)\)/g, '$\\text{Gamma}($1)$'],
    [/(?<![A-Za-z-])Exponential\(([^)]+)\)/g, '$\\text{Exp}($1)$'],
  ];

  // Pipe table: header row, separator row, then one or more body rows.
  const tableRegex = /^(\|[^\n]+\|\r?\n\|[-:| ]+\|\r?\n)((?:\|[^\n]+\|\r?\n?)+)/gm;

  return content.replace(tableRegex, (match: string, headerAndSep: string, body: string) => {
    const headerLine = headerAndSep.split(/\r?\n/)[0] ?? '';

    // Header cells, without the empty fragments outside the outer pipes.
    const headerCells = headerLine
      .split('|')
      .slice(1, -1)
      .map((c) => c.trim().toLowerCase());

    const nowrapCols: number[] = [];
    headerCells.forEach((cell, i) => {
      if (nowrapPatterns.some((p) => cell.includes(p))) {
        nowrapCols.push(i);
      }
    });

    // If no selected column exists in this table, return it unchanged
    if (nowrapCols.length === 0) {
      return match;
    }

    const processedBody = body
      .split(/\r?\n/)
      .filter((l) => l.trim())
      .map((row) => {
        // cells[0] is the empty text before the first pipe.
        const cells = row.split('|');

        for (const colIdx of nowrapCols) {
          const cellIdx = colIdx + 1; // Account for empty first element
          const cell = cells[cellIdx];
          if (cell === undefined) continue;

          const cellContent = cell.trim();
          // Skip if empty, already math, or already has LaTeX commands
          if (!cellContent || cellContent.startsWith('$') || cellContent.startsWith('\\')) {
            continue;
          }

          let processed = cellContent;
          for (const [pattern, replacement] of distributionRules) {
            processed = processed.replace(pattern, replacement);
          }

          // Update cell with padding
          cells[cellIdx] = ` ${processed} `;
        }

        return cells.join('|');
      })
      .join('\n');

    return headerAndSep + processedBody + '\n';
  });
}
887
+
888
/**
 * Apply format-specific transforms (table normalization, author blocks,
 * crossref display conversion, raw-figure translation, slide syntax).
 *
 * The caller is responsible for stripping annotations beforehand — per the
 * original note, the dual-output paths keep comments in the markdown stream
 * and apply these transforms separately from annotation handling.
 *
 * Per format:
 *   - pdf / tex: run `processTablesForFormat`; in numbered-affiliation mode,
 *     append a `header-includes: |` entry holding the LaTeX author block to
 *     the leading YAML frontmatter.
 *   - docx: convert dynamic refs to display text; translate raw LaTeX figures
 *     unless `docx.translateRawFigures` is `false`; in numbered-affiliation
 *     mode, insert the markdown author block right after the frontmatter.
 *   - beamer / pptx: process slide markdown when slide syntax is detected.
 *   - anything else: returned unchanged.
 *
 * Both frontmatter insertions use function replacers, so `$`-sequences in
 * author names or affiliation text (`$&`, `$1`, `$$`, …) are inserted
 * literally instead of being read as replacement patterns.
 *
 * @param content - Markdown content (annotations already stripped as needed)
 * @param format - Output format
 * @param config - Build config
 * @param registry - Crossref registry for the project
 * @returns Transformed markdown
 */
export function applyFormatTransforms(
  content: string,
  format: string,
  config: BuildConfig,
  registry: Registry
): string {
  if (format === 'pdf' || format === 'tex') {
    content = processTablesForFormat(content, config.tables, format);

    if (hasNumberedAffiliations(config)) {
      const latexBlock = generateLatexAuthorBlock(config);
      content = content.replace(
        /^(---\r?\n[\s\S]*?)(---\r?\n)/,
        (_match: string, yamlContent: string, closing: string) => {
          // Indent each line so it sits inside the YAML block scalar.
          const indented = latexBlock.split('\n').map((l) => ' ' + l).join('\n');
          return `${yamlContent}header-includes: |\n${indented}\n${closing}`;
        }
      );
    }
  } else if (format === 'docx') {
    content = convertDynamicRefsToDisplay(content, registry);

    // Pandoc strips raw LaTeX in docx output. Translate the common
    // `\begin{figure}...\end{figure}` shape to portable markdown so figures
    // actually appear; exotic blocks are left alone.
    if (config.docx?.translateRawFigures !== false) {
      const { translated } = translateRawLatexFigures(content);
      content = translated;
    }

    if (hasNumberedAffiliations(config)) {
      const mdBlock = generateMarkdownAuthorBlock(config);
      // Function replacer: mdBlock is user-derived and must not be parsed
      // for `$` replacement patterns.
      content = content.replace(
        /^(---\r?\n[\s\S]*?---\r?\n)/,
        (frontmatter: string) => `${frontmatter}\n${mdBlock}\n`
      );
    }
  } else if (format === 'beamer' || format === 'pptx') {
    if (hasSlideSyntax(content)) {
      content = processSlideMarkdown(content, format);
    }
  }

  return content;
}
939
+
940
/**
 * Produce the format-specific markdown file that is handed to pandoc.
 *
 * Reads `paperPath`, builds the crossref registry for its directory (passing
 * `config.sections` so file ordering follows the config), strips annotations
 * (for docx, honouring `config.docx.keepComments`), applies the shared format
 * transforms and writes the result next to the input as `.paper-<format>.md`.
 *
 * @param paperPath - Path to the combined paper.md
 * @param format - Output format
 * @param config - Build config
 * @param _options - Build options (currently unused)
 * @returns Path of the prepared file
 */
export function prepareForFormat(
  paperPath: string,
  format: string,
  config: BuildConfig,
  _options: BuildOptions = {}
): string {
  const directory = path.dirname(paperPath);
  const source = fs.readFileSync(paperPath, 'utf-8');

  const registry = buildRegistry(directory, config.sections);

  // Only docx passes annotation options through.
  const stripped =
    format === 'docx'
      ? stripAnnotations(source, { keepComments: config.docx.keepComments })
      : stripAnnotations(source);

  const prepared = applyFormatTransforms(stripped, format, config, registry);

  const preparedPath = path.join(directory, `.paper-${format}.md`);
  fs.writeFileSync(preparedPath, prepared, 'utf-8');
  return preparedPath;
}
972
+
973
/**
 * Replace dynamic cross-references (e.g. `@fig:label`) with their display
 * text (e.g. "Figure 1").
 *
 * References are substituted from the last one to the first so that the
 * recorded positions of earlier references stay valid. References for which
 * `labelToDisplay` yields nothing are left as they are.
 *
 * @param text - Markdown text
 * @param registry - Crossref registry used to resolve labels
 * @returns Text with resolvable references replaced
 */
function convertDynamicRefsToDisplay(text: string, registry: Registry): string {
  const refs = detectDynamicRefs(text);

  let output = text;
  let index = refs.length;
  while (index-- > 0) {
    const ref = refs[index];
    if (!ref) continue;

    // NOTE(review): the `as any` cast hides a type mismatch with
    // labelToDisplay's registry parameter — confirm and tighten.
    const display = labelToDisplay(ref.type, ref.label, registry as any);
    if (!display) continue;

    const before = output.slice(0, ref.position);
    const after = output.slice(ref.position + ref.match.length);
    output = before + display + after;
  }

  return output;
}
993
+
994
+ // =============================================================================
995
+ // Raw LaTeX figure detection / translation (docx)
996
+ // =============================================================================
997
+
998
/**
 * One raw LaTeX `\begin{figure}...\end{figure}` block found in source
 * markdown. Pandoc strips raw LaTeX silently in docx output, so these records
 * are used to warn about blocks that will not be translated.
 */
export interface RawLatexFigure {
  /** Source file the block was found in, when the caller supplied one. */
  file?: string;
  /** 1-based line, within the scanned content, where `\begin{figure}` sits. */
  line: number;
  /** Full text of the matched block. */
  block: string;
  /**
   * True when the block uses features that are not auto-translated (multiple
   * `\includegraphics`, `\subfloat`, `\rotatebox`, unrecognised width units).
   */
  exotic: boolean;
}
1011
+
1012
/**
 * Build a fresh global regex matching `\begin{figure}` / `\begin{figure*}`
 * (with an optional `[placement]` argument) through the nearest
 * `\end{figure}` / `\end{figure*}`. A new instance is returned on every call
 * so callers never share `lastIndex` state.
 */
function makeRawFigureRegex(): RegExp {
  const figureBlock = /\\begin\{figure\*?\}(?:\[[^\]]*\])?[\s\S]*?\\end\{figure\*?\}/g;
  return figureBlock;
}
1016
+
1017
/**
 * Convert a LaTeX width spec to a markdown image attribute value.
 *   - `0.8\textwidth`           -> `80%` (also `\linewidth`, `\columnwidth`)
 *   - `\linewidth`              -> `100%`
 *   - `8cm`, `2in`, `12pt`, …   -> kept, with inner whitespace removed
 *
 * The numeric part must be a well-formed decimal (`8`, `0.8`, `.5`, `1.`);
 * malformed input such as `1.2.3cm` is rejected instead of being guessed at.
 *
 * @param raw - Width value as written in the `\includegraphics` options
 * @returns The markdown width value, or null for anything that is not
 *          translated (the caller then treats the block as "exotic")
 */
function convertLatexWidth(raw: string): string | null {
  const trimmed = raw.trim();

  // Coefficient × relative length
  const relative = /^(\d+\.?\d*|\.\d+)\s*\\(?:textwidth|linewidth|columnwidth)$/.exec(trimmed);
  if (relative) {
    const percent = Math.round(parseFloat(relative[1]!) * 100);
    if (!Number.isFinite(percent) || percent <= 0) return null;
    return `${percent}%`;
  }

  // Bare relative length
  if (/^\\(textwidth|linewidth|columnwidth)$/.test(trimmed)) return '100%';

  // Absolute units
  if (/^(?:\d+\.?\d*|\.\d+)\s*(?:cm|mm|in|pt|px|em|ex)$/.test(trimmed)) {
    return trimmed.replace(/\s+/g, '');
  }

  return null;
}
1039
+
1040
/**
 * Extract the balanced `{...}` argument of the first usable occurrence of
 * `command` in `text`.
 *
 * An occurrence is usable when, after an optional `*` and an optional
 * `[...]` argument (e.g. the short caption in `\caption[short]{long}`), a
 * balanced braced argument follows. Occurrences that are not usable — for
 * example `\captionsetup{…}` while looking for `\caption` — are skipped and
 * the search continues, so they cannot hide a later real occurrence.
 *
 * @param text - Text to search
 * @param command - Command including its backslash, e.g. `\caption`
 * @returns The argument without its outer braces, or null when none is found
 */
function extractBracedArg(text: string, command: string): string | null {
  for (let from = 0; from <= text.length; ) {
    const idx = text.indexOf(command, from);
    if (idx === -1) return null;
    // Always advance by at least one character so the loop terminates.
    from = idx + Math.max(command.length, 1);

    const arg = readBracedArgAt(text, idx + command.length);
    if (arg !== null) return arg;
  }
  return null;
}

/** Read `[*] [[opt]] {arg}` starting at `pos`; null when that shape is absent. */
function readBracedArgAt(text: string, pos: number): string | null {
  let i = pos;
  if (text[i] === '*') i++;
  while (i < text.length && /\s/.test(text[i]!)) i++;

  // Optional argument: skip to the `]` that closes it (outside any braces).
  if (text[i] === '[') {
    let braceDepth = 0;
    let closed = false;
    i++;
    while (i < text.length) {
      const ch = text[i]!;
      if (ch === '\\' && i + 1 < text.length) { i += 2; continue; }
      if (ch === '{') braceDepth++;
      else if (ch === '}') braceDepth--;
      else if (ch === ']' && braceDepth <= 0) { closed = true; i++; break; }
      i++;
    }
    if (!closed) return null;
    while (i < text.length && /\s/.test(text[i]!)) i++;
  }

  if (text[i] !== '{') return null;
  i++;
  const start = i;
  let depth = 1;
  while (i < text.length) {
    const ch = text[i]!;
    // A backslash escapes the next character (so `\}` does not close).
    if (ch === '\\' && i + 1 < text.length) { i += 2; continue; }
    if (ch === '{') depth++;
    else if (ch === '}') {
      depth--;
      if (depth === 0) return text.slice(start, i);
    }
    i++;
  }
  return null;
}
1062
+
1063
/**
 * Decide whether a `\begin{figure}` block uses features that are not
 * auto-translated. A block is exotic when any of these holds:
 *   - it uses `\subfloat` or `\rotatebox`;
 *   - it does not contain exactly one `\includegraphics`;
 *   - that `\includegraphics` has no parsable `{path}` argument;
 *   - it specifies a `width=` that `convertLatexWidth` cannot convert.
 *
 * @param block - Full text of the figure block
 * @returns true when the block must be left untouched
 */
function isExoticFigureBlock(block: string): boolean {
  const unsupportedCommands = [/\\subfloat\b/, /\\rotatebox\b/];
  if (unsupportedCommands.some((re) => re.test(block))) return true;

  const includeCount = block.match(/\\includegraphics\b/g)?.length ?? 0;
  if (includeCount !== 1) return true;

  const include = /\\includegraphics\s*(?:\[([^\]]*)\])?\s*\{([^}]+)\}/.exec(block);
  if (include === null) return true;

  const widthSpec = /(?:^|,)\s*width\s*=\s*([^,]+)/.exec(include[1] || '');
  return widthSpec !== null && !convertLatexWidth(widthSpec[1]!);
}
1076
+
1077
/**
 * Locate raw LaTeX figure blocks that contain `\includegraphics`.
 *
 * @param content - Markdown content to scan
 * @param file - Optional file name, attached to every result as-is
 * @returns One record per matching block; `line` is the 1-based line within
 *          `content` on which the block's `\begin{figure}` starts
 */
export function detectRawLatexFigures(content: string, file?: string): RawLatexFigure[] {
  const found: RawLatexFigure[] = [];

  for (const hit of content.matchAll(makeRawFigureRegex())) {
    const block = hit[0];
    if (!block.includes('\\includegraphics')) continue;

    // Lines before the match, plus the partial line the match starts on.
    const precedingLines = content.slice(0, hit.index).split(/\r?\n/);
    found.push({
      file,
      line: precedingLines.length,
      block,
      exotic: isExoticFigureBlock(block),
    });
  }

  return found;
}
1094
+
1095
/**
 * Translate simple raw LaTeX figures to portable markdown images.
 *
 * Handles the common shape: a `\begin{figure}...\end{figure}` block with a
 * single `\includegraphics` and optional `\caption{...}` / `\label{...}`.
 * The result is `![caption](path){#fig:label width=N%}`; a label that already
 * carries a `prefix:` keeps it, otherwise `fig:` is prepended. Blocks without
 * `\includegraphics`, and blocks flagged by `isExoticFigureBlock`, are left
 * untouched.
 *
 * @param content - Markdown content
 * @returns The rewritten content and how many blocks were translated
 */
export function translateRawLatexFigures(content: string): { translated: string; translatedCount: number } {
  let translatedCount = 0;

  const toMarkdown = (block: string): string => {
    if (!block.includes('\\includegraphics') || isExoticFigureBlock(block)) return block;

    const include = /\\includegraphics\s*(?:\[([^\]]*)\])?\s*\{([^}]+)\}/.exec(block);
    if (include === null) return block;
    const options = include[1] || '';
    const imagePath = include[2]!.trim();

    let widthAttr = '';
    const widthSpec = /(?:^|,)\s*width\s*=\s*([^,]+)/.exec(options);
    if (widthSpec !== null) {
      const converted = convertLatexWidth(widthSpec[1]!);
      // Defensive: isExoticFigureBlock has already rejected such widths.
      if (!converted) return block;
      widthAttr = `width=${converted}`;
    }

    const caption = (extractBracedArg(block, '\\caption') ?? '').trim();
    const rawLabel = extractBracedArg(block, '\\label');

    const attributes: string[] = [];
    if (rawLabel) {
      const label = rawLabel.trim();
      attributes.push(/^[a-z]+:/i.test(label) ? `#${label}` : `#fig:${label}`);
    }
    if (widthAttr) attributes.push(widthAttr);

    translatedCount += 1;
    const attributeBlock = attributes.length > 0 ? `{${attributes.join(' ')}}` : '';
    return `![${caption}](${imagePath})${attributeBlock}`;
  };

  const translated = content.replace(makeRawFigureRegex(), toMarkdown);
  return { translated, translatedCount };
}
1138
+
1139
/**
 * Build the multi-line warning shown for raw LaTeX figure blocks that will
 * not render in docx.
 *
 * @param figs - Blocks to list; each line shows `file:line` (or `line N`
 *               when no file is known) and the image path when one is found
 * @param translateEnabled - true when auto-translate ran (the listed blocks
 *               are exotic leftovers); false when no translation was attempted
 * @returns Summary line, one line per block, then two hint lines
 */
function formatRawLatexFigureWarning(figs: RawLatexFigure[], translateEnabled: boolean): string {
  const reason = translateEnabled ? 'too complex to auto-translate' : 'translateRawFigures: false';
  const summary = `${figs.length} raw LaTeX figure block(s) won't render in docx (${reason}).`;

  const entries = figs.map((fig) => {
    const location = fig.file ? `${fig.file}:${fig.line}` : `line ${fig.line}`;
    const include = /\\includegraphics\s*(?:\[[^\]]*\])?\s*\{([^}]+)\}/.exec(fig.block);
    const imagePath = include ? ` ${include[1]!.trim()}` : '';
    return ` ${location}${imagePath}`;
  });

  return [
    summary,
    ...entries,
    ' Hint: use ![caption](path){#fig:label width=80%} for format-portable figures,',
    ' or pass --pandoc-arg=--lua-filter=<your.lua> to translate them yourself.',
  ].join('\n');
}
1159
+
1160
/**
 * Scan the project's section files and build a warning for raw LaTeX figure
 * blocks that will not survive the docx build.
 *
 * With auto-translate enabled (`docx.translateRawFigures` not `false`) only
 * exotic blocks are reported, since the rest get rewritten. With it disabled
 * every detected block is reported. Missing or unreadable section files are
 * skipped silently (best effort).
 *
 * @param directory - Project directory
 * @param config - Build config
 * @returns The warning text, or null when there is nothing to warn about
 */
export function collectRawLatexFigureWarning(directory: string, config: BuildConfig): string | null {
  const translateEnabled = config.docx?.translateRawFigures !== false;
  const flagged: RawLatexFigure[] = [];

  for (const section of findSections(directory, config.sections)) {
    const sectionPath = path.join(directory, section);
    if (!fs.existsSync(sectionPath)) continue;

    try {
      const figures = detectRawLatexFigures(fs.readFileSync(sectionPath, 'utf-8'), section);
      // Translatable blocks only matter when translation is switched off.
      flagged.push(...figures.filter((fig) => !translateEnabled || fig.exotic));
    } catch {
      // ignore unreadable sections
    }
  }

  return flagged.length === 0 ? null : formatRawLatexFigureWarning(flagged, translateEnabled);
}
1187
+
1188
/**
 * Build the built-in pandoc arguments for a format, derived from config.
 *
 * Argument order: writer (`-t …`, plus `-s` for standalone tex), output file
 * (`-o`), the pandoc-crossref filter (when available and the format is not a
 * slide format), `--citeproc` (when a bibliography is configured), then the
 * format-specific options for pdf, docx or beamer. pptx adds nothing here.
 *
 * Per the original note, passthrough args (config.pandocArgs,
 * config[format].pandocArgs, CLI --pandoc-arg) are not included; they are
 * appended later in runPandoc so they win against defaults added there.
 *
 * @param format - Output format (tex, pdf, docx, beamer, pptx)
 * @param config - Build config
 * @param outputPath - Output path, passed to pandoc unchanged
 * @returns Argument list for pandoc
 */
export function buildPandocArgs(format: string, config: BuildConfig, outputPath: string): string[] {
  const args: string[] = [];

  // Writer selection.
  switch (format) {
    case 'tex':
      args.push('-t', 'latex');
      if (config.tex.standalone) {
        args.push('-s');
      }
      break;
    case 'pdf':
    case 'docx':
    case 'beamer':
    case 'pptx':
      args.push('-t', format);
      break;
    default:
      break;
  }

  // Output file, forwarded exactly as supplied by the caller.
  args.push('-o', outputPath);

  // Crossref filter (if available) - skipped for slide formats.
  const crossrefAvailable = hasPandocCrossref();
  const isSlideFormat = format === 'beamer' || format === 'pptx';
  if (crossrefAvailable && !isSlideFormat) {
    args.push('--filter', 'pandoc-crossref');
  }

  if (config.bibliography) {
    args.push('--citeproc');
  }

  // Format-specific options.
  switch (format) {
    case 'pdf': {
      const pdf = config.pdf;
      if (pdf.template) args.push('--template', pdf.template);
      if (pdf.engine) args.push(`--pdf-engine=${pdf.engine}`);
      for (const font of ['mainfont', 'sansfont', 'monofont'] as const) {
        const family = pdf[font];
        if (family) args.push('-V', `${font}=${family}`);
      }
      args.push('-V', `documentclass=${pdf.documentclass}`);
      args.push('-V', `fontsize=${pdf.fontsize}`);
      args.push('-V', `geometry:${pdf.geometry}`);
      if (pdf.headerIncludes) args.push('-H', pdf.headerIncludes);
      if (pdf.linestretch !== 1) args.push('-V', `linestretch=${pdf.linestretch}`);
      if (pdf.numbersections) args.push('--number-sections');
      if (pdf.toc) args.push('--toc');
      break;
    }

    case 'docx': {
      if (config.docx.reference) args.push('--reference-doc', config.docx.reference);
      if (config.docx.toc) args.push('--toc');
      break;
    }

    case 'beamer': {
      const beamer = config.beamer || {};
      for (const key of ['theme', 'colortheme', 'fonttheme', 'aspectratio', 'navigation'] as const) {
        const value = beamer[key];
        if (value) args.push('-V', `${key}=${value}`);
      }

      // Speaker notes default to 'show'; 'hide' or any falsy value disables them.
      const notesMode = beamer.notes === undefined ? 'show' : beamer.notes;
      if (notesMode && notesMode !== 'hide') {
        args.push('-V', `classoption=notes=${notesMode}`);
      }

      // Fit images within slide bounds unless explicitly switched off.
      if (beamer.fit_images !== false) {
        const fitImagesHeader = [
          '\\makeatletter',
          '\\def\\maxwidth{\\ifdim\\Gin@nat@width>\\linewidth\\linewidth\\else\\Gin@nat@width\\fi}',
          '\\def\\maxheight{\\ifdim\\Gin@nat@height>0.75\\textheight 0.75\\textheight\\else\\Gin@nat@height\\fi}',
          '\\makeatother',
          '\\setkeys{Gin}{width=\\maxwidth,height=\\maxheight,keepaspectratio}',
        ].join('\n');
        args.push('-V', `header-includes=${fitImagesHeader}`);
      }

      // Slides need standalone
      args.push('-s');
      break;
    }

    default:
      // pptx: per the original note, the reference doc is set by the caller
      // after template generation. Other formats have no extra options.
      break;
  }

  return args;
}
1311
+
1312
/**
 * Gather user-supplied passthrough pandoc args for a format.
 *
 * Sources are concatenated in this order: top-level `config.pandocArgs`,
 * then `config[format].pandocArgs`, then `extraArgs` (CLI). Because later
 * entries come last, they win for repeated flags.
 *
 * @param format - Output format; also the config key that is consulted
 * @param config - Build config
 * @param extraArgs - Extra args, e.g. from the command line
 * @returns A new array with all passthrough args
 */
export function collectPandocPassthroughArgs(
  format: string,
  config: BuildConfig,
  extraArgs: string[] = []
): string[] {
  // Format sections are looked up dynamically, hence the structural cast.
  const perFormat = (config as unknown as Record<string, { pandocArgs?: string[] } | undefined>)[format];
  const sources: Array<string[] | undefined> = [config.pandocArgs, perFormat?.pandocArgs, extraArgs];

  const collected: string[] = [];
  for (const source of sources) {
    if (source && source.length > 0) {
      collected.push(...source);
    }
  }
  return collected;
}
1335
+
1336
/**
 * Create `crossref.yaml` in the project directory when it does not exist yet
 * and pandoc-crossref is available. An existing file is never overwritten.
 *
 * @param directory - Project directory
 * @param config - Build config; `config.crossref` is serialized as YAML
 */
function ensureCrossrefConfig(directory: string, config: BuildConfig): void {
  const target = path.join(directory, 'crossref.yaml');

  if (fs.existsSync(target)) return;
  if (!hasPandocCrossref()) return;

  fs.writeFileSync(target, YAML.stringify(config.crossref), 'utf-8');
}
1346
+
1347
/**
 * Look up where to get a missing external dependency.
 *
 * A `Map` is used so that only the listed tools match; names that happen to
 * exist on `Object.prototype` (`toString`, `constructor`, …) fall through to
 * the generic answer.
 *
 * @param tool - Tool name, e.g. `pandoc` or `latex`
 * @returns Install URL for known tools, otherwise `'Check documentation'`
 */
function getInstallInstructions(tool: string): string {
  const instructions = new Map<string, string>([
    ['pandoc', 'https://pandoc.org/installing.html'],
    ['latex', 'https://www.latex-project.org/get/'],
  ]);
  return instructions.get(tool) || 'Check documentation';
}
1357
+
1358
/**
 * Work out the directory where final outputs should land.
 *
 * @param directory - Project directory
 * @param config - Build config; `outputDir` may be absolute, relative to
 *                 `directory`, or unset/empty
 * @returns `config.outputDir` when absolute, `directory/outputDir` when
 *          relative, and `directory` itself when it is null/empty
 */
export function resolveOutputDir(directory: string, config: BuildConfig): string {
  const configured = config.outputDir;
  if (configured && path.isAbsolute(configured)) {
    return configured;
  }
  return configured ? path.join(directory, configured) : directory;
}
1367
+
1368
/** File extension (with leading dot) for each supported pandoc format. */
const FORMAT_EXTENSIONS: Record<string, string> = {
  tex: '.tex',
  pdf: '.pdf',
  docx: '.docx',
  beamer: '.pdf',
  pptx: '.pptx',
};

/**
 * Get the file extension for a format, defaulting to `.pdf`.
 *
 * Only the table's own keys are consulted, so a format name that collides
 * with an inherited `Object.prototype` member (`constructor`, `toString`, …)
 * gets the default instead of a non-string value.
 *
 * @param format - Output format name
 * @returns Extension including the leading dot
 */
export function getFormatExtension(format: string): string {
  const known = Object.prototype.hasOwnProperty.call(FORMAT_EXTENSIONS, format)
    ? FORMAT_EXTENSIONS[format]
    : undefined;
  return known ?? '.pdf';
}
1381
+
1382
/**
 * Slugify a title for use as a default output filename.
 *
 * The title is lower-cased, every run of characters outside `a-z0-9` becomes
 * a single `-`, and leading/trailing hyphens are removed. An empty title or
 * empty slug yields `'paper'`.
 *
 * Slugs longer than MAX_TITLE_FILENAME_LENGTH are cut at the last hyphen
 * at-or-before that length so words stay whole. A hyphen sitting exactly at
 * the limit counts: the words before it fit, so none of them is dropped.
 * When the only hyphens are in the first half of the limit (degenerate
 * titles with few or no spaces) the slug is hard-cut at the limit instead.
 *
 * @param title - Document title
 * @returns Slug of at most MAX_TITLE_FILENAME_LENGTH characters
 */
export function slugifyTitle(title: string): string {
  if (!title) return 'paper';
  const slug = title.toLowerCase().replace(/[^a-z0-9]+/g, '-').replace(/^-+|-+$/g, '');
  if (!slug) return 'paper';
  if (slug.length <= MAX_TITLE_FILENAME_LENGTH) return slug;

  // Search up to and including index MAX: a hyphen there means the first MAX
  // characters are complete words.
  const lastDash = slug.lastIndexOf('-', MAX_TITLE_FILENAME_LENGTH);
  // Only truncate at a hyphen if it leaves a reasonable amount of content.
  if (lastDash >= MAX_TITLE_FILENAME_LENGTH / 2) {
    return slug.slice(0, lastDash);
  }
  return slug.slice(0, MAX_TITLE_FILENAME_LENGTH);
}
1402
+
1403
/**
 * Make sure `name` ends with `ext`, compared case-insensitively.
 *
 * A name that already ends with the extension is returned untouched. In
 * every other case — no extension, or a different one — `ext` is appended,
 * never substituted: `output.docx` with `.tex` becomes `output.docx.tex`,
 * which is loud enough to flag the misconfiguration.
 *
 * @param name - File name
 * @param ext - Extension including the leading dot
 * @returns The name, guaranteed to end with `ext`
 */
function ensureExtension(name: string, ext: string): string {
  const alreadyPresent = name.toLowerCase().endsWith(ext.toLowerCase());
  return alreadyPresent ? name : `${name}${ext}`;
}
1416
+
1417
/**
 * Resolve the final output path for a build.
 *
 * Priority: `cliOverride` (-o flag) > `config.output[format]` > slugified
 * title fallback. (Callers may bypass this function entirely with an
 * internally forced path.)
 *
 * Explicit names: a relative path resolves under outputDir and keeps any
 * directory components it carries (`sub/talk.pdf` ->
 * `<outputDir>/sub/talk.pdf`); an absolute path bypasses outputDir. Whatever
 * extension the name carries is removed and the format's canonical extension
 * is applied. The fallback path always lives directly under outputDir.
 *
 * @param directory - Project directory
 * @param config - Build config
 * @param format - Output format, used for the extension and the
 *                 `config.output` lookup
 * @param options - `cliOverride`: explicit output name from the command line.
 *                  `suffix`: inserted before the extension (e.g. "-changes",
 *                  "-slides"); the code applies it to explicit names and to
 *                  the fallback alike. NOTE(review): earlier documentation
 *                  said the suffix is suppressed for explicit names —
 *                  confirm which is intended.
 * @returns The resolved output path
 */
export function resolveOutputPath(
  directory: string,
  config: BuildConfig,
  format: string,
  options: { cliOverride?: string; suffix?: string } = {}
): string {
  const { cliOverride, suffix = '' } = options;
  const ext = getFormatExtension(format);

  const explicit = cliOverride ?? config.output?.[format];
  if (explicit) {
    // Join the whole relative path (not just its basename) so directory
    // components supplied by the user are preserved.
    const target = path.isAbsolute(explicit)
      ? explicit
      : path.join(resolveOutputDir(directory, config), explicit);
    const stem = path.basename(target).replace(/\.[^./\\]+$/, '');
    return path.join(path.dirname(target), ensureExtension(`${stem}${suffix}`, ext));
  }

  const slug = slugifyTitle(config.title);
  return path.join(resolveOutputDir(directory, config), `${slug}${suffix}${ext}`);
}
1453
+
1454
/**
 * Run a single pandoc build for one output format.
 *
 * Pandoc is spawned with its working directory set to the directory that
 * contains `inputPath`, so the input file, the macro preamble and the
 * crossref metadata file are passed by basename / relative path.
 *
 * @param inputPath - Path of the prepared markdown file to convert.
 * @param format - Target format (e.g. `pdf`, `docx`, `tex`, `beamer`, `pptx`, `html`).
 * @param config - Build configuration (title, pptx settings, macros, ...).
 * @param options - Per-invocation options; `outputPath` forces the output
 *   location, `output` is the CLI override handed to the resolver, `verbose`
 *   echoes the pandoc command line and post-processing warnings to stderr.
 * @returns A result carrying `outputPath` and `success`; when pandoc fails or
 *   cannot be spawned, `error` holds stderr or the spawn error message. Pandoc
 *   failures are reported through the resolved value, not through a rejection.
 */
export async function runPandoc(
  inputPath: string,
  format: string,
  config: BuildConfig,
  options: BuildOptions = {}
): Promise<PandocResult> {
  const directory = path.dirname(inputPath);

  // Best-effort steps must not fail the build, but in verbose mode the user
  // should at least learn that one of them was skipped.
  const warnVerbose = (label: string, err: unknown): void => {
    if (options.verbose) {
      const detail = err instanceof Error ? err.message : String(err);
      console.error(`${label}: ${detail}`);
    }
  };

  // outputPath (internal force) wins over the resolver. For beamer, we keep
  // the `-slides` suffix on the slug fallback to distinguish from a regular
  // PDF build; when the user supplies an explicit name, they pick their own.
  const suffix = format === 'beamer' ? '-slides' : '';
  const outputPath = options.outputPath
    ?? resolveOutputPath(directory, config, format, {
      cliOverride: options.output,
      suffix,
    });

  // Only create the directory for resolver-chosen paths; a forced outputPath
  // is the caller's responsibility.
  if (!options.outputPath) {
    const outDir = path.dirname(outputPath);
    if (!fs.existsSync(outDir)) {
      fs.mkdirSync(outDir, { recursive: true });
    }
  }

  // Ensure crossref.yaml exists
  ensureCrossrefConfig(directory, config);

  // Pandoc runs with cwd = directory, so pass the output path relative to it.
  const args = buildPandocArgs(format, config, path.relative(directory, outputPath) || path.basename(outputPath));

  // Handle PPTX reference template and themes
  let pptxMediaDir: string | null = null;
  if (format === 'pptx') {
    const pptx = config.pptx || {};

    // Determine media directory (default: pptx/media or slides/media)
    let mediaDir = pptx.media;
    if (!mediaDir) {
      if (fs.existsSync(path.join(directory, 'pptx', 'media'))) {
        mediaDir = path.join(directory, 'pptx', 'media');
      } else if (fs.existsSync(path.join(directory, 'slides', 'media'))) {
        mediaDir = path.join(directory, 'slides', 'media');
      }
    } else if (!path.isAbsolute(mediaDir)) {
      mediaDir = path.join(directory, mediaDir);
    }
    pptxMediaDir = mediaDir || null;

    // Determine reference doc: custom reference overrides theme.
    // path.resolve keeps an absolute `pptx.reference` intact and yields an
    // absolute path for a relative one, so the existence check (process cwd)
    // and pandoc (cwd = directory) both look at the same file.
    const customReference = pptx.reference
      ? path.resolve(directory, pptx.reference)
      : null;
    let referenceDoc: string | null = null;
    if (customReference && fs.existsSync(customReference)) {
      // Custom reference doc takes precedence
      referenceDoc = customReference;
    } else {
      // Use built-in theme (default: 'default')
      const themeName = pptx.theme || 'default';
      const themePath = getThemePath(themeName);
      if (themePath && fs.existsSync(themePath)) {
        referenceDoc = themePath;
      }
    }

    if (referenceDoc) {
      args.push('--reference-doc', referenceDoc);
    }

    // Add color filter for PPTX (handles [text]{color=#RRGGBB} syntax).
    // fileURLToPath handles Windows paths with spaces — the old
    // `new URL(...).pathname` returned URL-encoded `%20` and fs.existsSync
    // silently failed.
    const colorFilterPath = path.join(
      path.dirname(fileURLToPath(import.meta.url)),
      'pptx-color-filter.lua'
    );
    if (fs.existsSync(colorFilterPath)) {
      args.push('--lua-filter', colorFilterPath);
    }
  }

  // Wire placeholder macros (built-in \tofill plus user-declared entries).
  //   - docx/html: lua filter expands \name{X} to format-specific raw runs.
  //   - pdf/tex/beamer: inject a \providecommand preamble so LaTeX renders it
  //     directly. `\providecommand` is non-clobbering, so a user who already
  //     has `\providecommand{\tofill}{...}` in their own header keeps theirs.
  //
  // Sidecar path is passed to the lua filter via DOCREV_MACROS_FILE in the
  // child env (not pandoc metadata) because pandoc walks RawInline/RawBlock
  // BEFORE Meta — by the time a Meta handler could read the path, the inline
  // expansion has already happened.
  const macroTempFiles: string[] = [];
  let macroEnvFile: string | null = null;
  const macros = mergeMacros((config as { macros?: unknown }).macros);
  if (macros.length > 0) {
    if (format === 'docx' || format === 'html' || format === 'html5' || format === 'html4') {
      const sidecarPath = writeMacrosSidecar(directory, macros);
      macroTempFiles.push(sidecarPath);
      macroEnvFile = sidecarPath;
      const filterPath = getMacroFilterPath();
      if (fs.existsSync(filterPath)) {
        args.push('--lua-filter', filterPath);
      }
    } else if (format === 'pdf' || format === 'tex' || format === 'beamer') {
      const preamble = generateLatexPreamble(macros);
      const preamblePath = path.join(directory, '.macros.tex');
      fs.writeFileSync(preamblePath, preamble, 'utf-8');
      macroTempFiles.push(preamblePath);
      // Basename only: pandoc runs with cwd = directory.
      args.push('-H', path.basename(preamblePath));
    }
  }

  // Add crossref metadata file if exists (skip for slides - they don't use crossref)
  if (format !== 'beamer' && format !== 'pptx') {
    const crossrefPath = path.join(directory, 'crossref.yaml');
    if (fs.existsSync(crossrefPath) && hasPandocCrossref()) {
      // Use basename since we set cwd to directory
      args.push('--metadata-file', 'crossref.yaml');
    }
  }

  // Passthrough args go last so they win against built-in defaults.
  args.push(...collectPandocPassthroughArgs(format, config, options.pandocArgs));

  // Input file (use basename since we set cwd to directory)
  args.push(path.basename(inputPath));

  if (options.verbose) {
    const quoted = args.map(a => /[\s"'$`]/.test(a) ? `"${a.replace(/"/g, '\\"')}"` : a).join(' ');
    console.error(`[pandoc ${format}] (cwd: ${directory})`);
    console.error(` pandoc ${quoted}`);
  }

  return new Promise((resolve) => {
    const pandocEnv: NodeJS.ProcessEnv = { ...process.env };
    if (macroEnvFile) {
      pandocEnv.DOCREV_MACROS_FILE = macroEnvFile;
    }
    const pandoc: ChildProcess = spawn('pandoc', args, {
      cwd: directory,
      stdio: ['ignore', 'pipe', 'pipe'],
      env: pandocEnv,
    });

    // Decode at the stream level so a multi-byte character split across two
    // chunks is not corrupted in the collected error text.
    pandoc.stderr?.setEncoding('utf8');
    let stderr = '';
    pandoc.stderr?.on('data', (chunk) => {
      stderr += chunk.toString();
    });

    const cleanupMacroTempFiles = (): void => {
      for (const tmp of macroTempFiles) {
        try {
          fs.unlinkSync(tmp);
        } catch {
          // ignore — best-effort cleanup
        }
      }
    };

    pandoc.on('close', async (code) => {
      cleanupMacroTempFiles();
      if (code !== 0) {
        resolve({ outputPath, success: false, error: stderr || `Exit code ${code}` });
        return;
      }

      // Everything below is post-processing of an output pandoc has already
      // written. The outer try/catch guarantees the promise always settles,
      // even if one of the awaited steps rejects unexpectedly.
      try {
        // For PPTX, post-process to add slide numbers, buildup colors, and logos
        if (format === 'pptx') {
          try {
            // Inject slide numbers into content slides only
            await injectSlideNumbers(outputPath);
          } catch (e) {
            // Slide number injection failed but output was created
            warnVerbose('Slide number injection skipped', e);
          }
          try {
            // Apply colors (default text color, title color, buildup greying)
            const pptxConfig = config.pptx || {};
            const colorsConfig = pptxConfig.colors || {};
            const buildupConfig = pptxConfig.buildup || {};
            // Merge colors and buildup config for applyBuildupColors
            const colorConfig = {
              default: colorsConfig.default,
              title: colorsConfig.title,
              grey: buildupConfig.grey,
              accent: buildupConfig.accent,
              enabled: buildupConfig.enabled
            };
            await applyBuildupColors(outputPath, colorConfig);
          } catch (e) {
            // Color application failed but output was created
            warnVerbose('Color application skipped', e);
          }
          // Inject logos into cover slide (if media dir configured)
          if (pptxMediaDir) {
            try {
              await injectMediaIntoPptx(outputPath, pptxMediaDir);
            } catch (e) {
              // Logo injection failed but output was created
              warnVerbose('Logo injection skipped', e);
            }
          }
        }

        // Run user postprocess scripts
        const postResult = await runPostprocess(outputPath, format, config as unknown as Parameters<typeof runPostprocess>[2], options);
        if (!postResult.success && options.verbose) {
          console.error(`Postprocess warning: ${postResult.error}`);
        }
      } catch (e) {
        // A rejected postprocess is treated like a reported postprocess
        // failure: warn, but keep the build result successful.
        warnVerbose('Postprocess warning', e);
      }

      resolve({ outputPath, success: true });
    });

    pandoc.on('error', (err) => {
      cleanupMacroTempFiles();
      resolve({ outputPath, success: false, error: err.message });
    });
  });
}
1671
+
1672
/**
 * Full build pipeline: verify tooling, combine the sections into one paper
 * file, then run pandoc once per requested format.
 *
 * @param directory - Project directory containing the sections and config.
 * @param formats - Formats to build; `'all'` expands to `pdf`, `docx` and `tex`.
 * @param options - Build options; `options.config` is used instead of loading
 *   the config from `directory` when provided.
 * @returns Per-format results, the combined paper path, collected warnings,
 *   and the forward-reference / auto-injected-references bookkeeping reported
 *   by the combine step.
 * @throws Error when pandoc is not installed.
 */
export async function build(
  directory: string,
  formats: string[] = ['pdf', 'docx'],
  options: BuildOptions = {}
): Promise<FullBuildResult> {
  const warnings: string[] = [];

  // Check pandoc
  if (!hasPandoc()) {
    const instruction = getInstallInstructions('pandoc');
    throw new Error(`Pandoc not found. Install with: ${instruction}\nOr run: rev doctor`);
  }

  // Check LaTeX if PDF is requested
  if ((formats.includes('pdf') || formats.includes('all')) && !hasLatex()) {
    warnings.push(`LaTeX not found - PDF generation may fail. Install with: ${getInstallInstructions('latex')}`);
  }

  // Check pandoc-crossref
  if (!hasPandocCrossref()) {
    warnings.push('pandoc-crossref not found - figure/table numbering will not work');
  }

  // Load config (use passed config if provided, otherwise load from file)
  const config = options.config || loadConfig(directory);

  // Combine sections → paper.md. combineSections reports back through the
  // underscore-prefixed fields of the options object it is given, so it gets
  // its own copy rather than the caller's object.
  const buildOptions: CombineOptions = { ...options };
  const paperPath = combineSections(directory, config, buildOptions);
  const forwardRefsResolved = buildOptions._forwardRefsResolved || 0;
  const refsAutoInjected = buildOptions._refsAutoInjected || false;

  // Expand 'all' to all formats (into a local, leaving the parameter alone)
  const targetFormats = formats.includes('all') ? ['pdf', 'docx', 'tex'] : formats;

  // Build and save image registry when DOCX is being built
  // This allows import to restore proper image syntax from Word documents
  if (targetFormats.includes('docx')) {
    const paperContent = fs.readFileSync(paperPath, 'utf-8');
    const crossrefReg = buildRegistry(directory, config.sections);
    // NOTE(review): the `any` casts hide a type mismatch between the crossref
    // and image registry shapes — worth tightening where those types live.
    const imageReg = buildImageRegistry(paperContent, crossrefReg as any);
    if ((imageReg as any).figures?.length > 0) {
      writeImageRegistry(directory, imageReg);
    }

    // Warn about raw LaTeX figure blocks that won't render in docx (pandoc
    // drops them silently). With auto-translate on (default), this surfaces
    // only the exotic leftovers; with it off, every block.
    const rawFigWarning = collectRawLatexFigureWarning(directory, config);
    if (rawFigWarning) warnings.push(rawFigWarning);
  }

  const results: BuildResult[] = [];

  for (const format of targetFormats) {
    // Prepare format-specific version
    const preparedPath = prepareForFormat(paperPath, format, config, options);

    try {
      // Run pandoc
      const result = await runPandoc(preparedPath, format, config, options);
      results.push({ format, ...result });
    } finally {
      // Clean up temp file — in `finally` so it is removed even when the
      // pandoc step throws instead of returning a failed result.
      try {
        fs.unlinkSync(preparedPath);
      } catch {
        // Ignore cleanup errors
      }
    }
  }

  return { results, paperPath, warnings, forwardRefsResolved, refsAutoInjected };
}
1750
+
1751
/**
 * Render build results as a multi-line status summary.
 *
 * Each result becomes one line: the upper-cased format followed by either the
 * output file's basename (on success) or `FAILED - <error>`.
 *
 * @param results - Per-format build results.
 * @returns The summary lines joined with newlines; empty string for no results.
 */
export function formatBuildResults(results: BuildResult[]): string {
  return results
    .map((entry) => {
      const label = entry.format.toUpperCase();
      return entry.success
        ? ` ${label}: ${path.basename(entry.outputPath!)}`
        : ` ${label}: FAILED - ${entry.error}`;
    })
    .join('\n');
}