docrev 0.9.11 → 0.9.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. package/.claude/settings.local.json +9 -9
  2. package/.gitattributes +1 -1
  3. package/CHANGELOG.md +149 -149
  4. package/PLAN-tables-and-postprocess.md +850 -850
  5. package/README.md +391 -391
  6. package/bin/rev.js +11 -11
  7. package/bin/rev.ts +145 -145
  8. package/completions/rev.bash +127 -127
  9. package/completions/rev.ps1 +210 -210
  10. package/completions/rev.zsh +207 -207
  11. package/dev_notes/stress2/build_adversarial.ts +186 -186
  12. package/dev_notes/stress2/drift_matcher.ts +62 -62
  13. package/dev_notes/stress2/probe_anchors.ts +35 -35
  14. package/dev_notes/stress2/project/discussion.before.md +3 -3
  15. package/dev_notes/stress2/project/discussion.md +3 -3
  16. package/dev_notes/stress2/project/methods.before.md +20 -20
  17. package/dev_notes/stress2/project/methods.md +20 -20
  18. package/dev_notes/stress2/project/rev.yaml +5 -5
  19. package/dev_notes/stress2/project/sections.yaml +4 -4
  20. package/dev_notes/stress2/sections.yaml +5 -5
  21. package/dev_notes/stress2/trace_placement.ts +50 -50
  22. package/dev_notes/stresstest_boundaries.ts +27 -27
  23. package/dev_notes/stresstest_drift_apply.ts +43 -43
  24. package/dev_notes/stresstest_drift_compare.ts +43 -43
  25. package/dev_notes/stresstest_drift_v2.ts +54 -54
  26. package/dev_notes/stresstest_inspect.ts +54 -54
  27. package/dev_notes/stresstest_pstyle.ts +55 -55
  28. package/dev_notes/stresstest_section_debug.ts +23 -23
  29. package/dev_notes/stresstest_split.ts +70 -70
  30. package/dev_notes/stresstest_trace.ts +19 -19
  31. package/dev_notes/stresstest_verify_no_overwrite.ts +40 -40
  32. package/dist/lib/build.d.ts +50 -1
  33. package/dist/lib/build.d.ts.map +1 -1
  34. package/dist/lib/build.js +80 -30
  35. package/dist/lib/build.js.map +1 -1
  36. package/dist/lib/commands/build.d.ts.map +1 -1
  37. package/dist/lib/commands/build.js +38 -5
  38. package/dist/lib/commands/build.js.map +1 -1
  39. package/dist/lib/commands/utilities.js +164 -164
  40. package/dist/lib/commands/word-tools.js +8 -8
  41. package/dist/lib/grammar.js +3 -3
  42. package/dist/lib/import.d.ts.map +1 -1
  43. package/dist/lib/import.js +146 -24
  44. package/dist/lib/import.js.map +1 -1
  45. package/dist/lib/pdf-comments.js +44 -44
  46. package/dist/lib/plugins.js +57 -57
  47. package/dist/lib/pptx-themes.js +115 -115
  48. package/dist/lib/spelling.js +2 -2
  49. package/dist/lib/templates.js +387 -387
  50. package/dist/lib/themes.js +51 -51
  51. package/dist/lib/types.d.ts +20 -0
  52. package/dist/lib/types.d.ts.map +1 -1
  53. package/dist/lib/word-extraction.d.ts +6 -0
  54. package/dist/lib/word-extraction.d.ts.map +1 -1
  55. package/dist/lib/word-extraction.js +46 -3
  56. package/dist/lib/word-extraction.js.map +1 -1
  57. package/dist/lib/wordcomments.d.ts.map +1 -1
  58. package/dist/lib/wordcomments.js +23 -5
  59. package/dist/lib/wordcomments.js.map +1 -1
  60. package/eslint.config.js +27 -27
  61. package/lib/anchor-match.ts +276 -276
  62. package/lib/annotations.ts +644 -644
  63. package/lib/build.ts +1300 -1227
  64. package/lib/citations.ts +160 -160
  65. package/lib/commands/build.ts +833 -801
  66. package/lib/commands/citations.ts +515 -515
  67. package/lib/commands/comments.ts +1050 -1050
  68. package/lib/commands/context.ts +174 -174
  69. package/lib/commands/core.ts +309 -309
  70. package/lib/commands/doi.ts +435 -435
  71. package/lib/commands/file-ops.ts +372 -372
  72. package/lib/commands/history.ts +320 -320
  73. package/lib/commands/index.ts +87 -87
  74. package/lib/commands/init.ts +259 -259
  75. package/lib/commands/merge-resolve.ts +378 -378
  76. package/lib/commands/preview.ts +178 -178
  77. package/lib/commands/project-info.ts +244 -244
  78. package/lib/commands/quality.ts +517 -517
  79. package/lib/commands/response.ts +454 -454
  80. package/lib/commands/section-boundaries.ts +82 -82
  81. package/lib/commands/sections.ts +451 -451
  82. package/lib/commands/sync.ts +706 -706
  83. package/lib/commands/text-ops.ts +449 -449
  84. package/lib/commands/utilities.ts +448 -448
  85. package/lib/commands/verify-anchors.ts +272 -272
  86. package/lib/commands/word-tools.ts +340 -340
  87. package/lib/comment-realign.ts +517 -517
  88. package/lib/config.ts +84 -84
  89. package/lib/crossref.ts +781 -781
  90. package/lib/csl.ts +191 -191
  91. package/lib/dependencies.ts +98 -98
  92. package/lib/diff-engine.ts +465 -465
  93. package/lib/doi-cache.ts +115 -115
  94. package/lib/doi.ts +897 -897
  95. package/lib/equations.ts +506 -506
  96. package/lib/errors.ts +346 -346
  97. package/lib/format.ts +541 -541
  98. package/lib/git.ts +326 -326
  99. package/lib/grammar.ts +303 -303
  100. package/lib/image-registry.ts +180 -180
  101. package/lib/import.ts +911 -792
  102. package/lib/journals.ts +543 -543
  103. package/lib/merge.ts +633 -633
  104. package/lib/orcid.ts +144 -144
  105. package/lib/pdf-comments.ts +263 -263
  106. package/lib/pdf-import.ts +524 -524
  107. package/lib/plugins.ts +362 -362
  108. package/lib/postprocess.ts +188 -188
  109. package/lib/pptx-color-filter.lua +37 -37
  110. package/lib/pptx-template.ts +469 -469
  111. package/lib/pptx-themes.ts +483 -483
  112. package/lib/protect-restore.ts +520 -520
  113. package/lib/rate-limiter.ts +94 -94
  114. package/lib/response.ts +197 -197
  115. package/lib/restore-references.ts +240 -240
  116. package/lib/review.ts +327 -327
  117. package/lib/schema.ts +417 -417
  118. package/lib/scientific-words.ts +73 -73
  119. package/lib/sections.ts +335 -335
  120. package/lib/slides.ts +756 -756
  121. package/lib/spelling.ts +334 -334
  122. package/lib/templates.ts +526 -526
  123. package/lib/themes.ts +742 -742
  124. package/lib/trackchanges.ts +247 -247
  125. package/lib/tui.ts +450 -450
  126. package/lib/types.ts +550 -530
  127. package/lib/undo.ts +250 -250
  128. package/lib/utils.ts +69 -69
  129. package/lib/variables.ts +179 -179
  130. package/lib/word-extraction.ts +806 -759
  131. package/lib/word.ts +643 -643
  132. package/lib/wordcomments.ts +817 -798
  133. package/package.json +137 -137
  134. package/scripts/postbuild.js +28 -28
  135. package/skill/REFERENCE.md +431 -431
  136. package/skill/SKILL.md +258 -258
  137. package/tsconfig.json +26 -26
  138. package/types/index.d.ts +525 -525
@@ -1,850 +1,850 @@
1
- # Implementation Plan: Table Formatting & Postprocess Scripting
2
-
3
- ## Overview
4
-
5
- Two features to implement:
6
- 1. **Table formatting config** - Make `tables:` config in rev.yaml actually work
7
- 2. **Postprocess scripting** - Allow users to run custom transformations on output
8
-
9
- ---
10
-
11
- ## Part 1: Table Formatting
12
-
13
- ### Problem Statement
14
-
15
- Pandoc's longtable with proportional `p{}` column widths forces text wrapping. Users need:
16
- - Columns that don't wrap (e.g., `N(0, 0.5)` should stay on one line)
17
- - Custom alignment per column
18
- - Math notation conversion (Normal → 𝒩)
19
-
20
- ### Why Previous Attempt Failed
21
-
22
- The Lua filter approach failed because:
23
- 1. Pandoc calculates column widths from markdown before the filter runs
24
- 2. Setting `ColWidthDefault` in Lua results in `0.0000` width, not auto-width
25
- 3. `\mbox{}` in a `p{}` column overflows instead of expanding
26
-
27
- ### Solution: LaTeX Header Injection
28
-
29
- Instead of a Lua filter, inject LaTeX packages/commands via `header-includes`.
30
-
31
- #### Implementation Steps
32
-
33
- **Step 1: Add `pdf.headerIncludes` config option** (passed to pandoc as the `header-includes` variable)
34
-
35
- File: `lib/build.js`
36
-
37
- ```javascript
38
- // In DEFAULT_CONFIG.pdf:
39
- pdf: {
40
- template: null,
41
- documentclass: 'article',
42
- fontsize: '12pt',
43
- geometry: 'margin=1in',
44
- linestretch: 1.5,
45
- numbersections: false,
46
- toc: false,
47
- headerIncludes: null, // NEW: string or array of LaTeX code
48
- },
49
- ```
50
-
51
- **Step 2: Pass header-includes to pandoc**
52
-
53
- File: `lib/build.js`, in `buildPandocArgs()`:
54
-
55
- ```javascript
56
- if (format === 'pdf') {
57
- // ... existing code ...
58
-
59
- // Header includes (LaTeX preamble additions)
60
- if (config.pdf.headerIncludes) {
61
- const includes = Array.isArray(config.pdf.headerIncludes)
62
- ? config.pdf.headerIncludes
63
- : [config.pdf.headerIncludes];
64
- for (const inc of includes) {
65
- args.push('-V', `header-includes=${inc}`);
66
- }
67
- }
68
- }
69
- ```
70
-
71
- **Step 3: Create table-focused presets**
72
-
73
- File: `lib/build.js`, new function:
74
-
75
- ```javascript
76
- /**
77
- * Generate LaTeX header-includes for table configuration
78
- * @param {object} tablesConfig
79
- * @returns {string[]} LaTeX code lines
80
- */
81
- function generateTableLatex(tablesConfig) {
82
- const lines = [];
83
-
84
- if (!tablesConfig) return lines;
85
-
86
- // Always include array package for column type customization
87
- lines.push('\\usepackage{array}');
88
-
89
- // Add an N{width} column type for use in hand-written LaTeX tables.
89
- // Note: this is a ragged-right p{} column, so text still wraps at the given width; it only turns off justification
91
- if (tablesConfig.nowrap) {
92
- lines.push('% Nowrap column type for tables');
93
- lines.push('\\newcolumntype{N}[1]{>{\\raggedright\\arraybackslash}p{#1}}');
94
- }
95
-
96
- // Small tables
97
- if (tablesConfig.small) {
98
- lines.push('% Apply small font to longtable environment');
99
- lines.push('\\usepackage{etoolbox}'); // provides \AtBeginEnvironment, so it must be loaded first
100
- lines.push('\\AtBeginEnvironment{longtable}{\\small}');
101
- }
102
-
103
- return lines;
104
- }
105
- ```
106
-
107
- **Step 4: Integrate into build pipeline**
108
-
109
- File: `lib/build.js`, in `buildPandocArgs()`:
110
-
111
- ```javascript
112
- if (format === 'pdf' || format === 'tex') {
113
- // Generate table-specific LaTeX if tables config exists
114
- const tableLatex = generateTableLatex(config.tables);
115
- if (tableLatex.length > 0) {
116
- for (const line of tableLatex) {
117
- args.push('-V', `header-includes=${line}`);
118
- }
119
- }
120
- }
121
- ```
122
-
123
- **Step 5: Add markdown preprocessing for nowrap columns**
124
-
125
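- Since we can't change pandoc's column width calculation, we preprocess the markdown instead: in columns listed under `tables.nowrap`, distribution notation is rewritten as inline math (e.g. `Normal(0, 0.5)` → `$\mathcal{N}(0, 0.5)$`). No `\mbox{}` is emitted, because (as noted above) an `\mbox{}` inside a `p{}` column overflows instead of widening the column.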
126
-
127
- File: `lib/build.js`, new function:
128
-
129
- ```javascript
130
- /**
131
- * Process markdown tables to apply nowrap to specified columns
132
- * Rewrites distribution notation in those cells as inline math for LaTeX output
133
- * @param {string} content - Markdown content
134
- * @param {object} tablesConfig - tables config from rev.yaml
135
- * @param {string} format - output format
136
- * @returns {string} processed content
137
- */
138
- function processTablesForFormat(content, tablesConfig, format) {
139
- if (!tablesConfig?.nowrap?.length || format !== 'pdf') {
140
- return content;
141
- }
142
-
143
- const nowrapPatterns = tablesConfig.nowrap.map(p => p.toLowerCase());
144
-
145
- // Match pipe tables
146
- const tableRegex = /(\|[^\n]+\|\n\|[-:| ]+\|\n)((?:\|[^\n]+\|\n)+)/g;
147
-
148
- return content.replace(tableRegex, (match, header, body) => {
149
- // Parse header to find nowrap column indices
150
- const headerCells = header.split('|').slice(1, -1).map(c => c.trim().toLowerCase());
151
- const nowrapCols = headerCells.map((cell, i) =>
152
- nowrapPatterns.some(p => cell.includes(p)) ? i : -1
153
- ).filter(i => i >= 0);
154
-
155
- if (nowrapCols.length === 0) return match;
156
-
157
- // Process body rows
158
- const processedBody = body.split('\n').filter(l => l.trim()).map(row => {
159
- const cells = row.split('|').slice(1, -1);
160
- nowrapCols.forEach(colIdx => {
161
- if (cells[colIdx]) {
162
- const content = cells[colIdx].trim();
163
- // Skip if already has LaTeX or is empty
164
- if (content && !content.startsWith('\\') && !content.startsWith('$')) {
165
- // Convert distribution notation to math
166
- let processed = content
167
- .replace(/Normal\(([^)]+)\)/g, '$\\mathcal{N}($1)$')
168
- .replace(/Student-t\((\d+),\s*([^)]+)\)/g, '$t_{$1}($2)$')
169
- .replace(/Gamma\(([^)]+)\)/g, '$\\text{Gamma}($1)$');
170
- cells[colIdx] = ` ${processed} `;
171
- }
172
- }
173
- });
174
- return '|' + cells.join('|') + '|';
175
- }).join('\n');
176
-
177
- return header + processedBody + '\n';
178
- });
179
- }
180
- ```
181
-
182
- **Step 6: Call from prepareForFormat**
183
-
184
- ```javascript
185
- export function prepareForFormat(paperPath, format, config, options = {}) {
186
- // ... existing code ...
187
-
188
- if (format === 'pdf' || format === 'tex') {
189
- content = stripAnnotations(content);
190
- // NEW: Process tables for nowrap columns
191
- content = processTablesForFormat(content, config.tables, format);
192
- }
193
-
194
- // ... rest of function ...
195
- }
196
- ```
197
-
198
- #### Test Plan for Tables
199
-
200
- File: `test/tables.test.js`
201
-
202
- ```javascript
203
- import { describe, it, beforeEach, afterEach } from 'node:test';
204
- import assert from 'node:assert';
205
- import * as fs from 'fs';
206
- import * as path from 'path';
207
- import * as os from 'os';
208
- import { processTablesForFormat, generateTableLatex } from '../lib/build.js';
209
-
210
- describe('Table Processing', () => {
211
- describe('generateTableLatex', () => {
212
- it('returns empty array with no config', () => {
213
- assert.deepStrictEqual(generateTableLatex(null), []);
214
- assert.deepStrictEqual(generateTableLatex({}), []);
215
- });
216
-
217
- it('adds array package when nowrap specified', () => {
218
- const result = generateTableLatex({ nowrap: ['Prior'] });
219
- assert.ok(result.includes('\\usepackage{array}'));
220
- });
221
-
222
- it('adds small table styling when small=true', () => {
223
- const result = generateTableLatex({ small: true });
224
- assert.ok(result.some(l => l.includes('\\small')));
225
- assert.ok(result.some(l => l.includes('etoolbox')));
226
- });
227
- });
228
-
229
- describe('processTablesForFormat', () => {
230
- const sampleTable = `| Component | Prior | Justification |
231
- |:----------|:------|:--------------|
232
- | Intercept | Normal(1.5, 0.5) | Weak prior |
233
- | Slope | Normal(0, 0.3) | Centered |`;
234
-
235
- it('returns unchanged for non-pdf format', () => {
236
- const config = { nowrap: ['Prior'] };
237
- const result = processTablesForFormat(sampleTable, config, 'docx');
238
- assert.strictEqual(result, sampleTable);
239
- });
240
-
241
- it('returns unchanged with no nowrap config', () => {
242
- const result = processTablesForFormat(sampleTable, {}, 'pdf');
243
- assert.strictEqual(result, sampleTable);
244
- });
245
-
246
- it('converts Normal() to mathcal N in nowrap columns', () => {
247
- const config = { nowrap: ['Prior'] };
248
- const result = processTablesForFormat(sampleTable, config, 'pdf');
249
- assert.ok(result.includes('$\\mathcal{N}(1.5, 0.5)$'));
250
- assert.ok(result.includes('$\\mathcal{N}(0, 0.3)$'));
251
- });
252
-
253
- it('converts Student-t() to subscript notation', () => {
254
- const table = `| Param | Prior |
255
- |-------|-------|
256
- | SD | Student-t(3, 0, 2.5) |`;
257
- const config = { nowrap: ['Prior'] };
258
- const result = processTablesForFormat(table, config, 'pdf');
259
- assert.ok(result.includes('$t_{3}(0, 2.5)$'));
260
- });
261
-
262
- it('does not modify columns not in nowrap list', () => {
263
- const config = { nowrap: ['Prior'] };
264
- const result = processTablesForFormat(sampleTable, config, 'pdf');
265
- assert.ok(result.includes('Weak prior')); // unchanged
266
- assert.ok(!result.includes('$Weak prior$'));
267
- });
268
-
269
- it('handles case-insensitive column matching', () => {
270
- const config = { nowrap: ['PRIOR'] };
271
- const result = processTablesForFormat(sampleTable, config, 'pdf');
272
- assert.ok(result.includes('$\\mathcal{N}'));
273
- });
274
-
275
- it('skips cells that already have math', () => {
276
- const table = `| Param | Prior |
277
- |-------|-------|
278
- | X | $\\mathcal{N}(0, 1)$ |`;
279
- const config = { nowrap: ['Prior'] };
280
- const result = processTablesForFormat(table, config, 'pdf');
281
- // Should not double-wrap
282
- assert.ok(!result.includes('$$'));
283
- });
284
- });
285
- });
286
- ```
287
-
288
- #### Usage Example
289
-
290
- ```yaml
291
- # rev.yaml
292
- tables:
293
- nowrap:
294
- - Prior
295
- - "$\\widehat{R}$"
296
- small: false
297
- ```
298
-
299
- ```markdown
300
- | Parameter | Prior | Justification |
301
- |:----------|:------|:--------------|
302
- | Intercept | Normal(1.5, 0.5) | Prior P ~82% |
303
- | Slope | Normal(0, 0.5) | Moderate |
304
- ```
305
-
306
- Output: Prior column cells become `$\mathcal{N}(1.5, 0.5)$` in PDF.
307
-
308
- ---
309
-
310
- ## Part 2: Postprocess Scripting
311
-
312
- ### Problem Statement
313
-
314
- Users need fine-grained control over output that pandoc/docrev can't provide:
315
- - Custom LaTeX tweaks after generation
316
- - Search/replace in generated files
317
- - Format-specific post-processing (e.g., inject custom XML into DOCX)
318
-
319
- ### Design Principles
320
-
321
- 1. **Start simple** - Shell scripts first, DSL later if needed
322
- 2. **Per-format** - Different postprocess for PDF vs DOCX
323
- 3. **Safe defaults** - Scripts must be explicitly enabled
324
- 4. **Debugging** - Clear error messages, optional verbose mode
325
-
326
- ### Implementation Approach
327
-
328
- #### Phase 1: Shell Script Postprocessing (MVP)
329
-
330
- **Config Schema:**
331
-
332
- ```yaml
333
- # rev.yaml
334
- postprocess:
335
- pdf: ./scripts/fix-tables.sh # Run after PDF generated
336
- docx: ./scripts/add-headers.ps1 # Run after DOCX generated
337
- all: ./scripts/common.sh # Run after any format
338
- ```
339
-
340
- **Implementation Steps:**
341
-
342
- **Step 1: Add postprocess to DEFAULT_CONFIG**
343
-
344
- File: `lib/build.js`
345
-
346
- ```javascript
347
- export const DEFAULT_CONFIG = {
348
- // ... existing ...
349
- postprocess: {
350
- pdf: null,
351
- docx: null,
352
- tex: null,
353
- pptx: null,
354
- beamer: null,
355
- all: null,
356
- },
357
- };
358
- ```
359
-
360
- **Step 2: Add postprocess runner**
361
-
362
- File: `lib/postprocess.js` (new file)
363
-
364
- ```javascript
365
- import * as fs from 'fs';
366
- import * as path from 'path';
367
- import { execSync, spawn } from 'child_process';
368
-
369
- /**
370
- * Run postprocess script for a given format
371
- * @param {string} outputPath - Path to generated file
372
- * @param {string} format - Output format (pdf, docx, etc.)
373
- * @param {object} config - Full config object
374
- * @param {object} options - { verbose: boolean }
375
- * @returns {Promise<{success: boolean, error?: string}>}
376
- */
377
- export async function runPostprocess(outputPath, format, config, options = {}) {
378
- const postprocessConfig = config.postprocess || {};
379
-
380
- // Collect scripts to run (format-specific + all)
381
- const scripts = [];
382
- if (postprocessConfig[format]) {
383
- scripts.push(postprocessConfig[format]);
384
- }
385
- if (postprocessConfig.all) {
386
- scripts.push(postprocessConfig.all);
387
- }
388
-
389
- if (scripts.length === 0) {
390
- return { success: true };
391
- }
392
-
393
- const directory = path.dirname(outputPath);
394
- const errors = [];
395
-
396
- for (const scriptPath of scripts) {
397
- const absoluteScript = path.isAbsolute(scriptPath)
398
- ? scriptPath
399
- : path.join(directory, scriptPath);
400
-
401
- if (!fs.existsSync(absoluteScript)) {
402
- errors.push(`Postprocess script not found: ${scriptPath}`);
403
- continue;
404
- }
405
-
406
- try {
407
- const result = await executeScript(absoluteScript, {
408
- OUTPUT_FILE: outputPath,
409
- OUTPUT_FORMAT: format,
410
- PROJECT_DIR: directory,
411
- CONFIG_PATH: config._configPath || '',
412
- }, options);
413
-
414
- if (!result.success) {
415
- errors.push(`Script ${scriptPath} failed: ${result.error}`);
416
- }
417
- } catch (err) {
418
- errors.push(`Script ${scriptPath} error: ${err.message}`);
419
- }
420
- }
421
-
422
- return {
423
- success: errors.length === 0,
424
- error: errors.join('\n'),
425
- };
426
- }
427
-
428
- /**
429
- * Execute a script with environment variables
430
- * @param {string} scriptPath
431
- * @param {object} env - Environment variables to set
432
- * @param {object} options
433
- * @returns {Promise<{success: boolean, stdout: string, stderr: string, error?: string}>}
434
- */
435
- async function executeScript(scriptPath, env, options = {}) {
436
- return new Promise((resolve) => {
437
- const ext = path.extname(scriptPath).toLowerCase();
438
- let command, args;
439
-
440
- // Determine how to run based on extension
441
- if (ext === '.ps1') {
442
- command = 'powershell';
443
- args = ['-ExecutionPolicy', 'Bypass', '-File', scriptPath];
444
- } else if (ext === '.py') {
445
- command = 'python';
446
- args = [scriptPath];
447
- } else if (ext === '.js') {
448
- command = 'node';
449
- args = [scriptPath];
450
- } else {
451
- // Assume shell script
452
- command = process.platform === 'win32' ? 'bash' : '/bin/bash';
453
- args = [scriptPath];
454
- }
455
-
456
- const proc = spawn(command, args, {
457
- env: { ...process.env, ...env },
458
- cwd: path.dirname(scriptPath),
459
- stdio: ['ignore', 'pipe', 'pipe'],
460
- });
461
-
462
- let stdout = '';
463
- let stderr = '';
464
-
465
- proc.stdout.on('data', (data) => {
466
- stdout += data.toString();
467
- if (options.verbose) {
468
- process.stdout.write(data);
469
- }
470
- });
471
-
472
- proc.stderr.on('data', (data) => {
473
- stderr += data.toString();
474
- if (options.verbose) {
475
- process.stderr.write(data);
476
- }
477
- });
478
-
479
- proc.on('error', (err) => {
480
- resolve({ success: false, stdout, stderr, error: err.message });
481
- });
482
-
483
- proc.on('close', (code) => {
484
- if (code === 0) {
485
- resolve({ success: true, stdout, stderr });
486
- } else {
487
- resolve({
488
- success: false,
489
- stdout,
490
- stderr,
491
- error: `Exit code ${code}: ${stderr.trim() || 'Unknown error'}`
492
- });
493
- }
494
- });
495
- });
496
- }
497
-
498
- export { executeScript };
499
- ```
500
-
501
- **Step 3: Integrate into runPandoc**
502
-
503
- File: `lib/build.js`
504
-
505
- ```javascript
506
- import { runPostprocess } from './postprocess.js';
507
-
508
- // In runPandoc(), after pandoc completes successfully:
509
-
510
- pandoc.on('close', async (code) => {
511
- if (code === 0) {
512
- // Existing PPTX post-processing...
513
- if (format === 'pptx') {
514
- // ...
515
- }
516
-
517
- // NEW: Run user postprocess scripts
518
- const postResult = await runPostprocess(outputPath, format, config, options);
519
- if (!postResult.success) {
520
- console.error(`Postprocess warning: ${postResult.error}`);
521
- }
522
-
523
- resolve({ outputPath, success: true });
524
- } else {
525
- resolve({ outputPath: null, success: false, error: stderr });
526
- }
527
- });
528
- ```
529
-
530
- **Step 4: Add CLI verbose flag**
531
-
532
- File: `lib/commands/build.js`
533
-
534
- ```javascript
535
- .option('--verbose', 'Show detailed output including postprocess scripts')
536
-
537
- // Pass to build():
538
- await build(targetDir, formats, { verbose: options.verbose });
539
- ```
540
-
541
- #### Phase 2: DSL for Common Operations (Future)
542
-
543
- If shell scripts prove insufficient, add a simple declarative DSL:
544
-
545
- ```yaml
546
- # rev.yaml
547
- postprocess:
548
- pdf:
549
- - type: replace
550
- pattern: "\\\\begin{longtable}"
551
- replacement: "\\\\begin{longtable}[l]"
552
- - type: inject
553
- after: "\\\\begin{document}"
554
- content: "\\\\newcommand{\\\\N}{\\\\mathcal{N}}"
555
- - type: script
556
- path: ./scripts/final-fixes.sh
557
- ```
558
-
559
- This would require:
560
- - New file: `lib/postprocess-dsl.js`
561
- - Operation handlers for each type
562
- - Validation of DSL syntax
563
- - Clear error messages for invalid operations
564
-
565
- #### Test Plan for Postprocessing
566
-
567
- File: `test/postprocess.test.js`
568
-
569
- ```javascript
570
- import { describe, it, beforeEach, afterEach } from 'node:test';
571
- import assert from 'node:assert';
572
- import * as fs from 'fs';
573
- import * as path from 'path';
574
- import * as os from 'os';
575
- import { runPostprocess, executeScript } from '../lib/postprocess.js';
576
-
577
- describe('Postprocessing', () => {
578
- let tempDir;
579
-
580
- beforeEach(() => {
581
- tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'docrev-test-'));
582
- });
583
-
584
- afterEach(() => {
585
- fs.rmSync(tempDir, { recursive: true, force: true });
586
- });
587
-
588
- describe('executeScript', () => {
589
- it('runs shell script with environment variables', async () => {
590
- const scriptPath = path.join(tempDir, 'test.sh');
591
- fs.writeFileSync(scriptPath, '#!/bin/bash\necho "$OUTPUT_FILE"', { mode: 0o755 });
592
-
593
- const result = await executeScript(scriptPath, { OUTPUT_FILE: '/tmp/test.pdf' });
594
- assert.ok(result.success);
595
- assert.ok(result.stdout.includes('/tmp/test.pdf'));
596
- });
597
-
598
- it('returns error for non-existent script', async () => {
599
- const result = await executeScript('/nonexistent/script.sh', {});
600
- assert.ok(!result.success);
601
- });
602
-
603
- it('captures exit code on failure', async () => {
604
- const scriptPath = path.join(tempDir, 'fail.sh');
605
- fs.writeFileSync(scriptPath, '#!/bin/bash\nexit 1', { mode: 0o755 });
606
-
607
- const result = await executeScript(scriptPath, {});
608
- assert.ok(!result.success);
609
- assert.ok(result.error.includes('Exit code 1'));
610
- });
611
-
612
- it('runs PowerShell scripts on Windows', async function() {
613
- if (process.platform !== 'win32') {
614
- this.skip();
615
- return;
616
- }
617
-
618
- const scriptPath = path.join(tempDir, 'test.ps1');
619
- fs.writeFileSync(scriptPath, 'Write-Host $env:OUTPUT_FILE');
620
-
621
- const result = await executeScript(scriptPath, { OUTPUT_FILE: 'C:\\test.pdf' });
622
- assert.ok(result.success);
623
- assert.ok(result.stdout.includes('C:\\test.pdf'));
624
- });
625
-
626
- it('runs Python scripts', async () => {
627
- const scriptPath = path.join(tempDir, 'test.py');
628
- fs.writeFileSync(scriptPath, 'import os; print(os.environ["OUTPUT_FILE"])');
629
-
630
- const result = await executeScript(scriptPath, { OUTPUT_FILE: '/tmp/test.pdf' });
631
- assert.ok(result.success);
632
- assert.ok(result.stdout.includes('/tmp/test.pdf'));
633
- });
634
-
635
- it('runs Node.js scripts', async () => {
636
- const scriptPath = path.join(tempDir, 'test.js');
637
- fs.writeFileSync(scriptPath, 'console.log(process.env.OUTPUT_FILE)');
638
-
639
- const result = await executeScript(scriptPath, { OUTPUT_FILE: '/tmp/test.pdf' });
640
- assert.ok(result.success);
641
- assert.ok(result.stdout.includes('/tmp/test.pdf'));
642
- });
643
- });
644
-
645
- describe('runPostprocess', () => {
646
- it('returns success with no postprocess config', async () => {
647
- const result = await runPostprocess('/tmp/test.pdf', 'pdf', {});
648
- assert.ok(result.success);
649
- });
650
-
651
- it('runs format-specific script', async () => {
652
- const scriptPath = path.join(tempDir, 'pdf-post.sh');
653
- const markerPath = path.join(tempDir, 'marker.txt');
654
- fs.writeFileSync(scriptPath, `#!/bin/bash\necho "ran" > "${markerPath}"`, { mode: 0o755 });
655
-
656
- const config = {
657
- postprocess: { pdf: scriptPath },
658
- _configPath: path.join(tempDir, 'rev.yaml'),
659
- };
660
-
661
- const result = await runPostprocess(path.join(tempDir, 'out.pdf'), 'pdf', config);
662
- assert.ok(result.success);
663
- assert.ok(fs.existsSync(markerPath));
664
- });
665
-
666
- it('runs "all" script for any format', async () => {
667
- const scriptPath = path.join(tempDir, 'all-post.sh');
668
- const markerPath = path.join(tempDir, 'marker.txt');
669
- fs.writeFileSync(scriptPath, `#!/bin/bash\necho "$OUTPUT_FORMAT" > "${markerPath}"`, { mode: 0o755 });
670
-
671
- const config = {
672
- postprocess: { all: scriptPath },
673
- _configPath: path.join(tempDir, 'rev.yaml'),
674
- };
675
-
676
- await runPostprocess(path.join(tempDir, 'out.docx'), 'docx', config);
677
- assert.ok(fs.existsSync(markerPath));
678
- assert.strictEqual(fs.readFileSync(markerPath, 'utf-8').trim(), 'docx');
679
- });
680
-
681
- it('runs both format-specific and all scripts', async () => {
682
- const pdfScript = path.join(tempDir, 'pdf.sh');
683
- const allScript = path.join(tempDir, 'all.sh');
684
- const pdfMarker = path.join(tempDir, 'pdf-marker.txt');
685
- const allMarker = path.join(tempDir, 'all-marker.txt');
686
-
687
- fs.writeFileSync(pdfScript, `#!/bin/bash\ntouch "${pdfMarker}"`, { mode: 0o755 });
688
- fs.writeFileSync(allScript, `#!/bin/bash\ntouch "${allMarker}"`, { mode: 0o755 });
689
-
690
- const config = {
691
- postprocess: { pdf: pdfScript, all: allScript },
692
- _configPath: path.join(tempDir, 'rev.yaml'),
693
- };
694
-
695
- await runPostprocess(path.join(tempDir, 'out.pdf'), 'pdf', config);
696
- assert.ok(fs.existsSync(pdfMarker));
697
- assert.ok(fs.existsSync(allMarker));
698
- });
699
-
700
- it('reports error for missing script', async () => {
701
- const config = {
702
- postprocess: { pdf: './nonexistent.sh' },
703
- _configPath: path.join(tempDir, 'rev.yaml'),
704
- };
705
-
706
- const result = await runPostprocess(path.join(tempDir, 'out.pdf'), 'pdf', config);
707
- assert.ok(!result.success);
708
- assert.ok(result.error.includes('not found'));
709
- });
710
-
711
- it('reports error for failing script', async () => {
712
- const scriptPath = path.join(tempDir, 'fail.sh');
713
- fs.writeFileSync(scriptPath, '#!/bin/bash\nexit 42', { mode: 0o755 });
714
-
715
- const config = {
716
- postprocess: { pdf: scriptPath },
717
- _configPath: path.join(tempDir, 'rev.yaml'),
718
- };
719
-
720
- const result = await runPostprocess(path.join(tempDir, 'out.pdf'), 'pdf', config);
721
- assert.ok(!result.success);
722
- assert.ok(result.error.includes('42') || result.error.includes('failed'));
723
- });
724
- });
725
- });
726
- ```
727
-
728
- ---
729
-
730
- ## Implementation Order
731
-
732
- ### Sprint 1: Table Preprocessing (2-3 hours)
733
-
734
- 1. [ ] Add `processTablesForFormat()` function to `lib/build.js`
735
- 2. [ ] Integrate into `prepareForFormat()`
736
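- 3. [ ] Write the `processTablesForFormat` tests in `test/tables.test.js` (the `generateTableLatex` tests depend on Sprint 3 and are added there)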
737
- 4. [ ] Test with paper 2 priors table
738
- 5. [ ] Document in README
739
-
740
- ### Sprint 2: Postprocess Shell Scripts (3-4 hours)
741
-
742
- 1. [ ] Create `lib/postprocess.js` with `executeScript()` and `runPostprocess()`
743
- 2. [ ] Add `postprocess` to `DEFAULT_CONFIG`
744
- 3. [ ] Add config merging in `loadConfig()`
745
- 4. [ ] Integrate into `runPandoc()` after output generation
746
- 5. [ ] Add `--verbose` flag to CLI
747
- 6. [ ] Write tests in `test/postprocess.test.js`
748
- 7. [ ] Create example scripts in `examples/postprocess/`
749
- 8. [ ] Document in README
750
-
751
- ### Sprint 3: Header Includes (1-2 hours)
752
-
753
- 1. [ ] Add `pdf.headerIncludes` config option
754
- 2. [ ] Add `generateTableLatex()` helper
755
- 3. [ ] Pass to pandoc in `buildPandocArgs()`
756
- 4. [ ] Add tests
757
- 5. [ ] Document
758
-
759
- ### Future: DSL (if needed)
760
-
761
- Only implement if shell scripts prove insufficient for common use cases.
762
-
763
- ---
764
-
765
- ## Files to Create/Modify
766
-
767
- ### New Files
768
-
769
- | File | Purpose |
770
- |------|---------|
771
- | `lib/postprocess.js` | Postprocess script execution |
772
- | `test/tables.test.js` | Table processing tests |
773
- | `test/postprocess.test.js` | Postprocess tests |
774
- | `examples/postprocess/fix-tables.sh` | Example PDF postprocess |
775
- | `examples/postprocess/inject-headers.ps1` | Example DOCX postprocess |
776
-
777
- ### Modified Files
778
-
779
- | File | Changes |
780
- |------|---------|
781
- | `lib/build.js` | Add `processTablesForFormat()`, `generateTableLatex()`, integrate postprocess, add configs |
782
- | `lib/commands/build.js` | Add `--verbose` flag |
783
-
784
- ---
785
-
786
- ## Example Usage After Implementation
787
-
788
- ### Table Config
789
-
790
- ```yaml
791
- # rev.yaml
792
- tables:
793
- nowrap:
794
- - Prior
795
- - Value
796
- - Count
797
- small: true
798
- ```
799
-
800
- ### Postprocess Scripts
801
-
802
- ```yaml
803
- # rev.yaml
804
- postprocess:
805
- pdf: ./scripts/fix-latex.sh
806
- docx: ./scripts/add-metadata.py
807
- all: ./scripts/notify.js
808
- ```
809
-
810
- Example `fix-latex.sh`:
811
- ```bash
812
- #!/bin/bash
813
- # Receives: OUTPUT_FILE, OUTPUT_FORMAT, PROJECT_DIR, CONFIG_PATH
814
-
815
- # Example: Replace longtable alignment
816
- if [ "$OUTPUT_FORMAT" = "pdf" ]; then
817
- echo "PDF postprocessing not needed (can't modify PDF)"
818
- fi
819
- ```
820
-
821
- Example `add-metadata.py`:
822
- ```python
823
- #!/usr/bin/env python3
824
- import os
825
- from docx import Document
826
-
827
- doc = Document(os.environ['OUTPUT_FILE'])
828
- doc.core_properties.author = "Research Team"
829
- doc.save(os.environ['OUTPUT_FILE'])
830
- ```
831
-
832
- ---
833
-
834
- ## Risk Assessment
835
-
836
- | Risk | Likelihood | Impact | Mitigation |
837
- |------|------------|--------|------------|
838
- | Table preprocessing breaks edge cases | Medium | Medium | Extensive tests, careful regex |
839
- | Shell script security concerns | Low | High | Document that scripts run with user permissions |
840
- | Cross-platform script compatibility | Medium | Medium | Support multiple interpreters, document requirements |
841
- | Performance overhead from postprocess | Low | Low | Scripts are optional, run after main build |
842
-
843
- ---
844
-
845
- ## Success Criteria
846
-
847
- 1. **Tables**: `Normal(0, 0.5)` in nowrap column → `$\mathcal{N}(0, 0.5)$` in PDF output
848
- 2. **Postprocess**: User script receives correct environment variables and can modify output
849
- 3. **Tests**: All new tests pass, existing tests unchanged
850
- 4. **Docs**: README updated with examples for both features
1
+ # Implementation Plan: Table Formatting & Postprocess Scripting
2
+
3
+ ## Overview
4
+
5
+ Two features to implement:
6
+ 1. **Table formatting config** - Make `tables:` config in rev.yaml actually work
7
+ 2. **Postprocess scripting** - Allow users to run custom transformations on output
8
+
9
+ ---
10
+
11
+ ## Part 1: Table Formatting
12
+
13
+ ### Problem Statement
14
+
15
+ Pandoc's longtable with proportional `p{}` column widths forces text wrapping. Users need:
16
+ - Columns that don't wrap (e.g., `N(0, 0.5)` should stay on one line)
17
+ - Custom alignment per column
18
+ - Math notation conversion (Normal → 𝒩)
19
+
20
+ ### Why Previous Attempt Failed
21
+
22
+ The Lua filter approach failed because:
23
+ 1. Pandoc calculates column widths from markdown before the filter runs
24
+ 2. Setting `ColWidthDefault` in Lua results in `0.0000` width, not auto-width
25
+ 3. `\mbox{}` in a `p{}` column overflows instead of expanding
26
+
27
+ ### Solution: LaTeX Header Injection
28
+
29
+ Instead of a Lua filter, inject LaTeX packages/commands via `header-includes`.
30
+
31
+ #### Implementation Steps
32
+
33
+ **Step 1: Add `pdf.headerIncludes` config option**
34
+
35
+ File: `lib/build.js`
36
+
37
+ ```javascript
38
+ // In DEFAULT_CONFIG.pdf:
39
+ pdf: {
40
+ template: null,
41
+ documentclass: 'article',
42
+ fontsize: '12pt',
43
+ geometry: 'margin=1in',
44
+ linestretch: 1.5,
45
+ numbersections: false,
46
+ toc: false,
47
+ headerIncludes: null, // NEW: string or array of LaTeX code
48
+ },
49
+ ```
50
+
51
+ **Step 2: Pass header-includes to pandoc**
52
+
53
+ File: `lib/build.js`, in `buildPandocArgs()`:
54
+
55
+ ```javascript
56
+ if (format === 'pdf') {
57
+ // ... existing code ...
58
+
59
+ // Header includes (LaTeX preamble additions)
60
+ if (config.pdf.headerIncludes) {
61
+ const includes = Array.isArray(config.pdf.headerIncludes)
62
+ ? config.pdf.headerIncludes
63
+ : [config.pdf.headerIncludes];
64
+ for (const inc of includes) {
65
+ args.push('-V', `header-includes=${inc}`);
66
+ }
67
+ }
68
+ }
69
+ ```
70
+
71
+ **Step 3: Create table-focused presets**
72
+
73
+ File: `lib/build.js`, new function:
74
+
75
+ ```javascript
76
+ /**
77
+ * Generate LaTeX header-includes for table configuration
78
+ * @param {object} tablesConfig
79
+ * @returns {string[]} LaTeX code lines
80
+ */
81
+ function generateTableLatex(tablesConfig) {
82
+ const lines = [];
83
+
84
+ if (!tablesConfig) return lines;
85
+
86
+ // Always include array package for column type customization
87
+ lines.push('\\usepackage{array}');
88
+
89
+ // Add nowrap column type: use with N{width} in manual tables
90
+ // This creates a column that doesn't wrap but respects minipage
91
+ if (tablesConfig.nowrap) {
92
+ lines.push('% Nowrap column type for tables');
93
+ lines.push('\\newcolumntype{N}[1]{>{\\raggedright\\arraybackslash}p{#1}}');
94
+ }
95
+
96
+ // Small tables
97
+ if (tablesConfig.small) {
98
+ lines.push('% Apply small font to longtable environment');
99
+ lines.push('\\AtBeginEnvironment{longtable}{\\small}');
100
+ lines.push('\\usepackage{etoolbox}'); // for AtBeginEnvironment
101
+ }
102
+
103
+ return lines;
104
+ }
105
+ ```
106
+
107
+ **Step 4: Integrate into build pipeline**
108
+
109
+ File: `lib/build.js`, in `buildPandocArgs()`:
110
+
111
+ ```javascript
112
+ if (format === 'pdf' || format === 'tex') {
113
+ // Generate table-specific LaTeX if tables config exists
114
+ const tableLatex = generateTableLatex(config.tables);
115
+ if (tableLatex.length > 0) {
116
+ for (const line of tableLatex) {
117
+ args.push('-V', `header-includes=${line}`);
118
+ }
119
+ }
120
+ }
121
+ ```
122
+
123
+ **Step 5: Add markdown preprocessing for nowrap columns**
124
+
125
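+ Since we can't change pandoc's column width calculation, we preprocess the markdown directly: in nowrap columns, distribution notation such as `Normal(0, 0.5)` is rewritten as inline math (e.g. `$\mathcal{N}(0, 0.5)$`). Cells are not wrapped in `\mbox{}`, because (as noted above) `\mbox{}` in a `p{}` column overflows instead of expanding.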
126
+
127
+ File: `lib/build.js`, new function:
128
+
129
+ ```javascript
130
+ /**
131
+ * Process markdown tables to apply nowrap to specified columns
132
+ * Wraps cell content in \mbox{} for LaTeX output
133
+ * @param {string} content - Markdown content
134
+ * @param {object} tablesConfig - tables config from rev.yaml
135
+ * @param {string} format - output format
136
+ * @returns {string} processed content
137
+ */
138
+ function processTablesForFormat(content, tablesConfig, format) {
139
+ if (!tablesConfig?.nowrap?.length || format !== 'pdf') {
140
+ return content;
141
+ }
142
+
143
+ const nowrapPatterns = tablesConfig.nowrap.map(p => p.toLowerCase());
144
+
145
+ // Match pipe tables
146
+ const tableRegex = /(\|[^\n]+\|\n\|[-:| ]+\|\n)((?:\|[^\n]+\|\n)+)/g;
147
+
148
+ return content.replace(tableRegex, (match, header, body) => {
149
+ // Parse header to find nowrap column indices
150
+ const headerCells = header.split('|').slice(1, -1).map(c => c.trim().toLowerCase());
151
+ const nowrapCols = headerCells.map((cell, i) =>
152
+ nowrapPatterns.some(p => cell.includes(p)) ? i : -1
153
+ ).filter(i => i >= 0);
154
+
155
+ if (nowrapCols.length === 0) return match;
156
+
157
+ // Process body rows
158
+ const processedBody = body.split('\n').filter(l => l.trim()).map(row => {
159
+ const cells = row.split('|').slice(1, -1);
160
+ nowrapCols.forEach(colIdx => {
161
+ if (cells[colIdx]) {
162
+ const content = cells[colIdx].trim();
163
+ // Skip if already has LaTeX or is empty
164
+ if (content && !content.startsWith('\\') && !content.startsWith('$')) {
165
+ // Convert distribution notation to math
166
+ let processed = content
167
+ .replace(/Normal\(([^)]+)\)/g, '$\\mathcal{N}($1)$')
168
+ .replace(/Student-t\((\d+),\s*([^)]+)\)/g, '$t_{$1}($2)$')
169
+ .replace(/Gamma\(([^)]+)\)/g, '$\\text{Gamma}($1)$');
170
+ cells[colIdx] = ` ${processed} `;
171
+ }
172
+ }
173
+ });
174
+ return '|' + cells.join('|') + '|';
175
+ }).join('\n');
176
+
177
+ return header + processedBody + '\n';
178
+ });
179
+ }
180
+ ```
181
+
182
+ **Step 6: Call from prepareForFormat**
183
+
184
+ ```javascript
185
+ export function prepareForFormat(paperPath, format, config, options = {}) {
186
+ // ... existing code ...
187
+
188
+ if (format === 'pdf' || format === 'tex') {
189
+ content = stripAnnotations(content);
190
+ // NEW: Process tables for nowrap columns
191
+ content = processTablesForFormat(content, config.tables, format);
192
+ }
193
+
194
+ // ... rest of function ...
195
+ }
196
+ ```
197
+
198
+ #### Test Plan for Tables
199
+
200
+ File: `test/tables.test.js`
201
+
202
+ ```javascript
203
+ import { describe, it, beforeEach, afterEach } from 'node:test';
204
+ import assert from 'node:assert';
205
+ import * as fs from 'fs';
206
+ import * as path from 'path';
207
+ import * as os from 'os';
208
+ import { processTablesForFormat, generateTableLatex } from '../lib/build.js';
209
+
210
+ describe('Table Processing', () => {
211
+ describe('generateTableLatex', () => {
212
+ it('returns empty array with no config', () => {
213
+ assert.deepStrictEqual(generateTableLatex(null), []);
214
+ assert.deepStrictEqual(generateTableLatex({}), []);
215
+ });
216
+
217
+ it('adds array package when nowrap specified', () => {
218
+ const result = generateTableLatex({ nowrap: ['Prior'] });
219
+ assert.ok(result.includes('\\usepackage{array}'));
220
+ });
221
+
222
+ it('adds small table styling when small=true', () => {
223
+ const result = generateTableLatex({ small: true });
224
+ assert.ok(result.some(l => l.includes('\\small')));
225
+ assert.ok(result.some(l => l.includes('etoolbox')));
226
+ });
227
+ });
228
+
229
+ describe('processTablesForFormat', () => {
230
+ const sampleTable = `| Component | Prior | Justification |
231
+ |:----------|:------|:--------------|
232
+ | Intercept | Normal(1.5, 0.5) | Weak prior |
233
+ | Slope | Normal(0, 0.3) | Centered |`;
234
+
235
+ it('returns unchanged for non-pdf format', () => {
236
+ const config = { nowrap: ['Prior'] };
237
+ const result = processTablesForFormat(sampleTable, config, 'docx');
238
+ assert.strictEqual(result, sampleTable);
239
+ });
240
+
241
+ it('returns unchanged with no nowrap config', () => {
242
+ const result = processTablesForFormat(sampleTable, {}, 'pdf');
243
+ assert.strictEqual(result, sampleTable);
244
+ });
245
+
246
+ it('converts Normal() to mathcal N in nowrap columns', () => {
247
+ const config = { nowrap: ['Prior'] };
248
+ const result = processTablesForFormat(sampleTable, config, 'pdf');
249
+ assert.ok(result.includes('$\\mathcal{N}(1.5, 0.5)$'));
250
+ assert.ok(result.includes('$\\mathcal{N}(0, 0.3)$'));
251
+ });
252
+
253
+ it('converts Student-t() to subscript notation', () => {
254
+ const table = `| Param | Prior |
255
+ |-------|-------|
256
+ | SD | Student-t(3, 0, 2.5) |`;
257
+ const config = { nowrap: ['Prior'] };
258
+ const result = processTablesForFormat(table, config, 'pdf');
259
+ assert.ok(result.includes('$t_{3}(0, 2.5)$'));
260
+ });
261
+
262
+ it('does not modify columns not in nowrap list', () => {
263
+ const config = { nowrap: ['Prior'] };
264
+ const result = processTablesForFormat(sampleTable, config, 'pdf');
265
+ assert.ok(result.includes('Weak prior')); // unchanged
266
+ assert.ok(!result.includes('$Weak prior$'));
267
+ });
268
+
269
+ it('handles case-insensitive column matching', () => {
270
+ const config = { nowrap: ['PRIOR'] };
271
+ const result = processTablesForFormat(sampleTable, config, 'pdf');
272
+ assert.ok(result.includes('$\\mathcal{N}'));
273
+ });
274
+
275
+ it('skips cells that already have math', () => {
276
+ const table = `| Param | Prior |
277
+ |-------|-------|
278
+ | X | $\\mathcal{N}(0, 1)$ |`;
279
+ const config = { nowrap: ['Prior'] };
280
+ const result = processTablesForFormat(table, config, 'pdf');
281
+ // Should not double-wrap
282
+ assert.ok(!result.includes('$$'));
283
+ });
284
+ });
285
+ });
286
+ ```
287
+
288
+ #### Usage Example
289
+
290
+ ```yaml
291
+ # rev.yaml
292
+ tables:
293
+ nowrap:
294
+ - Prior
295
+ - "$\\widehat{R}$"
296
+ small: false
297
+ ```
298
+
299
+ ```markdown
300
+ | Parameter | Prior | Justification |
301
+ |:----------|:------|:--------------|
302
+ | Intercept | Normal(1.5, 0.5) | Prior P ~82% |
303
+ | Slope | Normal(0, 0.5) | Moderate |
304
+ ```
305
+
306
+ Output: Prior column cells become `$\mathcal{N}(1.5, 0.5)$` in PDF.
307
+
308
+ ---
309
+
310
+ ## Part 2: Postprocess Scripting
311
+
312
+ ### Problem Statement
313
+
314
+ Users need fine-grained control over output that pandoc/docrev can't provide:
315
+ - Custom LaTeX tweaks after generation
316
+ - Search/replace in generated files
317
+ - Format-specific post-processing (e.g., inject custom XML into DOCX)
318
+
319
+ ### Design Principles
320
+
321
+ 1. **Start simple** - Shell scripts first, DSL later if needed
322
+ 2. **Per-format** - Different postprocess for PDF vs DOCX
323
+ 3. **Safe defaults** - Scripts must be explicitly enabled
324
+ 4. **Debugging** - Clear error messages, optional verbose mode
325
+
326
+ ### Implementation Approach
327
+
328
+ #### Phase 1: Shell Script Postprocessing (MVP)
329
+
330
+ **Config Schema:**
331
+
332
+ ```yaml
333
+ # rev.yaml
334
+ postprocess:
335
+ pdf: ./scripts/fix-tables.sh # Run after PDF generated
336
+ docx: ./scripts/add-headers.ps1 # Run after DOCX generated
337
+ all: ./scripts/common.sh # Run after any format
338
+ ```
339
+
340
+ **Implementation Steps:**
341
+
342
+ **Step 1: Add postprocess to DEFAULT_CONFIG**
343
+
344
+ File: `lib/build.js`
345
+
346
+ ```javascript
347
+ export const DEFAULT_CONFIG = {
348
+ // ... existing ...
349
+ postprocess: {
350
+ pdf: null,
351
+ docx: null,
352
+ tex: null,
353
+ pptx: null,
354
+ beamer: null,
355
+ all: null,
356
+ },
357
+ };
358
+ ```
359
+
360
+ **Step 2: Add postprocess runner**
361
+
362
+ File: `lib/postprocess.js` (new file)
363
+
364
+ ```javascript
365
+ import * as fs from 'fs';
366
+ import * as path from 'path';
367
+ import { execSync, spawn } from 'child_process';
368
+
369
+ /**
370
+ * Run postprocess script for a given format
371
+ * @param {string} outputPath - Path to generated file
372
+ * @param {string} format - Output format (pdf, docx, etc.)
373
+ * @param {object} config - Full config object
374
+ * @param {object} options - { verbose: boolean }
375
+ * @returns {Promise<{success: boolean, error?: string}>}
376
+ */
377
+ export async function runPostprocess(outputPath, format, config, options = {}) {
378
+ const postprocessConfig = config.postprocess || {};
379
+
380
+ // Collect scripts to run (format-specific + all)
381
+ const scripts = [];
382
+ if (postprocessConfig[format]) {
383
+ scripts.push(postprocessConfig[format]);
384
+ }
385
+ if (postprocessConfig.all) {
386
+ scripts.push(postprocessConfig.all);
387
+ }
388
+
389
+ if (scripts.length === 0) {
390
+ return { success: true };
391
+ }
392
+
393
+ const directory = path.dirname(outputPath);
394
+ const errors = [];
395
+
396
+ for (const scriptPath of scripts) {
397
+ const absoluteScript = path.isAbsolute(scriptPath)
398
+ ? scriptPath
399
+ : path.join(directory, scriptPath);
400
+
401
+ if (!fs.existsSync(absoluteScript)) {
402
+ errors.push(`Postprocess script not found: ${scriptPath}`);
403
+ continue;
404
+ }
405
+
406
+ try {
407
+ const result = await executeScript(absoluteScript, {
408
+ OUTPUT_FILE: outputPath,
409
+ OUTPUT_FORMAT: format,
410
+ PROJECT_DIR: directory,
411
+ CONFIG_PATH: config._configPath || '',
412
+ }, options);
413
+
414
+ if (!result.success) {
415
+ errors.push(`Script ${scriptPath} failed: ${result.error}`);
416
+ }
417
+ } catch (err) {
418
+ errors.push(`Script ${scriptPath} error: ${err.message}`);
419
+ }
420
+ }
421
+
422
+ return {
423
+ success: errors.length === 0,
424
+ error: errors.join('\n'),
425
+ };
426
+ }
427
+
428
+ /**
429
+ * Execute a script with environment variables
430
+ * @param {string} scriptPath
431
+ * @param {object} env - Environment variables to set
432
+ * @param {object} options
433
+ * @returns {Promise<{success: boolean, stdout: string, stderr: string, error?: string}>}
434
+ */
435
+ async function executeScript(scriptPath, env, options = {}) {
436
+ return new Promise((resolve) => {
437
+ const ext = path.extname(scriptPath).toLowerCase();
438
+ let command, args;
439
+
440
+ // Determine how to run based on extension
441
+ if (ext === '.ps1') {
442
+ command = 'powershell';
443
+ args = ['-ExecutionPolicy', 'Bypass', '-File', scriptPath];
444
+ } else if (ext === '.py') {
445
+ command = 'python';
446
+ args = [scriptPath];
447
+ } else if (ext === '.js') {
448
+ command = 'node';
449
+ args = [scriptPath];
450
+ } else {
451
+ // Assume shell script
452
+ command = process.platform === 'win32' ? 'bash' : '/bin/bash';
453
+ args = [scriptPath];
454
+ }
455
+
456
+ const proc = spawn(command, args, {
457
+ env: { ...process.env, ...env },
458
+ cwd: path.dirname(scriptPath),
459
+ stdio: ['ignore', 'pipe', 'pipe'],
460
+ });
461
+
462
+ let stdout = '';
463
+ let stderr = '';
464
+
465
+ proc.stdout.on('data', (data) => {
466
+ stdout += data.toString();
467
+ if (options.verbose) {
468
+ process.stdout.write(data);
469
+ }
470
+ });
471
+
472
+ proc.stderr.on('data', (data) => {
473
+ stderr += data.toString();
474
+ if (options.verbose) {
475
+ process.stderr.write(data);
476
+ }
477
+ });
478
+
479
+ proc.on('error', (err) => {
480
+ resolve({ success: false, stdout, stderr, error: err.message });
481
+ });
482
+
483
+ proc.on('close', (code) => {
484
+ if (code === 0) {
485
+ resolve({ success: true, stdout, stderr });
486
+ } else {
487
+ resolve({
488
+ success: false,
489
+ stdout,
490
+ stderr,
491
+ error: `Exit code ${code}: ${stderr.trim() || 'Unknown error'}`
492
+ });
493
+ }
494
+ });
495
+ });
496
+ }
497
+
498
+ export { executeScript };
499
+ ```
500
+
501
+ **Step 3: Integrate into runPandoc**
502
+
503
+ File: `lib/build.js`
504
+
505
+ ```javascript
506
+ import { runPostprocess } from './postprocess.js';
507
+
508
+ // In runPandoc(), after pandoc completes successfully:
509
+
510
+ pandoc.on('close', async (code) => {
511
+ if (code === 0) {
512
+ // Existing PPTX post-processing...
513
+ if (format === 'pptx') {
514
+ // ...
515
+ }
516
+
517
+ // NEW: Run user postprocess scripts
518
+ const postResult = await runPostprocess(outputPath, format, config, options);
519
+ if (!postResult.success) {
520
+ console.error(`Postprocess warning: ${postResult.error}`);
521
+ }
522
+
523
+ resolve({ outputPath, success: true });
524
+ } else {
525
+ resolve({ outputPath: null, success: false, error: stderr });
526
+ }
527
+ });
528
+ ```
529
+
530
+ **Step 4: Add CLI verbose flag**
531
+
532
+ File: `lib/commands/build.js`
533
+
534
+ ```javascript
535
+ .option('--verbose', 'Show detailed output including postprocess scripts')
536
+
537
+ // Pass to build():
538
+ await build(targetDir, formats, { verbose: options.verbose });
539
+ ```
540
+
541
+ #### Phase 2: DSL for Common Operations (Future)
542
+
543
+ If shell scripts prove insufficient, add a simple declarative DSL:
544
+
545
+ ```yaml
546
+ # rev.yaml
547
+ postprocess:
548
+ pdf:
549
+ - type: replace
550
+ pattern: "\\\\begin{longtable}"
551
+ replacement: "\\\\begin{longtable}[l]"
552
+ - type: inject
553
+ after: "\\\\begin{document}"
554
+ content: "\\\\newcommand{\\\\N}{\\\\mathcal{N}}"
555
+ - type: script
556
+ path: ./scripts/final-fixes.sh
557
+ ```
558
+
559
+ This would require:
560
+ - New file: `lib/postprocess-dsl.js`
561
+ - Operation handlers for each type
562
+ - Validation of DSL syntax
563
+ - Clear error messages for invalid operations
564
+
565
+ #### Test Plan for Postprocessing
566
+
567
+ File: `test/postprocess.test.js`
568
+
569
+ ```javascript
570
+ import { describe, it, beforeEach, afterEach } from 'node:test';
571
+ import assert from 'node:assert';
572
+ import * as fs from 'fs';
573
+ import * as path from 'path';
574
+ import * as os from 'os';
575
+ import { runPostprocess, executeScript } from '../lib/postprocess.js';
576
+
577
+ describe('Postprocessing', () => {
578
+ let tempDir;
579
+
580
+ beforeEach(() => {
581
+ tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'docrev-test-'));
582
+ });
583
+
584
+ afterEach(() => {
585
+ fs.rmSync(tempDir, { recursive: true, force: true });
586
+ });
587
+
588
+ describe('executeScript', () => {
589
+ it('runs shell script with environment variables', async () => {
590
+ const scriptPath = path.join(tempDir, 'test.sh');
591
+ fs.writeFileSync(scriptPath, '#!/bin/bash\necho "$OUTPUT_FILE"', { mode: 0o755 });
592
+
593
+ const result = await executeScript(scriptPath, { OUTPUT_FILE: '/tmp/test.pdf' });
594
+ assert.ok(result.success);
595
+ assert.ok(result.stdout.includes('/tmp/test.pdf'));
596
+ });
597
+
598
+ it('returns error for non-existent script', async () => {
599
+ const result = await executeScript('/nonexistent/script.sh', {});
600
+ assert.ok(!result.success);
601
+ });
602
+
603
+ it('captures exit code on failure', async () => {
604
+ const scriptPath = path.join(tempDir, 'fail.sh');
605
+ fs.writeFileSync(scriptPath, '#!/bin/bash\nexit 1', { mode: 0o755 });
606
+
607
+ const result = await executeScript(scriptPath, {});
608
+ assert.ok(!result.success);
609
+ assert.ok(result.error.includes('Exit code 1'));
610
+ });
611
+
612
+ it('runs PowerShell scripts on Windows', async function() {
613
+ if (process.platform !== 'win32') {
614
+ this.skip();
615
+ return;
616
+ }
617
+
618
+ const scriptPath = path.join(tempDir, 'test.ps1');
619
+ fs.writeFileSync(scriptPath, 'Write-Host $env:OUTPUT_FILE');
620
+
621
+ const result = await executeScript(scriptPath, { OUTPUT_FILE: 'C:\\test.pdf' });
622
+ assert.ok(result.success);
623
+ assert.ok(result.stdout.includes('C:\\test.pdf'));
624
+ });
625
+
626
+ it('runs Python scripts', async () => {
627
+ const scriptPath = path.join(tempDir, 'test.py');
628
+ fs.writeFileSync(scriptPath, 'import os; print(os.environ["OUTPUT_FILE"])');
629
+
630
+ const result = await executeScript(scriptPath, { OUTPUT_FILE: '/tmp/test.pdf' });
631
+ assert.ok(result.success);
632
+ assert.ok(result.stdout.includes('/tmp/test.pdf'));
633
+ });
634
+
635
+ it('runs Node.js scripts', async () => {
636
+ const scriptPath = path.join(tempDir, 'test.js');
637
+ fs.writeFileSync(scriptPath, 'console.log(process.env.OUTPUT_FILE)');
638
+
639
+ const result = await executeScript(scriptPath, { OUTPUT_FILE: '/tmp/test.pdf' });
640
+ assert.ok(result.success);
641
+ assert.ok(result.stdout.includes('/tmp/test.pdf'));
642
+ });
643
+ });
644
+
645
+ describe('runPostprocess', () => {
646
+ it('returns success with no postprocess config', async () => {
647
+ const result = await runPostprocess('/tmp/test.pdf', 'pdf', {});
648
+ assert.ok(result.success);
649
+ });
650
+
651
+ it('runs format-specific script', async () => {
652
+ const scriptPath = path.join(tempDir, 'pdf-post.sh');
653
+ const markerPath = path.join(tempDir, 'marker.txt');
654
+ fs.writeFileSync(scriptPath, `#!/bin/bash\necho "ran" > "${markerPath}"`, { mode: 0o755 });
655
+
656
+ const config = {
657
+ postprocess: { pdf: scriptPath },
658
+ _configPath: path.join(tempDir, 'rev.yaml'),
659
+ };
660
+
661
+ const result = await runPostprocess(path.join(tempDir, 'out.pdf'), 'pdf', config);
662
+ assert.ok(result.success);
663
+ assert.ok(fs.existsSync(markerPath));
664
+ });
665
+
666
+ it('runs "all" script for any format', async () => {
667
+ const scriptPath = path.join(tempDir, 'all-post.sh');
668
+ const markerPath = path.join(tempDir, 'marker.txt');
669
+ fs.writeFileSync(scriptPath, `#!/bin/bash\necho "$OUTPUT_FORMAT" > "${markerPath}"`, { mode: 0o755 });
670
+
671
+ const config = {
672
+ postprocess: { all: scriptPath },
673
+ _configPath: path.join(tempDir, 'rev.yaml'),
674
+ };
675
+
676
+ await runPostprocess(path.join(tempDir, 'out.docx'), 'docx', config);
677
+ assert.ok(fs.existsSync(markerPath));
678
+ assert.strictEqual(fs.readFileSync(markerPath, 'utf-8').trim(), 'docx');
679
+ });
680
+
681
+ it('runs both format-specific and all scripts', async () => {
682
+ const pdfScript = path.join(tempDir, 'pdf.sh');
683
+ const allScript = path.join(tempDir, 'all.sh');
684
+ const pdfMarker = path.join(tempDir, 'pdf-marker.txt');
685
+ const allMarker = path.join(tempDir, 'all-marker.txt');
686
+
687
+ fs.writeFileSync(pdfScript, `#!/bin/bash\ntouch "${pdfMarker}"`, { mode: 0o755 });
688
+ fs.writeFileSync(allScript, `#!/bin/bash\ntouch "${allMarker}"`, { mode: 0o755 });
689
+
690
+ const config = {
691
+ postprocess: { pdf: pdfScript, all: allScript },
692
+ _configPath: path.join(tempDir, 'rev.yaml'),
693
+ };
694
+
695
+ await runPostprocess(path.join(tempDir, 'out.pdf'), 'pdf', config);
696
+ assert.ok(fs.existsSync(pdfMarker));
697
+ assert.ok(fs.existsSync(allMarker));
698
+ });
699
+
700
+ it('reports error for missing script', async () => {
701
+ const config = {
702
+ postprocess: { pdf: './nonexistent.sh' },
703
+ _configPath: path.join(tempDir, 'rev.yaml'),
704
+ };
705
+
706
+ const result = await runPostprocess(path.join(tempDir, 'out.pdf'), 'pdf', config);
707
+ assert.ok(!result.success);
708
+ assert.ok(result.error.includes('not found'));
709
+ });
710
+
711
+ it('reports error for failing script', async () => {
712
+ const scriptPath = path.join(tempDir, 'fail.sh');
713
+ fs.writeFileSync(scriptPath, '#!/bin/bash\nexit 42', { mode: 0o755 });
714
+
715
+ const config = {
716
+ postprocess: { pdf: scriptPath },
717
+ _configPath: path.join(tempDir, 'rev.yaml'),
718
+ };
719
+
720
+ const result = await runPostprocess(path.join(tempDir, 'out.pdf'), 'pdf', config);
721
+ assert.ok(!result.success);
722
+ assert.ok(result.error.includes('42') || result.error.includes('failed'));
723
+ });
724
+ });
725
+ });
726
+ ```
727
+
728
+ ---
729
+
730
+ ## Implementation Order
731
+
732
+ ### Sprint 1: Table Preprocessing (2-3 hours)
733
+
734
+ 1. [ ] Add `processTablesForFormat()` function to `lib/build.js`
735
+ 2. [ ] Integrate into `prepareForFormat()`
736
+ 3. [ ] Write tests in `test/tables.test.js`
737
+ 4. [ ] Test with paper 2 priors table
738
+ 5. [ ] Document in README
739
+
740
+ ### Sprint 2: Postprocess Shell Scripts (3-4 hours)
741
+
742
+ 1. [ ] Create `lib/postprocess.js` with `executeScript()` and `runPostprocess()`
743
+ 2. [ ] Add `postprocess` to `DEFAULT_CONFIG`
744
+ 3. [ ] Add config merging in `loadConfig()`
745
+ 4. [ ] Integrate into `runPandoc()` after output generation
746
+ 5. [ ] Add `--verbose` flag to CLI
747
+ 6. [ ] Write tests in `test/postprocess.test.js`
748
+ 7. [ ] Create example scripts in `examples/postprocess/`
749
+ 8. [ ] Document in README
750
+
751
+ ### Sprint 3: Header Includes (1-2 hours)
752
+
753
+ 1. [ ] Add `pdf.headerIncludes` config option
754
+ 2. [ ] Add `generateTableLatex()` helper
755
+ 3. [ ] Pass to pandoc in `buildPandocArgs()`
756
+ 4. [ ] Add tests
757
+ 5. [ ] Document
758
+
759
+ ### Future: DSL (if needed)
760
+
761
+ Only implement if shell scripts prove insufficient for common use cases.
762
+
763
+ ---
764
+
765
+ ## Files to Create/Modify
766
+
767
+ ### New Files
768
+
769
+ | File | Purpose |
770
+ |------|---------|
771
+ | `lib/postprocess.js` | Postprocess script execution |
772
+ | `test/tables.test.js` | Table processing tests |
773
+ | `test/postprocess.test.js` | Postprocess tests |
774
+ | `examples/postprocess/fix-tables.sh` | Example PDF postprocess |
775
+ | `examples/postprocess/inject-headers.ps1` | Example DOCX postprocess |
776
+
777
+ ### Modified Files
778
+
779
+ | File | Changes |
780
+ |------|---------|
781
+ | `lib/build.js` | Add `processTablesForFormat()`, `generateTableLatex()`, integrate postprocess, add configs |
782
+ | `lib/commands/build.js` | Add `--verbose` flag |
783
+
784
+ ---
785
+
786
+ ## Example Usage After Implementation
787
+
788
+ ### Table Config
789
+
790
+ ```yaml
791
+ # rev.yaml
792
+ tables:
793
+ nowrap:
794
+ - Prior
795
+ - Value
796
+ - Count
797
+ small: true
798
+ ```
799
+
800
+ ### Postprocess Scripts
801
+
802
+ ```yaml
803
+ # rev.yaml
804
+ postprocess:
805
+ pdf: ./scripts/fix-latex.sh
806
+ docx: ./scripts/add-metadata.py
807
+ all: ./scripts/notify.js
808
+ ```
809
+
810
+ Example `fix-latex.sh`:
811
+ ```bash
812
+ #!/bin/bash
813
+ # Receives: OUTPUT_FILE, OUTPUT_FORMAT, PROJECT_DIR, CONFIG_PATH
814
+
815
+ # Example: Replace longtable alignment
816
+ if [ "$OUTPUT_FORMAT" = "pdf" ]; then
817
+ echo "PDF postprocessing not needed (can't modify PDF)"
818
+ fi
819
+ ```
820
+
821
+ Example `add-metadata.py`:
822
+ ```python
823
+ #!/usr/bin/env python3
824
+ import os
825
+ from docx import Document
826
+
827
+ doc = Document(os.environ['OUTPUT_FILE'])
828
+ doc.core_properties.author = "Research Team"
829
+ doc.save(os.environ['OUTPUT_FILE'])
830
+ ```
831
+
832
+ ---
833
+
834
+ ## Risk Assessment
835
+
836
+ | Risk | Likelihood | Impact | Mitigation |
837
+ |------|------------|--------|------------|
838
+ | Table preprocessing breaks edge cases | Medium | Medium | Extensive tests, careful regex |
839
+ | Shell script security concerns | Low | High | Document that scripts run with user permissions |
840
+ | Cross-platform script compatibility | Medium | Medium | Support multiple interpreters, document requirements |
841
+ | Performance overhead from postprocess | Low | Low | Scripts are optional, run after main build |
842
+
843
+ ---
844
+
845
+ ## Success Criteria
846
+
847
+ 1. **Tables**: `Normal(0, 0.5)` in nowrap column → `$\mathcal{N}(0, 0.5)$` in PDF output
848
+ 2. **Postprocess**: User script receives correct environment variables and can modify output
849
+ 3. **Tests**: All new tests pass, existing tests unchanged
850
+ 4. **Docs**: README updated with examples for both features