docrev 0.9.13 → 0.9.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. package/.claude/settings.local.json +9 -9
  2. package/.gitattributes +1 -1
  3. package/CHANGELOG.md +149 -149
  4. package/PLAN-tables-and-postprocess.md +850 -850
  5. package/README.md +391 -391
  6. package/bin/rev.js +11 -11
  7. package/bin/rev.ts +145 -145
  8. package/completions/rev.bash +127 -127
  9. package/completions/rev.ps1 +210 -210
  10. package/completions/rev.zsh +207 -207
  11. package/dev_notes/stress2/build_adversarial.ts +186 -186
  12. package/dev_notes/stress2/drift_matcher.ts +62 -62
  13. package/dev_notes/stress2/probe_anchors.ts +35 -35
  14. package/dev_notes/stress2/project/discussion.before.md +3 -3
  15. package/dev_notes/stress2/project/discussion.md +3 -3
  16. package/dev_notes/stress2/project/methods.before.md +20 -20
  17. package/dev_notes/stress2/project/methods.md +20 -20
  18. package/dev_notes/stress2/project/rev.yaml +5 -5
  19. package/dev_notes/stress2/project/sections.yaml +4 -4
  20. package/dev_notes/stress2/sections.yaml +5 -5
  21. package/dev_notes/stress2/trace_placement.ts +50 -50
  22. package/dev_notes/stresstest_boundaries.ts +27 -27
  23. package/dev_notes/stresstest_drift_apply.ts +43 -43
  24. package/dev_notes/stresstest_drift_compare.ts +43 -43
  25. package/dev_notes/stresstest_drift_v2.ts +54 -54
  26. package/dev_notes/stresstest_inspect.ts +54 -54
  27. package/dev_notes/stresstest_pstyle.ts +55 -55
  28. package/dev_notes/stresstest_section_debug.ts +23 -23
  29. package/dev_notes/stresstest_split.ts +70 -70
  30. package/dev_notes/stresstest_trace.ts +19 -19
  31. package/dev_notes/stresstest_verify_no_overwrite.ts +40 -40
  32. package/dist/lib/build.d.ts +38 -1
  33. package/dist/lib/build.d.ts.map +1 -1
  34. package/dist/lib/build.js +68 -30
  35. package/dist/lib/build.js.map +1 -1
  36. package/dist/lib/commands/build.d.ts.map +1 -1
  37. package/dist/lib/commands/build.js +38 -5
  38. package/dist/lib/commands/build.js.map +1 -1
  39. package/dist/lib/commands/utilities.js +164 -164
  40. package/dist/lib/commands/word-tools.js +8 -8
  41. package/dist/lib/grammar.js +3 -3
  42. package/dist/lib/pdf-comments.js +44 -44
  43. package/dist/lib/plugins.js +57 -57
  44. package/dist/lib/pptx-themes.js +115 -115
  45. package/dist/lib/spelling.js +2 -2
  46. package/dist/lib/templates.js +387 -387
  47. package/dist/lib/themes.js +51 -51
  48. package/eslint.config.js +27 -27
  49. package/lib/anchor-match.ts +276 -276
  50. package/lib/annotations.ts +644 -644
  51. package/lib/build.ts +1300 -1251
  52. package/lib/citations.ts +160 -160
  53. package/lib/commands/build.ts +833 -801
  54. package/lib/commands/citations.ts +515 -515
  55. package/lib/commands/comments.ts +1050 -1050
  56. package/lib/commands/context.ts +174 -174
  57. package/lib/commands/core.ts +309 -309
  58. package/lib/commands/doi.ts +435 -435
  59. package/lib/commands/file-ops.ts +372 -372
  60. package/lib/commands/history.ts +320 -320
  61. package/lib/commands/index.ts +87 -87
  62. package/lib/commands/init.ts +259 -259
  63. package/lib/commands/merge-resolve.ts +378 -378
  64. package/lib/commands/preview.ts +178 -178
  65. package/lib/commands/project-info.ts +244 -244
  66. package/lib/commands/quality.ts +517 -517
  67. package/lib/commands/response.ts +454 -454
  68. package/lib/commands/section-boundaries.ts +82 -82
  69. package/lib/commands/sections.ts +451 -451
  70. package/lib/commands/sync.ts +706 -706
  71. package/lib/commands/text-ops.ts +449 -449
  72. package/lib/commands/utilities.ts +448 -448
  73. package/lib/commands/verify-anchors.ts +272 -272
  74. package/lib/commands/word-tools.ts +340 -340
  75. package/lib/comment-realign.ts +517 -517
  76. package/lib/config.ts +84 -84
  77. package/lib/crossref.ts +781 -781
  78. package/lib/csl.ts +191 -191
  79. package/lib/dependencies.ts +98 -98
  80. package/lib/diff-engine.ts +465 -465
  81. package/lib/doi-cache.ts +115 -115
  82. package/lib/doi.ts +897 -897
  83. package/lib/equations.ts +506 -506
  84. package/lib/errors.ts +346 -346
  85. package/lib/format.ts +541 -541
  86. package/lib/git.ts +326 -326
  87. package/lib/grammar.ts +303 -303
  88. package/lib/image-registry.ts +180 -180
  89. package/lib/import.ts +911 -911
  90. package/lib/journals.ts +543 -543
  91. package/lib/merge.ts +633 -633
  92. package/lib/orcid.ts +144 -144
  93. package/lib/pdf-comments.ts +263 -263
  94. package/lib/pdf-import.ts +524 -524
  95. package/lib/plugins.ts +362 -362
  96. package/lib/postprocess.ts +188 -188
  97. package/lib/pptx-color-filter.lua +37 -37
  98. package/lib/pptx-template.ts +469 -469
  99. package/lib/pptx-themes.ts +483 -483
  100. package/lib/protect-restore.ts +520 -520
  101. package/lib/rate-limiter.ts +94 -94
  102. package/lib/response.ts +197 -197
  103. package/lib/restore-references.ts +240 -240
  104. package/lib/review.ts +327 -327
  105. package/lib/schema.ts +417 -417
  106. package/lib/scientific-words.ts +73 -73
  107. package/lib/sections.ts +335 -335
  108. package/lib/slides.ts +756 -756
  109. package/lib/spelling.ts +334 -334
  110. package/lib/templates.ts +526 -526
  111. package/lib/themes.ts +742 -742
  112. package/lib/trackchanges.ts +247 -247
  113. package/lib/tui.ts +450 -450
  114. package/lib/types.ts +550 -550
  115. package/lib/undo.ts +250 -250
  116. package/lib/utils.ts +69 -69
  117. package/lib/variables.ts +179 -179
  118. package/lib/word-extraction.ts +806 -806
  119. package/lib/word.ts +643 -643
  120. package/lib/wordcomments.ts +817 -817
  121. package/package.json +137 -137
  122. package/scripts/postbuild.js +28 -28
  123. package/skill/REFERENCE.md +431 -431
  124. package/skill/SKILL.md +258 -258
  125. package/tsconfig.json +26 -26
  126. package/types/index.d.ts +525 -525
package/lib/crossref.ts CHANGED
@@ -1,781 +1,781 @@
1
- /**
2
- * Cross-reference handling - dynamic figure/table references
3
- *
4
- * Enables:
5
- * - @fig:label syntax in source (auto-numbered)
6
- * - Conversion to "Figure 1" in Word output
7
- * - Auto-conversion back during import
8
- */
9
-
10
- import * as fs from 'fs';
11
- import * as path from 'path';
12
- import YAML from 'yaml';
13
- import type {
14
- RefNumber,
15
- HardcodedRef,
16
- DynamicRef,
17
- FigureInfo,
18
- Registry,
19
- RefStatus,
20
- ConversionResult,
21
- } from './types.js';
22
-
23
- // =============================================================================
24
- // Constants
25
- // =============================================================================
26
-
27
/**
 * How many characters immediately preceding a hardcoded reference are
 * inspected for an already-present `@type:label` before converting it.
 */
const REF_CONTEXT_WINDOW = 100;

/**
 * Minimum word length for similarity calculations.
 * NOTE(review): not referenced anywhere in this file — confirm it is still needed.
 */
const MIN_WORD_LENGTH = 2;
32
-
33
- // =============================================================================
34
- // Type Definitions (Internal)
35
- // =============================================================================
36
-
37
/**
 * Registry-building record for one anchor.
 * NOTE(review): not referenced in this file (FigureInfo from ./types.js is used instead).
 */
interface RefInfo {
  /** Label text after the `fig:` / `tbl:` / `eq:` prefix. */
  label: string;
  /** Number assigned to the anchor. */
  num: number;
  /** True when the anchor is numbered in the supplementary ("S") series. */
  isSupp: boolean;
  /** Section file the anchor was found in. */
  file: string;
}

/**
 * One parsed reference number, e.g. "S2b" → { isSupp: true, num: 2, suffix: 'b' }.
 */
interface ParsedRefNumber {
  /** True when the number carried an "S" prefix. */
  isSupp: boolean;
  /** Numeric part of the reference. */
  num: number;
  /** Lower-cased panel letter, or null when there is none. */
  suffix: string | null;
}

/**
 * A hardcoded reference found in text together with every number it expands to.
 */
interface DetectedRef {
  /** Normalized reference kind. */
  type: 'fig' | 'tbl' | 'eq';
  /** The exact matched source text (e.g. "Figs. 1a-c"). */
  match: string;
  /** All individual references the match expands to. */
  numbers: ParsedRefNumber[];
  /** Character offset of the match within the scanned text. */
  position: number;
}
65
-
66
- // =============================================================================
67
- // Internal Helpers
68
- // =============================================================================
69
-
70
/**
 * Work out the ordered list of section files for a project directory.
 *
 * Lookup order:
 *  1. `rev.yaml` — used when it has a non-empty `sections` array (order as listed).
 *  2. `sections.yaml` — used when it has a `sections` mapping; entries are sorted
 *     by their `order` field (missing `order` sorts as 999).
 *
 * In both cases only files that exist inside `directory` are returned. Any error
 * while reading or interpreting a config file is swallowed (reported only when
 * the DEBUG environment variable is set) and the next source is tried.
 *
 * @param directory Project directory to inspect.
 * @returns Existing section file names, or an empty array when no usable config
 *          was found — the caller decides how to handle that.
 */
function discoverSectionFiles(directory: string): string[] {
  const presentInDirectory = (name: string): boolean =>
    fs.existsSync(path.join(directory, name));

  // 1) rev.yaml takes precedence.
  const revConfigFile = path.join(directory, 'rev.yaml');
  if (fs.existsSync(revConfigFile)) {
    try {
      const parsed = YAML.parse(fs.readFileSync(revConfigFile, 'utf-8'));
      const listed = parsed.sections;
      if (listed && Array.isArray(listed) && listed.length > 0) {
        return listed.filter((name: string) => presentInDirectory(name));
      }
    } catch (e) {
      if (process.env.DEBUG) {
        console.warn('crossref: YAML parse error in rev.yaml:', (e as Error).message);
      }
    }
  }

  // 2) Fall back to sections.yaml.
  const sectionsConfigFile = path.join(directory, 'sections.yaml');
  if (fs.existsSync(sectionsConfigFile)) {
    try {
      const parsed = YAML.parse(fs.readFileSync(sectionsConfigFile, 'utf-8'));
      if (parsed.sections) {
        const rank = (entry: [string, unknown]): number => (entry[1] as any).order ?? 999;
        const byOrder = Object.entries(parsed.sections).sort((a, b) => rank(a) - rank(b));
        const names = byOrder.map(([file]) => file);
        return names.filter((name) => presentInDirectory(name));
      }
    } catch (e) {
      if (process.env.DEBUG) {
        console.warn('crossref: YAML parse error in sections.yaml:', (e as Error).message);
      }
    }
  }

  // Nothing usable was found; deliberately do not guess by scanning the directory.
  return [];
}
113
-
114
- // =============================================================================
115
- // Detection Patterns
116
- // =============================================================================
117
-
118
/**
 * Regular expressions that find hardcoded references ("Figure 1", "Tabs. S1-S3",
 * "Eq. 2a, b") in prose. All are global and case-insensitive.
 *
 * Capture groups (identical for every entry):
 *  - group 1: the type prefix as written (Figure, Figs., Table, Eq., ...)
 *  - group 2: the raw reference list, to be expanded by parseReferenceList()
 *
 * The list part accepts:
 *  - single items:  "1", "1a", "S1" (no "S" form for equations)
 *  - ranges:        "1-3", "1a-c", "1a-3b" (hyphen, en dash or em dash)
 *  - lists:         "1, 2, and 3", "1 and 2", "1 & 2", "1a, b, c"
 *  - combinations:  "1, 3-5, and 7"
 *
 * A bare letter is only accepted as a continuation item and must end at a word
 * boundary.
 */
const DETECTION_PATTERNS: Record<string, RegExp> = {
  figure: /\b(Figures?|Figs?\.?)\s+((?:\d+|S\d+)[a-z]?(?:(?:\s*[-–—,&]\s*(?:and\s+)?|\s+and\s+)(?:(?:\d+|S\d+)[a-z]?|[a-z]\b))*)/gi,

  table: /\b(Tables?|Tabs?\.?)\s+((?:\d+|S\d+)[a-z]?(?:(?:\s*[-–—,&]\s*(?:and\s+)?|\s+and\s+)(?:(?:\d+|S\d+)[a-z]?|[a-z]\b))*)/gi,

  equation: /\b(Equations?|Eqs?\.?)\s+((?:\d+)[a-z]?(?:(?:\s*[-–—,&]\s*(?:and\s+)?|\s+and\s+)(?:(?:\d+)[a-z]?|[a-z]\b))*)/gi,
};

/**
 * Phrases that look like references but are not ("Table of Contents",
 * "Figure skating", ...).
 * NOTE(review): not referenced anywhere in this file — confirm whether detection
 * is expected to apply these.
 */
const EXCLUSION_PATTERNS = [
  /\bTable\s+of\s+Contents?\b/gi,
  /\bFigure\s+skating\b/gi,
  /\bFigure\s+out\b/gi,
  /\bFigure\s+it\b/gi,
  /\bTable\s+setting/gi,
  /\bEquation\s+editor\b/gi,
];

/**
 * Matches the opening of a markdown anchor attribute: `{#fig:label`, `{#tbl:label`,
 * `{#eq:label`. Group 1 is the type, group 2 the label. The closing brace and any
 * further attributes are intentionally not part of the match.
 * Global regex: reset `lastIndex` before each scan.
 */
const ANCHOR_PATTERN = /\{#(fig|tbl|eq):([a-zA-Z0-9_-]+)/gi;

/**
 * Matches an @-style reference: `@fig:label`, `@tbl:label`, `@eq:label`.
 * Group 1 is the type (any letter case, because of the `i` flag), group 2 the label.
 * Global regex: reset `lastIndex` before each scan.
 */
const REF_PATTERN = /@(fig|tbl|eq):([a-zA-Z0-9_-]+)/gi;
166
-
167
- // =============================================================================
168
- // Public API
169
- // =============================================================================
170
-
171
/**
 * Map a reference type as written ("Figure", "Figs.", "Table", "Eq.", ...) to its
 * short canonical form.
 *
 * The input is lower-cased and one trailing period is removed; anything starting
 * with "fig" → 'fig', "tab" → 'tbl', "eq" → 'eq'. Unrecognized input is returned
 * in its lower-cased, period-stripped form.
 *
 * @param typeStr Type prefix to normalize.
 * @returns 'fig', 'tbl', 'eq', or the cleaned-up input when nothing matches.
 * @throws TypeError when `typeStr` is not a string.
 */
export function normalizeType(typeStr: string): 'fig' | 'tbl' | 'eq' | string {
  if (typeof typeStr !== 'string') {
    throw new TypeError(`typeStr must be a string, got ${typeof typeStr}`);
  }

  const cleaned = typeStr.toLowerCase().replace(/\.$/, '');
  const prefixTable: ReadonlyArray<readonly [string, 'fig' | 'tbl' | 'eq']> = [
    ['fig', 'fig'],
    ['tab', 'tbl'],
    ['eq', 'eq'],
  ];

  for (const [prefix, canonical] of prefixTable) {
    if (cleaned.startsWith(prefix)) return canonical;
  }
  return cleaned;
}
184
-
185
/**
 * Parse one reference number such as "3", "3b", "S2" or "S2a".
 *
 * - A leading "S" (any case) marks the reference as supplementary.
 * - A single trailing letter is treated as a panel suffix and lower-cased.
 * - An explicit `suffix` argument takes precedence over an embedded one.
 *
 * Input without any leading digits (e.g. "abc") yields `num: 0`, the same value
 * the empty/invalid-input guard returns, so callers can test `num !== 0` to find
 * out whether a number was actually present. (Previously such input produced NaN.)
 *
 * @param numStr Raw number text.
 * @param suffix Optional suffix overriding any letter embedded in `numStr`.
 * @returns The parsed components; never contains NaN.
 */
export function parseRefNumber(numStr: string, suffix: string | null = null): ParsedRefNumber {
  if (!numStr || typeof numStr !== 'string') {
    return { isSupp: false, num: 0, suffix: suffix || null };
  }

  const isSupp = numStr.toUpperCase().startsWith('S');
  const numPart = isSupp ? numStr.slice(1) : numStr;

  // Digits plus an optional single panel letter, e.g. "1a".
  const match = numPart.match(/^(\d+)([a-z])?$/i);

  // Fall back to a lenient parse ("12abc" → 12) when the strict pattern fails.
  const parsed = parseInt(match?.[1] ?? numPart, 10);
  const num = Number.isNaN(parsed) ? 0 : parsed;

  const extractedSuffix = suffix || match?.[2] || null;
  return { isSupp, num, suffix: extractedSuffix ? extractedSuffix.toLowerCase() : null };
}
200
-
201
/**
 * Expand a reference list such as "1, 2, and 3", "1a-c", "1a-3b" or "1a, b-d"
 * into individual references.
 *
 * Normalization: " and " and "&" become commas; en/em dashes become hyphens.
 * The list is then split on commas and each item is handled as either
 *  - a range (contains "-"):
 *      - cross-number suffix range "1a-3b": for each number, letters run from the
 *        start letter (first number) or 'a' up to the end letter (last number)
 *        or the larger of the two letters (numbers in between);
 *      - same-number suffix range "1a-c" → 1a, 1b, 1c;
 *      - plain number range "1-3" → 1, 2, 3;
 *  - a bare letter following a full reference ("b" after "1a" → 1b);
 *  - a full reference ("1", "1a", "S1", "S1a").
 *
 * Fixes relative to the previous implementation:
 *  - A range that starts with a bare letter ("1a, b-c") now inherits number and
 *    supplementary flag from the preceding full reference and keeps its start
 *    letter, instead of producing `num: NaN` and restarting at 'a'.
 *  - A same-number suffix range that carries its own number no longer inherits
 *    the supplementary flag of the previous item ("S2, 1a-c" keeps 1a–1c non-supp).
 *
 * @param listStr Raw list text (group 2 of a detection pattern).
 * @returns One entry per expanded reference, in reading order; empty for
 *          empty or non-string input.
 */
export function parseReferenceList(listStr: string): ParsedRefNumber[] {
  const results: ParsedRefNumber[] = [];
  if (!listStr || typeof listStr !== 'string') return results;

  const normalized = listStr
    .replace(/\s+and\s+/gi, ', ')
    .replace(/[–—]/g, '-') // en-dash, em-dash → hyphen
    .replace(/&/g, ', '); // & → comma

  // Split on commas only; hyphens denote ranges and are handled per item.
  const items = normalized.split(/\s*,\s*/).filter((p) => p.trim());

  const isSingleLetter = (s: string): boolean => /^[a-z]$/i.test(s);
  const codeOfA = 'a'.charCodeAt(0);

  // Last reference that carried an explicit number; bare letters attach to it.
  let lastFullRef: { num: number; isSupp: boolean } | null = null;

  for (const item of items) {
    const trimmed = item.trim();
    if (!trimmed) continue;

    if (!trimmed.includes('-')) {
      if (isSingleLetter(trimmed) && lastFullRef) {
        // Implicit prefix: "b" after "1a" means "1b".
        results.push({
          num: lastFullRef.num,
          isSupp: lastFullRef.isSupp,
          suffix: trimmed.toLowerCase(),
        });
      } else {
        const ref = parseRefNumber(trimmed);
        results.push(ref);
        lastFullRef = { num: ref.num, isSupp: ref.isSupp };
      }
      continue;
    }

    // ---- Range item ----
    const bounds = trimmed.split('-').map((s) => s.trim());
    const start = bounds[0] || '';
    const end = bounds[1] || '';

    // What a bound without its own number falls back to.
    const inherited: { num: number; isSupp: boolean } = lastFullRef ?? { num: 1, isSupp: false };

    // A bare-letter bound is a suffix, never a number (and never an "S" prefix).
    const startRef: ParsedRefNumber = isSingleLetter(start)
      ? { num: inherited.num, isSupp: inherited.isSupp, suffix: start.toLowerCase() }
      : parseRefNumber(start);
    const endRef: ParsedRefNumber = isSingleLetter(end)
      ? { num: startRef.num, isSupp: startRef.isSupp, suffix: end.toLowerCase() }
      : parseRefNumber(end);

    if (startRef.suffix && endRef.suffix && startRef.num !== endRef.num) {
      // Cross-number suffix range, e.g. "1a-3b" → 1a, 1b, 2a, 2b, 3a, 3b.
      const firstCode = startRef.suffix.charCodeAt(0);
      const lastCode = endRef.suffix.charCodeAt(0);
      const widestCode = Math.max(firstCode, lastCode);

      for (let n = startRef.num; n <= endRef.num; n++) {
        const from = n === startRef.num ? firstCode : codeOfA;
        const to = n === endRef.num ? lastCode : widestCode;
        for (let code = from; code <= to; code++) {
          results.push({ num: n, isSupp: startRef.isSupp, suffix: String.fromCharCode(code) });
        }
      }
      lastFullRef = { num: endRef.num, isSupp: startRef.isSupp };
    } else if (startRef.suffix || endRef.suffix) {
      // Suffix range on a single number, e.g. "1a-c".
      const hasOwnNumber = Number.isFinite(startRef.num) && startRef.num !== 0;
      const num = hasOwnNumber ? startRef.num : inherited.num;
      // Only borrow the supplementary flag when the number itself was borrowed.
      const isSupp = hasOwnNumber ? startRef.isSupp : startRef.isSupp || inherited.isSupp;
      const from = (startRef.suffix || 'a').charCodeAt(0);
      const to = (endRef.suffix || 'a').charCodeAt(0);

      for (let code = from; code <= to; code++) {
        results.push({ num, isSupp, suffix: String.fromCharCode(code) });
      }
      lastFullRef = { num, isSupp };
    } else {
      // Plain number range, e.g. "1-3".
      for (let n = startRef.num; n <= endRef.num; n++) {
        results.push({ num: n, isSupp: startRef.isSupp, suffix: null });
      }
      lastFullRef = { num: endRef.num, isSupp: startRef.isSupp };
    }
  }

  return results;
}
314
-
315
/**
 * Scan section files for `{#fig:label}`, `{#tbl:label}` and `{#eq:label}` anchors
 * and assign each one a number in order of appearance.
 *
 * File selection:
 *  - when `sections` is a non-empty array, those files (that exist in
 *    `directory`) are used in the given order;
 *  - otherwise the list comes from rev.yaml / sections.yaml via
 *    discoverSectionFiles(). If no config is found the registry is empty —
 *    the directory is never scanned blindly, so stray .md files cannot
 *    disturb numbering.
 *
 * Numbering:
 *  - figures and tables keep separate main and supplementary counters; a file
 *    counts as supplementary when its name contains "supp" or "appendix"
 *    (case-insensitive);
 *  - equations share one counter and are never marked supplementary.
 *
 * @param directory Project directory containing the section files.
 * @param sections  Optional explicit, ordered list of section file names.
 * @returns Label → info maps per kind plus number → label reverse lookups.
 * @throws TypeError when `directory` is not a string.
 */
export function buildRegistry(directory: string, sections?: string[]): Registry {
  if (typeof directory !== 'string') {
    throw new TypeError(`directory must be a string, got ${typeof directory}`);
  }

  const figures = new Map<string, FigureInfo>();
  const tables = new Map<string, FigureInfo>();
  const equations = new Map<string, FigureInfo>();

  // Explicit sections win; otherwise rely on project config (may yield none).
  const orderedFiles: string[] =
    Array.isArray(sections) && sections.length > 0
      ? sections.filter((f) => fs.existsSync(path.join(directory, f)))
      : discoverSectionFiles(directory);

  // Running counters: main and supplementary series are numbered independently.
  let figNum = 0;
  let figSuppNum = 0;
  let tblNum = 0;
  let tblSuppNum = 0;
  let eqNum = 0;

  for (const file of orderedFiles) {
    const content = fs.readFileSync(path.join(directory, file), 'utf-8');
    const loweredName = file.toLowerCase();
    const isSupp = loweredName.includes('supp') || loweredName.includes('appendix');

    // Shared global regex: always restart from the beginning of the new text.
    ANCHOR_PATTERN.lastIndex = 0;
    for (
      let hit = ANCHOR_PATTERN.exec(content);
      hit !== null;
      hit = ANCHOR_PATTERN.exec(content)
    ) {
      const typeRaw = hit[1];
      const label = hit[2];
      if (!typeRaw || !label) continue;

      switch (typeRaw.toLowerCase()) {
        case 'fig': {
          const num = isSupp ? ++figSuppNum : ++figNum;
          figures.set(label, { label, num, isSupp, file });
          break;
        }
        case 'tbl': {
          const num = isSupp ? ++tblSuppNum : ++tblNum;
          tables.set(label, { label, num, isSupp, file });
          break;
        }
        case 'eq': {
          eqNum++;
          equations.set(label, { label, num: eqNum, isSupp: false, file });
          break;
        }
      }
    }
  }

  // Reverse lookup: number → label, split by main / supplementary series.
  const byNumber: Registry['byNumber'] = {
    fig: new Map(),
    figS: new Map(),
    tbl: new Map(),
    tblS: new Map(),
    eq: new Map(),
  };

  figures.forEach((info, label) => {
    byNumber[info.isSupp ? 'figS' : 'fig'].set(info.num, label);
  });
  tables.forEach((info, label) => {
    byNumber[info.isSupp ? 'tblS' : 'tbl'].set(info.num, label);
  });
  equations.forEach((info, label) => {
    byNumber.eq.set(info.num, label);
  });

  return { figures, tables, equations, byNumber };
}
418
-
419
/**
 * Turn a label into its human-readable form, e.g. "Figure 1" or "Table S2".
 *
 * @param type     Which collection to look in.
 * @param label    Label without the `fig:` / `tbl:` / `eq:` prefix.
 * @param registry Registry produced by buildRegistry().
 * @returns The display string, or null when the registry is missing/incomplete
 *          or the label is unknown.
 */
export function labelToDisplay(
  type: 'fig' | 'tbl' | 'eq',
  label: string,
  registry: Registry
): string | null {
  if (!registry || !registry.figures) return null;

  let collection: Registry['figures'] | Registry['tables'] | Registry['equations'];
  let prefix: string;
  switch (type) {
    case 'fig':
      collection = registry.figures;
      prefix = 'Figure';
      break;
    case 'tbl':
      collection = registry.tables;
      prefix = 'Table';
      break;
    default:
      // Anything that is not 'fig' or 'tbl' is looked up as an equation.
      collection = registry.equations;
      prefix = 'Equation';
      break;
  }

  const info = collection.get(label);
  if (!info) return null;

  const shownNumber = info.isSupp ? `S${info.num}` : `${info.num}`;
  return `${prefix} ${shownNumber}`;
}
440
-
441
/**
 * Reverse lookup: find the label behind a displayed number (e.g. Figure 1 → "heatmap").
 *
 * @param type     Reference kind.
 * @param num      Displayed number, without any "S" prefix.
 * @param isSupp   True to search the supplementary series (`figS` / `tblS`).
 * @param registry Registry produced by buildRegistry().
 * @returns The label, or null when the registry is missing, the series does not
 *          exist (e.g. supplementary equations) or the number is unassigned.
 */
export function numberToLabel(
  type: 'fig' | 'tbl' | 'eq',
  num: number,
  isSupp: boolean,
  registry: Registry
): string | null {
  if (!registry || !registry.byNumber) return null;

  const series = (isSupp ? `${type}S` : type) as keyof Registry['byNumber'];
  const label = registry.byNumber[series]?.get(num);
  return label || null;
}
455
-
456
/**
 * Find every hardcoded reference ("Figure 1", "Tables 2-4", "Eq. 3a, b") in `text`.
 *
 * Each detection pattern is run over the whole text; group 2 of a match (the raw
 * reference list) is expanded with parseReferenceList(). Matches whose list
 * expands to nothing are dropped.
 *
 * @param text Text to scan.
 * @returns Detected references sorted by their position in `text`.
 * @throws TypeError when `text` is not a string.
 */
export function detectHardcodedRefs(text: string): DetectedRef[] {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  const found: DetectedRef[] = [];

  Object.entries(DETECTION_PATTERNS).forEach(([kind, regex]) => {
    // Shared global regex: restart from the beginning for this text.
    regex.lastIndex = 0;

    for (let hit = regex.exec(text); hit !== null; hit = regex.exec(text)) {
      // hit[1] = prefix as written, hit[2] = raw reference list.
      const listStr = hit[2];
      if (!listStr) continue;

      const numbers = parseReferenceList(listStr);
      if (numbers.length === 0) continue;

      found.push({
        type: normalizeType(kind) as 'fig' | 'tbl' | 'eq',
        match: hit[0],
        numbers,
        position: hit.index,
      });
    }
  });

  // Patterns were scanned one kind at a time; restore reading order.
  found.sort((left, right) => left.position - right.position);
  return found;
}
495
-
496
/**
 * Rewrite hardcoded references ("Figure 1") as @-style references ("@fig:label").
 *
 * For each detected reference, every expanded number is looked up in the
 * registry. The reference is replaced (labels joined with "; ") only when ALL of
 * its numbers resolve; otherwise it is left untouched and one warning per
 * unknown number is recorded. A reference is also left alone when one of its
 * target labels already occurs within the REF_CONTEXT_WINDOW characters before
 * it, to avoid duplicating a reference that was just restored.
 *
 * Input validation is performed by detectHardcodedRefs().
 *
 * @param text     Text to convert.
 * @param registry Registry used to resolve numbers to labels.
 * @returns The converted text, the replacements made and any warnings. Both
 *          lists are in processing order, i.e. from the end of the text backwards.
 */
export function convertHardcodedRefs(text: string, registry: Registry): ConversionResult {
  const detected = detectHardcodedRefs(text);
  const conversions: Array<{ from: string; to: string }> = [];
  const warnings: string[] = [];

  let output = text;

  // Walk backwards so earlier positions stay valid after each splice.
  let index = detected.length;
  while (index-- > 0) {
    const ref = detected[index];
    if (!ref) continue;

    const targets = ref.numbers.map(({ num, isSupp }) => {
      const label = numberToLabel(ref.type, num, isSupp, registry);
      if (label) return `@${ref.type}:${label}`;

      const displayNum = isSupp ? `S${num}` : `${num}`;
      warnings.push(`Unknown reference: ${ref.type} ${displayNum} (no matching label)`);
      // The original text acts as a marker for "could not resolve".
      return ref.match;
    });

    // Nothing to do, or at least one number failed to resolve: keep the original.
    if (targets.length === 0 || targets.includes(ref.match)) continue;

    // Skip when the @-reference is already present just before this position,
    // e.g. "@fig:mapFigure 1" left behind by an import round-trip.
    const windowStart = Math.max(0, ref.position - REF_CONTEXT_WINDOW);
    const preceding = output.slice(windowStart, ref.position);
    if (targets.some((target) => preceding.includes(target))) continue;

    const replacement = targets.join('; ');
    const head = output.slice(0, ref.position);
    const tail = output.slice(ref.position + ref.match.length);
    output = head + replacement + tail;

    conversions.push({ from: ref.match, to: replacement });
  }

  return { converted: output, conversions, warnings };
}
548
-
549
/**
 * Find every @-style reference (`@fig:label`, `@tbl:label`, `@eq:label`) in `text`.
 *
 * REF_PATTERN is case-insensitive, so capitalized prefixes such as `@Fig:label`
 * are matched too. The returned `type` is always lower-cased so that it really is
 * one of 'fig' | 'tbl' | 'eq'; previously the raw capture was cast unchanged, and
 * consumers doing strict comparisons (labelToDisplay, the `fig:label` keys built
 * by detectForwardRefs / resolveSupplementaryRefs) silently mishandled `@Fig:…`.
 * `match` still holds the text exactly as written, so replacements by position
 * and length remain correct.
 *
 * @param text Text to scan.
 * @returns References in order of appearance.
 * @throws TypeError when `text` is not a string.
 */
export function detectDynamicRefs(text: string): DynamicRef[] {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  const refs: DynamicRef[] = [];

  // Shared global regex: restart from the beginning for this text.
  REF_PATTERN.lastIndex = 0;
  let match: RegExpExecArray | null;

  while ((match = REF_PATTERN.exec(text)) !== null) {
    const rawType = match[1];
    const label = match[2];
    if (!rawType || !label) continue;

    refs.push({
      type: rawType.toLowerCase() as 'fig' | 'tbl' | 'eq',
      label,
      match: match[0],
      position: match.index,
    });
  }

  return refs;
}
575
-
576
/**
 * Summarize the references found in a piece of text: @-style references,
 * hardcoded references, and how many anchors of each kind it defines.
 *
 * Anchor types are compared case-insensitively, matching ANCHOR_PATTERN's `i`
 * flag and the way buildRegistry() classifies anchors. (Previously `{#Fig:x}`
 * was matched by the pattern but not counted.)
 *
 * @param text     Text to analyze.
 * @param registry Currently unused by this function; kept for interface compatibility.
 * @returns Dynamic refs, hardcoded refs and per-kind anchor counts.
 * @throws TypeError (from the detectors) when `text` is not a string.
 */
export function getRefStatus(text: string, registry: Registry): RefStatus {
  const dynamic = detectDynamicRefs(text);
  const hardcoded = detectHardcodedRefs(text) as HardcodedRef[];

  const anchors = { figures: 0, tables: 0, equations: 0 };

  // Shared global regex: restart from the beginning for this text.
  ANCHOR_PATTERN.lastIndex = 0;
  let match: RegExpExecArray | null;
  while ((match = ANCHOR_PATTERN.exec(text)) !== null) {
    const type = match[1]?.toLowerCase();
    if (!type) continue;

    if (type === 'fig') anchors.figures++;
    else if (type === 'tbl') anchors.tables++;
    else if (type === 'eq') anchors.equations++;
  }

  return { dynamic, hardcoded, anchors };
}
603
-
604
/**
 * Find forward references in combined text: @-references that appear before
 * their `{#type:label}` anchor, or whose anchor does not exist at all.
 *
 * Keys have the form "type:label" with the type lower-cased on both the anchor
 * and the reference side, because both patterns are case-insensitive. Without
 * this, a capitalized reference such as `@Fig:map` never matched its
 * `{#fig:map}` anchor and was always reported as a forward reference. Labels
 * stay case-sensitive.
 *
 * @param text Combined document text.
 * @returns `forwardRefs` — the offending references in order of appearance;
 *          `anchorPositions` — first position of each anchor, keyed by "type:label".
 * @throws TypeError (from detectDynamicRefs) when `text` is not a string.
 */
export function detectForwardRefs(text: string): {
  forwardRefs: Array<{ type: string; label: string; match: string; position: number }>;
  anchorPositions: Map<string, number>;
} {
  const keyFor = (type: string, label: string): string => `${type.toLowerCase()}:${label}`;

  // Map "fig:label" → position of its first anchor.
  const anchorPositions = new Map<string, number>();
  ANCHOR_PATTERN.lastIndex = 0;
  let match: RegExpExecArray | null;
  while ((match = ANCHOR_PATTERN.exec(text)) !== null) {
    const type = match[1];
    const label = match[2];
    if (!type || !label) continue;

    const key = keyFor(type, label);
    // Keep only the first occurrence when an anchor is defined more than once.
    if (!anchorPositions.has(key)) {
      anchorPositions.set(key, match.index);
    }
  }

  const refs = detectDynamicRefs(text);

  // Forward ref: anchor missing, or defined later than the reference.
  const forwardRefs = refs.filter((ref) => {
    const anchorPos = anchorPositions.get(keyFor(ref.type, ref.label));
    return anchorPos === undefined || ref.position < anchorPos;
  });

  return { forwardRefs, anchorPositions };
}
640
-
641
/**
 * Replace forward references with their display text ("Figure 2").
 *
 * Only references reported by detectForwardRefs() are touched; all others are
 * left in @-form for pandoc-crossref, which keeps them as clickable links.
 * A forward reference whose label is not in the registry is left unchanged and
 * listed under `unresolved`.
 *
 * @param text     Combined document text.
 * @param registry Registry used to look up display numbers.
 * @returns The rewritten text plus the resolved and unresolved references. Both
 *          lists are in processing order (end of text first); positions refer
 *          to the ORIGINAL text.
 */
export function resolveForwardRefs(
  text: string,
  registry: Registry
): {
  text: string;
  resolved: Array<{ from: string; to: string; position: number }>;
  unresolved: Array<{ ref: string; position: number }>;
} {
  const resolved: Array<{ from: string; to: string; position: number }> = [];
  const unresolved: Array<{ ref: string; position: number }> = [];

  const { forwardRefs } = detectForwardRefs(text);
  let output = text;

  // Walk backwards so earlier positions stay valid after each splice.
  let index = forwardRefs.length;
  while (index-- > 0) {
    const ref = forwardRefs[index];
    if (!ref) continue;

    const display = labelToDisplay(ref.type as 'fig' | 'tbl' | 'eq', ref.label, registry);
    if (!display) {
      unresolved.push({ ref: ref.match, position: ref.position });
      continue;
    }

    const head = output.slice(0, ref.position);
    const tail = output.slice(ref.position + ref.match.length);
    output = head + display + tail;
    resolved.push({ from: ref.match, to: display, position: ref.position });
  }

  return { text: output, resolved, unresolved };
}
683
-
684
/**
 * Resolve every reference to a supplementary figure/table and remove the
 * corresponding anchor attributes.
 *
 * pandoc-crossref numbers all figures in one sequence and cannot produce
 * "Figure S1". So, for every registry entry flagged supplementary:
 *  1. each `@fig:label` / `@tbl:label` pointing at it is replaced by plain text
 *     ("Figure S1", "Table S1");
 *  2. its `{#fig:label}` / `{#fig:label ...}` attribute block is deleted so
 *     pandoc-crossref does not number it.
 * Equations are never treated as supplementary here.
 *
 * @param text     Document text.
 * @param registry Registry identifying supplementary items.
 * @returns The rewritten text and a log of changes; stripped anchors are logged
 *          with `to: '(stripped)'`. When there are no supplementary items the
 *          text is returned unchanged with an empty log.
 */
export function resolveSupplementaryRefs(
  text: string,
  registry: Registry
): {
  text: string;
  resolved: Array<{ from: string; to: string }>;
} {
  const resolved: Array<{ from: string; to: string }> = [];

  // Keys ("fig:label" / "tbl:label") of all supplementary items.
  const suppKeys = new Set<string>();
  registry.figures.forEach((info, label) => {
    if (info.isSupp) suppKeys.add(`fig:${label}`);
  });
  registry.tables.forEach((info, label) => {
    if (info.isSupp) suppKeys.add(`tbl:${label}`);
  });

  if (suppKeys.size === 0) return { text, resolved };

  let output = text;

  // Step 1: replace references, last to first so positions remain valid.
  const refs = detectDynamicRefs(output);
  let index = refs.length;
  while (index-- > 0) {
    const ref = refs[index];
    if (!ref) continue;
    if (!suppKeys.has(`${ref.type}:${ref.label}`)) continue;

    const display = labelToDisplay(ref.type as 'fig' | 'tbl' | 'eq', ref.label, registry);
    if (!display) continue;

    const head = output.slice(0, ref.position);
    const tail = output.slice(ref.position + ref.match.length);
    output = head + display + tail;
    resolved.push({ from: ref.match, to: display });
  }

  // Step 2: drop the anchor attribute blocks, with or without extra attributes.
  suppKeys.forEach((key) => {
    const escaped = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const anchorBlock = new RegExp(`\\{#${escaped}(?:\\s[^}]*)?\\}`, 'g');
    output = output.replace(anchorBlock, (found) => {
      resolved.push({ from: found, to: '(stripped)' });
      return '';
    });
  });

  return { text: output, resolved };
}
744
-
745
- /**
746
- * Format registry for display
747
- */
748
- export function formatRegistry(registry: Registry): string {
749
- const lines: string[] = [];
750
-
751
- if (registry.figures.size > 0) {
752
- lines.push('Figures:');
753
- for (const [label, info] of registry.figures) {
754
- const num = info.isSupp ? `S${info.num}` : info.num;
755
- lines.push(` Figure ${num}: @fig:${label} (${info.file})`);
756
- }
757
- }
758
-
759
- if (registry.tables.size > 0) {
760
- if (lines.length > 0) lines.push('');
761
- lines.push('Tables:');
762
- for (const [label, info] of registry.tables) {
763
- const num = info.isSupp ? `S${info.num}` : info.num;
764
- lines.push(` Table ${num}: @tbl:${label} (${info.file})`);
765
- }
766
- }
767
-
768
- if (registry.equations.size > 0) {
769
- if (lines.length > 0) lines.push('');
770
- lines.push('Equations:');
771
- for (const [label, info] of registry.equations) {
772
- lines.push(` Equation ${info.num}: @eq:${label} (${info.file})`);
773
- }
774
- }
775
-
776
- if (lines.length === 0) {
777
- lines.push('No figure/table anchors found.');
778
- }
779
-
780
- return lines.join('\n');
781
- }
1
+ /**
2
+ * Cross-reference handling - dynamic figure/table references
3
+ *
4
+ * Enables:
5
+ * - @fig:label syntax in source (auto-numbered)
6
+ * - Conversion to "Figure 1" in Word output
7
+ * - Auto-conversion back during import
8
+ */
9
+
10
+ import * as fs from 'fs';
11
+ import * as path from 'path';
12
+ import YAML from 'yaml';
13
+ import type {
14
+ RefNumber,
15
+ HardcodedRef,
16
+ DynamicRef,
17
+ FigureInfo,
18
+ Registry,
19
+ RefStatus,
20
+ ConversionResult,
21
+ } from './types.js';
22
+
23
+ // =============================================================================
24
+ // Constants
25
+ // =============================================================================
26
+
27
/**
 * Number of characters of text immediately preceding a hardcoded reference
 * that convertHardcodedRefs() inspects to decide whether an equivalent
 * @-style reference is already present (and the conversion should be skipped).
 */
const REF_CONTEXT_WINDOW = 100;

/**
 * Minimum word length for similarity calculations.
 * NOTE(review): nothing in this module reads this constant — confirm it is still needed.
 */
const MIN_WORD_LENGTH = 2;
32
+
33
+ // =============================================================================
34
+ // Type Definitions (Internal)
35
+ // =============================================================================
36
+
37
/**
 * Reference info (internal use in registry building).
 *
 * NOTE(review): the registry code in this module stores `FigureInfo` values
 * with the same four fields; this interface itself is not referenced — confirm
 * whether it can be replaced by `FigureInfo`.
 */
interface RefInfo {
  /** Anchor label (presumably without the `fig:` / `tbl:` / `eq:` prefix — confirm). */
  label: string;
  /** Sequence number assigned to the anchor. */
  num: number;
  /** Whether the anchor belongs to supplementary material. */
  isSupp: boolean;
  /** Section file the anchor was found in. */
  file: string;
}
46
+
47
/**
 * Parsed reference number components, as produced by parseRefNumber() and
 * parseReferenceList().
 */
interface ParsedRefNumber {
  /** True when the number carried a supplementary "S" prefix (e.g. "S1"). */
  isSupp: boolean;
  /** Numeric part of the reference (e.g. 2 for "2a"). */
  num: number;
  /** Panel letter, lowercased (e.g. "a" for "2a"), or null when absent. */
  suffix: string | null;
}
55
+
56
/**
 * A hardcoded reference found in text by detectHardcodedRefs(), together with
 * the individual numbers its reference list expands to.
 */
interface DetectedRef {
  /** Normalized reference kind. */
  type: 'fig' | 'tbl' | 'eq';
  /** Full matched text, e.g. "Figs. 1, 3-5, and 7". */
  match: string;
  /** Every number the matched list refers to (ranges expanded). */
  numbers: ParsedRefNumber[];
  /** Character offset of `match` in the scanned text. */
  position: number;
}
65
+
66
+ // =============================================================================
67
+ // Internal Helpers
68
+ // =============================================================================
69
+
70
/**
 * Discover the ordered section files of a project from its config files.
 *
 * Sources, in priority order:
 *  1. `rev.yaml` — a non-empty `sections` array, used in the order written.
 *  2. `sections.yaml` — a `sections` mapping of file name → settings, sorted by
 *     each entry's `order` (entries without a usable `order`, including entries
 *     with no settings at all, sort last with weight 999).
 *
 * Only files that exist inside `directory` are returned. Read or parse failures
 * are deliberately non-fatal: they are logged when the DEBUG environment
 * variable is set and the next source is tried.
 *
 * @param directory Project directory containing the config and section files.
 * @returns Section file names relative to `directory`; an empty array when no
 *   usable config is found (the caller must handle that case).
 */
function discoverSectionFiles(directory: string): string[] {
  const existsInDirectory = (file: string): boolean => fs.existsSync(path.join(directory, file));

  // An empty YAML document parses to null, so never dereference the result directly.
  const readSectionsField = (configPath: string): unknown => {
    const parsed: unknown = YAML.parse(fs.readFileSync(configPath, 'utf-8'));
    if (parsed === null || typeof parsed !== 'object') return undefined;
    return (parsed as { sections?: unknown }).sections;
  };

  // Try rev.yaml first
  const revYamlPath = path.join(directory, 'rev.yaml');
  if (fs.existsSync(revYamlPath)) {
    try {
      const sections = readSectionsField(revYamlPath);
      if (Array.isArray(sections) && sections.length > 0) {
        // Skip non-string entries instead of letting path.join throw on them.
        return sections.filter(
          (f): f is string => typeof f === 'string' && existsInDirectory(f)
        );
      }
    } catch (e) {
      if (process.env.DEBUG) {
        console.warn('crossref: YAML parse error in rev.yaml:', (e as Error).message);
      }
    }
  }

  // Try sections.yaml
  const sectionsPath = path.join(directory, 'sections.yaml');
  if (fs.existsSync(sectionsPath)) {
    try {
      const sections = readSectionsField(sectionsPath);
      if (sections !== null && typeof sections === 'object') {
        // A section declared with no settings (`intro.md:`) has a null value;
        // treat it like an entry without `order` rather than throwing.
        const orderOf = (settings: unknown): number => {
          const raw =
            settings !== null && typeof settings === 'object'
              ? (settings as { order?: unknown }).order
              : undefined;
          const weight = Number(raw ?? 999);
          return Number.isNaN(weight) ? 999 : weight;
        };
        return Object.entries(sections)
          .sort((a, b) => orderOf(a[1]) - orderOf(b[1]))
          .map(([file]) => file)
          .filter(existsInDirectory);
      }
    } catch (e) {
      if (process.env.DEBUG) {
        console.warn('crossref: YAML parse error in sections.yaml:', (e as Error).message);
      }
    }
  }

  // No config found - return empty array
  // Caller must handle this (either error or use explicit sections)
  return [];
}
113
+
114
+ // =============================================================================
115
+ // Detection Patterns
116
+ // =============================================================================
117
+
118
/**
 * Patterns for detecting hardcoded references
 * Matches complex patterns including:
 * - Simple: "Figure 1", "Fig. 2a", "Table S1"
 * - Ranges: "Figures 1-3", "Fig. 1a-c", "Figs. 1a-3b"
 * - Lists: "Figures 1, 2, and 3", "Fig. 1a, b, c", "Tables 1 & 2"
 * - Mixed: "Figs. 1, 3-5, and 7"
 *
 * Uses a simpler base pattern and parses the full match for lists.
 *
 * All patterns carry the `g` flag, so each RegExp object keeps a `lastIndex`
 * between calls; users of these patterns reset it to 0 before scanning.
 */
const DETECTION_PATTERNS: Record<string, RegExp> = {
  // Captures the full reference including lists with "and"
  // Group 1: type prefix (Figure, Fig., etc.)
  // Group 2: reference list (parsed by parseReferenceList())
  // Matches: "1", "1a", "1-3", "1a-c", "1, 2, 3", "1 and 2", "1, 2 and 3", "1, 2, and 3"
  // Separator: comma/dash/ampersand, optionally followed by "and"
  // Standalone letters must be followed by separator, punctuation, or word boundary
  // Also handles: "see Figure 1", "(Fig. 1)", "in Figures 1–3"
  // Note: 'gi' flag makes these case-insensitive, so "figure 1" is also matched
  figure: /\b(Figures?|Figs?\.?)\s+((?:\d+|S\d+)[a-z]?(?:(?:\s*[-–—,&]\s*(?:and\s+)?|\s+and\s+)(?:(?:\d+|S\d+)[a-z]?|[a-z]\b))*)/gi,

  table: /\b(Tables?|Tabs?\.?)\s+((?:\d+|S\d+)[a-z]?(?:(?:\s*[-–—,&]\s*(?:and\s+)?|\s+and\s+)(?:(?:\d+|S\d+)[a-z]?|[a-z]\b))*)/gi,

  // Equations have no supplementary ("S1") form in this pattern.
  equation: /\b(Equations?|Eqs?\.?)\s+((?:\d+)[a-z]?(?:(?:\s*[-–—,&]\s*(?:and\s+)?|\s+and\s+)(?:(?:\d+)[a-z]?|[a-z]\b))*)/gi,
};

/**
 * Patterns to EXCLUDE from detection (false positives)
 * These look like references but aren't (e.g., "Table of Contents", "Figure skating")
 *
 * NOTE(review): no code in this module applies these patterns; the detection
 * patterns above already require a digit after the prefix — confirm whether
 * this list is still needed.
 */
const EXCLUSION_PATTERNS = [
  /\bTable\s+of\s+Contents?\b/gi,
  /\bFigure\s+skating\b/gi,
  /\bFigure\s+out\b/gi,
  /\bFigure\s+it\b/gi,
  // No trailing \b here, so "Table settings" is matched as well.
  /\bTable\s+setting/gi,
  /\bEquation\s+editor\b/gi,
];

/**
 * Pattern for extracting anchors from markdown: {#fig:label}, {#tbl:label}
 * Group 1: anchor type (fig, tbl, eq — matched case-insensitively)
 * Group 2: label
 * The closing brace is not part of the match, so anchors followed by extra
 * attributes are matched too.
 */
const ANCHOR_PATTERN = /\{#(fig|tbl|eq):([a-zA-Z0-9_-]+)/gi;

/**
 * Pattern for @-style references: @fig:label, @tbl:label
 * Group 1: reference type (fig, tbl, eq — matched case-insensitively)
 * Group 2: label
 */
const REF_PATTERN = /@(fig|tbl|eq):([a-zA-Z0-9_-]+)/gi;
166
+
167
+ // =============================================================================
168
+ // Public API
169
+ // =============================================================================
170
+
171
/**
 * Map a reference-type word to its canonical short form.
 *
 * The input is lowercased and one trailing period is dropped; anything starting
 * with "fig" becomes 'fig', with "tab" becomes 'tbl', and with "eq" becomes
 * 'eq'. Any other value is returned in its lowercased, period-stripped form.
 *
 * @param typeStr Type word such as "Figure", "Figs.", "Table" or "Eq.".
 * @returns 'fig', 'tbl', 'eq', or the cleaned input when no prefix matches.
 * @throws TypeError when `typeStr` is not a string.
 */
export function normalizeType(typeStr: string): 'fig' | 'tbl' | 'eq' | string {
  if (typeof typeStr !== 'string') {
    throw new TypeError(`typeStr must be a string, got ${typeof typeStr}`);
  }

  const cleaned = typeStr.toLowerCase().replace(/\.$/, '');

  // Checked in order; the first matching prefix wins.
  const canonicalByPrefix: ReadonlyArray<readonly [string, 'fig' | 'tbl' | 'eq']> = [
    ['fig', 'fig'],
    ['tab', 'tbl'],
    ['eq', 'eq'],
  ];
  for (const [prefix, canonical] of canonicalByPrefix) {
    if (cleaned.startsWith(prefix)) {
      return canonical;
    }
  }
  return cleaned;
}
184
+
185
/**
 * Parse a single reference number such as "1", "2a", "S1" or "S3b".
 *
 * - A leading "S"/"s" marks a supplementary item only when a digit follows it,
 *   so bare letters (including "s") are never mistaken for supplementary items.
 * - A trailing letter is returned as a lowercased suffix unless an explicit
 *   `suffix` argument is given, which takes precedence.
 * - A bare letter ("b") yields `num: 0` with that letter as suffix, which lets
 *   callers fall back to the previously seen number.
 * - Input without a leading number yields `num: 0` rather than NaN.
 *
 * @param numStr Number text to parse.
 * @param suffix Optional explicit suffix overriding one embedded in `numStr`.
 * @returns The parsed components; `{ isSupp: false, num: 0 }` for empty input.
 */
export function parseRefNumber(numStr: string, suffix: string | null = null): ParsedRefNumber {
  if (!numStr || typeof numStr !== 'string') {
    return { isSupp: false, num: 0, suffix: suffix || null };
  }

  const isSupp = /^s\d/i.test(numStr);
  const numPart = isSupp ? numStr.slice(1) : numStr;

  // "12" / "12a": digits with an optional single trailing letter.
  const numbered = /^(\d+)([a-z])?$/i.exec(numPart);
  // "b": a lone panel letter with no number of its own.
  const bareLetter = numbered ? null : /^[a-z]$/i.exec(numPart);

  let num: number;
  if (numbered && numbered[1]) {
    num = parseInt(numbered[1], 10);
  } else {
    // Lenient fallback keeps accepting inputs such as "1ab" (→ 1).
    const loose = parseInt(numPart, 10);
    num = Number.isNaN(loose) ? 0 : loose;
  }

  const embeddedSuffix = numbered?.[2] ?? bareLetter?.[0] ?? null;
  const chosenSuffix = suffix || embeddedSuffix || null;
  return { isSupp, num, suffix: chosenSuffix ? chosenSuffix.toLowerCase() : null };
}
200
+
201
/**
 * Parse a reference list string like "1, 2, and 3", "1a-c" or "1a-3b" into the
 * individual references it denotes.
 *
 * "and" and "&" are treated as commas, and en/em dashes as hyphens. Each
 * comma-separated item is either:
 *  - a range ("1-3", "1a-c", "1a-3b", "b-c"), which is expanded, or
 *  - a single reference ("1", "1a", "S1"), or
 *  - a bare letter ("b"), which inherits the number of the preceding item.
 *
 * A range that starts with a bare letter ("1a, b-c") inherits the preceding
 * number as well, instead of producing entries with a NaN number.
 *
 * @param listStr List text (capture group 2 of the detection patterns).
 * @returns One entry per referenced number/panel; empty for empty input.
 */
export function parseReferenceList(listStr: string): ParsedRefNumber[] {
  const results: ParsedRefNumber[] = [];
  if (!listStr || typeof listStr !== 'string') return results;

  // Normalize: replace "and" with comma, normalize dashes
  const normalized = listStr
    .replace(/\s+and\s+/gi, ', ')
    .replace(/[–—]/g, '-') // en-dash, em-dash → hyphen
    .replace(/&/g, ', '); // & → comma

  // Split by comma (but not by dash, which indicates ranges)
  const items = normalized.split(/\s*,\s*/).filter((p) => p.trim());

  const isSingleLetter = (s: string): boolean => /^[a-z]$/i.test(s);

  // Last reference that carried its own number, used for implicit prefixes
  let lastFullRef: { num: number; isSupp: boolean } | null = null;

  for (const item of items) {
    const trimmed = item.trim();
    if (!trimmed) continue;

    if (trimmed.includes('-')) {
      // Only the first two dash-separated pieces are used as range bounds.
      const bounds = trimmed.split('-').map((s) => s.trim());
      const start = bounds[0] || '';
      const end = bounds[1] || '';

      // A letter-only bound has no number of its own: num 0 means "inherit".
      const startRef: ParsedRefNumber = isSingleLetter(start)
        ? { num: 0, isSupp: false, suffix: start.toLowerCase() }
        : parseRefNumber(start);
      const endRef: ParsedRefNumber = isSingleLetter(end)
        ? { num: startRef.num, isSupp: startRef.isSupp, suffix: end.toLowerCase() }
        : parseRefNumber(end);

      if (startRef.suffix && endRef.suffix && startRef.num !== endRef.num) {
        // Cross-number suffix range: "1a-3b" → 1a, 1b, 2a, 2b, 3a, 3b.
        // Intermediate numbers run from "a" to the larger of the two letters.
        const maxSuffix = Math.max(startRef.suffix.charCodeAt(0), endRef.suffix.charCodeAt(0));

        for (let n = startRef.num; n <= endRef.num; n++) {
          const suffixStart =
            n === startRef.num ? startRef.suffix.charCodeAt(0) : 'a'.charCodeAt(0);
          const suffixEnd = n === endRef.num ? endRef.suffix.charCodeAt(0) : maxSuffix;

          for (let s = suffixStart; s <= suffixEnd; s++) {
            results.push({
              num: n,
              isSupp: startRef.isSupp,
              suffix: String.fromCharCode(s),
            });
          }
        }
        lastFullRef = { num: endRef.num, isSupp: startRef.isSupp };
      } else if (startRef.suffix || endRef.suffix) {
        // Suffix range on one number: "1a-c", or "b-c" after an earlier "1a".
        const num: number = startRef.num !== 0 ? startRef.num : lastFullRef ? lastFullRef.num : 1;
        const isSupp: boolean = startRef.isSupp
          ? startRef.isSupp
          : lastFullRef
            ? lastFullRef.isSupp
            : false;
        const startCode = (startRef.suffix || 'a').charCodeAt(0);
        const endCode = (endRef.suffix || 'a').charCodeAt(0);

        for (let code = startCode; code <= endCode; code++) {
          results.push({
            num,
            isSupp,
            suffix: String.fromCharCode(code),
          });
        }
        lastFullRef = { num, isSupp };
      } else {
        // Pure number range: "1-3"
        for (let n = startRef.num; n <= endRef.num; n++) {
          results.push({
            num: n,
            isSupp: startRef.isSupp,
            suffix: null,
          });
        }
        lastFullRef = { num: endRef.num, isSupp: startRef.isSupp };
      }
    } else if (isSingleLetter(trimmed) && lastFullRef) {
      // Implicit prefix: "b" after "1a" means "1b"
      results.push({
        num: lastFullRef.num,
        isSupp: lastFullRef.isSupp,
        suffix: trimmed.toLowerCase(),
      });
    } else {
      // Full reference: "1", "1a", "S1", "S1a"
      const ref = parseRefNumber(trimmed);
      results.push(ref);
      lastFullRef = { num: ref.num, isSupp: ref.isSupp };
    }
  }

  return results;
}
314
+
315
/**
 * Build the label registry for a project by scanning its section files for
 * {#fig:label}, {#tbl:label} and {#eq:label} anchors.
 *
 * Files are taken from `sections` when given, otherwise from the project's
 * rev.yaml / sections.yaml via discoverSectionFiles(). The directory is never
 * scanned blindly: stray files (paper_clean.md, etc.) would shift the numbering,
 * so with no section list the registry is simply empty.
 *
 * Anchors are numbered in file order, then in order of appearance. Figures and
 * tables found in a file whose name contains "supp" or "appendix" are numbered
 * in a separate supplementary sequence; equations always share one sequence.
 *
 * @param directory Project directory holding the section files.
 * @param sections Optional explicit, ordered list of section file names.
 * @returns Label → info maps per kind plus the reverse number → label lookup.
 * @throws TypeError when `directory` is not a string.
 */
export function buildRegistry(directory: string, sections?: string[]): Registry {
  if (typeof directory !== 'string') {
    throw new TypeError(`directory must be a string, got ${typeof directory}`);
  }

  const figures = new Map<string, FigureInfo>();
  const tables = new Map<string, FigureInfo>();
  const equations = new Map<string, FigureInfo>();

  // Running counters, main and supplementary sequences kept apart.
  const next = { fig: 0, figS: 0, tbl: 0, tblS: 0, eq: 0 };

  // Explicit sections win; otherwise rely on config discovery (possibly empty).
  const orderedFiles: string[] =
    Array.isArray(sections) && sections.length > 0
      ? sections.filter((f) => fs.existsSync(path.join(directory, f)))
      : discoverSectionFiles(directory);

  for (const file of orderedFiles) {
    const content = fs.readFileSync(path.join(directory, file), 'utf-8');
    const lowerName = file.toLowerCase();
    const isSupp = lowerName.includes('supp') || lowerName.includes('appendix');

    // Shared global regex: always restart from the beginning of this file.
    ANCHOR_PATTERN.lastIndex = 0;
    for (
      let hit = ANCHOR_PATTERN.exec(content);
      hit !== null;
      hit = ANCHOR_PATTERN.exec(content)
    ) {
      const typeRaw = hit[1];
      const label = hit[2];
      if (!typeRaw || !label) continue;

      switch (typeRaw.toLowerCase()) {
        case 'fig': {
          const num = isSupp ? ++next.figS : ++next.fig;
          figures.set(label, { label, num, isSupp, file });
          break;
        }
        case 'tbl': {
          const num = isSupp ? ++next.tblS : ++next.tbl;
          tables.set(label, { label, num, isSupp, file });
          break;
        }
        case 'eq': {
          equations.set(label, { label, num: ++next.eq, isSupp: false, file });
          break;
        }
      }
    }
  }

  // Reverse lookup: number → label, one map per sequence.
  const byNumber: Registry['byNumber'] = {
    fig: new Map(),
    figS: new Map(),
    tbl: new Map(),
    tblS: new Map(),
    eq: new Map(),
  };

  figures.forEach((info, label) => {
    (info.isSupp ? byNumber.figS : byNumber.fig).set(info.num, label);
  });
  tables.forEach((info, label) => {
    (info.isSupp ? byNumber.tblS : byNumber.tbl).set(info.num, label);
  });
  equations.forEach((info, label) => {
    byNumber.eq.set(info.num, label);
  });

  return { figures, tables, equations, byNumber };
}
418
+
419
/**
 * Render a label as its display text, e.g. "Figure 1" or "Table S2".
 *
 * @param type Reference kind; anything other than 'fig' or 'tbl' is looked up
 *   among the equations.
 * @param label Anchor label without its type prefix.
 * @param registry Registry built by buildRegistry().
 * @returns The display string, or null when the registry is missing or the
 *   label is not registered.
 */
export function labelToDisplay(
  type: 'fig' | 'tbl' | 'eq',
  label: string,
  registry: Registry
): string | null {
  if (!registry || !registry.figures) return null;

  let entries: Registry['figures'];
  let word: string;
  switch (type) {
    case 'fig':
      entries = registry.figures;
      word = 'Figure';
      break;
    case 'tbl':
      entries = registry.tables;
      word = 'Table';
      break;
    default:
      entries = registry.equations;
      word = 'Equation';
      break;
  }

  const info = entries.get(label);
  if (!info) return null;

  // Supplementary items are shown with an "S" in front of their number.
  const shownNumber = info.isSupp ? `S${info.num}` : `${info.num}`;
  return `${word} ${shownNumber}`;
}
440
+
441
/**
 * Look up the label registered for a display number (e.g. "heatmap" for
 * Figure 1).
 *
 * @param type Reference kind.
 * @param num Display number within its sequence.
 * @param isSupp Whether to look in the supplementary sequence ("S" numbers).
 * @param registry Registry built by buildRegistry().
 * @returns The label, or null when the registry is missing or no label is
 *   registered under that number (including sequences that do not exist).
 */
export function numberToLabel(
  type: 'fig' | 'tbl' | 'eq',
  num: number,
  isSupp: boolean,
  registry: Registry
): string | null {
  if (!registry || !registry.byNumber) {
    return null;
  }

  // Supplementary sequences live under "<type>S" ("figS", "tblS").
  const sequence = (isSupp ? `${type}S` : type) as keyof Registry['byNumber'];
  const found = registry.byNumber[sequence]?.get(num);
  return found ? found : null;
}
455
+
456
/**
 * Find every hardcoded reference ("Figure 1", "Tables 2-4", "Eq. 3") in text.
 *
 * Each detection pattern is run over the whole text; the list part of every
 * match is expanded by parseReferenceList(). Matches whose list yields no
 * numbers are dropped.
 *
 * @param text Text to scan.
 * @returns Detected references sorted by their position in `text`.
 * @throws TypeError when `text` is not a string.
 */
export function detectHardcodedRefs(text: string): DetectedRef[] {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  const found: DetectedRef[] = [];

  Object.entries(DETECTION_PATTERNS).forEach(([kind, regex]) => {
    // The patterns are shared global regexes: restart each scan at offset 0.
    regex.lastIndex = 0;

    for (let hit = regex.exec(text); hit !== null; hit = regex.exec(text)) {
      // hit[1] is the type prefix (Figure, Fig., ...); hit[2] is the list text.
      const listStr = hit[2];
      if (!listStr) continue;

      const numbers = parseReferenceList(listStr);
      if (numbers.length === 0) continue;

      found.push({
        type: normalizeType(kind) as 'fig' | 'tbl' | 'eq',
        match: hit[0],
        numbers,
        position: hit.index,
      });
    }
  });

  // Matches were collected pattern by pattern; put them in document order.
  found.sort((left, right) => left.position - right.position);
  return found;
}
495
+
496
/**
 * Convert hardcoded references ("Figure 1", "Tables 1 and 2") to @-style
 * references ("@fig:label", "@tbl:a; @tbl:b").
 *
 * Rules:
 *  - A reference is converted only when every number in it maps to a label;
 *    otherwise it is left untouched and one warning per unknown number is
 *    recorded.
 *  - Numbers that map to the same label (e.g. the panels in "Fig. 1a, b") are
 *    emitted once, so the result is "@fig:x" rather than "@fig:x; @fig:x".
 *  - A reference is skipped when one of its @-labels already occurs in the
 *    REF_CONTEXT_WINDOW characters before it, to avoid duplicating a reference
 *    that an import step has already restored.
 *
 * References are processed from the end of the text backwards, so
 * `conversions` and `warnings` are in reverse document order.
 *
 * @param text Text to convert (validated by detectHardcodedRefs()).
 * @param registry Registry used to map numbers to labels.
 * @returns The converted text, the replacements made, and any warnings.
 */
export function convertHardcodedRefs(text: string, registry: Registry): ConversionResult {
  // Input validation delegated to detectHardcodedRefs
  const refs = detectHardcodedRefs(text);
  const conversions: Array<{ from: string; to: string }> = [];
  const warnings: string[] = [];

  // Process in reverse order so earlier positions stay valid after a replacement
  let result = text;
  for (let i = refs.length - 1; i >= 0; i--) {
    const ref = refs[i];
    if (!ref) continue;

    const labels: string[] = [];
    let hasUnknown = false;
    for (const { num, isSupp } of ref.numbers) {
      const label = numberToLabel(ref.type, num, isSupp, registry);
      if (label) {
        const token = `@${ref.type}:${label}`;
        // Panels of one figure share a label; list it only once.
        if (!labels.includes(token)) labels.push(token);
      } else {
        hasUnknown = true;
        const displayNum = isSupp ? `S${num}` : `${num}`;
        warnings.push(`Unknown reference: ${ref.type} ${displayNum} (no matching label)`);
      }
    }

    // Keep the original text unless the whole reference could be resolved.
    if (hasUnknown || labels.length === 0) continue;

    // Skip if the @-syntax already appears in the preceding text
    // This prevents duplication when import restores @fig:x and then we see "Fig. 1"
    // e.g., "@fig:map@fig:map{++@fig:map++}" or "@fig:mapFigure 1" patterns
    const textBefore = result.slice(Math.max(0, ref.position - REF_CONTEXT_WINDOW), ref.position);
    if (labels.some((label) => textBefore.includes(label))) {
      continue;
    }

    const replacement = labels.join('; ');
    result =
      result.slice(0, ref.position) + replacement + result.slice(ref.position + ref.match.length);

    conversions.push({
      from: ref.match,
      to: replacement,
    });
  }

  return { converted: result, conversions, warnings };
}
548
+
549
/**
 * Detect @-style references (@fig:label, @tbl:label, @eq:label) in text.
 *
 * REF_PATTERN matches the type case-insensitively, so "@Fig:map" is detected
 * too. The returned `type` is always lowercased so that it really is one of
 * 'fig' | 'tbl' | 'eq' and can be used for registry lookups; `match` keeps the
 * text exactly as written.
 *
 * @param text Text to scan.
 * @returns References in order of appearance.
 * @throws TypeError when `text` is not a string.
 */
export function detectDynamicRefs(text: string): DynamicRef[] {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  const refs: DynamicRef[] = [];
  // Shared global regex: restart the scan at offset 0.
  REF_PATTERN.lastIndex = 0;
  let match: RegExpExecArray | null;

  while ((match = REF_PATTERN.exec(text)) !== null) {
    const type = match[1];
    const label = match[2];
    if (!type || !label) continue;
    refs.push({
      type: type.toLowerCase() as 'fig' | 'tbl' | 'eq',
      label: label,
      match: match[0],
      position: match.index,
    });
  }

  return refs;
}
575
+
576
/**
 * Summarize the references and anchors found in a piece of text.
 *
 * Anchor types are compared case-insensitively, matching how buildRegistry()
 * treats them, so the counts agree with the registry for the same text.
 *
 * @param text Text (typically one section file) to inspect.
 * @param registry Accepted for API compatibility; not consulted by this function.
 * @returns The @-style references, the hardcoded references, and the number of
 *   figure / table / equation anchors defined in `text`.
 * @throws TypeError when `text` is not a string (raised by the detectors).
 */
export function getRefStatus(text: string, registry: Registry): RefStatus {
  const dynamic = detectDynamicRefs(text);
  const hardcoded = detectHardcodedRefs(text) as HardcodedRef[];

  // Count anchors in this text
  const counts = { fig: 0, tbl: 0, eq: 0 };
  // Shared global regex: restart the scan at offset 0.
  ANCHOR_PATTERN.lastIndex = 0;
  let match: RegExpExecArray | null;
  while ((match = ANCHOR_PATTERN.exec(text)) !== null) {
    const type = match[1]?.toLowerCase();
    if (type === 'fig' || type === 'tbl' || type === 'eq') {
      counts[type]++;
    }
  }

  return {
    dynamic,
    hardcoded,
    anchors: { figures: counts.fig, tables: counts.tbl, equations: counts.eq },
  };
}
603
+
604
/**
 * Detect forward references in combined text.
 *
 * A forward reference is an @-style reference that appears before the
 * {#anchor} it points to, or whose anchor does not exist in `text` at all.
 *
 * Anchor and reference types are matched case-insensitively (both patterns
 * allow any case), so "@Fig:map" placed after "{#fig:map}" is not reported.
 * Keys of `anchorPositions` therefore always use the lowercase type.
 *
 * @param text Combined document text.
 * @returns The forward references, and a map from "type:label" to the offset
 *   of the first anchor with that key.
 * @throws TypeError when `text` is not a string (raised by detectDynamicRefs()).
 */
export function detectForwardRefs(text: string): {
  forwardRefs: Array<{ type: string; label: string; match: string; position: number }>;
  anchorPositions: Map<string, number>;
} {
  const keyFor = (type: string, label: string): string => `${type.toLowerCase()}:${label}`;

  // Build map of anchor positions: "fig:label" -> position
  const anchorPositions = new Map<string, number>();
  // Shared global regex: restart the scan at offset 0.
  ANCHOR_PATTERN.lastIndex = 0;
  let match: RegExpExecArray | null;
  while ((match = ANCHOR_PATTERN.exec(text)) !== null) {
    const type = match[1];
    const label = match[2];
    if (!type || !label) continue;
    const key = keyFor(type, label);
    // Only store first occurrence (in case of duplicates)
    if (!anchorPositions.has(key)) {
      anchorPositions.set(key, match.index);
    }
  }

  // Keep only references whose anchor is missing or comes later in the text
  const forwardRefs = detectDynamicRefs(text).filter((ref) => {
    const anchorPos = anchorPositions.get(keyFor(ref.type, ref.label));
    return anchorPos === undefined || ref.position < anchorPos;
  });

  return { forwardRefs, anchorPositions };
}
640
+
641
/**
 * Replace forward references with their display text ("Figure 2").
 *
 * Only references reported by detectForwardRefs() are touched; every other
 * @-reference is left in place for pandoc-crossref, which keeps it a clickable
 * link. Forward references whose label is not in the registry are left in the
 * text and reported as unresolved.
 *
 * Replacement runs from the last reference to the first, so recorded positions
 * refer to the original text and both result lists are in reverse document order.
 *
 * @param text Combined document text.
 * @param registry Registry used to render labels.
 * @returns The rewritten text plus the resolved and unresolved references.
 */
export function resolveForwardRefs(
  text: string,
  registry: Registry
): {
  text: string;
  resolved: Array<{ from: string; to: string; position: number }>;
  unresolved: Array<{ ref: string; position: number }>;
} {
  const resolved: Array<{ from: string; to: string; position: number }> = [];
  const unresolved: Array<{ ref: string; position: number }> = [];

  const pending = detectForwardRefs(text).forwardRefs;
  let output = text;

  // Walk backwards so offsets of earlier references remain valid.
  for (let idx = pending.length - 1; idx >= 0; idx -= 1) {
    const current = pending[idx];
    if (!current) continue;

    const { match: from, position } = current;
    const to = labelToDisplay(current.type as 'fig' | 'tbl' | 'eq', current.label, registry);

    if (!to) {
      unresolved.push({ ref: from, position });
      continue;
    }

    const head = output.slice(0, position);
    const tail = output.slice(position + from.length);
    output = head + to + tail;
    resolved.push({ from, to, position });
  }

  return { text: output, resolved, unresolved };
}
683
+
684
/**
 * Resolve ALL supplementary references and strip supplementary anchor labels.
 *
 * pandoc-crossref cannot produce "Figure S1" numbering — it numbers all figures
 * sequentially. This function therefore:
 *  1. replaces every @fig:label / @tbl:label that points to a supplementary
 *     item with plain text ("Figure S1", "Table S1"), and
 *  2. removes the {#fig:label} / {#tbl:label} attribute block of each
 *     supplementary anchor so pandoc-crossref ignores it. The whole `{...}`
 *     block is removed, including any other attributes written inside it.
 *
 * Reference types are compared case-insensitively, so "@Fig:label" is resolved
 * as well; otherwise it would keep pointing at an anchor that step 2 removes.
 *
 * @param text Combined document text.
 * @param registry Registry identifying the supplementary figures and tables.
 * @returns The rewritten text and a log of replacements; stripped anchors are
 *   logged with `to: '(stripped)'`.
 */
export function resolveSupplementaryRefs(
  text: string,
  registry: Registry
): {
  text: string;
  resolved: Array<{ from: string; to: string }>;
} {
  const resolved: Array<{ from: string; to: string }> = [];
  let result = text;

  // Collect supplementary labels as "fig:label" / "tbl:label" keys
  const suppLabels = new Set<string>();
  for (const [label, info] of registry.figures) {
    if (info.isSupp) suppLabels.add(`fig:${label}`);
  }
  for (const [label, info] of registry.tables) {
    if (info.isSupp) suppLabels.add(`tbl:${label}`);
  }

  if (suppLabels.size === 0) return { text: result, resolved };

  // 1. Replace all @fig:label / @tbl:label references to supplementary items
  const refs = detectDynamicRefs(result);
  // Process in reverse to preserve positions
  for (let i = refs.length - 1; i >= 0; i--) {
    const ref = refs[i];
    if (!ref) continue;
    // The reference pattern is case-insensitive; registry keys are lowercase.
    const type = ref.type.toLowerCase() as 'fig' | 'tbl' | 'eq';
    if (!suppLabels.has(`${type}:${ref.label}`)) continue;

    const display = labelToDisplay(type, ref.label, registry);
    if (display) {
      result =
        result.slice(0, ref.position) + display + result.slice(ref.position + ref.match.length);
      resolved.push({ from: ref.match, to: display });
    }
  }

  // 2. Strip {#fig:label} and {#tbl:label} attributes from supplementary anchors
  //    so pandoc-crossref does not re-number them
  for (const key of suppLabels) {
    // Match {#fig:label ...} or just {#fig:label}; the label must be followed by
    // whitespace or "}", so a label that is a prefix of another is not affected.
    const escaped = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const pattern = new RegExp(`\\{#${escaped}(?:\\s[^}]*)?\\}`, 'g');
    result = result.replace(pattern, (match) => {
      resolved.push({ from: match, to: '(stripped)' });
      return '';
    });
  }

  return { text: result, resolved };
}
744
+
745
/**
 * Render the registry as human-readable text.
 *
 * Produces one group per non-empty kind (figures, tables, equations), each with
 * a heading line followed by one line per label, with groups separated by a
 * blank line. Supplementary figures and tables are shown with an "S" number.
 *
 * @param registry Registry built by buildRegistry().
 * @returns The listing, or a single "No figure/table anchors found." line when
 *   the registry has no entries at all.
 */
export function formatRegistry(registry: Registry): string {
  const groups: string[] = [];

  if (registry.figures.size > 0) {
    const rows = ['Figures:'];
    registry.figures.forEach((info, label) => {
      const shown = info.isSupp ? `S${info.num}` : info.num;
      rows.push(` Figure ${shown}: @fig:${label} (${info.file})`);
    });
    groups.push(rows.join('\n'));
  }

  if (registry.tables.size > 0) {
    const rows = ['Tables:'];
    registry.tables.forEach((info, label) => {
      const shown = info.isSupp ? `S${info.num}` : info.num;
      rows.push(` Table ${shown}: @tbl:${label} (${info.file})`);
    });
    groups.push(rows.join('\n'));
  }

  if (registry.equations.size > 0) {
    const rows = ['Equations:'];
    registry.equations.forEach((info, label) => {
      rows.push(` Equation ${info.num}: @eq:${label} (${info.file})`);
    });
    groups.push(rows.join('\n'));
  }

  if (groups.length === 0) {
    return 'No figure/table anchors found.';
  }

  // A blank line between groups.
  return groups.join('\n\n');
}