docrev 0.10.0 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. package/.gitattributes +1 -1
  2. package/CHANGELOG.md +173 -164
  3. package/PLAN-tables-and-postprocess.md +850 -850
  4. package/README.md +431 -431
  5. package/bin/rev.js +11 -11
  6. package/bin/rev.ts +145 -145
  7. package/completions/rev.bash +127 -127
  8. package/completions/rev.ps1 +210 -210
  9. package/completions/rev.zsh +207 -207
  10. package/dist/lib/anchor-match.d.ts +1 -1
  11. package/dist/lib/anchor-match.d.ts.map +1 -1
  12. package/dist/lib/anchor-match.js +17 -47
  13. package/dist/lib/anchor-match.js.map +1 -1
  14. package/dist/lib/build.js +4 -4
  15. package/dist/lib/commands/context.d.ts +1 -1
  16. package/dist/lib/commands/context.d.ts.map +1 -1
  17. package/dist/lib/commands/context.js +1 -1
  18. package/dist/lib/commands/context.js.map +1 -1
  19. package/dist/lib/commands/sections.js +7 -7
  20. package/dist/lib/commands/sections.js.map +1 -1
  21. package/dist/lib/commands/sync.d.ts.map +1 -1
  22. package/dist/lib/commands/sync.js +15 -14
  23. package/dist/lib/commands/sync.js.map +1 -1
  24. package/dist/lib/commands/utilities.js +164 -164
  25. package/dist/lib/commands/verify-anchors.js +6 -6
  26. package/dist/lib/commands/verify-anchors.js.map +1 -1
  27. package/dist/lib/commands/word-tools.js +8 -8
  28. package/dist/lib/grammar.js +3 -3
  29. package/dist/lib/macro-filter.lua +201 -201
  30. package/dist/lib/pdf-comments.js +44 -44
  31. package/dist/lib/plugins.js +57 -57
  32. package/dist/lib/pptx-color-filter.lua +37 -37
  33. package/dist/lib/pptx-themes.js +115 -115
  34. package/dist/lib/sections.d.ts +35 -0
  35. package/dist/lib/sections.d.ts.map +1 -1
  36. package/dist/lib/sections.js +81 -0
  37. package/dist/lib/sections.js.map +1 -1
  38. package/dist/lib/spelling.js +2 -2
  39. package/dist/lib/templates.js +387 -387
  40. package/dist/lib/themes.js +51 -51
  41. package/docs-src/build.py +113 -113
  42. package/docs-src/extra.css +208 -208
  43. package/docs-src/md-to-html.lua +6 -6
  44. package/docs-src/template.html +116 -116
  45. package/eslint.config.js +27 -27
  46. package/lib/anchor-match.ts +276 -308
  47. package/lib/annotations.ts +644 -644
  48. package/lib/build.ts +1766 -1766
  49. package/lib/citations.ts +160 -160
  50. package/lib/commands/build.ts +855 -855
  51. package/lib/commands/citations.ts +515 -515
  52. package/lib/commands/comments.ts +1050 -1050
  53. package/lib/commands/context.ts +176 -174
  54. package/lib/commands/core.ts +309 -309
  55. package/lib/commands/doi.ts +435 -435
  56. package/lib/commands/file-ops.ts +372 -372
  57. package/lib/commands/history.ts +320 -320
  58. package/lib/commands/index.ts +87 -87
  59. package/lib/commands/init.ts +259 -259
  60. package/lib/commands/merge-resolve.ts +378 -378
  61. package/lib/commands/preview.ts +178 -178
  62. package/lib/commands/project-info.ts +244 -244
  63. package/lib/commands/quality.ts +517 -517
  64. package/lib/commands/response.ts +454 -454
  65. package/lib/commands/section-boundaries.ts +82 -82
  66. package/lib/commands/sections.ts +451 -451
  67. package/lib/commands/sync.ts +709 -706
  68. package/lib/commands/text-ops.ts +449 -449
  69. package/lib/commands/utilities.ts +448 -448
  70. package/lib/commands/verify-anchors.ts +272 -272
  71. package/lib/commands/word-tools.ts +340 -340
  72. package/lib/comment-realign.ts +517 -517
  73. package/lib/config.ts +84 -84
  74. package/lib/crossref.ts +781 -781
  75. package/lib/csl.ts +191 -191
  76. package/lib/dependencies.ts +98 -98
  77. package/lib/diff-engine.ts +465 -465
  78. package/lib/doi-cache.ts +115 -115
  79. package/lib/doi.ts +897 -897
  80. package/lib/equations.ts +506 -506
  81. package/lib/errors.ts +346 -346
  82. package/lib/format.ts +541 -541
  83. package/lib/git.ts +326 -326
  84. package/lib/grammar.ts +303 -303
  85. package/lib/image-registry.ts +180 -180
  86. package/lib/import.ts +911 -911
  87. package/lib/journals.ts +543 -543
  88. package/lib/macro-filter.lua +201 -201
  89. package/lib/macros.ts +273 -273
  90. package/lib/merge.ts +633 -633
  91. package/lib/orcid.ts +144 -144
  92. package/lib/pdf-comments.ts +263 -263
  93. package/lib/pdf-import.ts +524 -524
  94. package/lib/plugins.ts +362 -362
  95. package/lib/postprocess.ts +188 -188
  96. package/lib/pptx-color-filter.lua +37 -37
  97. package/lib/pptx-template.ts +469 -469
  98. package/lib/pptx-themes.ts +483 -483
  99. package/lib/protect-restore.ts +520 -520
  100. package/lib/rate-limiter.ts +94 -94
  101. package/lib/response.ts +197 -197
  102. package/lib/restore-references.ts +240 -240
  103. package/lib/review.ts +327 -327
  104. package/lib/schema.ts +488 -488
  105. package/lib/scientific-words.ts +73 -73
  106. package/lib/sections.ts +425 -335
  107. package/lib/slides.ts +756 -756
  108. package/lib/spelling.ts +334 -334
  109. package/lib/templates.ts +526 -526
  110. package/lib/themes.ts +742 -742
  111. package/lib/trackchanges.ts +247 -247
  112. package/lib/tui.ts +450 -450
  113. package/lib/types.ts +550 -550
  114. package/lib/undo.ts +250 -250
  115. package/lib/utils.ts +69 -69
  116. package/lib/variables.ts +179 -179
  117. package/lib/word-extraction.ts +806 -806
  118. package/lib/word.ts +643 -643
  119. package/lib/wordcomments.ts +840 -840
  120. package/mkdocs.yml +64 -64
  121. package/package.json +137 -137
  122. package/scripts/postbuild.js +47 -47
  123. package/skill/REFERENCE.md +539 -539
  124. package/skill/SKILL.md +295 -295
  125. package/tsconfig.json +26 -26
  126. package/types/index.d.ts +525 -525
package/lib/crossref.ts CHANGED
@@ -1,781 +1,781 @@
1
- /**
2
- * Cross-reference handling - dynamic figure/table references
3
- *
4
- * Enables:
5
- * - @fig:label syntax in source (auto-numbered)
6
- * - Conversion to "Figure 1" in Word output
7
- * - Auto-conversion back during import
8
- */
9
-
10
- import * as fs from 'fs';
11
- import * as path from 'path';
12
- import YAML from 'yaml';
13
- import type {
14
- RefNumber,
15
- HardcodedRef,
16
- DynamicRef,
17
- FigureInfo,
18
- Registry,
19
- RefStatus,
20
- ConversionResult,
21
- } from './types.js';
22
-
23
- // =============================================================================
24
- // Constants
25
- // =============================================================================
26
-
27
/**
 * How many characters immediately preceding a detected reference are searched
 * for an already-present `@type:label` before a replacement is made.
 */
const REF_CONTEXT_WINDOW = 100;

/**
 * Minimum word length for similarity calculations.
 * NOTE(review): no function in the visible module reads this value — confirm it is still needed.
 */
const MIN_WORD_LENGTH = 2;

// =============================================================================
// Type Definitions (Internal)
// =============================================================================

/**
 * One labelled item as recorded while building a registry.
 * NOTE(review): not referenced in the visible module (FigureInfo is used instead) — confirm.
 */
interface RefInfo {
  /** Anchor label without the `fig:` / `tbl:` / `eq:` prefix. */
  label: string;
  /** Sequential number assigned to the item. */
  num: number;
  /** True when the item is numbered in the supplementary ("S") sequence. */
  isSupp: boolean;
  /** Section file in which the anchor was found. */
  file: string;
}

/**
 * The components of one written reference number such as "3", "S2" or "1b".
 */
interface ParsedRefNumber {
  /** True for supplementary numbers ("S1"). */
  isSupp: boolean;
  /** Numeric part of the reference. */
  num: number;
  /** Lower-case panel letter, or null when there is none. */
  suffix: string | null;
}

/**
 * A hardcoded reference found in text, together with every number it names.
 */
interface DetectedRef {
  /** Normalised reference kind. */
  type: 'fig' | 'tbl' | 'eq';
  /** Exact text that was matched (e.g. "Figs. 1a-c"). */
  match: string;
  /** All numbers the matched text expands to. */
  numbers: ParsedRefNumber[];
  /** Offset of the match within the scanned text. */
  position: number;
}
65
-
66
- // =============================================================================
67
- // Internal Helpers
68
- // =============================================================================
69
-
70
/**
 * Discover the ordered section files of a project from its config files.
 *
 * `rev.yaml` is consulted first: a non-empty `sections` array is used in the
 * listed order. Otherwise `sections.yaml` is consulted: its `sections` mapping
 * is sorted by each entry's `order` value (entries without a usable `order`
 * sort last, as 999). In both cases only files that exist inside `directory`
 * are returned.
 *
 * A config file that is missing, unparsable, empty, or not a mapping is
 * skipped; parse errors are reported only when the DEBUG environment variable
 * is set.
 *
 * @param directory - Directory holding the config and section files.
 * @returns Existing section file names in order, or an empty array when no
 *   config supplies sections (the caller must handle that case).
 */
function discoverSectionFiles(directory: string): string[] {
  const existsInDirectory = (file: string): boolean =>
    fs.existsSync(path.join(directory, file));

  // Load one YAML config as a plain mapping; undefined when unusable.
  const loadConfig = (fileName: string): Record<string, unknown> | undefined => {
    const configPath = path.join(directory, fileName);
    if (!fs.existsSync(configPath)) return undefined;
    try {
      const parsed: unknown = YAML.parse(fs.readFileSync(configPath, 'utf-8'));
      // An empty document parses to null and a scalar document to a primitive;
      // neither can carry a `sections` key.
      if (typeof parsed !== 'object' || parsed === null) return undefined;
      return parsed as Record<string, unknown>;
    } catch (e: unknown) {
      if (process.env.DEBUG) {
        const reason = e instanceof Error ? e.message : String(e);
        console.warn(`crossref: YAML parse error in ${fileName}:`, reason);
      }
      return undefined;
    }
  };

  // Sort key for a sections.yaml entry. Entries may be null or scalars
  // (e.g. `intro.md:` with no value), which must not abort discovery.
  const orderOf = (entry: unknown): number => {
    if (typeof entry !== 'object' || entry === null) return 999;
    const raw = (entry as { order?: unknown }).order;
    if (raw == null) return 999;
    const value = Number(raw);
    return Number.isFinite(value) ? value : 999;
  };

  // 1. rev.yaml: explicit ordered list
  const revSections = loadConfig('rev.yaml')?.sections;
  if (Array.isArray(revSections) && revSections.length > 0) {
    return revSections
      .filter((f): f is string => typeof f === 'string')
      .filter(existsInDirectory);
  }

  // 2. sections.yaml: mapping of file name -> { order, ... }
  const mappedSections = loadConfig('sections.yaml')?.sections;
  if (typeof mappedSections === 'object' && mappedSections !== null) {
    return Object.entries(mappedSections)
      .sort((a, b) => orderOf(a[1]) - orderOf(b[1]))
      .map(([file]) => file)
      .filter(existsInDirectory);
  }

  // No config found - caller must either error out or pass explicit sections
  return [];
}
113
-
114
- // =============================================================================
115
- // Detection Patterns
116
- // =============================================================================
117
-
118
/**
 * Regular expressions that find hardcoded references ("Figure 1", "Tabs. 2-4").
 *
 * Every pattern has the same two capture groups:
 *   1. the type prefix as written (Figure, Figs., Table, Eq., ...)
 *   2. the reference list, to be expanded by parseReferenceList()
 *
 * The list part accepts:
 *   - single items:   "1", "2a", "S1" (the S form is not accepted for equations)
 *   - ranges:         "1-3", "1a-c", "1a-3b" (hyphen, en dash or em dash)
 *   - enumerations:   "1, 2, 3", "1 and 2", "1, 2, and 3", "1 & 2"
 *   - combinations:   "1, 3-5, and 7"
 * A lone letter inside a list ("1a, b, c") must end at a word boundary.
 *
 * All patterns carry the `g` and `i` flags: they are case-insensitive and keep
 * state in `lastIndex`, so users reset `lastIndex` before scanning.
 */
const DETECTION_PATTERNS: Record<string, RegExp> = {
  figure: /\b(Figures?|Figs?\.?)\s+((?:\d+|S\d+)[a-z]?(?:(?:\s*[-–—,&]\s*(?:and\s+)?|\s+and\s+)(?:(?:\d+|S\d+)[a-z]?|[a-z]\b))*)/gi,

  table: /\b(Tables?|Tabs?\.?)\s+((?:\d+|S\d+)[a-z]?(?:(?:\s*[-–—,&]\s*(?:and\s+)?|\s+and\s+)(?:(?:\d+|S\d+)[a-z]?|[a-z]\b))*)/gi,

  equation: /\b(Equations?|Eqs?\.?)\s+((?:\d+)[a-z]?(?:(?:\s*[-–—,&]\s*(?:and\s+)?|\s+and\s+)(?:(?:\d+)[a-z]?|[a-z]\b))*)/gi,
};

/**
 * Phrases that resemble references but are not ("Table of Contents",
 * "Figure skating").
 * NOTE(review): no function in the visible module consults this list — confirm
 * whether detection is meant to apply it.
 */
const EXCLUSION_PATTERNS = [
  /\bTable\s+of\s+Contents?\b/gi,
  /\bFigure\s+skating\b/gi,
  /\bFigure\s+out\b/gi,
  /\bFigure\s+it\b/gi,
  /\bTable\s+setting/gi,
  /\bEquation\s+editor\b/gi,
];

/**
 * Matches the opening of an anchor attribute in markdown: `{#fig:label`,
 * `{#tbl:label`, `{#eq:label`. Group 1 is the type, group 2 the label.
 */
const ANCHOR_PATTERN = /\{#(fig|tbl|eq):([a-zA-Z0-9_-]+)/gi;

/**
 * Matches an @-style reference: `@fig:label`, `@tbl:label`, `@eq:label`.
 * Group 1 is the type, group 2 the label.
 */
const REF_PATTERN = /@(fig|tbl|eq):([a-zA-Z0-9_-]+)/gi;
166
-
167
- // =============================================================================
168
- // Public API
169
- // =============================================================================
170
-
171
/**
 * Map a reference type as written ("Figure", "Figs.", "Tab.", "Eq") onto the
 * short form used throughout this module.
 *
 * The input is lower-cased and one trailing period is dropped; anything
 * starting with "fig", "tab" or "eq" becomes 'fig', 'tbl' or 'eq'.
 *
 * @param typeStr - Type prefix as it appears in text.
 * @returns The short type, or the cleaned-up input when it matches none.
 * @throws TypeError when `typeStr` is not a string.
 */
export function normalizeType(typeStr: string): 'fig' | 'tbl' | 'eq' | string {
  if (typeof typeStr !== 'string') {
    throw new TypeError(`typeStr must be a string, got ${typeof typeStr}`);
  }

  const cleaned = typeStr.toLowerCase().replace(/\.$/, '');

  // Checked in this order; the first matching prefix wins.
  const prefixToType: ReadonlyArray<readonly [string, 'fig' | 'tbl' | 'eq']> = [
    ['fig', 'fig'],
    ['tab', 'tbl'],
    ['eq', 'eq'],
  ];
  for (const [prefix, shortType] of prefixToType) {
    if (cleaned.startsWith(prefix)) return shortType;
  }

  return cleaned;
}
184
-
185
/**
 * Parse one written reference number into its components.
 *
 * Handles supplementary numbers ("S1", "s2") and an embedded panel letter
 * ("1a", "S3b"). An explicitly passed `suffix` takes precedence over an
 * embedded one. The returned suffix is always lower-case.
 *
 * Input that carries no number at all (empty, non-string, a lone letter such
 * as "b" or "s") yields the sentinel `{ isSupp: false, num: 0 }`, never `NaN`,
 * so callers can reliably test `num !== 0`.
 *
 * @param numStr - Number as written, e.g. "1", "1a", "S1".
 * @param suffix - Optional panel letter supplied separately.
 * @returns The parsed components.
 */
export function parseRefNumber(numStr: string, suffix: string | null = null): ParsedRefNumber {
  const explicitSuffix = suffix ? suffix.toLowerCase() : null;
  const unparsable: ParsedRefNumber = { isSupp: false, num: 0, suffix: explicitSuffix };

  if (!numStr || typeof numStr !== 'string') return unparsable;

  const isSupp = numStr.toUpperCase().startsWith('S');
  const numPart = isSupp ? numStr.slice(1) : numStr;

  // "12" or "12a"; anything else falls back to parseInt's lenient prefix parse.
  const match = numPart.match(/^(\d+)([a-z])?$/i);
  const num = parseInt(match?.[1] ?? numPart, 10);
  if (Number.isNaN(num)) return unparsable;

  const embeddedSuffix = match?.[2] ? match[2].toLowerCase() : null;
  return { isSupp, num, suffix: explicitSuffix ?? embeddedSuffix };
}
200
-
201
/**
 * Expand a reference list such as "1, 2, and 3", "1a-c" or "1a-3b" into the
 * individual references it names.
 *
 * " and " and "&" are treated as commas; en and em dashes as hyphens. Each
 * comma-separated item is then one of:
 *   - a full reference ("1", "1a", "S1")
 *   - a lone letter ("b"), which inherits the number of the previous full
 *     reference ("1a, b" -> 1a, 1b)
 *   - a range ("1-3", "1a-c", "1a-3b"); a lone letter on either side of the
 *     hyphen inherits its number the same way ("1a, b-d" -> 1a, 1b, 1c, 1d)
 *
 * Only the first two hyphen-separated segments of an item are used. Ranges
 * whose end precedes their start expand to nothing.
 * NOTE(review): range expansion is unbounded; a very large written range
 * produces one entry per number.
 *
 * @param listStr - The list part of a matched reference.
 * @returns One entry per referenced item, in order; empty for empty or
 *   non-string input.
 */
export function parseReferenceList(listStr: string): ParsedRefNumber[] {
  const results: ParsedRefNumber[] = [];
  if (!listStr || typeof listStr !== 'string') return results;

  // Normalize separators so the list can be split on commas alone
  const normalized = listStr
    .replace(/\s+and\s+/gi, ', ')
    .replace(/[–—]/g, '-') // en-dash, em-dash → hyphen
    .replace(/&/g, ', '); // & → comma

  const items = normalized
    .split(/\s*,\s*/)
    .map((item) => item.trim())
    .filter((item) => item.length > 0);

  const isBareLetter = (s: string): boolean => /^[a-z]$/i.test(s);
  const letterCode = (s: string | null): number => (s || 'a').charCodeAt(0);

  // Number/supplementary flag of the most recent full reference; lone letters inherit it
  let lastFullRef: { num: number; isSupp: boolean } | null = null;

  for (const item of items) {
    if (!item.includes('-')) {
      if (isBareLetter(item) && lastFullRef) {
        // Implicit prefix: "b" after "1a" means "1b"
        results.push({
          num: lastFullRef.num,
          isSupp: lastFullRef.isSupp,
          suffix: item.toLowerCase(),
        });
      } else {
        // Full reference: "1", "1a", "S1", "S1a"
        const ref = parseRefNumber(item);
        results.push(ref);
        lastFullRef = { num: ref.num, isSupp: ref.isSupp };
      }
      continue;
    }

    const [start = '', end = ''] = item.split('-').map((segment) => segment.trim());

    // A lone letter has no number of its own. Number 0 marks "inherit", which
    // the same-number branch below resolves from lastFullRef.
    const inherited: { num: number; isSupp: boolean } = lastFullRef ?? { num: 0, isSupp: false };
    const startRef: ParsedRefNumber = isBareLetter(start)
      ? { num: inherited.num, isSupp: inherited.isSupp, suffix: start.toLowerCase() }
      : parseRefNumber(start);
    const endRef: ParsedRefNumber = isBareLetter(end)
      ? { num: startRef.num, isSupp: startRef.isSupp, suffix: end.toLowerCase() }
      : parseRefNumber(end);

    if (startRef.suffix && endRef.suffix && startRef.num !== endRef.num) {
      // Cross-number suffix range: "1a-3b" → 1a, 1b, 2a, 2b, 3a, 3b.
      // Intermediate numbers run from 'a' to the larger of the two letters.
      const maxSuffix = Math.max(letterCode(startRef.suffix), letterCode(endRef.suffix));

      for (let n = startRef.num; n <= endRef.num; n++) {
        const firstCode = n === startRef.num ? letterCode(startRef.suffix) : 'a'.charCodeAt(0);
        const lastCode = n === endRef.num ? letterCode(endRef.suffix) : maxSuffix;

        for (let code = firstCode; code <= lastCode; code++) {
          results.push({
            num: n,
            isSupp: startRef.isSupp,
            suffix: String.fromCharCode(code),
          });
        }
      }
      lastFullRef = { num: endRef.num, isSupp: startRef.isSupp };
    } else if (startRef.suffix || endRef.suffix) {
      // Suffix range on one number: "1a-c"
      const hasOwnNumber = Number.isFinite(startRef.num) && startRef.num !== 0;
      const num: number = hasOwnNumber ? startRef.num : lastFullRef ? lastFullRef.num : 1;
      const isSupp: boolean = startRef.isSupp
        ? startRef.isSupp
        : lastFullRef
          ? lastFullRef.isSupp
          : false;
      const lastCode = letterCode(endRef.suffix);

      for (let code = letterCode(startRef.suffix); code <= lastCode; code++) {
        results.push({
          num,
          isSupp,
          suffix: String.fromCharCode(code),
        });
      }
      lastFullRef = { num, isSupp };
    } else {
      // Pure number range: "1-3"
      for (let n = startRef.num; n <= endRef.num; n++) {
        results.push({
          num: n,
          isSupp: startRef.isSupp,
          suffix: null,
        });
      }
      lastFullRef = { num: endRef.num, isSupp: startRef.isSupp };
    }
  }

  return results;
}
314
-
315
/**
 * Scan section files for `{#fig:label}`, `{#tbl:label}` and `{#eq:label}`
 * anchors and number them in order of appearance.
 *
 * Files come from `sections` when it is a non-empty array, otherwise from the
 * project config via discoverSectionFiles(); files that do not exist are
 * dropped. There is deliberately no fallback to "all .md files": stray files
 * in the directory would shift the numbering. With no usable file list the
 * registry is empty.
 *
 * Figures and tables in a file whose name contains "supp" or "appendix"
 * (case-insensitive) are numbered in a separate supplementary sequence.
 * Equations share a single sequence and are never marked supplementary.
 *
 * @param directory - Directory containing the section files.
 * @param sections - Optional explicit, ordered list of section file names.
 * @returns Label -> info maps per kind plus a number -> label reverse lookup.
 * @throws TypeError when `directory` is not a string.
 */
export function buildRegistry(directory: string, sections?: string[]): Registry {
  if (typeof directory !== 'string') {
    throw new TypeError(`directory must be a string, got ${typeof directory}`);
  }

  // Explicit sections win; otherwise rely on rev.yaml / sections.yaml
  const orderedFiles: string[] =
    Array.isArray(sections) && sections.length > 0
      ? sections.filter((name) => fs.existsSync(path.join(directory, name)))
      : discoverSectionFiles(directory);

  const figures = new Map<string, FigureInfo>();
  const tables = new Map<string, FigureInfo>();
  const equations = new Map<string, FigureInfo>();

  // Running counters; main and supplementary sequences are independent
  const counters = { fig: 0, figS: 0, tbl: 0, tblS: 0, eq: 0 };

  for (const file of orderedFiles) {
    const content = fs.readFileSync(path.join(directory, file), 'utf-8');
    const lowerName = file.toLowerCase();
    const isSupp = lowerName.includes('supp') || lowerName.includes('appendix');

    // Shared global regex: rewind before scanning each file
    ANCHOR_PATTERN.lastIndex = 0;
    for (
      let hit = ANCHOR_PATTERN.exec(content);
      hit !== null;
      hit = ANCHOR_PATTERN.exec(content)
    ) {
      const [, typeRaw, label] = hit;
      if (!typeRaw || !label) continue;

      switch (typeRaw.toLowerCase()) {
        case 'fig': {
          const num = isSupp ? ++counters.figS : ++counters.fig;
          figures.set(label, { label, num, isSupp, file });
          break;
        }
        case 'tbl': {
          const num = isSupp ? ++counters.tblS : ++counters.tbl;
          tables.set(label, { label, num, isSupp, file });
          break;
        }
        case 'eq':
          equations.set(label, { label, num: ++counters.eq, isSupp: false, file });
          break;
      }
    }
  }

  // Reverse lookup (number → label), derived from the finished maps so that a
  // repeated label contributes only its final entry
  const byNumber: Registry['byNumber'] = {
    fig: new Map(),
    figS: new Map(),
    tbl: new Map(),
    tblS: new Map(),
    eq: new Map(),
  };

  figures.forEach((info, label) => {
    (info.isSupp ? byNumber.figS : byNumber.fig).set(info.num, label);
  });
  tables.forEach((info, label) => {
    (info.isSupp ? byNumber.tblS : byNumber.tbl).set(info.num, label);
  });
  equations.forEach((info, label) => {
    byNumber.eq.set(info.num, label);
  });

  return { figures, tables, equations, byNumber };
}
418
-
419
/**
 * Render the display text for a label, e.g. "Figure 1" or "Table S2".
 *
 * @param type - Reference kind; anything other than 'fig' or 'tbl' is looked
 *   up among equations.
 * @param label - Anchor label without its type prefix.
 * @param registry - Registry to look the label up in.
 * @returns The display text, or null when the registry is missing or does not
 *   contain the label.
 */
export function labelToDisplay(
  type: 'fig' | 'tbl' | 'eq',
  label: string,
  registry: Registry
): string | null {
  if (!registry || !registry.figures) return null;

  let collection: Registry['figures'] | Registry['tables'] | Registry['equations'];
  let prefix: string;
  switch (type) {
    case 'fig':
      collection = registry.figures;
      prefix = 'Figure';
      break;
    case 'tbl':
      collection = registry.tables;
      prefix = 'Table';
      break;
    default:
      collection = registry.equations;
      prefix = 'Equation';
      break;
  }

  const info = collection.get(label);
  if (!info) return null;

  // Supplementary items are shown with an "S" in front of the number
  const supplementaryMark = info.isSupp ? 'S' : '';
  return `${prefix} ${supplementaryMark}${info.num}`;
}
440
-
441
/**
 * Look up the label that carries a given display number, e.g. the label of
 * "Figure 1" or "Table S2".
 *
 * @param type - Reference kind.
 * @param num - Number as displayed (without any "S").
 * @param isSupp - Whether the number belongs to the supplementary sequence.
 * @param registry - Registry providing the number -> label lookup.
 * @returns The label, or null when the registry is missing or has no entry.
 */
export function numberToLabel(
  type: 'fig' | 'tbl' | 'eq',
  num: number,
  isSupp: boolean,
  registry: Registry
): string | null {
  if (!registry || !registry.byNumber) return null;

  // Supplementary sequences live under "<type>S"
  const bucketName = (isSupp ? `${type}S` : type) as keyof Registry['byNumber'];
  const bucket = registry.byNumber[bucketName];
  if (!bucket) return null;

  const label = bucket.get(num);
  return label ? label : null;
}
455
-
456
/**
 * Find every hardcoded figure/table/equation reference in a text.
 *
 * Each detection pattern is run over the whole text; the list part of every
 * match is expanded with parseReferenceList(). Matches whose list is empty or
 * expands to nothing are skipped.
 *
 * @param text - Text to scan.
 * @returns Detected references ordered by their position in `text`.
 * @throws TypeError when `text` is not a string.
 */
export function detectHardcodedRefs(text: string): DetectedRef[] {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  const found: DetectedRef[] = [];

  for (const patternName of Object.keys(DETECTION_PATTERNS)) {
    const pattern = DETECTION_PATTERNS[patternName];
    if (!pattern) continue;

    // The pattern key ("figure", "table", "equation") determines the ref type
    const refType = normalizeType(patternName) as 'fig' | 'tbl' | 'eq';

    // Shared global regex: rewind before scanning
    pattern.lastIndex = 0;
    for (let hit = pattern.exec(text); hit !== null; hit = pattern.exec(text)) {
      // hit[1] is the type prefix as written, hit[2] the reference list
      const listStr = hit[2];
      if (!listStr) continue;

      const numbers = parseReferenceList(listStr);
      if (numbers.length === 0) continue;

      found.push({
        type: refType,
        match: hit[0],
        numbers,
        position: hit.index,
      });
    }
  }

  return found.sort((left, right) => left.position - right.position);
}
495
-
496
/**
 * Replace hardcoded references ("Figure 1", "Tables 1 and 2") with @-style
 * references ("@fig:label", "@tbl:a; @tbl:b").
 *
 * A reference is converted only when every number it names has a label in the
 * registry; otherwise the original text is kept and one warning per unknown
 * number is recorded. Numbers that differ only by panel letter ("1a-c") refer
 * to the same item and produce a single @-reference. A reference is also left
 * alone when one of its @-labels already occurs within the REF_CONTEXT_WINDOW
 * characters before it, to avoid duplicating a reference that is already there.
 *
 * @param text - Text to convert.
 * @param registry - Registry used to map numbers to labels.
 * @returns The converted text, the replacements made, and any warnings.
 * @throws TypeError when `text` is not a string (raised by detectHardcodedRefs).
 */
export function convertHardcodedRefs(text: string, registry: Registry): ConversionResult {
  const refs = detectHardcodedRefs(text);
  const conversions: Array<{ from: string; to: string }> = [];
  const warnings: string[] = [];

  // Work from the end of the text so earlier positions stay valid
  let result = text;
  for (let i = refs.length - 1; i >= 0; i--) {
    const ref = refs[i];
    if (!ref) continue;

    const labels: string[] = [];
    const seenNumbers = new Set<string>();
    let hasUnknownNumber = false;

    for (const { num, isSupp } of ref.numbers) {
      const displayNum = isSupp ? `S${num}` : `${num}`;
      // Panels such as 1a, 1b, 1c all point at item 1: handle it once
      if (seenNumbers.has(displayNum)) continue;
      seenNumbers.add(displayNum);

      const label = numberToLabel(ref.type, num, isSupp, registry);
      if (label) {
        labels.push(`@${ref.type}:${label}`);
      } else {
        hasUnknownNumber = true;
        warnings.push(`Unknown reference: ${ref.type} ${displayNum} (no matching label)`);
      }
    }

    // Keep the original text unless every number could be resolved
    if (hasUnknownNumber || labels.length === 0) continue;

    // Skip if the @-syntax already appears in the preceding text, e.g. when an
    // import restored "@fig:map" and the hardcoded "Figure 1" follows directly
    const textBefore = result.slice(Math.max(0, ref.position - REF_CONTEXT_WINDOW), ref.position);
    if (labels.some((label) => textBefore.includes(label))) continue;

    const replacement = labels.join('; ');
    result =
      result.slice(0, ref.position) + replacement + result.slice(ref.position + ref.match.length);

    conversions.push({ from: ref.match, to: replacement });
  }

  return { converted: result, conversions, warnings };
}
548
-
549
/**
 * Find every @-style reference (`@fig:label`, `@tbl:label`, `@eq:label`) in a
 * text.
 *
 * The pattern is case-insensitive, so the captured type is lower-cased before
 * it is reported; the `type` field is therefore always exactly 'fig', 'tbl'
 * or 'eq'. `match` keeps the text as written.
 *
 * @param text - Text to scan.
 * @returns References in order of appearance.
 * @throws TypeError when `text` is not a string.
 */
export function detectDynamicRefs(text: string): DynamicRef[] {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  const refs: DynamicRef[] = [];

  // Shared global regex: rewind before scanning
  REF_PATTERN.lastIndex = 0;
  let match: RegExpExecArray | null;

  while ((match = REF_PATTERN.exec(text)) !== null) {
    const rawType = match[1];
    const label = match[2];
    if (!rawType || !label) continue;

    refs.push({
      type: rawType.toLowerCase() as 'fig' | 'tbl' | 'eq',
      label,
      match: match[0],
      position: match.index,
    });
  }

  return refs;
}
575
-
576
/**
 * Summarise the references and anchors found in a text.
 *
 * @param text - Text to analyse.
 * @param registry - Accepted for interface compatibility; not consulted here.
 * @returns The @-style references, the hardcoded references, and the number of
 *   figure/table/equation anchors defined in `text`.
 * @throws TypeError when `text` is not a string (raised by the detectors).
 */
export function getRefStatus(text: string, registry: Registry): RefStatus {
  const dynamic = detectDynamicRefs(text);
  const hardcoded = detectHardcodedRefs(text) as HardcodedRef[];

  const anchors = { figures: 0, tables: 0, equations: 0 };

  // Shared global regex: rewind before scanning
  ANCHOR_PATTERN.lastIndex = 0;
  let match: RegExpExecArray | null;
  while ((match = ANCHOR_PATTERN.exec(text)) !== null) {
    const rawType = match[1];
    if (!rawType) continue;

    // The pattern is case-insensitive, so normalise before comparing
    const type = rawType.toLowerCase();
    if (type === 'fig') anchors.figures++;
    else if (type === 'tbl') anchors.tables++;
    else if (type === 'eq') anchors.equations++;
  }

  return { dynamic, hardcoded, anchors };
}
603
-
604
/**
 * Detect forward references in combined text.
 *
 * A forward reference is an @-reference that appears before the `{#anchor}`
 * defining it, or whose anchor is not present in the text at all.
 *
 * Anchor and reference types are compared in lower case, because both
 * underlying patterns are case-insensitive.
 *
 * @param text - Combined document text.
 * @returns `forwardRefs`: the references considered forward, in order of
 *   appearance; `anchorPositions`: offset of the first occurrence of each
 *   anchor, keyed by "type:label" with a lower-case type.
 * @throws TypeError when `text` is not a string (raised by detectDynamicRefs).
 */
export function detectForwardRefs(text: string): {
  forwardRefs: Array<{ type: string; label: string; match: string; position: number }>;
  anchorPositions: Map<string, number>;
} {
  const keyFor = (type: string, label: string): string => `${type.toLowerCase()}:${label}`;

  // Map each anchor ("fig:label") to the position of its first occurrence
  const anchorPositions = new Map<string, number>();
  ANCHOR_PATTERN.lastIndex = 0;
  let match: RegExpExecArray | null;
  while ((match = ANCHOR_PATTERN.exec(text)) !== null) {
    const type = match[1];
    const label = match[2];
    if (!type || !label) continue;

    const key = keyFor(type, label);
    // Duplicated anchors: only the first definition counts
    if (!anchorPositions.has(key)) {
      anchorPositions.set(key, match.index);
    }
  }

  const forwardRefs = detectDynamicRefs(text).filter((ref) => {
    const anchorPos = anchorPositions.get(keyFor(ref.type, ref.label));
    // Forward when the anchor is missing or defined after the reference
    return anchorPos === undefined || ref.position < anchorPos;
  });

  return { forwardRefs, anchorPositions };
}
640
-
641
/**
 * Replace forward references with their display text ("Figure 1").
 *
 * Only references reported by detectForwardRefs() are touched; all others are
 * left in @-form for pandoc-crossref, which keeps them as clickable links.
 *
 * @param text - Combined document text.
 * @param registry - Registry used to produce the display text.
 * @returns The rewritten text, the replacements made, and the forward
 *   references that have no registry entry. Both lists are in reverse document
 *   order because replacement runs from the end of the text.
 */
export function resolveForwardRefs(
  text: string,
  registry: Registry
): {
  text: string;
  resolved: Array<{ from: string; to: string; position: number }>;
  unresolved: Array<{ ref: string; position: number }>;
} {
  const resolved: Array<{ from: string; to: string; position: number }> = [];
  const unresolved: Array<{ ref: string; position: number }> = [];

  // Last reference first, so positions of the remaining ones stay valid
  const lastToFirst = [...detectForwardRefs(text).forwardRefs].reverse();

  let output = text;
  for (const ref of lastToFirst) {
    const display = labelToDisplay(ref.type as 'fig' | 'tbl' | 'eq', ref.label, registry);

    if (!display) {
      unresolved.push({ ref: ref.match, position: ref.position });
      continue;
    }

    const before = output.slice(0, ref.position);
    const after = output.slice(ref.position + ref.match.length);
    output = before + display + after;
    resolved.push({ from: ref.match, to: display, position: ref.position });
  }

  return { text: output, resolved, unresolved };
}
683
-
684
/**
 * Turn every reference to a supplementary figure or table into plain text and
 * remove the corresponding anchors.
 *
 * pandoc-crossref numbers all figures in one sequence and cannot produce
 * "Figure S1". This function therefore:
 *   1. replaces each `@fig:label` / `@tbl:label` that points at a supplementary
 *      item with its display text ("Figure S1", "Table S1");
 *   2. deletes each `{#fig:label}` / `{#tbl:label ...}` attribute block of a
 *      supplementary item, so pandoc-crossref no longer sees it.
 *
 * @param text - Document text.
 * @param registry - Registry identifying the supplementary items.
 * @returns The rewritten text and a log of changes; removed anchors are logged
 *   with `to: '(stripped)'`.
 */
export function resolveSupplementaryRefs(
  text: string,
  registry: Registry
): {
  text: string;
  resolved: Array<{ from: string; to: string }>;
} {
  const resolved: Array<{ from: string; to: string }> = [];

  // Keys ("fig:label", "tbl:label") of all supplementary items
  const suppKeys = new Set<string>();
  registry.figures.forEach((info, label) => {
    if (info.isSupp) suppKeys.add(`fig:${label}`);
  });
  registry.tables.forEach((info, label) => {
    if (info.isSupp) suppKeys.add(`tbl:${label}`);
  });

  if (suppKeys.size === 0) return { text, resolved };

  let output = text;

  // Step 1: rewrite references, last one first so positions stay valid
  const refs = detectDynamicRefs(output);
  for (let index = refs.length - 1; index >= 0; index--) {
    const ref = refs[index];
    if (!ref || !suppKeys.has(`${ref.type}:${ref.label}`)) continue;

    const display = labelToDisplay(ref.type as 'fig' | 'tbl' | 'eq', ref.label, registry);
    if (!display) continue;

    output =
      output.slice(0, ref.position) + display + output.slice(ref.position + ref.match.length);
    resolved.push({ from: ref.match, to: display });
  }

  // Step 2: drop the anchor attribute blocks ({#fig:label} or {#fig:label ...})
  suppKeys.forEach((key) => {
    const escapedKey = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const anchorBlock = new RegExp(`\\{#${escapedKey}(?:\\s[^}]*)?\\}`, 'g');
    output = output.replace(anchorBlock, (found) => {
      resolved.push({ from: found, to: '(stripped)' });
      return '';
    });
  });

  return { text: output, resolved };
}
744
-
745
- /**
746
- * Format registry for display
747
- */
748
- export function formatRegistry(registry: Registry): string {
749
- const lines: string[] = [];
750
-
751
- if (registry.figures.size > 0) {
752
- lines.push('Figures:');
753
- for (const [label, info] of registry.figures) {
754
- const num = info.isSupp ? `S${info.num}` : info.num;
755
- lines.push(` Figure ${num}: @fig:${label} (${info.file})`);
756
- }
757
- }
758
-
759
- if (registry.tables.size > 0) {
760
- if (lines.length > 0) lines.push('');
761
- lines.push('Tables:');
762
- for (const [label, info] of registry.tables) {
763
- const num = info.isSupp ? `S${info.num}` : info.num;
764
- lines.push(` Table ${num}: @tbl:${label} (${info.file})`);
765
- }
766
- }
767
-
768
- if (registry.equations.size > 0) {
769
- if (lines.length > 0) lines.push('');
770
- lines.push('Equations:');
771
- for (const [label, info] of registry.equations) {
772
- lines.push(` Equation ${info.num}: @eq:${label} (${info.file})`);
773
- }
774
- }
775
-
776
- if (lines.length === 0) {
777
- lines.push('No figure/table anchors found.');
778
- }
779
-
780
- return lines.join('\n');
781
- }
1
+ /**
2
+ * Cross-reference handling - dynamic figure/table references
3
+ *
4
+ * Enables:
5
+ * - @fig:label syntax in source (auto-numbered)
6
+ * - Conversion to "Figure 1" in Word output
7
+ * - Auto-conversion back during import
8
+ */
9
+
10
+ import * as fs from 'fs';
11
+ import * as path from 'path';
12
+ import YAML from 'yaml';
13
+ import type {
14
+ RefNumber,
15
+ HardcodedRef,
16
+ DynamicRef,
17
+ FigureInfo,
18
+ Registry,
19
+ RefStatus,
20
+ ConversionResult,
21
+ } from './types.js';
22
+
23
+ // =============================================================================
24
+ // Constants
25
+ // =============================================================================
26
+
27
+ /** Characters of context to check before a reference for deduplication */
28
+ const REF_CONTEXT_WINDOW = 100;
29
+
30
+ /** Minimum word length for similarity calculations */
31
+ const MIN_WORD_LENGTH = 2;
32
+
33
+ // =============================================================================
34
+ // Type Definitions (Internal)
35
+ // =============================================================================
36
+
37
+ /**
38
+ * Reference info (internal use in registry building)
39
+ */
40
+ interface RefInfo {
41
+ label: string;
42
+ num: number;
43
+ isSupp: boolean;
44
+ file: string;
45
+ }
46
+
47
+ /**
48
+ * Parsed reference number components
49
+ */
50
+ interface ParsedRefNumber {
51
+ isSupp: boolean;
52
+ num: number;
53
+ suffix: string | null;
54
+ }
55
+
56
+ /**
57
+ * Detected reference with parsed numbers
58
+ */
59
+ interface DetectedRef {
60
+ type: 'fig' | 'tbl' | 'eq';
61
+ match: string;
62
+ numbers: ParsedRefNumber[];
63
+ position: number;
64
+ }
65
+
66
+ // =============================================================================
67
+ // Internal Helpers
68
+ // =============================================================================
69
+
70
+ /**
71
+ * Discover section files from a directory by reading config files
72
+ * Only returns files explicitly defined in rev.yaml or sections.yaml
73
+ * Returns empty array if no config found (caller should handle this)
74
+ */
75
+ function discoverSectionFiles(directory: string): string[] {
76
+ // Try rev.yaml first
77
+ const revYamlPath = path.join(directory, 'rev.yaml');
78
+ if (fs.existsSync(revYamlPath)) {
79
+ try {
80
+ const config = YAML.parse(fs.readFileSync(revYamlPath, 'utf-8'));
81
+ if (config.sections && Array.isArray(config.sections) && config.sections.length > 0) {
82
+ return config.sections.filter((f: string) => fs.existsSync(path.join(directory, f)));
83
+ }
84
+ } catch (e) {
85
+ if (process.env.DEBUG) {
86
+ console.warn('crossref: YAML parse error in rev.yaml:', (e as Error).message);
87
+ }
88
+ }
89
+ }
90
+
91
+ // Try sections.yaml
92
+ const sectionsPath = path.join(directory, 'sections.yaml');
93
+ if (fs.existsSync(sectionsPath)) {
94
+ try {
95
+ const config = YAML.parse(fs.readFileSync(sectionsPath, 'utf-8'));
96
+ if (config.sections) {
97
+ const sectionOrder = Object.entries(config.sections)
98
+ .sort((a, b) => ((a[1] as any).order ?? 999) - ((b[1] as any).order ?? 999))
99
+ .map(([file]) => file);
100
+ return sectionOrder.filter((f) => fs.existsSync(path.join(directory, f)));
101
+ }
102
+ } catch (e) {
103
+ if (process.env.DEBUG) {
104
+ console.warn('crossref: YAML parse error in sections.yaml:', (e as Error).message);
105
+ }
106
+ }
107
+ }
108
+
109
+ // No config found - return empty array
110
+ // Caller must handle this (either error or use explicit sections)
111
+ return [];
112
+ }
113
+
114
+ // =============================================================================
115
+ // Detection Patterns
116
+ // =============================================================================
117
+
118
+ /**
119
+ * Patterns for detecting hardcoded references
120
+ * Matches complex patterns including:
121
+ * - Simple: "Figure 1", "Fig. 2a", "Table S1"
122
+ * - Ranges: "Figures 1-3", "Fig. 1a-c", "Figs. 1a-3b"
123
+ * - Lists: "Figures 1, 2, and 3", "Fig. 1a, b, c", "Tables 1 & 2"
124
+ * - Mixed: "Figs. 1, 3-5, and 7"
125
+ *
126
+ * Uses a simpler base pattern and parses the full match for lists
127
+ */
128
+ const DETECTION_PATTERNS: Record<string, RegExp> = {
129
+ // Captures the full reference including lists with "and"
130
+ // Group 1: type prefix (Figure, Fig., etc.)
131
+ // Group 2: reference list (parsed by parseReferenceList())
132
+ // Matches: "1", "1a", "1-3", "1a-c", "1, 2, 3", "1 and 2", "1, 2 and 3", "1, 2, and 3"
133
+ // Separator: comma/dash/ampersand, optionally followed by "and"
134
+ // Standalone letters must be followed by separator, punctuation, or word boundary
135
+ // Also handles: "see Figure 1", "(Fig. 1)", "in Figures 1–3"
136
+ // Note: 'gi' flag makes these case-insensitive, so "figure 1" is also matched
137
+ figure: /\b(Figures?|Figs?\.?)\s+((?:\d+|S\d+)[a-z]?(?:(?:\s*[-–—,&]\s*(?:and\s+)?|\s+and\s+)(?:(?:\d+|S\d+)[a-z]?|[a-z]\b))*)/gi,
138
+
139
+ table: /\b(Tables?|Tabs?\.?)\s+((?:\d+|S\d+)[a-z]?(?:(?:\s*[-–—,&]\s*(?:and\s+)?|\s+and\s+)(?:(?:\d+|S\d+)[a-z]?|[a-z]\b))*)/gi,
140
+
141
+ equation: /\b(Equations?|Eqs?\.?)\s+((?:\d+)[a-z]?(?:(?:\s*[-–—,&]\s*(?:and\s+)?|\s+and\s+)(?:(?:\d+)[a-z]?|[a-z]\b))*)/gi,
142
+ };
143
+
144
+ /**
145
+ * Patterns to EXCLUDE from detection (false positives)
146
+ * These look like references but aren't (e.g., "Table of Contents", "Figure skating")
147
+ */
148
+ const EXCLUSION_PATTERNS = [
149
+ /\bTable\s+of\s+Contents?\b/gi,
150
+ /\bFigure\s+skating\b/gi,
151
+ /\bFigure\s+out\b/gi,
152
+ /\bFigure\s+it\b/gi,
153
+ /\bTable\s+setting/gi,
154
+ /\bEquation\s+editor\b/gi,
155
+ ];
156
+
157
+ /**
158
+ * Pattern for extracting anchors from markdown: {#fig:label}, {#tbl:label}
159
+ */
160
+ const ANCHOR_PATTERN = /\{#(fig|tbl|eq):([a-zA-Z0-9_-]+)/gi;
161
+
162
+ /**
163
+ * Pattern for @-style references: @fig:label, @tbl:label
164
+ */
165
+ const REF_PATTERN = /@(fig|tbl|eq):([a-zA-Z0-9_-]+)/gi;
166
+
167
+ // =============================================================================
168
+ // Public API
169
+ // =============================================================================
170
+
171
+ /**
172
+ * Normalize a reference type to standard form
173
+ */
174
+ export function normalizeType(typeStr: string): 'fig' | 'tbl' | 'eq' | string {
175
+ if (typeof typeStr !== 'string') {
176
+ throw new TypeError(`typeStr must be a string, got ${typeof typeStr}`);
177
+ }
178
+ const lower = typeStr.toLowerCase().replace(/\.$/, '');
179
+ if (lower.startsWith('fig')) return 'fig';
180
+ if (lower.startsWith('tab')) return 'tbl';
181
+ if (lower.startsWith('eq')) return 'eq';
182
+ return lower;
183
+ }
184
+
185
+ /**
186
+ * Parse a reference number, handling supplementary (S1, S2) and letter suffixes (1a, 1b)
187
+ */
188
+ export function parseRefNumber(numStr: string, suffix: string | null = null): ParsedRefNumber {
189
+ if (!numStr || typeof numStr !== 'string') {
190
+ return { isSupp: false, num: 0, suffix: suffix || null };
191
+ }
192
+ const isSupp = numStr.toUpperCase().startsWith('S');
193
+ const numPart = isSupp ? numStr.slice(1) : numStr;
194
+ // Extract suffix if embedded in numStr (e.g., "1a")
195
+ const match = numPart.match(/^(\d+)([a-z])?$/i);
196
+ const num = match && match[1] ? parseInt(match[1], 10) : parseInt(numPart, 10);
197
+ const extractedSuffix = suffix || (match && match[2]) || null;
198
+ return { isSupp, num, suffix: extractedSuffix ? extractedSuffix.toLowerCase() : null };
199
+ }
200
+
201
+ /**
202
+ * Parse a reference list string like "1, 2, and 3" or "1a-c" or "1a-3b"
203
+ * Returns an array of {num, isSupp, suffix} objects
204
+ */
205
+ export function parseReferenceList(listStr: string): ParsedRefNumber[] {
206
+ const results: ParsedRefNumber[] = [];
207
+ if (!listStr || typeof listStr !== 'string') return results;
208
+
209
+ // Normalize: replace "and" with comma, normalize dashes
210
+ let normalized = listStr
211
+ .replace(/\s+and\s+/gi, ', ')
212
+ .replace(/[–—]/g, '-') // en-dash, em-dash → hyphen
213
+ .replace(/&/g, ', '); // & → comma
214
+
215
+ // Split by comma (but not by dash, which indicates ranges)
216
+ const parts = normalized.split(/\s*,\s*/).filter((p) => p.trim());
217
+
218
+ let lastFullRef: { num: number; isSupp: boolean } | null = null; // Track the last full reference for implicit prefixes
219
+
220
+ for (const part of parts) {
221
+ const trimmed = part.trim();
222
+ if (!trimmed) continue;
223
+
224
+ // Check if this is a range (contains -)
225
+ if (trimmed.includes('-')) {
226
+ const parts = trimmed.split('-').map((s) => s.trim());
227
+ const start = parts[0] || '';
228
+ const end = parts[1] || '';
229
+
230
+ // Check if end is just a letter (e.g., "1a-c" where end is "c")
231
+ const endIsLetterOnly = /^[a-z]$/i.test(end);
232
+
233
+ const startRef = parseRefNumber(start);
234
+ // For letter-only end, don't parse as number
235
+ const endRef = endIsLetterOnly
236
+ ? { num: startRef.num, isSupp: startRef.isSupp, suffix: end.toLowerCase() }
237
+ : parseRefNumber(end);
238
+
239
+ // Handle different range types:
240
+ // 1. Suffix-only range on same number: "1a-c" → 1a, 1b, 1c
241
+ // 2. Number range: "1-3" → 1, 2, 3
242
+ // 3. Cross-number suffix range: "1a-3b" → 1a...1z, 2a...2z, 3a, 3b (limited)
243
+
244
+ if (startRef.suffix && endRef.suffix && startRef.num !== endRef.num) {
245
+ // Cross-number suffix range: "1a-3b"
246
+ // For academic papers, limit intermediate figures to same suffix range
247
+ // e.g., "1a-3b" typically means 1a, 1b, 2a, 2b, 3a, 3b
248
+ const maxSuffix = Math.max(
249
+ startRef.suffix.charCodeAt(0),
250
+ endRef.suffix.charCodeAt(0)
251
+ );
252
+
253
+ for (let n = startRef.num; n <= endRef.num; n++) {
254
+ const suffixStart =
255
+ n === startRef.num ? startRef.suffix.charCodeAt(0) : 'a'.charCodeAt(0);
256
+ const suffixEnd = n === endRef.num ? endRef.suffix.charCodeAt(0) : maxSuffix;
257
+
258
+ for (let s = suffixStart; s <= suffixEnd; s++) {
259
+ results.push({
260
+ num: n,
261
+ isSupp: startRef.isSupp,
262
+ suffix: String.fromCharCode(s),
263
+ });
264
+ }
265
+ }
266
+ lastFullRef = { num: endRef.num, isSupp: startRef.isSupp };
267
+ } else if (startRef.suffix || endRef.suffix) {
268
+ // Suffix range on same number: "1a-c"
269
+ const num: number = startRef.num !== 0 ? startRef.num : (lastFullRef ? lastFullRef.num : 1);
270
+ const isSupp: boolean = startRef.isSupp ? startRef.isSupp : (lastFullRef ? lastFullRef.isSupp : false);
271
+ const startCode = (startRef.suffix || 'a').charCodeAt(0);
272
+ const endCode = (endRef.suffix || 'a').charCodeAt(0);
273
+
274
+ for (let code = startCode; code <= endCode; code++) {
275
+ results.push({
276
+ num,
277
+ isSupp,
278
+ suffix: String.fromCharCode(code),
279
+ });
280
+ }
281
+ lastFullRef = { num, isSupp };
282
+ } else {
283
+ // Pure number range: "1-3"
284
+ for (let n = startRef.num; n <= endRef.num; n++) {
285
+ results.push({
286
+ num: n,
287
+ isSupp: startRef.isSupp,
288
+ suffix: null,
289
+ });
290
+ }
291
+ lastFullRef = { num: endRef.num, isSupp: startRef.isSupp };
292
+ }
293
+ } else {
294
+ // Single reference or implicit suffix
295
+ // Check if it's just a letter (implicit prefix from previous number)
296
+ if (/^[a-z]$/i.test(trimmed) && lastFullRef) {
297
+ // Implicit prefix: "b" after "1a" means "1b"
298
+ results.push({
299
+ num: lastFullRef.num,
300
+ isSupp: lastFullRef.isSupp,
301
+ suffix: trimmed.toLowerCase(),
302
+ });
303
+ } else {
304
+ // Full reference: "1", "1a", "S1", "S1a"
305
+ const ref = parseRefNumber(trimmed);
306
+ results.push(ref);
307
+ lastFullRef = { num: ref.num, isSupp: ref.isSupp };
308
+ }
309
+ }
310
+ }
311
+
312
+ return results;
313
+ }
314
+
315
+ /**
316
+ * Build a registry of figure/table labels from .md files
317
+ * Scans for {#fig:label} and {#tbl:label} anchors
318
+ *
319
+ * IMPORTANT: This function requires either explicit sections or a rev.yaml/sections.yaml config.
320
+ * It will NOT guess by scanning all .md files, as this leads to incorrect numbering
321
+ * when temporary files (paper_clean.md, etc.) exist in the directory.
322
+ */
323
+ export function buildRegistry(directory: string, sections?: string[]): Registry {
324
+ if (typeof directory !== 'string') {
325
+ throw new TypeError(`directory must be a string, got ${typeof directory}`);
326
+ }
327
+
328
+ const figures = new Map<string, FigureInfo>();
329
+ const tables = new Map<string, FigureInfo>();
330
+ const equations = new Map<string, FigureInfo>();
331
+
332
+ // Counters for numbering (separate for main and supplementary)
333
+ let figNum = 0;
334
+ let figSuppNum = 0;
335
+ let tblNum = 0;
336
+ let tblSuppNum = 0;
337
+ let eqNum = 0;
338
+
339
+ let orderedFiles: string[];
340
+
341
+ if (Array.isArray(sections) && sections.length > 0) {
342
+ // Use explicitly provided section files - most reliable
343
+ orderedFiles = sections.filter((f) => fs.existsSync(path.join(directory, f)));
344
+ } else {
345
+ // Try to determine sections from config files (rev.yaml or sections.yaml)
346
+ orderedFiles = discoverSectionFiles(directory);
347
+ // If no config found, return empty registry rather than guessing
348
+ // This prevents bugs from scanning wrong files
349
+ }
350
+
351
+ // Determine if a file is supplementary
352
+ const isSupplementary = (filename: string): boolean =>
353
+ filename.toLowerCase().includes('supp') || filename.toLowerCase().includes('appendix');
354
+
355
+ // Process each file in order
356
+ for (const file of orderedFiles) {
357
+ const filePath = path.join(directory, file);
358
+ const content = fs.readFileSync(filePath, 'utf-8');
359
+ const isSupp = isSupplementary(file);
360
+
361
+ // Find all anchors
362
+ let match: RegExpExecArray | null;
363
+ ANCHOR_PATTERN.lastIndex = 0;
364
+ while ((match = ANCHOR_PATTERN.exec(content)) !== null) {
365
+ const typeRaw = match[1];
366
+ const labelRaw = match[2];
367
+ if (!typeRaw || !labelRaw) continue;
368
+
369
+ const type = typeRaw.toLowerCase();
370
+ const label = labelRaw;
371
+
372
+ if (type === 'fig') {
373
+ if (isSupp) {
374
+ figSuppNum++;
375
+ figures.set(label, { label, num: figSuppNum, isSupp: true, file });
376
+ } else {
377
+ figNum++;
378
+ figures.set(label, { label, num: figNum, isSupp: false, file });
379
+ }
380
+ } else if (type === 'tbl') {
381
+ if (isSupp) {
382
+ tblSuppNum++;
383
+ tables.set(label, { label, num: tblSuppNum, isSupp: true, file });
384
+ } else {
385
+ tblNum++;
386
+ tables.set(label, { label, num: tblNum, isSupp: false, file });
387
+ }
388
+ } else if (type === 'eq') {
389
+ eqNum++;
390
+ equations.set(label, { label, num: eqNum, isSupp: false, file });
391
+ }
392
+ }
393
+ }
394
+
395
+ // Build reverse lookup: number → label
396
+ const byNumber: Registry['byNumber'] = {
397
+ fig: new Map(),
398
+ figS: new Map(),
399
+ tbl: new Map(),
400
+ tblS: new Map(),
401
+ eq: new Map(),
402
+ };
403
+
404
+ for (const [label, info] of figures) {
405
+ const key = info.isSupp ? 'figS' : 'fig';
406
+ byNumber[key].set(info.num, label);
407
+ }
408
+ for (const [label, info] of tables) {
409
+ const key = info.isSupp ? 'tblS' : 'tbl';
410
+ byNumber[key].set(info.num, label);
411
+ }
412
+ for (const [label, info] of equations) {
413
+ byNumber.eq.set(info.num, label);
414
+ }
415
+
416
+ return { figures, tables, equations, byNumber };
417
+ }
418
+
419
+ /**
420
+ * Get the display string for a label (e.g., "Figure 1", "Table S2")
421
+ */
422
+ export function labelToDisplay(
423
+ type: 'fig' | 'tbl' | 'eq',
424
+ label: string,
425
+ registry: Registry
426
+ ): string | null {
427
+ if (!registry || !registry.figures) return null;
428
+
429
+ const collection =
430
+ type === 'fig' ? registry.figures : type === 'tbl' ? registry.tables : registry.equations;
431
+
432
+ const info = collection.get(label);
433
+ if (!info) return null;
434
+
435
+ const prefix = type === 'fig' ? 'Figure' : type === 'tbl' ? 'Table' : 'Equation';
436
+ const numStr = info.isSupp ? `S${info.num}` : `${info.num}`;
437
+
438
+ return `${prefix} ${numStr}`;
439
+ }
440
+
441
+ /**
442
+ * Get the label for a display number (e.g., "fig:heatmap" from Figure 1)
443
+ */
444
+ export function numberToLabel(
445
+ type: 'fig' | 'tbl' | 'eq',
446
+ num: number,
447
+ isSupp: boolean,
448
+ registry: Registry
449
+ ): string | null {
450
+ if (!registry || !registry.byNumber) return null;
451
+
452
+ const key = isSupp ? (`${type}S` as keyof Registry['byNumber']) : type;
453
+ return registry.byNumber[key]?.get(num) || null;
454
+ }
455
+
456
+ /**
457
+ * Detect all hardcoded references in text
458
+ */
459
+ export function detectHardcodedRefs(text: string): DetectedRef[] {
460
+ if (typeof text !== 'string') {
461
+ throw new TypeError(`text must be a string, got ${typeof text}`);
462
+ }
463
+
464
+ const refs: DetectedRef[] = [];
465
+
466
+ for (const [type, pattern] of Object.entries(DETECTION_PATTERNS)) {
467
+ pattern.lastIndex = 0;
468
+ let match: RegExpExecArray | null;
469
+
470
+ while ((match = pattern.exec(text)) !== null) {
471
+ // Pattern groups:
472
+ // [1] = type prefix (Figure, Fig., etc.)
473
+ // [2] = reference list string (e.g., "1, 2, and 3" or "1a-3b")
474
+
475
+ const listStr = match[2];
476
+ if (!listStr) continue;
477
+ const numbers = parseReferenceList(listStr);
478
+
479
+ // Skip if no valid numbers were parsed
480
+ if (numbers.length === 0) continue;
481
+
482
+ refs.push({
483
+ type: normalizeType(type) as 'fig' | 'tbl' | 'eq',
484
+ match: match[0],
485
+ numbers,
486
+ position: match.index,
487
+ });
488
+ }
489
+ }
490
+
491
+ // Sort by position
492
+ refs.sort((a, b) => a.position - b.position);
493
+ return refs;
494
+ }
495
+
496
+ /**
497
+ * Convert hardcoded references to @-style references
498
+ */
499
+ export function convertHardcodedRefs(text: string, registry: Registry): ConversionResult {
500
+ // Input validation delegated to detectHardcodedRefs
501
+ const refs = detectHardcodedRefs(text);
502
+ const conversions: Array<{ from: string; to: string }> = [];
503
+ const warnings: string[] = [];
504
+
505
+ // Process in reverse order to preserve positions
506
+ let result = text;
507
+ for (let i = refs.length - 1; i >= 0; i--) {
508
+ const ref = refs[i];
509
+ if (!ref) continue;
510
+
511
+ // Build replacement
512
+ const labels: string[] = [];
513
+ for (const { num, isSupp } of ref.numbers) {
514
+ const label = numberToLabel(ref.type, num, isSupp, registry);
515
+ if (label) {
516
+ labels.push(`@${ref.type}:${label}`);
517
+ } else {
518
+ const displayNum = isSupp ? `S${num}` : `${num}`;
519
+ warnings.push(`Unknown reference: ${ref.type} ${displayNum} (no matching label)`);
520
+ labels.push(ref.match); // Keep original if no match
521
+ }
522
+ }
523
+
524
+ if (labels.length > 0 && !labels.includes(ref.match)) {
525
+ const replacement = labels.join('; ');
526
+
527
+ // Skip if the @-syntax already appears in the preceding text
528
+ // This prevents duplication when import restores @fig:x and then we see "Fig. 1"
529
+ // e.g., "@fig:map@fig:map{++@fig:map++}" or "@fig:mapFigure 1" patterns
530
+ const textBefore = result.slice(Math.max(0, ref.position - REF_CONTEXT_WINDOW), ref.position);
531
+ const alreadyHasRef = labels.some((label) => textBefore.includes(label));
532
+ if (alreadyHasRef) {
533
+ continue; // Skip - ref already present nearby
534
+ }
535
+
536
+ result =
537
+ result.slice(0, ref.position) + replacement + result.slice(ref.position + ref.match.length);
538
+
539
+ conversions.push({
540
+ from: ref.match,
541
+ to: replacement,
542
+ });
543
+ }
544
+ }
545
+
546
+ return { converted: result, conversions, warnings };
547
+ }
548
+
549
+ /**
550
+ * Detect @-style references in text
551
+ */
552
+ export function detectDynamicRefs(text: string): DynamicRef[] {
553
+ if (typeof text !== 'string') {
554
+ throw new TypeError(`text must be a string, got ${typeof text}`);
555
+ }
556
+
557
+ const refs: DynamicRef[] = [];
558
+ REF_PATTERN.lastIndex = 0;
559
+ let match: RegExpExecArray | null;
560
+
561
+ while ((match = REF_PATTERN.exec(text)) !== null) {
562
+ const type = match[1];
563
+ const label = match[2];
564
+ if (!type || !label) continue;
565
+ refs.push({
566
+ type: type as 'fig' | 'tbl' | 'eq',
567
+ label: label,
568
+ match: match[0],
569
+ position: match.index,
570
+ });
571
+ }
572
+
573
+ return refs;
574
+ }
575
+
576
+ /**
577
+ * Get reference status for a file/text
578
+ */
579
+ export function getRefStatus(text: string, registry: Registry): RefStatus {
580
+ const dynamic = detectDynamicRefs(text);
581
+ const hardcoded = detectHardcodedRefs(text) as HardcodedRef[];
582
+
583
+ // Count anchors in this text
584
+ ANCHOR_PATTERN.lastIndex = 0;
585
+ let figCount = 0,
586
+ tblCount = 0,
587
+ eqCount = 0;
588
+ let match: RegExpExecArray | null;
589
+ while ((match = ANCHOR_PATTERN.exec(text)) !== null) {
590
+ const type = match[1];
591
+ if (!type) continue;
592
+ if (type === 'fig') figCount++;
593
+ else if (type === 'tbl') tblCount++;
594
+ else if (type === 'eq') eqCount++;
595
+ }
596
+
597
+ return {
598
+ dynamic,
599
+ hardcoded,
600
+ anchors: { figures: figCount, tables: tblCount, equations: eqCount },
601
+ };
602
+ }
603
+
604
+ /**
605
+ * Detect forward references in combined text
606
+ * A forward reference is a @ref that appears before its {#anchor} definition
607
+ */
608
+ export function detectForwardRefs(text: string): {
609
+ forwardRefs: Array<{ type: string; label: string; match: string; position: number }>;
610
+ anchorPositions: Map<string, number>;
611
+ } {
612
+ // Build map of anchor positions: "fig:label" -> position
613
+ const anchorPositions = new Map<string, number>();
614
+ ANCHOR_PATTERN.lastIndex = 0;
615
+ let match: RegExpExecArray | null;
616
+ while ((match = ANCHOR_PATTERN.exec(text)) !== null) {
617
+ const type = match[1];
618
+ const label = match[2];
619
+ if (!type || !label) continue;
620
+ const key = `${type}:${label}`;
621
+ // Only store first occurrence (in case of duplicates)
622
+ if (!anchorPositions.has(key)) {
623
+ anchorPositions.set(key, match.index);
624
+ }
625
+ }
626
+
627
+ // Find all references
628
+ const refs = detectDynamicRefs(text);
629
+
630
+ // Filter to only forward references
631
+ const forwardRefs = refs.filter((ref) => {
632
+ const key = `${ref.type}:${ref.label}`;
633
+ const anchorPos = anchorPositions.get(key);
634
+ // Forward ref if anchor doesn't exist or appears after the reference
635
+ return anchorPos === undefined || ref.position < anchorPos;
636
+ });
637
+
638
+ return { forwardRefs, anchorPositions };
639
+ }
640
+
641
+ /**
642
+ * Resolve forward references to display format
643
+ * Only resolves refs that appear before their anchor definition
644
+ * Leaves other refs for pandoc-crossref to handle (preserves clickable links)
645
+ */
646
+ export function resolveForwardRefs(
647
+ text: string,
648
+ registry: Registry
649
+ ): {
650
+ text: string;
651
+ resolved: Array<{ from: string; to: string; position: number }>;
652
+ unresolved: Array<{ ref: string; position: number }>;
653
+ } {
654
+ const { forwardRefs } = detectForwardRefs(text);
655
+ const resolved: Array<{ from: string; to: string; position: number }> = [];
656
+ const unresolved: Array<{ ref: string; position: number }> = [];
657
+
658
+ // Process in reverse order to preserve positions
659
+ let result = text;
660
+ for (let i = forwardRefs.length - 1; i >= 0; i--) {
661
+ const ref = forwardRefs[i];
662
+ if (!ref) continue;
663
+ const display = labelToDisplay(ref.type as 'fig' | 'tbl' | 'eq', ref.label, registry);
664
+
665
+ if (display) {
666
+ result =
667
+ result.slice(0, ref.position) + display + result.slice(ref.position + ref.match.length);
668
+ resolved.push({
669
+ from: ref.match,
670
+ to: display,
671
+ position: ref.position,
672
+ });
673
+ } else {
674
+ unresolved.push({
675
+ ref: ref.match,
676
+ position: ref.position,
677
+ });
678
+ }
679
+ }
680
+
681
+ return { text: result, resolved, unresolved };
682
+ }
683
+
684
+ /**
685
+ * Resolve ALL supplementary references and strip supplementary anchor labels.
686
+ *
687
+ * pandoc-crossref cannot produce "Figure S1" numbering — it numbers all figures
688
+ * sequentially. This function resolves every @fig:label / @tbl:label that points
689
+ * to a supplementary item to plain text ("Figure S1", "Table S1") and removes
690
+ * the {#fig:label} / {#tbl:label} attributes so pandoc-crossref ignores them.
691
+ */
692
+ export function resolveSupplementaryRefs(
693
+ text: string,
694
+ registry: Registry
695
+ ): {
696
+ text: string;
697
+ resolved: Array<{ from: string; to: string }>;
698
+ } {
699
+ const resolved: Array<{ from: string; to: string }> = [];
700
+ let result = text;
701
+
702
+ // Collect supplementary labels
703
+ const suppLabels = new Set<string>();
704
+ for (const [label, info] of registry.figures) {
705
+ if (info.isSupp) suppLabels.add(`fig:${label}`);
706
+ }
707
+ for (const [label, info] of registry.tables) {
708
+ if (info.isSupp) suppLabels.add(`tbl:${label}`);
709
+ }
710
+
711
+ if (suppLabels.size === 0) return { text: result, resolved };
712
+
713
+ // 1. Replace all @fig:label / @tbl:label references to supplementary items
714
+ const refs = detectDynamicRefs(result);
715
+ // Process in reverse to preserve positions
716
+ for (let i = refs.length - 1; i >= 0; i--) {
717
+ const ref = refs[i];
718
+ if (!ref) continue;
719
+ const key = `${ref.type}:${ref.label}`;
720
+ if (!suppLabels.has(key)) continue;
721
+
722
+ const display = labelToDisplay(ref.type as 'fig' | 'tbl' | 'eq', ref.label, registry);
723
+ if (display) {
724
+ result =
725
+ result.slice(0, ref.position) + display + result.slice(ref.position + ref.match.length);
726
+ resolved.push({ from: ref.match, to: display });
727
+ }
728
+ }
729
+
730
+ // 2. Strip {#fig:label} and {#tbl:label} attributes from supplementary anchors
731
+ // so pandoc-crossref does not re-number them
732
+ for (const key of suppLabels) {
733
+ // Match {#fig:label ...} or just {#fig:label}
734
+ const escaped = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
735
+ const pattern = new RegExp(`\\{#${escaped}(?:\\s[^}]*)?\\}`, 'g');
736
+ result = result.replace(pattern, (match) => {
737
+ resolved.push({ from: match, to: '(stripped)' });
738
+ return '';
739
+ });
740
+ }
741
+
742
+ return { text: result, resolved };
743
+ }
744
+
745
+ /**
746
+ * Format registry for display
747
+ */
748
+ export function formatRegistry(registry: Registry): string {
749
+ const lines: string[] = [];
750
+
751
+ if (registry.figures.size > 0) {
752
+ lines.push('Figures:');
753
+ for (const [label, info] of registry.figures) {
754
+ const num = info.isSupp ? `S${info.num}` : info.num;
755
+ lines.push(` Figure ${num}: @fig:${label} (${info.file})`);
756
+ }
757
+ }
758
+
759
+ if (registry.tables.size > 0) {
760
+ if (lines.length > 0) lines.push('');
761
+ lines.push('Tables:');
762
+ for (const [label, info] of registry.tables) {
763
+ const num = info.isSupp ? `S${info.num}` : info.num;
764
+ lines.push(` Table ${num}: @tbl:${label} (${info.file})`);
765
+ }
766
+ }
767
+
768
+ if (registry.equations.size > 0) {
769
+ if (lines.length > 0) lines.push('');
770
+ lines.push('Equations:');
771
+ for (const [label, info] of registry.equations) {
772
+ lines.push(` Equation ${info.num}: @eq:${label} (${info.file})`);
773
+ }
774
+ }
775
+
776
+ if (lines.length === 0) {
777
+ lines.push('No figure/table anchors found.');
778
+ }
779
+
780
+ return lines.join('\n');
781
+ }