docrev 0.9.18 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. package/.gitattributes +1 -1
  2. package/CHANGELOG.md +173 -149
  3. package/PLAN-tables-and-postprocess.md +850 -850
  4. package/README.md +431 -406
  5. package/bin/rev.js +11 -11
  6. package/bin/rev.ts +145 -145
  7. package/completions/rev.bash +127 -127
  8. package/completions/rev.ps1 +210 -210
  9. package/completions/rev.zsh +207 -207
  10. package/dist/lib/build.d.ts +8 -0
  11. package/dist/lib/build.d.ts.map +1 -1
  12. package/dist/lib/build.js +62 -6
  13. package/dist/lib/build.js.map +1 -1
  14. package/dist/lib/commands/context.d.ts +1 -1
  15. package/dist/lib/commands/context.d.ts.map +1 -1
  16. package/dist/lib/commands/context.js +1 -1
  17. package/dist/lib/commands/context.js.map +1 -1
  18. package/dist/lib/commands/sections.js +7 -7
  19. package/dist/lib/commands/sections.js.map +1 -1
  20. package/dist/lib/commands/sync.d.ts.map +1 -1
  21. package/dist/lib/commands/sync.js +15 -14
  22. package/dist/lib/commands/sync.js.map +1 -1
  23. package/dist/lib/commands/utilities.js +164 -164
  24. package/dist/lib/commands/verify-anchors.js +6 -6
  25. package/dist/lib/commands/verify-anchors.js.map +1 -1
  26. package/dist/lib/commands/word-tools.js +8 -8
  27. package/dist/lib/grammar.js +3 -3
  28. package/dist/lib/macro-filter.lua +201 -0
  29. package/dist/lib/macros.d.ts +102 -0
  30. package/dist/lib/macros.d.ts.map +1 -0
  31. package/dist/lib/macros.js +218 -0
  32. package/dist/lib/macros.js.map +1 -0
  33. package/dist/lib/pdf-comments.js +44 -44
  34. package/dist/lib/plugins.js +57 -57
  35. package/dist/lib/pptx-color-filter.lua +37 -0
  36. package/dist/lib/pptx-themes.js +115 -115
  37. package/dist/lib/schema.d.ts.map +1 -1
  38. package/dist/lib/schema.js +34 -0
  39. package/dist/lib/schema.js.map +1 -1
  40. package/dist/lib/sections.d.ts +35 -0
  41. package/dist/lib/sections.d.ts.map +1 -1
  42. package/dist/lib/sections.js +81 -0
  43. package/dist/lib/sections.js.map +1 -1
  44. package/dist/lib/spelling.js +2 -2
  45. package/dist/lib/templates.js +387 -387
  46. package/dist/lib/themes.js +51 -51
  47. package/eslint.config.js +27 -27
  48. package/lib/anchor-match.ts +276 -276
  49. package/lib/annotations.ts +644 -644
  50. package/lib/build.ts +1766 -1694
  51. package/lib/citations.ts +160 -160
  52. package/lib/commands/build.ts +855 -855
  53. package/lib/commands/citations.ts +515 -515
  54. package/lib/commands/comments.ts +1050 -1050
  55. package/lib/commands/context.ts +176 -174
  56. package/lib/commands/core.ts +309 -309
  57. package/lib/commands/doi.ts +435 -435
  58. package/lib/commands/file-ops.ts +372 -372
  59. package/lib/commands/history.ts +320 -320
  60. package/lib/commands/index.ts +87 -87
  61. package/lib/commands/init.ts +259 -259
  62. package/lib/commands/merge-resolve.ts +378 -378
  63. package/lib/commands/preview.ts +178 -178
  64. package/lib/commands/project-info.ts +244 -244
  65. package/lib/commands/quality.ts +517 -517
  66. package/lib/commands/response.ts +454 -454
  67. package/lib/commands/section-boundaries.ts +82 -82
  68. package/lib/commands/sections.ts +451 -451
  69. package/lib/commands/sync.ts +709 -706
  70. package/lib/commands/text-ops.ts +449 -449
  71. package/lib/commands/utilities.ts +448 -448
  72. package/lib/commands/verify-anchors.ts +272 -272
  73. package/lib/commands/word-tools.ts +340 -340
  74. package/lib/comment-realign.ts +517 -517
  75. package/lib/config.ts +84 -84
  76. package/lib/crossref.ts +781 -781
  77. package/lib/csl.ts +191 -191
  78. package/lib/dependencies.ts +98 -98
  79. package/lib/diff-engine.ts +465 -465
  80. package/lib/doi-cache.ts +115 -115
  81. package/lib/doi.ts +897 -897
  82. package/lib/equations.ts +506 -506
  83. package/lib/errors.ts +346 -346
  84. package/lib/format.ts +541 -541
  85. package/lib/git.ts +326 -326
  86. package/lib/grammar.ts +303 -303
  87. package/lib/image-registry.ts +180 -180
  88. package/lib/import.ts +911 -911
  89. package/lib/journals.ts +543 -543
  90. package/lib/macro-filter.lua +201 -0
  91. package/lib/macros.ts +273 -0
  92. package/lib/merge.ts +633 -633
  93. package/lib/orcid.ts +144 -144
  94. package/lib/pdf-comments.ts +263 -263
  95. package/lib/pdf-import.ts +524 -524
  96. package/lib/plugins.ts +362 -362
  97. package/lib/postprocess.ts +188 -188
  98. package/lib/pptx-color-filter.lua +37 -37
  99. package/lib/pptx-template.ts +469 -469
  100. package/lib/pptx-themes.ts +483 -483
  101. package/lib/protect-restore.ts +520 -520
  102. package/lib/rate-limiter.ts +94 -94
  103. package/lib/response.ts +197 -197
  104. package/lib/restore-references.ts +240 -240
  105. package/lib/review.ts +327 -327
  106. package/lib/schema.ts +488 -454
  107. package/lib/scientific-words.ts +73 -73
  108. package/lib/sections.ts +425 -335
  109. package/lib/slides.ts +756 -756
  110. package/lib/spelling.ts +334 -334
  111. package/lib/templates.ts +526 -526
  112. package/lib/themes.ts +742 -742
  113. package/lib/trackchanges.ts +247 -247
  114. package/lib/tui.ts +450 -450
  115. package/lib/types.ts +550 -550
  116. package/lib/undo.ts +250 -250
  117. package/lib/utils.ts +69 -69
  118. package/lib/variables.ts +179 -179
  119. package/lib/word-extraction.ts +806 -806
  120. package/lib/word.ts +643 -643
  121. package/lib/wordcomments.ts +840 -840
  122. package/package.json +137 -137
  123. package/scripts/postbuild.js +47 -28
  124. package/skill/REFERENCE.md +539 -539
  125. package/skill/SKILL.md +295 -295
  126. package/tsconfig.json +26 -26
  127. package/types/index.d.ts +525 -525
  128. package/issues.md +0 -180
  129. package/site/assets/extra.css +0 -208
  130. package/site/commands.html +0 -926
  131. package/site/configuration.html +0 -469
  132. package/site/index.html +0 -288
  133. package/site/troubleshooting.html +0 -461
  134. package/site/workflow.html +0 -518
package/lib/crossref.ts CHANGED
@@ -1,781 +1,781 @@
1
- /**
2
- * Cross-reference handling - dynamic figure/table references
3
- *
4
- * Enables:
5
- * - @fig:label syntax in source (auto-numbered)
6
- * - Conversion to "Figure 1" in Word output
7
- * - Auto-conversion back during import
8
- */
9
-
10
- import * as fs from 'fs';
11
- import * as path from 'path';
12
- import YAML from 'yaml';
13
- import type {
14
- RefNumber,
15
- HardcodedRef,
16
- DynamicRef,
17
- FigureInfo,
18
- Registry,
19
- RefStatus,
20
- ConversionResult,
21
- } from './types.js';
22
-
23
- // =============================================================================
24
- // Constants
25
- // =============================================================================
26
-
27
- /** Characters of context to check before a reference for deduplication */
28
- const REF_CONTEXT_WINDOW = 100;
29
-
30
- /** Minimum word length for similarity calculations */
31
- const MIN_WORD_LENGTH = 2;
32
-
33
- // =============================================================================
34
- // Type Definitions (Internal)
35
- // =============================================================================
36
-
37
- /**
38
- * Reference info (internal use in registry building)
39
- */
40
- interface RefInfo {
41
- label: string;
42
- num: number;
43
- isSupp: boolean;
44
- file: string;
45
- }
46
-
47
- /**
48
- * Parsed reference number components
49
- */
50
- interface ParsedRefNumber {
51
- isSupp: boolean;
52
- num: number;
53
- suffix: string | null;
54
- }
55
-
56
- /**
57
- * Detected reference with parsed numbers
58
- */
59
- interface DetectedRef {
60
- type: 'fig' | 'tbl' | 'eq';
61
- match: string;
62
- numbers: ParsedRefNumber[];
63
- position: number;
64
- }
65
-
66
- // =============================================================================
67
- // Internal Helpers
68
- // =============================================================================
69
-
70
- /**
71
- * Discover section files from a directory by reading config files
72
- * Only returns files explicitly defined in rev.yaml or sections.yaml
73
- * Returns empty array if no config found (caller should handle this)
74
- */
75
- function discoverSectionFiles(directory: string): string[] {
76
- // Try rev.yaml first
77
- const revYamlPath = path.join(directory, 'rev.yaml');
78
- if (fs.existsSync(revYamlPath)) {
79
- try {
80
- const config = YAML.parse(fs.readFileSync(revYamlPath, 'utf-8'));
81
- if (config.sections && Array.isArray(config.sections) && config.sections.length > 0) {
82
- return config.sections.filter((f: string) => fs.existsSync(path.join(directory, f)));
83
- }
84
- } catch (e) {
85
- if (process.env.DEBUG) {
86
- console.warn('crossref: YAML parse error in rev.yaml:', (e as Error).message);
87
- }
88
- }
89
- }
90
-
91
- // Try sections.yaml
92
- const sectionsPath = path.join(directory, 'sections.yaml');
93
- if (fs.existsSync(sectionsPath)) {
94
- try {
95
- const config = YAML.parse(fs.readFileSync(sectionsPath, 'utf-8'));
96
- if (config.sections) {
97
- const sectionOrder = Object.entries(config.sections)
98
- .sort((a, b) => ((a[1] as any).order ?? 999) - ((b[1] as any).order ?? 999))
99
- .map(([file]) => file);
100
- return sectionOrder.filter((f) => fs.existsSync(path.join(directory, f)));
101
- }
102
- } catch (e) {
103
- if (process.env.DEBUG) {
104
- console.warn('crossref: YAML parse error in sections.yaml:', (e as Error).message);
105
- }
106
- }
107
- }
108
-
109
- // No config found - return empty array
110
- // Caller must handle this (either error or use explicit sections)
111
- return [];
112
- }
113
-
114
- // =============================================================================
115
- // Detection Patterns
116
- // =============================================================================
117
-
118
- /**
119
- * Patterns for detecting hardcoded references
120
- * Matches complex patterns including:
121
- * - Simple: "Figure 1", "Fig. 2a", "Table S1"
122
- * - Ranges: "Figures 1-3", "Fig. 1a-c", "Figs. 1a-3b"
123
- * - Lists: "Figures 1, 2, and 3", "Fig. 1a, b, c", "Tables 1 & 2"
124
- * - Mixed: "Figs. 1, 3-5, and 7"
125
- *
126
- * Uses a simpler base pattern and parses the full match for lists
127
- */
128
- const DETECTION_PATTERNS: Record<string, RegExp> = {
129
- // Captures the full reference including lists with "and"
130
- // Group 1: type prefix (Figure, Fig., etc.)
131
- // Group 2: reference list (parsed by parseReferenceList())
132
- // Matches: "1", "1a", "1-3", "1a-c", "1, 2, 3", "1 and 2", "1, 2 and 3", "1, 2, and 3"
133
- // Separator: comma/dash/ampersand, optionally followed by "and"
134
- // Standalone letters must be followed by separator, punctuation, or word boundary
135
- // Also handles: "see Figure 1", "(Fig. 1)", "in Figures 1–3"
136
- // Note: 'gi' flag makes these case-insensitive, so "figure 1" is also matched
137
- figure: /\b(Figures?|Figs?\.?)\s+((?:\d+|S\d+)[a-z]?(?:(?:\s*[-–—,&]\s*(?:and\s+)?|\s+and\s+)(?:(?:\d+|S\d+)[a-z]?|[a-z]\b))*)/gi,
138
-
139
- table: /\b(Tables?|Tabs?\.?)\s+((?:\d+|S\d+)[a-z]?(?:(?:\s*[-–—,&]\s*(?:and\s+)?|\s+and\s+)(?:(?:\d+|S\d+)[a-z]?|[a-z]\b))*)/gi,
140
-
141
- equation: /\b(Equations?|Eqs?\.?)\s+((?:\d+)[a-z]?(?:(?:\s*[-–—,&]\s*(?:and\s+)?|\s+and\s+)(?:(?:\d+)[a-z]?|[a-z]\b))*)/gi,
142
- };
143
-
144
- /**
145
- * Patterns to EXCLUDE from detection (false positives)
146
- * These look like references but aren't (e.g., "Table of Contents", "Figure skating")
147
- */
148
- const EXCLUSION_PATTERNS = [
149
- /\bTable\s+of\s+Contents?\b/gi,
150
- /\bFigure\s+skating\b/gi,
151
- /\bFigure\s+out\b/gi,
152
- /\bFigure\s+it\b/gi,
153
- /\bTable\s+setting/gi,
154
- /\bEquation\s+editor\b/gi,
155
- ];
156
-
157
- /**
158
- * Pattern for extracting anchors from markdown: {#fig:label}, {#tbl:label}
159
- */
160
- const ANCHOR_PATTERN = /\{#(fig|tbl|eq):([a-zA-Z0-9_-]+)/gi;
161
-
162
- /**
163
- * Pattern for @-style references: @fig:label, @tbl:label
164
- */
165
- const REF_PATTERN = /@(fig|tbl|eq):([a-zA-Z0-9_-]+)/gi;
166
-
167
- // =============================================================================
168
- // Public API
169
- // =============================================================================
170
-
171
- /**
172
- * Normalize a reference type to standard form
173
- */
174
- export function normalizeType(typeStr: string): 'fig' | 'tbl' | 'eq' | string {
175
- if (typeof typeStr !== 'string') {
176
- throw new TypeError(`typeStr must be a string, got ${typeof typeStr}`);
177
- }
178
- const lower = typeStr.toLowerCase().replace(/\.$/, '');
179
- if (lower.startsWith('fig')) return 'fig';
180
- if (lower.startsWith('tab')) return 'tbl';
181
- if (lower.startsWith('eq')) return 'eq';
182
- return lower;
183
- }
184
-
185
- /**
186
- * Parse a reference number, handling supplementary (S1, S2) and letter suffixes (1a, 1b)
187
- */
188
- export function parseRefNumber(numStr: string, suffix: string | null = null): ParsedRefNumber {
189
- if (!numStr || typeof numStr !== 'string') {
190
- return { isSupp: false, num: 0, suffix: suffix || null };
191
- }
192
- const isSupp = numStr.toUpperCase().startsWith('S');
193
- const numPart = isSupp ? numStr.slice(1) : numStr;
194
- // Extract suffix if embedded in numStr (e.g., "1a")
195
- const match = numPart.match(/^(\d+)([a-z])?$/i);
196
- const num = match && match[1] ? parseInt(match[1], 10) : parseInt(numPart, 10);
197
- const extractedSuffix = suffix || (match && match[2]) || null;
198
- return { isSupp, num, suffix: extractedSuffix ? extractedSuffix.toLowerCase() : null };
199
- }
200
-
201
- /**
202
- * Parse a reference list string like "1, 2, and 3" or "1a-c" or "1a-3b"
203
- * Returns an array of {num, isSupp, suffix} objects
204
- */
205
- export function parseReferenceList(listStr: string): ParsedRefNumber[] {
206
- const results: ParsedRefNumber[] = [];
207
- if (!listStr || typeof listStr !== 'string') return results;
208
-
209
- // Normalize: replace "and" with comma, normalize dashes
210
- let normalized = listStr
211
- .replace(/\s+and\s+/gi, ', ')
212
- .replace(/[–—]/g, '-') // en-dash, em-dash → hyphen
213
- .replace(/&/g, ', '); // & → comma
214
-
215
- // Split by comma (but not by dash, which indicates ranges)
216
- const parts = normalized.split(/\s*,\s*/).filter((p) => p.trim());
217
-
218
- let lastFullRef: { num: number; isSupp: boolean } | null = null; // Track the last full reference for implicit prefixes
219
-
220
- for (const part of parts) {
221
- const trimmed = part.trim();
222
- if (!trimmed) continue;
223
-
224
- // Check if this is a range (contains -)
225
- if (trimmed.includes('-')) {
226
- const parts = trimmed.split('-').map((s) => s.trim());
227
- const start = parts[0] || '';
228
- const end = parts[1] || '';
229
-
230
- // Check if end is just a letter (e.g., "1a-c" where end is "c")
231
- const endIsLetterOnly = /^[a-z]$/i.test(end);
232
-
233
- const startRef = parseRefNumber(start);
234
- // For letter-only end, don't parse as number
235
- const endRef = endIsLetterOnly
236
- ? { num: startRef.num, isSupp: startRef.isSupp, suffix: end.toLowerCase() }
237
- : parseRefNumber(end);
238
-
239
- // Handle different range types:
240
- // 1. Suffix-only range on same number: "1a-c" → 1a, 1b, 1c
241
- // 2. Number range: "1-3" → 1, 2, 3
242
- // 3. Cross-number suffix range: "1a-3b" → 1a...1z, 2a...2z, 3a, 3b (limited)
243
-
244
- if (startRef.suffix && endRef.suffix && startRef.num !== endRef.num) {
245
- // Cross-number suffix range: "1a-3b"
246
- // For academic papers, limit intermediate figures to same suffix range
247
- // e.g., "1a-3b" typically means 1a, 1b, 2a, 2b, 3a, 3b
248
- const maxSuffix = Math.max(
249
- startRef.suffix.charCodeAt(0),
250
- endRef.suffix.charCodeAt(0)
251
- );
252
-
253
- for (let n = startRef.num; n <= endRef.num; n++) {
254
- const suffixStart =
255
- n === startRef.num ? startRef.suffix.charCodeAt(0) : 'a'.charCodeAt(0);
256
- const suffixEnd = n === endRef.num ? endRef.suffix.charCodeAt(0) : maxSuffix;
257
-
258
- for (let s = suffixStart; s <= suffixEnd; s++) {
259
- results.push({
260
- num: n,
261
- isSupp: startRef.isSupp,
262
- suffix: String.fromCharCode(s),
263
- });
264
- }
265
- }
266
- lastFullRef = { num: endRef.num, isSupp: startRef.isSupp };
267
- } else if (startRef.suffix || endRef.suffix) {
268
- // Suffix range on same number: "1a-c"
269
- const num: number = startRef.num !== 0 ? startRef.num : (lastFullRef ? lastFullRef.num : 1);
270
- const isSupp: boolean = startRef.isSupp ? startRef.isSupp : (lastFullRef ? lastFullRef.isSupp : false);
271
- const startCode = (startRef.suffix || 'a').charCodeAt(0);
272
- const endCode = (endRef.suffix || 'a').charCodeAt(0);
273
-
274
- for (let code = startCode; code <= endCode; code++) {
275
- results.push({
276
- num,
277
- isSupp,
278
- suffix: String.fromCharCode(code),
279
- });
280
- }
281
- lastFullRef = { num, isSupp };
282
- } else {
283
- // Pure number range: "1-3"
284
- for (let n = startRef.num; n <= endRef.num; n++) {
285
- results.push({
286
- num: n,
287
- isSupp: startRef.isSupp,
288
- suffix: null,
289
- });
290
- }
291
- lastFullRef = { num: endRef.num, isSupp: startRef.isSupp };
292
- }
293
- } else {
294
- // Single reference or implicit suffix
295
- // Check if it's just a letter (implicit prefix from previous number)
296
- if (/^[a-z]$/i.test(trimmed) && lastFullRef) {
297
- // Implicit prefix: "b" after "1a" means "1b"
298
- results.push({
299
- num: lastFullRef.num,
300
- isSupp: lastFullRef.isSupp,
301
- suffix: trimmed.toLowerCase(),
302
- });
303
- } else {
304
- // Full reference: "1", "1a", "S1", "S1a"
305
- const ref = parseRefNumber(trimmed);
306
- results.push(ref);
307
- lastFullRef = { num: ref.num, isSupp: ref.isSupp };
308
- }
309
- }
310
- }
311
-
312
- return results;
313
- }
314
-
315
- /**
316
- * Build a registry of figure/table labels from .md files
317
- * Scans for {#fig:label} and {#tbl:label} anchors
318
- *
319
- * IMPORTANT: This function requires either explicit sections or a rev.yaml/sections.yaml config.
320
- * It will NOT guess by scanning all .md files, as this leads to incorrect numbering
321
- * when temporary files (paper_clean.md, etc.) exist in the directory.
322
- */
323
- export function buildRegistry(directory: string, sections?: string[]): Registry {
324
- if (typeof directory !== 'string') {
325
- throw new TypeError(`directory must be a string, got ${typeof directory}`);
326
- }
327
-
328
- const figures = new Map<string, FigureInfo>();
329
- const tables = new Map<string, FigureInfo>();
330
- const equations = new Map<string, FigureInfo>();
331
-
332
- // Counters for numbering (separate for main and supplementary)
333
- let figNum = 0;
334
- let figSuppNum = 0;
335
- let tblNum = 0;
336
- let tblSuppNum = 0;
337
- let eqNum = 0;
338
-
339
- let orderedFiles: string[];
340
-
341
- if (Array.isArray(sections) && sections.length > 0) {
342
- // Use explicitly provided section files - most reliable
343
- orderedFiles = sections.filter((f) => fs.existsSync(path.join(directory, f)));
344
- } else {
345
- // Try to determine sections from config files (rev.yaml or sections.yaml)
346
- orderedFiles = discoverSectionFiles(directory);
347
- // If no config found, return empty registry rather than guessing
348
- // This prevents bugs from scanning wrong files
349
- }
350
-
351
- // Determine if a file is supplementary
352
- const isSupplementary = (filename: string): boolean =>
353
- filename.toLowerCase().includes('supp') || filename.toLowerCase().includes('appendix');
354
-
355
- // Process each file in order
356
- for (const file of orderedFiles) {
357
- const filePath = path.join(directory, file);
358
- const content = fs.readFileSync(filePath, 'utf-8');
359
- const isSupp = isSupplementary(file);
360
-
361
- // Find all anchors
362
- let match: RegExpExecArray | null;
363
- ANCHOR_PATTERN.lastIndex = 0;
364
- while ((match = ANCHOR_PATTERN.exec(content)) !== null) {
365
- const typeRaw = match[1];
366
- const labelRaw = match[2];
367
- if (!typeRaw || !labelRaw) continue;
368
-
369
- const type = typeRaw.toLowerCase();
370
- const label = labelRaw;
371
-
372
- if (type === 'fig') {
373
- if (isSupp) {
374
- figSuppNum++;
375
- figures.set(label, { label, num: figSuppNum, isSupp: true, file });
376
- } else {
377
- figNum++;
378
- figures.set(label, { label, num: figNum, isSupp: false, file });
379
- }
380
- } else if (type === 'tbl') {
381
- if (isSupp) {
382
- tblSuppNum++;
383
- tables.set(label, { label, num: tblSuppNum, isSupp: true, file });
384
- } else {
385
- tblNum++;
386
- tables.set(label, { label, num: tblNum, isSupp: false, file });
387
- }
388
- } else if (type === 'eq') {
389
- eqNum++;
390
- equations.set(label, { label, num: eqNum, isSupp: false, file });
391
- }
392
- }
393
- }
394
-
395
- // Build reverse lookup: number → label
396
- const byNumber: Registry['byNumber'] = {
397
- fig: new Map(),
398
- figS: new Map(),
399
- tbl: new Map(),
400
- tblS: new Map(),
401
- eq: new Map(),
402
- };
403
-
404
- for (const [label, info] of figures) {
405
- const key = info.isSupp ? 'figS' : 'fig';
406
- byNumber[key].set(info.num, label);
407
- }
408
- for (const [label, info] of tables) {
409
- const key = info.isSupp ? 'tblS' : 'tbl';
410
- byNumber[key].set(info.num, label);
411
- }
412
- for (const [label, info] of equations) {
413
- byNumber.eq.set(info.num, label);
414
- }
415
-
416
- return { figures, tables, equations, byNumber };
417
- }
418
-
419
- /**
420
- * Get the display string for a label (e.g., "Figure 1", "Table S2")
421
- */
422
- export function labelToDisplay(
423
- type: 'fig' | 'tbl' | 'eq',
424
- label: string,
425
- registry: Registry
426
- ): string | null {
427
- if (!registry || !registry.figures) return null;
428
-
429
- const collection =
430
- type === 'fig' ? registry.figures : type === 'tbl' ? registry.tables : registry.equations;
431
-
432
- const info = collection.get(label);
433
- if (!info) return null;
434
-
435
- const prefix = type === 'fig' ? 'Figure' : type === 'tbl' ? 'Table' : 'Equation';
436
- const numStr = info.isSupp ? `S${info.num}` : `${info.num}`;
437
-
438
- return `${prefix} ${numStr}`;
439
- }
440
-
441
- /**
442
- * Get the label for a display number (e.g., "fig:heatmap" from Figure 1)
443
- */
444
- export function numberToLabel(
445
- type: 'fig' | 'tbl' | 'eq',
446
- num: number,
447
- isSupp: boolean,
448
- registry: Registry
449
- ): string | null {
450
- if (!registry || !registry.byNumber) return null;
451
-
452
- const key = isSupp ? (`${type}S` as keyof Registry['byNumber']) : type;
453
- return registry.byNumber[key]?.get(num) || null;
454
- }
455
-
456
- /**
457
- * Detect all hardcoded references in text
458
- */
459
- export function detectHardcodedRefs(text: string): DetectedRef[] {
460
- if (typeof text !== 'string') {
461
- throw new TypeError(`text must be a string, got ${typeof text}`);
462
- }
463
-
464
- const refs: DetectedRef[] = [];
465
-
466
- for (const [type, pattern] of Object.entries(DETECTION_PATTERNS)) {
467
- pattern.lastIndex = 0;
468
- let match: RegExpExecArray | null;
469
-
470
- while ((match = pattern.exec(text)) !== null) {
471
- // Pattern groups:
472
- // [1] = type prefix (Figure, Fig., etc.)
473
- // [2] = reference list string (e.g., "1, 2, and 3" or "1a-3b")
474
-
475
- const listStr = match[2];
476
- if (!listStr) continue;
477
- const numbers = parseReferenceList(listStr);
478
-
479
- // Skip if no valid numbers were parsed
480
- if (numbers.length === 0) continue;
481
-
482
- refs.push({
483
- type: normalizeType(type) as 'fig' | 'tbl' | 'eq',
484
- match: match[0],
485
- numbers,
486
- position: match.index,
487
- });
488
- }
489
- }
490
-
491
- // Sort by position
492
- refs.sort((a, b) => a.position - b.position);
493
- return refs;
494
- }
495
-
496
- /**
497
- * Convert hardcoded references to @-style references
498
- */
499
- export function convertHardcodedRefs(text: string, registry: Registry): ConversionResult {
500
- // Input validation delegated to detectHardcodedRefs
501
- const refs = detectHardcodedRefs(text);
502
- const conversions: Array<{ from: string; to: string }> = [];
503
- const warnings: string[] = [];
504
-
505
- // Process in reverse order to preserve positions
506
- let result = text;
507
- for (let i = refs.length - 1; i >= 0; i--) {
508
- const ref = refs[i];
509
- if (!ref) continue;
510
-
511
- // Build replacement
512
- const labels: string[] = [];
513
- for (const { num, isSupp } of ref.numbers) {
514
- const label = numberToLabel(ref.type, num, isSupp, registry);
515
- if (label) {
516
- labels.push(`@${ref.type}:${label}`);
517
- } else {
518
- const displayNum = isSupp ? `S${num}` : `${num}`;
519
- warnings.push(`Unknown reference: ${ref.type} ${displayNum} (no matching label)`);
520
- labels.push(ref.match); // Keep original if no match
521
- }
522
- }
523
-
524
- if (labels.length > 0 && !labels.includes(ref.match)) {
525
- const replacement = labels.join('; ');
526
-
527
- // Skip if the @-syntax already appears in the preceding text
528
- // This prevents duplication when import restores @fig:x and then we see "Fig. 1"
529
- // e.g., "@fig:map@fig:map{++@fig:map++}" or "@fig:mapFigure 1" patterns
530
- const textBefore = result.slice(Math.max(0, ref.position - REF_CONTEXT_WINDOW), ref.position);
531
- const alreadyHasRef = labels.some((label) => textBefore.includes(label));
532
- if (alreadyHasRef) {
533
- continue; // Skip - ref already present nearby
534
- }
535
-
536
- result =
537
- result.slice(0, ref.position) + replacement + result.slice(ref.position + ref.match.length);
538
-
539
- conversions.push({
540
- from: ref.match,
541
- to: replacement,
542
- });
543
- }
544
- }
545
-
546
- return { converted: result, conversions, warnings };
547
- }
548
-
549
- /**
550
- * Detect @-style references in text
551
- */
552
- export function detectDynamicRefs(text: string): DynamicRef[] {
553
- if (typeof text !== 'string') {
554
- throw new TypeError(`text must be a string, got ${typeof text}`);
555
- }
556
-
557
- const refs: DynamicRef[] = [];
558
- REF_PATTERN.lastIndex = 0;
559
- let match: RegExpExecArray | null;
560
-
561
- while ((match = REF_PATTERN.exec(text)) !== null) {
562
- const type = match[1];
563
- const label = match[2];
564
- if (!type || !label) continue;
565
- refs.push({
566
- type: type as 'fig' | 'tbl' | 'eq',
567
- label: label,
568
- match: match[0],
569
- position: match.index,
570
- });
571
- }
572
-
573
- return refs;
574
- }
575
-
576
- /**
577
- * Get reference status for a file/text
578
- */
579
- export function getRefStatus(text: string, registry: Registry): RefStatus {
580
- const dynamic = detectDynamicRefs(text);
581
- const hardcoded = detectHardcodedRefs(text) as HardcodedRef[];
582
-
583
- // Count anchors in this text
584
- ANCHOR_PATTERN.lastIndex = 0;
585
- let figCount = 0,
586
- tblCount = 0,
587
- eqCount = 0;
588
- let match: RegExpExecArray | null;
589
- while ((match = ANCHOR_PATTERN.exec(text)) !== null) {
590
- const type = match[1];
591
- if (!type) continue;
592
- if (type === 'fig') figCount++;
593
- else if (type === 'tbl') tblCount++;
594
- else if (type === 'eq') eqCount++;
595
- }
596
-
597
- return {
598
- dynamic,
599
- hardcoded,
600
- anchors: { figures: figCount, tables: tblCount, equations: eqCount },
601
- };
602
- }
603
-
604
- /**
605
- * Detect forward references in combined text
606
- * A forward reference is a @ref that appears before its {#anchor} definition
607
- */
608
- export function detectForwardRefs(text: string): {
609
- forwardRefs: Array<{ type: string; label: string; match: string; position: number }>;
610
- anchorPositions: Map<string, number>;
611
- } {
612
- // Build map of anchor positions: "fig:label" -> position
613
- const anchorPositions = new Map<string, number>();
614
- ANCHOR_PATTERN.lastIndex = 0;
615
- let match: RegExpExecArray | null;
616
- while ((match = ANCHOR_PATTERN.exec(text)) !== null) {
617
- const type = match[1];
618
- const label = match[2];
619
- if (!type || !label) continue;
620
- const key = `${type}:${label}`;
621
- // Only store first occurrence (in case of duplicates)
622
- if (!anchorPositions.has(key)) {
623
- anchorPositions.set(key, match.index);
624
- }
625
- }
626
-
627
- // Find all references
628
- const refs = detectDynamicRefs(text);
629
-
630
- // Filter to only forward references
631
- const forwardRefs = refs.filter((ref) => {
632
- const key = `${ref.type}:${ref.label}`;
633
- const anchorPos = anchorPositions.get(key);
634
- // Forward ref if anchor doesn't exist or appears after the reference
635
- return anchorPos === undefined || ref.position < anchorPos;
636
- });
637
-
638
- return { forwardRefs, anchorPositions };
639
- }
640
-
641
- /**
642
- * Resolve forward references to display format
643
- * Only resolves refs that appear before their anchor definition
644
- * Leaves other refs for pandoc-crossref to handle (preserves clickable links)
645
- */
646
- export function resolveForwardRefs(
647
- text: string,
648
- registry: Registry
649
- ): {
650
- text: string;
651
- resolved: Array<{ from: string; to: string; position: number }>;
652
- unresolved: Array<{ ref: string; position: number }>;
653
- } {
654
- const { forwardRefs } = detectForwardRefs(text);
655
- const resolved: Array<{ from: string; to: string; position: number }> = [];
656
- const unresolved: Array<{ ref: string; position: number }> = [];
657
-
658
- // Process in reverse order to preserve positions
659
- let result = text;
660
- for (let i = forwardRefs.length - 1; i >= 0; i--) {
661
- const ref = forwardRefs[i];
662
- if (!ref) continue;
663
- const display = labelToDisplay(ref.type as 'fig' | 'tbl' | 'eq', ref.label, registry);
664
-
665
- if (display) {
666
- result =
667
- result.slice(0, ref.position) + display + result.slice(ref.position + ref.match.length);
668
- resolved.push({
669
- from: ref.match,
670
- to: display,
671
- position: ref.position,
672
- });
673
- } else {
674
- unresolved.push({
675
- ref: ref.match,
676
- position: ref.position,
677
- });
678
- }
679
- }
680
-
681
- return { text: result, resolved, unresolved };
682
- }
683
-
684
/**
 * Resolve every reference to a supplementary figure/table and neutralise the
 * matching anchors.
 *
 * pandoc-crossref numbers all figures sequentially and cannot produce
 * "Figure S1". Two passes are therefore applied to `text`:
 *
 * 1. Each `@fig:label` / `@tbl:label` whose label is flagged `isSupp` in the
 *    registry is replaced by its plain-text display form (e.g. "Figure S1").
 * 2. The `#fig:label` / `#tbl:label` identifier is removed from the anchor's
 *    attribute block. Any other attributes in that block (for example
 *    `width=50%`) are kept, so `{#fig:s1 width=50%}` becomes `{width=50%}`
 *    and `{#fig:s1}` disappears entirely.
 *
 * @param text - Markdown to process.
 * @param registry - Registry whose `figures` / `tables` entries carry `isSupp`.
 * @returns The rewritten text plus a log of replacements; stripped anchors are
 *   logged with `to: '(stripped)'`.
 */
export function resolveSupplementaryRefs(
  text: string,
  registry: Registry
): {
  text: string;
  resolved: Array<{ from: string; to: string }>;
} {
  const resolved: Array<{ from: string; to: string }> = [];
  let result = text;

  // Collect "type:label" keys of all supplementary figures and tables.
  const suppLabels = new Set<string>();
  for (const [label, info] of registry.figures) {
    if (info.isSupp) suppLabels.add(`fig:${label}`);
  }
  for (const [label, info] of registry.tables) {
    if (info.isSupp) suppLabels.add(`tbl:${label}`);
  }

  if (suppLabels.size === 0) return { text: result, resolved };

  // Pass 1: replace @-references, back to front so earlier positions stay valid.
  const refs = detectDynamicRefs(result);
  for (let i = refs.length - 1; i >= 0; i--) {
    const ref = refs[i];
    if (!ref) continue;
    if (!suppLabels.has(`${ref.type}:${ref.label}`)) continue;

    const display = labelToDisplay(ref.type as 'fig' | 'tbl' | 'eq', ref.label, registry);
    if (display) {
      result =
        result.slice(0, ref.position) + display + result.slice(ref.position + ref.match.length);
      resolved.push({ from: ref.match, to: display });
    }
  }

  // Pass 2: drop the identifier from each supplementary anchor so that
  // pandoc-crossref does not number it, keeping any sibling attributes.
  for (const key of suppLabels) {
    const escaped = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // Group 1 captures whatever follows the identifier inside the braces.
    const pattern = new RegExp(`\\{#${escaped}(?:\\s([^}]*))?\\}`, 'g');
    result = result.replace(pattern, (match: string, rest: string | undefined) => {
      resolved.push({ from: match, to: '(stripped)' });
      const remaining = (rest ?? '').trim();
      return remaining ? `{${remaining}}` : '';
    });
  }

  return { text: result, resolved };
}
744
-
745
/**
 * Render a registry as a human-readable, newline-separated listing.
 *
 * Figures, tables and equations each get their own titled section (only when
 * non-empty), separated by a blank line. Supplementary figures and tables are
 * shown with an "S" prefix on their number. An entirely empty registry yields
 * a single explanatory line.
 *
 * @param registry - Registry to render.
 * @returns The formatted listing.
 */
export function formatRegistry(registry: Registry): string {
  const out: string[] = [];

  // Open a section, inserting a blank separator after any previous section.
  const openSection = (title: string): void => {
    if (out.length > 0) out.push('');
    out.push(title);
  };
  const shownNumber = (info: { num: number; isSupp: boolean }): string | number =>
    info.isSupp ? `S${info.num}` : info.num;

  if (registry.figures.size > 0) {
    openSection('Figures:');
    registry.figures.forEach((info, label) => {
      out.push(` Figure ${shownNumber(info)}: @fig:${label} (${info.file})`);
    });
  }

  if (registry.tables.size > 0) {
    openSection('Tables:');
    registry.tables.forEach((info, label) => {
      out.push(` Table ${shownNumber(info)}: @tbl:${label} (${info.file})`);
    });
  }

  if (registry.equations.size > 0) {
    openSection('Equations:');
    registry.equations.forEach((info, label) => {
      out.push(` Equation ${info.num}: @eq:${label} (${info.file})`);
    });
  }

  return out.length === 0 ? 'No figure/table anchors found.' : out.join('\n');
}
1
+ /**
2
+ * Cross-reference handling - dynamic figure/table references
3
+ *
4
+ * Enables:
5
+ * - @fig:label syntax in source (auto-numbered)
6
+ * - Conversion to "Figure 1" in Word output
7
+ * - Auto-conversion back during import
8
+ */
9
+
10
+ import * as fs from 'fs';
11
+ import * as path from 'path';
12
+ import YAML from 'yaml';
13
+ import type {
14
+ RefNumber,
15
+ HardcodedRef,
16
+ DynamicRef,
17
+ FigureInfo,
18
+ Registry,
19
+ RefStatus,
20
+ ConversionResult,
21
+ } from './types.js';
22
+
23
+ // =============================================================================
24
+ // Constants
25
+ // =============================================================================
26
+
27
/**
 * Number of characters directly preceding a hardcoded reference that are
 * inspected for an already-present @-reference (used to avoid duplicating it).
 */
const REF_CONTEXT_WINDOW = 100;

/**
 * Shortest word length considered by similarity calculations.
 * NOTE(review): not referenced in the visible part of this module.
 */
const MIN_WORD_LENGTH = 2;
32
+
33
+ // =============================================================================
34
+ // Type Definitions (Internal)
35
+ // =============================================================================
36
+
37
/**
 * Internal description of one labelled item while the registry is being built.
 * Has the same four fields as the entries `buildRegistry` stores in its maps.
 */
interface RefInfo {
  /** Anchor label (the part after `fig:` / `tbl:` / `eq:`). */
  label: string;
  /** Number assigned to the item. */
  num: number;
  /** Whether the item is numbered in the supplementary ("S") series. */
  isSupp: boolean;
  /** Section file the anchor was found in. */
  file: string;
}
46
+
47
/**
 * One reference number broken into its components, as produced by
 * `parseRefNumber` and `parseReferenceList`.
 */
interface ParsedRefNumber {
  /** True when the number carries the supplementary "S" prefix (e.g. "S2"). */
  isSupp: boolean;
  /** Numeric part of the reference. */
  num: number;
  /** Panel letter (e.g. "a" in "1a"), or null when there is none. */
  suffix: string | null;
}
55
+
56
/**
 * A hardcoded reference found in text, together with the numbers it expands to.
 */
interface DetectedRef {
  /** Normalised reference kind. */
  type: 'fig' | 'tbl' | 'eq';
  /** Exact text that was matched (e.g. "Figs. 1a-c"). */
  match: string;
  /** Every individual number the matched list/range refers to. */
  numbers: ParsedRefNumber[];
  /** Index of the match within the scanned text. */
  position: number;
}
65
+
66
+ // =============================================================================
67
+ // Internal Helpers
68
+ // =============================================================================
69
+
70
/**
 * Discover the ordered section files of a project from its config files.
 *
 * Lookup order:
 * 1. `rev.yaml` – used when it has a non-empty `sections` array.
 * 2. `sections.yaml` – used when it has a `sections` mapping; files are
 *    sorted by each entry's `order` (entries without one sort as 999).
 *
 * Only files that actually exist inside `directory` are returned. Entries that
 * are not strings are ignored. A config file that is missing, unparsable, empty
 * or not a mapping is treated as absent.
 *
 * @param directory - Project directory containing the config files.
 * @returns Existing section file names in configured order, or an empty array
 *   when no usable config is found (the caller must handle that case).
 */
function discoverSectionFiles(directory: string): string[] {
  // Load a YAML config from `directory`; null when unusable.
  const readConfig = (fileName: string): Record<string, unknown> | null => {
    const configPath = path.join(directory, fileName);
    if (!fs.existsSync(configPath)) return null;
    try {
      const parsed: unknown = YAML.parse(fs.readFileSync(configPath, 'utf-8'));
      // An empty document parses to null and a scalar document to a primitive;
      // neither can carry a `sections` key.
      return parsed !== null && typeof parsed === 'object'
        ? (parsed as Record<string, unknown>)
        : null;
    } catch (e) {
      if (process.env.DEBUG) {
        console.warn(`crossref: YAML parse error in ${fileName}:`, (e as Error).message);
      }
      return null;
    }
  };

  // Keep only string entries that name an existing file in `directory`.
  const existingFiles = (candidates: unknown[]): string[] =>
    candidates.filter(
      (f): f is string => typeof f === 'string' && fs.existsSync(path.join(directory, f))
    );

  // Try rev.yaml first
  const revSections = readConfig('rev.yaml')?.sections;
  if (Array.isArray(revSections) && revSections.length > 0) {
    return existingFiles(revSections);
  }

  // Try sections.yaml
  const sections = readConfig('sections.yaml')?.sections;
  if (sections !== null && typeof sections === 'object') {
    // Entries may be null (key without a value); those sort with the default order.
    const orderOf = (entry: unknown): number =>
      Number((entry as { order?: unknown } | null | undefined)?.order ?? 999);
    const ordered = Object.entries(sections as Record<string, unknown>)
      .sort((a, b) => orderOf(a[1]) - orderOf(b[1]))
      .map(([file]) => file);
    return existingFiles(ordered);
  }

  // No config found - caller must either error out or use explicit sections.
  return [];
}
113
+
114
+ // =============================================================================
115
+ // Detection Patterns
116
+ // =============================================================================
117
+
118
/**
 * Regular expressions that find hardcoded references in prose.
 *
 * Each pattern captures:
 * - group 1: the type word ("Figure", "Figs.", "Table", "Eq.", ...)
 * - group 2: the whole reference list, later expanded by parseReferenceList()
 *
 * The list may contain:
 * - single items: "1", "2a", "S1"
 * - ranges joined by hyphen, en dash or em dash: "1-3", "1a-c", "1a-3b"
 * - lists joined by comma, "&" or "and": "1, 2, and 3", "1a, b, c", "1 & 2"
 * - any mix of these: "1, 3-5, and 7"
 *
 * A bare letter is only accepted as a list item when it ends at a word
 * boundary. The `S` (supplementary) prefix is accepted for figures and tables
 * but not for equations. All patterns use the `gi` flags, so they are
 * case-insensitive and stateful: reset `lastIndex` before reusing them.
 */
const DETECTION_PATTERNS: Record<string, RegExp> = {
  // "Figure 1", "Figs. 1a-c", "figures 1, 2 and 3"
  figure: /\b(Figures?|Figs?\.?)\s+((?:\d+|S\d+)[a-z]?(?:(?:\s*[-–—,&]\s*(?:and\s+)?|\s+and\s+)(?:(?:\d+|S\d+)[a-z]?|[a-z]\b))*)/gi,

  // "Table 1", "Tabs. S1 & S2"
  table: /\b(Tables?|Tabs?\.?)\s+((?:\d+|S\d+)[a-z]?(?:(?:\s*[-–—,&]\s*(?:and\s+)?|\s+and\s+)(?:(?:\d+|S\d+)[a-z]?|[a-z]\b))*)/gi,

  // "Equation 1", "Eqs. 2-4" (no supplementary form)
  equation: /\b(Equations?|Eqs?\.?)\s+((?:\d+)[a-z]?(?:(?:\s*[-–—,&]\s*(?:and\s+)?|\s+and\s+)(?:(?:\d+)[a-z]?|[a-z]\b))*)/gi,
};
143
+
144
/**
 * Phrases that start like a reference word but are ordinary prose
 * (e.g. "Table of Contents", "Figure skating") and should not be treated as
 * references.
 */
const EXCLUSION_PATTERNS = [
  /\bTable\s+of\s+Contents?\b/gi,
  /\bFigure\s+skating\b/gi,
  /\bFigure\s+out\b/gi,
  /\bFigure\s+it\b/gi,
  /\bTable\s+setting/gi,
  /\bEquation\s+editor\b/gi,
];
156
+
157
/**
 * Matches the opening of an anchor definition in markdown, e.g. `{#fig:label`
 * or `{#tbl:label`. Group 1 is the type (fig|tbl|eq), group 2 the label.
 * Uses the `gi` flags, so reset `lastIndex` before each scan.
 */
const ANCHOR_PATTERN = /\{#(fig|tbl|eq):([a-zA-Z0-9_-]+)/gi;
161
+
162
/**
 * Matches an @-style reference such as `@fig:label` or `@tbl:label`.
 * Group 1 is the type (fig|tbl|eq), group 2 the label.
 * Uses the `gi` flags, so reset `lastIndex` before each scan.
 */
const REF_PATTERN = /@(fig|tbl|eq):([a-zA-Z0-9_-]+)/gi;
166
+
167
+ // =============================================================================
168
+ // Public API
169
+ // =============================================================================
170
+
171
/**
 * Map a reference type word to its canonical short form.
 *
 * The input is lowercased and one trailing period is dropped. Anything then
 * starting with "fig" becomes `'fig'`, with "tab" becomes `'tbl'`, and with
 * "eq" becomes `'eq'`. Any other value is returned in that lowercased form.
 *
 * @param typeStr - Type word such as "Figure", "Figs.", "Table" or "Eq.".
 * @returns The canonical type, or the lowercased input when unrecognised.
 * @throws TypeError when `typeStr` is not a string.
 */
export function normalizeType(typeStr: string): 'fig' | 'tbl' | 'eq' | string {
  if (typeof typeStr !== 'string') {
    throw new TypeError(`typeStr must be a string, got ${typeof typeStr}`);
  }

  const cleaned = typeStr.toLowerCase().replace(/\.$/, '');
  const canonicalByPrefix: Array<[string, 'fig' | 'tbl' | 'eq']> = [
    ['fig', 'fig'],
    ['tab', 'tbl'],
    ['eq', 'eq'],
  ];

  for (const [prefix, canonical] of canonicalByPrefix) {
    if (cleaned.startsWith(prefix)) return canonical;
  }
  return cleaned;
}
184
+
185
/**
 * Parse one reference number such as "3", "1a", "S2" or "S1b".
 *
 * - A leading "S"/"s" followed by a digit marks a supplementary item.
 * - A single trailing letter is returned as the (lowercased) panel suffix
 *   unless an explicit `suffix` argument is supplied, which takes precedence.
 * - A lone letter ("b") is reported as a suffix with `num: 0`.
 * - Input with no leading number yields `num: 0` (never `NaN`), matching the
 *   result for empty input.
 *
 * @param numStr - Number text to parse.
 * @param suffix - Optional explicit panel letter overriding any embedded one.
 * @returns The parsed components.
 */
export function parseRefNumber(numStr: string, suffix: string | null = null): ParsedRefNumber {
  if (!numStr || typeof numStr !== 'string') {
    return { isSupp: false, num: 0, suffix: suffix || null };
  }

  // Require a digit after the "S" so that plain letters/words starting with
  // "s" are not mistaken for supplementary numbers.
  const isSupp = /^s\d/i.test(numStr);
  const numPart = isSupp ? numStr.slice(1) : numStr;

  // "<digits>" optionally followed by one panel letter, e.g. "1a".
  const match = /^(\d+)([a-z])?$/i.exec(numPart);
  const loneLetter = /^[a-z]$/i.test(numPart) ? numPart : null;

  // Fall back to a lenient parse (e.g. "12abc" -> 12) and use 0 when nothing numeric is found.
  const parsed = parseInt(match?.[1] ?? numPart, 10);
  const num = Number.isNaN(parsed) ? 0 : parsed;

  const extractedSuffix = suffix || match?.[2] || loneLetter || null;
  return { isSupp, num, suffix: extractedSuffix ? extractedSuffix.toLowerCase() : null };
}
200
+
201
/**
 * Expand a reference list such as "1, 2, and 3", "1a-c" or "1a-3b" into the
 * individual numbers it refers to.
 *
 * Handling:
 * - "and" and "&" are treated as commas; en/em dashes as hyphens.
 * - A bare letter after a full reference reuses that reference's number
 *   ("1a, b" -> 1a, 1b). This also applies when the bare letter starts a
 *   range ("1a, c-e" -> 1a, 1c, 1d, 1e).
 * - "1-3" expands to 1, 2, 3.
 * - "1a-c" expands to 1a, 1b, 1c.
 * - "1a-3b" expands per number, with intermediate numbers running from "a" to
 *   the larger of the two suffixes (1a, 1b, 2a, 2b, 3a, 3b).
 * - Only the first two hyphen-separated parts of a range are used, and a
 *   descending range produces no entries.
 *
 * @param listStr - The list portion of a detected reference.
 * @returns Parsed numbers in reading order; empty for empty/non-string input.
 */
export function parseReferenceList(listStr: string): ParsedRefNumber[] {
  const results: ParsedRefNumber[] = [];
  if (!listStr || typeof listStr !== 'string') return results;

  // Normalize: replace "and" with comma, normalize dashes
  const normalized = listStr
    .replace(/\s+and\s+/gi, ', ')
    .replace(/[–—]/g, '-') // en-dash, em-dash → hyphen
    .replace(/&/g, ', '); // & → comma

  // Split by comma (but not by dash, which indicates ranges)
  const items = normalized
    .split(/\s*,\s*/)
    .map((p) => p.trim())
    .filter((p) => p.length > 0);

  const isBareLetter = (s: string): boolean => /^[a-z]$/i.test(s);

  // Last reference that carried its own number; bare letters inherit from it.
  let lastFullRef: { num: number; isSupp: boolean } | null = null;

  for (const item of items) {
    if (!item.includes('-')) {
      if (isBareLetter(item) && lastFullRef) {
        // Implicit prefix: "b" after "1a" means "1b"
        results.push({
          num: lastFullRef.num,
          isSupp: lastFullRef.isSupp,
          suffix: item.toLowerCase(),
        });
      } else {
        // Full reference: "1", "1a", "S1", "S1a"
        const ref = parseRefNumber(item);
        results.push(ref);
        lastFullRef = { num: ref.num, isSupp: ref.isSupp };
      }
      continue;
    }

    const bounds = item.split('-').map((s) => s.trim());
    const start = bounds[0] || '';
    const end = bounds[1] || '';

    // A bare-letter start ("c-e") has no number of its own: inherit it from the
    // previous full reference instead of parsing the letter as a number.
    const startRef: ParsedRefNumber = isBareLetter(start)
      ? {
          num: lastFullRef ? lastFullRef.num : 1,
          isSupp: lastFullRef ? lastFullRef.isSupp : false,
          suffix: start.toLowerCase(),
        }
      : parseRefNumber(start);

    // A bare-letter end ("1a-c") stays on the start's number.
    const endRef: ParsedRefNumber = isBareLetter(end)
      ? { num: startRef.num, isSupp: startRef.isSupp, suffix: end.toLowerCase() }
      : parseRefNumber(end);

    if (startRef.suffix && endRef.suffix && startRef.num !== endRef.num) {
      // Cross-number suffix range: "1a-3b" → 1a, 1b, 2a, 2b, 3a, 3b
      const maxSuffix = Math.max(startRef.suffix.charCodeAt(0), endRef.suffix.charCodeAt(0));

      for (let n = startRef.num; n <= endRef.num; n++) {
        const suffixStart = n === startRef.num ? startRef.suffix.charCodeAt(0) : 'a'.charCodeAt(0);
        const suffixEnd = n === endRef.num ? endRef.suffix.charCodeAt(0) : maxSuffix;

        for (let s = suffixStart; s <= suffixEnd; s++) {
          results.push({ num: n, isSupp: startRef.isSupp, suffix: String.fromCharCode(s) });
        }
      }
      lastFullRef = { num: endRef.num, isSupp: startRef.isSupp };
    } else if (startRef.suffix || endRef.suffix) {
      // Suffix range on one number: "1a-c"
      const num: number = startRef.num !== 0 ? startRef.num : lastFullRef ? lastFullRef.num : 1;
      const isSupp: boolean = startRef.isSupp
        ? startRef.isSupp
        : lastFullRef
          ? lastFullRef.isSupp
          : false;
      const startCode = (startRef.suffix || 'a').charCodeAt(0);
      const endCode = (endRef.suffix || 'a').charCodeAt(0);

      for (let code = startCode; code <= endCode; code++) {
        results.push({ num, isSupp, suffix: String.fromCharCode(code) });
      }
      lastFullRef = { num, isSupp };
    } else {
      // Pure number range: "1-3"
      for (let n = startRef.num; n <= endRef.num; n++) {
        results.push({ num: n, isSupp: startRef.isSupp, suffix: null });
      }
      lastFullRef = { num: endRef.num, isSupp: startRef.isSupp };
    }
  }

  return results;
}
314
+
315
/**
 * Build the figure/table/equation registry by scanning section files for
 * `{#fig:label}`, `{#tbl:label}` and `{#eq:label}` anchors.
 *
 * Files are taken from `sections` when a non-empty array is given, otherwise
 * from rev.yaml / sections.yaml via discoverSectionFiles(). The directory is
 * never scanned blindly, because stray files (paper_clean.md, etc.) would
 * corrupt the numbering; with no usable file list the registry is empty.
 *
 * Numbering follows file order, then anchor order within a file. Files whose
 * name contains "supp" or "appendix" (case-insensitive) feed separate
 * supplementary counters for figures and tables; equations share one counter.
 *
 * @param directory - Directory containing the section files.
 * @param sections - Optional explicit, ordered list of section file names.
 * @returns Label maps per kind plus `byNumber` reverse lookups.
 * @throws TypeError when `directory` is not a string.
 */
export function buildRegistry(directory: string, sections?: string[]): Registry {
  if (typeof directory !== 'string') {
    throw new TypeError(`directory must be a string, got ${typeof directory}`);
  }

  const figures = new Map<string, FigureInfo>();
  const tables = new Map<string, FigureInfo>();
  const equations = new Map<string, FigureInfo>();

  // Running counters; main and supplementary items are numbered independently.
  const counters = { fig: 0, figS: 0, tbl: 0, tblS: 0, eq: 0 };

  // Explicit sections win; otherwise rely on config files only (never guess).
  const orderedFiles: string[] =
    Array.isArray(sections) && sections.length > 0
      ? sections.filter((name) => fs.existsSync(path.join(directory, name)))
      : discoverSectionFiles(directory);

  for (const file of orderedFiles) {
    const content = fs.readFileSync(path.join(directory, file), 'utf-8');
    const lowerName = file.toLowerCase();
    const isSupp = lowerName.includes('supp') || lowerName.includes('appendix');

    ANCHOR_PATTERN.lastIndex = 0;
    for (
      let hit = ANCHOR_PATTERN.exec(content);
      hit !== null;
      hit = ANCHOR_PATTERN.exec(content)
    ) {
      const typeRaw = hit[1];
      const label = hit[2];
      if (!typeRaw || !label) continue;

      switch (typeRaw.toLowerCase()) {
        case 'fig': {
          const num = isSupp ? ++counters.figS : ++counters.fig;
          figures.set(label, { label, num, isSupp, file });
          break;
        }
        case 'tbl': {
          const num = isSupp ? ++counters.tblS : ++counters.tbl;
          tables.set(label, { label, num, isSupp, file });
          break;
        }
        case 'eq':
          equations.set(label, { label, num: ++counters.eq, isSupp: false, file });
          break;
      }
    }
  }

  // Reverse lookup (number → label), built from the final map contents.
  const byNumber: Registry['byNumber'] = {
    fig: new Map(),
    figS: new Map(),
    tbl: new Map(),
    tblS: new Map(),
    eq: new Map(),
  };

  figures.forEach((info, label) => {
    (info.isSupp ? byNumber.figS : byNumber.fig).set(info.num, label);
  });
  tables.forEach((info, label) => {
    (info.isSupp ? byNumber.tblS : byNumber.tbl).set(info.num, label);
  });
  equations.forEach((info, label) => {
    byNumber.eq.set(info.num, label);
  });

  return { figures, tables, equations, byNumber };
}
418
+
419
/**
 * Produce the printed form of a label, e.g. "Figure 1" or "Table S2".
 *
 * @param type - Kind of item; anything other than 'fig' or 'tbl' is looked up
 *   among the equations.
 * @param label - Anchor label without the type prefix.
 * @param registry - Registry to look the label up in.
 * @returns The display string, or null when the registry is missing/lacks
 *   `figures`, or the label is unknown.
 */
export function labelToDisplay(
  type: 'fig' | 'tbl' | 'eq',
  label: string,
  registry: Registry
): string | null {
  if (!registry?.figures) return null;

  let noun = 'Equation';
  let entries = registry.equations;
  if (type === 'fig') {
    noun = 'Figure';
    entries = registry.figures;
  } else if (type === 'tbl') {
    noun = 'Table';
    entries = registry.tables;
  }

  const entry = entries.get(label);
  if (!entry) return null;

  // Supplementary items are printed with an "S" in front of the number.
  return entry.isSupp ? `${noun} S${entry.num}` : `${noun} ${entry.num}`;
}
440
+
441
/**
 * Look up the label that a display number belongs to (e.g. Figure 1 → "heatmap").
 *
 * @param type - Kind of item.
 * @param num - Display number (without any "S" prefix).
 * @param isSupp - Whether to search the supplementary series.
 * @param registry - Registry providing the `byNumber` reverse lookups.
 * @returns The label, or null when the registry has no `byNumber`, the series
 *   does not exist, or no item has that number.
 */
export function numberToLabel(
  type: 'fig' | 'tbl' | 'eq',
  num: number,
  isSupp: boolean,
  registry: Registry
): string | null {
  if (!registry?.byNumber) return null;

  // Supplementary series are stored under "<type>S" (e.g. "figS").
  const series = (isSupp ? `${type}S` : type) as keyof Registry['byNumber'];
  const lookup = registry.byNumber[series];
  return lookup?.get(num) || null;
}
455
+
456
/**
 * Find every hardcoded reference ("Figure 1", "Tables 2-4", "Eq. 3a", ...) in
 * the given text.
 *
 * Each detection pattern is run over the whole text; the list part of every
 * match is expanded with parseReferenceList(), and matches that expand to no
 * numbers are dropped.
 *
 * @param text - Text to scan.
 * @returns Detected references ordered by their position in `text`.
 * @throws TypeError when `text` is not a string.
 */
export function detectHardcodedRefs(text: string): DetectedRef[] {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  const found: DetectedRef[] = [];

  for (const kind of Object.keys(DETECTION_PATTERNS)) {
    const pattern = DETECTION_PATTERNS[kind];
    if (!pattern) continue;

    const type = normalizeType(kind) as 'fig' | 'tbl' | 'eq';

    // Patterns are global (stateful); always scan from the beginning.
    pattern.lastIndex = 0;
    for (let hit = pattern.exec(text); hit !== null; hit = pattern.exec(text)) {
      // hit[1] is the type word, hit[2] the reference list ("1, 2, and 3", "1a-3b", ...)
      const listStr = hit[2];
      if (!listStr) continue;

      const numbers = parseReferenceList(listStr);
      if (numbers.length === 0) continue;

      found.push({ type, match: hit[0], numbers, position: hit.index });
    }
  }

  found.sort((left, right) => left.position - right.position);
  return found;
}
495
+
496
/**
 * Replace hardcoded references ("Figure 1", "Tables 1-2") with @-style
 * references ("@fig:label; @tbl:label").
 *
 * For each detected reference every number is mapped to its label through the
 * registry. The reference is rewritten only when all of its numbers resolve;
 * otherwise it is left untouched and a warning is recorded. Several numbers
 * that resolve to the same label (e.g. the panels in "Figure 1a-c") produce
 * that label once, and an unknown number is warned about once per reference.
 *
 * A reference is also left untouched when one of its target @-labels already
 * occurs within the REF_CONTEXT_WINDOW characters before it, which prevents
 * duplicates such as "@fig:mapFigure 1" after an import restored the label.
 *
 * @param text - Text to convert (validated by detectHardcodedRefs).
 * @param registry - Registry used to map numbers to labels.
 * @returns The converted text, the list of applied conversions and warnings.
 */
export function convertHardcodedRefs(text: string, registry: Registry): ConversionResult {
  // Input validation delegated to detectHardcodedRefs
  const refs = detectHardcodedRefs(text);
  const conversions: Array<{ from: string; to: string }> = [];
  const warnings: string[] = [];

  // Process in reverse order so positions of earlier matches stay valid.
  let result = text;
  for (let i = refs.length - 1; i >= 0; i--) {
    const ref = refs[i];
    if (!ref) continue;

    // Sets keep first-seen order while dropping repeats.
    const labels = new Set<string>();
    const unknownNumbers = new Set<string>();

    for (const { num, isSupp } of ref.numbers) {
      const label = numberToLabel(ref.type, num, isSupp, registry);
      if (label) {
        labels.add(`@${ref.type}:${label}`);
        continue;
      }
      const displayNum = isSupp ? `S${num}` : `${num}`;
      if (!unknownNumbers.has(displayNum)) {
        unknownNumbers.add(displayNum);
        warnings.push(`Unknown reference: ${ref.type} ${displayNum} (no matching label)`);
      }
    }

    // Keep the original text unless every number could be resolved.
    if (unknownNumbers.size > 0 || labels.size === 0) continue;

    const uniqueLabels = Array.from(labels);

    // Skip if the @-syntax already appears in the preceding text.
    const textBefore = result.slice(Math.max(0, ref.position - REF_CONTEXT_WINDOW), ref.position);
    if (uniqueLabels.some((label) => textBefore.includes(label))) {
      continue; // Skip - ref already present nearby
    }

    const replacement = uniqueLabels.join('; ');
    result =
      result.slice(0, ref.position) + replacement + result.slice(ref.position + ref.match.length);

    conversions.push({
      from: ref.match,
      to: replacement,
    });
  }

  return { converted: result, conversions, warnings };
}
548
+
549
/**
 * Find all @-style references (`@fig:label`, `@tbl:label`, `@eq:label`) in text.
 *
 * @param text - Text to scan.
 * @returns One entry per occurrence, in order of appearance, with the matched
 *   type and label, the full matched text and its position.
 * @throws TypeError when `text` is not a string.
 */
export function detectDynamicRefs(text: string): DynamicRef[] {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  const found: DynamicRef[] = [];

  // REF_PATTERN is global (stateful); always scan from the beginning.
  REF_PATTERN.lastIndex = 0;
  for (let hit = REF_PATTERN.exec(text); hit !== null; hit = REF_PATTERN.exec(text)) {
    const kind = hit[1];
    const label = hit[2];
    if (!kind || !label) continue;

    found.push({
      type: kind as 'fig' | 'tbl' | 'eq',
      label,
      match: hit[0],
      position: hit.index,
    });
  }

  return found;
}
575
+
576
/**
 * Summarise the references found in a piece of text.
 *
 * Anchor types are compared case-insensitively, matching both ANCHOR_PATTERN
 * (which has the `i` flag) and buildRegistry (which lowercases the type), so
 * an anchor written as `{#Fig:x}` is counted as a figure.
 *
 * @param text - Text to analyse.
 * @param registry - Not used by this function; kept so the signature stays
 *   stable for callers.
 * @returns The dynamic (@-style) references, the hardcoded references, and the
 *   number of figure/table/equation anchors defined in `text`.
 */
export function getRefStatus(text: string, registry: Registry): RefStatus {
  const dynamic = detectDynamicRefs(text);
  const hardcoded = detectHardcodedRefs(text) as HardcodedRef[];

  // Count anchors in this text
  const anchors = { figures: 0, tables: 0, equations: 0 };

  ANCHOR_PATTERN.lastIndex = 0;
  let match: RegExpExecArray | null;
  while ((match = ANCHOR_PATTERN.exec(text)) !== null) {
    switch (match[1]?.toLowerCase()) {
      case 'fig':
        anchors.figures++;
        break;
      case 'tbl':
        anchors.tables++;
        break;
      case 'eq':
        anchors.equations++;
        break;
    }
  }

  return { dynamic, hardcoded, anchors };
}
603
+
604
/**
 * Find forward references in combined text.
 *
 * A reference counts as "forward" when its `{#type:label}` anchor is defined
 * later in the text than the reference itself, or is not defined at all. When
 * an anchor occurs more than once, only its first occurrence is considered.
 *
 * @param text - Combined document text.
 * @returns The forward references, plus a map from "type:label" to the
 *   position of the first matching anchor.
 */
export function detectForwardRefs(text: string): {
  forwardRefs: Array<{ type: string; label: string; match: string; position: number }>;
  anchorPositions: Map<string, number>;
} {
  // "fig:label" -> index of the first anchor with that key
  const anchorPositions = new Map<string, number>();

  ANCHOR_PATTERN.lastIndex = 0;
  for (let hit = ANCHOR_PATTERN.exec(text); hit !== null; hit = ANCHOR_PATTERN.exec(text)) {
    const kind = hit[1];
    const label = hit[2];
    if (!kind || !label) continue;

    const key = `${kind}:${label}`;
    if (anchorPositions.has(key)) continue; // duplicates: first definition wins
    anchorPositions.set(key, hit.index);
  }

  const forwardRefs = detectDynamicRefs(text).filter((ref) => {
    const definedAt = anchorPositions.get(`${ref.type}:${ref.label}`);
    // Missing anchor, or anchor located after the reference
    return definedAt === undefined || ref.position < definedAt;
  });

  return { forwardRefs, anchorPositions };
}
640
+
641
/**
 * Replace forward references with their display text (e.g. "Figure 2").
 *
 * Only the references reported by detectForwardRefs() are touched; all others
 * stay as @-references so pandoc-crossref can still turn them into clickable
 * links. Forward references whose label is not in the registry are left in
 * place and reported as unresolved.
 *
 * @param text - Combined document text.
 * @param registry - Registry used to obtain display strings.
 * @returns The rewritten text with lists of resolved and unresolved
 *   references; both lists are ordered from the end of the text backwards.
 */
export function resolveForwardRefs(
  text: string,
  registry: Registry
): {
  text: string;
  resolved: Array<{ from: string; to: string; position: number }>;
  unresolved: Array<{ ref: string; position: number }>;
} {
  const resolved: Array<{ from: string; to: string; position: number }> = [];
  const unresolved: Array<{ ref: string; position: number }> = [];
  const { forwardRefs } = detectForwardRefs(text);

  let output = text;

  // Walk from the last reference to the first so recorded positions remain valid.
  for (const ref of [...forwardRefs].reverse()) {
    const display = labelToDisplay(ref.type as 'fig' | 'tbl' | 'eq', ref.label, registry);

    if (!display) {
      unresolved.push({ ref: ref.match, position: ref.position });
      continue;
    }

    const head = output.slice(0, ref.position);
    const tail = output.slice(ref.position + ref.match.length);
    output = head + display + tail;
    resolved.push({ from: ref.match, to: display, position: ref.position });
  }

  return { text: output, resolved, unresolved };
}
683
+
684
/**
 * Resolve every reference to a supplementary figure/table and neutralise the
 * matching anchors.
 *
 * pandoc-crossref numbers all figures sequentially and cannot produce
 * "Figure S1". Two passes are therefore applied to `text`:
 *
 * 1. Each `@fig:label` / `@tbl:label` whose label is flagged `isSupp` in the
 *    registry is replaced by its plain-text display form (e.g. "Figure S1").
 * 2. The `#fig:label` / `#tbl:label` identifier is removed from the anchor's
 *    attribute block. Any other attributes in that block (for example
 *    `width=50%`) are kept, so `{#fig:s1 width=50%}` becomes `{width=50%}`
 *    and `{#fig:s1}` disappears entirely.
 *
 * @param text - Markdown to process.
 * @param registry - Registry whose `figures` / `tables` entries carry `isSupp`.
 * @returns The rewritten text plus a log of replacements; stripped anchors are
 *   logged with `to: '(stripped)'`.
 */
export function resolveSupplementaryRefs(
  text: string,
  registry: Registry
): {
  text: string;
  resolved: Array<{ from: string; to: string }>;
} {
  const resolved: Array<{ from: string; to: string }> = [];
  let result = text;

  // Collect "type:label" keys of all supplementary figures and tables.
  const suppLabels = new Set<string>();
  for (const [label, info] of registry.figures) {
    if (info.isSupp) suppLabels.add(`fig:${label}`);
  }
  for (const [label, info] of registry.tables) {
    if (info.isSupp) suppLabels.add(`tbl:${label}`);
  }

  if (suppLabels.size === 0) return { text: result, resolved };

  // Pass 1: replace @-references, back to front so earlier positions stay valid.
  const refs = detectDynamicRefs(result);
  for (let i = refs.length - 1; i >= 0; i--) {
    const ref = refs[i];
    if (!ref) continue;
    if (!suppLabels.has(`${ref.type}:${ref.label}`)) continue;

    const display = labelToDisplay(ref.type as 'fig' | 'tbl' | 'eq', ref.label, registry);
    if (display) {
      result =
        result.slice(0, ref.position) + display + result.slice(ref.position + ref.match.length);
      resolved.push({ from: ref.match, to: display });
    }
  }

  // Pass 2: drop the identifier from each supplementary anchor so that
  // pandoc-crossref does not number it, keeping any sibling attributes.
  for (const key of suppLabels) {
    const escaped = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // Group 1 captures whatever follows the identifier inside the braces.
    const pattern = new RegExp(`\\{#${escaped}(?:\\s([^}]*))?\\}`, 'g');
    result = result.replace(pattern, (match: string, rest: string | undefined) => {
      resolved.push({ from: match, to: '(stripped)' });
      const remaining = (rest ?? '').trim();
      return remaining ? `{${remaining}}` : '';
    });
  }

  return { text: result, resolved };
}
744
+
745
/**
 * Render a registry as a human-readable, newline-separated listing.
 *
 * Figures, tables and equations each get their own titled section (only when
 * non-empty), separated by a blank line. Supplementary figures and tables are
 * shown with an "S" prefix on their number. An entirely empty registry yields
 * a single explanatory line.
 *
 * @param registry - Registry to render.
 * @returns The formatted listing.
 */
export function formatRegistry(registry: Registry): string {
  const out: string[] = [];

  // Open a section, inserting a blank separator after any previous section.
  const openSection = (title: string): void => {
    if (out.length > 0) out.push('');
    out.push(title);
  };
  const shownNumber = (info: { num: number; isSupp: boolean }): string | number =>
    info.isSupp ? `S${info.num}` : info.num;

  if (registry.figures.size > 0) {
    openSection('Figures:');
    registry.figures.forEach((info, label) => {
      out.push(` Figure ${shownNumber(info)}: @fig:${label} (${info.file})`);
    });
  }

  if (registry.tables.size > 0) {
    openSection('Tables:');
    registry.tables.forEach((info, label) => {
      out.push(` Table ${shownNumber(info)}: @tbl:${label} (${info.file})`);
    });
  }

  if (registry.equations.size > 0) {
    openSection('Equations:');
    registry.equations.forEach((info, label) => {
      out.push(` Equation ${info.num}: @eq:${label} (${info.file})`);
    });
  }

  return out.length === 0 ? 'No figure/table anchors found.' : out.join('\n');
}