docrev 0.9.11 → 0.9.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. package/.claude/settings.local.json +9 -9
  2. package/.gitattributes +1 -1
  3. package/CHANGELOG.md +149 -149
  4. package/PLAN-tables-and-postprocess.md +850 -850
  5. package/README.md +391 -391
  6. package/bin/rev.js +11 -11
  7. package/bin/rev.ts +145 -145
  8. package/completions/rev.bash +127 -127
  9. package/completions/rev.ps1 +210 -210
  10. package/completions/rev.zsh +207 -207
  11. package/dev_notes/stress2/build_adversarial.ts +186 -186
  12. package/dev_notes/stress2/drift_matcher.ts +62 -62
  13. package/dev_notes/stress2/probe_anchors.ts +35 -35
  14. package/dev_notes/stress2/project/discussion.before.md +3 -3
  15. package/dev_notes/stress2/project/discussion.md +3 -3
  16. package/dev_notes/stress2/project/methods.before.md +20 -20
  17. package/dev_notes/stress2/project/methods.md +20 -20
  18. package/dev_notes/stress2/project/rev.yaml +5 -5
  19. package/dev_notes/stress2/project/sections.yaml +4 -4
  20. package/dev_notes/stress2/sections.yaml +5 -5
  21. package/dev_notes/stress2/trace_placement.ts +50 -50
  22. package/dev_notes/stresstest_boundaries.ts +27 -27
  23. package/dev_notes/stresstest_drift_apply.ts +43 -43
  24. package/dev_notes/stresstest_drift_compare.ts +43 -43
  25. package/dev_notes/stresstest_drift_v2.ts +54 -54
  26. package/dev_notes/stresstest_inspect.ts +54 -54
  27. package/dev_notes/stresstest_pstyle.ts +55 -55
  28. package/dev_notes/stresstest_section_debug.ts +23 -23
  29. package/dev_notes/stresstest_split.ts +70 -70
  30. package/dev_notes/stresstest_trace.ts +19 -19
  31. package/dev_notes/stresstest_verify_no_overwrite.ts +40 -40
  32. package/dist/lib/build.d.ts +50 -1
  33. package/dist/lib/build.d.ts.map +1 -1
  34. package/dist/lib/build.js +80 -30
  35. package/dist/lib/build.js.map +1 -1
  36. package/dist/lib/commands/build.d.ts.map +1 -1
  37. package/dist/lib/commands/build.js +38 -5
  38. package/dist/lib/commands/build.js.map +1 -1
  39. package/dist/lib/commands/utilities.js +164 -164
  40. package/dist/lib/commands/word-tools.js +8 -8
  41. package/dist/lib/grammar.js +3 -3
  42. package/dist/lib/import.d.ts.map +1 -1
  43. package/dist/lib/import.js +146 -24
  44. package/dist/lib/import.js.map +1 -1
  45. package/dist/lib/pdf-comments.js +44 -44
  46. package/dist/lib/plugins.js +57 -57
  47. package/dist/lib/pptx-themes.js +115 -115
  48. package/dist/lib/spelling.js +2 -2
  49. package/dist/lib/templates.js +387 -387
  50. package/dist/lib/themes.js +51 -51
  51. package/dist/lib/types.d.ts +20 -0
  52. package/dist/lib/types.d.ts.map +1 -1
  53. package/dist/lib/word-extraction.d.ts +6 -0
  54. package/dist/lib/word-extraction.d.ts.map +1 -1
  55. package/dist/lib/word-extraction.js +46 -3
  56. package/dist/lib/word-extraction.js.map +1 -1
  57. package/dist/lib/wordcomments.d.ts.map +1 -1
  58. package/dist/lib/wordcomments.js +23 -5
  59. package/dist/lib/wordcomments.js.map +1 -1
  60. package/eslint.config.js +27 -27
  61. package/lib/anchor-match.ts +276 -276
  62. package/lib/annotations.ts +644 -644
  63. package/lib/build.ts +1300 -1227
  64. package/lib/citations.ts +160 -160
  65. package/lib/commands/build.ts +833 -801
  66. package/lib/commands/citations.ts +515 -515
  67. package/lib/commands/comments.ts +1050 -1050
  68. package/lib/commands/context.ts +174 -174
  69. package/lib/commands/core.ts +309 -309
  70. package/lib/commands/doi.ts +435 -435
  71. package/lib/commands/file-ops.ts +372 -372
  72. package/lib/commands/history.ts +320 -320
  73. package/lib/commands/index.ts +87 -87
  74. package/lib/commands/init.ts +259 -259
  75. package/lib/commands/merge-resolve.ts +378 -378
  76. package/lib/commands/preview.ts +178 -178
  77. package/lib/commands/project-info.ts +244 -244
  78. package/lib/commands/quality.ts +517 -517
  79. package/lib/commands/response.ts +454 -454
  80. package/lib/commands/section-boundaries.ts +82 -82
  81. package/lib/commands/sections.ts +451 -451
  82. package/lib/commands/sync.ts +706 -706
  83. package/lib/commands/text-ops.ts +449 -449
  84. package/lib/commands/utilities.ts +448 -448
  85. package/lib/commands/verify-anchors.ts +272 -272
  86. package/lib/commands/word-tools.ts +340 -340
  87. package/lib/comment-realign.ts +517 -517
  88. package/lib/config.ts +84 -84
  89. package/lib/crossref.ts +781 -781
  90. package/lib/csl.ts +191 -191
  91. package/lib/dependencies.ts +98 -98
  92. package/lib/diff-engine.ts +465 -465
  93. package/lib/doi-cache.ts +115 -115
  94. package/lib/doi.ts +897 -897
  95. package/lib/equations.ts +506 -506
  96. package/lib/errors.ts +346 -346
  97. package/lib/format.ts +541 -541
  98. package/lib/git.ts +326 -326
  99. package/lib/grammar.ts +303 -303
  100. package/lib/image-registry.ts +180 -180
  101. package/lib/import.ts +911 -792
  102. package/lib/journals.ts +543 -543
  103. package/lib/merge.ts +633 -633
  104. package/lib/orcid.ts +144 -144
  105. package/lib/pdf-comments.ts +263 -263
  106. package/lib/pdf-import.ts +524 -524
  107. package/lib/plugins.ts +362 -362
  108. package/lib/postprocess.ts +188 -188
  109. package/lib/pptx-color-filter.lua +37 -37
  110. package/lib/pptx-template.ts +469 -469
  111. package/lib/pptx-themes.ts +483 -483
  112. package/lib/protect-restore.ts +520 -520
  113. package/lib/rate-limiter.ts +94 -94
  114. package/lib/response.ts +197 -197
  115. package/lib/restore-references.ts +240 -240
  116. package/lib/review.ts +327 -327
  117. package/lib/schema.ts +417 -417
  118. package/lib/scientific-words.ts +73 -73
  119. package/lib/sections.ts +335 -335
  120. package/lib/slides.ts +756 -756
  121. package/lib/spelling.ts +334 -334
  122. package/lib/templates.ts +526 -526
  123. package/lib/themes.ts +742 -742
  124. package/lib/trackchanges.ts +247 -247
  125. package/lib/tui.ts +450 -450
  126. package/lib/types.ts +550 -530
  127. package/lib/undo.ts +250 -250
  128. package/lib/utils.ts +69 -69
  129. package/lib/variables.ts +179 -179
  130. package/lib/word-extraction.ts +806 -759
  131. package/lib/word.ts +643 -643
  132. package/lib/wordcomments.ts +817 -798
  133. package/package.json +137 -137
  134. package/scripts/postbuild.js +28 -28
  135. package/skill/REFERENCE.md +431 -431
  136. package/skill/SKILL.md +258 -258
  137. package/tsconfig.json +26 -26
  138. package/types/index.d.ts +525 -525
package/lib/crossref.ts CHANGED
@@ -1,781 +1,781 @@
1
- /**
2
- * Cross-reference handling - dynamic figure/table references
3
- *
4
- * Enables:
5
- * - @fig:label syntax in source (auto-numbered)
6
- * - Conversion to "Figure 1" in Word output
7
- * - Auto-conversion back during import
8
- */
9
-
10
- import * as fs from 'fs';
11
- import * as path from 'path';
12
- import YAML from 'yaml';
13
- import type {
14
- RefNumber,
15
- HardcodedRef,
16
- DynamicRef,
17
- FigureInfo,
18
- Registry,
19
- RefStatus,
20
- ConversionResult,
21
- } from './types.js';
22
-
23
- // =============================================================================
24
- // Constants
25
- // =============================================================================
26
-
27
- /** Number of characters of text immediately before a hardcoded reference that convertHardcodedRefs() scans for an already-present @-style reference (deduplication) */
28
- const REF_CONTEXT_WINDOW = 100;
29
-
30
- /** Minimum word length for similarity calculations. NOTE(review): no use of this constant appears in the visible part of this file - confirm it is still needed. */
31
- const MIN_WORD_LENGTH = 2;
32
-
33
- // =============================================================================
34
- // Type Definitions (Internal)
35
- // =============================================================================
36
-
37
- /**
38
- * Reference info (internal use in registry building).
- * Has the same four fields (label, num, isSupp, file) as the records that
- * buildRegistry() stores for every anchor it finds.
39
- */
40
- interface RefInfo {
41
- label: string;
42
- num: number;
43
- isSupp: boolean;
44
- file: string;
45
- }
46
-
47
- /**
48
- * Parsed reference number components, as produced by parseRefNumber():
- * - isSupp: the number was written with an "S" prefix (e.g. "S1")
- * - num: the numeric part
- * - suffix: lowercased sub-panel letter (e.g. "a" in "1a"), or null
49
- */
50
- interface ParsedRefNumber {
51
- isSupp: boolean;
52
- num: number;
53
- suffix: string | null;
54
- }
55
-
56
- /**
57
- * Detected reference with parsed numbers, as produced by detectHardcodedRefs():
- * - type: normalized reference kind
- * - match: the full matched text (e.g. "Figs. 1-3")
- * - numbers: the individual references expanded by parseReferenceList()
- * - position: index of the match within the scanned text
58
- */
59
- interface DetectedRef {
60
- type: 'fig' | 'tbl' | 'eq';
61
- match: string;
62
- numbers: ParsedRefNumber[];
63
- position: number;
64
- }
65
-
66
- // =============================================================================
67
- // Internal Helpers
68
- // =============================================================================
69
-
/**
 * Work out the ordered list of section files for a project directory.
 *
 * Two config files are consulted, in this order:
 *  1. `rev.yaml`      - `sections` must be a non-empty array; its order is kept.
 *  2. `sections.yaml` - `sections` is a mapping of file name to settings; entries
 *                       are sorted by their `order` value (missing order sorts as 999).
 *
 * Files that do not exist inside `directory` are filtered out. A config that
 * cannot be read or parsed is skipped; the error is only printed when the
 * DEBUG environment variable is set.
 *
 * @param directory - project directory to inspect
 * @returns the existing section files, or an empty array when neither config
 *          yields a section list (the caller decides how to handle that)
 */
function discoverSectionFiles(directory: string): string[] {
  const presentOnDisk = (name: string): boolean => fs.existsSync(path.join(directory, name));

  // First choice: explicit section list in rev.yaml
  const revConfigFile = path.join(directory, 'rev.yaml');
  if (fs.existsSync(revConfigFile)) {
    try {
      const revConfig = YAML.parse(fs.readFileSync(revConfigFile, 'utf-8'));
      if (
        revConfig.sections &&
        Array.isArray(revConfig.sections) &&
        revConfig.sections.length > 0
      ) {
        return revConfig.sections.filter((f: string) => presentOnDisk(f));
      }
    } catch (e) {
      if (process.env.DEBUG) {
        console.warn('crossref: YAML parse error in rev.yaml:', (e as Error).message);
      }
    }
  }

  // Second choice: sections.yaml, ordered by each entry's `order` field
  const sectionsConfigFile = path.join(directory, 'sections.yaml');
  if (fs.existsSync(sectionsConfigFile)) {
    try {
      const sectionsConfig = YAML.parse(fs.readFileSync(sectionsConfigFile, 'utf-8'));
      if (sectionsConfig.sections) {
        const entries = Object.entries(sectionsConfig.sections);
        entries.sort((a, b) => ((a[1] as any).order ?? 999) - ((b[1] as any).order ?? 999));
        const names = entries.map(([file]) => file);
        return names.filter((f) => presentOnDisk(f));
      }
    } catch (e) {
      if (process.env.DEBUG) {
        console.warn('crossref: YAML parse error in sections.yaml:', (e as Error).message);
      }
    }
  }

  // Neither config produced a list - the caller must cope with an empty result
  return [];
}
113
-
114
- // =============================================================================
115
- // Detection Patterns
116
- // =============================================================================
117
-
118
- /**
119
- * Patterns for detecting hardcoded references
120
- * Matches complex patterns including:
121
- * - Simple: "Figure 1", "Fig. 2a", "Table S1"
122
- * - Ranges: "Figures 1-3", "Fig. 1a-c", "Figs. 1a-3b"
123
- * - Lists: "Figures 1, 2, and 3", "Fig. 1a, b, c", "Tables 1 & 2"
124
- * - Mixed: "Figs. 1, 3-5, and 7"
125
- *
126
- * Uses a simpler base pattern and parses the full match for lists
- *
- * Every pattern carries the `g` flag, so each RegExp object keeps its
- * `lastIndex` between exec() calls; detectHardcodedRefs() resets it to 0
- * before scanning. Because of the `i` flag, the "S" prefix and the suffix
- * letters are matched in either case.
127
- */
128
- const DETECTION_PATTERNS: Record<string, RegExp> = {
129
- // Captures the full reference including lists with "and"
130
- // Group 1: type prefix (Figure, Fig., etc.)
131
- // Group 2: reference list (parsed by parseReferenceList())
132
- // Matches: "1", "1a", "1-3", "1a-c", "1, 2, 3", "1 and 2", "1, 2 and 3", "1, 2, and 3"
133
- // Separator: comma/dash/ampersand, optionally followed by "and"
134
- // Standalone letters must be followed by separator, punctuation, or word boundary
135
- // Also handles: "see Figure 1", "(Fig. 1)", "in Figures 1–3"
136
- // Note: 'gi' flag makes these case-insensitive, so "figure 1" is also matched
137
- figure: /\b(Figures?|Figs?\.?)\s+((?:\d+|S\d+)[a-z]?(?:(?:\s*[-–—,&]\s*(?:and\s+)?|\s+and\s+)(?:(?:\d+|S\d+)[a-z]?|[a-z]\b))*)/gi,
138
-
139
- table: /\b(Tables?|Tabs?\.?)\s+((?:\d+|S\d+)[a-z]?(?:(?:\s*[-–—,&]\s*(?:and\s+)?|\s+and\s+)(?:(?:\d+|S\d+)[a-z]?|[a-z]\b))*)/gi,
140
-
141
- equation: /\b(Equations?|Eqs?\.?)\s+((?:\d+)[a-z]?(?:(?:\s*[-–—,&]\s*(?:and\s+)?|\s+and\s+)(?:(?:\d+)[a-z]?|[a-z]\b))*)/gi,
142
- };
143
-
144
- /**
145
- * Patterns to EXCLUDE from detection (false positives)
146
- * These look like references but aren't (e.g., "Table of Contents", "Figure skating")
- *
- * NOTE(review): nothing in the visible portion of this file reads
- * EXCLUSION_PATTERNS - confirm whether it is still applied anywhere.
- * The DETECTION_PATTERNS regexes already require a number after the prefix,
- * so none of these phrases would be matched by them.
147
- */
148
- const EXCLUSION_PATTERNS = [
149
- /\bTable\s+of\s+Contents?\b/gi,
150
- /\bFigure\s+skating\b/gi,
151
- /\bFigure\s+out\b/gi,
152
- /\bFigure\s+it\b/gi,
153
- /\bTable\s+setting/gi,
154
- /\bEquation\s+editor\b/gi,
155
- ];
156
-
157
- /**
158
- * Pattern for extracting anchors from markdown: {#fig:label}, {#tbl:label}, {#eq:label}
- * Group 1 is the type, group 2 the label. The closing brace is not part of the
- * pattern, so anchors followed by further attributes also match.
- * Global + case-insensitive: callers reset `lastIndex` before each scan, and
- * group 1 keeps whatever case was written in the source.
159
- */
160
- const ANCHOR_PATTERN = /\{#(fig|tbl|eq):([a-zA-Z0-9_-]+)/gi;
161
-
162
- /**
163
- * Pattern for @-style references: @fig:label, @tbl:label, @eq:label
- * Group 1 is the type, group 2 the label. Global + case-insensitive, with the
- * same `lastIndex` and letter-case caveats as ANCHOR_PATTERN.
164
- */
165
- const REF_PATTERN = /@(fig|tbl|eq):([a-zA-Z0-9_-]+)/gi;
166
-
167
- // =============================================================================
168
- // Public API
169
- // =============================================================================
170
-
/**
 * Map a reference-type word onto its canonical short form.
 *
 * The input is lowercased and one trailing period is dropped; anything starting
 * with "fig" becomes 'fig', "tab" becomes 'tbl' and "eq" becomes 'eq'. Any other
 * value is returned in its lowercased, period-stripped form.
 *
 * @param typeStr - type word such as "Figure", "Figs." or "table"
 * @returns the canonical type, or the cleaned input when it is not recognized
 * @throws TypeError when `typeStr` is not a string
 */
export function normalizeType(typeStr: string): 'fig' | 'tbl' | 'eq' | string {
  if (typeof typeStr !== 'string') {
    throw new TypeError(`typeStr must be a string, got ${typeof typeStr}`);
  }

  const cleaned = typeStr.toLowerCase().replace(/\.$/, '');

  // Checked in this order; the first matching prefix wins
  const prefixTable: Array<[string, 'fig' | 'tbl' | 'eq']> = [
    ['fig', 'fig'],
    ['tab', 'tbl'],
    ['eq', 'eq'],
  ];
  for (const [prefix, canonical] of prefixTable) {
    if (cleaned.startsWith(prefix)) {
      return canonical;
    }
  }
  return cleaned;
}
184
-
/**
 * Parse a single reference number such as "3", "2a", "S1" or "S4b".
 *
 * A leading "S" (either case) marks a supplementary reference. A trailing
 * letter is taken as the sub-panel suffix unless an explicit `suffix`
 * argument is supplied, which then wins. The suffix is always lowercased.
 *
 * Input that contains no parsable number (empty, non-string, or something
 * like "b" / "s") yields the sentinel `{ isSupp: false, num: 0 }` instead of
 * a NaN number, so callers can test `num === 0` to detect "no number".
 *
 * @param numStr - raw number text
 * @param suffix - optional explicit suffix letter
 * @returns the parsed components
 */
export function parseRefNumber(numStr: string, suffix: string | null = null): ParsedRefNumber {
  if (!numStr || typeof numStr !== 'string') {
    return { isSupp: false, num: 0, suffix: suffix || null };
  }

  const hasSuppPrefix = numStr.toUpperCase().startsWith('S');
  const numPart = hasSuppPrefix ? numStr.slice(1) : numStr;

  // Extract suffix if embedded in numStr (e.g., "1a")
  const match = numPart.match(/^(\d+)([a-z])?$/i);
  const parsed = match && match[1] ? parseInt(match[1], 10) : parseInt(numPart, 10);
  const extractedSuffix = suffix || (match && match[2]) || null;
  const normalizedSuffix = extractedSuffix ? extractedSuffix.toLowerCase() : null;

  if (Number.isNaN(parsed)) {
    // No digits at all: a lone "s" is a letter, not a supplementary number
    return { isSupp: false, num: 0, suffix: normalizedSuffix };
  }

  return { isSupp: hasSuppPrefix, num: parsed, suffix: normalizedSuffix };
}
200
-
/**
 * Expand a reference list such as "1, 2, and 3", "1a-c", "1a-3b" or
 * "1a, b-d" into individual references.
 *
 * Handling:
 *  - "and" and "&" are treated as commas; en/em dashes as hyphens.
 *  - A bare letter ("b") reuses the number of the previous full reference.
 *  - A range that starts with a bare letter ("b-d") also reuses the previous
 *    number (falling back to 1 when there is none), so "1a, b-d" gives
 *    1a, 1b, 1c, 1d.
 *  - Numbers that cannot be parsed are normalized to 0 and never to NaN.
 *
 * @param listStr - the list portion of a matched reference
 * @returns one entry per referenced item, in reading order; empty for
 *          empty or non-string input
 */
export function parseReferenceList(listStr: string): ParsedRefNumber[] {
  const results: ParsedRefNumber[] = [];
  if (!listStr || typeof listStr !== 'string') return results;

  // Normalize: replace "and" with comma, normalize dashes
  const normalized = listStr
    .replace(/\s+and\s+/gi, ', ')
    .replace(/[–—]/g, '-') // en-dash, em-dash → hyphen
    .replace(/&/g, ', '); // & → comma

  // Split by comma (but not by dash, which indicates ranges)
  const items = normalized.split(/\s*,\s*/).filter((p) => p.trim());

  const isSingleLetter = (s: string): boolean => /^[a-z]$/i.test(s);

  // parseRefNumber, but guaranteed not to leak a NaN number into the results
  const parseFull = (s: string): ParsedRefNumber => {
    const ref = parseRefNumber(s);
    return Number.isFinite(ref.num) ? ref : { isSupp: false, num: 0, suffix: ref.suffix };
  };

  // Last full (numbered) reference seen, used to resolve implicit prefixes
  let lastFullRef: { num: number; isSupp: boolean } | null = null;

  for (const item of items) {
    const trimmed = item.trim();
    if (!trimmed) continue;

    if (trimmed.includes('-')) {
      // Range: only the first two dash-separated parts are considered
      const bounds = trimmed.split('-').map((s) => s.trim());
      const start = bounds[0] || '';
      const end = bounds[1] || '';

      // A letter-only start ("b-d") borrows its number from the previous reference
      const startRef: ParsedRefNumber = isSingleLetter(start)
        ? {
            num: lastFullRef ? lastFullRef.num : 1,
            isSupp: lastFullRef ? lastFullRef.isSupp : false,
            suffix: start.toLowerCase(),
          }
        : parseFull(start);

      // A letter-only end ("1a-c") stays on the start's number
      const endRef: ParsedRefNumber = isSingleLetter(end)
        ? { num: startRef.num, isSupp: startRef.isSupp, suffix: end.toLowerCase() }
        : parseFull(end);

      if (startRef.suffix && endRef.suffix && startRef.num !== endRef.num) {
        // Cross-number suffix range: "1a-3b".
        // Intermediate numbers are limited to the widest suffix mentioned,
        // e.g. "1a-3b" → 1a, 1b, 2a, 2b, 3a, 3b
        const maxSuffix = Math.max(startRef.suffix.charCodeAt(0), endRef.suffix.charCodeAt(0));

        for (let n = startRef.num; n <= endRef.num; n++) {
          const suffixStart =
            n === startRef.num ? startRef.suffix.charCodeAt(0) : 'a'.charCodeAt(0);
          const suffixEnd = n === endRef.num ? endRef.suffix.charCodeAt(0) : maxSuffix;

          for (let s = suffixStart; s <= suffixEnd; s++) {
            results.push({
              num: n,
              isSupp: startRef.isSupp,
              suffix: String.fromCharCode(s),
            });
          }
        }
        lastFullRef = { num: endRef.num, isSupp: startRef.isSupp };
      } else if (startRef.suffix || endRef.suffix) {
        // Suffix range on a single number: "1a-c"
        const num: number =
          startRef.num !== 0 ? startRef.num : lastFullRef ? lastFullRef.num : 1;
        const isSupp: boolean = startRef.isSupp
          ? startRef.isSupp
          : lastFullRef
            ? lastFullRef.isSupp
            : false;
        const startCode = (startRef.suffix || 'a').charCodeAt(0);
        const endCode = (endRef.suffix || 'a').charCodeAt(0);

        for (let code = startCode; code <= endCode; code++) {
          results.push({
            num,
            isSupp,
            suffix: String.fromCharCode(code),
          });
        }
        lastFullRef = { num, isSupp };
      } else {
        // Pure number range: "1-3"
        for (let n = startRef.num; n <= endRef.num; n++) {
          results.push({
            num: n,
            isSupp: startRef.isSupp,
            suffix: null,
          });
        }
        lastFullRef = { num: endRef.num, isSupp: startRef.isSupp };
      }
    } else if (isSingleLetter(trimmed) && lastFullRef) {
      // Implicit prefix: "b" after "1a" means "1b"
      results.push({
        num: lastFullRef.num,
        isSupp: lastFullRef.isSupp,
        suffix: trimmed.toLowerCase(),
      });
    } else {
      // Full reference: "1", "1a", "S1", "S1a"
      const ref = parseFull(trimmed);
      results.push(ref);
      lastFullRef = { num: ref.num, isSupp: ref.isSupp };
    }
  }

  return results;
}
314
-
/**
 * Scan the project's section files for {#fig:…}, {#tbl:…} and {#eq:…} anchors
 * and number them in file order.
 *
 * The file list comes from `sections` when it is a non-empty array, otherwise
 * from rev.yaml / sections.yaml via discoverSectionFiles(). The directory is
 * never scanned blindly: with no list and no config the registry is empty,
 * which avoids numbering anchors found in stray .md files.
 *
 * Files whose name contains "supp" or "appendix" (case-insensitive) are
 * supplementary: their figures and tables use separate counters and are
 * flagged `isSupp`. Equations share one counter and are never flagged.
 *
 * @param directory - directory containing the section files
 * @param sections - optional explicit, ordered list of section file names
 * @returns label maps per type plus a number → label reverse lookup
 * @throws TypeError when `directory` is not a string
 */
export function buildRegistry(directory: string, sections?: string[]): Registry {
  if (typeof directory !== 'string') {
    throw new TypeError(`directory must be a string, got ${typeof directory}`);
  }

  const figures = new Map<string, FigureInfo>();
  const tables = new Map<string, FigureInfo>();
  const equations = new Map<string, FigureInfo>();

  // Running counters; main and supplementary items are numbered independently
  const counters = { fig: 0, figS: 0, tbl: 0, tblS: 0, eq: 0 };

  // An explicit list wins; otherwise fall back to the project config (may be empty)
  const orderedFiles: string[] =
    Array.isArray(sections) && sections.length > 0
      ? sections.filter((f) => fs.existsSync(path.join(directory, f)))
      : discoverSectionFiles(directory);

  for (const file of orderedFiles) {
    const content = fs.readFileSync(path.join(directory, file), 'utf-8');
    const lowerName = file.toLowerCase();
    const isSupp = lowerName.includes('supp') || lowerName.includes('appendix');

    // The pattern is global, so rewind it before scanning each file
    ANCHOR_PATTERN.lastIndex = 0;
    for (
      let hit = ANCHOR_PATTERN.exec(content);
      hit !== null;
      hit = ANCHOR_PATTERN.exec(content)
    ) {
      const typeRaw = hit[1];
      const label = hit[2];
      if (!typeRaw || !label) continue;

      switch (typeRaw.toLowerCase()) {
        case 'fig': {
          const num = isSupp ? ++counters.figS : ++counters.fig;
          figures.set(label, { label, num, isSupp, file });
          break;
        }
        case 'tbl': {
          const num = isSupp ? ++counters.tblS : ++counters.tbl;
          tables.set(label, { label, num, isSupp, file });
          break;
        }
        case 'eq': {
          counters.eq += 1;
          equations.set(label, { label, num: counters.eq, isSupp: false, file });
          break;
        }
        default:
          break;
      }
    }
  }

  // Reverse lookup (number → label), built from the final map contents
  const byNumber: Registry['byNumber'] = {
    fig: new Map(),
    figS: new Map(),
    tbl: new Map(),
    tblS: new Map(),
    eq: new Map(),
  };

  figures.forEach((info, label) => {
    byNumber[info.isSupp ? 'figS' : 'fig'].set(info.num, label);
  });
  tables.forEach((info, label) => {
    byNumber[info.isSupp ? 'tblS' : 'tbl'].set(info.num, label);
  });
  equations.forEach((info, label) => {
    byNumber.eq.set(info.num, label);
  });

  return { figures, tables, equations, byNumber };
}
418
-
/**
 * Turn a label into its printed form, e.g. "Figure 1" or "Table S2".
 *
 * @param type - which collection of the registry to look in
 * @param label - anchor label without the type prefix
 * @param registry - registry produced by buildRegistry()
 * @returns the display string, or null when the registry is missing or the
 *          label is not registered
 */
export function labelToDisplay(
  type: 'fig' | 'tbl' | 'eq',
  label: string,
  registry: Registry
): string | null {
  if (!registry || !registry.figures) return null;

  let collection;
  let prefix: string;
  if (type === 'fig') {
    collection = registry.figures;
    prefix = 'Figure';
  } else if (type === 'tbl') {
    collection = registry.tables;
    prefix = 'Table';
  } else {
    collection = registry.equations;
    prefix = 'Equation';
  }

  const info = collection.get(label);
  if (!info) return null;

  // Supplementary items are printed with an "S" in front of the number
  const shown = info.isSupp ? `S${info.num}` : `${info.num}`;
  return `${prefix} ${shown}`;
}
440
-
/**
 * Reverse lookup: find the label registered for a displayed number
 * (e.g. the label behind "Figure 1").
 *
 * Supplementary numbers are looked up under the `<type>S` key of
 * `registry.byNumber`; a key with no map simply yields null.
 *
 * @param type - reference kind
 * @param num - displayed number without any "S" prefix
 * @param isSupp - whether the number is supplementary
 * @param registry - registry produced by buildRegistry()
 * @returns the label, or null when the registry is missing or nothing matches
 */
export function numberToLabel(
  type: 'fig' | 'tbl' | 'eq',
  num: number,
  isSupp: boolean,
  registry: Registry
): string | null {
  if (!registry || !registry.byNumber) return null;

  const bucket = isSupp ? (`${type}S` as keyof Registry['byNumber']) : type;
  const lookup = registry.byNumber[bucket];
  const found = lookup?.get(num);
  return found ? found : null;
}
455
-
/**
 * Find every hardcoded reference ("Figure 1", "Tables 2-4", "Eq. 3", …) in a text.
 *
 * Each entry of DETECTION_PATTERNS is run over the text; the list part of a
 * match (capture group 2) is expanded with parseReferenceList(). Matches whose
 * list expands to nothing are ignored.
 *
 * @param text - text to scan
 * @returns detected references ordered by their position in the text
 * @throws TypeError when `text` is not a string
 */
export function detectHardcodedRefs(text: string): DetectedRef[] {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  const detected: DetectedRef[] = [];

  Object.entries(DETECTION_PATTERNS).forEach(([kind, regex]) => {
    // Global regex: rewind before every scan
    regex.lastIndex = 0;

    for (let hit = regex.exec(text); hit !== null; hit = regex.exec(text)) {
      // hit[1] is the type prefix, hit[2] the reference list ("1, 2, and 3", "1a-3b", …)
      const listStr = hit[2];
      if (!listStr) continue;

      const numbers = parseReferenceList(listStr);
      if (numbers.length === 0) continue;

      detected.push({
        type: normalizeType(kind) as 'fig' | 'tbl' | 'eq',
        match: hit[0],
        numbers,
        position: hit.index,
      });
    }
  });

  // Patterns are scanned one type at a time, so restore document order
  return detected.sort((a, b) => a.position - b.position);
}
495
-
/**
 * Convert hardcoded references ("Figure 1") to @-style references ("@fig:label").
 *
 * Rules:
 *  - A reference is converted only when every number in it maps to a label;
 *    otherwise it is left untouched and a warning is recorded.
 *  - Numbers that map to the same label (e.g. the panels in "Fig. 1a-c")
 *    produce that label once, not once per panel. Identical warnings within
 *    one reference are likewise reported once.
 *  - A reference is skipped when one of its @-labels already appears as a
 *    complete reference in the REF_CONTEXT_WINDOW characters before it. A
 *    longer label that merely starts with the same text ("@fig:map-detail"
 *    vs "@fig:map") does not count.
 *
 * Input validation is delegated to detectHardcodedRefs().
 *
 * @param text - text containing hardcoded references
 * @param registry - registry produced by buildRegistry()
 * @returns the converted text, the conversions applied, and any warnings
 */
export function convertHardcodedRefs(text: string, registry: Registry): ConversionResult {
  const refs = detectHardcodedRefs(text);
  const conversions: Array<{ from: string; to: string }> = [];
  const warnings: string[] = [];

  // True when `atRef` occurs in `haystack` and is not just the prefix of a longer label
  const labelChar = /[a-zA-Z0-9_-]/;
  const containsWholeRef = (haystack: string, atRef: string): boolean => {
    let at = haystack.indexOf(atRef);
    while (at !== -1) {
      const next = haystack.charAt(at + atRef.length);
      if (next === '' || !labelChar.test(next)) return true;
      at = haystack.indexOf(atRef, at + 1);
    }
    return false;
  };

  // Process in reverse order so earlier positions stay valid while we splice
  let result = text;
  for (let i = refs.length - 1; i >= 0; i--) {
    const ref = refs[i];
    if (!ref) continue;

    // Build replacement
    const labels: string[] = [];
    const refWarnings = new Set<string>();
    for (const { num, isSupp } of ref.numbers) {
      const label = numberToLabel(ref.type, num, isSupp, registry);
      if (label) {
        const atRef = `@${ref.type}:${label}`;
        if (!labels.includes(atRef)) labels.push(atRef);
      } else {
        const displayNum = isSupp ? `S${num}` : `${num}`;
        refWarnings.add(`Unknown reference: ${ref.type} ${displayNum} (no matching label)`);
      }
    }

    if (refWarnings.size > 0) {
      // At least one number has no label: keep the original text as it is
      warnings.push(...refWarnings);
      continue;
    }
    if (labels.length === 0) continue;

    // Skip if the @-syntax already appears in the preceding text.
    // This prevents duplication when import restores @fig:x and then we see "Fig. 1"
    const textBefore = result.slice(Math.max(0, ref.position - REF_CONTEXT_WINDOW), ref.position);
    if (labels.some((label) => containsWholeRef(textBefore, label))) {
      continue;
    }

    const replacement = labels.join('; ');
    result =
      result.slice(0, ref.position) + replacement + result.slice(ref.position + ref.match.length);

    conversions.push({
      from: ref.match,
      to: replacement,
    });
  }

  return { converted: result, conversions, warnings };
}
548
-
/**
 * Find all @-style references (@fig:label, @tbl:label, @eq:label) in a text.
 *
 * REF_PATTERN is case-insensitive, so the captured type is lowercased before
 * it is stored; the `type` of every result is therefore really one of
 * 'fig' | 'tbl' | 'eq'. `match` keeps the text exactly as written.
 *
 * @param text - text to scan
 * @returns the references in document order
 * @throws TypeError when `text` is not a string
 */
export function detectDynamicRefs(text: string): DynamicRef[] {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  const refs: DynamicRef[] = [];

  // Global regex: rewind before scanning
  REF_PATTERN.lastIndex = 0;
  let match: RegExpExecArray | null;

  while ((match = REF_PATTERN.exec(text)) !== null) {
    const type = match[1];
    const label = match[2];
    if (!type || !label) continue;
    refs.push({
      type: type.toLowerCase() as 'fig' | 'tbl' | 'eq',
      label,
      match: match[0],
      position: match.index,
    });
  }

  return refs;
}
575
-
/**
 * Summarize the references found in a text: @-style references, hardcoded
 * references, and the number of anchors of each type.
 *
 * Anchor types are compared case-insensitively, matching how buildRegistry()
 * classifies the same anchors.
 *
 * @param text - text to inspect
 * @param registry - accepted for interface compatibility; not read here
 * @returns dynamic refs, hardcoded refs and per-type anchor counts
 */
export function getRefStatus(text: string, registry: Registry): RefStatus {
  const dynamic = detectDynamicRefs(text);
  const hardcoded = detectHardcodedRefs(text) as HardcodedRef[];

  // Count anchors in this text
  const anchors = { figures: 0, tables: 0, equations: 0 };
  ANCHOR_PATTERN.lastIndex = 0;
  let match: RegExpExecArray | null;
  while ((match = ANCHOR_PATTERN.exec(text)) !== null) {
    const type = match[1];
    if (!type) continue;
    switch (type.toLowerCase()) {
      case 'fig':
        anchors.figures++;
        break;
      case 'tbl':
        anchors.tables++;
        break;
      case 'eq':
        anchors.equations++;
        break;
      default:
        break;
    }
  }

  return { dynamic, hardcoded, anchors };
}
603
-
/**
 * Detect forward references in combined text.
 *
 * A forward reference is an @-style reference that appears before its
 * {#type:label} anchor, or whose anchor does not exist in the text at all.
 *
 * Anchor and reference types are lowercased when keys are built, because both
 * patterns are case-insensitive; "{#Fig:x}" and "@fig:x" refer to the same item.
 *
 * @param text - combined document text
 * @returns `forwardRefs` in document order, and `anchorPositions`, a map from
 *          "type:label" (lowercase type) to the index of the first such anchor
 */
export function detectForwardRefs(text: string): {
  forwardRefs: Array<{ type: string; label: string; match: string; position: number }>;
  anchorPositions: Map<string, number>;
} {
  // Build map of anchor positions: "fig:label" -> position
  const anchorPositions = new Map<string, number>();
  ANCHOR_PATTERN.lastIndex = 0;
  let match: RegExpExecArray | null;
  while ((match = ANCHOR_PATTERN.exec(text)) !== null) {
    const type = match[1];
    const label = match[2];
    if (!type || !label) continue;
    const key = `${type.toLowerCase()}:${label}`;
    // Only store first occurrence (in case of duplicates)
    if (!anchorPositions.has(key)) {
      anchorPositions.set(key, match.index);
    }
  }

  // Keep only references whose anchor is missing or comes later in the text
  const forwardRefs = detectDynamicRefs(text).filter((ref) => {
    const anchorPos = anchorPositions.get(`${ref.type.toLowerCase()}:${ref.label}`);
    return anchorPos === undefined || ref.position < anchorPos;
  });

  return { forwardRefs, anchorPositions };
}
640
-
/**
 * Replace forward references with their display text ("Figure 1").
 *
 * Only references reported by detectForwardRefs() are touched; all others are
 * left in place for pandoc-crossref, which keeps them as clickable links.
 * References are processed from the end of the text towards the start, so
 * `resolved` and `unresolved` are filled in that order, and every `position`
 * is an index into the original `text`.
 *
 * @param text - combined document text
 * @param registry - registry produced by buildRegistry()
 * @returns the rewritten text, the replacements made, and the forward
 *          references with no registry entry
 */
export function resolveForwardRefs(
  text: string,
  registry: Registry
): {
  text: string;
  resolved: Array<{ from: string; to: string; position: number }>;
  unresolved: Array<{ ref: string; position: number }>;
} {
  const pending = detectForwardRefs(text).forwardRefs;
  const resolved: Array<{ from: string; to: string; position: number }> = [];
  const unresolved: Array<{ ref: string; position: number }> = [];

  let output = text;
  let idx = pending.length;
  // Walk backwards so that splicing never shifts a position we still need
  while (idx-- > 0) {
    const ref = pending[idx];
    if (!ref) continue;

    const display = labelToDisplay(ref.type as 'fig' | 'tbl' | 'eq', ref.label, registry);
    if (!display) {
      unresolved.push({ ref: ref.match, position: ref.position });
      continue;
    }

    const head = output.slice(0, ref.position);
    const tail = output.slice(ref.position + ref.match.length);
    output = head + display + tail;
    resolved.push({ from: ref.match, to: display, position: ref.position });
  }

  return { text: output, resolved, unresolved };
}
683
-
/**
 * Resolve every reference to a supplementary figure/table and strip the
 * matching anchor attributes.
 *
 * pandoc-crossref numbers all figures sequentially and cannot produce
 * "Figure S1". So each @fig:label / @tbl:label that points at a supplementary
 * item is replaced by plain text ("Figure S1", "Table S1"), and the
 * {#fig:label} / {#tbl:label} attribute block is removed so pandoc-crossref
 * ignores the item. The whole brace block is removed, including any
 * attributes that follow the label inside it.
 *
 * @param text - combined document text
 * @param registry - registry produced by buildRegistry()
 * @returns the rewritten text and a log of changes; stripped anchors are
 *          logged with `to: '(stripped)'`
 */
export function resolveSupplementaryRefs(
  text: string,
  registry: Registry
): {
  text: string;
  resolved: Array<{ from: string; to: string }>;
} {
  const resolved: Array<{ from: string; to: string }> = [];

  // Keys ("fig:label" / "tbl:label") of everything flagged supplementary
  const suppKeys = new Set<string>();
  registry.figures.forEach((info, label) => {
    if (info.isSupp) suppKeys.add(`fig:${label}`);
  });
  registry.tables.forEach((info, label) => {
    if (info.isSupp) suppKeys.add(`tbl:${label}`);
  });

  if (suppKeys.size === 0) return { text, resolved };

  let output = text;

  // Step 1: swap supplementary @-references for display text, back to front
  const refs = detectDynamicRefs(output);
  let idx = refs.length;
  while (idx-- > 0) {
    const ref = refs[idx];
    if (!ref) continue;
    if (!suppKeys.has(`${ref.type}:${ref.label}`)) continue;

    const display = labelToDisplay(ref.type as 'fig' | 'tbl' | 'eq', ref.label, registry);
    if (!display) continue;

    output =
      output.slice(0, ref.position) + display + output.slice(ref.position + ref.match.length);
    resolved.push({ from: ref.match, to: display });
  }

  // Step 2: drop {#fig:label} / {#fig:label ...} blocks so pandoc-crossref
  // does not re-number the supplementary items
  for (const key of suppKeys) {
    const escaped = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const attrBlock = new RegExp(`\\{#${escaped}(?:\\s[^}]*)?\\}`, 'g');
    output = output.replace(attrBlock, (hit) => {
      resolved.push({ from: hit, to: '(stripped)' });
      return '';
    });
  }

  return { text: output, resolved };
}
744
-
745
- /**
746
- * Format registry for display
747
- */
748
- export function formatRegistry(registry: Registry): string {
749
- const lines: string[] = [];
750
-
751
- if (registry.figures.size > 0) {
752
- lines.push('Figures:');
753
- for (const [label, info] of registry.figures) {
754
- const num = info.isSupp ? `S${info.num}` : info.num;
755
- lines.push(` Figure ${num}: @fig:${label} (${info.file})`);
756
- }
757
- }
758
-
759
- if (registry.tables.size > 0) {
760
- if (lines.length > 0) lines.push('');
761
- lines.push('Tables:');
762
- for (const [label, info] of registry.tables) {
763
- const num = info.isSupp ? `S${info.num}` : info.num;
764
- lines.push(` Table ${num}: @tbl:${label} (${info.file})`);
765
- }
766
- }
767
-
768
- if (registry.equations.size > 0) {
769
- if (lines.length > 0) lines.push('');
770
- lines.push('Equations:');
771
- for (const [label, info] of registry.equations) {
772
- lines.push(` Equation ${info.num}: @eq:${label} (${info.file})`);
773
- }
774
- }
775
-
776
- if (lines.length === 0) {
777
- lines.push('No figure/table anchors found.');
778
- }
779
-
780
- return lines.join('\n');
781
- }
1
+ /**
2
+ * Cross-reference handling - dynamic figure/table references
3
+ *
4
+ * Enables:
5
+ * - @fig:label syntax in source (auto-numbered)
6
+ * - Conversion to "Figure 1" in Word output
7
+ * - Auto-conversion back during import
8
+ */
9
+
10
+ import * as fs from 'fs';
11
+ import * as path from 'path';
12
+ import YAML from 'yaml';
13
+ import type {
14
+ RefNumber,
15
+ HardcodedRef,
16
+ DynamicRef,
17
+ FigureInfo,
18
+ Registry,
19
+ RefStatus,
20
+ ConversionResult,
21
+ } from './types.js';
22
+
23
+ // =============================================================================
24
+ // Constants
25
+ // =============================================================================
26
+
27
/**
 * Number of characters immediately preceding a hardcoded reference that are
 * searched for an already-present @-style reference (duplicate suppression).
 */
const REF_CONTEXT_WINDOW = 100;

/**
 * Shortest word length taken into account by similarity calculations.
 */
const MIN_WORD_LENGTH = 2;
32
+
33
+ // =============================================================================
34
+ // Type Definitions (Internal)
35
+ // =============================================================================
36
+
37
/**
 * Registry entry describing one labelled item (internal to registry building).
 */
interface RefInfo {
  /** Anchor label without its type prefix. */
  label: string;
  /** Sequence number assigned to the item. */
  num: number;
  /** Whether the item is numbered in the supplementary ("S") series. */
  isSupp: boolean;
  /** Section file the anchor was found in. */
  file: string;
}
46
+
47
/**
 * Components of one parsed reference number such as "3", "2b" or "S1".
 */
interface ParsedRefNumber {
  /** True when the number carried an "S" (supplementary) prefix. */
  isSupp: boolean;
  /** Numeric part of the reference. */
  num: number;
  /** Lower-case panel letter ("a", "b", ...), or null when there is none. */
  suffix: string | null;
}
55
+
56
/**
 * A hardcoded reference found in text, together with its parsed numbers.
 */
interface DetectedRef {
  /** Normalized reference kind. */
  type: 'fig' | 'tbl' | 'eq';
  /** Exact text that was matched (e.g. "Figs. 1a-c"). */
  match: string;
  /** Every individual number the matched text expands to. */
  numbers: ParsedRefNumber[];
  /** Character offset of the match within the scanned text. */
  position: number;
}
65
+
66
+ // =============================================================================
67
+ // Internal Helpers
68
+ // =============================================================================
69
+
70
/**
 * Discover the ordered section files of a project from its config files.
 *
 * Lookup order:
 *  1. `rev.yaml` - a non-empty `sections` array of file names, used in the
 *     order listed.
 *  2. `sections.yaml` - a `sections` mapping of file name to metadata, sorted
 *     by each entry's `order` (entries without a usable `order` sort as 999).
 *
 * Only files that actually exist in `directory` are returned. Unreadable or
 * malformed config files are skipped (a warning is printed when the DEBUG
 * environment variable is set). An empty YAML document or an entry with no
 * metadata is treated as "nothing configured" instead of raising.
 *
 * @param directory - Project directory containing the config and section files.
 * @returns Existing section file names in order, or an empty array when no
 *   usable config is found (the caller must handle that case).
 */
function discoverSectionFiles(directory: string): string[] {
  const existsInDir = (file: string): boolean => fs.existsSync(path.join(directory, file));

  // Load a YAML config as a plain object; null when absent, unreadable or not an object.
  const readConfig = (fileName: string): Record<string, unknown> | null => {
    const configPath = path.join(directory, fileName);
    if (!fs.existsSync(configPath)) return null;
    try {
      const parsed: unknown = YAML.parse(fs.readFileSync(configPath, 'utf-8'));
      // YAML.parse yields null for an empty document and a scalar for a bare value.
      return typeof parsed === 'object' && parsed !== null
        ? (parsed as Record<string, unknown>)
        : null;
    } catch (e) {
      if (process.env.DEBUG) {
        console.warn(`crossref: YAML parse error in ${fileName}:`, (e as Error).message);
      }
      return null;
    }
  };

  // Sort key for a sections.yaml entry; missing or non-numeric order sorts last.
  const orderOf = (meta: unknown): number => {
    const raw =
      typeof meta === 'object' && meta !== null ? (meta as { order?: unknown }).order : undefined;
    const value = typeof raw === 'string' && raw.trim() !== '' ? Number(raw) : raw;
    return typeof value === 'number' && !Number.isNaN(value) ? value : 999;
  };

  // Try rev.yaml first
  const revSections = readConfig('rev.yaml')?.sections;
  if (Array.isArray(revSections) && revSections.length > 0) {
    return revSections.filter(
      (f: unknown): f is string => typeof f === 'string' && existsInDir(f)
    );
  }

  // Try sections.yaml
  const sectionMap = readConfig('sections.yaml')?.sections;
  if (typeof sectionMap === 'object' && sectionMap !== null) {
    return Object.entries(sectionMap)
      .sort((a, b) => orderOf(a[1]) - orderOf(b[1]))
      .map(([file]) => file)
      .filter((file) => existsInDir(file));
  }

  // No config found - the caller must either error out or use explicit sections.
  return [];
}
113
+
114
+ // =============================================================================
115
+ // Detection Patterns
116
+ // =============================================================================
117
+
118
/**
 * Build one hardcoded-reference detection regex.
 *
 * Group 1 captures the type prefix, group 2 the reference list (later parsed
 * by parseReferenceList()). List items are joined by a dash, comma or
 * ampersand (optionally followed by "and"), or by a bare "and".
 *
 * @param prefix - Alternation of accepted type words, e.g. "Figures?|Figs?\\.?".
 * @param numberToken - Regex source for one number, e.g. "(?:\\d+|S\\d+)".
 */
const makeDetectionPattern = (prefix: string, numberToken: string): RegExp =>
  new RegExp(
    `\\b(${prefix})\\s+(${numberToken}[a-z]?` +
      `(?:(?:\\s*[-–—,&]\\s*(?:and\\s+)?|\\s+and\\s+)(?:${numberToken}[a-z]?|[a-z]\\b))*)`,
    'gi'
  );

/**
 * Regexes that find hardcoded references in prose, keyed by kind.
 *
 * Covered shapes:
 *  - Simple: "Figure 1", "Fig. 2a", "Table S1"
 *  - Ranges: "Figures 1-3", "Fig. 1a-c", "Figs. 1a-3b"
 *  - Lists:  "Figures 1, 2, and 3", "Fig. 1a, b, c", "Tables 1 & 2"
 *  - Mixed:  "Figs. 1, 3-5, and 7"
 *
 * The patterns are global and case-insensitive, so "figure 1" matches too and
 * `lastIndex` must be reset before each scan. Equations accept plain numbers
 * only (no "S" prefix).
 *
 * NOTE: a standalone letter after a separator counts as a panel suffix, so
 * text such as "Figure 1, a" is matched as a two-item list.
 */
const DETECTION_PATTERNS: Record<string, RegExp> = {
  figure: makeDetectionPattern('Figures?|Figs?\\.?', '(?:\\d+|S\\d+)'),
  table: makeDetectionPattern('Tables?|Tabs?\\.?', '(?:\\d+|S\\d+)'),
  equation: makeDetectionPattern('Equations?|Eqs?\\.?', '(?:\\d+)'),
};
143
+
144
/**
 * Phrases that resemble references but are ordinary prose (false positives),
 * e.g. "Table of Contents" or "Figure skating". Each entry is compiled as a
 * global, case-insensitive regex.
 */
const EXCLUSION_PATTERNS = [
  '\\bTable\\s+of\\s+Contents?\\b',
  '\\bFigure\\s+skating\\b',
  '\\bFigure\\s+out\\b',
  '\\bFigure\\s+it\\b',
  '\\bTable\\s+setting',
  '\\bEquation\\s+editor\\b',
].map((source) => new RegExp(source, 'gi'));
156
+
157
/**
 * Matches the opening of a markdown anchor attribute: {#fig:label},
 * {#tbl:label} or {#eq:label}. Group 1 is the type, group 2 the label.
 * Global and case-insensitive; reset `lastIndex` before each scan.
 */
const ANCHOR_PATTERN = new RegExp('\\{#(fig|tbl|eq):([a-zA-Z0-9_-]+)', 'gi');

/**
 * Matches @-style references: @fig:label, @tbl:label or @eq:label.
 * Group 1 is the type, group 2 the label.
 * Global and case-insensitive; reset `lastIndex` before each scan.
 */
const REF_PATTERN = new RegExp('@(fig|tbl|eq):([a-zA-Z0-9_-]+)', 'gi');
166
+
167
+ // =============================================================================
168
+ // Public API
169
+ // =============================================================================
170
+
171
/**
 * Map a reference type word to its canonical short form.
 *
 * The input is lower-cased and one trailing period is removed; anything that
 * then starts with "fig", "tab" or "eq" becomes 'fig', 'tbl' or 'eq'.
 *
 * @param typeStr - Type word such as "Figure", "Figs.", "Table" or "Eq.".
 * @returns The canonical type, or the cleaned-up input when it is not recognized.
 * @throws TypeError when `typeStr` is not a string.
 */
export function normalizeType(typeStr: string): 'fig' | 'tbl' | 'eq' | string {
  if (typeof typeStr !== 'string') {
    throw new TypeError(`typeStr must be a string, got ${typeof typeStr}`);
  }

  const cleaned = typeStr.toLowerCase().replace(/\.$/, '');
  const canonicalByPrefix: Array<[string, 'fig' | 'tbl' | 'eq']> = [
    ['fig', 'fig'],
    ['tab', 'tbl'],
    ['eq', 'eq'],
  ];

  for (const [prefix, canonical] of canonicalByPrefix) {
    if (cleaned.startsWith(prefix)) return canonical;
  }
  return cleaned;
}
184
+
185
/**
 * Parse one reference number token, handling the supplementary prefix
 * ("S1", "S2") and a single panel letter ("1a", "1b").
 *
 * @param numStr - Token such as "3", "2b", "S1" or "S4c".
 * @param suffix - Explicit panel letter; takes precedence over a letter
 *   embedded in `numStr`.
 * @returns The parsed components. An empty or non-string `numStr` yields
 *   `num: 0`; a token with no leading digits yields `num: NaN`.
 */
export function parseRefNumber(numStr: string, suffix: string | null = null): ParsedRefNumber {
  if (!numStr || typeof numStr !== 'string') {
    return { isSupp: false, num: 0, suffix: suffix || null };
  }
  const isSupp = numStr.toUpperCase().startsWith('S');
  const numPart = isSupp ? numStr.slice(1) : numStr;
  // Extract suffix if embedded in numStr (e.g., "1a")
  const match = numPart.match(/^(\d+)([a-z])?$/i);
  const num = match && match[1] ? parseInt(match[1], 10) : parseInt(numPart, 10);
  const extractedSuffix = suffix || (match && match[2]) || null;
  return { isSupp, num, suffix: extractedSuffix ? extractedSuffix.toLowerCase() : null };
}

/**
 * Expand a reference list such as "1, 2, and 3", "1a-c" or "1a-3b" into the
 * individual references it denotes.
 *
 * "and" and "&" are treated as commas; en and em dashes are treated as
 * hyphens. A bare letter inherits the number of the most recent full
 * reference, both as a list item ("1a, b") and as the start of a range
 * ("1a, b-c").
 *
 * @param listStr - The list portion of a hardcoded reference.
 * @returns One entry per individual reference, in reading order; an empty
 *   array for empty or non-string input.
 */
export function parseReferenceList(listStr: string): ParsedRefNumber[] {
  const results: ParsedRefNumber[] = [];
  if (!listStr || typeof listStr !== 'string') return results;

  // Normalize: replace "and" with comma, normalize dashes
  const normalized = listStr
    .replace(/\s+and\s+/gi, ', ')
    .replace(/[–—]/g, '-') // en-dash, em-dash → hyphen
    .replace(/&/g, ', '); // & → comma

  // Split by comma (but not by dash, which indicates ranges)
  const items = normalized.split(/\s*,\s*/).filter((p) => p.trim());

  const isLetterOnly = (s: string): boolean => /^[a-z]$/i.test(s);

  // Last full reference seen, used to complete bare-letter items and ranges.
  let lastFullRef: { num: number; isSupp: boolean } | null = null;

  for (const item of items) {
    const trimmed = item.trim();
    if (!trimmed) continue;

    if (!trimmed.includes('-')) {
      if (isLetterOnly(trimmed) && lastFullRef) {
        // Implicit prefix: "b" after "1a" means "1b"
        results.push({
          num: lastFullRef.num,
          isSupp: lastFullRef.isSupp,
          suffix: trimmed.toLowerCase(),
        });
      } else {
        // Full reference: "1", "1a", "S1", "S1a"
        const ref = parseRefNumber(trimmed);
        results.push(ref);
        lastFullRef = { num: ref.num, isSupp: ref.isSupp };
      }
      continue;
    }

    // Range: only the first two dash-separated pieces are considered.
    const bounds = trimmed.split('-').map((s) => s.trim());
    const start = bounds[0] || '';
    const end = bounds[1] || '';

    // A bare-letter start ("b-c") borrows its number from the previous full
    // reference; running it through parseRefNumber would give num = NaN and
    // lose the start letter.
    const startRef: ParsedRefNumber = isLetterOnly(start)
      ? {
          num: lastFullRef ? lastFullRef.num : 1,
          isSupp: lastFullRef ? lastFullRef.isSupp : false,
          suffix: start.toLowerCase(),
        }
      : parseRefNumber(start);

    // A bare-letter end ("1a-c") stays on the start's number.
    const endRef: ParsedRefNumber = isLetterOnly(end)
      ? { num: startRef.num, isSupp: startRef.isSupp, suffix: end.toLowerCase() }
      : parseRefNumber(end);

    if (startRef.suffix && endRef.suffix && startRef.num !== endRef.num) {
      // Cross-number suffix range: "1a-3b" → 1a, 1b, 2a, 2b, 3a, 3b.
      // Intermediate numbers are limited to the widest suffix that was named.
      const maxSuffix = Math.max(startRef.suffix.charCodeAt(0), endRef.suffix.charCodeAt(0));

      for (let n = startRef.num; n <= endRef.num; n++) {
        const suffixStart = n === startRef.num ? startRef.suffix.charCodeAt(0) : 'a'.charCodeAt(0);
        const suffixEnd = n === endRef.num ? endRef.suffix.charCodeAt(0) : maxSuffix;

        for (let s = suffixStart; s <= suffixEnd; s++) {
          results.push({ num: n, isSupp: startRef.isSupp, suffix: String.fromCharCode(s) });
        }
      }
      lastFullRef = { num: endRef.num, isSupp: startRef.isSupp };
    } else if (startRef.suffix || endRef.suffix) {
      // Suffix range on one number: "1a-c" → 1a, 1b, 1c
      // num 0 means the start token was empty (e.g. "-c"): fall back as well.
      const num: number = startRef.num !== 0 ? startRef.num : lastFullRef ? lastFullRef.num : 1;
      const isSupp: boolean = startRef.isSupp
        ? startRef.isSupp
        : lastFullRef
          ? lastFullRef.isSupp
          : false;
      const startCode = (startRef.suffix || 'a').charCodeAt(0);
      const endCode = (endRef.suffix || 'a').charCodeAt(0);

      for (let code = startCode; code <= endCode; code++) {
        results.push({ num, isSupp, suffix: String.fromCharCode(code) });
      }
      lastFullRef = { num, isSupp };
    } else {
      // Pure number range: "1-3" → 1, 2, 3
      for (let n = startRef.num; n <= endRef.num; n++) {
        results.push({ num: n, isSupp: startRef.isSupp, suffix: null });
      }
      lastFullRef = { num: endRef.num, isSupp: startRef.isSupp };
    }
  }

  return results;
}
314
+
315
/**
 * Build the label registry by scanning section files for {#fig:label},
 * {#tbl:label} and {#eq:label} anchors.
 *
 * Files are processed in order and anchors are numbered in order of
 * appearance. Figures and tables found in a file whose name contains "supp"
 * or "appendix" (case-insensitive) get their own supplementary counter;
 * equations always share one counter.
 *
 * IMPORTANT: section files come either from `sections` or from
 * rev.yaml/sections.yaml. The directory is never scanned for .md files, since
 * stray temporary files (paper_clean.md, etc.) would corrupt the numbering.
 * With no explicit sections and no config, the registry is empty.
 *
 * @param directory - Directory containing the section files.
 * @param sections - Optional explicit, ordered list of section file names.
 * @returns Label maps per kind plus a number → label reverse lookup.
 * @throws TypeError when `directory` is not a string.
 */
export function buildRegistry(directory: string, sections?: string[]): Registry {
  if (typeof directory !== 'string') {
    throw new TypeError(`directory must be a string, got ${typeof directory}`);
  }

  const figures = new Map<string, FigureInfo>();
  const tables = new Map<string, FigureInfo>();
  const equations = new Map<string, FigureInfo>();

  // Running counters: main and supplementary series are numbered separately.
  const counters = { fig: 0, figS: 0, tbl: 0, tblS: 0, eq: 0 };

  // Explicit sections win; otherwise fall back to the config files.
  const orderedFiles: string[] =
    Array.isArray(sections) && sections.length > 0
      ? sections.filter((f) => fs.existsSync(path.join(directory, f)))
      : discoverSectionFiles(directory);

  for (const file of orderedFiles) {
    const content = fs.readFileSync(path.join(directory, file), 'utf-8');
    const lowerName = file.toLowerCase();
    const isSupp = lowerName.includes('supp') || lowerName.includes('appendix');

    ANCHOR_PATTERN.lastIndex = 0;
    for (
      let anchor = ANCHOR_PATTERN.exec(content);
      anchor !== null;
      anchor = ANCHOR_PATTERN.exec(content)
    ) {
      const typeRaw = anchor[1];
      const label = anchor[2];
      if (!typeRaw || !label) continue;

      switch (typeRaw.toLowerCase()) {
        case 'fig': {
          const num = isSupp ? ++counters.figS : ++counters.fig;
          figures.set(label, { label, num, isSupp, file });
          break;
        }
        case 'tbl': {
          const num = isSupp ? ++counters.tblS : ++counters.tbl;
          tables.set(label, { label, num, isSupp, file });
          break;
        }
        case 'eq':
          equations.set(label, { label, num: ++counters.eq, isSupp: false, file });
          break;
      }
    }
  }

  // Reverse lookup: number → label, one map per series.
  const byNumber: Registry['byNumber'] = {
    fig: new Map(),
    figS: new Map(),
    tbl: new Map(),
    tblS: new Map(),
    eq: new Map(),
  };

  figures.forEach((info, label) => {
    byNumber[info.isSupp ? 'figS' : 'fig'].set(info.num, label);
  });
  tables.forEach((info, label) => {
    byNumber[info.isSupp ? 'tblS' : 'tbl'].set(info.num, label);
  });
  equations.forEach((info, label) => {
    byNumber.eq.set(info.num, label);
  });

  return { figures, tables, equations, byNumber };
}
418
+
419
/**
 * Render a label as its display string, e.g. "Figure 1" or "Table S2".
 *
 * @param type - Kind of the labelled item; selects the map and the word used.
 * @param label - Label without the type prefix.
 * @param registry - Registry to look the label up in.
 * @returns The display string, or null when the registry is missing or the
 *   label is not registered.
 */
export function labelToDisplay(
  type: 'fig' | 'tbl' | 'eq',
  label: string,
  registry: Registry
): string | null {
  if (!registry || !registry.figures) return null;

  let collection: Registry['figures'];
  let word: string;
  switch (type) {
    case 'fig':
      collection = registry.figures;
      word = 'Figure';
      break;
    case 'tbl':
      collection = registry.tables;
      word = 'Table';
      break;
    default:
      collection = registry.equations;
      word = 'Equation';
  }

  const entry = collection.get(label);
  if (!entry) return null;

  // Supplementary items are shown with an "S" prefix on the number.
  return entry.isSupp ? `${word} S${entry.num}` : `${word} ${entry.num}`;
}
440
+
441
/**
 * Look up the label registered for a display number (e.g. the label behind
 * "Figure 1").
 *
 * @param type - Kind of item.
 * @param num - Display number without any "S" prefix.
 * @param isSupp - Whether to search the supplementary series.
 * @param registry - Registry providing the number → label maps.
 * @returns The label (without type prefix), or null when nothing matches.
 */
export function numberToLabel(
  type: 'fig' | 'tbl' | 'eq',
  num: number,
  isSupp: boolean,
  registry: Registry
): string | null {
  const lookup = registry ? registry.byNumber : undefined;
  if (!lookup) return null;

  // Supplementary series live under "<type>S" (there is no such map for equations).
  const seriesName = (isSupp ? `${type}S` : type) as keyof Registry['byNumber'];
  const series = lookup[seriesName];
  const label = series ? series.get(num) : undefined;
  return label ? label : null;
}
455
+
456
/**
 * Find every hardcoded reference ("Figure 1", "Tables 2-4", "Eq. 3", ...) in
 * a piece of text.
 *
 * @param text - Text to scan.
 * @returns Detected references sorted by position, each with its matched text
 *   and the individual numbers it expands to.
 * @throws TypeError when `text` is not a string.
 */
export function detectHardcodedRefs(text: string): DetectedRef[] {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  const found: DetectedRef[] = [];

  Object.entries(DETECTION_PATTERNS).forEach(([kind, regex]) => {
    // The patterns are shared global regexes, so rewind before scanning.
    regex.lastIndex = 0;

    for (let hit = regex.exec(text); hit !== null; hit = regex.exec(text)) {
      // hit[1] is the type prefix, hit[2] the reference list ("1, 2, and 3").
      const listPart = hit[2];
      if (!listPart) continue;

      const parsed = parseReferenceList(listPart);
      if (parsed.length === 0) continue; // nothing usable in the list

      found.push({
        type: normalizeType(kind) as 'fig' | 'tbl' | 'eq',
        match: hit[0],
        numbers: parsed,
        position: hit.index,
      });
    }
  });

  // Results were collected per kind; put them in reading order.
  found.sort((left, right) => left.position - right.position);
  return found;
}
495
+
496
/**
 * Convert hardcoded references ("Figure 1") to @-style references
 * ("@fig:label").
 *
 * A reference is converted only when every number in it maps to a registered
 * label; otherwise it is left as written and a warning is recorded per unknown
 * number. Multi-number references become a "; "-separated list. Numbers that
 * differ only by panel letter ("Fig. 1a-c") map to the same label and are
 * emitted, and warned about, once.
 *
 * A reference is also left alone when one of its target @-references already
 * appears in the REF_CONTEXT_WINDOW characters before it, which avoids
 * duplicating references restored by an earlier import step.
 *
 * @param text - Text to convert. Validation is delegated to
 *   detectHardcodedRefs, which throws TypeError for non-strings.
 * @param registry - Registry used to map numbers to labels.
 * @returns The converted text, the replacements made, and any warnings.
 */
export function convertHardcodedRefs(text: string, registry: Registry): ConversionResult {
  const refs = detectHardcodedRefs(text);
  const conversions: Array<{ from: string; to: string }> = [];
  const warnings: string[] = [];

  // Process in reverse order so earlier positions stay valid while replacing.
  let result = text;
  for (let i = refs.length - 1; i >= 0; i--) {
    const ref = refs[i];
    if (!ref) continue;

    const labels: string[] = [];
    const seenNumbers = new Set<string>();
    let hasUnknown = false;

    for (const { num, isSupp } of ref.numbers) {
      const displayNum = isSupp ? `S${num}` : `${num}`;
      // Panels of one figure (1a, 1b, ...) share a label: handle each number once.
      if (seenNumbers.has(displayNum)) continue;
      seenNumbers.add(displayNum);

      const label = numberToLabel(ref.type, num, isSupp, registry);
      if (label) {
        labels.push(`@${ref.type}:${label}`);
      } else {
        warnings.push(`Unknown reference: ${ref.type} ${displayNum} (no matching label)`);
        hasUnknown = true;
      }
    }

    // Keep the original text unless the whole reference can be converted.
    if (hasUnknown || labels.length === 0) continue;

    // Skip if the @-syntax already appears in the preceding text. This prevents
    // duplication when import restores @fig:x and then we see "Fig. 1",
    // e.g. "@fig:map@fig:map{++@fig:map++}" or "@fig:mapFigure 1" patterns.
    const textBefore = result.slice(Math.max(0, ref.position - REF_CONTEXT_WINDOW), ref.position);
    if (labels.some((label) => textBefore.includes(label))) continue;

    const replacement = labels.join('; ');
    result =
      result.slice(0, ref.position) + replacement + result.slice(ref.position + ref.match.length);

    conversions.push({ from: ref.match, to: replacement });
  }

  return { converted: result, conversions, warnings };
}
548
+
549
/**
 * Detect @-style references (@fig:label, @tbl:label, @eq:label) in text.
 *
 * Matching is case-insensitive on the type, so "@Fig:map" is found as well.
 * The reported `type` is always lower-case so that it is a valid key for
 * registry lookups; `match` keeps the text exactly as written.
 *
 * @param text - Text to scan.
 * @returns References in order of appearance.
 * @throws TypeError when `text` is not a string.
 */
export function detectDynamicRefs(text: string): DynamicRef[] {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  const refs: DynamicRef[] = [];
  // REF_PATTERN is a shared global regex, so rewind before scanning.
  REF_PATTERN.lastIndex = 0;
  let match: RegExpExecArray | null;

  while ((match = REF_PATTERN.exec(text)) !== null) {
    // Normalize case: the pattern also accepts "Fig", "TBL", etc.
    const type = match[1]?.toLowerCase();
    const label = match[2];
    if (!type || !label) continue;
    refs.push({
      type: type as 'fig' | 'tbl' | 'eq',
      label,
      match: match[0],
      position: match.index,
    });
  }

  return refs;
}
575
+
576
/**
 * Summarize the references in a piece of text: @-style references, hardcoded
 * references, and the number of anchors defined per kind.
 *
 * Anchor types are compared case-insensitively, matching how buildRegistry
 * treats them.
 *
 * @param text - Text to analyse.
 * @param registry - Accepted for interface compatibility; not consulted here.
 * @returns Dynamic refs, hardcoded refs and anchor counts.
 * @throws TypeError when `text` is not a string (raised by the detectors).
 */
export function getRefStatus(text: string, registry: Registry): RefStatus {
  const dynamic = detectDynamicRefs(text);
  const hardcoded = detectHardcodedRefs(text) as HardcodedRef[];

  // Count anchors in this text
  const anchors = { figures: 0, tables: 0, equations: 0 };
  ANCHOR_PATTERN.lastIndex = 0;
  let match: RegExpExecArray | null;
  while ((match = ANCHOR_PATTERN.exec(text)) !== null) {
    // ANCHOR_PATTERN is case-insensitive, so normalize before comparing.
    const type = match[1]?.toLowerCase();
    if (type === 'fig') anchors.figures++;
    else if (type === 'tbl') anchors.tables++;
    else if (type === 'eq') anchors.equations++;
  }

  return { dynamic, hardcoded, anchors };
}
603
+
604
/**
 * Detect forward references in combined text.
 *
 * A forward reference is an @-style reference that appears before its
 * {#anchor} definition, or whose anchor does not exist in the text at all.
 * Anchor and reference types are compared case-insensitively (both patterns
 * accept any case); labels are compared exactly.
 *
 * @param text - Combined document text.
 * @returns The forward references in order of appearance, and the position of
 *   the first occurrence of each anchor keyed as "fig:label".
 * @throws TypeError when `text` is not a string (raised by detectDynamicRefs).
 */
export function detectForwardRefs(text: string): {
  forwardRefs: Array<{ type: string; label: string; match: string; position: number }>;
  anchorPositions: Map<string, number>;
} {
  // Build map of anchor positions: "fig:label" -> position
  const anchorPositions = new Map<string, number>();
  ANCHOR_PATTERN.lastIndex = 0;
  let match: RegExpExecArray | null;
  while ((match = ANCHOR_PATTERN.exec(text)) !== null) {
    const type = match[1];
    const label = match[2];
    if (!type || !label) continue;
    const key = `${type.toLowerCase()}:${label}`;
    // Only store first occurrence (in case of duplicates)
    if (!anchorPositions.has(key)) {
      anchorPositions.set(key, match.index);
    }
  }

  // Keep only references whose anchor is missing or defined later in the text.
  const forwardRefs = detectDynamicRefs(text).filter((ref) => {
    const anchorPos = anchorPositions.get(`${ref.type.toLowerCase()}:${ref.label}`);
    return anchorPos === undefined || ref.position < anchorPos;
  });

  return { forwardRefs, anchorPositions };
}
640
+
641
/**
 * Replace forward references with their display text ("Figure 1").
 *
 * Only references that precede their anchor definition are touched; all
 * others are left for pandoc-crossref, which keeps them as clickable links.
 *
 * @param text - Combined document text.
 * @param registry - Registry used to produce display strings.
 * @returns The rewritten text, the replacements made, and the forward
 *   references that have no registry entry. Both lists are in reverse document
 *   order, and positions refer to the original text.
 */
export function resolveForwardRefs(
  text: string,
  registry: Registry
): {
  text: string;
  resolved: Array<{ from: string; to: string; position: number }>;
  unresolved: Array<{ ref: string; position: number }>;
} {
  const resolved: Array<{ from: string; to: string; position: number }> = [];
  const unresolved: Array<{ ref: string; position: number }> = [];
  const pending = detectForwardRefs(text).forwardRefs;

  let output = text;
  // Walk backwards so replacements do not shift the positions still to come.
  for (let idx = pending.length; idx-- > 0; ) {
    const ref = pending[idx];
    if (!ref) continue;

    const display = labelToDisplay(ref.type as 'fig' | 'tbl' | 'eq', ref.label, registry);
    if (!display) {
      unresolved.push({ ref: ref.match, position: ref.position });
      continue;
    }

    const before = output.slice(0, ref.position);
    const after = output.slice(ref.position + ref.match.length);
    output = before + display + after;
    resolved.push({ from: ref.match, to: display, position: ref.position });
  }

  return { text: output, resolved, unresolved };
}
683
+
684
/**
 * Resolve every reference to a supplementary figure/table and strip the
 * corresponding anchor attributes.
 *
 * pandoc-crossref numbers all figures in one sequence and cannot produce
 * "Figure S1". Each @fig:label / @tbl:label pointing at a supplementary item
 * is therefore replaced by plain text ("Figure S1", "Table S1"), and the
 * {#fig:label} / {#tbl:label} attributes are removed so pandoc-crossref does
 * not number those items itself.
 *
 * @param text - Combined document text.
 * @param registry - Registry identifying the supplementary items.
 * @returns The rewritten text and a log of replacements; stripped anchors are
 *   logged with `to: '(stripped)'`.
 */
export function resolveSupplementaryRefs(
  text: string,
  registry: Registry
): {
  text: string;
  resolved: Array<{ from: string; to: string }>;
} {
  const resolved: Array<{ from: string; to: string }> = [];

  // Keys ("fig:label" / "tbl:label") of all supplementary items.
  const suppLabels = new Set<string>();
  registry.figures.forEach((info, label) => {
    if (info.isSupp) suppLabels.add(`fig:${label}`);
  });
  registry.tables.forEach((info, label) => {
    if (info.isSupp) suppLabels.add(`tbl:${label}`);
  });

  if (suppLabels.size === 0) return { text, resolved };

  let output = text;

  // Step 1: turn @-references to supplementary items into plain text.
  // Walk backwards so earlier positions remain valid.
  const refs = detectDynamicRefs(output);
  for (let idx = refs.length; idx-- > 0; ) {
    const ref = refs[idx];
    if (!ref || !suppLabels.has(`${ref.type}:${ref.label}`)) continue;

    const display = labelToDisplay(ref.type as 'fig' | 'tbl' | 'eq', ref.label, registry);
    if (!display) continue;

    output =
      output.slice(0, ref.position) + display + output.slice(ref.position + ref.match.length);
    resolved.push({ from: ref.match, to: display });
  }

  // Step 2: remove {#fig:label} / {#tbl:label ...} attribute blocks of
  // supplementary anchors so pandoc-crossref does not re-number them.
  suppLabels.forEach((key) => {
    const escaped = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const attrBlock = new RegExp(`\\{#${escaped}(?:\\s[^}]*)?\\}`, 'g');
    output = output.replace(attrBlock, (found) => {
      resolved.push({ from: found, to: '(stripped)' });
      return '';
    });
  });

  return { text: output, resolved };
}
744
+
745
/**
 * Render the registry as a human-readable listing.
 *
 * One section each for figures, tables and equations (empty kinds are
 * omitted), separated by blank lines. Supplementary figures and tables are
 * shown with an "S" prefix on the number.
 *
 * @param registry - Registry to format.
 * @returns The multi-line listing, or a placeholder message when the registry
 *   holds no entries.
 */
export function formatRegistry(registry: Registry): string {
  const sections: string[][] = [];

  if (registry.figures.size > 0) {
    const section = ['Figures:'];
    registry.figures.forEach((info, label) => {
      const shown = info.isSupp ? `S${info.num}` : info.num;
      section.push(`  Figure ${shown}: @fig:${label} (${info.file})`);
    });
    sections.push(section);
  }

  if (registry.tables.size > 0) {
    const section = ['Tables:'];
    registry.tables.forEach((info, label) => {
      const shown = info.isSupp ? `S${info.num}` : info.num;
      section.push(`  Table ${shown}: @tbl:${label} (${info.file})`);
    });
    sections.push(section);
  }

  if (registry.equations.size > 0) {
    const section = ['Equations:'];
    registry.equations.forEach((info, label) => {
      section.push(`  Equation ${info.num}: @eq:${label} (${info.file})`);
    });
    sections.push(section);
  }

  if (sections.length === 0) {
    return 'No figure/table anchors found.';
  }

  // Sections are separated by one empty line.
  return sections.map((section) => section.join('\n')).join('\n\n');
}