docrev 0.9.11 → 0.9.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +9 -9
- package/.gitattributes +1 -1
- package/CHANGELOG.md +149 -149
- package/PLAN-tables-and-postprocess.md +850 -850
- package/README.md +391 -391
- package/bin/rev.js +11 -11
- package/bin/rev.ts +145 -145
- package/completions/rev.bash +127 -127
- package/completions/rev.ps1 +210 -210
- package/completions/rev.zsh +207 -207
- package/dev_notes/stress2/build_adversarial.ts +186 -186
- package/dev_notes/stress2/drift_matcher.ts +62 -62
- package/dev_notes/stress2/probe_anchors.ts +35 -35
- package/dev_notes/stress2/project/discussion.before.md +3 -3
- package/dev_notes/stress2/project/discussion.md +3 -3
- package/dev_notes/stress2/project/methods.before.md +20 -20
- package/dev_notes/stress2/project/methods.md +20 -20
- package/dev_notes/stress2/project/rev.yaml +5 -5
- package/dev_notes/stress2/project/sections.yaml +4 -4
- package/dev_notes/stress2/sections.yaml +5 -5
- package/dev_notes/stress2/trace_placement.ts +50 -50
- package/dev_notes/stresstest_boundaries.ts +27 -27
- package/dev_notes/stresstest_drift_apply.ts +43 -43
- package/dev_notes/stresstest_drift_compare.ts +43 -43
- package/dev_notes/stresstest_drift_v2.ts +54 -54
- package/dev_notes/stresstest_inspect.ts +54 -54
- package/dev_notes/stresstest_pstyle.ts +55 -55
- package/dev_notes/stresstest_section_debug.ts +23 -23
- package/dev_notes/stresstest_split.ts +70 -70
- package/dev_notes/stresstest_trace.ts +19 -19
- package/dev_notes/stresstest_verify_no_overwrite.ts +40 -40
- package/dist/lib/build.d.ts +50 -1
- package/dist/lib/build.d.ts.map +1 -1
- package/dist/lib/build.js +80 -30
- package/dist/lib/build.js.map +1 -1
- package/dist/lib/commands/build.d.ts.map +1 -1
- package/dist/lib/commands/build.js +38 -5
- package/dist/lib/commands/build.js.map +1 -1
- package/dist/lib/commands/utilities.js +164 -164
- package/dist/lib/commands/word-tools.js +8 -8
- package/dist/lib/grammar.js +3 -3
- package/dist/lib/import.d.ts.map +1 -1
- package/dist/lib/import.js +146 -24
- package/dist/lib/import.js.map +1 -1
- package/dist/lib/pdf-comments.js +44 -44
- package/dist/lib/plugins.js +57 -57
- package/dist/lib/pptx-themes.js +115 -115
- package/dist/lib/spelling.js +2 -2
- package/dist/lib/templates.js +387 -387
- package/dist/lib/themes.js +51 -51
- package/dist/lib/types.d.ts +20 -0
- package/dist/lib/types.d.ts.map +1 -1
- package/dist/lib/word-extraction.d.ts +6 -0
- package/dist/lib/word-extraction.d.ts.map +1 -1
- package/dist/lib/word-extraction.js +46 -3
- package/dist/lib/word-extraction.js.map +1 -1
- package/dist/lib/wordcomments.d.ts.map +1 -1
- package/dist/lib/wordcomments.js +23 -5
- package/dist/lib/wordcomments.js.map +1 -1
- package/eslint.config.js +27 -27
- package/lib/anchor-match.ts +276 -276
- package/lib/annotations.ts +644 -644
- package/lib/build.ts +1300 -1227
- package/lib/citations.ts +160 -160
- package/lib/commands/build.ts +833 -801
- package/lib/commands/citations.ts +515 -515
- package/lib/commands/comments.ts +1050 -1050
- package/lib/commands/context.ts +174 -174
- package/lib/commands/core.ts +309 -309
- package/lib/commands/doi.ts +435 -435
- package/lib/commands/file-ops.ts +372 -372
- package/lib/commands/history.ts +320 -320
- package/lib/commands/index.ts +87 -87
- package/lib/commands/init.ts +259 -259
- package/lib/commands/merge-resolve.ts +378 -378
- package/lib/commands/preview.ts +178 -178
- package/lib/commands/project-info.ts +244 -244
- package/lib/commands/quality.ts +517 -517
- package/lib/commands/response.ts +454 -454
- package/lib/commands/section-boundaries.ts +82 -82
- package/lib/commands/sections.ts +451 -451
- package/lib/commands/sync.ts +706 -706
- package/lib/commands/text-ops.ts +449 -449
- package/lib/commands/utilities.ts +448 -448
- package/lib/commands/verify-anchors.ts +272 -272
- package/lib/commands/word-tools.ts +340 -340
- package/lib/comment-realign.ts +517 -517
- package/lib/config.ts +84 -84
- package/lib/crossref.ts +781 -781
- package/lib/csl.ts +191 -191
- package/lib/dependencies.ts +98 -98
- package/lib/diff-engine.ts +465 -465
- package/lib/doi-cache.ts +115 -115
- package/lib/doi.ts +897 -897
- package/lib/equations.ts +506 -506
- package/lib/errors.ts +346 -346
- package/lib/format.ts +541 -541
- package/lib/git.ts +326 -326
- package/lib/grammar.ts +303 -303
- package/lib/image-registry.ts +180 -180
- package/lib/import.ts +911 -792
- package/lib/journals.ts +543 -543
- package/lib/merge.ts +633 -633
- package/lib/orcid.ts +144 -144
- package/lib/pdf-comments.ts +263 -263
- package/lib/pdf-import.ts +524 -524
- package/lib/plugins.ts +362 -362
- package/lib/postprocess.ts +188 -188
- package/lib/pptx-color-filter.lua +37 -37
- package/lib/pptx-template.ts +469 -469
- package/lib/pptx-themes.ts +483 -483
- package/lib/protect-restore.ts +520 -520
- package/lib/rate-limiter.ts +94 -94
- package/lib/response.ts +197 -197
- package/lib/restore-references.ts +240 -240
- package/lib/review.ts +327 -327
- package/lib/schema.ts +417 -417
- package/lib/scientific-words.ts +73 -73
- package/lib/sections.ts +335 -335
- package/lib/slides.ts +756 -756
- package/lib/spelling.ts +334 -334
- package/lib/templates.ts +526 -526
- package/lib/themes.ts +742 -742
- package/lib/trackchanges.ts +247 -247
- package/lib/tui.ts +450 -450
- package/lib/types.ts +550 -530
- package/lib/undo.ts +250 -250
- package/lib/utils.ts +69 -69
- package/lib/variables.ts +179 -179
- package/lib/word-extraction.ts +806 -759
- package/lib/word.ts +643 -643
- package/lib/wordcomments.ts +817 -798
- package/package.json +137 -137
- package/scripts/postbuild.js +28 -28
- package/skill/REFERENCE.md +431 -431
- package/skill/SKILL.md +258 -258
- package/tsconfig.json +26 -26
- package/types/index.d.ts +525 -525
package/lib/crossref.ts
CHANGED
|
@@ -1,781 +1,781 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Cross-reference handling - dynamic figure/table references
|
|
3
|
-
*
|
|
4
|
-
* Enables:
|
|
5
|
-
* - @fig:label syntax in source (auto-numbered)
|
|
6
|
-
* - Conversion to "Figure 1" in Word output
|
|
7
|
-
* - Auto-conversion back during import
|
|
8
|
-
*/
|
|
9
|
-
|
|
10
|
-
import * as fs from 'fs';
|
|
11
|
-
import * as path from 'path';
|
|
12
|
-
import YAML from 'yaml';
|
|
13
|
-
import type {
|
|
14
|
-
RefNumber,
|
|
15
|
-
HardcodedRef,
|
|
16
|
-
DynamicRef,
|
|
17
|
-
FigureInfo,
|
|
18
|
-
Registry,
|
|
19
|
-
RefStatus,
|
|
20
|
-
ConversionResult,
|
|
21
|
-
} from './types.js';
|
|
22
|
-
|
|
23
|
-
// =============================================================================
|
|
24
|
-
// Constants
|
|
25
|
-
// =============================================================================
|
|
26
|
-
|
|
27
|
-
/**
 * Number of characters immediately preceding a hardcoded reference that are
 * inspected when deciding whether the equivalent @-style reference is already
 * present (deduplication).
 */
const REF_CONTEXT_WINDOW = 100;

/**
 * Shortest word length taken into account by similarity calculations.
 */
const MIN_WORD_LENGTH = 2;
|
|
32
|
-
|
|
33
|
-
// =============================================================================
|
|
34
|
-
// Type Definitions (Internal)
|
|
35
|
-
// =============================================================================
|
|
36
|
-
|
|
37
|
-
/**
 * Registry entry describing one labelled item (internal use while building
 * the registry).
 */
interface RefInfo {
  /** Label text, e.g. the `heatmap` part of `{#fig:heatmap}`. */
  label: string;
  /** Sequential number assigned to the item. */
  num: number;
  /** Whether the item is numbered in the supplementary ("S") sequence. */
  isSupp: boolean;
  /** Section file associated with the item. */
  file: string;
}

/**
 * One reference number split into its components, e.g. "S2a" →
 * `{ isSupp: true, num: 2, suffix: 'a' }`.
 */
interface ParsedRefNumber {
  /** True when the number carried an "S" (supplementary) prefix. */
  isSupp: boolean;
  /** Numeric part of the reference. */
  num: number;
  /** Panel letter, or `null` when there is none. */
  suffix: string | null;
}

/**
 * A hardcoded reference found in text, together with every number it
 * expands to.
 */
interface DetectedRef {
  /** Canonical reference kind. */
  type: 'fig' | 'tbl' | 'eq';
  /** Exact text that was matched, e.g. "Figs. 1-3". */
  match: string;
  /** Every individual reference the matched text expands to. */
  numbers: Array<ParsedRefNumber>;
  /** Character offset of the match within the scanned text. */
  position: number;
}
|
|
65
|
-
|
|
66
|
-
// =============================================================================
|
|
67
|
-
// Internal Helpers
|
|
68
|
-
// =============================================================================
|
|
69
|
-
|
|
70
|
-
/**
 * Discover the ordered section files of a project by reading its config.
 *
 * Lookup order:
 *   1. `rev.yaml`      – `sections` must be a non-empty list of file names.
 *   2. `sections.yaml` – `sections` is a mapping of file name → metadata;
 *                        files are ordered by `metadata.order` (missing → 999).
 *
 * Only files that exist inside `directory` are returned. Config files that are
 * missing, unparsable, empty, or not a YAML mapping are skipped; parse
 * problems are reported only when the DEBUG environment variable is set.
 *
 * @param directory Project directory containing the config and section files
 * @returns Section file names in config order, or an empty array when no
 *          usable config is found (the caller must handle that case)
 */
function discoverSectionFiles(directory: string): string[] {
  // Keeps only string entries that point at an existing file in `directory`.
  const isExistingFile = (file: unknown): file is string =>
    typeof file === 'string' && fs.existsSync(path.join(directory, file));

  // Reads one config file; yields undefined unless it parses to a mapping.
  const readConfig = (fileName: string): Record<string, unknown> | undefined => {
    const configPath = path.join(directory, fileName);
    if (!fs.existsSync(configPath)) return undefined;
    try {
      const parsed: unknown = YAML.parse(fs.readFileSync(configPath, 'utf-8'));
      // An empty document parses to null; scalars and null carry no sections.
      return parsed !== null && typeof parsed === 'object'
        ? (parsed as Record<string, unknown>)
        : undefined;
    } catch (e) {
      if (process.env.DEBUG) {
        console.warn(`crossref: YAML parse error in ${fileName}:`, (e as Error).message);
      }
      return undefined;
    }
  };

  // Sort key for a sections.yaml entry; tolerates entries without metadata
  // (e.g. "intro.md:" with no value, which parses to null).
  const orderOf = (meta: unknown): number =>
    (meta as { order?: number | null } | null | undefined)?.order ?? 999;

  // Try rev.yaml first
  const revSections = readConfig('rev.yaml')?.sections;
  if (Array.isArray(revSections) && revSections.length > 0) {
    return revSections.filter(isExistingFile);
  }

  // Try sections.yaml
  const sectionMap = readConfig('sections.yaml')?.sections;
  if (sectionMap !== null && typeof sectionMap === 'object') {
    return Object.entries(sectionMap)
      .sort(([, a], [, b]) => orderOf(a) - orderOf(b))
      .map(([file]) => file)
      .filter(isExistingFile);
  }

  // No usable config found - the caller decides whether that is an error.
  return [];
}
|
|
113
|
-
|
|
114
|
-
// =============================================================================
|
|
115
|
-
// Detection Patterns
|
|
116
|
-
// =============================================================================
|
|
117
|
-
|
|
118
|
-
/**
 * Regular expressions that find hardcoded references in prose.
 *
 * Shapes recognised:
 *   - Simple: "Figure 1", "Fig. 2a", "Table S1"
 *   - Ranges: "Figures 1-3", "Fig. 1a-c", "Figs. 1a-3b"
 *   - Lists:  "Figures 1, 2, and 3", "Fig. 1a, b, c", "Tables 1 & 2"
 *   - Mixed:  "Figs. 1, 3-5, and 7"
 *
 * Capture groups (identical for every entry):
 *   1 – the type prefix ("Figure", "Fig.", ...)
 *   2 – the whole reference list, later split by parseReferenceList()
 *
 * Items in the list are joined by a comma, dash (hyphen, en dash, em dash) or
 * ampersand, optionally followed by "and", or by a bare "and". A standalone
 * letter only counts when it ends at a word boundary. The `gi` flags make the
 * patterns global and case-insensitive, so "figure 1" is matched as well.
 * Equations do not accept the supplementary "S" prefix.
 */
const DETECTION_PATTERNS: Record<string, RegExp> = {
  figure: /\b(Figures?|Figs?\.?)\s+((?:\d+|S\d+)[a-z]?(?:(?:\s*[-–—,&]\s*(?:and\s+)?|\s+and\s+)(?:(?:\d+|S\d+)[a-z]?|[a-z]\b))*)/gi,

  table: /\b(Tables?|Tabs?\.?)\s+((?:\d+|S\d+)[a-z]?(?:(?:\s*[-–—,&]\s*(?:and\s+)?|\s+and\s+)(?:(?:\d+|S\d+)[a-z]?|[a-z]\b))*)/gi,

  equation: /\b(Equations?|Eqs?\.?)\s+((?:\d+)[a-z]?(?:(?:\s*[-–—,&]\s*(?:and\s+)?|\s+and\s+)(?:(?:\d+)[a-z]?|[a-z]\b))*)/gi,
};

/**
 * Phrases that resemble references but are not (false positives), such as
 * "Table of Contents" or "Figure skating".
 */
const EXCLUSION_PATTERNS = [
  /\bTable\s+of\s+Contents?\b/gi,
  /\bFigure\s+skating\b/gi,
  /\bFigure\s+out\b/gi,
  /\bFigure\s+it\b/gi,
  /\bTable\s+setting/gi,
  /\bEquation\s+editor\b/gi,
];

/**
 * Matches the opening of a markdown anchor: {#fig:label}, {#tbl:label},
 * {#eq:label}. Group 1 is the type, group 2 the label.
 */
const ANCHOR_PATTERN = /\{#(fig|tbl|eq):([a-zA-Z0-9_-]+)/gi;

/**
 * Matches an @-style reference: @fig:label, @tbl:label, @eq:label.
 * Group 1 is the type, group 2 the label.
 */
const REF_PATTERN = /@(fig|tbl|eq):([a-zA-Z0-9_-]+)/gi;
|
|
166
|
-
|
|
167
|
-
// =============================================================================
|
|
168
|
-
// Public API
|
|
169
|
-
// =============================================================================
|
|
170
|
-
|
|
171
|
-
/**
 * Map a reference-type word ("Figure", "Figs.", "Table", "Eq.", ...) to its
 * canonical short form.
 *
 * The input is lower-cased and one trailing period is dropped; anything that
 * then starts with "fig", "tab" or "eq" becomes 'fig', 'tbl' or 'eq'.
 * Unrecognised input is returned in its lower-cased, period-stripped form.
 *
 * @param typeStr Type word as written in the text
 * @returns 'fig', 'tbl', 'eq', or the cleaned-up input when unrecognised
 * @throws TypeError when `typeStr` is not a string
 */
export function normalizeType(typeStr: string): 'fig' | 'tbl' | 'eq' | string {
  if (typeof typeStr !== 'string') {
    throw new TypeError(`typeStr must be a string, got ${typeof typeStr}`);
  }

  const cleaned = typeStr.toLowerCase().replace(/\.$/, '');

  // Checked in order; the first matching prefix wins.
  const prefixTable: Array<[string, 'fig' | 'tbl' | 'eq']> = [
    ['fig', 'fig'],
    ['tab', 'tbl'],
    ['eq', 'eq'],
  ];
  for (const [prefix, canonical] of prefixTable) {
    if (cleaned.startsWith(prefix)) return canonical;
  }

  return cleaned;
}
|
|
184
|
-
|
|
185
|
-
/**
 * Parse a single reference number, handling the supplementary prefix
 * ("S1", "S2") and an embedded panel letter ("1a", "S1b").
 *
 * @param numStr Reference text such as "3", "S2" or "1a"
 * @param suffix Explicit panel letter; takes precedence over a letter
 *               embedded in `numStr`
 * @returns The parsed components. `num` is 0 when `numStr` is empty, not a
 *          string, or contains no leading digits (it is never NaN); `suffix`
 *          is lower-cased whenever `numStr` is a non-empty string.
 */
export function parseRefNumber(numStr: string, suffix: string | null = null): ParsedRefNumber {
  if (!numStr || typeof numStr !== 'string') {
    return { isSupp: false, num: 0, suffix: suffix || null };
  }

  const isSupp = numStr.toUpperCase().startsWith('S');
  const numPart = isSupp ? numStr.slice(1) : numStr;

  // Extract suffix if embedded in numStr (e.g., "1a")
  const match = numPart.match(/^(\d+)([a-z])?$/i);
  const parsed = match?.[1] ? parseInt(match[1], 10) : parseInt(numPart, 10);
  // parseInt yields NaN for input without leading digits ("b", "S"); report 0
  // so the result agrees with the empty-input sentinel above.
  const num = Number.isNaN(parsed) ? 0 : parsed;

  const extractedSuffix = suffix || match?.[2] || null;
  return { isSupp, num, suffix: extractedSuffix ? extractedSuffix.toLowerCase() : null };
}

/**
 * Expand a reference list such as "1, 2, and 3", "1a-c" or "1a-3b" into the
 * individual references it denotes.
 *
 * Handling:
 *   - "and" and "&" act as commas; en/em dashes act as hyphens.
 *   - "1-3"   → 1, 2, 3
 *   - "1a-c"  → 1a, 1b, 1c
 *   - "1a-3b" → 1a, 1b, 2a, 2b, 3a, 3b (intermediate numbers are limited to
 *               the larger of the two boundary letters)
 *   - A bare letter inherits the number of the previous full reference:
 *     "1a, b" → 1a, 1b and "1a, b-d" → 1a, 1b, 1c, 1d.
 *   - Descending ranges produce no entries.
 *
 * @param listStr The list portion of a matched reference
 * @returns One entry per individual reference, in order of appearance; empty
 *          when `listStr` is empty or not a string
 */
export function parseReferenceList(listStr: string): ParsedRefNumber[] {
  const results: ParsedRefNumber[] = [];
  if (!listStr || typeof listStr !== 'string') return results;

  const letterOnly = /^[a-z]$/i;

  // Normalize separators, then split on commas (dashes are kept: they mark ranges).
  const items = listStr
    .replace(/\s+and\s+/gi, ', ')
    .replace(/[–—]/g, '-') // en-dash, em-dash → hyphen
    .replace(/&/g, ', ') // & → comma
    .split(/\s*,\s*/)
    .map((item) => item.trim())
    .filter((item) => item.length > 0);

  // Last reference that carried its own number; bare letters inherit from it.
  let lastFullRef: { num: number; isSupp: boolean } | null = null;

  for (const item of items) {
    if (item.includes('-')) {
      const [rawStart = '', rawEnd = ''] = item.split('-').map((s) => s.trim());

      // A bare-letter start ("b-d" after "1a") borrows the previous number
      // rather than being parsed as a number.
      const startRef: ParsedRefNumber = letterOnly.test(rawStart)
        ? {
            num: lastFullRef?.num ?? 0,
            isSupp: lastFullRef?.isSupp ?? false,
            suffix: rawStart.toLowerCase(),
          }
        : parseRefNumber(rawStart);

      // A bare-letter end ("1a-c") stays on the start's number.
      const endRef: ParsedRefNumber = letterOnly.test(rawEnd)
        ? { num: startRef.num, isSupp: startRef.isSupp, suffix: rawEnd.toLowerCase() }
        : parseRefNumber(rawEnd);

      if (startRef.suffix && endRef.suffix && startRef.num !== endRef.num) {
        // Cross-number suffix range: "1a-3b" → 1a, 1b, 2a, 2b, 3a, 3b
        const maxSuffix = Math.max(startRef.suffix.charCodeAt(0), endRef.suffix.charCodeAt(0));

        for (let n = startRef.num; n <= endRef.num; n++) {
          const suffixStart =
            n === startRef.num ? startRef.suffix.charCodeAt(0) : 'a'.charCodeAt(0);
          const suffixEnd = n === endRef.num ? endRef.suffix.charCodeAt(0) : maxSuffix;

          for (let s = suffixStart; s <= suffixEnd; s++) {
            results.push({ num: n, isSupp: startRef.isSupp, suffix: String.fromCharCode(s) });
          }
        }
        lastFullRef = { num: endRef.num, isSupp: startRef.isSupp };
      } else if (startRef.suffix || endRef.suffix) {
        // Suffix range on a single number: "1a-c". A start without a usable
        // number (num 0) falls back to the previous full reference, then to 1.
        const num: number = startRef.num !== 0 ? startRef.num : lastFullRef ? lastFullRef.num : 1;
        const isSupp: boolean = startRef.isSupp
          ? startRef.isSupp
          : lastFullRef
            ? lastFullRef.isSupp
            : false;
        const startCode = (startRef.suffix || 'a').charCodeAt(0);
        const endCode = (endRef.suffix || 'a').charCodeAt(0);

        for (let code = startCode; code <= endCode; code++) {
          results.push({ num, isSupp, suffix: String.fromCharCode(code) });
        }
        lastFullRef = { num, isSupp };
      } else {
        // Pure number range: "1-3"
        for (let n = startRef.num; n <= endRef.num; n++) {
          results.push({ num: n, isSupp: startRef.isSupp, suffix: null });
        }
        lastFullRef = { num: endRef.num, isSupp: startRef.isSupp };
      }
    } else if (letterOnly.test(item) && lastFullRef) {
      // Implicit prefix: "b" after "1a" means "1b"
      results.push({
        num: lastFullRef.num,
        isSupp: lastFullRef.isSupp,
        suffix: item.toLowerCase(),
      });
    } else {
      // Full reference: "1", "1a", "S1", "S1a"
      const ref = parseRefNumber(item);
      results.push(ref);
      lastFullRef = { num: ref.num, isSupp: ref.isSupp };
    }
  }

  return results;
}
|
|
314
|
-
|
|
315
|
-
/**
 * Build the label registry for a project by scanning its section files for
 * {#fig:label}, {#tbl:label} and {#eq:label} anchors.
 *
 * Files are read in order and every anchor receives the next number in its
 * sequence. Figures and tables found in a file whose name contains "supp" or
 * "appendix" are numbered in a separate supplementary sequence; equations
 * always share one sequence and are never marked supplementary.
 *
 * IMPORTANT: the file list comes either from `sections` or from
 * rev.yaml / sections.yaml. The directory is never scanned for arbitrary .md
 * files, because temporary files (paper_clean.md, etc.) would corrupt the
 * numbering. Without a usable file list the returned registry is empty.
 *
 * @param directory Project directory
 * @param sections  Explicit ordered section files; entries that do not exist
 *                  in `directory` are ignored
 * @returns Maps from label to info per kind, plus reverse number → label maps
 * @throws TypeError when `directory` is not a string
 */
export function buildRegistry(directory: string, sections?: string[]): Registry {
  if (typeof directory !== 'string') {
    throw new TypeError(`directory must be a string, got ${typeof directory}`);
  }

  const figures = new Map<string, FigureInfo>();
  const tables = new Map<string, FigureInfo>();
  const equations = new Map<string, FigureInfo>();

  // Running counters, one per numbering sequence.
  const counters = { fig: 0, figS: 0, tbl: 0, tblS: 0, eq: 0 };

  // Explicit sections are the most reliable source; otherwise consult the
  // config files. No config means no files - never guess.
  const orderedFiles: string[] =
    Array.isArray(sections) && sections.length > 0
      ? sections.filter((f) => fs.existsSync(path.join(directory, f)))
      : discoverSectionFiles(directory);

  for (const file of orderedFiles) {
    const content = fs.readFileSync(path.join(directory, file), 'utf-8');
    const lowerName = file.toLowerCase();
    const isSupp = lowerName.includes('supp') || lowerName.includes('appendix');

    // The pattern is global and shared, so rewind it before each scan.
    ANCHOR_PATTERN.lastIndex = 0;
    for (
      let anchor = ANCHOR_PATTERN.exec(content);
      anchor !== null;
      anchor = ANCHOR_PATTERN.exec(content)
    ) {
      const [, typeRaw, label] = anchor;
      if (!typeRaw || !label) continue;

      switch (typeRaw.toLowerCase()) {
        case 'fig': {
          const num = isSupp ? ++counters.figS : ++counters.fig;
          figures.set(label, { label, num, isSupp, file });
          break;
        }
        case 'tbl': {
          const num = isSupp ? ++counters.tblS : ++counters.tbl;
          tables.set(label, { label, num, isSupp, file });
          break;
        }
        case 'eq':
          equations.set(label, { label, num: ++counters.eq, isSupp: false, file });
          break;
      }
    }
  }

  // Reverse lookup: number → label, split by main/supplementary sequence.
  const byNumber: Registry['byNumber'] = {
    fig: new Map(),
    figS: new Map(),
    tbl: new Map(),
    tblS: new Map(),
    eq: new Map(),
  };

  figures.forEach((info, label) => {
    byNumber[info.isSupp ? 'figS' : 'fig'].set(info.num, label);
  });
  tables.forEach((info, label) => {
    byNumber[info.isSupp ? 'tblS' : 'tbl'].set(info.num, label);
  });
  equations.forEach((info, label) => {
    byNumber.eq.set(info.num, label);
  });

  return { figures, tables, equations, byNumber };
}
|
|
418
|
-
|
|
419
|
-
/**
 * Turn a label into its display string, e.g. "Figure 1" or "Table S2".
 *
 * @param type     Reference kind; anything other than 'fig' or 'tbl' is
 *                 looked up among the equations
 * @param label    Label to look up
 * @param registry Registry to look the label up in
 * @returns The display string, or `null` when the registry is missing or the
 *          label is not registered for that kind
 */
export function labelToDisplay(
  type: 'fig' | 'tbl' | 'eq',
  label: string,
  registry: Registry
): string | null {
  if (!registry || !registry.figures) return null;

  // Pick the lookup table and the display word together.
  const [collection, prefix] =
    type === 'fig'
      ? ([registry.figures, 'Figure'] as const)
      : type === 'tbl'
        ? ([registry.tables, 'Table'] as const)
        : ([registry.equations, 'Equation'] as const);

  const info = collection.get(label);
  if (!info) return null;

  // Supplementary items are shown with an "S" in front of the number.
  const marker = info.isSupp ? 'S' : '';
  return `${prefix} ${marker}${info.num}`;
}
|
|
440
|
-
|
|
441
|
-
/**
 * Reverse lookup: find the label behind a display number, e.g. the label of
 * "Figure 1" or "Table S2".
 *
 * @param type     Reference kind
 * @param num      Number as displayed
 * @param isSupp   Whether the number belongs to the supplementary sequence
 * @param registry Registry holding the number → label maps
 * @returns The label, or `null` when the registry is missing, the sequence
 *          does not exist (e.g. supplementary equations) or the number is
 *          unassigned
 */
export function numberToLabel(
  type: 'fig' | 'tbl' | 'eq',
  num: number,
  isSupp: boolean,
  registry: Registry
): string | null {
  if (!registry || !registry.byNumber) return null;

  // Supplementary sequences live under "<type>S" (figS, tblS).
  const sequenceKey = (isSupp ? `${type}S` : type) as keyof Registry['byNumber'];
  const sequence = registry.byNumber[sequenceKey];
  const label = sequence == null ? undefined : sequence.get(num);

  return label || null;
}
|
|
455
|
-
|
|
456
|
-
/**
 * Find every hardcoded reference ("Figure 1", "Tables 2-4", "Eq. 3", ...) in
 * a piece of text.
 *
 * Each detection pattern is run over the whole text; the list portion of
 * every match is expanded into individual numbers. Matches that expand to
 * nothing are dropped.
 *
 * @param text Text to scan
 * @returns Detected references ordered by their position in the text
 * @throws TypeError when `text` is not a string
 */
export function detectHardcodedRefs(text: string): DetectedRef[] {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  const found: DetectedRef[] = [];

  for (const [kind, pattern] of Object.entries(DETECTION_PATTERNS)) {
    // Patterns are global and shared between calls: always start from 0.
    pattern.lastIndex = 0;

    for (let hit = pattern.exec(text); hit !== null; hit = pattern.exec(text)) {
      // hit[1] = type prefix (Figure, Fig., ...)
      // hit[2] = reference list (e.g. "1, 2, and 3" or "1a-3b")
      const list = hit[2];
      if (!list) continue;

      const numbers = parseReferenceList(list);
      if (numbers.length === 0) continue; // nothing usable was parsed

      found.push({
        type: normalizeType(kind) as 'fig' | 'tbl' | 'eq',
        match: hit[0],
        numbers,
        position: hit.index,
      });
    }
  }

  // Results from the different patterns are interleaved by position.
  found.sort((left, right) => left.position - right.position);
  return found;
}
|
|
495
|
-
|
|
496
|
-
/**
 * Rewrite hardcoded references ("Figure 1") as @-style references
 * ("@fig:label") using the registry's number → label maps.
 *
 * A reference is rewritten only when every number it contains resolves to a
 * label; otherwise it is left untouched and one warning per unknown number is
 * recorded. A reference is also left untouched when one of its @-labels
 * already occurs within the REF_CONTEXT_WINDOW characters before it, which
 * avoids duplicates such as "@fig:mapFigure 1" after an import has already
 * restored the @-reference. Multiple labels are joined with "; ".
 *
 * @param text     Text to convert (validated by detectHardcodedRefs)
 * @param registry Registry used to resolve numbers to labels
 * @returns The converted text, the list of replacements made, and warnings
 */
export function convertHardcodedRefs(text: string, registry: Registry): ConversionResult {
  const detected = detectHardcodedRefs(text);
  const conversions: Array<{ from: string; to: string }> = [];
  const warnings: string[] = [];

  let output = text;

  // Walk from the last match to the first so earlier positions stay valid.
  for (const ref of [...detected].reverse()) {
    if (!ref) continue;

    const labels: string[] = [];
    for (const { num, isSupp } of ref.numbers) {
      const label = numberToLabel(ref.type, num, isSupp, registry);
      if (label) {
        labels.push(`@${ref.type}:${label}`);
        continue;
      }
      const displayNum = isSupp ? `S${num}` : `${num}`;
      warnings.push(`Unknown reference: ${ref.type} ${displayNum} (no matching label)`);
      labels.push(ref.match); // marker: original text must be kept
    }

    // Nothing to write, or at least one number could not be resolved.
    if (labels.length === 0 || labels.includes(ref.match)) continue;

    const replacement = labels.join('; ');

    // Skip when the @-syntax is already present just before this reference.
    const windowStart = Math.max(0, ref.position - REF_CONTEXT_WINDOW);
    const preceding = output.slice(windowStart, ref.position);
    if (labels.some((label) => preceding.includes(label))) continue;

    const head = output.slice(0, ref.position);
    const tail = output.slice(ref.position + ref.match.length);
    output = head + replacement + tail;

    conversions.push({ from: ref.match, to: replacement });
  }

  return { converted: output, conversions, warnings };
}
|
|
548
|
-
|
|
549
|
-
/**
 * Find every @-style reference (@fig:label, @tbl:label, @eq:label) in text.
 *
 * The type prefix is matched case-insensitively, so capitalised forms such as
 * "@Fig:label" are found too. The reported `type` is always lower-cased so it
 * really is one of 'fig' | 'tbl' | 'eq' and can be used directly for registry
 * lookups; `match` keeps the text exactly as written.
 *
 * @param text Text to scan
 * @returns References in order of appearance
 * @throws TypeError when `text` is not a string
 */
export function detectDynamicRefs(text: string): DynamicRef[] {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  const refs: DynamicRef[] = [];
  // The pattern is global and shared between calls: always start from 0.
  REF_PATTERN.lastIndex = 0;
  let match: RegExpExecArray | null;

  while ((match = REF_PATTERN.exec(text)) !== null) {
    const type = match[1];
    const label = match[2];
    if (!type || !label) continue;
    refs.push({
      // Normalise case: without this "Fig" would be passed on as if it were a
      // valid kind and be treated as an equation by type-based lookups.
      type: type.toLowerCase() as 'fig' | 'tbl' | 'eq',
      label: label,
      match: match[0],
      position: match.index,
    });
  }

  return refs;
}
|
|
575
|
-
|
|
576
|
-
/**
 * Summarise the references found in a piece of text.
 *
 * @param text     Text to inspect
 * @param registry Registry for the project (not consulted by this function)
 * @returns The @-style references, the hardcoded references, and the number
 *          of figure/table/equation anchors defined in the text
 */
export function getRefStatus(text: string, registry: Registry): RefStatus {
  const dynamic = detectDynamicRefs(text);
  const hardcoded = detectHardcodedRefs(text) as HardcodedRef[];

  // Count anchors in this text
  let figCount = 0;
  let tblCount = 0;
  let eqCount = 0;

  // The pattern is global and shared between calls: always start from 0.
  ANCHOR_PATTERN.lastIndex = 0;
  let match: RegExpExecArray | null;
  while ((match = ANCHOR_PATTERN.exec(text)) !== null) {
    // The pattern is case-insensitive; compare in lower case so the counts
    // agree with what buildRegistry registers.
    switch (match[1]?.toLowerCase()) {
      case 'fig':
        figCount++;
        break;
      case 'tbl':
        tblCount++;
        break;
      case 'eq':
        eqCount++;
        break;
    }
  }

  return {
    dynamic,
    hardcoded,
    anchors: { figures: figCount, tables: tblCount, equations: eqCount },
  };
}
|
|
603
|
-
|
|
604
|
-
/**
 * Detect forward references in combined text.
 *
 * A forward reference is an @-reference that appears before its {#anchor}
 * definition, or whose anchor does not exist in the text at all.
 *
 * Anchors and references are matched on "type:label" keys whose type part is
 * lower-cased, so a capitalised reference such as "@Fig:map" is correctly
 * paired with the anchor "{#fig:map}". Labels stay case-sensitive.
 *
 * @param text Combined document text
 * @returns `forwardRefs` – the references that precede (or lack) their
 *          anchor; `anchorPositions` – first position of every anchor, keyed
 *          by "type:label"
 */
export function detectForwardRefs(text: string): {
  forwardRefs: Array<{ type: string; label: string; match: string; position: number }>;
  anchorPositions: Map<string, number>;
} {
  const keyOf = (type: string, label: string): string => `${type.toLowerCase()}:${label}`;

  // Build map of anchor positions: "fig:label" -> position
  const anchorPositions = new Map<string, number>();
  // The pattern is global and shared between calls: always start from 0.
  ANCHOR_PATTERN.lastIndex = 0;
  let match: RegExpExecArray | null;
  while ((match = ANCHOR_PATTERN.exec(text)) !== null) {
    const type = match[1];
    const label = match[2];
    if (!type || !label) continue;
    const key = keyOf(type, label);
    // Only store first occurrence (in case of duplicates)
    if (!anchorPositions.has(key)) {
      anchorPositions.set(key, match.index);
    }
  }

  // Keep the references whose anchor is missing or defined later in the text.
  const forwardRefs = detectDynamicRefs(text).filter((ref) => {
    const anchorPos = anchorPositions.get(keyOf(ref.type, ref.label));
    return anchorPos === undefined || ref.position < anchorPos;
  });

  return { forwardRefs, anchorPositions };
}
|
|
640
|
-
|
|
641
|
-
/**
 * Replace forward references with their display text ("Figure 1", "Table S2").
 *
 * Only references reported by `detectForwardRefs` are touched; every other
 * reference is left in place for pandoc-crossref (which keeps it clickable).
 *
 * @param text - Combined markdown text.
 * @param registry - Label registry used to look up display numbers.
 * @returns The rewritten text, the substitutions made, and the forward
 *   references for which the registry had no entry. Both lists are filled
 *   back-to-front, i.e. in descending position order.
 */
export function resolveForwardRefs(
  text: string,
  registry: Registry
): {
  text: string;
  resolved: Array<{ from: string; to: string; position: number }>;
  unresolved: Array<{ ref: string; position: number }>;
} {
  const resolved: Array<{ from: string; to: string; position: number }> = [];
  const unresolved: Array<{ ref: string; position: number }> = [];
  const pending = detectForwardRefs(text).forwardRefs;

  // Walk from the last reference to the first so that earlier offsets stay
  // valid while the string is being spliced.
  let output = text;
  for (const ref of [...pending].reverse()) {
    if (!ref) continue;
    const { match: token, position } = ref;
    const display = labelToDisplay(ref.type as 'fig' | 'tbl' | 'eq', ref.label, registry);

    if (!display) {
      unresolved.push({ ref: token, position });
      continue;
    }

    output = output.slice(0, position) + display + output.slice(position + token.length);
    resolved.push({ from: token, to: display, position });
  }

  return { text: output, resolved, unresolved };
}
|
|
683
|
-
|
|
684
|
-
/**
 * Resolve ALL supplementary references and strip supplementary anchor labels.
 *
 * pandoc-crossref cannot produce "Figure S1" numbering — it numbers all figures
 * sequentially. This function therefore:
 *  1. rewrites every `@fig:label` / `@tbl:label` that points to a supplementary
 *     item into plain text ("Figure S1", "Table S1"), and
 *  2. removes the `#fig:label` / `#tbl:label` identifier from the matching
 *     anchors so pandoc-crossref ignores them. Any other attributes in the same
 *     braces (e.g. `width=50%`) are kept.
 *
 * @param text - Markdown text to rewrite.
 * @param registry - Registry whose `isSupp` entries decide what is supplementary.
 * @returns The rewritten text and a log of replacements; stripped anchors are
 *   logged with `to: '(stripped)'`.
 */
export function resolveSupplementaryRefs(
  text: string,
  registry: Registry
): {
  text: string;
  resolved: Array<{ from: string; to: string }>;
} {
  const resolved: Array<{ from: string; to: string }> = [];
  let result = text;

  // Collect supplementary labels as "fig:label" / "tbl:label" keys.
  const suppLabels = new Set<string>();
  for (const [label, info] of registry.figures) {
    if (info.isSupp) suppLabels.add(`fig:${label}`);
  }
  for (const [label, info] of registry.tables) {
    if (info.isSupp) suppLabels.add(`tbl:${label}`);
  }

  if (suppLabels.size === 0) return { text: result, resolved };

  // 1. Replace references to supplementary items, back-to-front so that the
  //    offsets of the remaining references stay valid.
  const refs = detectDynamicRefs(result);
  for (let i = refs.length - 1; i >= 0; i--) {
    const ref = refs[i];
    if (!ref) continue;
    // Reference detection is case-insensitive; keys in suppLabels are lower-case.
    const type = ref.type.toLowerCase() as 'fig' | 'tbl' | 'eq';
    if (!suppLabels.has(`${type}:${ref.label}`)) continue;

    const display = labelToDisplay(type, ref.label, registry);
    if (display) {
      result =
        result.slice(0, ref.position) + display + result.slice(ref.position + ref.match.length);
      resolved.push({ from: ref.match, to: display });
    }
  }

  // 2. Strip the identifier from supplementary anchors so pandoc-crossref does
  //    not re-number them.
  for (const key of suppLabels) {
    const escaped = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // Group 1 captures whatever follows the identifier inside the braces.
    const pattern = new RegExp(`\\{#${escaped}(\\s[^}]*)?\\}`, 'g');
    result = result.replace(pattern, (match: string, rest?: string) => {
      resolved.push({ from: match, to: '(stripped)' });
      // Keep sibling attributes; dropping them would change how the item renders.
      const remaining = rest?.trim();
      return remaining ? `{${remaining}}` : '';
    });
  }

  return { text: result, resolved };
}
|
|
744
|
-
|
|
745
|
-
/**
 * Render a registry as a human-readable, newline-separated listing.
 *
 * Figures, tables and equations each get their own titled group, separated by a
 * blank line; empty groups are omitted. Supplementary figures and tables are
 * shown with an "S" prefix on their number.
 *
 * @param registry - Registry to render.
 * @returns The listing, or a one-line notice when the registry is empty.
 */
export function formatRegistry(registry: Registry): string {
  const out: string[] = [];

  // Start a new titled group, inserting a blank separator after any earlier one.
  const beginGroup = (title: string): void => {
    if (out.length > 0) out.push('');
    out.push(title);
  };

  if (registry.figures.size > 0) {
    beginGroup('Figures:');
    registry.figures.forEach((info, label) => {
      const num = info.isSupp ? `S${info.num}` : info.num;
      out.push(` Figure ${num}: @fig:${label} (${info.file})`);
    });
  }

  if (registry.tables.size > 0) {
    beginGroup('Tables:');
    registry.tables.forEach((info, label) => {
      const num = info.isSupp ? `S${info.num}` : info.num;
      out.push(` Table ${num}: @tbl:${label} (${info.file})`);
    });
  }

  if (registry.equations.size > 0) {
    beginGroup('Equations:');
    registry.equations.forEach((info, label) => {
      out.push(` Equation ${info.num}: @eq:${label} (${info.file})`);
    });
  }

  return out.length === 0 ? 'No figure/table anchors found.' : out.join('\n');
}
|
|
1
|
+
/**
|
|
2
|
+
* Cross-reference handling - dynamic figure/table references
|
|
3
|
+
*
|
|
4
|
+
* Enables:
|
|
5
|
+
* - @fig:label syntax in source (auto-numbered)
|
|
6
|
+
* - Conversion to "Figure 1" in Word output
|
|
7
|
+
* - Auto-conversion back during import
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import * as fs from 'fs';
|
|
11
|
+
import * as path from 'path';
|
|
12
|
+
import YAML from 'yaml';
|
|
13
|
+
import type {
|
|
14
|
+
RefNumber,
|
|
15
|
+
HardcodedRef,
|
|
16
|
+
DynamicRef,
|
|
17
|
+
FigureInfo,
|
|
18
|
+
Registry,
|
|
19
|
+
RefStatus,
|
|
20
|
+
ConversionResult,
|
|
21
|
+
} from './types.js';
|
|
22
|
+
|
|
23
|
+
// =============================================================================
|
|
24
|
+
// Constants
|
|
25
|
+
// =============================================================================
|
|
26
|
+
|
|
27
|
+
/**
 * Number of characters immediately preceding a hardcoded reference that are
 * searched for an already-present @-reference (used for deduplication).
 */
const REF_CONTEXT_WINDOW = 100;

/** Minimum word length for similarity calculations */
const MIN_WORD_LENGTH = 2;
|
|
32
|
+
|
|
33
|
+
// =============================================================================
|
|
34
|
+
// Type Definitions (Internal)
|
|
35
|
+
// =============================================================================
|
|
36
|
+
|
|
37
|
+
/**
 * Reference info (internal use in registry building).
 */
interface RefInfo {
  /** Anchor label, without the `fig:` / `tbl:` / `eq:` prefix. */
  label: string;
  /** Sequence number assigned to the label. */
  num: number;
  /** Whether the label belongs to supplementary material. */
  isSupp: boolean;
  /** Section file the anchor was found in. */
  file: string;
}

/**
 * One parsed reference number, e.g. "S2b" → supplementary, 2, "b".
 */
interface ParsedRefNumber {
  /** True when the number carried an "S" (supplementary) prefix. */
  isSupp: boolean;
  /** Numeric part of the reference. */
  num: number;
  /** Lower-case panel letter, or null when there is none. */
  suffix: string | null;
}

/**
 * A hardcoded reference found in text, together with its parsed numbers.
 */
interface DetectedRef {
  /** Normalised reference kind. */
  type: 'fig' | 'tbl' | 'eq';
  /** The full matched text, e.g. "Figs. 1a-c". */
  match: string;
  /** Every individual number the match expands to. */
  numbers: ParsedRefNumber[];
  /** Offset of the match within the scanned text. */
  position: number;
}
|
|
65
|
+
|
|
66
|
+
// =============================================================================
|
|
67
|
+
// Internal Helpers
|
|
68
|
+
// =============================================================================
|
|
69
|
+
|
|
70
|
+
/**
 * Discover section files for a project directory from its config files.
 *
 * `rev.yaml` is consulted first (a non-empty `sections` array, used in the
 * listed order); otherwise `sections.yaml` (a `sections` mapping of
 * file name → settings, sorted by each entry's `order`, with entries lacking
 * an order sorted as 999). Only files that exist in `directory` are returned.
 *
 * Malformed entries (non-string file names, null or scalar mapping values) are
 * ignored individually instead of invalidating the whole config.
 *
 * @param directory - Project directory containing the config and section files.
 * @returns Section file names in order, or an empty array when no usable config
 *   was found (the caller must handle that case).
 */
function discoverSectionFiles(directory: string): string[] {
  // True for string entries that name an existing file inside `directory`.
  const existsInDir = (file: unknown): file is string =>
    typeof file === 'string' && fs.existsSync(path.join(directory, file));

  // Parse a YAML file from `directory`; undefined when absent or unreadable.
  const loadYaml = (fileName: string): unknown => {
    const fullPath = path.join(directory, fileName);
    if (!fs.existsSync(fullPath)) return undefined;
    try {
      return YAML.parse(fs.readFileSync(fullPath, 'utf-8'));
    } catch (e: unknown) {
      // Best effort: a broken config is treated like a missing one.
      if (process.env.DEBUG) {
        console.warn(
          `crossref: YAML parse error in ${fileName}:`,
          e instanceof Error ? e.message : String(e)
        );
      }
      return undefined;
    }
  };

  // Read the top-level `sections` value; an empty YAML file parses to null.
  const sectionsOf = (config: unknown): unknown =>
    typeof config === 'object' && config !== null
      ? (config as { sections?: unknown }).sections
      : undefined;

  // Try rev.yaml first
  const listed = sectionsOf(loadYaml('rev.yaml'));
  if (Array.isArray(listed) && listed.length > 0) {
    return listed.filter(existsInDir);
  }

  // Try sections.yaml
  const mapped = sectionsOf(loadYaml('sections.yaml'));
  if (typeof mapped === 'object' && mapped !== null) {
    const orderOf = (entry: unknown): number => {
      // Optional chaining tolerates null / scalar entries such as "intro.md:".
      const order = (entry as { order?: unknown } | null | undefined)?.order;
      const value = Number(order ?? 999);
      return Number.isNaN(value) ? 999 : value;
    };
    return Object.entries(mapped)
      .sort((a, b) => orderOf(a[1]) - orderOf(b[1]))
      .map(([file]) => file)
      .filter(existsInDir);
  }

  // No config found - caller must handle this (either error or explicit sections)
  return [];
}
|
|
113
|
+
|
|
114
|
+
// =============================================================================
|
|
115
|
+
// Detection Patterns
|
|
116
|
+
// =============================================================================
|
|
117
|
+
|
|
118
|
+
/**
 * Patterns for detecting hardcoded references.
 *
 * Matches:
 * - Simple: "Figure 1", "Fig. 2a", "Table S1"
 * - Ranges: "Figures 1-3", "Fig. 1a-c", "Figs. 1a-3b"
 * - Lists: "Figures 1, 2, and 3", "Fig. 1a, b, c", "Tables 1 & 2"
 * - Mixed: "Figs. 1, 3-5, and 7"
 *
 * Capture groups:
 * - Group 1: type prefix (Figure, Fig., etc.)
 * - Group 2: reference list (parsed by parseReferenceList())
 *
 * Separators are a comma/dash/ampersand optionally followed by "and", or a bare
 * "and". A standalone panel letter ("b" in "1a, b") is only accepted directly
 * after a number that itself carries a letter suffix (or after another such
 * standalone letter). Without that restriction ordinary prose such as
 * "Figure 1 and a new method" or "Table 1, a summary" would be swallowed into
 * the reference list.
 *
 * The 'gi' flags make the patterns global and case-insensitive, so "figure 1"
 * is matched as well. Equations do not accept the "S" (supplementary) prefix.
 */
const DETECTION_PATTERNS: Record<string, RegExp> = (() => {
  const separator = String.raw`(?:\s*[-–—,&]\s*(?:and\s+)?|\s+and\s+)`;

  const build = (prefix: string, numberAtom: string): RegExp => {
    // number, optional suffix letter, then any chain of standalone letters
    const item = String.raw`${numberAtom}(?:[a-z](?:${separator}[a-z]\b)*)?`;
    return new RegExp(String.raw`\b(${prefix})\s+(${item}(?:${separator}${item})*)`, 'gi');
  };

  return {
    figure: build(String.raw`Figures?|Figs?\.?`, String.raw`(?:\d+|S\d+)`),
    table: build(String.raw`Tables?|Tabs?\.?`, String.raw`(?:\d+|S\d+)`),
    equation: build(String.raw`Equations?|Eqs?\.?`, String.raw`(?:\d+)`),
  };
})();
|
|
143
|
+
|
|
144
|
+
/**
 * Phrases that look like references but are not (false positives), e.g.
 * "Table of Contents" or "Figure skating".
 *
 * NOTE(review): nothing in the detection code visible in this file consults
 * this list — confirm whether it is still needed.
 */
const EXCLUSION_PATTERNS = [
  /\bTable\s+of\s+Contents?\b/gi,
  /\bFigure\s+skating\b/gi,
  /\bFigure\s+out\b/gi,
  /\bFigure\s+it\b/gi,
  /\bTable\s+setting/gi,
  /\bEquation\s+editor\b/gi,
];
|
|
156
|
+
|
|
157
|
+
/**
 * Matches the start of a markdown anchor attribute: `{#fig:label`, `{#tbl:label`
 * or `{#eq:label`. Group 1 is the type, group 2 the label. Global and
 * case-insensitive; callers reset `lastIndex` before each scan.
 */
const ANCHOR_PATTERN = /\{#(fig|tbl|eq):([a-zA-Z0-9_-]+)/gi;

/**
 * Matches @-style references: `@fig:label`, `@tbl:label`, `@eq:label`.
 * Group 1 is the type, group 2 the label. Global and case-insensitive.
 */
const REF_PATTERN = /@(fig|tbl|eq):([a-zA-Z0-9_-]+)/gi;
|
|
166
|
+
|
|
167
|
+
// =============================================================================
|
|
168
|
+
// Public API
|
|
169
|
+
// =============================================================================
|
|
170
|
+
|
|
171
|
+
/**
 * Map a reference type word to its canonical short form.
 *
 * The input is lower-cased and a single trailing "." is dropped; anything
 * starting with "fig" becomes 'fig', "tab" becomes 'tbl', and "eq" becomes
 * 'eq'. Unrecognised input is returned in its cleaned (lower-cased) form.
 *
 * @param typeStr - Type word such as "Figure", "Figs.", "Table" or "Eq.".
 * @returns The canonical type, or the cleaned input when it is not recognised.
 * @throws TypeError when `typeStr` is not a string.
 */
export function normalizeType(typeStr: string): 'fig' | 'tbl' | 'eq' | string {
  if (typeof typeStr !== 'string') {
    throw new TypeError(`typeStr must be a string, got ${typeof typeStr}`);
  }

  const cleaned = typeStr.toLowerCase().replace(/\.$/, '');
  const canonicalByPrefix: ReadonlyArray<readonly [string, 'fig' | 'tbl' | 'eq']> = [
    ['fig', 'fig'],
    ['tab', 'tbl'],
    ['eq', 'eq'],
  ];

  for (const [prefix, canonical] of canonicalByPrefix) {
    if (cleaned.startsWith(prefix)) return canonical;
  }
  return cleaned;
}
|
|
184
|
+
|
|
185
|
+
/**
 * Parse a single reference number, handling supplementary ("S1") prefixes and
 * letter suffixes ("1a").
 *
 * @param numStr - Number token such as "3", "2b", "S1" or "S4c".
 * @param suffix - Explicit panel letter; takes precedence over a letter embedded
 *   in `numStr`.
 * @returns The parsed parts. `num` is 0 (never NaN) when no number can be read;
 *   `suffix` is lower-cased, or null when absent.
 */
export function parseRefNumber(numStr: string, suffix: string | null = null): ParsedRefNumber {
  if (!numStr || typeof numStr !== 'string') {
    return { isSupp: false, num: 0, suffix: suffix || null };
  }
  const isSupp = numStr.toUpperCase().startsWith('S');
  const numPart = isSupp ? numStr.slice(1) : numStr;
  // Extract suffix if embedded in numStr (e.g., "1a")
  const match = numPart.match(/^(\d+)([a-z])?$/i);
  const parsed = parseInt(match?.[1] ?? numPart, 10);
  const extractedSuffix = suffix || match?.[2] || null;
  return {
    isSupp,
    // 0 is the "no number" sentinel already used for empty input.
    num: Number.isNaN(parsed) ? 0 : parsed,
    suffix: extractedSuffix ? extractedSuffix.toLowerCase() : null,
  };
}

/**
 * Parse a reference list such as "1, 2, and 3", "1a-c" or "1a-3b" into the
 * individual references it stands for.
 *
 * Handled forms:
 * - single items: "1", "1a", "S1"
 * - implicit panel letters: "1a, b" → 1a, 1b
 * - panel ranges: "1a-c" → 1a, 1b, 1c; "2a, c-e" → 2a, 2c, 2d, 2e
 * - number ranges: "1-3" → 1, 2, 3
 * - cross-number panel ranges: "1a-3b" → 1a, 1b, 2a, 2b, 3a, 3b
 * - mixed ranges: "1a-3" → 1a, 2, 3 and "1-3a" → 1, 2, 3a
 *
 * "and" and "&" are treated as commas; en/em dashes as hyphens.
 *
 * @param listStr - The list part of a detected reference.
 * @returns One entry per referenced item, in list order; empty for empty or
 *   non-string input.
 */
export function parseReferenceList(listStr: string): ParsedRefNumber[] {
  const results: ParsedRefNumber[] = [];
  if (!listStr || typeof listStr !== 'string') return results;

  const normalized = listStr
    .replace(/\s+and\s+/gi, ', ')
    .replace(/[–—]/g, '-') // en-dash, em-dash → hyphen
    .replace(/&/g, ', '); // & → comma

  // Split by comma (but not by dash, which indicates ranges)
  const items = normalized
    .split(/\s*,\s*/)
    .map((s) => s.trim())
    .filter((s) => s.length > 0);

  const isLetter = (s: string): boolean => /^[a-z]$/i.test(s);
  const codeOf = (letter: string | null): number => (letter || 'a').charCodeAt(0);

  // Last item that carried its own number; bare letters inherit from it.
  let lastFullRef: { num: number; isSupp: boolean } | null = null;

  for (const item of items) {
    if (!item.includes('-')) {
      if (isLetter(item) && lastFullRef) {
        // Implicit prefix: "b" after "1a" means "1b"
        results.push({
          num: lastFullRef.num,
          isSupp: lastFullRef.isSupp,
          suffix: item.toLowerCase(),
        });
      } else {
        // Full reference: "1", "1a", "S1", "S1a"
        const ref = parseRefNumber(item);
        results.push(ref);
        lastFullRef = { num: ref.num, isSupp: ref.isSupp };
      }
      continue;
    }

    const [start = '', end = ''] = item.split('-').map((s) => s.trim());

    // A bare letter at either end borrows its number instead of being parsed
    // as one (which would yield no number and lose the letter).
    const startRef: ParsedRefNumber = isLetter(start)
      ? {
          num: lastFullRef?.num ?? 0,
          isSupp: lastFullRef?.isSupp ?? false,
          suffix: start.toLowerCase(),
        }
      : parseRefNumber(start);
    const endRef: ParsedRefNumber = isLetter(end)
      ? { num: startRef.num, isSupp: startRef.isSupp, suffix: end.toLowerCase() }
      : parseRefNumber(end);

    if (startRef.suffix && endRef.suffix && startRef.num !== endRef.num) {
      // Cross-number panel range: "1a-3b" → 1a, 1b, 2a, 2b, 3a, 3b.
      // Intermediate figures are limited to the widest panel letter named.
      const maxSuffix = Math.max(codeOf(startRef.suffix), codeOf(endRef.suffix));
      for (let n = startRef.num; n <= endRef.num; n++) {
        const first = n === startRef.num ? codeOf(startRef.suffix) : codeOf('a');
        const last = n === endRef.num ? codeOf(endRef.suffix) : maxSuffix;
        for (let code = first; code <= last; code++) {
          results.push({ num: n, isSupp: startRef.isSupp, suffix: String.fromCharCode(code) });
        }
      }
      lastFullRef = { num: endRef.num, isSupp: startRef.isSupp };
    } else if (startRef.suffix || endRef.suffix) {
      if (startRef.num > 0 && endRef.num > startRef.num) {
        // Mixed range with a panel letter on one end only: keep every number
        // in between rather than collapsing to the first figure.
        for (let n = startRef.num; n <= endRef.num; n++) {
          const suffix =
            n === startRef.num ? startRef.suffix : n === endRef.num ? endRef.suffix : null;
          results.push({ num: n, isSupp: startRef.isSupp, suffix });
        }
        lastFullRef = { num: endRef.num, isSupp: startRef.isSupp };
      } else {
        // Panel range on one number: "1a-c". Only inherit from the previous
        // item when this one has no number of its own.
        const hasOwnNumber = startRef.num !== 0;
        const num = hasOwnNumber ? startRef.num : lastFullRef?.num ?? 1;
        const isSupp =
          hasOwnNumber || startRef.isSupp ? startRef.isSupp : lastFullRef?.isSupp ?? false;
        const last = codeOf(endRef.suffix);
        for (let code = codeOf(startRef.suffix); code <= last; code++) {
          results.push({ num, isSupp, suffix: String.fromCharCode(code) });
        }
        lastFullRef = { num, isSupp };
      }
    } else {
      // Pure number range: "1-3"
      for (let n = startRef.num; n <= endRef.num; n++) {
        results.push({ num: n, isSupp: startRef.isSupp, suffix: null });
      }
      lastFullRef = { num: endRef.num, isSupp: startRef.isSupp };
    }
  }

  return results;
}
|
|
314
|
+
|
|
315
|
+
/**
 * Build the label registry by scanning section files for `{#fig:label}`,
 * `{#tbl:label}` and `{#eq:label}` anchors.
 *
 * IMPORTANT: the files to scan come either from `sections` or from the
 * rev.yaml / sections.yaml config in `directory`. The function never guesses by
 * scanning every .md file, because temporary files (paper_clean.md, etc.) would
 * corrupt the numbering. With neither source available the registry is empty.
 *
 * Anchors are numbered in order of appearance across the files. Figures and
 * tables found in a file whose name contains "supp" or "appendix" are numbered
 * in a separate supplementary sequence; equations share a single sequence.
 *
 * @param directory - Directory containing the section files.
 * @param sections - Optional explicit, ordered list of section file names.
 * @returns Label → info maps per kind, plus the reverse number → label lookup.
 * @throws TypeError when `directory` is not a string.
 */
export function buildRegistry(directory: string, sections?: string[]): Registry {
  if (typeof directory !== 'string') {
    throw new TypeError(`directory must be a string, got ${typeof directory}`);
  }

  const figures = new Map<string, FigureInfo>();
  const tables = new Map<string, FigureInfo>();
  const equations = new Map<string, FigureInfo>();

  // Running counters; main and supplementary items are numbered independently.
  const counters = { fig: 0, figS: 0, tbl: 0, tblS: 0, eq: 0 };

  // Explicit sections win; otherwise fall back to the config files.
  const orderedFiles: string[] =
    Array.isArray(sections) && sections.length > 0
      ? sections.filter((f) => fs.existsSync(path.join(directory, f)))
      : discoverSectionFiles(directory);

  for (const file of orderedFiles) {
    const content = fs.readFileSync(path.join(directory, file), 'utf-8');
    const lowerName = file.toLowerCase();
    const isSupp = lowerName.includes('supp') || lowerName.includes('appendix');

    ANCHOR_PATTERN.lastIndex = 0;
    for (
      let found = ANCHOR_PATTERN.exec(content);
      found !== null;
      found = ANCHOR_PATTERN.exec(content)
    ) {
      const [, typeRaw, label] = found;
      if (!typeRaw || !label) continue;

      switch (typeRaw.toLowerCase()) {
        case 'fig': {
          const num = isSupp ? ++counters.figS : ++counters.fig;
          figures.set(label, { label, num, isSupp, file });
          break;
        }
        case 'tbl': {
          const num = isSupp ? ++counters.tblS : ++counters.tbl;
          tables.set(label, { label, num, isSupp, file });
          break;
        }
        case 'eq': {
          equations.set(label, { label, num: ++counters.eq, isSupp: false, file });
          break;
        }
      }
    }
  }

  // Reverse lookup: number → label
  const byNumber: Registry['byNumber'] = {
    fig: new Map(),
    figS: new Map(),
    tbl: new Map(),
    tblS: new Map(),
    eq: new Map(),
  };

  figures.forEach((info, label) => {
    byNumber[info.isSupp ? 'figS' : 'fig'].set(info.num, label);
  });
  tables.forEach((info, label) => {
    byNumber[info.isSupp ? 'tblS' : 'tbl'].set(info.num, label);
  });
  equations.forEach((info, label) => {
    byNumber.eq.set(info.num, label);
  });

  return { figures, tables, equations, byNumber };
}
|
|
418
|
+
|
|
419
|
+
/**
 * Get the display string for a label (e.g., "Figure 1", "Table S2").
 *
 * @param type - Reference kind; compared case-insensitively.
 * @param label - Anchor label without the type prefix.
 * @param registry - Registry to look the label up in.
 * @returns The display text, or null when the registry (or the collection for
 *   this kind) is missing, the kind is not fig/tbl/eq, or the label is unknown.
 */
export function labelToDisplay(
  type: 'fig' | 'tbl' | 'eq',
  label: string,
  registry: Registry
): string | null {
  if (!registry) return null;

  // Callers obtain `type` from case-insensitive regex captures, so normalise
  // it rather than letting anything unrecognised fall through to "Equation".
  const kind = typeof type === 'string' ? type.toLowerCase() : '';

  let collection: Registry['figures'] | undefined;
  let prefix: string;
  switch (kind) {
    case 'fig':
      collection = registry.figures;
      prefix = 'Figure';
      break;
    case 'tbl':
      collection = registry.tables;
      prefix = 'Table';
      break;
    case 'eq':
      collection = registry.equations;
      prefix = 'Equation';
      break;
    default:
      return null;
  }

  // A partially populated registry yields null instead of a TypeError.
  const info = collection?.get(label);
  if (!info) return null;

  const numStr = info.isSupp ? `S${info.num}` : `${info.num}`;
  return `${prefix} ${numStr}`;
}
|
|
440
|
+
|
|
441
|
+
/**
 * Reverse lookup: find the label registered for a display number
 * (e.g. "heatmap" for Figure 1).
 *
 * @param type - Reference kind.
 * @param num - Display number without any "S" prefix.
 * @param isSupp - Whether to look in the supplementary sequence.
 * @param registry - Registry providing the `byNumber` lookup tables.
 * @returns The label, or null when the registry has no such entry (including
 *   kinds that have no supplementary table).
 */
export function numberToLabel(
  type: 'fig' | 'tbl' | 'eq',
  num: number,
  isSupp: boolean,
  registry: Registry
): string | null {
  const lookup = registry ? registry.byNumber : undefined;
  if (!lookup) return null;

  // Supplementary tables are stored under "<type>S".
  const bucket = (isSupp ? `${type}S` : type) as keyof Registry['byNumber'];
  const label = lookup[bucket]?.get(num);
  return label ? label : null;
}
|
|
455
|
+
|
|
456
|
+
/**
 * Find every hardcoded reference ("Figure 1", "Tables 2-4", "Eq. 3") in text.
 *
 * Each detection pattern is run over the whole text; the list part of a match
 * is expanded with parseReferenceList(), and matches that expand to nothing are
 * dropped.
 *
 * @param text - Text to scan.
 * @returns The detected references, sorted by position in `text`.
 * @throws TypeError when `text` is not a string.
 */
export function detectHardcodedRefs(text: string): DetectedRef[] {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  const found: DetectedRef[] = [];

  for (const [kind, pattern] of Object.entries(DETECTION_PATTERNS)) {
    // Shared global regex: make sure the scan starts at the beginning.
    pattern.lastIndex = 0;

    for (const hit of text.matchAll(pattern)) {
      // hit[1] = type prefix (Figure, Fig., …); hit[2] = reference list.
      const list = hit[2];
      if (!list) continue;

      const numbers = parseReferenceList(list);
      if (numbers.length === 0) continue;

      found.push({
        type: normalizeType(kind) as 'fig' | 'tbl' | 'eq',
        match: hit[0],
        numbers,
        position: hit.index ?? 0,
      });
    }
  }

  found.sort((left, right) => left.position - right.position);
  return found;
}
|
|
495
|
+
|
|
496
|
+
/**
 * Convert hardcoded references ("Figure 1") to @-style references
 * ("@fig:label").
 *
 * A reference is converted only when every number in it resolves to a label;
 * otherwise it is left untouched and one warning per unknown number is
 * recorded. Several numbers that resolve to the same label (e.g. the panels in
 * "Figs. 1a-c") produce that label once. A reference is also left untouched
 * when one of its target labels already appears in the REF_CONTEXT_WINDOW
 * characters before it, which avoids duplicating a reference that an import
 * step has just restored.
 *
 * @param text - Text to convert.
 * @param registry - Registry used to map numbers to labels.
 * @returns The converted text, the list of conversions and any warnings.
 * @throws TypeError when `text` is not a string (raised by detectHardcodedRefs).
 */
export function convertHardcodedRefs(text: string, registry: Registry): ConversionResult {
  // Input validation delegated to detectHardcodedRefs
  const refs = detectHardcodedRefs(text);
  const conversions: Array<{ from: string; to: string }> = [];
  const warnings: string[] = [];

  // Process in reverse order so earlier positions stay valid while splicing.
  let result = text;
  for (let i = refs.length - 1; i >= 0; i--) {
    const ref = refs[i];
    if (!ref) continue;

    const labels: string[] = [];
    let allKnown = true;
    for (const { num, isSupp } of ref.numbers) {
      const label = numberToLabel(ref.type, num, isSupp, registry);
      if (!label) {
        const displayNum = isSupp ? `S${num}` : `${num}`;
        warnings.push(`Unknown reference: ${ref.type} ${displayNum} (no matching label)`);
        allKnown = false;
        continue;
      }
      const token = `@${ref.type}:${label}`;
      // Panels of one figure share a label; emit it only once.
      if (!labels.includes(token)) labels.push(token);
    }

    // Keep the original text when any part of the reference is unknown.
    if (!allKnown || labels.length === 0) continue;

    // Skip if the @-syntax already appears in the preceding text, e.g.
    // "@fig:mapFigure 1" left behind by an import that restored @fig:map.
    const textBefore = result.slice(Math.max(0, ref.position - REF_CONTEXT_WINDOW), ref.position);
    if (labels.some((label) => textBefore.includes(label))) continue;

    const replacement = labels.join('; ');
    result =
      result.slice(0, ref.position) + replacement + result.slice(ref.position + ref.match.length);
    conversions.push({ from: ref.match, to: replacement });
  }

  return { converted: result, conversions, warnings };
}
|
|
548
|
+
|
|
549
|
+
/**
 * Detect @-style references (`@fig:label`, `@tbl:label`, `@eq:label`) in text.
 *
 * @param text - Text to scan.
 * @returns One entry per reference in document order. `type` is lower-cased
 *   (the pattern matches case-insensitively); `match` is the text exactly as
 *   written and `position` its offset in `text`.
 * @throws TypeError when `text` is not a string.
 */
export function detectDynamicRefs(text: string): DynamicRef[] {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  const refs: DynamicRef[] = [];
  // Shared global regex: rewind before scanning.
  REF_PATTERN.lastIndex = 0;
  let match: RegExpExecArray | null;

  while ((match = REF_PATTERN.exec(text)) !== null) {
    const type = match[1];
    const label = match[2];
    if (!type || !label) continue;
    refs.push({
      // Normalise so the value really is one of the declared literals;
      // "@FIG:x" would otherwise be reported with type "FIG".
      type: type.toLowerCase() as 'fig' | 'tbl' | 'eq',
      label,
      match: match[0],
      position: match.index,
    });
  }

  return refs;
}
|
|
575
|
+
|
|
576
|
+
/**
 * Summarise the cross-references found in a piece of text.
 *
 * @param text - Markdown text to inspect.
 * @param registry - Kept for interface compatibility; not read by this function.
 * @returns The `@type:label` references, the hardcoded ("Figure 1") references,
 *   and how many `{#fig:…}`, `{#tbl:…}` and `{#eq:…}` anchors `text` defines.
 */
export function getRefStatus(text: string, registry: Registry): RefStatus {
  const dynamic = detectDynamicRefs(text);
  const hardcoded = detectHardcodedRefs(text) as HardcodedRef[];

  const counts = { fig: 0, tbl: 0, eq: 0 };

  // The pattern is a shared global regex, so rewind it before scanning.
  ANCHOR_PATTERN.lastIndex = 0;
  let match: RegExpExecArray | null;
  while ((match = ANCHOR_PATTERN.exec(text)) !== null) {
    // ANCHOR_PATTERN matches case-insensitively; normalise the captured type
    // so that e.g. "{#FIG:x}" is counted instead of silently skipped.
    const type = match[1]?.toLowerCase();
    if (type === 'fig') counts.fig++;
    else if (type === 'tbl') counts.tbl++;
    else if (type === 'eq') counts.eq++;
  }

  return {
    dynamic,
    hardcoded,
    anchors: { figures: counts.fig, tables: counts.tbl, equations: counts.eq },
  };
}
|
|
603
|
+
|
|
604
|
+
/**
 * Detect forward references in combined text.
 *
 * A forward reference is an `@type:label` reference that appears before the
 * first `{#type:label}` anchor defining it, or whose anchor is missing entirely.
 *
 * @param text - Combined markdown text.
 * @returns `forwardRefs` (the offending references, in document order) and
 *   `anchorPositions`, a map from "type:label" (type lower-cased) to the offset
 *   of the first matching anchor.
 */
export function detectForwardRefs(text: string): {
  forwardRefs: Array<{ type: string; label: string; match: string; position: number }>;
  anchorPositions: Map<string, number>;
} {
  // Both patterns are case-insensitive, so the type part of the key is
  // lower-cased; otherwise "{#Fig:a}" and "@fig:a" would never pair up.
  const keyOf = (type: string, label: string): string => `${type.toLowerCase()}:${label}`;

  const anchorPositions = new Map<string, number>();
  ANCHOR_PATTERN.lastIndex = 0;
  let match: RegExpExecArray | null;
  while ((match = ANCHOR_PATTERN.exec(text)) !== null) {
    const type = match[1];
    const label = match[2];
    if (!type || !label) continue;
    const key = keyOf(type, label);
    // Only the first occurrence counts when an anchor is duplicated.
    if (!anchorPositions.has(key)) {
      anchorPositions.set(key, match.index);
    }
  }

  const forwardRefs = detectDynamicRefs(text).filter((ref) => {
    const anchorPos = anchorPositions.get(keyOf(ref.type, ref.label));
    // Forward ref if the anchor doesn't exist or appears after the reference.
    return anchorPos === undefined || ref.position < anchorPos;
  });

  return { forwardRefs, anchorPositions };
}
|
|
640
|
+
|
|
641
|
+
/**
 * Replace forward references with their display text.
 *
 * Only references reported by `detectForwardRefs` are touched; every other
 * reference is left in place for pandoc-crossref to handle (which preserves
 * clickable links).
 *
 * @param text - Combined document text.
 * @param registry - Registry passed through to `labelToDisplay`.
 * @returns `text`: the rewritten text; `resolved`: one entry per replaced
 *   reference; `unresolved`: references for which `labelToDisplay` returned a
 *   falsy value. Both lists are filled while walking from the last reference
 *   to the first, and every `position` is an offset into the original `text`.
 */
export function resolveForwardRefs(
  text: string,
  registry: Registry
): {
  text: string;
  resolved: Array<{ from: string; to: string; position: number }>;
  unresolved: Array<{ ref: string; position: number }>;
} {
  const resolved: Array<{ from: string; to: string; position: number }> = [];
  const unresolved: Array<{ ref: string; position: number }> = [];

  const pending = detectForwardRefs(text).forwardRefs;
  let output = text;

  // Walk back-to-front so that splicing one reference never shifts the
  // offsets of the references still waiting to be processed.
  for (const ref of [...pending].reverse()) {
    const { match: token, position } = ref;
    const display = labelToDisplay(ref.type as 'fig' | 'tbl' | 'eq', ref.label, registry);

    if (!display) {
      unresolved.push({ ref: token, position });
      continue;
    }

    const before = output.slice(0, position);
    const after = output.slice(position + token.length);
    output = before + display + after;
    resolved.push({ from: token, to: display, position });
  }

  return { text: output, resolved, unresolved };
}
|
|
683
|
+
|
|
684
|
+
/**
 * Resolve every supplementary reference and strip supplementary anchor labels.
 *
 * pandoc-crossref numbers all figures sequentially and cannot produce
 * "Figure S1"-style numbering. For each figure or table the registry flags as
 * supplementary (`isSupp`), this function:
 *   1. replaces each `@fig:label` / `@tbl:label` reference with the text
 *      returned by `labelToDisplay`, and
 *   2. removes the `{#fig:label}` / `{#tbl:label ...}` attribute block so
 *      pandoc-crossref ignores the item.
 *
 * @param text - Document text to rewrite.
 * @param registry - Registry whose `figures` and `tables` entries carry `isSupp`.
 * @returns `text`: the rewritten text; `resolved`: one `{ from, to }` entry per
 *   replaced reference, followed by one entry with `to: '(stripped)'` per
 *   removed attribute block. When the registry has no supplementary items the
 *   text is returned unchanged with an empty `resolved` list.
 */
export function resolveSupplementaryRefs(
  text: string,
  registry: Registry
): {
  text: string;
  resolved: Array<{ from: string; to: string }>;
} {
  const resolved: Array<{ from: string; to: string }> = [];

  // Keys look like "fig:label" / "tbl:label"; figures are collected first.
  const suppLabels = new Set<string>();
  for (const [label, info] of registry.figures) {
    if (info.isSupp) suppLabels.add(`fig:${label}`);
  }
  for (const [label, info] of registry.tables) {
    if (info.isSupp) suppLabels.add(`tbl:${label}`);
  }

  if (suppLabels.size === 0) {
    return { text, resolved };
  }

  let output = text;

  // Step 1: swap supplementary references for display text, last to first,
  // so earlier offsets stay valid while the string is being spliced.
  const found = detectDynamicRefs(output);
  for (let idx = found.length - 1; idx >= 0; idx -= 1) {
    const ref = found[idx];
    if (!ref || !suppLabels.has(`${ref.type}:${ref.label}`)) continue;

    const display = labelToDisplay(ref.type as 'fig' | 'tbl' | 'eq', ref.label, registry);
    if (!display) continue;

    const head = output.slice(0, ref.position);
    const tail = output.slice(ref.position + ref.match.length);
    output = head + display + tail;
    resolved.push({ from: ref.match, to: display });
  }

  // Step 2: drop the attribute block of each supplementary anchor, with or
  // without trailing attributes, so pandoc-crossref does not re-number it.
  for (const key of suppLabels) {
    const escaped = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const pattern = new RegExp(`\\{#${escaped}(?:\\s[^}]*)?\\}`, 'g');

    for (const attrBlock of output.match(pattern) ?? []) {
      resolved.push({ from: attrBlock, to: '(stripped)' });
    }
    output = output.replace(pattern, '');
  }

  return { text: output, resolved };
}
|
|
744
|
+
|
|
745
|
+
/**
 * Render a registry as a human-readable, multi-line listing.
 *
 * Emits up to three sections ("Figures:", "Tables:", "Equations:"), each only
 * when the corresponding collection is non-empty, separated by a blank line.
 * Supplementary figures and tables (`isSupp`) have their number prefixed with
 * "S". If every collection is empty, a single placeholder line is returned.
 *
 * @param registry - Registry with `figures`, `tables` and `equations` collections.
 * @returns The formatted listing, lines joined with `\n`.
 */
export function formatRegistry(registry: Registry): string {
  // Each element is one fully rendered section (header plus entries).
  const sections: string[] = [];

  if (registry.figures.size > 0) {
    const rows = ['Figures:'];
    for (const [label, info] of registry.figures) {
      const prefix = info.isSupp ? 'S' : '';
      rows.push(` Figure ${prefix}${info.num}: @fig:${label} (${info.file})`);
    }
    sections.push(rows.join('\n'));
  }

  if (registry.tables.size > 0) {
    const rows = ['Tables:'];
    for (const [label, info] of registry.tables) {
      const prefix = info.isSupp ? 'S' : '';
      rows.push(` Table ${prefix}${info.num}: @tbl:${label} (${info.file})`);
    }
    sections.push(rows.join('\n'));
  }

  if (registry.equations.size > 0) {
    const rows = ['Equations:'];
    for (const [label, info] of registry.equations) {
      rows.push(` Equation ${info.num}: @eq:${label} (${info.file})`);
    }
    sections.push(rows.join('\n'));
  }

  if (sections.length === 0) {
    return 'No figure/table anchors found.';
  }

  // A blank line between sections is the same as joining them with "\n\n".
  return sections.join('\n\n');
}
|